Add `anyasciiWrite`, which uses a Zig writer to avoid unnecessary allocations.
+ Add `anyasciiWrite`, which writes directly to a Zig writer to avoid unnecessary allocations in some use cases.
+ Use `anyasciiWrite` in `utf8ToAscii` to avoid the duplicate allocations of the previous implementation.
This commit is contained in:
parent
95f2130a13
commit
eb85754ece
1 changed file with 17 additions and 4 deletions
19
src/lib.zig
19
src/lib.zig
|
@ -18,6 +18,18 @@ pub fn anyascii(allocator: std.mem.Allocator, codepoint: u21) ![]const u8 {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert a unicode codepoint to its ascii equivalent, in the provided writer.
|
||||||
|
pub fn anyasciiWrite(writer: std.io.AnyWriter, codepoint: u21) !void {
|
||||||
|
// Call C anyascii function.
|
||||||
|
var cChars: [*]u8 = undefined;
|
||||||
|
const charsCount = c.anyascii(codepoint, @ptrCast(&cChars));
|
||||||
|
|
||||||
|
// Write every byte from the raw C pointer.
|
||||||
|
for (0..charsCount) |i| {
|
||||||
|
try writer.writeByte(cChars[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Convert a given UTF-8 string to its ASCII equivalent using anyascii.
|
/// Convert a given UTF-8 string to its ASCII equivalent using anyascii.
|
||||||
pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 {
|
pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 {
|
||||||
// Get a UTF8 iterator.
|
// Get a UTF8 iterator.
|
||||||
|
@ -27,11 +39,12 @@ pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 {
|
||||||
var outStr = try std.ArrayList(u8).initCapacity(allocator, str.len | 15);
|
var outStr = try std.ArrayList(u8).initCapacity(allocator, str.len | 15);
|
||||||
defer outStr.deinit();
|
defer outStr.deinit();
|
||||||
|
|
||||||
|
// Get a writer to the array list.
|
||||||
|
const writer = outStr.writer().any();
|
||||||
|
|
||||||
// For each codepoint, convert it to ascii.
|
// For each codepoint, convert it to ascii.
|
||||||
while (iterator.nextCodepoint()) |codepoint| {
|
while (iterator.nextCodepoint()) |codepoint| {
|
||||||
const ascii = try anyascii(allocator, codepoint);
|
try anyasciiWrite(writer, codepoint);
|
||||||
defer allocator.free(ascii);
|
|
||||||
try outStr.appendSlice(ascii); //TODO use a writer to avoid this copy
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the built full ascii equivalent.
|
// Return the built full ascii equivalent.
|
||||||
|
|
Loading…
Reference in a new issue