Add anyasciiWrite, which uses a Zig writer to avoid unnecessary allocations.

+ Add `anyasciiWrite` which writes to a zig writer directly to avoid unnecessary allocations in some use cases.
+ Use `anyasciiWrite` in `utf8ToAscii` to avoid the duplicate allocations of the previous implementation.
This commit is contained in:
Madeorsk 2025-01-08 22:14:17 +01:00
parent 95f2130a13
commit eb85754ece
Signed by: Madeorsk
GPG key ID: 677E51CA765BB79F

View file

@ -18,6 +18,18 @@ pub fn anyascii(allocator: std.mem.Allocator, codepoint: u21) ![]const u8 {
return result;
}
/// Convert a unicode codepoint to its ASCII equivalent and write it to the provided writer.
///
/// No memory is allocated here: the bytes are written straight from the
/// pointer set by the C `anyascii` function.
/// Nothing is written when `anyascii` reports a length of zero.
/// Any error returned by `writer` is propagated to the caller.
pub fn anyasciiWrite(writer: std.io.AnyWriter, codepoint: u21) !void {
    // Call C anyascii function: it sets `cChars` through the out-pointer and returns the byte count.
    var cChars: [*]u8 = undefined;
    const charsCount = c.anyascii(codepoint, @ptrCast(&cChars));

    // Nothing to write; this also avoids reading `cChars`, which may still be undefined here.
    if (charsCount == 0) return;

    // Write every byte with a single call instead of one writer dispatch per byte.
    try writer.writeAll(cChars[0..charsCount]);
}
/// Convert a given UTF-8 string to its ASCII equivalent using anyascii.
pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 {
// Get a UTF8 iterator.
@ -27,11 +39,12 @@ pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 {
var outStr = try std.ArrayList(u8).initCapacity(allocator, str.len | 15);
defer outStr.deinit();
// Get a writer to the array list.
const writer = outStr.writer().any();
// For each codepoint, convert it to ascii.
while (iterator.nextCodepoint()) |codepoint| {
const ascii = try anyascii(allocator, codepoint);
defer allocator.free(ascii);
try outStr.appendSlice(ascii); //TODO use a writer to avoid this copy
try anyasciiWrite(writer, codepoint);
}
// Return the built full ascii equivalent.