Add anyasciiWrite, which writes to a Zig writer to avoid unnecessary allocations.

+ Add `anyasciiWrite` which writes to a zig writer directly to avoid unnecessary allocations in some use cases.
+ Use `anyasciiWrite` in `utf8ToAscii` to avoid the duplicate allocations of the previous implementation.
This commit is contained in:
Madeorsk 2025-01-08 22:14:17 +01:00
parent 95f2130a13
commit eb85754ece
Signed by: Madeorsk
GPG key ID: 677E51CA765BB79F

View file

@ -18,20 +18,33 @@ pub fn anyascii(allocator: std.mem.Allocator, codepoint: u21) ![]const u8 {
return result; return result;
} }
/// Convert a unicode codepoint to its ASCII equivalent and write it to the provided writer.
///
/// The ASCII bytes reported by the C `anyascii` function are forwarded to
/// `writer` in a single `writeAll` call, so this function performs no
/// allocation of its own. Nothing is written when the C function reports
/// zero bytes.
///
/// Errors: any error returned by `writer` is propagated to the caller.
pub fn anyasciiWrite(writer: std.io.AnyWriter, codepoint: u21) !void {
    // Out-pointer filled in by the C function; it is never written through here.
    var ascii_ptr: [*]const u8 = undefined;
    const ascii_len = c.anyascii(codepoint, @ptrCast(&ascii_ptr));

    // Do not touch the pointer when no bytes were reported: it may still be undefined.
    if (ascii_len == 0) return;

    // One slice write instead of one type-erased writer call per byte.
    try writer.writeAll(ascii_ptr[0..ascii_len]);
}
/// Convert a given UTF-8 string to its ASCII equivalent using anyascii. /// Convert a given UTF-8 string to its ASCII equivalent using anyascii.
pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 { pub fn utf8ToAscii(allocator: std.mem.Allocator, str: []const u8) ![]const u8 {
// Get a UTF8 iterator. // Get a UTF8 iterator.
var iterator = (try std.unicode.Utf8View.init(str)).iterator(); var iterator = (try std.unicode.Utf8View.init(str)).iterator();
// Initialize a out string arraylist where ascii equivalents will be appended. // Initialize a out string array list where ascii equivalents will be appended.
var outStr = try std.ArrayList(u8).initCapacity(allocator, str.len | 15); var outStr = try std.ArrayList(u8).initCapacity(allocator, str.len | 15);
defer outStr.deinit(); defer outStr.deinit();
// Get a writer to the array list.
const writer = outStr.writer().any();
// For each codepoint, convert it to ascii. // For each codepoint, convert it to ascii.
while (iterator.nextCodepoint()) |codepoint| { while (iterator.nextCodepoint()) |codepoint| {
const ascii = try anyascii(allocator, codepoint); try anyasciiWrite(writer, codepoint);
defer allocator.free(ascii);
try outStr.appendSlice(ascii); //TODO use a writer to avoid this copy
} }
// Return the built full ascii equivalent. // Return the built full ascii equivalent.