From d7a8db9d9009246f1d5c135db01741cf35d6fd9c Mon Sep 17 00:00:00 2001 From: Madeorsk Date: Wed, 8 Jan 2025 23:14:17 +0100 Subject: [PATCH] Initial commit. + Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it. + Add tests. + Write a tiny documentation with examples. --- .gitignore | 7 ++++ LICENSE | 9 +++++ README.md | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++ build.zig | 32 ++++++++++++++++ build.zig.zon | 18 +++++++++ src/lib.zig | 85 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 253 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 build.zig create mode 100644 build.zig.zon create mode 100644 src/lib.zig diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..22ca5a7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +# IntelliJ IDEA +*.iml +.idea/ + +# Zig +.zig-cache/ +zig-out/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1a094ed --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2024 Zeptotech + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f540192 --- /dev/null +++ b/README.md @@ -0,0 +1,102 @@ +

+ zlugify +

+ +

+ Generate ASCII slugs from unicode strings +

+ +zlugify is part of [_zedd_](https://code.zeptotech.net/zedd), a collection of useful libraries for zig. + +## zlugify + +_zlugify_ is a library to generate slugs from all types of UTF-8 encoded strings. It uses [anyascii.zig](https://code.zeptotech.net/zedd/anyascii.zig) to convert UTF-8 encoded strings into ASCII-only strings. + +## Versions + +zlugify 1.0.0 is made and tested with zig 0.13.0. + +## How to use + +### Install + +In your project directory: + +```shell +$ zig fetch --save https://code.zeptotech.net/zedd/zlugify/archive/v1.0.0.tar.gz +``` + +In `build.zig`: + +```zig +// Add zlugify dependency. +const zlugify = b.dependency("zlugify", .{ + .target = target, + .optimize = optimize, +}); +exe.root_module.addImport("zlugify", zlugify.module("zlugify")); +``` + +### Examples + +These examples are heavily inspired by the test cases that you can find at the end of [`lib.zig`](https://code.zeptotech.net/zedd/zlugify/src/branch/main/src/lib.zig). + +#### trim and normalize + +```zig +const slugify = @import("zlugify").slugify; + +const slug = try slugify(allocator, " This is a test.\t\n"); +defer allocator.free(slug); +try std.testing.expectEqualStrings("this-is-a-test", slug); +``` + +#### remove diacritics and unnecessary spaces + +```zig +const slugify = @import("zlugify").slugify; + +const slug = try slugify(allocator, "SôMÈThing \t ÉLSÈ"); +defer allocator.free(slug); +try std.testing.expectEqualStrings("something-else", slug); +``` + +#### convert non-latin characters + +```zig +const slugify = @import("zlugify").slugify; + +const slug = try slugify(allocator, "埼玉 県"); +defer allocator.free(slug); +try std.testing.expectEqualStrings("qiyu-xian", slug); +``` + +#### convert ascii-like characters + +```zig +const slugify = @import("zlugify").slugify; + +const slug = try slugify(allocator, "𝒔𝒍𝒖𝒈𝒊𝒇𝒚 𝒂 𝒔𝒕𝒓𝒊𝒏𝒈"); +defer allocator.free(slug); +try std.testing.expectEqualStrings("slugify-a-string", slug); +``` + +#### convert emojis + +```zig +const slugify = 
@import("zlugify").slugify; + +const slug = try slugify(allocator, "hello 🦊"); +defer allocator.free(slug); +try std.testing.expectEqualStrings("hello-fox", slug); +``` + +#### customized separator + +```zig +const slugifySeparator = @import("zlugify").slugifySeparator; + +const slug = try slugifySeparator(allocator, "tôi yêu những chú kỳ lân", '_'); +defer allocator.free(slug); +try std.testing.expectEqualStrings("toi_yeu_nhung_chu_ky_lan", slug); +``` diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..f3809b9 --- /dev/null +++ b/build.zig @@ -0,0 +1,32 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + // Add anyascii.zig dependency. + const anyascii = b.dependency("anyascii.zig", .{ + .target = target, + .optimize = optimize, + }); + + // Zlugify zig module. + const zlugify = b.addModule("zlugify", .{ + .root_source_file = b.path("src/lib.zig"), + .target = target, + .optimize = optimize, + }); + zlugify.addImport("anyascii", anyascii.module("anyascii")); + + // Library unit tests. 
+    const lib_unit_tests = b.addTest(.{
+        .root_source_file = b.path("src/lib.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+    lib_unit_tests.root_module.addImport("anyascii", anyascii.module("anyascii"));
+    const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
+
+    const test_step = b.step("test", "Run unit tests.");
+    test_step.dependOn(&run_lib_unit_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..0fd4052
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,18 @@
+.{
+    .name = "zlugify",
+    .version = "1.0.0",
+
+    .dependencies = .{
+        .@"anyascii.zig" = .{
+            .url = "https://code.zeptotech.net/zedd/anyascii.zig/archive/v1.1.1.tar.gz",
+            .hash = "1220800d403fc841a4c7b9d09ae8759ae28adff05de33836a3f69f02e8e0ac77bae9",
+        },
+    },
+
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        "README.md",
+    },
+}
diff --git a/src/lib.zig b/src/lib.zig
new file mode 100644
index 0000000..c9159a1
--- /dev/null
+++ b/src/lib.zig
@@ -0,0 +1,102 @@
+const std = @import("std");
+const anyascii = @import("anyascii");
+
+/// Convert the provided string to a slugged version of it.
+/// With this function, you can set the separator to use.
+///
+/// `str` is converted to ASCII with anyascii, then ASCII letters are lowercased, each run of
+/// whitespace-like characters (space, tab, CR, LF, quotes, slash, backslash) becomes a single
+/// `separator`, and any other non-alphanumeric character is dropped. No separator is written
+/// at the start or at the end of the result.
+///
+/// Errors from the ASCII conversion and allocation failures are returned to the caller.
+/// Caller owns the returned slice and must free it with `allocator`.
+pub fn slugifySeparator(allocator: std.mem.Allocator, str: []const u8, separator: u8) ![]u8 {
+    // Trim ASCII whitespace-like bytes only. 0xA0 must not be trimmed at byte level: it is the
+    // last byte of many UTF-8 sequences ("à" is C3 A0, NBSP is C2 A0), so trimming it would
+    // leave a truncated, invalid sequence at the end of the string.
+    const trimmed = std.mem.trim(u8, str, " \t\r\n\'\"/\\");
+    // Convert UTF-8 string to ASCII.
+    const result = try anyascii.utf8ToAscii(allocator, trimmed);
+    // The buffer is ours from here on: release it if shrinking fails below.
+    errdefer allocator.free(result);
+
+    // Compact the slug in place: `len` is the write cursor and never overtakes the read position.
+    var len: usize = 0;
+    var previous_is_separator = true; // Starting at true forbids the result to start with a separator.
+    for (result) |char| {
+        switch (char) {
+            // Whitespace-like character: write one separator, or skip it if we just wrote one.
+            ' ', '\xA0', '\t', '\r', '\n', '\'', '"', '/', '\\' => {
+                if (previous_is_separator) continue;
+                result[len] = separator;
+                len += 1;
+                previous_is_separator = true;
+            },
+            // In the general case, keep lowercased alphanumeric characters and drop the rest.
+            else => {
+                if (!std.ascii.isAlphanumeric(char)) continue;
+                result[len] = std.ascii.toLower(char);
+                len += 1;
+                previous_is_separator = false;
+            },
+        }
+    }
+
+    // Dropped trailing characters (e.g. "hello !") can leave a dangling separator: remove it.
+    if (len > 0 and previous_is_separator) len -= 1;
+
+    // Nothing was removed: the buffer already has the right size.
+    if (len == result.len) return result;
+    // Shrink the allocation so that the returned slice length matches the allocated length.
+    // `resize` may refuse, and returning a shorter slice of the original allocation would make
+    // the caller free it with the wrong length; `realloc` falls back to allocate + copy + free.
+    return allocator.realloc(result, len);
+}
+
+/// Convert the provided string to a slugged version of it with the default '-' separator.
+/// Caller owns the returned slice and must free it with `allocator`.
+pub fn slugify(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
+    return slugifySeparator(allocator, str, '-');
+}
+
+test slugify {
+    try testSlugify("this-is-a-test", " This is a test.\t\n");
+    try testSlugify("something-else", "SôMÈThing \t ÉLSÈ");
+    try testSlugify("slugify-a-string", "𝒔𝒍𝒖𝒈𝒊𝒇𝒚 𝒂 𝒔𝒕𝒓𝒊𝒏𝒈");
+
+    try testSlugify("blosse-shenzhen", "Blöße 深圳");
+    try testSlugify("qiyu-xian", "埼玉 県");
+    try testSlugify("samt-redia", "სამტრედია");
+    try testSlugify("say-x-ag", "⠠⠎⠁⠽⠀⠭⠀⠁⠛");
+    try testSlugify("5-x", "☆ ♯ ♰ ⚄ ⛌");
+    try testSlugify("no-m-a-s", "№ ℳ ⅋ ⅍");
+
+    try testSlugify("hearts", "♥");
+    try testSlugify("hello-fox", "hello 🦊");
+    try testSlugify("deja-vu", " Déjà Vu! ");
+    try testSlugify("toi-yeu-nhung-chu-ky-lan", "tôi yêu những chú kỳ lân");
+
+    // Regression: a trailing character whose UTF-8 encoding ends with byte 0xA0 ("à" is C3 A0).
+    try testSlugify("deja", "Déjà");
+    // Regression: no dangling separator when the trailing characters are dropped.
+    try testSlugify("hello", "hello !");
+}
+/// Test slugify function.
+pub fn testSlugify(expected: []const u8, toSlugify: []const u8) !void {
+    const slug = try slugify(std.testing.allocator, toSlugify);
+    defer std.testing.allocator.free(slug);
+
+    try std.testing.expectEqualStrings(expected, slug);
+}
+
+test slugifySeparator {
+    try testSlugifySeparator("something_else", "SôMÈThing \t ÉLSÈ", '_');
+}
+/// Test slugifySeparator function.
+pub fn testSlugifySeparator(expected: []const u8, toSlugify: []const u8, separator: u8) !void {
+    const slug = try slugifySeparator(std.testing.allocator, toSlugify, separator);
+    defer std.testing.allocator.free(slug);
+
+    try std.testing.expectEqualStrings(expected, slug);
+}