Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.
This commit is contained in:
Madeorsk 2025-01-08 23:14:17 +01:00
commit 32128023cc
Signed by: Madeorsk
GPG key ID: 677E51CA765BB79F
6 changed files with 253 additions and 0 deletions

7
.gitignore vendored Normal file
View file

@ -0,0 +1,7 @@
# IntelliJ IDEA
*.iml
.idea/
# Zig
.zig-cache/
zig-out/

9
LICENSE Normal file
View file

@ -0,0 +1,9 @@
MIT License
Copyright (c) 2024 Zeptotech
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

102
README.md Normal file
View file

@ -0,0 +1,102 @@
<h1 align="center">
zlugify
</h1>
<p align="center">
Generate ASCII slugs from unicode strings
</p>
zlugify is part of [_zedd_](https://code.zeptotech.net/zedd), a collection of useful libraries for zig.
## zlugify
_zlugify_ is a library to generate slugs from all types of UTF-8 encoded strings. It uses [anyascii.zig](https://code.zeptotech.net/zedd/anyascii.zig) to convert UTF-8 encoded strings into ASCII-only strings.
## Versions
zlugify 1.0.0 is made and tested with zig 0.13.0.
## How to use
### Install
In your project directory:
```shell
$ zig fetch --save https://code.zeptotech.net/zedd/zlugify/archive/v1.0.0.tar.gz
```
In `build.zig`:
```zig
// Add zlugify dependency.
const zlugify = b.dependency("zlugify", .{
.target = target,
.optimize = optimize,
});
exe.root_module.addImport("zlugify", zlugify.module("zlugify"));
```
### Examples
These examples are heavily inspired by the test cases that you can find at the end of [`lib.zig`](https://code.zeptotech.net/zedd/zlugify/src/branch/main/src/lib.zig).
#### trim and normalize
```zig
const slugify = @import("zlugify").slugify;
const slug = try slugify(allocator, " This is a test.\t\n");
defer allocator.free(slug);
try std.testing.expectEqualStrings("this-is-a-test", slug);
```
#### remove diacritics and unnecessary spaces
```zig
const slugify = @import("zlugify").slugify;
const slug = try slugify(allocator, "SôMÈThing \t ÉLSÈ");
defer allocator.free(slug);
try std.testing.expectEqualStrings("something-else", slug);
```
#### convert non-latin characters
```zig
const slugify = @import("zlugify").slugify;
const slug = try slugify(allocator, "埼玉 県");
defer allocator.free(slug);
try std.testing.expectEqualStrings("qiyu-xian", slug);
```
#### convert ascii-like characters
```zig
const slugify = @import("zlugify").slugify;
const slug = try slugify(allocator, "𝒔𝒍𝒖𝒈𝒊𝒇𝒚 𝒂 𝒔𝒕𝒓𝒊𝒏𝒈");
defer allocator.free(slug);
try std.testing.expectEqualStrings("slugify-a-string", slug);
```
#### convert emojis
```zig
const slugify = @import("zlugify").slugify;
const slug = try slugify(allocator, "hello 🦊");
defer allocator.free(slug);
try std.testing.expectEqualStrings("hello-fox", slug);
```
#### customized separator
```zig
const slugifySeparator = @import("zlugify").slugifySeparator;
const slug = try slugifySeparator(allocator, "tôi yêu những chú kỳ lân", '_');
defer allocator.free(slug);
try std.testing.expectEqualStrings("toi_yeu_nhung_chu_ky_lan", slug);
```

32
build.zig Normal file
View file

@ -0,0 +1,32 @@
const std = @import("std");
/// Declare the build graph: the public `zlugify` module and a `test` step
/// that runs the unit tests found in `src/lib.zig`.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // The anyascii module is imported by both the library and its tests.
    const anyascii_dependency = b.dependency("anyascii.zig", .{
        .target = target,
        .optimize = optimize,
    });

    // Module exposed to packages depending on zlugify.
    const library_module = b.addModule("zlugify", .{
        .root_source_file = b.path("src/lib.zig"),
        .target = target,
        .optimize = optimize,
    });
    library_module.addImport("anyascii", anyascii_dependency.module("anyascii"));

    // Test executable built from the same root source file.
    const unit_tests = b.addTest(.{
        .root_source_file = b.path("src/lib.zig"),
        .target = target,
        .optimize = optimize,
    });
    unit_tests.root_module.addImport("anyascii", anyascii_dependency.module("anyascii"));

    // `zig build test` runs the test executable.
    const run_unit_tests = b.addRunArtifact(unit_tests);
    const test_step = b.step("test", "Run unit tests.");
    test_step.dependOn(&run_unit_tests.step);
}

18
build.zig.zon Normal file
View file

@ -0,0 +1,18 @@
// Package manifest for zlugify.
.{
// Name and version of this package.
.name = "zlugify",
.version = "1.0.0",
// External packages required by the build.
.dependencies = .{
// anyascii.zig, pinned to the v1.1.1 release archive.
// The key must match the name passed to `b.dependency` in build.zig.
.@"anyascii.zig" = .{
.url = "https://code.zeptotech.net/zedd/anyascii.zig/archive/v1.1.1.tar.gz",
// Expected hash of the fetched archive contents.
.hash = "1220800d403fc841a4c7b9d09ae8759ae28adff05de33836a3f69f02e8e0ac77bae9",
},
},
// Files and directories that belong to the package.
// NOTE(review): LICENSE is not listed here — confirm whether it should ship with the package.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
"README.md",
},
}

85
src/lib.zig Normal file
View file

@ -0,0 +1,85 @@
const std = @import("std");
const anyascii = @import("anyascii");
/// Convert the provided string to a slugged version of it, using `separator` between words.
///
/// The input is transliterated to ASCII with `anyascii.utf8ToAscii`, then:
/// - alphanumeric characters are kept and lowercased;
/// - every run of whitespace-like characters (space, tab, CR, LF, quotes, slash, backslash)
///   becomes a single `separator`;
/// - every other character is dropped.
/// The slug never starts nor ends with a separator.
///
/// The returned slice is allocated with `allocator` and its length is exactly the slug length:
/// the caller owns it and must free it with the same allocator.
/// Fails with the errors of `anyascii.utf8ToAscii`, or with `error.OutOfMemory` if the result
/// cannot be shrunk to its final size.
pub fn slugifySeparator(allocator: std.mem.Allocator, str: []const u8, separator: u8) ![]u8 {
    // Trim ASCII whitespace-like bytes only. The byte 0xA0 must NOT be trimmed here: it is the
    // last byte of the UTF-8 encoding of many characters (e.g. "à" is C3 A0, NBSP is C2 A0), so
    // trimming it would cut a character in half and hand invalid UTF-8 to the transliteration.
    const trimmed = std.mem.trim(u8, str, " \t\r\n\'\"/\\");

    // Convert UTF-8 string to ASCII.
    const result = try anyascii.utf8ToAscii(allocator, trimmed);
    errdefer allocator.free(result);

    // Compact the slug in place: `written` never gets ahead of the read position.
    // Starting with `previous_is_separator` set forbids the result to start with a separator.
    var previous_is_separator = true;
    var written: usize = 0;
    for (result) |char| {
        switch (char) {
            ' ', '\xA0', '\t', '\r', '\n', '\'', '"', '/', '\\' => {
                // Whitespace-like character: emit one separator per run, skip the others.
                if (!previous_is_separator) {
                    result[written] = separator;
                    written += 1;
                    previous_is_separator = true;
                }
            },
            else => {
                // Keep alphanumeric characters (lowercased), drop everything else.
                if (std.ascii.isAlphanumeric(char)) {
                    result[written] = std.ascii.toLower(char);
                    written += 1;
                    previous_is_separator = false;
                }
            },
        }
    }

    // A separator may have been written before trailing dropped characters ("foo !"): remove it.
    if (written > 0 and previous_is_separator) {
        written -= 1;
    }

    // Nothing was removed: the buffer already has the right size.
    if (written == result.len) return result;

    // Shrink the allocation to the slug length. `realloc` falls back to allocate-and-copy when
    // the allocator refuses to resize in place, so the returned slice always matches its
    // allocation and can safely be freed by the caller.
    return allocator.realloc(result, written);
}
/// Build a slug from `str`, joining words with the default '-' separator.
/// Thin wrapper around `slugifySeparator`; the returned slice is allocated with `allocator`.
pub fn slugify(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
    const default_separator: u8 = '-';
    return slugifySeparator(allocator, str, default_separator);
}
// Check `slugify` (default '-' separator) against inputs written in various scripts.
// Each line gives the expected slug first, then the string to slugify.
test slugify {
// Trimming, punctuation removal and lowercasing.
try testSlugify("this-is-a-test", " This is a test.\t\n");
// Diacritics and runs of mixed whitespace.
try testSlugify("something-else", "SôMÈThing \t ÉLSÈ");
// Mathematical alphanumeric symbols.
try testSlugify("slugify-a-string", "𝒔𝒍𝒖𝒈𝒊𝒇𝒚 𝒂 𝒔𝒕𝒓𝒊𝒏𝒈");
// Non-latin scripts.
try testSlugify("blosse-shenzhen", "Blöße 深圳");
try testSlugify("qiyu-xian", "埼玉 県");
try testSlugify("samt-redia", "სამტრედია");
try testSlugify("say-x-ag", "⠠⠎⠁⠽⠀⠭⠀⠁⠛");
// Symbols.
try testSlugify("5-x", "☆ ♯ ♰ ⚄ ⛌");
// NOTE(review): the expected slug starts with "no-m" but the input shown here only holds two
// symbols — characters may have been lost from this literal; confirm against the repository.
try testSlugify("no-m-a-s", " ⅋ ⅍");
// NOTE(review): the input literal is empty while a non-empty slug is expected — a character
// was presumably lost from this literal; confirm against the repository.
try testSlugify("hearts", "");
// Emojis.
try testSlugify("hello-fox", "hello 🦊");
// Surrounding spaces and trailing punctuation.
try testSlugify("deja-vu", " Déjà Vu! ");
// Vietnamese diacritics.
try testSlugify("toi-yeu-nhung-chu-ky-lan", "tôi yêu những chú kỳ lân");
}
/// Slugify `toSlugify` with the default separator using the testing allocator,
/// and fail unless the produced slug equals `expected`.
pub fn testSlugify(expected: []const u8, toSlugify: []const u8) !void {
    const allocator = std.testing.allocator;

    const actual = try slugify(allocator, toSlugify);
    defer allocator.free(actual);

    try std.testing.expectEqualStrings(expected, actual);
}
// Check that `slugifySeparator` joins words with the requested separator.
test slugifySeparator {
    const slug = try slugifySeparator(std.testing.allocator, "SôMÈThing \t ÉLSÈ", '_');
    defer std.testing.allocator.free(slug);
    try std.testing.expectEqualStrings("something_else", slug);
}
/// Slugify `toSlugify` with the given `separator` using the testing allocator,
/// and fail unless the produced slug equals `expected`.
pub fn testSlugifySeparator(expected: []const u8, toSlugify: []const u8, separator: u8) !void {
    const allocator = std.testing.allocator;

    const actual = try slugifySeparator(allocator, toSlugify, separator);
    defer allocator.free(actual);

    try std.testing.expectEqualStrings(expected, actual);
}