zlugify/src/lib.zig

const std = @import("std");
const anyascii = @import("anyascii");

/// The characters to strip when trimming the string: space, tab, carriage return, line feed,
/// single quote, double quote, slash and backslash.
/// They are trimmed from both ends of the ASCII version of the string, not from the UTF-8 input.
const valuesToStrip = " \t\r\n\'\"/\\";

/// Convert the provided string to a slugged version of it.
/// With this function, you can set the separator to use.
///
/// The string is first converted to ASCII, then:
/// - characters of `valuesToStrip` are trimmed from both ends;
/// - ASCII alphanumeric characters are kept, lowercased;
/// - a character of `valuesToStrip` following a kept character is replaced by `separator`,
///   consecutive ones being collapsed into a single separator;
/// - every other character is removed.
/// The result never starts nor ends with a separator, and is empty when nothing can be kept.
///
/// The caller owns the returned slice and must free it with `allocator`.
/// Errors of the ASCII conversion and of the allocator are returned as is.
pub fn slugifySeparator(allocator: std.mem.Allocator, str: []const u8, separator: u8) ![]u8 {
	// Convert the provided UTF-8 string to ASCII.
	const fullResult = try anyascii.utf8ToAscii(allocator, str);

	// Trim the ASCII string (trimming the UTF-8 one could break its encoding).
	// `std.mem.trim` returns an empty slice when the string only contains values to strip,
	// where computing both bounds separately would produce an invalid range.
	const result = std.mem.trim(u8, fullResult, valuesToStrip);

	// Check each char to remove them / replace them by their slugged version if needed.
	// The slug is written in place at the start of `fullResult`: `length` never exceeds the number of
	// characters already read, so a character is never overwritten before being read.
	var previousIsSeparator = true; // Setting it to true at start forbids the result to start with a separator.
	var length: usize = 0;
	for (result) |char| {
		if (std.mem.indexOfScalar(u8, valuesToStrip, char) != null) {
			// Whitespace-like character: replace it by a separator, or drop it if the previous written character is a separator.
			if (!previousIsSeparator) {
				fullResult[length] = separator;
				length += 1;
				previousIsSeparator = true;
			}
		} else if (std.ascii.isAlphanumeric(char)) {
			// Keep alphanumeric characters, converted to their lowercased version.
			fullResult[length] = std.ascii.toLower(char);
			length += 1;
			previousIsSeparator = false;
		}
		// Any other character is dropped: nothing is written for it.
	}

	// A separator may have been written before trailing characters which were all dropped
	// (e.g. "hello !"): remove it so that the slug does not end with a separator.
	if (length > 0 and previousIsSeparator) {
		length -= 1;
	}

	// If we removed characters, free the remaining unused memory.
	if (length < fullResult.len) {
		if (!allocator.resize(fullResult, length)) {
			// In case of a failed resize, reallocate.
			// The original buffer is freed whether the new allocation succeeds or not.
			defer allocator.free(fullResult);
			const resultAlloc = try allocator.alloc(u8, length);
			@memcpy(resultAlloc, fullResult[0..length]);
			return resultAlloc;
		}
	}

	// Return the result without the removed characters.
	return fullResult[0..length];
}

/// Slugify the provided string, using '-' as the separator.
/// This is `slugifySeparator` called with the default separator: see it for the details.
pub fn slugify(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
	const defaultSeparator: u8 = '-';
	return slugifySeparator(allocator, str, defaultSeparator);
}

test slugify {
	// Each case associates the expected slug with the string to slugify.
	const cases = [_]struct { expected: []const u8, toSlugify: []const u8 }{
		.{ .expected = "this-is-a-test", .toSlugify = "   This is a test.\t\n" },
		.{ .expected = "something-else", .toSlugify = "SôMÈThing   \t    ÉLSÈ" },
		.{ .expected = "slugify-a-string", .toSlugify = "𝒔𝒍𝒖𝒈𝒊𝒇𝒚 𝒂 𝒔𝒕𝒓𝒊𝒏𝒈" },
		.{ .expected = "a", .toSlugify = "à " },

		.{ .expected = "blosse-shenzhen", .toSlugify = "Blöße 深圳" },
		.{ .expected = "qiyu-xian", .toSlugify = "埼玉 県" },
		.{ .expected = "samt-redia", .toSlugify = "სამტრედია" },
		.{ .expected = "say-x-ag", .toSlugify = "⠠⠎⠁⠽⠀⠭⠀⠁⠛" },
		.{ .expected = "5-x", .toSlugify = "☆ ♯ ♰ ⚄ ⛌" },
		.{ .expected = "no-m-a-s", .toSlugify = "№ ℳ ⅋ ⅍" },

		.{ .expected = "hearts", .toSlugify = "♥" },
		.{ .expected = "hello-fox", .toSlugify = "hello 🦊" },
		.{ .expected = "deja-vu", .toSlugify = "  Déjà Vu!  " },
		.{ .expected = "toi-yeu-nhung-chu-ky-lan", .toSlugify = "tôi yêu những chú kỳ lân" },
	};

	// Cases are checked in order, stopping at the first failure.
	for (cases) |case| {
		try testSlugify(case.expected, case.toSlugify);
	}
}
/// Slugify `toSlugify` with the testing allocator and check that the result is `expected`.
fn testSlugify(expected: []const u8, toSlugify: []const u8) !void {
	const allocator = std.testing.allocator;

	const actual = try slugify(allocator, toSlugify);
	// The slug is owned by the test: free it even if the comparison fails.
	defer allocator.free(actual);

	return std.testing.expectEqualStrings(expected, actual);
}

test slugifySeparator {
	// Check that a custom separator ('_') is used instead of the default one.
	const expected = "something_else";
	const toSlugify = "SôMÈThing   \t    ÉLSÈ";
	try testSlugifySeparator(expected, toSlugify, '_');
}
/// Slugify `toSlugify` with the given separator and the testing allocator, and check that the result is `expected`.
fn testSlugifySeparator(expected: []const u8, toSlugify: []const u8, separator: u8) !void {
	const allocator = std.testing.allocator;

	const actual = try slugifySeparator(allocator, toSlugify, separator);
	// The slug is owned by the test: free it even if the comparison fails.
	defer allocator.free(actual);

	return std.testing.expectEqualStrings(expected, actual);
}
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+								const std = @import("std");
 								const anyascii = @import("anyascii");
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+								/// The values to strip when trimming the string.
 								const valuesToStrip = " \t\r\n\'\"/\\";
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+								/// Convert the provided string to a slugged version of it.
 								/// With this function, you can set the separator to use.
 								pub fn slugifySeparator(allocator: std.mem.Allocator, str: []const u8, separator: u8) ![]u8 {
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+									// Convert the provided UTF-8 string to ASCII.
 									const fullResult = try anyascii.utf8ToAscii(allocator, str);
 									const startShift = fullResult.len - std.mem.trimLeft(u8, fullResult, valuesToStrip).len;
 									const endShift = fullResult.len - std.mem.trimRight(u8, fullResult, valuesToStrip).len;
 									const result = fullResult[startShift..fullResult.len - endShift];
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
 									// Check each char to remove them / replace them by their slugged version if needed.
 									var previousIsSeparator = true; // Setting it to true at start forbids the result to start with a separator.
 									var shift: usize = 0;
 									for (0..result.len, result) |i, char| {
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+										if (char == ' ' or char == '\t' or char == '\r' or char == '\n' or char == '\'' or char == '"' or char == '/' or char == '\\') {
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+											// Whitespace-like character: replace it by a dash, or remove it if the previous character is a dash.
 											if (!previousIsSeparator) {
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+												fullResult[i - shift] = separator;
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+												previousIsSeparator = true;
 											} else {
 												// To remove the current character, we just shift all future written characters.
 												shift += 1;
 											}
 										} else {
 											// In the general case, we keep alphanumeric characters and all the rest is shifted.
 											if (std.ascii.isAlphanumeric(char)) {
 												// Convert the ASCII character to its lowercased version.
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+												fullResult[i - shift] = std.ascii.toLower(char);
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+												previousIsSeparator = false;
 											} else {
 												shift += 1;
 											}
 										}
 									}
 									// If we removed characters, free the remaining unused memory.
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+									if (shift > 0 or startShift > 0 or endShift > 0) {
 										if (!allocator.resize(fullResult, result.len - shift)) {
 											// In case of a failed resize, reallocate.
 											defer allocator.free(fullResult);
 											const resultAlloc = try allocator.alloc(u8, result.len - shift);
 											@memcpy(resultAlloc, fullResult[0..result.len - shift]);
 											return resultAlloc;
 										}
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+									}
 									// Return the result without the shifted characters.
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+									return fullResult[0..result.len - shift];
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+								}
 								/// Convert the provided string to a slugged version of it with the default '-' separator.
 								pub fn slugify(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
 									return slugifySeparator(allocator, str, '-');
 								}
 								test slugify {
 									try testSlugify("this-is-a-test", "   This is a test.\t\n");
 									try testSlugify("something-else", "SôMÈThing   \t    ÉLSÈ");
 									try testSlugify("slugify-a-string", "𝒔𝒍𝒖𝒈𝒊𝒇𝒚 𝒂 𝒔𝒕𝒓𝒊𝒏𝒈");
-												Trimming the UTF-8 encoded string broke encoding in some cases, doing it on the ASCII string.

											
										
										
											2025-01-09 01:37:08 +01:00
+									try testSlugify("a", "à ");
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
 									try testSlugify("blosse-shenzhen", "Blöße 深圳");
 									try testSlugify("qiyu-xian", "埼玉 県");
 									try testSlugify("samt-redia", "სამტრედია");
 									try testSlugify("say-x-ag", "⠠⠎⠁⠽⠀⠭⠀⠁⠛");
 									try testSlugify("5-x", "☆ ♯ ♰ ⚄ ⛌");
 									try testSlugify("no-m-a-s", "№ ℳ ⅋ ⅍");
 									try testSlugify("hearts", "♥");
 									try testSlugify("hello-fox", "hello 🦊");
 									try testSlugify("deja-vu", "  Déjà Vu!  ");
 									try testSlugify("toi-yeu-nhung-chu-ky-lan", "tôi yêu những chú kỳ lân");
 								}
 								/// Test slugify function.
-												Do not publish test functions.

											
										
										
											2025-01-08 23:33:30 +01:00
+								fn testSlugify(expected: []const u8, toSlugify: []const u8) !void {
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+									const slug = try slugify(std.testing.allocator, toSlugify);
 									defer std.testing.allocator.free(slug);
 									try std.testing.expectEqualStrings(expected, slug);
 								}
 								test slugifySeparator {
 									try testSlugifySeparator("something_else", "SôMÈThing   \t    ÉLSÈ", '_');
 								}
 								/// Test slugifySeparator function.
-												Do not publish test functions.

											
										
										
											2025-01-08 23:33:30 +01:00
+								fn testSlugifySeparator(expected: []const u8, toSlugify: []const u8, separator: u8) !void {
-												Initial commit.

+ Add `slugify` and `slugifySeparator` to convert a provided string to a slugged version of it.
+ Add tests.
+ Write a tiny documentation with examples.

											
										
										
											2025-01-08 23:14:17 +01:00
+									const slug = try slugifySeparator(std.testing.allocator, toSlugify, separator);
 									defer std.testing.allocator.free(slug);
 									try std.testing.expectEqualStrings(expected, slug);
 								}