Anonymize your writing style. A Zig WASM engine detects authorship markers, and a fine-tuned LLM rewrites the text to remove them. Runs entirely in-browser.
fantasma.qstorage.quilibrium.com/
wasm
privacy
qwen
zig
1const std = @import("std");
2
/// Decode one UTF-8 codepoint from the start of `bytes`.
///
/// Returns the codepoint together with the number of bytes consumed:
/// - empty input yields `.{ .codepoint = 0, .len = 0 }`;
/// - a well-formed sequence yields its scalar value and a length of 1 to 4;
/// - malformed input (stray continuation byte, invalid lead byte, truncated
///   sequence, bad continuation byte, overlong encoding, UTF-16 surrogate, or
///   a value above U+10FFFF) yields U+FFFD with a length of 1, so a caller
///   that advances by `len` always makes progress and resynchronizes on the
///   next byte.
pub fn decodeUtf8(bytes: []const u8) struct { codepoint: u21, len: u3 } {
    if (bytes.len == 0) return .{ .codepoint = 0, .len = 0 };

    const b0 = bytes[0];
    if (b0 < 0x80) return .{ .codepoint = b0, .len = 1 };

    // For each multi-byte form: total sequence length, mask selecting the
    // payload bits of the lead byte, and the smallest codepoint that really
    // needs that many bytes (anything smaller is an overlong encoding).
    const Lead = struct { len: u3, mask: u8, min: u21 };
    const lead: Lead = if ((b0 & 0xE0) == 0xC0)
        .{ .len = 2, .mask = 0x1F, .min = 0x80 }
    else if ((b0 & 0xF0) == 0xE0)
        .{ .len = 3, .mask = 0x0F, .min = 0x800 }
    else if ((b0 & 0xF8) == 0xF0)
        .{ .len = 4, .mask = 0x07, .min = 0x10000 }
    else
        .{ .len = 0, .mask = 0, .min = 0 }; // stray continuation or invalid lead byte

    decode: {
        // Invalid lead byte, or the input ends before the sequence does.
        if (lead.len == 0 or bytes.len < lead.len) break :decode;

        var cp: u21 = b0 & lead.mask;
        for (bytes[1..lead.len]) |b| {
            // Every trailing byte must have the form 10xxxxxx.
            if ((b & 0xC0) != 0x80) break :decode;
            cp = (cp << 6) | (b & 0x3F);
        }

        if (cp < lead.min) break :decode; // overlong encoding
        if (cp >= 0xD800 and cp <= 0xDFFF) break :decode; // UTF-16 surrogate
        if (cp > 0x10FFFF) break :decode; // beyond the Unicode range

        return .{ .codepoint = cp, .len = lead.len };
    }

    return .{ .codepoint = 0xFFFD, .len = 1 }; // replacement char
}
16
/// Report whether `cp` is an ASCII letter (`a`-`z` or `A`-`Z`).
pub fn isAlpha(cp: u21) bool {
    return switch (cp) {
        'a'...'z', 'A'...'Z' => true,
        else => false,
    };
}
21
/// Report whether `cp` is an ASCII uppercase letter (`A`-`Z`).
pub fn isUpper(cp: u21) bool {
    return switch (cp) {
        'A'...'Z' => true,
        else => false,
    };
}
25
/// Map an ASCII uppercase letter to its lowercase form; any other codepoint
/// is returned unchanged.
pub fn toLower(cp: u21) u21 {
    return switch (cp) {
        'A'...'Z' => cp + ('a' - 'A'),
        else => cp,
    };
}
30
/// Report whether `cp` is an ASCII decimal digit (`0`-`9`).
pub fn isDigit(cp: u21) bool {
    return switch (cp) {
        '0'...'9' => true,
        else => false,
    };
}
34
/// Report whether `cp` is ASCII whitespace: space, tab, line feed, carriage
/// return, or form feed.
pub fn isWhitespace(cp: u21) bool {
    return switch (cp) {
        ' ', '\t', '\n', '\r', 0x0C => true,
        else => false,
    };
}
38
/// Report whether codepoint `cp` is a Latin letter beyond ASCII, i.e. in the
/// range U+00C0-U+024F (Latin-1 Supplement letters, Latin Extended-A and
/// Latin Extended-B; common in ES/FR/DE text).
///
/// U+00D7 (multiplication sign) and U+00F7 (division sign) fall inside that
/// range but are symbols, not letters, so they are excluded.
pub fn isLatinExtended(cp: u21) bool {
    return switch (cp) {
        0xC0...0xD6, 0xD8...0xF6, 0xF8...0x024F => true,
        else => false,
    };
}
43
/// Guess the language of UTF-8 `text` from Spanish-specific marker characters.
///
/// Returns "es" when the text contains at least one letter (ASCII or Latin
/// extended) and at least one n-tilde (U+00F1 / U+00D1) or inverted
/// question/exclamation mark (U+00BF / U+00A1). Otherwise returns "en",
/// including for text that contains no letters at all.
pub fn detectLanguage(text: []const u8) []const u8 {
    var letter_count: u32 = 0;
    var enye_count: u32 = 0; // n tilde / N tilde
    var inverted_count: u32 = 0; // inverted ? / !

    var offset: usize = 0;
    while (offset < text.len) {
        const decoded = decodeUtf8(text[offset..]);
        // decodeUtf8 reports a zero length only for an empty slice; stop
        // rather than spin without advancing.
        if (decoded.len == 0) break;
        offset += decoded.len;

        const cp = decoded.codepoint;
        if (isAlpha(cp) or isLatinExtended(cp)) letter_count += 1;
        switch (cp) {
            0xF1, 0xD1 => enye_count += 1,
            0xBF, 0xA1 => inverted_count += 1,
            else => {},
        }
    }

    // Spanish heuristic: any n-tilde or inverted punctuation, provided the
    // text has letters at all.
    const has_spanish_marker = enye_count > 0 or inverted_count > 0;
    return if (letter_count > 0 and has_spanish_marker) "es" else "en";
}
71
test "detectLanguage basics" {
    // Table of inputs and the language code each one should be detected as.
    const cases = [_]struct { input: []const u8, expected: []const u8 }{
        .{ .input = "Hello world, this is a test.", .expected = "en" },
        .{ .input = "El ni\xc3\xb1o corri\xc3\xb3 r\xc3\xa1pido.", .expected = "es" }, // niño corrió rápido
        .{ .input = "\xc2\xbfComo estas?", .expected = "es" }, // ¿Como estas?
    };

    for (cases) |case| {
        try std.testing.expectEqualStrings(case.expected, detectLanguage(case.input));
    }
}