Anonymize your writing style. Zig WASM engine detects authorship markers, fine-tuned LLM rewrites to remove them. Runs entirely in-browser. fantasma.qstorage.quilibrium.com/
wasm privacy qwen zig
at main 76 lines 2.8 kB view raw
const std = @import("std");

/// U+FFFD REPLACEMENT CHARACTER, returned for any malformed UTF-8 sequence.
const replacement_char: u21 = 0xFFFD;

/// Decode one UTF-8 codepoint from the start of `bytes`.
///
/// Returns the decoded codepoint and the number of bytes consumed.
/// - Empty input yields `{ .codepoint = 0, .len = 0 }`.
/// - Any malformed sequence (stray continuation byte, invalid lead byte,
///   truncated sequence, missing continuation byte, overlong encoding,
///   UTF-16 surrogate, or value above U+10FFFF) yields U+FFFD with
///   `len == 1`, so a caller scanning a buffer always makes progress and
///   resynchronises on the very next byte.
pub fn decodeUtf8(bytes: []const u8) struct { codepoint: u21, len: u3 } {
    if (bytes.len == 0) return .{ .codepoint = 0, .len = 0 };

    const b0 = bytes[0];
    if (b0 < 0x80) return .{ .codepoint = b0, .len = 1 };

    // The lead byte fixes the sequence length. 0x80...0xBF are continuation
    // bytes and 0xF8...0xFF are never valid in UTF-8.
    const seq_len: u3 = switch (b0) {
        0xC0...0xDF => 2,
        0xE0...0xEF => 3,
        0xF0...0xF7 => 4,
        else => return .{ .codepoint = replacement_char, .len = 1 },
    };
    if (bytes.len < seq_len) return .{ .codepoint = replacement_char, .len = 1 };

    // Payload bits carried by the lead byte.
    var cp: u21 = switch (seq_len) {
        2 => b0 & 0x1F,
        3 => b0 & 0x0F,
        else => b0 & 0x07,
    };

    // Every trailing byte must look like 10xxxxxx; otherwise the lead byte is
    // bogus and must not swallow the bytes that follow it.
    for (bytes[1..seq_len]) |b| {
        if ((b & 0xC0) != 0x80) return .{ .codepoint = replacement_char, .len = 1 };
        cp = (cp << 6) | (b & 0x3F);
    }

    // Smallest value that legitimately needs this many bytes; anything lower
    // is an overlong encoding.
    const min_cp: u21 = switch (seq_len) {
        2 => 0x80,
        3 => 0x800,
        else => 0x10000,
    };
    const is_surrogate = cp >= 0xD800 and cp <= 0xDFFF;
    if (cp < min_cp or cp > 0x10FFFF or is_surrogate) {
        return .{ .codepoint = replacement_char, .len = 1 };
    }

    return .{ .codepoint = cp, .len = seq_len };
}

/// Check if a codepoint is an ASCII letter (`a`-`z` or `A`-`Z`).
pub fn isAlpha(cp: u21) bool {
    return isUpper(cp) or (cp >= 'a' and cp <= 'z');
}

/// Check if a codepoint is an ASCII uppercase letter (`A`-`Z`).
pub fn isUpper(cp: u21) bool {
    return cp >= 'A' and cp <= 'Z';
}

/// Map an ASCII uppercase letter to lowercase; every other codepoint is
/// returned unchanged.
pub fn toLower(cp: u21) u21 {
    return if (isUpper(cp)) cp + ('a' - 'A') else cp;
}

/// Check if a codepoint is an ASCII decimal digit (`0`-`9`).
pub fn isDigit(cp: u21) bool {
    return cp >= '0' and cp <= '9';
}

/// Check if a codepoint is space, tab, line feed, carriage return or form
/// feed. Vertical tab (0x0B) is not treated as whitespace.
pub fn isWhitespace(cp: u21) bool {
    return switch (cp) {
        ' ', '\t', '\n', '\r', 0x0C => true,
        else => false,
    };
}

/// Check if a codepoint is a Latin letter outside ASCII, i.e. in the range
/// U+00C0...U+024F (Latin-1 Supplement letters, Latin Extended-A and -B;
/// common in ES/FR/DE). The multiplication sign (U+00D7) and division sign
/// (U+00F7) fall inside that range but are symbols, so they are excluded.
pub fn isLatinExtended(cp: u21) bool {
    if (cp == 0xD7 or cp == 0xF7) return false;
    return cp >= 0xC0 and cp <= 0x024F;
}

/// Detect the language of UTF-8 `text` using a marker-presence heuristic.
///
/// Returns "es" when the text contains at least one letter (ASCII or
/// Latin-extended) and at least one Spanish marker: n/N with tilde
/// (U+00F1 / U+00D1) or inverted punctuation (U+00BF / U+00A1).
/// Returns "en" otherwise, including for empty or letter-free text.
pub fn detectLanguage(text: []const u8) []const u8 {
    var saw_letter = false;
    var saw_spanish_marker = false;

    var i: usize = 0;
    while (i < text.len) {
        const dec = decodeUtf8(text[i..]);
        if (dec.len == 0) break; // defensive: only happens on empty input

        const cp = dec.codepoint;
        if (isAlpha(cp) or isLatinExtended(cp)) saw_letter = true;
        switch (cp) {
            0xF1, 0xD1, 0xBF, 0xA1 => saw_spanish_marker = true,
            else => {},
        }

        // Both conditions are monotonic, so the answer cannot change anymore.
        if (saw_letter and saw_spanish_marker) return "es";

        i += dec.len;
    }

    return "en";
}

test "detectLanguage basics" {
    try std.testing.expectEqualStrings("en", detectLanguage("Hello world, this is a test."));
    try std.testing.expectEqualStrings("es", detectLanguage("El ni\xc3\xb1o corri\xc3\xb3 r\xc3\xa1pido.")); // niño corrió rápido
    try std.testing.expectEqualStrings("es", detectLanguage("\xc2\xbfComo estas?")); // ¿Como estas?
}

test "decodeUtf8 rejects malformed sequences" {
    // Lead byte followed by a non-continuation byte must not swallow it.
    const bad_pair = decodeUtf8("\xc3" ++ "1");
    try std.testing.expectEqual(@as(u21, 0xFFFD), bad_pair.codepoint);
    try std.testing.expectEqual(@as(u3, 1), bad_pair.len);

    // Overlong encoding of U+0000.
    const overlong = decodeUtf8("\xc0\x80");
    try std.testing.expectEqual(@as(u21, 0xFFFD), overlong.codepoint);
    try std.testing.expectEqual(@as(u3, 1), overlong.len);

    // Malformed input must not be mistaken for n-tilde.
    try std.testing.expectEqualStrings("en", detectLanguage("\xc3" ++ "1abc"));
}