this repo has no description
at main 4.9 kB view raw
/**
 * Created by dabolfathi on 4/26/17.
 */

// Characters stripped outright before any other normalization:
// \u2122 -> (Trade Mark Sign)
// \u2120 -> (Service Mark)
// \u03a9 -> (Greek Capital Letter Omega)
// \u00a9 -> (Copyright Sign)
// \u00ae -> (Registered Sign)
// \u30fc -> (Katakana-Hiragana Prolonged Sound Mark)
// \u03c9 -> (Greek Small Letter Omega)
const REMOVED_SPECIAL_UNICODES_REGEX = /[\u2122\u2120\u03a9\u00a9\u00ae\u30fc\u03c9]/g;

// Runs of Latin-script characters (plus any diacritic marks) that get folded to ASCII:
// A-Za-z        - Basic Latin letters
// \u00A0-\u00FF - Latin-1 Supplement, upper half (starts at the no-break space U+00A0)
// \u0100-\u017F - Latin Extended-A (includes dotless i \u0131)
// \u0180-\u024F - Latin Extended-B
// \u1E00-\u1EFF - Latin Extended Additional
const LATIN_RUN_REGEX = /[A-Za-z\u00A0-\u00FF\u0100-\u017F\u0180-\u024F\u1E00-\u1EFF\p{Diacritic}]+/gu;

const DIACRITIC_REGEX = /\p{Diacritic}/gu;
const DOTLESS_I_REGEX = /\u0131/g;
const NON_ASCII_ALPHANUMERIC_REGEX = /[^A-Za-z0-9]/g;

// Everything that is not a letter (\p{L}), a number (\p{N}), whitespace (\s) or an underscore.
const NON_SEARCHABLE_REGEX = /[^\p{L}\p{N}\s_]/gu;
const WHITESPACE_RUN_REGEX = /\s+/g;

/**
 * Returns a single string, which is the result of joining together all
 * the strings in the list with the provided separator.
 *
 * Entries that are `null` or `undefined` are skipped entirely: they contribute
 * neither text nor a separator.
 *
 * @param strings The list of strings to be joined.
 * @param separator The separator to use when joining them together.
 * @returns {string|null} The joined string, or `null` when either argument is
 * `null`/`undefined`.
 */
export function join(strings, separator) {
    if (strings == null || separator == null) {
        return null;
    }
    // Filter first and let Array.prototype.join place the separators, so that a
    // skipped entry can never swallow a separator or leave a trailing one.
    return strings.filter((element) => element != null).join(separator);
}

/**
 * Folds one run of Latin-script text down to ASCII letters and digits.
 *
 * @param latinText A run matched by LATIN_RUN_REGEX.
 * @returns The run with diacritics removed and non-ASCII-alphanumerics dropped.
 */
function foldLatinRun(latinText) {
    return latinText
        // Compatibility-decompose so accents become separate combining marks...
        .normalize("NFKD")
        // ...which are then removed.
        .replace(DIACRITIC_REGEX, "")
        // Convert dotless i to regular i (it has no decomposition of its own)
        .replace(DOTLESS_I_REGEX, "i")
        // Anything in the run that is still not an ASCII letter or digit is dropped
        .replace(NON_ASCII_ALPHANUMERIC_REGEX, "");
}

/**
 * Removes punctuation, symbols and control characters, collapses runs of
 * whitespace into single spaces and trims the result.
 *
 * Preserved: letters from all writing systems (\p{L}), numbers (\p{N}),
 * whitespace (\s) and underscores (_).
 *
 * @param text The text to clean up.
 * @returns The cleaned-up text.
 */
function stripSymbolsAndCollapseWhitespace(text) {
    return text
        .replace(NON_SEARCHABLE_REGEX, "")
        .replace(WHITESPACE_RUN_REGEX, " ")
        .trim();
}

/**
 * Generate a normalized string for robust multilingual search that properly handles
 * CJK (Chinese, Japanese, Korean), Arabic, Cyrillic, and Latin-based scripts.
 * Preserves Unicode characters while normalizing diacritics and case appropriately.
 *
 * Steps: lowercase, strip the special characters listed on
 * REMOVED_SPECIAL_UNICODES_REGEX, fold Latin runs to ASCII, remove remaining
 * punctuation/symbols, collapse whitespace and trim.
 *
 * @param input The input string to normalize
 * @returns A normalized string suitable for search matching ("" for any falsy input)
 */
export function normalizeForSearch(input) {
    if (!input) {
        return "";
    }
    try {
        const folded = input
            .toLowerCase() // Case insensitivity
            .replace(REMOVED_SPECIAL_UNICODES_REGEX, "")
            // Apply normalization only to Latin characters to preserve CJK integrity
            .replace(LATIN_RUN_REGEX, foldLatinRun);
        return stripSymbolsAndCollapseWhitespace(folded);
    }
    catch (error) {
        // Fallback: skip the Latin folding and use basic character classes only
        return stripSymbolsAndCollapseWhitespace(input.toLowerCase());
    }
}

/**
 * Checks whether the input string contains the search term. The input is first
 * passed through normalizeForSearch, so the comparison is case-insensitive,
 * diacritic-insensitive and ignores punctuation and symbols.
 *
 * @param input The raw string to search in.
 * @param normalizedTerm The search term; it is used as-is, so the caller is
 * expected to have normalized it already (e.g. with normalizeForSearch).
 * @returns {boolean} `true` if the normalized input contains the term; `false`
 * otherwise, including when the term is `null`/`undefined`.
 */
export function containsSearchTerm(input, normalizedTerm) {
    // String.prototype.includes would coerce a missing term to the text
    // "null"/"undefined" and search for that, so reject it explicitly.
    if (normalizedTerm == null) {
        return false;
    }
    return normalizeForSearch(input).includes(normalizedTerm);
}

/**
 * Wraps a string with bidirectional isolate characters to ensure proper text direction handling.
 * This is particularly useful for user-generated content like display names that may contain
 * mixed left-to-right and right-to-left text.
 *
 * @param text The string to wrap with bidi isolates
 * @returns The string wrapped with First Strong Isolate (U+2068) and Pop Directional Isolate
 * (U+2069); falsy input (e.g. "", null, undefined) is returned unchanged.
 */
export function withBidiIsolates(text) {
    if (!text) {
        return text;
    }
    // U+2068: First Strong Isolate
    // U+2069: Pop Directional Isolate
    return `\u2068${text}\u2069`;
}
//# sourceMappingURL=string-util.js.map