A game about forced loneliness, made by TACStudios
at master 338 lines 20 kB view raw
using Unity.Collections.LowLevel.Unsafe;

namespace Unity.Collections
{
    /// <summary>
    /// Provides methods for copying and encoding Unicode text.
    /// </summary>
    [GenerateTestsForBurstCompatibility]
    public static unsafe class UTF8ArrayUnsafeUtility
    {
        /// <summary>
        /// Copies a buffer of UCS-2 text. The copy is encoded as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UCS-2. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer for reading UCS-2.</param>
        /// <param name="srcLength">The number of chars to read from the source.</param>
        /// <param name="dest">The destination buffer for writing UTF-8.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte *dest, out int destLength, int destUTF8MaxLengthInBytes, char *src, int srcLength)
        {
            var error = Unicode.Utf16ToUtf8(src, srcLength, dest, out destLength, destUTF8MaxLengthInBytes);
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Copies a buffer of UCS-2 text. The copy is encoded as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UCS-2. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer for reading UCS-2.</param>
        /// <param name="srcLength">The number of chars to read from the source.</param>
        /// <param name="dest">The destination buffer for writing UTF-8.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte *dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, char *src, int srcLength)
        {
            var error = Unicode.Utf16ToUtf8(src, srcLength, dest, out var written, destUTF8MaxLengthInBytes);
            destLength = (ushort)written;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte *dest, out int destLength, int destUTF8MaxLengthInBytes, byte *src, int srcLength)
        {
            var error = Unicode.Utf8ToUtf8(src, srcLength, dest, out var written, destUTF8MaxLengthInBytes);
            destLength = written;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte *dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, byte *src, ushort srcLength)
        {
            var error = Unicode.Utf8ToUtf8(src, srcLength, dest, out var written, destUTF8MaxLengthInBytes);
            destLength = (ushort)written;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text. The copy is encoded as UCS-2.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer for reading UTF-8.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer for writing UCS-2.</param>
        /// <param name="destLength">Outputs the number of chars written to the destination.</param>
        /// <param name="destUCS2MaxLengthInChars">The max number of chars that will be written to the destination buffer.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(char *dest, out int destLength, int destUCS2MaxLengthInChars, byte *src, int srcLength)
        {
            var error = Unicode.Utf8ToUtf16(src, srcLength, dest, out destLength, destUCS2MaxLengthInChars);
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text. The copy is encoded as UCS-2.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer for reading UTF-8.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer for writing UCS-2.</param>
        /// <param name="destLength">Outputs the number of chars written to the destination.</param>
        /// <param name="destUCS2MaxLengthInChars">The max number of chars that will be written to the destination buffer.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(char *dest, out ushort destLength, ushort destUCS2MaxLengthInChars, byte *src, ushort srcLength)
        {
            var error = Unicode.Utf8ToUtf16(src, srcLength, dest, out var written, destUCS2MaxLengthInChars);
            destLength = (ushort)written;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Appends UTF-8 text to a buffer.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. The bytes are copied verbatim; no decoding or validation is performed.
        ///
        /// No data will be copied if the destination has insufficient capacity for the full append, *i.e.* if `srcLength > (destCapacity - destLength)`.
        /// </remarks>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be assigned the new length *after* the append.</param>
        /// <param name="destCapacity">The destination buffer capacity in bytes.</param>
        /// <returns><see cref="FormatError.None"/> if the append fully completes. Otherwise, returns <see cref="FormatError.Overflow"/>.</returns>
        public static FormatError AppendUTF8Bytes(byte* dest, ref int destLength, int destCapacity, byte* src, int srcLength)
        {
            // Compare against the remaining space rather than summing the lengths:
            // `destLength + srcLength` can wrap around in 32-bit arithmetic, which
            // would let the check pass and MemCpy write past the end of `dest`.
            if (srcLength > destCapacity - destLength)
                return FormatError.Overflow;

            UnsafeUtility.MemCpy(dest + destLength, src, srcLength);
            destLength += srcLength;
            return FormatError.None;
        }

        /// <summary>
        /// Appends UTF-8 text to a buffer.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be increased by the number of bytes appended, i.e. holds the new length *after* the append.</param>
        /// <param name="destUTF8MaxLengthInBytes">The destination buffer's length in bytes. Data will not be appended past this length.</param>
        /// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Append(byte *dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, byte *src, ushort srcLength)
        {
            // Write after the existing content, limited to the space that is left.
            var error = Unicode.Utf8ToUtf8(src, srcLength, dest + destLength, out var appended, destUTF8MaxLengthInBytes - destLength);
            destLength += (ushort)appended;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Appends UCS-2 text to a buffer, encoded as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UCS-2. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of chars to read from the source.</param>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be increased by the number of bytes appended, i.e. holds the new length *after* the append.</param>
        /// <param name="destUTF8MaxLengthInBytes">The destination buffer's length in bytes. Data will not be appended past this length.</param>
        /// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Append(byte *dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, char *src, int srcLength)
        {
            // Write after the existing content, limited to the space that is left.
            var error = Unicode.Utf16ToUtf8(src, srcLength, dest + destLength, out var appended, destUTF8MaxLengthInBytes - destLength);
            destLength += (ushort)appended;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Appends UTF-8 text to a buffer, encoded as UCS-2.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. Any conversion error is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in chars *before* the append. Will be increased by the number of chars appended, i.e. holds the new length *after* the append.</param>
        /// <param name="destUCS2MaxLengthInChars">The destination buffer's length in chars. Data will not be appended past this length.</param>
        /// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Append(char *dest, ref ushort destLength, ushort destUCS2MaxLengthInChars, byte *src, ushort srcLength)
        {
            // Write after the existing content, limited to the space that is left.
            var error = Unicode.Utf8ToUtf16(src, srcLength, dest + destLength, out var appended, destUCS2MaxLengthInChars - destLength);
            destLength += (ushort)appended;
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Outcome of comparing one decoded code point from each of two strings.
        /// </summary>
        /// <remarks>
        /// A code point whose decode reported an error is compared as code point 0.
        /// The comparison loop ends as soon as the code points differ, or when both are 0.
        /// </remarks>
        internal struct Comparison
        {
            /// <summary>True if the comparison loop should stop and return <see cref="result"/>.</summary>
            public bool terminates;

            /// <summary>Difference of the two code points (A minus B); 0 when they are equal.</summary>
            public int result;

            public Comparison(Unicode.Rune runeA, ConversionError errorA, Unicode.Rune runeB, ConversionError errorB)
            {
                // Treat anything that failed to decode as code point 0.
                if (errorA != ConversionError.None)
                    runeA.value = 0;
                if (errorB != ConversionError.None)
                    runeB.value = 0;

                if (runeA.value != runeB.value)
                {
                    result = runeA.value - runeB.value;
                    terminates = true;
                }
                else
                {
                    result = 0;
                    // Equal code points: only stop once both sides have reached code point 0.
                    terminates = (runeA.value == 0 && runeB.value == 0);
                }
            }
        }

        /// <summary>Compares two UTF-8 buffers for relative equality.</summary>
        /// <remarks>
        /// The buffers are compared code point by code point. A code point that fails to decode is
        /// compared as 0, and the comparison ends when both sides yield code point 0.
        /// </remarks>
        /// <param name="utf8BufferA">The first buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytesA">The length in bytes of the first UTF-8 buffer.</param>
        /// <param name="utf8BufferB">The second buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytesB">The length in bytes of the second UTF-8 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in the first UTF-8 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in the second UTF-8 buffer.
        /// </returns>
        public static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, byte* utf8BufferB, int utf8LengthInBytesB)
        {
            int byteIndexA = 0;
            int byteIndexB = 0;
            while (true)
            {
                var utf8ErrorA = Unicode.Utf8ToUcs(out var utf8RuneA, utf8BufferA, ref byteIndexA, utf8LengthInBytesA);
                var utf8ErrorB = Unicode.Utf8ToUcs(out var utf8RuneB, utf8BufferB, ref byteIndexB, utf8LengthInBytesB);
                var comparison = new Comparison(utf8RuneA, utf8ErrorA, utf8RuneB, utf8ErrorB);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Compares a UTF-8 buffer and a buffer of runes for relative equality.</summary>
        /// <remarks>
        /// A code point that fails to decode is compared as 0, and the comparison ends when both
        /// sides yield code point 0.
        /// </remarks>
        /// <param name="utf8BufferA">The buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytesA">The length in bytes of the UTF-8 buffer.</param>
        /// <param name="runeBufferB">The buffer of runes.</param>
        /// <param name="lengthInRunesB">The length in runes of the rune buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in the UTF-8 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in the rune buffer.
        /// </returns>
        internal static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, Unicode.Rune* runeBufferB, int lengthInRunesB)
        {
            int byteIndexA = 0;
            int runeIndexB = 0;
            while (true)
            {
                var utf8ErrorA = Unicode.Utf8ToUcs(out var utf8RuneA, utf8BufferA, ref byteIndexA, utf8LengthInBytesA);
                var errorB = Unicode.UcsToUcs(out var runeB, runeBufferB, ref runeIndexB, lengthInRunesB);
                var comparison = new Comparison(utf8RuneA, utf8ErrorA, runeB, errorB);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Compares two UTF-16 buffers for relative equality.</summary>
        /// <remarks>
        /// The buffers are compared code point by code point. A code point that fails to decode is
        /// compared as 0, and the comparison ends when both sides yield code point 0.
        /// </remarks>
        /// <param name="utf16BufferA">The first buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInCharsA">The length in chars of the first UTF-16 buffer.</param>
        /// <param name="utf16BufferB">The second buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInCharsB">The length in chars of the second UTF-16 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in the first UTF-16 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in the second UTF-16 buffer.
        /// </returns>
        public static int StrCmp(char* utf16BufferA, int utf16LengthInCharsA, char* utf16BufferB, int utf16LengthInCharsB)
        {
            int charIndexA = 0;
            int charIndexB = 0;
            while (true)
            {
                var utf16ErrorA = Unicode.Utf16ToUcs(out var utf16RuneA, utf16BufferA, ref charIndexA, utf16LengthInCharsA);
                var utf16ErrorB = Unicode.Utf16ToUcs(out var utf16RuneB, utf16BufferB, ref charIndexB, utf16LengthInCharsB);
                var comparison = new Comparison(utf16RuneA, utf16ErrorA, utf16RuneB, utf16ErrorB);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Returns true if two UTF-8 buffers have the same length and content.</summary>
        /// <remarks>
        /// The buffers are compared byte for byte. A code-point comparison via <c>StrCmp</c> is not
        /// used here because it stops at the first code point 0 (and compares undecodable data as 0),
        /// which would report buffers that differ after that point as equal.
        /// </remarks>
        /// <param name="aBytes">The first buffer of UTF-8 text.</param>
        /// <param name="aLength">The length in bytes of the first buffer.</param>
        /// <param name="bBytes">The second buffer of UTF-8 text.</param>
        /// <param name="bLength">The length in bytes of the second buffer.</param>
        /// <returns>True if the content of both strings is identical.</returns>
        public static bool EqualsUTF8Bytes(byte* aBytes, int aLength, byte* bBytes, int bLength)
        {
            if (aLength != bLength)
                return false;

            // Nothing to compare; this also keeps a non-positive size away from MemCmp.
            if (aLength <= 0)
                return true;

            return UnsafeUtility.MemCmp(aBytes, bBytes, aLength) == 0;
        }

        /// <summary>Compares a UTF-8 buffer and a UTF-16 buffer for relative equality.</summary>
        /// <remarks>
        /// The buffers are compared code point by code point. A code point that fails to decode is
        /// compared as 0, and the comparison ends when both sides yield code point 0.
        /// </remarks>
        /// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
        /// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in UTF-8 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in UTF-16 buffer.
        /// </returns>
        public static int StrCmp(byte* utf8Buffer, int utf8LengthInBytes, char* utf16Buffer, int utf16LengthInChars)
        {
            int byteIndex = 0;
            int charIndex = 0;
            while (true)
            {
                var utf8Error = Unicode.Utf8ToUcs(out var utf8Rune, utf8Buffer, ref byteIndex, utf8LengthInBytes);
                var utf16Error = Unicode.Utf16ToUcs(out var utf16Rune, utf16Buffer, ref charIndex, utf16LengthInChars);
                var comparison = new Comparison(utf8Rune, utf8Error, utf16Rune, utf16Error);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Compares a UTF-16 buffer and a UTF-8 buffer for relative equality.</summary>
        /// <remarks>Implemented as the negation of the UTF-8 / UTF-16 overload with the arguments swapped.</remarks>
        /// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
        /// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in UTF-16 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in UTF-8 buffer.
        /// </returns>
        public static int StrCmp(char* utf16Buffer, int utf16LengthInChars, byte* utf8Buffer, int utf8LengthInBytes)
        {
            return -StrCmp(utf8Buffer, utf8LengthInBytes, utf16Buffer, utf16LengthInChars);
        }
    }
}