A game about forced loneliness, made by TACStudios
using System;
using Unity.Collections.LowLevel.Unsafe;

namespace Unity.Collections
{
    /// <summary>
    /// Kinds of format errors.
    /// </summary>
    public enum FormatError
    {
        /// <summary>
        /// No error.
        /// </summary>
        None,

        /// <summary>
        /// The target storage does not have sufficient capacity.
        /// Note that the format's write failed. It did not truncate.
        /// </summary>
        Overflow,

        /// <summary>
        /// The source format specifier is not itself correctly formatted, or
        /// format specifier tokens were found outside of accepted usage.
        /// Note that the format's write failed.
        /// </summary>
        BadFormatSpecifier,
    }

    /// <summary>
    /// Kinds of parse errors.
    /// </summary>
    public enum ParseError
    {
        /// <summary>
        /// No parse error.
        /// </summary>
        None,

        /// <summary>
        /// The text parsed does not form a number.
        /// </summary>
        Syntax,

        /// <summary>
        /// The number exceeds the range of the target type.
        /// The number was either truncated, or failed to write entirely.
        /// </summary>
        Overflow,

        /// <summary>
        /// The number exceeds the precision of the target type.
        /// </summary>
        Underflow,
    }

    /// <summary>
    /// Kinds of copy errors.
    /// </summary>
    public enum CopyError
    {
        /// <summary>
        /// No copy error.
        /// </summary>
        None,

        /// <summary>
        /// The target storage does not have sufficient capacity.
        /// Unless stated in the API comment, assume that the write operation was partially applied.
        /// </summary>
        Truncation,
    }

    /// <summary>
    /// Kinds of conversion errors.
    /// </summary>
    public enum ConversionError
    {
        /// <summary>
        /// No conversion error.
        /// </summary>
        None,

        /// <summary>
        /// The target storage does not have sufficient capacity.
        /// For copy operations, the value was either truncated into the target storage, or failed to write entirely.
        /// </summary>
        Overflow,

        /// <summary>
        /// The bytes do not form a valid character.
        /// </summary>
        Encoding,

        /// <summary>
        /// The rune is not a valid code point.
        /// </summary>
        CodePoint,
    }

    /// <summary>
    /// Provides utility methods for UTF-8, UTF-16, UCS-4 (a.k.a. UTF-32), and WTF-8.
    /// </summary>
    [GenerateTestsForBurstCompatibility]
    public unsafe struct Unicode
    {
        /// <summary>
        /// Representation of a Unicode character as a code point.
        /// </summary>
        [GenerateTestsForBurstCompatibility]
        public struct Rune
        {
            /// <summary>
            /// The code point.
            /// </summary>
            /// <value>The code point.</value>
            public int value;

            /// <summary>
            /// Initializes and returns an instance of Rune.
            /// </summary>
            /// <remarks>You are responsible for the code point being valid.</remarks>
            /// <param name="codepoint">The code point.</param>
            public Rune(int codepoint)
            {
                value = codepoint;
            }

            /// <summary>
            /// Returns a rune.
            /// </summary>
            /// <remarks>Because a char is 16-bit, it can only represent the first 2^16 code points, not all 1.1 million.</remarks>
            /// <param name="codepoint">A code point.</param>
            /// <returns>A rune.</returns>
            public static implicit operator Rune(char codepoint) => new Rune { value = codepoint };

            /// <summary>
            /// Evaluates if one rune is equal to the other.
            /// </summary>
            /// <param name="lhs">The left-hand side.</param>
            /// <param name="rhs">The right-hand side.</param>
            /// <returns>True if both runes hold the same code point.</returns>
            public static bool operator ==(Rune lhs, Rune rhs)
            {
                return lhs.value == rhs.value;
            }

            /// <summary>
            /// Returns true if the value stored in this Rune is equal to an object.
            /// </summary>
            /// <remarks>Can only be equal if the object is itself a Rune.</remarks>
            /// <param name="obj">An object to compare with.</param>
            /// <returns>True if the object is a Rune holding the same code point.</returns>
            [ExcludeFromBurstCompatTesting("Takes managed object")]
            public override bool Equals(object obj)
            {
                return obj is Rune other && value == other.value;
            }

            /// <summary>
            /// A hash used for comparisons.
            /// </summary>
            /// <returns>The code point itself, used directly as the hash code.</returns>
            public override int GetHashCode()
            {
                return value;
            }

            /// <summary>
            /// Evaluates if one rune is not equal to the other.
            /// </summary>
            /// <param name="lhs">The left-hand side.</param>
            /// <param name="rhs">The right-hand side.</param>
            /// <returns>True if the runes hold different code points.</returns>
            public static bool operator !=(Rune lhs, Rune rhs)
            {
                return lhs.value != rhs.value;
            }

            /// <summary>
            /// Returns true if a rune is a numerical digit character.
            /// </summary>
            /// <param name="r">The rune.</param>
            /// <returns>True if the rune is one of the ASCII digits '0' through '9'.</returns>
            public static bool IsDigit(Rune r)
            {
                return r.IsDigit();
            }

            // True when the code point is below 0x80.
            internal bool IsAscii()
            {
                return value < 0x80;
            }

            // True when the code point is below 0x100.
            internal bool IsLatin1()
            {
                return value < 0x100;
            }

            // True only for the ASCII digits '0'..'9'.
            internal bool IsDigit()
            {
                return value >= '0' && value <= '9';
            }

            // True when the code point is one of the white-space characters enumerated below.
            internal bool IsWhiteSpace()
            {
                // https://en.wikipedia.org/wiki/Whitespace_character#Unicode

                if (IsLatin1())
                {
                    return value == ' '
                        || (value >= 0x9 && value <= 0xD) // CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D)
                        || value == 0xA0 // NO-BREAK SPACE
                        || value == 0x85 // NEXT LINE
                        ;
                }

                return value == 0x1680 // OGHAM SPACE MARK
                    || (value >= 0x2000 && value <= 0x200A) // EN QUAD (U+2000)
                                                            // EM QUAD (U+2001)
                                                            // EN SPACE (U+2002)
                                                            // EM SPACE (U+2003)
                                                            // THREE-PER-EM SPACE (U+2004)
                                                            // FOUR-PER-EM SPACE (U+2005)
                                                            // SIX-PER-EM SPACE (U+2006)
                                                            // FIGURE SPACE (U+2007)
                                                            // PUNCTUATION SPACE (U+2008)
                                                            // THIN SPACE (U+2009)
                                                            // HAIR SPACE (U+200A)
                    || value == 0x2028 // LINE SEPARATOR
                    || value == 0x2029 // PARAGRAPH SEPARATOR
                    || value == 0x202F // NARROW NO-BREAK SPACE
                    || value == 0x205F // MEDIUM MATHEMATICAL SPACE
                    || value == 0x3000 // IDEOGRAPHIC SPACE
                    ;
            }

            // Maps 'A'..'Z' to 'a'..'z'; every other code point is returned unchanged.
            internal Rune ToLowerAscii()
            {
                // The unsigned compare folds "value >= 'A' && value <= 'Z'" into a single test.
                return new Rune(value + (((uint)(value - 'A') <= ('Z' - 'A')) ? 0x20 : 0));
            }

            // Maps 'a'..'z' to 'A'..'Z'; every other code point is returned unchanged.
            internal Rune ToUpperAscii()
            {
                // The unsigned compare folds "value >= 'a' && value <= 'z'" into a single test.
                return new Rune(value - (((uint)(value - 'a') <= ('z' - 'a')) ? 0x20 : 0));
            }

            /// <summary>
            /// Returns the number of bytes required to encode this rune as UTF-8.
            /// </summary>
            /// <returns>The number of bytes required to encode this rune as UTF-8. If the rune's codepoint
            /// is invalid, returns 4 (the maximum possible encoding length).</returns>
            public int LengthInUtf8Bytes()
            {
                if (value < 0)
                {
                    return 4; // invalid codepoint
                }

                if (value <= 0x7F)
                {
                    return 1;
                }

                if (value <= 0x7FF)
                {
                    return 2;
                }

                if (value <= 0xFFFF)
                {
                    return 3;
                }

                // Four-byte code points, and anything larger (invalid): max size.
                return 4;
            }
        }

        /// <summary>The maximum value of a valid UNICODE code point</summary>
        public const int kMaximumValidCodePoint = 0x10FFFF;

        /// <summary>
        /// Returns true if a code point is valid.
        /// </summary>
        /// <remarks>Only the range 0 to <see cref="kMaximumValidCodePoint"/> is checked; code points in
        /// the surrogate range (U+D800 to U+DFFF) are accepted.</remarks>
        /// <param name="codepoint">A code point.</param>
        /// <returns>True if a code point is valid.</returns>
        public static bool IsValidCodePoint(int codepoint)
        {
            // Surrogates are intentionally not rejected here
            // (presumably so that WTF-8 data can round-trip; confirm before tightening).
            return codepoint >= 0 && codepoint <= kMaximumValidCodePoint;
        }

        /// <summary>
        /// Returns true if the byte is not a UTF-8 trailing (continuation) byte.
        /// </summary>
        /// <remarks>Trailing bytes have the bit pattern 10xxxxxx.</remarks>
        /// <param name="b">The byte.</param>
        /// <returns>True if the top two bits of the byte are anything other than 10.</returns>
        public static bool NotTrailer(byte b)
        {
            return (b & 0xC0) != 0x80;
        }

        /// <summary>
        /// The Unicode character � (U+FFFD).
        /// </summary>
        /// <remarks>This character is used to stand-in for characters that can't be rendered.</remarks>
        /// <value>The Unicode character � (U+FFFD).</value>
        public static Rune ReplacementCharacter => new Rune { value = 0xFFFD };

        /// <summary>
        /// The null rune value.
        /// </summary>
        /// <remarks>In this package, the "bad rune" is used as a null character. It represents no valid code point.</remarks>
        /// <value>The null rune value.</value>
        public static Rune BadRune => new Rune { value = 0 };

        /// <summary>
        /// Reads a UTF-8 encoded character from a buffer.
        /// </summary>
        /// <param name="rune">Outputs the character read. If the read fails, outputs <see cref="ReplacementCharacter"/>.</param>
        /// <param name="buffer">The buffer of bytes to read.</param>
        /// <param name="index">Reference to a byte index into the buffer. If the read succeeds, index is incremented by the
        /// size in bytes of the character read. If the read fails, index is incremented by 1, except when index is
        /// already at or beyond capacity, in which case index is left unchanged.</param>
        /// <param name="capacity">The size in bytes of the buffer. Used to check that the read is in bounds.</param>
        /// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/> or <see cref="ConversionError.Encoding"/>.</returns>
        public static ConversionError Utf8ToUcs(out Rune rune, byte* buffer, ref int index, int capacity)
        {
            int code = 0;
            rune = ReplacementCharacter;
            if (index + 1 > capacity)
            {
                return ConversionError.Overflow;
            }

            if ((buffer[index] & 0b10000000) == 0b00000000) // if high bit is 0, 1 byte
            {
                rune.value = buffer[index + 0];
                index += 1;
                return ConversionError.None;
            }

            if ((buffer[index] & 0b11100000) == 0b11000000) // if high 3 bits are 110, 2 bytes
            {
                if (index + 2 > capacity)
                {
                    index += 1;
                    return ConversionError.Overflow;
                }

                code = (buffer[index + 0] & 0b00011111);
                code = (code << 6) | (buffer[index + 1] & 0b00111111);

                // Reject overlong encodings (value fits in 1 byte) and a missing continuation byte.
                if (code < (1 << 7) || NotTrailer(buffer[index + 1]))
                {
                    index += 1;
                    return ConversionError.Encoding;
                }

                rune.value = code;
                index += 2;
                return ConversionError.None;
            }

            if ((buffer[index] & 0b11110000) == 0b11100000) // if high 4 bits are 1110, 3 bytes
            {
                if (index + 3 > capacity)
                {
                    index += 1;
                    return ConversionError.Overflow;
                }

                code = (buffer[index + 0] & 0b00001111);
                code = (code << 6) | (buffer[index + 1] & 0b00111111);
                code = (code << 6) | (buffer[index + 2] & 0b00111111);

                // Reject overlong encodings (value fits in 2 bytes), out-of-range values and missing continuation bytes.
                if (code < (1 << 11) || !IsValidCodePoint(code) || NotTrailer(buffer[index + 1]) || NotTrailer(buffer[index + 2]))
                {
                    index += 1;
                    return ConversionError.Encoding;
                }

                rune.value = code;
                index += 3;
                return ConversionError.None;
            }

            if ((buffer[index] & 0b11111000) == 0b11110000) // if high 5 bits are 11110, 4 bytes
            {
                if (index + 4 > capacity)
                {
                    index += 1;
                    return ConversionError.Overflow;
                }

                code = (buffer[index + 0] & 0b00000111);
                code = (code << 6) | (buffer[index + 1] & 0b00111111);
                code = (code << 6) | (buffer[index + 2] & 0b00111111);
                code = (code << 6) | (buffer[index + 3] & 0b00111111);

                // Reject overlong encodings (value fits in 3 bytes), out-of-range values and missing continuation bytes.
                if (code < (1 << 16) || !IsValidCodePoint(code) || NotTrailer(buffer[index + 1]) || NotTrailer(buffer[index + 2]) || NotTrailer(buffer[index + 3]))
                {
                    index += 1;
                    return ConversionError.Encoding;
                }

                rune.value = code;
                index += 4;
                return ConversionError.None;
            }

            // Lead byte is a stray continuation byte or has 5+ leading one bits.
            index += 1;
            return ConversionError.Encoding;
        }

        // Steps index backwards (decrementing before each inspection) until it lands on a byte
        // that is not a UTF-8 continuation byte, and returns that position.
        // Returns 0 as soon as index is found to be <= 0.
        static int FindUtf8CharStartInReverse(byte* ptr, ref int index)
        {
            do
            {
                if (index <= 0)
                {
                    return 0;
                }

                --index;

            } while (!NotTrailer(ptr[index]));

            return index;
        }

        // Scans backwards from index for the start of a UTF-8 character and decodes the character found there.
        // On return, index holds the start offset that was found. When index is 0 on entry there is nothing
        // before it: rune is set to ReplacementCharacter and ConversionError.Overflow is returned.
        // NOTE(review): index is decremented once here and once more inside FindUtf8CharStartInReverse
        // before the first byte is inspected - confirm against callers that this is the intended start position.
        internal static ConversionError Utf8ToUcsReverse(out Rune rune, byte* buffer, ref int index, int capacity)
        {
            var prev = index;
            --index;

            index = FindUtf8CharStartInReverse(buffer, ref index);

            if (index == prev)
            {
                rune = ReplacementCharacter;
                return ConversionError.Overflow;
            }

            // Decode from a copy so that index keeps pointing at the start of the character.
            var ignore = index;
            return Utf8ToUcs(out rune, buffer, ref ignore, capacity);
        }

        /// <summary>
        /// Returns true if a char is a Unicode leading surrogate.
        /// </summary>
        /// <param name="c">The char.</param>
        /// <returns>True if the char is a Unicode leading surrogate.</returns>
        static bool IsLeadingSurrogate(char c)
        {
            return c >= 0xD800 && c <= 0xDBFF;
        }

        /// <summary>
        /// Returns true if a char is a Unicode trailing surrogate.
        /// </summary>
        /// <param name="c">The char.</param>
        /// <returns>True if the char is a Unicode trailing surrogate.</returns>
        static bool IsTrailingSurrogate(char c)
        {
            return c >= 0xDC00 && c <= 0xDFFF;
        }

        /// <summary>
        /// Reads a UTF-16 encoded character from a buffer.
        /// </summary>
        /// <remarks>A leading surrogate that is not followed (in bounds) by a trailing surrogate is
        /// returned as-is as a single-char rune rather than reported as an error.</remarks>
        /// <param name="rune">Outputs the character read. If the read fails, outputs <see cref="ReplacementCharacter"/>.</param>
        /// <param name="buffer">The buffer of chars to read.</param>
        /// <param name="index">Reference to a char index into the buffer. If the read succeeds, index is incremented by the
        /// size in chars of the character read. If the read fails, index is not incremented.</param>
        /// <param name="capacity">The size in chars of the buffer. Used to check that the read is in bounds.</param>
        /// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
        public static ConversionError Utf16ToUcs(out Rune rune, char* buffer, ref int index, int capacity)
        {
            rune = ReplacementCharacter;
            if (index + 1 > capacity)
            {
                return ConversionError.Overflow;
            }

            char lead = buffer[index];

            // A surrogate pair is only formed when the second char is in bounds and is a trailing surrogate.
            if (IsLeadingSurrogate(lead) && index + 2 <= capacity)
            {
                char trail = buffer[index + 1];
                if (IsTrailingSurrogate(trail))
                {
                    int code = ((lead & 0x03FF) << 10) | (trail & 0x03FF);
                    rune.value = code + 0x10000;
                    index += 2;
                    return ConversionError.None;
                }
            }

            rune.value = lead;
            index += 1;
            return ConversionError.None;
        }

        // Reads one rune from a buffer of runes, advancing index by 1 on success.
        // Outputs ReplacementCharacter and returns ConversionError.Overflow when index is at or beyond capacity.
        internal static ConversionError UcsToUcs(out Rune rune, Rune* buffer, ref int index, int capacity)
        {
            rune = ReplacementCharacter;
            if (index + 1 > capacity)
            {
                return ConversionError.Overflow;
            }

            rune = buffer[index];
            index += 1;
            return ConversionError.None;
        }

        /// <summary>
        /// Writes a rune to a buffer as a UTF-8 encoded character.
        /// </summary>
        /// <param name="buffer">The buffer to write to.</param>
        /// <param name="index">Reference to a byte index into the buffer. If the write succeeds, index is incremented by the
        /// size in bytes of the character written. If the write fails, index is not incremented.</param>
        /// <param name="capacity">The size in bytes of the buffer. Used to check that the write is in bounds.</param>
        /// <param name="rune">The rune to encode.</param>
        /// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
        public static ConversionError UcsToUtf8(byte* buffer, ref int index, int capacity, Rune rune)
        {
            if (!IsValidCodePoint(rune.value))
            {
                return ConversionError.CodePoint;
            }

            if (index + 1 > capacity)
            {
                return ConversionError.Overflow;
            }

            if (rune.value <= 0x7F)
            {
                buffer[index++] = (byte)rune.value;
                return ConversionError.None;
            }

            if (rune.value <= 0x7FF)
            {
                if (index + 2 > capacity)
                {
                    return ConversionError.Overflow;
                }

                buffer[index++] = (byte)(0xC0 | (rune.value >> 6));
                buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
                return ConversionError.None;
            }

            if (rune.value <= 0xFFFF)
            {
                if (index + 3 > capacity)
                {
                    return ConversionError.Overflow;
                }

                buffer[index++] = (byte)(0xE0 | (rune.value >> 12));
                buffer[index++] = (byte)(0x80 | ((rune.value >> 6) & 0x3F));
                buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
                return ConversionError.None;
            }

            if (rune.value <= 0x1FFFFF)
            {
                if (index + 4 > capacity)
                {
                    return ConversionError.Overflow;
                }

                buffer[index++] = (byte)(0xF0 | (rune.value >> 18));
                buffer[index++] = (byte)(0x80 | ((rune.value >> 12) & 0x3F));
                buffer[index++] = (byte)(0x80 | ((rune.value >> 6) & 0x3F));
                buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
                return ConversionError.None;
            }

            // Defensive: not reachable while IsValidCodePoint caps the value at kMaximumValidCodePoint.
            return ConversionError.Encoding;
        }

        /// <summary>
        /// Writes a rune to a buffer as a UTF-16 encoded character.
        /// </summary>
        /// <param name="buffer">The buffer of chars to write to.</param>
        /// <param name="index">Reference to a char index into the buffer. If the write succeeds, index is incremented by the
        /// size in chars of the character written. If the write fails, index is not incremented.</param>
        /// <param name="capacity">The size in chars of the buffer. Used to check that the write is in bounds.</param>
        /// <param name="rune">The rune to encode.</param>
        /// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
        public static ConversionError UcsToUtf16(char* buffer, ref int index, int capacity, Rune rune)
        {
            if (!IsValidCodePoint(rune.value))
            {
                return ConversionError.CodePoint;
            }

            if (index + 1 > capacity)
            {
                return ConversionError.Overflow;
            }

            if (rune.value >= 0x10000)
            {
                // Code points above the Basic Multilingual Plane need a surrogate pair.
                if (index + 2 > capacity)
                {
                    return ConversionError.Overflow;
                }

                int code = rune.value - 0x10000;

                // Defensive: not reachable while IsValidCodePoint caps the value at kMaximumValidCodePoint.
                if (code >= (1 << 20))
                {
                    return ConversionError.Encoding;
                }

                buffer[index++] = (char)(0xD800 | (code >> 10));
                buffer[index++] = (char)(0xDC00 | (code & 0x3FF));
                return ConversionError.None;
            }

            buffer[index++] = (char)rune.value;
            return ConversionError.None;
        }

        /// <summary>
        /// Copies UTF-16 characters from one buffer to another buffer as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-16.</remarks>
        /// <param name="utf16Buffer">The source buffer.</param>
        /// <param name="utf16Length">The number of chars to read from the source.</param>
        /// <param name="utf8Buffer">The destination buffer.</param>
        /// <param name="utf8Length">Outputs the number of bytes written to the destination.</param>
        /// <param name="utf8Capacity">The size in bytes of the destination buffer.</param>
        /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
        public static ConversionError Utf16ToUtf8(char* utf16Buffer, int utf16Length, byte* utf8Buffer, out int utf8Length, int utf8Capacity)
        {
            utf8Length = 0;
            for (var utf16Offset = 0; utf16Offset < utf16Length;)
            {
                // The loop condition keeps the read in bounds, so the read always advances the offset.
                Utf16ToUcs(out var ucs, utf16Buffer, ref utf16Offset, utf16Length);

                // Only a full destination stops the copy; other write errors skip the rune.
                if (UcsToUtf8(utf8Buffer, ref utf8Length, utf8Capacity, ucs) == ConversionError.Overflow)
                {
                    return ConversionError.Overflow;
                }
            }

            return ConversionError.None;
        }

        /// <summary>
        /// Copies UTF-8 characters from one buffer to another.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8. When the destination is too small for a
        /// straight copy, whole characters are copied one at a time until the next one no longer fits.</remarks>
        /// <param name="srcBuffer">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <param name="destBuffer">The destination buffer.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destCapacity">The size in bytes of the destination buffer.</param>
        /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
        public static ConversionError Utf8ToUtf8(byte* srcBuffer, int srcLength, byte* destBuffer, out int destLength, int destCapacity)
        {
            if (destCapacity >= srcLength)
            {
                // Everything fits: copy the bytes verbatim without decoding.
                UnsafeUtility.MemCpy(destBuffer, srcBuffer, srcLength);
                destLength = srcLength;
                return ConversionError.None;
            }

            // TODO even in this case, it's possible to MemCpy all but the last 3 bytes that fit, and then by looking at only
            // TODO the high bits of the last 3 bytes that fit, decide how many of the 3 to append. but that requires a
            // TODO little UNICODE presence of mind that nobody has today.
            destLength = 0;
            for (var srcOffset = 0; srcOffset < srcLength;)
            {
                // A failed read yields ReplacementCharacter and still advances the offset.
                Utf8ToUcs(out var ucs, srcBuffer, ref srcOffset, srcLength);
                if (UcsToUtf8(destBuffer, ref destLength, destCapacity, ucs) == ConversionError.Overflow)
                {
                    return ConversionError.Overflow;
                }
            }

            return ConversionError.None;
        }

        /// <summary>
        /// Copies UTF-8 characters from one buffer to another as UTF-16.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="utf8Buffer">The source buffer.</param>
        /// <param name="utf8Length">The number of bytes to read from the source.</param>
        /// <param name="utf16Buffer">The destination buffer.</param>
        /// <param name="utf16Length">Outputs the number of chars written to the destination.</param>
        /// <param name="utf16Capacity">The size in chars of the destination buffer.</param>
        /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, <see cref="ConversionError.Overflow"/>.</returns>
        public static ConversionError Utf8ToUtf16(byte* utf8Buffer, int utf8Length, char* utf16Buffer, out int utf16Length, int utf16Capacity)
        {
            utf16Length = 0;
            for (var utf8Offset = 0; utf8Offset < utf8Length;)
            {
                // A failed read yields ReplacementCharacter and still advances the offset.
                Utf8ToUcs(out var ucs, utf8Buffer, ref utf8Offset, utf8Length);
                if (UcsToUtf16(utf16Buffer, ref utf16Length, utf16Capacity, ucs) == ConversionError.Overflow)
                {
                    return ConversionError.Overflow;
                }
            }

            return ConversionError.None;
        }

        // Counts the bytes in the buffer that are not UTF-8 continuation bytes (one per character start),
        // stopping early once maxRunes have been counted.
        static int CountRunes(byte* utf8Buffer, int utf8Length, int maxRunes = int.MaxValue)
        {
            var numRunes = 0;

            for (var i = 0; numRunes < maxRunes && i < utf8Length; ++i)
            {
                if (NotTrailer(utf8Buffer[i]))
                {
                    numRunes++;
                }
            }

            return numRunes;
        }
    }
}