A game about forced loneliness, made by TACStudios
1using System;
2using Unity.Collections.LowLevel.Unsafe;
3
4namespace Unity.Collections
5{
/// <summary>
/// Kinds of format errors.
/// </summary>
public enum FormatError
{
    /// <summary>
    /// No error.
    /// </summary>
    None = 0,

    /// <summary>
    /// The target storage does not have sufficient capacity.
    /// Note that the format's write failed. It did not truncate.
    /// </summary>
    Overflow = 1,

    /// <summary>
    /// The source format specifier is not itself correctly formatted, or
    /// format specifier tokens were found outside of accepted usage.
    /// Note that the format's write failed.
    /// </summary>
    BadFormatSpecifier = 2,
}
29
/// <summary>
/// Kinds of parse errors.
/// </summary>
public enum ParseError
{
    /// <summary>
    /// No parse error.
    /// </summary>
    None = 0,

    /// <summary>
    /// The text parsed does not form a number.
    /// </summary>
    Syntax = 1,

    /// <summary>
    /// The number exceeds the range of the target type.
    /// The number was either truncated, or failed to write entirely.
    /// </summary>
    Overflow = 2,

    /// <summary>
    /// The number exceeds the precision of the target type.
    /// </summary>
    Underflow = 3,
}
56
/// <summary>
/// Kinds of copy errors.
/// </summary>
public enum CopyError
{
    /// <summary>
    /// No copy error.
    /// </summary>
    None = 0,

    /// <summary>
    /// The target storage does not have sufficient capacity.
    /// Unless the API comment states otherwise, assume that the write operation was partially applied.
    /// </summary>
    Truncation = 1,
}
73
/// <summary>
/// Kinds of conversion errors.
/// </summary>
public enum ConversionError
{
    /// <summary>
    /// No conversion error.
    /// </summary>
    None = 0,

    /// <summary>
    /// The target storage does not have sufficient capacity.
    /// For copy operations, the value was either truncated into the target storage, or failed to write entirely.
    /// </summary>
    Overflow = 1,

    /// <summary>
    /// The bytes do not form a valid character.
    /// </summary>
    Encoding = 2,

    /// <summary>
    /// The rune is not a valid code point.
    /// </summary>
    CodePoint = 3,
}
100
101 /// <summary>
102 /// Provides utility methods for UTF-8, UTF-16, UCS-4 (a.k.a. UTF-32), and WTF-8.
103 /// </summary>
104 [GenerateTestsForBurstCompatibility]
105 public unsafe struct Unicode
106 {
/// <summary>
/// Representation of a Unicode character as a code point.
/// </summary>
[GenerateTestsForBurstCompatibility]
public struct Rune
{
    /// <summary>
    /// The code point.
    /// </summary>
    /// <value>The code point.</value>
    public int value;

    /// <summary>
    /// Initializes and returns an instance of Rune.
    /// </summary>
    /// <remarks>You are responsible for the code point being valid.</remarks>
    /// <param name="codepoint">The code point.</param>
    public Rune(int codepoint)
    {
        value = codepoint;
    }

    /// <summary>
    /// Returns a rune.
    /// </summary>
    /// <remarks>Because a char is 16-bit, it can only represent the first 2^16 code points, not all 1.1 million.</remarks>
    /// <param name="codepoint">A code point.</param>
    /// <returns>A rune.</returns>
    public static implicit operator Rune(char codepoint) => new Rune { value = codepoint };

    /// <summary>
    /// Evaluates if one is equal to the other.
    /// </summary>
    /// <param name="lhs">The left-hand side</param>
    /// <param name="rhs">The right-hand side</param>
    /// <returns>True if the left-hand side's code point is equal to the right-hand side's.</returns>
    public static bool operator ==(Rune lhs, Rune rhs)
    {
        return lhs.value == rhs.value;
    }

    /// <summary>
    /// Returns true if the value stored in this Rune is equal to an object.
    /// </summary>
    /// <remarks>Can only be equal if the object is itself a Rune.</remarks>
    /// <param name="obj">An object to compare with.</param>
    /// <returns>True if the object is a Rune holding the same code point.</returns>
    [ExcludeFromBurstCompatTesting("Takes managed object")]
    public override bool Equals(object obj)
    {
        // Type pattern: test and unbox in one step instead of `is` followed by a cast.
        return obj is Rune other && value == other.value;
    }

    /// <summary>
    /// A hash used for comparisons.
    /// </summary>
    /// <returns>The code point itself, so equal runes always produce equal hash codes.</returns>
    public override int GetHashCode()
    {
        return value;
    }

    /// <summary>
    /// Evaluates if one is not equal to the other.
    /// </summary>
    /// <param name="lhs">The left-hand side</param>
    /// <param name="rhs">The right-hand side</param>
    /// <returns>True if the left-hand side's code point is not equal to the right-hand side's.</returns>
    public static bool operator !=(Rune lhs, Rune rhs)
    {
        return lhs.value != rhs.value;
    }

    /// <summary>
    /// Returns true if a rune is a numerical digit character.
    /// </summary>
    /// <remarks>Only the ASCII digits '0' through '9' are recognized.</remarks>
    /// <param name="r">The rune.</param>
    /// <returns>True if the rune is a numerical digit character.</returns>
    public static bool IsDigit(Rune r)
    {
        return r.IsDigit();
    }

    /// <summary>
    /// Returns true if the code point lies in the ASCII range U+0000..U+007F.
    /// </summary>
    /// <remarks>
    /// The comparison is done unsigned so that negative (invalid) code points are rejected
    /// rather than being reported as ASCII.
    /// </remarks>
    internal bool IsAscii()
    {
        return (uint)value < 0x80u;
    }

    /// <summary>
    /// Returns true if the code point lies in the Latin-1 range U+0000..U+00FF.
    /// </summary>
    /// <remarks>
    /// The comparison is done unsigned so that negative (invalid) code points are rejected
    /// rather than being reported as Latin-1.
    /// </remarks>
    internal bool IsLatin1()
    {
        return (uint)value < 0x100u;
    }

    /// <summary>
    /// Returns true if the code point is one of the ASCII digits '0' through '9'.
    /// </summary>
    internal bool IsDigit()
    {
        return value >= '0' && value <= '9';
    }

    /// <summary>
    /// Returns true if the code point is one of the white-space characters enumerated below.
    /// </summary>
    internal bool IsWhiteSpace()
    {
        // https://en.wikipedia.org/wiki/Whitespace_character#Unicode

        if (IsLatin1())
        {
            return value == ' '
                || (value >= 0x9 && value <= 0xD) // CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D)
                || value == 0xA0 // NO-BREAK SPACE
                || value == 0x85 // NEXT LINE
                ;
        }

        return value == 0x1680 // OGHAM SPACE MARK
            || (value >= 0x2000 && value <= 0x200A) // EN QUAD (U+2000)
                                                    // EM QUAD (U+2001)
                                                    // EN SPACE (U+2002)
                                                    // EM SPACE (U+2003)
                                                    // THREE-PER-EM SPACE (U+2004)
                                                    // FOUR-PER-EM SPACE (U+2005)
                                                    // SIX-PER-EM SPACE (U+2006)
                                                    // FIGURE SPACE (U+2007)
                                                    // PUNCTUATION SPACE (U+2008)
                                                    // THIN SPACE (U+2009)
                                                    // HAIR SPACE (U+200A)
            || value == 0x2028 // LINE SEPARATOR
            || value == 0x2029 // PARAGRAPH SEPARATOR
            || value == 0x202F // NARROW NO-BREAK SPACE
            || value == 0x205F // MEDIUM MATHEMATICAL SPACE
            || value == 0x3000 // IDEOGRAPHIC SPACE
            ;
    }

    /// <summary>
    /// Returns this rune with ASCII 'A'..'Z' mapped to 'a'..'z'; every other code point is returned unchanged.
    /// </summary>
    internal Rune ToLowerAscii()
    {
        // The unsigned subtraction folds the "below 'A'" and "above 'Z'" tests into one comparison.
        return new Rune(value + (((uint)(value - 'A') <= ('Z' - 'A')) ? 0x20 : 0));
    }

    /// <summary>
    /// Returns this rune with ASCII 'a'..'z' mapped to 'A'..'Z'; every other code point is returned unchanged.
    /// </summary>
    internal Rune ToUpperAscii()
    {
        // The unsigned subtraction folds the "below 'a'" and "above 'z'" tests into one comparison.
        return new Rune(value - (((uint)(value - 'a') <= ('z' - 'a')) ? 0x20 : 0));
    }

    /// <summary>
    /// Returns the number of bytes required to encode this rune as UTF-8.
    /// </summary>
    /// <returns>The number of bytes required to encode this rune as UTF-8. If the rune's codepoint
    /// is invalid, returns 4 (the maximum possible encoding length).</returns>
    public int LengthInUtf8Bytes()
    {
        if (value < 0)
            return 4; // invalid codepoint
        if (value <= 0x7F)
            return 1;
        if (value <= 0x7FF)
            return 2;
        if (value <= 0xFFFF)
            return 3;
        // Either a genuine 4-byte code point or an out-of-range one; both report the maximum size.
        return 4;
    }
}
274
/// <summary>The maximum value of a valid UNICODE code point</summary>
public const int kMaximumValidCodePoint = 0x10FFFF;

/// <summary>
/// Returns true if a code point is valid.
/// </summary>
/// <remarks>
/// Only the numeric range 0 through <see cref="kMaximumValidCodePoint"/> is checked.
/// Code points in the surrogate range (U+D800 through U+DFFF) are accepted: the check
/// that would reject them is disabled in this implementation.
/// </remarks>
/// <param name="codepoint">A code point.</param>
/// <returns>True if a code point is valid.</returns>
public static bool IsValidCodePoint(int codepoint)
{
    return codepoint >= 0 && codepoint <= kMaximumValidCodePoint;
}
293
/// <summary>
/// Returns true if the byte is not a UTF-8 continuation ("trailer") byte.
/// </summary>
/// <remarks>A continuation byte has the bit pattern 10xxxxxx.</remarks>
/// <param name="b">The byte.</param>
/// <returns>True if the two high bits of the byte are anything other than 10.</returns>
public static bool NotTrailer(byte b)
{
    // Keep only the two most significant bits and compare them against binary 10.
    return (b >> 6) != 0b10;
}
303
/// <summary>
/// The Unicode replacement character U+FFFD (�).
/// </summary>
/// <remarks>This character is used to stand in for characters that can't be rendered.</remarks>
/// <value>A rune whose code point is 0xFFFD.</value>
public static Rune ReplacementCharacter => new Rune(0xFFFD);
310
/// <summary>
/// The null rune value.
/// </summary>
/// <remarks>In this package, the "bad rune" is used as a null character. It represents no valid code point.</remarks>
/// <value>A rune whose code point is 0.</value>
public static Rune BadRune => new Rune(0);
317
/// <summary>
/// Reads a UTF-8 encoded character from a buffer.
/// </summary>
/// <param name="rune">Outputs the character read. If the read fails, outputs <see cref="ReplacementCharacter"/>.</param>
/// <param name="buffer">The buffer of bytes to read.</param>
/// <param name="index">Reference to a byte index into the buffer. If the read succeeds, index is incremented by the
/// size in bytes of the character read. If the read fails because index is already at or beyond capacity, index is
/// left unchanged; on any other failure index is incremented by 1.</param>
/// <param name="capacity">The size in bytes of the buffer. Used to check that the read is in bounds.</param>
/// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/>
/// (the sequence does not fit in the buffer) or <see cref="ConversionError.Encoding"/> (bad lead byte, bad continuation
/// byte, overlong form, or a value beyond the maximum code point).</returns>
public static ConversionError Utf8ToUcs(out Rune rune, byte* buffer, ref int index, int capacity)
{
    // Start from the failure result; it is overwritten only when decoding succeeds.
    rune = ReplacementCharacter;

    // Nothing left to read. This is the only failure path that does not advance index.
    if (index + 1 > capacity)
    {
        return ConversionError.Overflow;
    }

    byte lead = buffer[index];

    // 0xxxxxxx: a single-byte sequence.
    if ((lead & 0b10000000) == 0b00000000)
    {
        rune.value = lead;
        index += 1;
        return ConversionError.None;
    }

    int sequenceLength;  // total bytes in the sequence, lead byte included
    int code;            // payload bits accumulated so far
    int smallestAllowed; // values below this could have been encoded shorter (overlong form)

    if ((lead & 0b11100000) == 0b11000000)      // 110xxxxx: 2 bytes
    {
        sequenceLength = 2;
        code = lead & 0b00011111;
        smallestAllowed = 1 << 7;
    }
    else if ((lead & 0b11110000) == 0b11100000) // 1110xxxx: 3 bytes
    {
        sequenceLength = 3;
        code = lead & 0b00001111;
        smallestAllowed = 1 << 11;
    }
    else if ((lead & 0b11111000) == 0b11110000) // 11110xxx: 4 bytes
    {
        sequenceLength = 4;
        code = lead & 0b00000111;
        smallestAllowed = 1 << 16;
    }
    else
    {
        // A stray continuation byte or an unsupported lead byte.
        index += 1;
        return ConversionError.Encoding;
    }

    if (index + sequenceLength > capacity)
    {
        index += 1;
        return ConversionError.Overflow;
    }

    // Fold in the continuation bytes, remembering whether each one really has the 10xxxxxx shape.
    bool allTrailers = true;
    for (int offset = 1; offset < sequenceLength; ++offset)
    {
        byte next = buffer[index + offset];
        code = (code << 6) | (next & 0b00111111);
        if (NotTrailer(next))
        {
            allTrailers = false;
        }
    }

    if (code < smallestAllowed || !IsValidCodePoint(code) || !allTrailers)
    {
        index += 1;
        return ConversionError.Encoding;
    }

    rune.value = code;
    index += sequenceLength;
    return ConversionError.None;
}
406
/// <summary>
/// Walks backwards from <paramref name="index"/> until a byte that is not a UTF-8 continuation byte is found.
/// </summary>
/// <param name="ptr">The bytes to scan.</param>
/// <param name="index">Scan position. It is decremented before each byte is inspected, so the first byte examined
/// is at index - 1. If index is zero or negative when checked, it is left as it is.</param>
/// <returns>The index of the non-continuation byte found, or 0 if the scan reached the start of the buffer.</returns>
static int FindUtf8CharStartInReverse(byte* ptr, ref int index)
{
    while (true)
    {
        // Ran out of bytes: report the start of the buffer without touching index.
        if (index <= 0)
        {
            return 0;
        }

        --index;

        // Anything that is not 10xxxxxx begins a character.
        if ((ptr[index] & 0xC0) != 0x80)
        {
            return index;
        }
    }
}
422
/// <summary>
/// Steps backwards from <paramref name="index"/> to the start of a UTF-8 character and decodes that character.
/// </summary>
/// <param name="rune">Outputs the decoded character, or <see cref="ReplacementCharacter"/> on failure.</param>
/// <param name="buffer">The buffer of bytes to read.</param>
/// <param name="index">On return, holds the byte index of the character start that was located. It is not
/// advanced past the decoded character, because decoding runs on a scratch copy.</param>
/// <param name="capacity">The size in bytes of the buffer, forwarded to <see cref="Utf8ToUcs"/>.</param>
/// <returns><see cref="ConversionError.Overflow"/> if the search ends on the index it started from (for example
/// when index is 0 on entry); otherwise the result of <see cref="Utf8ToUcs"/> at the located start.</returns>
internal static ConversionError Utf8ToUcsReverse(out Rune rune, byte* buffer, ref int index, int capacity)
{
    int entryIndex = index;

    // NOTE(review): the position is decremented once here and once more inside
    // FindUtf8CharStartInReverse before any byte is inspected, so the byte at entryIndex - 1
    // is never examined as a potential start. Confirm against callers that this is intended.
    int cursor = entryIndex - 1;
    index = FindUtf8CharStartInReverse(buffer, ref cursor);

    if (index == entryIndex)
    {
        rune = ReplacementCharacter;
        return ConversionError.Overflow;
    }

    // Decode from a copy so the caller's index keeps pointing at the character start.
    int scratch = index;
    return Utf8ToUcs(out rune, buffer, ref scratch, capacity);
}
439
/// <summary>
/// Returns true if a char is a Unicode leading surrogate.
/// </summary>
/// <param name="c">The char.</param>
/// <returns>True if the char is in the range U+D800 through U+DBFF.</returns>
static bool IsLeadingSurrogate(char c)
{
    // Every value in D800..DBFF shares the top six bits 110110, and no other value does.
    return (c & 0xFC00) == 0xD800;
}
449
/// <summary>
/// Returns true if a char is a Unicode trailing surrogate.
/// </summary>
/// <param name="c">The char.</param>
/// <returns>True if the char is in the range U+DC00 through U+DFFF.</returns>
static bool IsTrailingSurrogate(char c)
{
    // Every value in DC00..DFFF shares the top six bits 110111, and no other value does.
    return (c & 0xFC00) == 0xDC00;
}
459
/// <summary>
/// Reads a UTF-16 encoded character from a buffer.
/// </summary>
/// <remarks>
/// A leading surrogate that is not followed by a trailing surrogate (or that is the last char in the buffer)
/// is not treated as an error: it is returned as-is as a one-char rune.
/// </remarks>
/// <param name="rune">Outputs the character read. If the read fails, outputs <see cref="ReplacementCharacter"/>.</param>
/// <param name="buffer">The buffer of chars to read.</param>
/// <param name="index">Reference to a char index into the buffer. If the read succeeds, index is incremented by the
/// size in chars of the character read. If the read fails, index is not incremented.</param>
/// <param name="capacity">The size in chars of the buffer. Used to check that the read is in bounds.</param>
/// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
public static ConversionError Utf16ToUcs(out Rune rune, char* buffer, ref int index, int capacity)
{
    rune = ReplacementCharacter;

    if (index + 1 > capacity)
    {
        return ConversionError.Overflow;
    }

    char first = buffer[index];

    // Only look at the next char when this one opens a pair and the buffer has room for it.
    if (IsLeadingSurrogate(first) && index + 2 <= capacity)
    {
        char second = buffer[index + 1];
        if (IsTrailingSurrogate(second))
        {
            // Ten payload bits from each half, offset into the supplementary planes.
            int high = first & 0x03FF;
            int low = second & 0x03FF;
            rune.value = ((high << 10) | low) + 0x10000;
            index += 2;
            return ConversionError.None;
        }
    }

    // Ordinary BMP char, or a surrogate without a partner: hand it through unchanged.
    rune.value = first;
    index += 1;
    return ConversionError.None;
}
495
/// <summary>
/// Reads one rune from a buffer of runes.
/// </summary>
/// <param name="rune">Outputs the rune read, or <see cref="ReplacementCharacter"/> if the read is out of bounds.</param>
/// <param name="buffer">The buffer of runes to read.</param>
/// <param name="index">Reference to a rune index into the buffer. Incremented by 1 on success, unchanged on failure.</param>
/// <param name="capacity">The size in runes of the buffer.</param>
/// <returns><see cref="ConversionError.None"/> on success, otherwise <see cref="ConversionError.Overflow"/>.</returns>
internal static ConversionError UcsToUcs(out Rune rune, Rune* buffer, ref int index, int capacity)
{
    if (index + 1 > capacity)
    {
        rune = ReplacementCharacter;
        return ConversionError.Overflow;
    }

    rune = buffer[index++];
    return ConversionError.None;
}
505
/// <summary>
/// Writes a rune to a buffer as a UTF-8 encoded character.
/// </summary>
/// <param name="buffer">The buffer to write to.</param>
/// <param name="index">Reference to a byte index into the buffer. If the write succeeds, index is incremented by the
/// size in bytes of the character written. If the write fails, index is not incremented.</param>
/// <param name="capacity">The size in bytes of the buffer. Used to check that the write is in bounds.</param>
/// <param name="rune">The rune to encode.</param>
/// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
public static ConversionError UcsToUtf8(byte* buffer, ref int index, int capacity, Rune rune)
{
    int cp = rune.value;

    if (!IsValidCodePoint(cp))
    {
        return ConversionError.CodePoint;
    }

    // Even the shortest encoding needs one byte.
    if (index + 1 > capacity)
    {
        return ConversionError.Overflow;
    }

    // Decide how many bytes the encoding takes.
    int byteCount;
    if (cp <= 0x7F)
    {
        byteCount = 1;
    }
    else if (cp <= 0x7FF)
    {
        byteCount = 2;
    }
    else if (cp <= 0xFFFF)
    {
        byteCount = 3;
    }
    else if (cp <= 0x1FFFFF)
    {
        byteCount = 4;
    }
    else
    {
        return ConversionError.Encoding;
    }

    // Nothing is written unless the whole sequence fits.
    if (index + byteCount > capacity)
    {
        return ConversionError.Overflow;
    }

    byte* dest = buffer + index;
    switch (byteCount)
    {
        case 1:
            dest[0] = (byte)cp;
            break;

        case 2:
            dest[0] = (byte)(0xC0 | (cp >> 6));
            dest[1] = (byte)(0x80 | (cp & 0x3F));
            break;

        case 3:
            dest[0] = (byte)(0xE0 | (cp >> 12));
            dest[1] = (byte)(0x80 | ((cp >> 6) & 0x3F));
            dest[2] = (byte)(0x80 | (cp & 0x3F));
            break;

        default:
            dest[0] = (byte)(0xF0 | (cp >> 18));
            dest[1] = (byte)(0x80 | ((cp >> 12) & 0x3F));
            dest[2] = (byte)(0x80 | ((cp >> 6) & 0x3F));
            dest[3] = (byte)(0x80 | (cp & 0x3F));
            break;
    }

    index += byteCount;
    return ConversionError.None;
}
574
/// <summary>
/// Writes a rune to a buffer as a UTF-16 encoded character.
/// </summary>
/// <param name="buffer">The buffer of chars to write to.</param>
/// <param name="index">Reference to a char index into the buffer. If the write succeeds, index is incremented by the
/// size in chars of the character written. If the write fails, index is not incremented.</param>
/// <param name="capacity">The size in chars of the buffer. Used to check that the write is in bounds.</param>
/// <param name="rune">The rune to encode.</param>
/// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
public static ConversionError UcsToUtf16(char* buffer, ref int index, int capacity, Rune rune)
{
    int cp = rune.value;

    if (!IsValidCodePoint(cp))
    {
        return ConversionError.CodePoint;
    }

    if (index + 1 > capacity)
    {
        return ConversionError.Overflow;
    }

    // Code points below U+10000 are stored directly in one char.
    if (cp < 0x10000)
    {
        buffer[index++] = (char)cp;
        return ConversionError.None;
    }

    // Everything else needs a surrogate pair, i.e. two chars.
    if (index + 2 > capacity)
    {
        return ConversionError.Overflow;
    }

    int offset = cp - 0x10000;
    if (offset >= (1 << 20))
    {
        return ConversionError.Encoding;
    }

    buffer[index] = (char)(0xD800 | (offset >> 10));       // leading surrogate: high ten bits
    buffer[index + 1] = (char)(0xDC00 | (offset & 0x3FF)); // trailing surrogate: low ten bits
    index += 2;
    return ConversionError.None;
}
617
/// <summary>
/// Copies UTF-16 characters from one buffer to another buffer as UTF-8.
/// </summary>
/// <remarks>Assumes the source data is valid UTF-16. On overflow, the characters that were already
/// converted remain in the destination and are counted in <paramref name="utf8Length"/>.</remarks>
/// <param name="utf16Buffer">The source buffer.</param>
/// <param name="utf16Length">The number of chars to read from the source.</param>
/// <param name="utf8Buffer">The destination buffer.</param>
/// <param name="utf8Length">Outputs the number of bytes written to the destination.</param>
/// <param name="utf8Capacity">The size in bytes of the destination buffer.</param>
/// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
public static ConversionError Utf16ToUtf8(char* utf16Buffer, int utf16Length, byte* utf8Buffer, out int utf8Length, int utf8Capacity)
{
    utf8Length = 0;

    var readOffset = 0;
    while (readOffset < utf16Length)
    {
        // Decode one character from the source, then re-encode it into the destination.
        Utf16ToUcs(out var decoded, utf16Buffer, ref readOffset, utf16Length);

        var writeResult = UcsToUtf8(utf8Buffer, ref utf8Length, utf8Capacity, decoded);
        if (writeResult == ConversionError.Overflow)
        {
            return ConversionError.Overflow;
        }
    }

    return ConversionError.None;
}
639
/// <summary>
/// Copies UTF-8 characters from one buffer to another.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. When the destination can hold the whole source, the bytes are
/// copied verbatim. Otherwise the source is decoded and re-encoded one character at a time until the
/// destination is full; on that path a sequence that fails to decode is written as <see cref="ReplacementCharacter"/>.
/// </remarks>
/// <param name="srcBuffer">The source buffer.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <param name="destBuffer">The destination buffer.</param>
/// <param name="destLength">Outputs the number of bytes written to the destination.</param>
/// <param name="destCapacity">The size in bytes of the destination buffer.</param>
/// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
public static ConversionError Utf8ToUtf8(byte* srcBuffer, int srcLength, byte* destBuffer, out int destLength, int destCapacity)
{
    // Fast path: the whole source fits, so a raw byte copy is enough.
    if (srcLength <= destCapacity)
    {
        UnsafeUtility.MemCpy(destBuffer, srcBuffer, srcLength);
        destLength = srcLength;
        return ConversionError.None;
    }

    // TODO even in this case, it's possible to MemCpy all but the last 3 bytes that fit, and then by looking at only
    // TODO the high bits of the last 3 bytes that fit, decide how many of the 3 to append. but that requires a
    // TODO little UNICODE presence of mind that nobody has today.
    destLength = 0;

    var readOffset = 0;
    while (readOffset < srcLength)
    {
        // Going character by character guarantees the destination never ends in a partial sequence.
        Utf8ToUcs(out var decoded, srcBuffer, ref readOffset, srcLength);

        var writeResult = UcsToUtf8(destBuffer, ref destLength, destCapacity, decoded);
        if (writeResult == ConversionError.Overflow)
        {
            return ConversionError.Overflow;
        }
    }

    return ConversionError.None;
}
670
/// <summary>
/// Copies UTF-8 characters from one buffer to another as UTF-16.
/// </summary>
/// <remarks>Assumes the source data is valid UTF-8. A sequence that fails to decode is written as
/// <see cref="ReplacementCharacter"/>. On overflow, the characters that were already converted remain in
/// the destination and are counted in <paramref name="utf16Length"/>.</remarks>
/// <param name="utf8Buffer">The source buffer.</param>
/// <param name="utf8Length">The number of bytes to read from the source.</param>
/// <param name="utf16Buffer">The destination buffer.</param>
/// <param name="utf16Length">Outputs the number of chars written to the destination.</param>
/// <param name="utf16Capacity">The size in chars of the destination buffer.</param>
/// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, <see cref="ConversionError.Overflow"/>.</returns>
public static ConversionError Utf8ToUtf16(byte* utf8Buffer, int utf8Length, char* utf16Buffer, out int utf16Length, int utf16Capacity)
{
    utf16Length = 0;

    var readOffset = 0;
    while (readOffset < utf8Length)
    {
        // Decode one character from the source, then re-encode it into the destination.
        Utf8ToUcs(out var decoded, utf8Buffer, ref readOffset, utf8Length);

        var writeResult = UcsToUtf16(utf16Buffer, ref utf16Length, utf16Capacity, decoded);
        if (writeResult == ConversionError.Overflow)
        {
            return ConversionError.Overflow;
        }
    }

    return ConversionError.None;
}
693
/// <summary>
/// Counts the characters in a UTF-8 buffer by counting the bytes that are not continuation bytes.
/// </summary>
/// <param name="utf8Buffer">The bytes to scan.</param>
/// <param name="utf8Length">The number of bytes to scan.</param>
/// <param name="maxRunes">Scanning stops as soon as this many characters have been counted.</param>
/// <returns>The number of non-continuation bytes seen, never more than <paramref name="maxRunes"/>
/// when that limit is positive.</returns>
static int CountRunes(byte* utf8Buffer, int utf8Length, int maxRunes = int.MaxValue)
{
    var count = 0;
    var offset = 0;

    while (count < maxRunes && offset < utf8Length)
    {
        // Every byte that is not 10xxxxxx starts a new character.
        var isContinuation = (utf8Buffer[offset] & 0xC0) == 0x80;
        if (!isContinuation)
        {
            count++;
        }

        ++offset;
    }

    return count;
}
706 }
707}