Library/PackageCache/com.unity.collections/Unity.Collections/Unicode.cs at master · tacstudios.tngl.sh/AloneGame

A game about forced loneliness, made by TACStudios
AloneGame / Library / PackageCache / com.unity.collections / Unity.Collections / Unicode.cs
at master 707 lines 28 kB view raw
  1using System;
  2using Unity.Collections.LowLevel.Unsafe;
  3
  4namespace Unity.Collections
  5{
  6    /// <summary>
  7    /// Kinds of format errors.
  8    /// </summary>
  9    public enum FormatError
 10    {
 11        /// <summary>
 12        /// No error.
 13        /// </summary>
 14        None,
 15
 16        /// <summary>
 17        /// The target storage does not have sufficient capacity.
 18        /// Note that the format's write failed. It did not truncate.
 19        /// </summary>
 20        Overflow,
 21
 22        /// <summary>
 23        /// The source format specifier is not itself correctly formatted, or
 24        /// a format specifier tokens were found outside of accepted usage.
 25        /// Note that the format's write failed.
 26        /// </summary>
 27        BadFormatSpecifier,
 28    }
 29
 30    /// <summary>
 31    /// Kinds of parse errors.
 32    /// </summary>
 33    public enum ParseError
 34    {
 35        /// <summary>
 36        /// No parse error.
 37        /// </summary>
 38        None,
 39
 40        /// <summary>
 41        /// The text parsed does not form a number.
 42        /// </summary>
 43        Syntax,
 44
 45        /// <summary>
 46        /// The number exceeds the range of the target type.
 47        /// The number was either truncated, or failed to write entirely.
 48        /// </summary>
 49        Overflow,
 50
 51        /// <summary>
 52        /// The number exceeds the precision of the target type.
 53        /// </summary>
 54        Underflow,
 55    }
 56
 57    /// <summary>
 58    /// Kinds of copy errors.
 59    /// </summary>
 60    public enum CopyError
 61    {
 62        /// <summary>
 63        /// No copy error.
 64        /// </summary>
 65        None,
 66
 67        /// <summary>
 68        /// The target storage does not have sufficient capacity.
 69        /// Unless stated in the API comment, assume that the write operation was partially applied.
 70        /// </summary>
 71        Truncation,
 72    }
 73
 74    /// <summary>
 75    /// Kinds of conversion errors.
 76    /// </summary>
 77    public enum ConversionError
 78    {
 79        /// <summary>
 80        /// No conversion error.
 81        /// </summary>
 82        None,
 83
 84        /// <summary>
 85        /// The target storage does not have sufficient capacity.
 86        /// For copy operations; the value was either truncated into the target storage, or failed to write entirely.
 87        /// </summary>
 88        Overflow,
 89
 90        /// <summary>
 91        /// The bytes do not form a valid character.
 92        /// </summary>
 93        Encoding,
 94
 95        /// <summary>
 96        /// The rune is not a valid code point.
 97        /// </summary>
 98        CodePoint,
 99    }
100
101    /// <summary>
102    /// Provides utility methods for UTF-8, UTF-16, UCS-4 (a.k.a. UTF-32), and WTF-8.
103    /// </summary>
104    [GenerateTestsForBurstCompatibility]
105    public unsafe struct Unicode
106    {
107        /// <summary>
108        /// Representation of a Unicode character as a code point.
109        /// </summary>
110        [GenerateTestsForBurstCompatibility]
111        public struct Rune
112        {
113            /// <summary>
114            /// The code point.
115            /// </summary>
116            /// <value>The code point.</value>
117            public int value;
118
119            /// <summary>
120            /// Initializes and returns an instance of Rune.
121            /// </summary>
122            /// <remarks>You are responsible for the code point being valid.</remarks>
123            /// <param name="codepoint">The code point.</param>
124            public Rune(int codepoint)
125            {
126                value = codepoint;
127            }
128
129            /// <summary>
130            /// Returns a rune.
131            /// </summary>
132            /// <remarks>Because a char is 16-bit, it can only represent the first 2^16 code points, not all 1.1 million.</remarks>
133            /// <param name="codepoint">A code point.</param>
134            /// <returns>A rune.</returns>
135            public static implicit operator Rune(char codepoint) => new Rune { value = codepoint };
136
137            /// <summary>
138            /// Evaluates if one is equal to the other.
139            /// </summary>
140            /// <param name="lhs">The left-hand side</param>
141            /// <param name="rhs">The right-hand side</param>
142            /// <returns>True if the left-hand side's is equal to the right-hand side's.</returns>
143            public static bool operator ==(Rune lhs, Rune rhs)
144            {
145                return lhs.value == rhs.value;
146            }
147
148            /// <summary>
149            /// Returns true if the value stored in this Rune is equal to an object.
150            /// </summary>
151            /// <remarks>Can only be equal if the object is itself a Rune.</remarks>
152            /// <param name="obj">An object to compare with.</param>
153            /// <returns>True if the value stored in this Rune is equal to the object.</returns>
154            [ExcludeFromBurstCompatTesting("Takes managed object")]
155            public override bool Equals(object obj)
156            {
157                if (obj is Rune)
158                {
159                    return value == ((Rune)obj).value;
160                }
161
162                return false;
163            }
164
165            /// <summary>
166            /// A hash used for comparisons.
167            /// </summary>
168            /// <returns>A unique hash code.</returns>
169            public override int GetHashCode()
170            {
171                return value;
172            }
173
174            /// <summary>
175            /// Evaluates if one is not equal to the other.
176            /// </summary>
177            /// <param name="lhs">The left-hand side</param>
178            /// <param name="rhs">The right-hand side</param>
179            /// <returns>True if the left-hand side's is not equal to the right-hand side's.</returns>
180            public static bool operator !=(Rune lhs, Rune rhs)
181            {
182                return lhs.value != rhs.value;
183            }
184
185            /// <summary>
186            /// Returns true if a rune is a numerical digit character.
187            /// </summary>
188            /// <param name="r">The rune.</param>
189            /// <returns>True if the rune is a numerical digit character.</returns>
190            public static bool IsDigit(Rune r)
191            {
192                return r.IsDigit();
193            }
194
195            internal bool IsAscii()
196            {
197                return value < 0x80;
198            }
199
200            internal bool IsLatin1()
201            {
202                return value < 0x100;
203            }
204
205            internal bool IsDigit()
206            {
207                return value >= '0' && value <= '9';
208            }
209
210            internal bool IsWhiteSpace()
211            {
212                // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
213
214                if (IsLatin1())
215                {
216                    return value == ' '
217                        || (value >= 0x9 && value <= 0xD) // CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D)
218                        || value == 0xA0 // NO-BREAK SPACE
219                        || value == 0x85 // NEXT LINE
220                        ;
221                }
222
223                return value == 0x1680 // OGHAM SPACE MARK
224                    || (value >= 0x2000 && value <= 0x200A) // EN QUAD(U+2000)
225                                                            // EM QUAD(U+2001)
226                                                            // EN SPACE(U+2002)
227                                                            // EM SPACE(U+2003)
228                                                            // THREE - PER - EM SPACE(U + 2004)
229                                                            // FOUR - PER - EM SPACE(U + 2005)
230                                                            // SIX - PER - EM SPACE(U + 2006)
231                                                            // FIGURE SPACE(U+2007)
232                                                            // PUNCTUATION SPACE(U+2008)
233                                                            // THIN SPACE(U+2009)
234                                                            // HAIR SPACE(U+200A)
235                    || value == 0x2028 // LINE SEPARATOR
236                    || value == 0x2029 // PARAGRAPH SEPARATOR
237                    || value == 0x202F // NARROW NO-BREAK SPACE
238                    || value == 0x205F // MEDIUM MATHEMATICAL SPACE
239                    || value == 0x3000 // IDEOGRAPHIC SPACE
240                    ;
241            }
242
243            internal Rune ToLowerAscii()
244            {
245                return new Rune(value + (((uint)(value - 'A') <= ('Z' - 'A')) ? 0x20 : 0));
246            }
247
248            internal Rune ToUpperAscii()
249            {
250                return new Rune(value - (((uint)(value - 'a') <= ('z' - 'a')) ? 0x20 : 0));
251            }
252
253            /// <summary>
254            /// Returns the number of bytes required to encode this rune as UTF-8.
255            /// </summary>
256            /// <returns>The number of bytes required to encode this rune as UTF-8. If the rune's codepoint
257            /// is invalid, returns 4 (the maximum possible encoding length).</returns>
258            public int LengthInUtf8Bytes()
259            {
260                if (value < 0)
261                    return 4; // invalid codepoint
262                if (value <= 0x7F)
263                    return 1;
264                if (value <= 0x7FF)
265                    return 2;
266                if (value <= 0xFFFF)
267                    return 3;
268                if (value <= 0x1FFFFF)
269                    return 4;
270                // invalid codepoint, max size.
271                return 4;
272            }
273        }
274
275        /// <summary>The maximum value of a valid UNICODE code point</summary>
276        public const int kMaximumValidCodePoint = 0x10FFFF;
277
278        /// <summary>
279        /// Returns true if a code point is valid.
280        /// </summary>
281        /// <param name="codepoint">A code point.</param>
282        /// <returns>True if a code point is valid.</returns>
283        public static bool IsValidCodePoint(int codepoint)
284        {
285            if (codepoint > kMaximumValidCodePoint) // maximum valid code point
286                return false;
287//            if (codepoint >= 0xD800 && codepoint <= 0xDFFF) // surrogate pair
288//                return false;
289            if (codepoint < 0) // negative?
290                return false;
291            return true;
292        }
293
294        /// <summary>
295        /// Returns true if the byte is not the last byte of a UTF-8 character.
296        /// </summary>
297        /// <param name="b">The byte.</param>
298        /// <returns>True if the byte is not the last byte of a UTF-8 character.</returns>
299        public static bool NotTrailer(byte b)
300        {
301            return (b & 0xC0) != 0x80;
302        }
303
304        /// <summary>
305        /// The Unicode character �.
306        /// </summary>
307        /// <remarks>This character is used to stand-in for characters that can't be rendered.</remarks>
308        /// <value>The Unicode character �.</value>
309        public static Rune ReplacementCharacter => new Rune { value = 0xFFFD };
310
311        /// <summary>
312        /// The null rune value.
313        /// </summary>
314        /// <remarks>In this package, the "bad rune" is used as a null character. It represents no valid code point.</remarks>
315        /// <value>The null rune value.</value>
316        public static Rune BadRune => new Rune { value = 0 };
317
318        /// <summary>
319        /// Reads a UTF-8 encoded character from a buffer.
320        /// </summary>
321        /// <param name="rune">Outputs the character read. If the read fails, outputs <see cref="ReplacementCharacter"/>.</param>
322        /// <param name="buffer">The buffer of bytes to read.</param>
323        /// <param name="index">Reference to a byte index into the buffer. If the read succeeds, index is incremented by the
324        /// size in bytes of the character read. If the read fails, index is incremented by 1.</param>
325        /// <param name="capacity">The size in bytes of the buffer. Used to check that the read is in bounds.</param>
326        /// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/> or <see cref="ConversionError.Encoding"/>.</returns>
327        public static ConversionError Utf8ToUcs(out Rune rune, byte* buffer, ref int index, int capacity)
328        {
329            int code = 0;
330            rune = ReplacementCharacter;
331            if (index + 1 > capacity)
332            {
333                return ConversionError.Overflow;
334            }
335
336            if ((buffer[index] & 0b10000000) == 0b00000000) // if high bit is 0, 1 byte
337            {
338                rune.value = buffer[index + 0];
339                index += 1;
340                return ConversionError.None;
341            }
342
343            if ((buffer[index] & 0b11100000) == 0b11000000) // if high 3 bits are 110, 2 bytes
344            {
345                if (index + 2 > capacity)
346                {
347                    index += 1;
348                    return ConversionError.Overflow;
349                }
350                code = (buffer[index + 0] & 0b00011111);
351                code = (code << 6) | (buffer[index + 1] & 0b00111111);
352                if (code < (1 << 7) || NotTrailer(buffer[index + 1]))
353                {
354                    index += 1;
355                    return ConversionError.Encoding;
356                }
357                rune.value = code;
358                index += 2;
359                return ConversionError.None;
360            }
361
362            if ((buffer[index] & 0b11110000) == 0b11100000) // if high 4 bits are 1110, 3 bytes
363            {
364                if (index + 3 > capacity)
365                {
366                    index += 1;
367                    return ConversionError.Overflow;
368                }
369                code = (buffer[index + 0] & 0b00001111);
370                code = (code << 6) | (buffer[index + 1] & 0b00111111);
371                code = (code << 6) | (buffer[index + 2] & 0b00111111);
372                if (code < (1 << 11) || !IsValidCodePoint(code) || NotTrailer(buffer[index + 1]) || NotTrailer(buffer[index + 2]))
373                {
374                    index += 1;
375                    return ConversionError.Encoding;
376                }
377                rune.value = code;
378                index += 3;
379                return ConversionError.None;
380            }
381
382            if ((buffer[index] & 0b11111000) == 0b11110000) // if high 5 bits are 11110, 4 bytes
383            {
384                if (index + 4 > capacity)
385                {
386                    index += 1;
387                    return ConversionError.Overflow;
388                }
389                code = (buffer[index + 0] & 0b00000111);
390                code = (code << 6) | (buffer[index + 1] & 0b00111111);
391                code = (code << 6) | (buffer[index + 2] & 0b00111111);
392                code = (code << 6) | (buffer[index + 3] & 0b00111111);
393                if (code < (1 << 16) || !IsValidCodePoint(code) || NotTrailer(buffer[index + 1]) || NotTrailer(buffer[index + 2]) || NotTrailer(buffer[index + 3]))
394                {
395                    index += 1;
396                    return ConversionError.Encoding;
397                }
398                rune.value = code;
399                index += 4;
400                return ConversionError.None;
401            }
402
403            index += 1;
404            return ConversionError.Encoding;
405        }
406
407        static int FindUtf8CharStartInReverse(byte* ptr, ref int index)
408        {
409            do
410            {
411                if (index <= 0)
412                {
413                    return 0;
414                }
415
416                --index;
417
418            } while ((ptr[index] & 0xC0) == 0x80);
419
420            return index;
421        }
422
423        internal static ConversionError Utf8ToUcsReverse(out Rune rune, byte* buffer, ref int index, int capacity)
424        {
425            var prev = index;
426            --index;
427
428            index = FindUtf8CharStartInReverse(buffer, ref index);
429
430            if (index == prev)
431            {
432                rune = ReplacementCharacter;
433                return ConversionError.Overflow;
434            }
435
436            var ignore = index;
437            return Utf8ToUcs(out rune, buffer, ref ignore, capacity);
438        }
439
440        /// <summary>
441        /// Returns true if a char is a Unicode leading surrogate.
442        /// </summary>
443        /// <param name="c">The char.</param>
444        /// <returns>True if the char is a Unicode leading surrogate.</returns>
445        static bool IsLeadingSurrogate(char c)
446        {
447            return c >= 0xD800 && c <= 0xDBFF;
448        }
449
450        /// <summary>
451        /// Returns true if a char is a Unicode trailing surrogate.
452        /// </summary>
453        /// <param name="c">The char.</param>
454        /// <returns>True if the char is a Unicode trailing surrogate.</returns>
455        static bool IsTrailingSurrogate(char c)
456        {
457            return c >= 0xDC00 && c <= 0xDFFF;
458        }
459
460        /// <summary>
461        /// Reads a UTF-16 encoded character from a buffer.
462        /// </summary>
463        /// <param name="rune">Outputs the character read. If the read fails, rune is not set.</param>
464        /// <param name="buffer">The buffer of chars to read.</param>
465        /// <param name="index">Reference to a char index into the buffer. If the read succeeds, index is incremented by the
466        /// size in chars of the character read. If the read fails, index is not incremented.</param>
467        /// <param name="capacity">The size in chars of the buffer. Used to check that the read is in bounds.</param>
468        /// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
469        public static ConversionError Utf16ToUcs(out Rune rune, char* buffer, ref int index, int capacity)
470        {
471            int code = 0;
472            rune = ReplacementCharacter;
473            if (index + 1 > capacity)
474                return ConversionError.Overflow;
475            if (!IsLeadingSurrogate(buffer[index]) || (index + 2 > capacity))
476            {
477                rune.value = buffer[index];
478                index += 1;
479                return ConversionError.None;
480            }
481            code =                (buffer[index + 0] & 0x03FF);
482            char next = buffer[index + 1];
483            if (!IsTrailingSurrogate(next))
484            {
485                rune.value = buffer[index];
486                index += 1;
487                return ConversionError.None;
488            }
489            code = (code << 10) | (buffer[index + 1] & 0x03FF);
490            code += 0x10000;
491            rune.value = code;
492            index += 2;
493            return ConversionError.None;
494        }
495
496        internal static ConversionError UcsToUcs(out Rune rune, Rune* buffer, ref int index, int capacity)
497        {
498            rune = ReplacementCharacter;
499            if (index + 1 > capacity)
500                return ConversionError.Overflow;
501            rune = buffer[index];
502            index += 1;
503            return ConversionError.None;
504        }
505
506        /// <summary>
507        /// Writes a rune to a buffer as a UTF-8 encoded character.
508        /// </summary>
509        /// <param name="rune">The rune to encode.</param>
510        /// <param name="buffer">The buffer to write to.</param>
511        /// <param name="index">Reference to a byte index into the buffer. If the write succeeds, index is incremented by the
512        /// size in bytes of the character written. If the write fails, index is not incremented.</param>
513        /// <param name="capacity">The size in bytes of the buffer. Used to check that the write is in bounds.</param>
514        /// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
515        public static ConversionError UcsToUtf8(byte* buffer, ref int index, int capacity, Rune rune)
516        {
517            if (!IsValidCodePoint(rune.value))
518            {
519                return ConversionError.CodePoint;
520            }
521
522            if (index + 1 > capacity)
523            {
524                return ConversionError.Overflow;
525            }
526
527            if (rune.value <= 0x7F)
528            {
529                buffer[index++] = (byte)rune.value;
530                return ConversionError.None;
531            }
532
533            if (rune.value <= 0x7FF)
534            {
535                if (index + 2 > capacity)
536                {
537                    return ConversionError.Overflow;
538                }
539
540                buffer[index++] = (byte)(0xC0 | (rune.value >> 6));
541                buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
542                return ConversionError.None;
543            }
544
545            if (rune.value <= 0xFFFF)
546            {
547                if (index + 3 > capacity)
548                {
549                    return ConversionError.Overflow;
550                }
551
552                buffer[index++] = (byte)(0xE0 | (rune.value >> 12));
553                buffer[index++] = (byte)(0x80 | ((rune.value >> 6) & 0x3F));
554                buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
555                return ConversionError.None;
556            }
557
558            if (rune.value <= 0x1FFFFF)
559            {
560                if (index + 4 > capacity)
561                {
562                    return ConversionError.Overflow;
563                }
564
565                buffer[index++] = (byte)(0xF0 | (rune.value >> 18));
566                buffer[index++] = (byte)(0x80 | ((rune.value >> 12) & 0x3F));
567                buffer[index++] = (byte)(0x80 | ((rune.value >> 6) & 0x3F));
568                buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
569                return ConversionError.None;
570            }
571
572            return ConversionError.Encoding;
573        }
574
575        /// <summary>
576        /// Writes a rune to a buffer as a UTF-16 encoded character.
577        /// </summary>
578        /// <param name="rune">The rune to encode.</param>
579        /// <param name="buffer">The buffer of chars to write to.</param>
580        /// <param name="index">Reference to a char index into the buffer. If the write succeeds, index is incremented by the
581        /// size in chars of the character written. If the write fails, index is not incremented.</param>
582        /// <param name="capacity">The size in chars of the buffer. Used to check that the write is in bounds.</param>
583        /// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
584        public static ConversionError UcsToUtf16(char* buffer, ref int index, int capacity, Rune rune)
585        {
586            if (!IsValidCodePoint(rune.value))
587            {
588                return ConversionError.CodePoint;
589            }
590
591            if (index + 1 > capacity)
592            {
593                return ConversionError.Overflow;
594            }
595
596            if (rune.value >= 0x10000)
597            {
598                if (index + 2 > capacity)
599                {
600                    return ConversionError.Overflow;
601                }
602
603                int code = rune.value - 0x10000;
604                if (code >= (1 << 20))
605                {
606                    return ConversionError.Encoding;
607                }
608
609                buffer[index++] = (char)(0xD800 | (code >> 10));
610                buffer[index++] = (char)(0xDC00 | (code & 0x3FF));
611                return ConversionError.None;
612            }
613
614            buffer[index++] = (char)rune.value;
615            return ConversionError.None;
616        }
617
618        /// <summary>
619        /// Copies UTF-16 characters from one buffer to another buffer as UTF-8.
620        /// </summary>
621        /// <remarks>Assumes the source data is valid UTF-16.</remarks>
622        /// <param name="utf16Buffer">The source buffer.</param>
623        /// <param name="utf16Length">The number of chars to read from the source.</param>
624        /// <param name="utf8Buffer">The destination buffer.</param>
625        /// <param name="utf8Length">Outputs the number of bytes written to the destination.</param>
626        /// <param name="utf8Capacity">The size in bytes of the destination buffer.</param>
627        /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
628        public static ConversionError Utf16ToUtf8(char* utf16Buffer, int utf16Length, byte* utf8Buffer, out int utf8Length, int utf8Capacity)
629        {
630            utf8Length = 0;
631            for (var utf16Offset = 0; utf16Offset < utf16Length;)
632            {
633                Utf16ToUcs(out var ucs, utf16Buffer, ref utf16Offset, utf16Length);
634                if (UcsToUtf8(utf8Buffer, ref utf8Length, utf8Capacity, ucs) == ConversionError.Overflow)
635                    return ConversionError.Overflow;
636            }
637            return ConversionError.None;
638        }
639
640        /// <summary>
641        /// Copies UTF-8 characters from one buffer to another.
642        /// </summary>
643        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
644        /// <param name="srcBuffer">The source buffer.</param>
645        /// <param name="srcLength">The number of bytes to read from the source.</param>
646        /// <param name="destBuffer">The destination buffer.</param>
647        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
648        /// <param name="destCapacity">The size in bytes of the destination buffer.</param>
649        /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
650        public static ConversionError Utf8ToUtf8(byte* srcBuffer, int srcLength, byte* destBuffer, out int destLength, int destCapacity)
651        {
652            if (destCapacity >= srcLength)
653            {
654                UnsafeUtility.MemCpy(destBuffer, srcBuffer, srcLength);
655                destLength = srcLength;
656                return ConversionError.None;
657            }
658            // TODO even in this case, it's possible to MemCpy all but the last 3 bytes that fit, and then by looking at only
659            // TODO the high bits of the last 3 bytes that fit, decide how many of the 3 to append. but that requires a
660            // TODO little UNICODE presence of mind that nobody has today.
661            destLength = 0;
662            for (var srcOffset = 0; srcOffset < srcLength;)
663            {
664                Utf8ToUcs(out var ucs, srcBuffer, ref srcOffset, srcLength);
665                if (UcsToUtf8(destBuffer, ref destLength, destCapacity, ucs) == ConversionError.Overflow)
666                    return ConversionError.Overflow;
667            }
668            return ConversionError.None;
669        }
670
671        /// <summary>
672        /// Copies UTF-8 characters from one buffer to another as UTF-16.
673        /// </summary>
674        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
675        /// <param name="utf8Buffer">The source buffer.</param>
676        /// <param name="utf8Length">The number of bytes to read from the source.</param>
677        /// <param name="utf16Buffer">The destination buffer.</param>
678        /// <param name="utf16Length">Outputs the number of chars written to the destination.</param>
679        /// <param name="utf16Capacity">The size in chars of the destination buffer.</param>
680        /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, <see cref="ConversionError.Overflow"/>.</returns>
681        public static ConversionError Utf8ToUtf16(byte* utf8Buffer, int utf8Length, char* utf16Buffer, out int utf16Length, int utf16Capacity)
682        {
683            utf16Length = 0;
684            for (var utf8Offset
685                = 0; utf8Offset < utf8Length;)
686            {
687                Utf8ToUcs(out var ucs, utf8Buffer, ref utf8Offset, utf8Length);
688                if (UcsToUtf16(utf16Buffer, ref utf16Length, utf16Capacity, ucs) == ConversionError.Overflow)
689                    return ConversionError.Overflow;
690            }
691            return ConversionError.None;
692        }
693
694        static int CountRunes(byte* utf8Buffer, int utf8Length, int maxRunes = int.MaxValue)
695        {
696            var numRunes = 0;
697
698            for (var i = 0; numRunes < maxRunes && i < utf8Length; ++i)
699            {
700                if ((utf8Buffer[i] & 0xC0) != 0x80)
701                    numRunes++;
702            }
703
704            return numRunes;
705        }
706    }
707}