// A game about forced loneliness, made by TACStudios
1using Unity.Collections.LowLevel.Unsafe;
2
3namespace Unity.Collections
4{
5 /// <summary>
6 /// Provides methods for copying and encoding Unicode text.
7 /// </summary>
8 [GenerateTestsForBurstCompatibility]
9 public static unsafe class UTF8ArrayUnsafeUtility
10 {
11
/// <summary>
/// Copies a buffer of UCS-2 (UTF-16 code unit) text into a destination buffer, re-encoding it as UTF-8.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UCS-2. The conversion is delegated to <c>Unicode.Utf16ToUtf8</c>;
/// any result other than <see cref="ConversionError.None"/> is reported as <see cref="CopyError.Truncation"/>.
/// </remarks>
/// <param name="dest">The destination buffer for writing UTF-8.</param>
/// <param name="destLength">Outputs the number of bytes written to the destination.</param>
/// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
/// <param name="src">The source buffer for reading UCS-2.</param>
/// <param name="srcLength">The number of chars to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Copy(byte *dest, out int destLength, int destUTF8MaxLengthInBytes, char *src, int srcLength)
{
    var status = Unicode.Utf16ToUtf8(src, srcLength, dest, out destLength, destUTF8MaxLengthInBytes);
    return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
}
29
/// <summary>
/// Copies a buffer of UCS-2 (UTF-16 code unit) text into a destination buffer, re-encoding it as UTF-8.
/// This overload reports the destination length as a <c>ushort</c>.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UCS-2. Any conversion result other than
/// <see cref="ConversionError.None"/> is reported as <see cref="CopyError.Truncation"/>.
/// </remarks>
/// <param name="dest">The destination buffer for writing UTF-8.</param>
/// <param name="destLength">Outputs the number of bytes written to the destination.</param>
/// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
/// <param name="src">The source buffer for reading UCS-2.</param>
/// <param name="srcLength">The number of chars to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Copy(byte *dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, char *src, int srcLength)
{
    var status = Unicode.Utf16ToUtf8(src, srcLength, dest, out var bytesWritten, destUTF8MaxLengthInBytes);

    // The length is always published, even when the conversion did not fully complete.
    destLength = (ushort)bytesWritten;

    if (status != ConversionError.None)
        return CopyError.Truncation;
    return CopyError.None;
}
48
/// <summary>
/// Copies a buffer of UTF-8 text into another UTF-8 buffer.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. The copy is delegated to <c>Unicode.Utf8ToUtf8</c>;
/// any result other than <see cref="ConversionError.None"/> is reported as <see cref="CopyError.Truncation"/>.
/// </remarks>
/// <param name="dest">The destination buffer.</param>
/// <param name="destLength">Outputs the number of bytes written to the destination.</param>
/// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
/// <param name="src">The source buffer.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Copy(byte *dest, out int destLength, int destUTF8MaxLengthInBytes, byte *src, int srcLength)
{
    var status = Unicode.Utf8ToUtf8(src, srcLength, dest, out destLength, destUTF8MaxLengthInBytes);
    return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
}
67
/// <summary>
/// Copies a buffer of UTF-8 text into another UTF-8 buffer.
/// This overload takes and reports lengths as <c>ushort</c>.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. Any conversion result other than
/// <see cref="ConversionError.None"/> is reported as <see cref="CopyError.Truncation"/>.
/// </remarks>
/// <param name="dest">The destination buffer.</param>
/// <param name="destLength">Outputs the number of bytes written to the destination.</param>
/// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
/// <param name="src">The source buffer.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Copy(byte *dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, byte *src, ushort srcLength)
{
    var status = Unicode.Utf8ToUtf8(src, srcLength, dest, out var bytesWritten, destUTF8MaxLengthInBytes);

    // The length is always published, even when the copy did not fully complete.
    destLength = (ushort)bytesWritten;

    if (status != ConversionError.None)
        return CopyError.Truncation;
    return CopyError.None;
}
86
/// <summary>
/// Copies a buffer of UTF-8 text into a destination buffer, re-encoding it as UCS-2 (UTF-16 code units).
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. The conversion is delegated to <c>Unicode.Utf8ToUtf16</c>;
/// any result other than <see cref="ConversionError.None"/> is reported as <see cref="CopyError.Truncation"/>.
/// </remarks>
/// <param name="dest">The destination buffer for writing UCS-2.</param>
/// <param name="destLength">Outputs the number of chars written to the destination.</param>
/// <param name="destUCS2MaxLengthInChars">The max number of chars that will be written to the destination buffer.</param>
/// <param name="src">The source buffer for reading UTF-8.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Copy(char *dest, out int destLength, int destUCS2MaxLengthInChars, byte *src, int srcLength)
{
    var status = Unicode.Utf8ToUtf16(src, srcLength, dest, out destLength, destUCS2MaxLengthInChars);
    if (status != ConversionError.None)
        return CopyError.Truncation;
    return CopyError.None;
}
103
/// <summary>
/// Copies a buffer of UTF-8 text into a destination buffer, re-encoding it as UCS-2 (UTF-16 code units).
/// This overload takes and reports lengths as <c>ushort</c>.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. Any conversion result other than
/// <see cref="ConversionError.None"/> is reported as <see cref="CopyError.Truncation"/>.
/// </remarks>
/// <param name="dest">The destination buffer for writing UCS-2.</param>
/// <param name="destLength">Outputs the number of chars written to the destination.</param>
/// <param name="destUCS2MaxLengthInChars">The max number of chars that will be written to the destination buffer.</param>
/// <param name="src">The source buffer for reading UTF-8.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Copy(char *dest, out ushort destLength, ushort destUCS2MaxLengthInChars, byte *src, ushort srcLength)
{
    var status = Unicode.Utf8ToUtf16(src, srcLength, dest, out var charsWritten, destUCS2MaxLengthInChars);

    // The length is always published, even when the conversion did not fully complete.
    destLength = (ushort)charsWritten;

    return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
}
122
/// <summary>
/// Appends UTF-8 text to a buffer.
/// </summary>
/// <remarks>Assumes the source data is valid UTF-8.
///
/// No data will be copied if the destination has insufficient capacity for the full append, *i.e.* if `srcLength > (destCapacity - destLength)`.
///
/// The capacity check is performed in 64-bit arithmetic so that very large length values cannot
/// wrap around and bypass it.
/// NOTE(review): a negative <paramref name="srcLength"/> is not rejected here; confirm callers never pass one.
/// </remarks>
/// <param name="dest">The destination buffer.</param>
/// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be assigned the new length *after* the append.</param>
/// <param name="destCapacity">The destination buffer capacity in bytes.</param>
/// <param name="src">The source buffer.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="FormatError.None"/> if the append fully completes. Otherwise, returns <see cref="FormatError.Overflow"/>.</returns>
public static FormatError AppendUTF8Bytes(byte* dest, ref int destLength, int destCapacity, byte* src, int srcLength)
{
    // Widen before adding: in 32-bit arithmetic destLength + srcLength can wrap to a negative
    // value, which would pass the capacity check and let MemCpy write past the buffer.
    if ((long)destLength + srcLength > destCapacity)
        return FormatError.Overflow;

    UnsafeUtility.MemCpy(dest + destLength, src, srcLength);
    destLength += srcLength;
    return FormatError.None;
}
144
/// <summary>
/// Appends UTF-8 text to a UTF-8 buffer.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. Writing starts at <c>dest + destLength</c> and is limited to
/// the space remaining before <paramref name="destUTF8MaxLengthInBytes"/>.
/// </remarks>
/// <param name="dest">The destination buffer.</param>
/// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. It is increased by the number of bytes appended.</param>
/// <param name="destUTF8MaxLengthInBytes">The destination buffer's length in bytes. Data will not be appended past this length.</param>
/// <param name="src">The source buffer.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Append(byte *dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, byte *src, ushort srcLength)
{
    byte* writeCursor = dest + destLength;
    int remainingBytes = destUTF8MaxLengthInBytes - destLength;

    var status = Unicode.Utf8ToUtf8(src, srcLength, writeCursor, out var bytesAppended, remainingBytes);

    // The length is advanced by whatever was written, even on a partial append.
    destLength += (ushort)bytesAppended;

    return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
}
163
/// <summary>
/// Appends UCS-2 (UTF-16 code unit) text to a buffer, encoded as UTF-8.
/// </summary>
/// <remarks>
/// Assumes the source data is valid UCS-2. Writing starts at <c>dest + destLength</c> and is limited to
/// the space remaining before <paramref name="destUTF8MaxLengthInBytes"/>.
/// </remarks>
/// <param name="dest">The destination buffer.</param>
/// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. It is increased by the number of bytes appended.</param>
/// <param name="destUTF8MaxLengthInBytes">The destination buffer's length in bytes. Data will not be appended past this length.</param>
/// <param name="src">The source buffer.</param>
/// <param name="srcLength">The number of chars to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Append(byte *dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, char *src, int srcLength)
{
    byte* writeCursor = dest + destLength;
    int remainingBytes = destUTF8MaxLengthInBytes - destLength;

    var status = Unicode.Utf16ToUtf8(src, srcLength, writeCursor, out var bytesAppended, remainingBytes);

    // The length is advanced by whatever was written, even on a partial append.
    destLength += (ushort)bytesAppended;

    if (status != ConversionError.None)
        return CopyError.Truncation;
    return CopyError.None;
}
182
/// <summary>
/// Appends UTF-8 text to a buffer, encoded as UCS-2 (UTF-16 code units).
/// </summary>
/// <remarks>
/// Assumes the source data is valid UTF-8. Writing starts at <c>dest + destLength</c> and is limited to
/// the space remaining before <paramref name="destUCS2MaxLengthInChars"/>.
/// </remarks>
/// <param name="dest">The destination buffer.</param>
/// <param name="destLength">Reference to the destination buffer's length in chars *before* the append. It is increased by the number of chars appended.</param>
/// <param name="destUCS2MaxLengthInChars">The destination buffer's length in chars. Data will not be appended past this length.</param>
/// <param name="src">The source buffer.</param>
/// <param name="srcLength">The number of bytes to read from the source.</param>
/// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
public static CopyError Append(char *dest, ref ushort destLength, ushort destUCS2MaxLengthInChars, byte *src, ushort srcLength)
{
    char* writeCursor = dest + destLength;
    int remainingChars = destUCS2MaxLengthInChars - destLength;

    var status = Unicode.Utf8ToUtf16(src, srcLength, writeCursor, out var charsAppended, remainingChars);

    // The length is advanced by whatever was written, even on a partial append.
    destLength += (ushort)charsAppended;

    return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
}
201
/// <summary>
/// Outcome of comparing one decoded code point from each of two strings.
/// </summary>
/// <remarks>
/// A code point whose decode reported an error is treated as 0. The comparison terminates when the
/// two code points differ, or when both are 0.
/// </remarks>
internal struct Comparison
{
    /// <summary>True when the caller should stop iterating and return <see cref="result"/>.</summary>
    public bool terminates;

    /// <summary>Difference of the two code points (A minus B); 0 when they are equal.</summary>
    public int result;

    public Comparison(Unicode.Rune runeA, ConversionError errorA, Unicode.Rune runeB, ConversionError errorB)
    {
        // A failed decode is folded into code point 0.
        int codePointA = errorA == ConversionError.None ? runeA.value : 0;
        int codePointB = errorB == ConversionError.None ? runeB.value : 0;

        result = codePointA - codePointB;

        // Stop on the first difference, or once both sides have produced 0.
        terminates = result != 0 || codePointA == 0;
    }
}
224
/// <summary>Compares two UTF-8 buffers for relative equality, one code point at a time.</summary>
/// <remarks>
/// Iteration stops at the first pair of code points that differ, or when both sides yield code point 0
/// (a decode error on either side is treated as 0).
/// </remarks>
/// <param name="utf8BufferA">The first buffer of UTF-8 text.</param>
/// <param name="utf8LengthInBytesA">The length in bytes of the first UTF-8 buffer.</param>
/// <param name="utf8BufferB">The second buffer of UTF-8 text.</param>
/// <param name="utf8LengthInBytesB">The length in bytes of the second UTF-8 buffer.</param>
/// <returns>
/// Less than zero if first different code point is less in the first UTF-8 buffer.
/// Zero if the strings are identical.
/// More than zero if first different code point is less in the second UTF-8 buffer.
/// </returns>
public static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, byte* utf8BufferB, int utf8LengthInBytesB)
{
    var offsetA = 0;
    var offsetB = 0;
    Comparison step;
    do
    {
        var statusA = Unicode.Utf8ToUcs(out var codePointA, utf8BufferA, ref offsetA, utf8LengthInBytesA);
        var statusB = Unicode.Utf8ToUcs(out var codePointB, utf8BufferB, ref offsetB, utf8LengthInBytesB);
        step = new Comparison(codePointA, statusA, codePointB, statusB);
    }
    while (!step.terminates);

    return step.result;
}
248
/// <summary>Compares a UTF-8 buffer against a buffer of already-decoded runes for relative equality.</summary>
/// <remarks>
/// Iteration stops at the first pair of code points that differ, or when both sides yield code point 0
/// (a decode error on either side is treated as 0).
/// </remarks>
/// <param name="utf8BufferA">The buffer of UTF-8 text.</param>
/// <param name="utf8LengthInBytesA">The length in bytes of the UTF-8 buffer.</param>
/// <param name="runeBufferB">The buffer of runes.</param>
/// <param name="lengthInRunesB">The length in runes of the rune buffer.</param>
/// <returns>
/// Less than zero if first different code point is less in the UTF-8 buffer.
/// Zero if no difference was found.
/// More than zero if first different code point is less in the rune buffer.
/// </returns>
internal static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, Unicode.Rune* runeBufferB, int lengthInRunesB)
{
    var byteOffsetA = 0;
    var runeOffsetB = 0;
    for (;;)
    {
        var statusA = Unicode.Utf8ToUcs(out var codePointA, utf8BufferA, ref byteOffsetA, utf8LengthInBytesA);
        var statusB = Unicode.UcsToUcs(out var codePointB, runeBufferB, ref runeOffsetB, lengthInRunesB);
        var step = new Comparison(codePointA, statusA, codePointB, statusB);
        if (!step.terminates)
            continue;
        return step.result;
    }
}
262
/// <summary>Compares two UTF-16 buffers for relative equality, one code point at a time.</summary>
/// <remarks>
/// Iteration stops at the first pair of code points that differ, or when both sides yield code point 0
/// (a decode error on either side is treated as 0).
/// </remarks>
/// <param name="utf16BufferA">The first buffer of UTF-16 text.</param>
/// <param name="utf16LengthInCharsA">The length in chars of the first UTF-16 buffer.</param>
/// <param name="utf16BufferB">The second buffer of UTF-16 text.</param>
/// <param name="utf16LengthInCharsB">The length in chars of the second UTF-16 buffer.</param>
/// <returns>
/// Less than zero if first different code point is less in the first UTF-16 buffer.
/// Zero if the strings are identical.
/// More than zero if first different code point is less in the second UTF-16 buffer.
/// </returns>
public static int StrCmp(char* utf16BufferA, int utf16LengthInCharsA, char* utf16BufferB, int utf16LengthInCharsB)
{
    var offsetA = 0;
    var offsetB = 0;
    Comparison step;
    do
    {
        var statusA = Unicode.Utf16ToUcs(out var codePointA, utf16BufferA, ref offsetA, utf16LengthInCharsA);
        var statusB = Unicode.Utf16ToUcs(out var codePointB, utf16BufferB, ref offsetB, utf16LengthInCharsB);
        step = new Comparison(codePointA, statusA, codePointB, statusB);
    }
    while (!step.terminates);

    return step.result;
}
286
/// <summary>Returns true if two UTF-8 buffers have the same length and content.</summary>
/// <remarks>
/// The buffers are compared byte for byte. A code-point-wise comparison is not used here because it
/// stops at the first NUL or undecodable sequence, which would report buffers that differ after that
/// point as identical.
/// </remarks>
/// <param name="aBytes">The first buffer of UTF-8 text.</param>
/// <param name="aLength">The length in bytes of the first buffer.</param>
/// <param name="bBytes">The second buffer of UTF-8 text.</param>
/// <param name="bLength">The length in bytes of the second buffer.</param>
/// <returns>True if the content of both strings is identical.</returns>
public static bool EqualsUTF8Bytes(byte* aBytes, int aLength, byte* bBytes, int bLength)
{
    if (aLength != bLength)
        return false;

    // No bytes to compare; also avoids touching the pointers for empty buffers.
    if (aLength <= 0)
        return true;

    return UnsafeUtility.MemCmp(aBytes, bBytes, aLength) == 0;
}
297
/// <summary>Compares a UTF-8 buffer and a UTF-16 buffer for relative equality, one code point at a time.</summary>
/// <remarks>
/// Iteration stops at the first pair of code points that differ, or when both sides yield code point 0
/// (a decode error on either side is treated as 0).
/// </remarks>
/// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
/// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
/// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
/// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
/// <returns>
/// Less than zero if first different code point is less in UTF-8 buffer.
/// Zero if the strings are identical.
/// More than zero if first different code point is less in UTF-16 buffer.
/// </returns>
public static int StrCmp(byte* utf8Buffer, int utf8LengthInBytes, char* utf16Buffer, int utf16LengthInChars)
{
    var utf8Offset = 0;
    var utf16Offset = 0;
    for (;;)
    {
        var utf8Status = Unicode.Utf8ToUcs(out var utf8CodePoint, utf8Buffer, ref utf8Offset, utf8LengthInBytes);
        var utf16Status = Unicode.Utf16ToUcs(out var utf16CodePoint, utf16Buffer, ref utf16Offset, utf16LengthInChars);
        var step = new Comparison(utf8CodePoint, utf8Status, utf16CodePoint, utf16Status);
        if (!step.terminates)
            continue;
        return step.result;
    }
}
321
/// <summary>Compares a UTF-16 buffer and a UTF-8 buffer for relative equality.</summary>
/// <remarks>
/// Implemented by running the UTF-8-versus-UTF-16 comparison with the arguments swapped and
/// negating its result.
/// </remarks>
/// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
/// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
/// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
/// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
/// <returns>
/// Less than zero if first different code point is less in UTF-16 buffer.
/// Zero if the strings are identical.
/// More than zero if first different code point is less in UTF-8 buffer.
/// </returns>
public static int StrCmp(char* utf16Buffer, int utf16LengthInChars, byte* utf8Buffer, int utf8LengthInBytes)
{
    // Sign of the comparison as seen from the UTF-8 side; flip it for the UTF-16 side.
    int utf8VersusUtf16 = StrCmp(utf8Buffer, utf8LengthInBytes, utf16Buffer, utf16LengthInChars);
    return -utf8VersusUtf16;
}
336
337 }
338}