A game about forced loneliness, made by TACStudios
1#if UNITY_EDITOR || BURST_INTERNAL
2using System;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Runtime.CompilerServices;
6
7namespace Unity.Burst.Editor
8{
9 internal partial class BurstDisassembler
10 {
/// <summary>
/// Base class that maps the text of an identifier to an <see cref="AsmTokenKind"/> and answers
/// the character-classification questions the tokenizer asks while scanning identifiers.
/// </summary>
internal abstract class AsmTokenKindProvider
{
    // Keyed by StringSlice (not string) so that a lookup can be done directly on a
    // slice of the source text without first allocating a substring.
    private readonly Dictionary<StringSlice, AsmTokenKind> _tokenKinds;

    // Length of the longest registered text; any longer slice cannot be in the table.
    private int _maximumLength;

    /// <summary>
    /// Creates the provider with a lookup table pre-sized for <paramref name="capacity"/> entries.
    /// </summary>
    protected AsmTokenKindProvider(int capacity)
    {
        _tokenKinds = new Dictionary<StringSlice, AsmTokenKind>(capacity);
    }

    /// <summary>
    /// Registers <paramref name="text"/> as having the given <paramref name="kind"/>.
    /// Registering the same text twice throws, as <see cref="Dictionary{TKey,TValue}.Add"/> does.
    /// </summary>
    protected void AddTokenKind(string text, AsmTokenKind kind)
    {
        _tokenKinds.Add(new StringSlice(text), kind);
        _maximumLength = Math.Max(_maximumLength, text.Length);
    }

    /// <summary>
    /// Returns the kind registered for <paramref name="slice"/>, or
    /// <see cref="AsmTokenKind.Identifier"/> when nothing is registered for it.
    /// </summary>
    public virtual AsmTokenKind FindTokenKind(StringSlice slice)
    {
        // Cheap rejection before hashing the slice.
        if (slice.Length > _maximumLength)
        {
            return AsmTokenKind.Identifier;
        }

        AsmTokenKind registeredKind;
        if (_tokenKinds.TryGetValue(slice, out registeredKind))
        {
            return registeredKind;
        }

        return AsmTokenKind.Identifier;
    }

    /// <summary>
    /// Whether <paramref name="c"/> is accepted as one extra trailing character of an
    /// identifier or register. The base implementation accepts nothing.
    /// </summary>
    public virtual bool AcceptsCharAsIdentifierOrRegisterEnd(char c)
    {
        return false;
    }

    /// <summary>
    /// Whether <paramref name="c"/> may be part of an instruction, register or identifier.
    /// </summary>
    public virtual bool IsInstructionOrRegisterOrIdentifier(char c)
    {
        if (c >= 'a' && c <= 'z') return true;
        if (c >= 'A' && c <= 'Z') return true;
        if (c >= '0' && c <= '9') return true;

        // '.' is included because of instructions like `b.le` or `f32.const`
        return c == '_' || c == '@' || c == '.';
    }

    /// <summary>
    /// Checks whether regA == regB. This function assumes the given strings are proper registers.
    /// </summary>
    public virtual bool RegisterEqual(string regA, string regB)
    {
        return string.Equals(regA, regB);
    }

    /// <summary>
    /// Classifies the given instruction as one of the <see cref="SIMDkind"/> values.
    /// </summary>
    public abstract SIMDkind SimdKind(StringSlice instruction);
}
58
59 /// <summary>
60 /// The ASM tokenizer
61 /// </summary>
62 private struct AsmTokenizer
63 {
64 private readonly string _text;
65 private readonly AsmKind _asmKind;
66 private readonly AsmTokenKindProvider _tokenKindProvider;
67 private int _position;
68 private int _nextPosition;
69 private int _alignedPosition;
70 private int _nextAlignedPosition;
71 private char _c;
72 private readonly char _commentStartChar;
73 private bool _doPad;
74 private int _padding;
75
/// <summary>
/// Creates a tokenizer over <paramref name="text"/> and loads its first character.
/// </summary>
/// <param name="text">The assembly text to tokenize.</param>
/// <param name="asmKind">The assembly flavour; used when deciding whether to pad after an instruction.</param>
/// <param name="tokenKindProvider">Resolves identifier text to token kinds and classifies identifier characters.</param>
/// <param name="commentStart">The character that starts a comment.</param>
public AsmTokenizer(string text, AsmKind asmKind, AsmTokenKindProvider tokenKindProvider, char commentStart)
{
    // Immutable configuration.
    _text = text;
    _asmKind = asmKind;
    _tokenKindProvider = tokenKindProvider;
    _commentStartChar = commentStart;

    // Cursor state: raw positions and their aligned (padded) counterparts.
    _position = _nextPosition = 0;
    _alignedPosition = _nextAlignedPosition = 0;
    _c = (char)0;

    // Pending alignment padding.
    _padding = 0;
    _doPad = false;

    // All fields of the struct are assigned above, so an instance method may now be called.
    NextChar();
}
91
/// <summary>
/// Reads the token that starts at the current character.
/// </summary>
/// <param name="token">The token that was read; a default-constructed token when <c>false</c> is returned.</param>
/// <returns><c>false</c> when the current character is the end-of-text marker (0); otherwise <c>true</c>.</returns>
public bool TryGetNextToken(out AsmToken token)
{
    token = new AsmToken();

    int tokenStart = _position;
    int alignedTokenStart = _alignedPosition;

    // End of text
    if (_c == 0)
    {
        return false;
    }

    // The order of the checks below matters: the first matching rule wins.

    if (_c == '.')
    {
        token = ParseDirective(tokenStart, alignedTokenStart);
        return true;
    }

    // Like everywhere else in this file, we are inlining the matching characters instead
    // of using helper functions, as Mono might not be enough good at inlining by itself
    if (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@')
    {
        token = ParseInstructionOrIdentifierOrRegister(tokenStart, alignedTokenStart);
        // Remember whether (and by how much) the following misc token must be padded.
        PrepareAlignment(token);
        return true;
    }

    if (_c >= '0' && _c <= '9' || _c == '-')
    {
        token = ParseNumber(tokenStart, alignedTokenStart);
        return true;
    }

    if (_c == '"')
    {
        token = ParseString(tokenStart, alignedTokenStart);
        return true;
    }

    if (_c == _commentStartChar)
    {
        token = ParseComment(tokenStart, alignedTokenStart);
        return true;
    }

    if (_c == '\r' || _c == '\n')
    {
        // "\r\n" is a single new-line token: step over the '\r' so that
        // ParseNewLine ends the token on the '\n'.
        if (_c == '\r' && PreviewChar() == '\n')
        {
            NextChar();
        }

        token = ParseNewLine(tokenStart, alignedTokenStart);
        return true;
    }

    // Anything else is a misc token. Apply the pending padding first so that the
    // characters following this token's first character are shifted in aligned space.
    if (_doPad)
    {
        _nextAlignedPosition += _padding;
        _doPad = false;
    }

    token = ParseMisc(tokenStart, alignedTokenStart);
    return true;
}
163
/// <summary>
/// Computes the padding needed to bring <paramref name="token"/> up to
/// <c>InstructionAlignment</c> characters and records whether that padding should be applied.
/// </summary>
/// <remarks>
/// Padding is requested only for Intel assembly, only after an instruction token, only when
/// something other than a line break follows it, and only when the padding is positive.
/// </remarks>
private void PrepareAlignment(AsmToken token)
{
    _padding = InstructionAlignment - token.Length;

    bool isInstruction;
    switch (token.Kind)
    {
        case AsmTokenKind.Instruction:
        case AsmTokenKind.BranchInstruction:
        case AsmTokenKind.CallInstruction:
        case AsmTokenKind.JumpInstruction:
        case AsmTokenKind.ReturnInstruction:
        case AsmTokenKind.InstructionSIMD:
            isInstruction = true;
            break;
        default:
            isInstruction = false;
            break;
    }

    // If there is nothing behind the instruction on this line, don't align.
    bool lineContinues = _c != '\n' && _c != '\r';

    _doPad = _asmKind == AsmKind.Intel && isInstruction && lineContinues && _padding > 0;
}
178
/// <summary>
/// Builds a <see cref="AsmTokenKind.NewLine"/> token that ends on the current character
/// and advances past that character.
/// </summary>
private AsmToken ParseNewLine(int startPosition, int startAlignedPosition)
{
    // The token spans from its start up to and including the current character.
    int length = _position - startPosition + 1;

    NextChar(); // Skip newline

    return new AsmToken(AsmTokenKind.NewLine, startPosition, startAlignedPosition, length);
}
185
/// <summary>
/// Parses anything that is not a directive, instruction, number, string, comment or new line
/// into a single <see cref="AsmTokenKind.Misc"/> token.
/// </summary>
private AsmToken ParseMisc(int startPosition, int startAlignedPosition)
{
    int lastPosition = _position;

    for (;;)
    {
        // Stop at the end of text, at a line break, or at any character that starts another kind of token.
        if (_c == (char)0 || _c == '\r' || _c == '\n'                                   // end of text / new line
            || _c == '.'                                                                // directive
            || _c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@' // instruction / identifier / register
            || _c >= '0' && _c <= '9' || _c == '-'                                      // number
            || _c == '"'                                                                // string
            || _c == _commentStartChar)                                                 // comment
        {
            break;
        }

        lastPosition = _position;
        NextChar();
    }

    return new AsmToken(AsmTokenKind.Misc, startPosition, startAlignedPosition, lastPosition - startPosition + 1);
}
197
// Directives that are classified as AsmTokenKind.DataDirective by ParseDirective.
// Each entry goes through AssertDataDirectiveLength because ParseDirective only
// consults this table for tokens that are 5 or 6 characters long.
private static readonly string[] DataDirectiveStrings =
{
    AssertDataDirectiveLength(".long"),
    AssertDataDirectiveLength(".byte"),
    AssertDataDirectiveLength(".short"),
    AssertDataDirectiveLength(".ascii"),
    AssertDataDirectiveLength(".asciz"),
};

/// <summary>
/// Asserts (in builds where <see cref="Debug.Assert(bool, string)"/> is active) that
/// <paramref name="text"/> is 5 or 6 characters long, then returns it unchanged.
/// </summary>
private static string AssertDataDirectiveLength(string text)
{
    bool hasExpectedLength = text.Length == 5 || text.Length == 6;
    Debug.Assert(hasExpectedLength, $"Invalid length {text.Length} for string {text}. Expecting 5 or 6");
    return text;
}
213
/// <summary>
/// Parses a token that starts with <c>.</c> and refines its kind:
/// <list type="bullet">
/// <item><description><c>.Lfunc_begin…</c> => <see cref="AsmTokenKind.FunctionBegin"/></description></item>
/// <item><description><c>.Lfunc_end…</c> => <see cref="AsmTokenKind.FunctionEnd"/></description></item>
/// <item><description><c>.L…</c> => <see cref="AsmTokenKind.Label"/></description></item>
/// <item><description>an entry of <c>DataDirectiveStrings</c> => <see cref="AsmTokenKind.DataDirective"/></description></item>
/// <item><description><c>.loc</c>, <c>.cv_loc</c> => <see cref="AsmTokenKind.SourceLocation"/></description></item>
/// <item><description><c>.file</c>, <c>.cv_file</c> => <see cref="AsmTokenKind.SourceFile"/></description></item>
/// <item><description>anything else => <see cref="AsmTokenKind.Directive"/></description></item>
/// </list>
/// </summary>
/// <remarks>
/// Function begin/end labels are matched by prefix (the label text continues after the marker),
/// whereas directives are matched exactly: <c>.bytes</c> or <c>.filex</c> stay plain directives.
/// </remarks>
/// <param name="startPosition">Position of the leading <c>.</c> in the text.</param>
/// <param name="startAlignedPosition">Aligned position of the leading <c>.</c>.</param>
/// <returns>The parsed token; the tokenizer is left on the first character after it.</returns>
private AsmToken ParseDirective(int startPosition, int startAlignedPosition)
{
    var endPosition = _position;
    NextChar(); // skip .
    bool isLabel = _c == 'L'; // A label starts with a capital `L` like .Lthis_is_a_jump_label
    while (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c >= '0' && _c <= '9' || _c == '.' || _c == '_' || _c == '@')
    {
        endPosition = _position;
        NextChar();
    }

    int length = endPosition - startPosition + 1;
    var kind = isLabel ? AsmTokenKind.Label : AsmTokenKind.Directive;

    if (isLabel)
    {
        const string MatchFunc = ".Lfunc_";
        const string MatchFuncBegin = ".Lfunc_begin";
        const string MatchFuncEnd = ".Lfunc_end";

        // Fast early check on the shared prefix before testing the two full markers.
        if (string.CompareOrdinal(_text, startPosition, MatchFunc, 0, MatchFunc.Length) == 0)
        {
            if (string.CompareOrdinal(_text, startPosition, MatchFuncBegin, 0, MatchFuncBegin.Length) == 0)
            {
                kind = AsmTokenKind.FunctionBegin;
            }
            else if (string.CompareOrdinal(_text, startPosition, MatchFuncEnd, 0, MatchFuncEnd.Length) == 0)
            {
                kind = AsmTokenKind.FunctionEnd;
            }
        }
    }
    // Use length to early exit: every specially handled directive is 4 to 8 characters long.
    else if (length >= 4 && length <= 8)
    {
        if (length == 5 || length == 6)
        {
            foreach (var dataDirectiveStr in DataDirectiveStrings)
            {
                // Require the lengths to agree so that a 5-character directive does not
                // match as a mere prefix of a 6-character token (e.g. `.byte` vs `.bytes`).
                if (dataDirectiveStr.Length == length &&
                    string.CompareOrdinal(_text, startPosition, dataDirectiveStr, 0, length) == 0)
                {
                    kind = AsmTokenKind.DataDirective;
                    break;
                }
            }

            // .file => SourceFile (exact match only, hence the length check)
            if (kind == AsmTokenKind.Directive && length == 5 &&
                string.CompareOrdinal(_text, startPosition, ".file", 0, 5) == 0)
            {
                kind = AsmTokenKind.SourceFile;
            }
        }
        // .loc => SourceLocation
        // .cv_loc => SourceLocation
        else if ((length == 4 && string.CompareOrdinal(_text, startPosition, ".loc", 0, 4) == 0) ||
                 (length == 7 && string.CompareOrdinal(_text, startPosition, ".cv_loc", 0, 7) == 0))
        {
            kind = AsmTokenKind.SourceLocation;
        }
        // .cv_file => SourceFile
        else if (length == 8 && string.CompareOrdinal(_text, startPosition, ".cv_file", 0, 8) == 0)
        {
            kind = AsmTokenKind.SourceFile;
        }
    }

    return new AsmToken(kind, startPosition, startAlignedPosition, length);
}
288
/// <summary>
/// Parses an instruction, register or identifier and resolves its kind through the token kind provider.
/// An identifier that is immediately followed by <c>:</c> is reported as a <see cref="AsmTokenKind.Label"/>.
/// </summary>
private AsmToken ParseInstructionOrIdentifierOrRegister(int startPosition, int startAlignedPosition)
{
    int lastPosition = _position;

    // Consume every character the provider considers part of the identifier.
    for (; _tokenKindProvider.IsInstructionOrRegisterOrIdentifier(_c); NextChar())
    {
        lastPosition = _position;
    }

    // Optionally one more character that the provider accepts only as the last one.
    if (_tokenKindProvider.AcceptsCharAsIdentifierOrRegisterEnd(_c))
    {
        lastPosition = _position;
        NextChar();
    }

    int length = lastPosition - startPosition + 1;

    // Resolve token kind for identifier
    var resolvedKind = _tokenKindProvider.FindTokenKind(new StringSlice(_text, startPosition, length));

    // If we have `:` right after an identifier, change from identifier to label declaration
    // to help the semantic pass later
    if (resolvedKind == AsmTokenKind.Identifier && _c == ':')
    {
        resolvedKind = AsmTokenKind.Label;
    }

    return new AsmToken(resolvedKind, startPosition, startAlignedPosition, length);
}
319
/// <summary>
/// Parses a number: an optional leading <c>-</c> followed by any run of decimal digits,
/// hexadecimal digits, <c>x</c> and <c>.</c>. A number immediately followed by <c>:</c>
/// is reported as a <see cref="AsmTokenKind.Label"/>.
/// </summary>
private AsmToken ParseNumber(int startPosition, int startAlignedPostion)
{
    int lastPosition = _position;

    if (_c == '-')
    {
        NextChar();
    }

    for (;;)
    {
        bool isNumberChar = _c >= '0' && _c <= '9'
                            || _c >= 'a' && _c <= 'f'
                            || _c >= 'A' && _c <= 'F'
                            || _c == 'x'
                            || _c == '.';
        if (!isNumberChar)
        {
            break;
        }

        lastPosition = _position;
        NextChar();
    }

    // If we have `:` right after a number, change from number to label declaration to help the semantic pass later
    AsmTokenKind kind = AsmTokenKind.Number;
    if (_c == ':')
    {
        kind = AsmTokenKind.Label;
    }

    return new AsmToken(kind, startPosition, startAlignedPostion, lastPosition - startPosition + 1);
}
/// <summary>
/// Parses a double-quoted string, including both quotes. A string immediately followed
/// by <c>:</c> is reported as a <see cref="AsmTokenKind.Label"/>.
/// </summary>
/// <remarks>
/// A backslash escapes the character that follows it. This covers <c>\"</c> as well as
/// <c>\\</c>, so a string ending in an escaped backslash (e.g. <c>"C:\\"</c>) terminates
/// at its closing quote instead of running on into the text that follows it.
/// An unterminated string extends to the end of the text.
/// </remarks>
/// <param name="startPosition">Position of the opening quote in the text.</param>
/// <param name="startAlignedPostion">Aligned position of the opening quote.</param>
/// <returns>The parsed token; the tokenizer is left on the first character after it.</returns>
private AsmToken ParseString(int startPosition, int startAlignedPostion)
{
    // Skip first "
    NextChar();
    while (_c != (char)0 && _c != '"')
    {
        // Escape sequence: step onto the escaped character so that the NextChar below
        // consumes it without interpreting it (it may be a quote or another backslash).
        if (_c == '\\' && PreviewChar() != (char)0)
        {
            NextChar();
        }
        NextChar();
    }

    // The token ends on the closing quote or, when the string is unterminated,
    // on the last character of the text (NextChar does not move past it).
    var endPosition = _position;
    NextChar(); // Skip the closing quote

    // If we have `:` right after a string, change from string to label declaration to help the semantic pass later
    var stringKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.String;
    return new AsmToken(stringKind, startPosition, startAlignedPostion, endPosition - startPosition + 1);
}
360
/// <summary>
/// Parses a comment: everything from the current character up to, but not including,
/// the next line break or the end of the text.
/// </summary>
private AsmToken ParseComment(int startPosition, int startAlignedPosition)
{
    int lastPosition = _position;

    for (; _c != (char)0 && _c != '\n' && _c != '\r'; NextChar())
    {
        lastPosition = _position;
    }

    int length = lastPosition - startPosition + 1;
    return new AsmToken(AsmTokenKind.Comment, startPosition, startAlignedPosition, length);
}
372
/// <summary>
/// Advances to the next character of the text, updating both the raw and the aligned position.
/// At the end of the text the current character becomes 0 and the positions are left unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void NextChar()
{
    if (_nextPosition >= _text.Length)
    {
        // End of text marker
        _c = (char)0;
        return;
    }

    _position = _nextPosition++;
    _c = _text[_position];

    // The aligned position advances in lock-step; padding is injected elsewhere
    // by bumping _nextAlignedPosition.
    _alignedPosition = _nextAlignedPosition++;
}
390
/// <summary>
/// Returns the character that the next call to NextChar would load, without advancing;
/// 0 when there is no such character.
/// </summary>
private char PreviewChar()
{
    if (_nextPosition >= _text.Length)
    {
        return (char)0;
    }

    return _text[_nextPosition];
}
395
396 }
397
/// <summary>
/// Classification of an instruction as returned by <see cref="AsmTokenKindProvider.SimdKind"/>.
/// </summary>
/// <remarks>
/// The numeric values are spelled out; they are the same values the compiler assigns implicitly.
/// NOTE(review): the exact meaning of each member is defined by the providers, which are not
/// part of this block - confirm before documenting the members individually.
/// </remarks>
public enum SIMDkind
{
    Packed = 0,
    Scalar = 1,
    Infrastructure = 2,
}
404
405
/// <summary>
/// An ASM token. The token only stores where it is located in the text, not the text itself;
/// use <see cref="Slice"/> or <see cref="ToString(string)"/> with the original text to extract it.
/// </summary>
internal readonly struct AsmToken
{
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // CAUTION: It is important to not put *any managed objects*
    // into this struct for GC efficiency
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    /// <summary>The kind of this token.</summary>
    public readonly AsmTokenKind Kind;

    /// <summary>Index of the first character of this token in the text.</summary>
    public readonly int Position;

    /// <summary>Position of the first character of this token once alignment padding is taken into account.</summary>
    public readonly int AlignedPosition;

    /// <summary>Number of characters of this token in the text.</summary>
    public readonly int Length;

    /// <summary>
    /// Creates a token from its kind and its location in the text.
    /// </summary>
    public AsmToken(AsmTokenKind kind, int position, int alignedPosition, int length)
    {
        Kind = kind;
        Position = position;
        AlignedPosition = alignedPosition;
        Length = length;
    }

    /// <summary>
    /// Returns the characters of this token as a slice of <paramref name="text"/>, without copying them.
    /// </summary>
    public StringSlice Slice(string text)
    {
        return new StringSlice(text, Position, Length);
    }

    /// <summary>
    /// Returns the characters of this token as a new string extracted from <paramref name="text"/>.
    /// </summary>
    public string ToString(string text)
    {
        return text.Substring(Position, Length);
    }

    /// <summary>
    /// Returns the token text followed by <c> : </c> and the name of its kind.
    /// </summary>
    public string ToFriendlyText(string text)
    {
        return ToString(text) + " : " + Kind;
    }
}
441
/// <summary>
/// Kind of an ASM token.
/// </summary>
/// <remarks>
/// The numeric values are spelled out; they are the same values the compiler assigns implicitly.
/// Kinds without a comment are not assigned by the tokenizer itself - presumably they are
/// registered by the token kind providers (to be confirmed against those providers).
/// </remarks>
internal enum AsmTokenKind
{
    // Zero value, i.e. the kind of a default-constructed token.
    Eof = 0,
    // A token starting with `.` that is none of the more specific kinds below.
    Directive = 1,
    // .long, .byte, .short, .ascii, .asciz
    DataDirective = 2,
    // .file, .cv_file
    SourceFile = 3,
    // .loc, .cv_loc
    SourceLocation = 4,
    // A token starting with `.L`, or an identifier, number or string directly followed by `:`.
    Label = 5,
    // A token starting with `.Lfunc_begin`
    FunctionBegin = 6,
    // A token starting with `.Lfunc_end`
    FunctionEnd = 7,
    // An identifier for which the token kind provider has no registered kind.
    Identifier = 8,
    Qualifier = 9,
    Instruction = 10,
    CallInstruction = 11,
    BranchInstruction = 12,
    JumpInstruction = 13,
    ReturnInstruction = 14,
    InstructionSIMD = 15,
    Register = 16,
    // Optional `-` followed by digits, hexadecimal digits, `x` or `.`
    Number = 17,
    // A double-quoted string
    String = 18,
    // From the comment start character to the end of the line
    Comment = 19,
    // `\n`, `\r` or `\r\n`
    NewLine = 20,
    // Anything that matches none of the other rules (e.g. whitespace, punctuation)
    Misc = 21
}
470 }
/// <summary>
/// A view over <see cref="Length"/> characters of a string, starting at <see cref="Position"/>.
/// Equality and hashing are based on the viewed characters, not on the underlying string.
/// </summary>
internal readonly struct StringSlice : IEquatable<StringSlice>
{
    private readonly string _text;

    /// <summary>Index of the first character of the slice in the underlying string.</summary>
    public readonly int Position;

    /// <summary>Number of characters in the slice.</summary>
    public readonly int Length;

    /// <summary>
    /// Creates a slice covering the whole of <paramref name="text"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public StringSlice(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        _text = text;
        Position = 0;
        Length = text.Length;
    }

    /// <summary>
    /// Creates a slice of <paramref name="length"/> characters of <paramref name="text"/>
    /// starting at <paramref name="position"/>. The range is not validated here.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public StringSlice(string text, int position, int length)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        _text = text;
        Position = position;
        Length = length;
    }

    /// <summary>
    /// Gets the character at <paramref name="index"/>, relative to the start of the slice.
    /// </summary>
    public char this[int index]
    {
        get { return _text[Position + index]; }
    }

    /// <summary>
    /// Two slices are equal when they have the same length and the same characters.
    /// </summary>
    public bool Equals(StringSlice other)
    {
        int count = Length;
        if (count != other.Length)
        {
            return false;
        }

        // Walk forward until the first mismatch; reaching the end means all characters agree.
        int i = 0;
        while (i < count && this[i] == other[i])
        {
            i++;
        }

        return i == count;
    }

    /// <inheritdoc />
    public override bool Equals(object obj)
    {
        if (obj is StringSlice)
        {
            return Equals((StringSlice)obj);
        }

        return false;
    }

    /// <summary>
    /// Hash of the length and the characters of the slice, consistent with <see cref="Equals(StringSlice)"/>.
    /// </summary>
    public override int GetHashCode()
    {
        unchecked
        {
            int hash = Length;
            int i = 0;
            while (i < Length)
            {
                hash = (hash * 397) ^ this[i];
                i++;
            }

            return hash;
        }
    }

    public static bool operator ==(StringSlice left, StringSlice right)
    {
        return left.Equals(right);
    }

    public static bool operator !=(StringSlice left, StringSlice right)
    {
        return !(left == right);
    }

    /// <summary>
    /// Copies the characters of the slice into a new string.
    /// </summary>
    public override string ToString()
    {
        return _text.Substring(Position, Length);
    }

    /// <summary>
    /// Whether the slice begins with the characters of <paramref name="text"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public bool StartsWith(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        if (text.Length > Length)
        {
            return false;
        }

        for (int i = 0; i < text.Length; i++)
        {
            if (this[i] != text[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Whether <paramref name="c"/> occurs in the slice. The scanned range is clamped
    /// to the end of the underlying string.
    /// </summary>
    public bool Contains(char c)
    {
        int end = Position + Length;
        if (end > _text.Length)
        {
            end = _text.Length;
        }

        int i = Position;
        while (i < end)
        {
            if (_text[i] == c)
            {
                return true;
            }

            i++;
        }

        return false;
    }

    /// <summary>
    /// Index of the first occurrence of <paramref name="c"/> relative to the start of the slice,
    /// or -1 when it does not occur.
    /// </summary>
    public int IndexOf(char c)
    {
        int offset = 0;
        while (offset < Length)
        {
            if (this[offset] == c)
            {
                return offset;
            }

            offset++;
        }

        return -1;
    }
}
581}
582
583#endif