A game about forced loneliness, made by TACStudios
at master 583 lines 22 kB view raw
#if UNITY_EDITOR || BURST_INTERNAL
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace Unity.Burst.Editor
{
    internal partial class BurstDisassembler
    {
        /// <summary>
        /// Base class for providing extended information of an identifier
        /// </summary>
        internal abstract class AsmTokenKindProvider
        {
            // Internally using string slice instead of string
            // to support faster lookup from AsmToken
            private readonly Dictionary<StringSlice, AsmTokenKind> _tokenKinds;

            // Length of the longest registered text; lets FindTokenKind skip the
            // dictionary lookup (and the hashing) for slices that cannot match.
            private int _maximumLength;

            /// <summary>
            /// Creates a provider whose lookup table is pre-sized for <paramref name="capacity"/> entries.
            /// </summary>
            /// <param name="capacity">Initial capacity of the internal dictionary.</param>
            protected AsmTokenKindProvider(int capacity)
            {
                _tokenKinds = new Dictionary<StringSlice, AsmTokenKind>(capacity);
            }

            /// <summary>
            /// Registers <paramref name="text"/> as being of kind <paramref name="kind"/>.
            /// </summary>
            /// <param name="text">The exact text to register.</param>
            /// <param name="kind">The kind returned by <see cref="FindTokenKind"/> for that text.</param>
            /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
            /// <exception cref="ArgumentException"><paramref name="text"/> was already registered.</exception>
            protected void AddTokenKind(string text, AsmTokenKind kind)
            {
                _tokenKinds.Add(new StringSlice(text), kind);
                if (text.Length > _maximumLength)
                {
                    _maximumLength = text.Length;
                }
            }

            /// <summary>
            /// Looks up the kind registered for <paramref name="slice"/>.
            /// </summary>
            /// <param name="slice">The text of the token to classify.</param>
            /// <returns>
            /// The registered kind, or <see cref="AsmTokenKind.Identifier"/> when the text was not registered.
            /// </returns>
            public virtual AsmTokenKind FindTokenKind(StringSlice slice)
            {
                return slice.Length <= _maximumLength && _tokenKinds.TryGetValue(slice, out var tokenKind)
                    ? tokenKind
                    : AsmTokenKind.Identifier;
            }

            /// <summary>
            /// Tells whether <paramref name="c"/> is accepted as one extra trailing character of an
            /// identifier or register, even though <see cref="IsInstructionOrRegisterOrIdentifier"/>
            /// rejected it. The base implementation accepts nothing.
            /// </summary>
            public virtual bool AcceptsCharAsIdentifierOrRegisterEnd(char c)
            {
                return false;
            }

            /// <summary>
            /// Tells whether <paramref name="c"/> can be part of an instruction, register or identifier.
            /// </summary>
            public virtual bool IsInstructionOrRegisterOrIdentifier(char c)
            {
                // we include . because we have instructions like `b.le` or `f32.const`
                return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' ||
                       c == '@' || c == '.';
            }

            /// <summary>
            /// Checks whether regA == regB. This function assumes the given strings are proper registers.
            /// </summary>
            public virtual bool RegisterEqual(string regA, string regB) => regA == regB;

            /// <summary>
            /// Returns the <see cref="SIMDkind"/> of the given instruction.
            /// </summary>
            /// <param name="instruction">The text of the instruction.</param>
            public abstract SIMDkind SimdKind(StringSlice instruction);
        }

        /// <summary>
        /// The ASM tokenizer
        /// </summary>
        private struct AsmTokenizer
        {
            private readonly string _text;
            private readonly AsmKind _asmKind;
            private readonly AsmTokenKindProvider _tokenKindProvider;

            // Index in _text of the current character (_c) and of the next one to read.
            private int _position;
            private int _nextPosition;

            // Same as the two above, but shifted by the padding accumulated so far.
            private int _alignedPosition;
            private int _nextAlignedPosition;

            // Current character, or (char)0 once the end of _text has been reached.
            private char _c;
            private readonly char _commentStartChar;

            // Pending padding, applied in front of the next Misc token (see PrepareAlignment).
            private bool _doPad;
            private int _padding;

            /// <summary>
            /// Creates a tokenizer positioned on the first character of <paramref name="text"/>.
            /// </summary>
            /// <param name="text">The assembly text to tokenize.</param>
            /// <param name="asmKind">The assembly flavor; padding is only computed for <c>AsmKind.Intel</c>.</param>
            /// <param name="tokenKindProvider">Classifies identifiers into instructions, registers...</param>
            /// <param name="commentStart">The character that starts a comment running to the end of the line.</param>
            public AsmTokenizer(string text, AsmKind asmKind, AsmTokenKindProvider tokenKindProvider, char commentStart)
            {
                _text = text;
                _asmKind = asmKind;
                _tokenKindProvider = tokenKindProvider;
                _position = 0;
                _nextPosition = 0;
                _alignedPosition = 0;
                _nextAlignedPosition = 0;
                _commentStartChar = commentStart;
                _doPad = false;
                _padding = 0;
                _c = (char)0;
                NextChar();
            }

            /// <summary>
            /// Reads the next token from the text.
            /// </summary>
            /// <param name="token">The token read, or a default token when the end of the text is reached.</param>
            /// <returns><c>true</c> if a token was read; <c>false</c> at the end of the text.</returns>
            public bool TryGetNextToken(out AsmToken token)
            {
                token = new AsmToken();

                var startPosition = _position;
                var startAlignedPosition = _alignedPosition;

                if (_c == 0)
                {
                    return false;
                }

                if (_c == '.')
                {
                    token = ParseDirective(startPosition, startAlignedPosition);
                    return true;
                }

                // Like everywhere else in this file, we are inlining the matching characters instead
                // of using helper functions, as Mono might not be enough good at inlining by itself
                if (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@')
                {
                    token = ParseInstructionOrIdentifierOrRegister(startPosition, startAlignedPosition);
                    PrepareAlignment(token);
                    return true;
                }

                if (_c >= '0' && _c <= '9' || _c == '-')
                {
                    token = ParseNumber(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == '"')
                {
                    token = ParseString(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == _commentStartChar)
                {
                    token = ParseComment(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == '\r')
                {
                    if (PreviewChar() == '\n')
                    {
                        NextChar(); // skip \r
                    }
                    token = ParseNewLine(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == '\n')
                {
                    token = ParseNewLine(startPosition, startAlignedPosition);
                    return true;
                }

                // Anything else is a Misc token. This is where the padding requested by
                // PrepareAlignment for the preceding instruction gets accounted for.
                if (_doPad)
                {
                    _nextAlignedPosition += _padding;
                    _doPad = false;
                }
                token = ParseMisc(startPosition, startAlignedPosition);
                return true;
            }

            /// <summary>
            /// Computes the padding needed to bring <paramref name="token"/> up to
            /// <c>InstructionAlignment</c> characters, and flags it as pending when the token is an
            /// instruction of an Intel listing that is followed by something on the same line.
            /// </summary>
            private void PrepareAlignment(AsmToken token)
            {
                var kind = token.Kind;
                _padding = InstructionAlignment - token.Length;
                _doPad = _asmKind == AsmKind.Intel
                         && (kind == AsmTokenKind.Instruction
                             || kind == AsmTokenKind.BranchInstruction
                             || kind == AsmTokenKind.CallInstruction
                             || kind == AsmTokenKind.JumpInstruction
                             || kind == AsmTokenKind.ReturnInstruction
                             || kind == AsmTokenKind.InstructionSIMD)
                         && _c != '\n' && _c != '\r' // If there is no registers behind instruction don't align.
                         && _padding > 0;
            }

            /// <summary>
            /// Consumes the current newline character. For a <c>\r\n</c> pair the caller has already
            /// stepped over the <c>\r</c>, so the token spans both characters.
            /// </summary>
            private AsmToken ParseNewLine(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                NextChar(); // Skip newline
                return new AsmToken(AsmTokenKind.NewLine, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            /// <summary>
            /// Consumes every character up to the next one that could start another kind of token.
            /// </summary>
            private AsmToken ParseMisc(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                // Parse anything that is not a directive, instruction, number, string or comment
                while (!((_c == (char)0) || (_c == '\r') || (_c == '\n') || (_c == '.') || (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@') || (_c >= '0' && _c <= '9' || _c == '-') || (_c == '"') || (_c == _commentStartChar)))
                {
                    endPosition = _position;
                    NextChar();
                }
                return new AsmToken(AsmTokenKind.Misc, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            // Directives reported as DataDirective. ParseDirective only compares against this
            // list for tokens of length 5 or 6, hence the length assertion on each entry.
            private static readonly string[] DataDirectiveStrings = new[]
            {
                AssertDataDirectiveLength(".long"),
                AssertDataDirectiveLength(".byte"),
                AssertDataDirectiveLength(".short"),
                AssertDataDirectiveLength(".ascii"),
                AssertDataDirectiveLength(".asciz"),
            };

            /// <summary>
            /// Returns <paramref name="text"/> unchanged, asserting (debug builds only) that its
            /// length is one that <see cref="ParseDirective"/> actually checks.
            /// </summary>
            private static string AssertDataDirectiveLength(string text)
            {
                var length = text.Length;
                Debug.Assert(length == 5 || length == 6, $"Invalid length {length} for string {text}. Expecting 5 or 6");
                return text;
            }

            /// <summary>
            /// Parses a token starting with <c>.</c> and refines its kind (label, function
            /// begin/end, data directive, source file/location, or plain directive).
            /// </summary>
            private AsmToken ParseDirective(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                NextChar(); // skip .
                bool isLabel = _c == 'L'; // A label starts with a capital `L` like .Lthis_is_a_jump_label
                while (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c >= '0' && _c <= '9' || _c == '.' || _c == '_' || _c == '@')
                {
                    endPosition = _position;
                    NextChar();
                }

                // Refine the kind of directive:
                //
                // .Lfunc_begin => FunctionBegin
                // .Lfunc_end => FunctionEnd
                // .L????????? => Label
                // data directive (.byte, .long, .short...) => DataDirective
                // anything else => Directive
                const string MatchFunc = ".Lfunc_";
                const int MatchFuncLength = 7;
                Debug.Assert(MatchFunc.Length == MatchFuncLength);
                var kind = isLabel ? AsmTokenKind.Label : AsmTokenKind.Directive;
                // Fast early check
                if (isLabel && string.CompareOrdinal(_text, startPosition, MatchFunc, 0, MatchFuncLength) == 0)
                {
                    // These two are deliberately prefix matches: the label carries a suffix
                    // after `.Lfunc_begin` / `.Lfunc_end`.
                    if (string.CompareOrdinal(_text, startPosition, ".Lfunc_begin", 0, ".Lfunc_begin".Length) == 0)
                    {
                        kind = AsmTokenKind.FunctionBegin;
                    }
                    else if (string.CompareOrdinal(_text, startPosition, ".Lfunc_end", 0, ".Lfunc_end".Length) == 0)
                    {
                        kind = AsmTokenKind.FunctionEnd;
                    }
                }

                // Adjust directive to mark data directives, source location directives...etc.
                int length = endPosition - startPosition + 1;

                // Use length to early exit
                if (!isLabel && length >= 4 && length <= 8)
                {
                    if (length == 5 || length == 6)
                    {
                        foreach (var dataDirectiveStr in DataDirectiveStrings)
                        {
                            // Require the whole token to match: comparing only the directive's own
                            // length would also accept a longer token that merely starts with it.
                            if (dataDirectiveStr.Length == length &&
                                string.CompareOrdinal(_text, startPosition, dataDirectiveStr, 0, length) == 0)
                            {
                                kind = AsmTokenKind.DataDirective;
                                break;
                            }
                        }

                        // .file => SourceFile
                        if (kind == AsmTokenKind.Directive && length == 5 &&
                            string.CompareOrdinal(_text, startPosition, ".file", 0, 5) == 0)
                        {
                            kind = AsmTokenKind.SourceFile;
                        }
                    }
                    // .loc => SourceLocation
                    // .cv_loc => SourceLocation
                    else if ((length == 4 && string.CompareOrdinal(_text, startPosition, ".loc", 0, 4) == 0) ||
                             (length == 7 && string.CompareOrdinal(_text, startPosition, ".cv_loc", 0, 7) == 0))
                    {
                        kind = AsmTokenKind.SourceLocation;
                    }
                    // .cv_file => SourceFile
                    else if (length == 8 && string.CompareOrdinal(_text, startPosition, ".cv_file", 0, 8) == 0)
                    {
                        kind = AsmTokenKind.SourceFile;
                    }
                }

                return new AsmToken(kind, startPosition, startAlignedPosition, length);
            }

            /// <summary>
            /// Parses a run of identifier characters and asks the token kind provider to classify it.
            /// An unclassified identifier immediately followed by <c>:</c> is reported as a label.
            /// </summary>
            private AsmToken ParseInstructionOrIdentifierOrRegister(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                while (_tokenKindProvider.IsInstructionOrRegisterOrIdentifier(_c))
                {
                    endPosition = _position;
                    NextChar();
                }

                // The provider may accept one extra terminating character
                if (_tokenKindProvider.AcceptsCharAsIdentifierOrRegisterEnd(_c))
                {
                    endPosition = _position;
                    NextChar();
                }

                // Resolve token kind for identifier
                int length = endPosition - startPosition + 1;
                var tokenKind = _tokenKindProvider.FindTokenKind(new StringSlice(_text, startPosition, length));

                // If we have `:` right after an identifier, change from identifier to label declaration to help the semantic pass later
                if (tokenKind == AsmTokenKind.Identifier && _c == ':')
                {
                    tokenKind = AsmTokenKind.Label;
                }

                return new AsmToken(tokenKind, startPosition, startAlignedPosition, length);
            }

            /// <summary>
            /// Parses an optional leading <c>-</c> followed by a run of digits, hexadecimal digits,
            /// <c>x</c> and <c>.</c>. A number immediately followed by <c>:</c> is reported as a label.
            /// </summary>
            private AsmToken ParseNumber(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                if (_c == '-')
                {
                    NextChar();
                }
                while (_c >= '0' && _c <= '9' || _c >= 'a' && _c <= 'f' || _c >= 'A' && _c <= 'F' || _c == 'x' || _c == '.')
                {
                    endPosition = _position;
                    NextChar();
                }

                // If we have `:` right after a number, change from number to label declaration to help the semantic pass later
                var numberKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.Number;
                return new AsmToken(numberKind, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            /// <summary>
            /// Parses a double-quoted string, including both quotes. An unterminated string runs to
            /// the end of the text. A string immediately followed by <c>:</c> is reported as a label.
            /// </summary>
            private AsmToken ParseString(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                // Skip first "
                NextChar();
                while (_c != (char)0 && _c != '"')
                {
                    // A backslash escapes the character that follows it, whatever it is.
                    // Handling only \" would misread the end of "C:\\dir\\" as an escaped
                    // quote and let the string swallow the text that comes after it.
                    if (_c == '\\' && PreviewChar() != (char)0)
                    {
                        NextChar();
                    }
                    endPosition = _position;
                    NextChar();
                }

                endPosition = _position;
                NextChar(); // Skip closing "

                // If we have `:` right after a string, change from string to label declaration to help the semantic pass later
                var stringKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.String;
                return new AsmToken(stringKind, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            /// <summary>
            /// Parses a comment: everything from the current character up to, but not including,
            /// the end of the line.
            /// </summary>
            private AsmToken ParseComment(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                while (_c != (char)0 && (_c != '\n' && _c != '\r'))
                {
                    endPosition = _position;
                    NextChar();
                }

                return new AsmToken(AsmTokenKind.Comment, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            /// <summary>
            /// Moves to the next character. At the end of the text <c>_c</c> becomes <c>(char)0</c>
            /// and the positions stay on the last character.
            /// </summary>
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            private void NextChar()
            {
                if (_nextPosition < _text.Length)
                {
                    _position = _nextPosition;
                    _c = _text[_position];
                    _nextPosition = _position + 1;

                    _alignedPosition = _nextAlignedPosition;
                    _nextAlignedPosition = _alignedPosition + 1;
                }
                else
                {
                    _c = (char)0;
                }
            }

            /// <summary>
            /// Returns the character following the current one without consuming it,
            /// or <c>(char)0</c> at the end of the text.
            /// </summary>
            private char PreviewChar()
            {
                return _nextPosition < _text.Length ? _text[_nextPosition] : (char)0;
            }
        }

        /// <summary>
        /// Classification of an instruction returned by <see cref="AsmTokenKindProvider.SimdKind"/>.
        /// </summary>
        public enum SIMDkind
        {
            Packed,
            Scalar,
            Infrastructure,
        }

        /// <summary>
        /// An ASM token. The token doesn't contain the string of the token, but provides method <see cref="Slice"/> and <see cref="ToString"/> to extract it.
        /// </summary>
        internal readonly struct AsmToken
        {
            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            // CAUTION: It is important to not put *any managed objects*
            // into this struct for GC efficiency
            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

            /// <summary>
            /// Creates a token.
            /// </summary>
            /// <param name="kind">The kind of the token.</param>
            /// <param name="position">Index of the first character of the token in the source text.</param>
            /// <param name="alignedPosition">Same as <paramref name="position"/>, shifted by the padding accumulated before the token.</param>
            /// <param name="length">Number of characters in the token.</param>
            public AsmToken(AsmTokenKind kind, int position, int alignedPosition, int length)
            {
                Kind = kind;
                Position = position;
                AlignedPosition = alignedPosition;
                Length = length;
            }

            public readonly AsmTokenKind Kind;

            public readonly int Position;

            public readonly int AlignedPosition;

            public readonly int Length;

            /// <summary>
            /// Returns the text of this token as a slice of <paramref name="text"/>, without allocating a string.
            /// </summary>
            public StringSlice Slice(string text) => new StringSlice(text, Position, Length);

            /// <summary>
            /// Returns the text of this token as a new string extracted from <paramref name="text"/>.
            /// </summary>
            public string ToString(string text) => text.Substring(Position, Length);

            /// <summary>
            /// Returns the text of this token followed by its kind, e.g. for debugging.
            /// </summary>
            public string ToFriendlyText(string text)
            {
                return $"{text.Substring(Position, Length)} : {Kind}";
            }
        }

        /// <summary>
        /// Kind of an ASM token.
        /// </summary>
        internal enum AsmTokenKind
        {
            Eof,
            Directive,
            DataDirective,
            SourceFile,
            SourceLocation,
            Label,
            FunctionBegin,
            FunctionEnd,
            Identifier,
            Qualifier,
            Instruction,
            CallInstruction,
            BranchInstruction,
            JumpInstruction,
            ReturnInstruction,
            InstructionSIMD,
            Register,
            Number,
            String,
            Comment,
            NewLine,
            Misc
        }
    }

    /// <summary>
    /// A slice of a string from an original string.
    /// </summary>
    internal readonly struct StringSlice : IEquatable<StringSlice>
    {
        private readonly string _text;

        public readonly int Position;

        public readonly int Length;

        /// <summary>
        /// Creates a slice covering the whole of <paramref name="text"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public StringSlice(string text)
        {
            _text = text ?? throw new ArgumentNullException(nameof(text));
            Position = 0;
            Length = text.Length;
        }

        /// <summary>
        /// Creates a slice of <paramref name="length"/> characters of <paramref name="text"/>,
        /// starting at <paramref name="position"/>. The range is not validated here.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public StringSlice(string text, int position, int length)
        {
            _text = text ?? throw new ArgumentNullException(nameof(text));
            Position = position;
            Length = length;
        }

        /// <summary>
        /// Gets the character at <paramref name="index"/>, relative to the start of the slice.
        /// </summary>
        public char this[int index] => _text[Position + index];

        /// <summary>
        /// Compares the characters of the two slices, regardless of which string they come from.
        /// </summary>
        public bool Equals(StringSlice other)
        {
            if (Length != other.Length) return false;

            for (int i = 0; i < Length; i++)
            {
                if (this[i] != other[i])
                {
                    return false;
                }
            }
            return true;
        }

        public override bool Equals(object obj)
        {
            return obj is StringSlice other && Equals(other);
        }

        /// <summary>
        /// Computes a hash from the length and the characters of the slice, so that slices
        /// that are <see cref="Equals(StringSlice)"/> hash alike.
        /// </summary>
        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = Length;
                for (int i = 0; i < Length; i++)
                {
                    hashCode = (hashCode * 397) ^ this[i];
                }
                return hashCode;
            }
        }

        public static bool operator ==(StringSlice left, StringSlice right)
        {
            return left.Equals(right);
        }

        public static bool operator !=(StringSlice left, StringSlice right)
        {
            return !left.Equals(right);
        }

        /// <summary>
        /// Returns the characters of the slice as a new string.
        /// </summary>
        public override string ToString()
        {
            return _text.Substring(Position, Length);
        }

        /// <summary>
        /// Tells whether the slice starts with <paramref name="text"/> (ordinal comparison).
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public bool StartsWith(string text)
        {
            if (text == null) throw new ArgumentNullException(nameof(text));
            if (Length < text.Length) return false;
            for (var i = 0; i < text.Length; i++)
            {
                var c = text[i];
                if (_text[Position + i] != c) return false;
            }
            return true;
        }

        /// <summary>
        /// Tells whether the slice contains the character <paramref name="c"/>.
        /// </summary>
        public bool Contains(char c)
        {
            int start = Position;
            // Never read past the end of the underlying string
            int end = Math.Min(Position + Length, _text.Length);
            for (int i = start; i < end; i++)
            {
                if (_text[i] == c) { return true; }
            }
            return false;
        }

        /// <summary>
        /// Returns the index, relative to the start of the slice, of the first occurrence of
        /// <paramref name="c"/>, or -1 when the slice does not contain it.
        /// </summary>
        public int IndexOf(char c)
        {
            for (var i = 0; i < Length; i++)
            {
                if (_text[Position + i] == c)
                {
                    return i;
                }
            }

            return -1;
        }
    }
}

#endif