Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
at main 753 lines 25 kB view raw
1// Copyright (C) 2025 Thom Hayward. 2// 3// This program is free software: you can redistribute it and/or modify it under 4// the terms of the GNU General Public License as published by the Free Software 5// Foundation, version 3. 6// 7// This program is distributed in the hope that it will be useful, but WITHOUT 8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 10// details. 11// 12// You should have received a copy of the GNU General Public License along with 13// this program. If not, see <https://www.gnu.org/licenses/>. 14// 15use super::Token; 16use crate::Register; 17use logos::{Logos, Source}; 18use std::{borrow::Cow, collections::HashMap, iter::Peekable, ops::Range, str::CharIndices}; 19 20pub fn parse(_unit: impl std::fmt::Display, source: &str) -> Result<ParsedProgram, Error> { 21 Parser::new(source).parse() 22} 23 24#[derive(Debug)] 25pub enum NodeType<'s> { 26 Pragma(Pragma<'s>), 27 Instruction(Instruction<'s>), 28 Comment(#[allow(unused)] &'s str), 29} 30 31impl NodeType<'_> { 32 pub fn size(&self) -> usize { 33 match self { 34 Self::Pragma(pragma) => match &pragma.payload { 35 PragmaType::U32 { .. } => 1, 36 PragmaType::WideString { value } => value.len() + 1, 37 }, 38 // Instructions are always one platter. 39 Self::Instruction(_) => 1, 40 Self::Comment(_) => 0, 41 } 42 } 43} 44 45#[derive(Debug)] 46pub struct Node<'s> { 47 pub labels: Vec<&'s str>, 48 pub entity: NodeType<'s>, 49 #[allow(unused)] 50 pub span: Range<usize>, 51} 52 53impl Node<'_> { 54 /// Compute encoded size of the node in platters. 55 #[inline] 56 pub fn size(&self) -> usize { 57 self.entity.size() 58 } 59} 60 61#[derive(Debug)] 62pub struct ParsedProgram<'s> { 63 #[allow(unused)] 64 pub source: &'s str, 65 nodes: Vec<Node<'s>>, 66} 67 68impl<'s> ParsedProgram<'s> { 69 pub fn nodes(&self) -> &[Node<'s>] { 70 &self.nodes 71 } 72} 73 74#[derive(Debug, Default)] 75pub struct Parser<'s> { 76 source: &'s str, 77 labels: HashMap<&'s str, Range<usize>>, 78 active_labels: Vec<&'s str>, 79} 80 81impl<'s> Parser<'s> { 82 fn new(source: &'s str) -> Self { 83 Self { 84 source, 85 ..Default::default() 86 } 87 } 88 89 fn parse(mut self) -> Result<ParsedProgram<'s>, Error> { 90 let mut lexer = Token::lexer(self.source); 91 let mut spanned = vec![]; 92 while let Some(res) = lexer.next() { 93 match res { 94 Ok(token) => { 95 spanned.push((token, lexer.span())); 96 } 97 Err(error) => Err(Error::new(format!("lex: {error:?}"), &lexer.span()))?, 98 } 99 } 100 101 let mut nodes = vec![]; 102 let mut tokens = spanned.into_iter().peekable(); 103 while let Some((token, span)) = tokens.peek() { 104 let node = match token { 105 Token::Label(_) => { 106 self.consume_label(&mut tokens)?; 107 continue; 108 } 109 Token::Pragma(_) => self.consume_pragma(&mut tokens)?, 110 Token::Ident(_) => self.consume_instruction(&mut tokens)?, 111 Token::Comment(comment) => { 112 let node = Node { 113 labels: vec![], 114 entity: NodeType::Comment(comment), 115 span: span.clone(), 116 }; 117 tokens.next(); 118 node 119 } 120 Token::Newline => { 121 tokens.next(); 122 continue; 123 } 124 _ => Err(Error::new(format!("unexpected token {token:?}"), span))?, 125 }; 126 127 nodes.push(node); 128 } 129 130 Ok(ParsedProgram { 131 source: self.source, 132 nodes, 133 }) 134 } 135 136 /// Consumes a label from the token stream. 137 fn consume_label<I>(&mut self, tokens: &mut I) -> Result<(), Error> 138 where 139 I: Iterator<Item = (Token<'s>, Range<usize>)>, 140 { 141 let Some((Token::Label(label_ident), span)) = tokens.next() else { 142 unreachable!("consume_label called on non-label token"); 143 }; 144 145 // Add the label to the set of observed labels. 146 let label_span = self 147 .labels 148 .entry(label_ident) 149 .or_insert_with(|| span.clone()); 150 151 // If the span of the current token is not equal to 152 // `label_span`, then we have already seen label with the 153 // same identifier. 154 if label_span != &span { 155 return Err(Error::new( 156 format!("duplicate label '{label_ident}', original label span: {label_span:?}"), 157 &span, 158 )); 159 } 160 161 self.active_labels.push(label_ident); 162 Ok(()) 163 } 164 165 fn consume_pragma<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error> 166 where 167 I: Iterator<Item = (Token<'s>, Range<usize>)>, 168 { 169 assert!( 170 matches!(tokens.peek(), Some((Token::Pragma(_), _))), 171 "consume_pragma called on non-pragma token" 172 ); 173 174 let labels = std::mem::take(&mut self.active_labels); 175 let (pragma, span) = Pragma::consume(tokens)?; 176 177 Ok(Node { 178 labels, 179 entity: NodeType::Pragma(pragma), 180 span, 181 }) 182 } 183 184 fn consume_instruction<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error> 185 where 186 I: Iterator<Item = (Token<'s>, Range<usize>)>, 187 { 188 assert!( 189 matches!(tokens.peek(), Some((Token::Ident(_), _))), 190 "consume_instruction called on non-ident token" 191 ); 192 193 let labels = std::mem::take(&mut self.active_labels); 194 let (instr, span) = Instruction::consume(tokens)?; 195 Ok(Node { 196 labels, 197 entity: NodeType::Instruction(instr), 198 span, 199 }) 200 } 201} 202 203/// An error encountered during parsing. 204#[derive(Debug)] 205#[allow(unused)] 206pub struct Error(pub String, pub Range<usize>); 207 208impl Error { 209 fn new(message: impl ToString, span: &Range<usize>) -> Self { 210 Self(message.to_string(), span.clone()) 211 } 212 213 fn eof() -> Self { 214 Self("unexpected eof".into(), 0..0) 215 } 216} 217 218impl std::fmt::Display for Error { 219 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 220 write!(f, "{self:?}") 221 } 222} 223 224impl std::error::Error for Error {} 225 226#[derive(Debug, Default)] 227pub struct Location { 228 pub block: Register, 229 pub offset: Register, 230} 231 232impl Location { 233 pub fn consume<'s, I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 234 where 235 I: Iterator<Item = (Token<'s>, Range<usize>)>, 236 { 237 // Require a '[' token. 238 let start_span = match tokens.next() { 239 Some((Token::AddressOpen, span)) => span, 240 Some((_, span)) => Err(Error::new("expected an address opening bracket", &span))?, 241 _ => Err(Error::eof())?, 242 }; 243 244 let (block, _) = consume_register(tokens)?; 245 let (offset, _) = consume_register(tokens)?; 246 247 // Require a ']' token. 248 let end_span = match tokens.next() { 249 Some((Token::AddressClose, span)) => span, 250 Some((_, span)) => Err(Error::new("expected an address closing bracket", &span))?, 251 _ => Err(Error::eof())?, 252 }; 253 254 Ok((Self { block, offset }, merge_spans(&start_span, &end_span))) 255 } 256} 257 258#[derive(Debug)] 259pub struct Expr<'s> { 260 pub label: &'s str, 261} 262 263#[derive(Debug)] 264pub enum PragmaType<'s> { 265 U32 { value: u32 }, 266 WideString { value: Cow<'s, str> }, 267} 268 269#[derive(Debug)] 270pub struct Pragma<'s> { 271 #[allow(unused)] 272 relocatable: bool, 273 pub payload: PragmaType<'s>, 274} 275 276impl<'s> Pragma<'s> { 277 pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 278 where 279 I: Iterator<Item = (Token<'s>, Range<usize>)>, 280 { 281 let relocatable = true; 282 let token = tokens.next().ok_or(Error::eof())?; 283 match token { 284 (Token::Pragma("u32"), start_span) => { 285 let (value, end_span) = consume_number(tokens)?; 286 Ok(( 287 Self { 288 relocatable, 289 payload: PragmaType::U32 { value }, 290 }, 291 merge_spans(&start_span, &end_span), 292 )) 293 } 294 (Token::Pragma("wstr"), start_span) => { 295 let (value, end_span) = consume_string(tokens)?; 296 Ok(( 297 Self { 298 relocatable, 299 payload: PragmaType::WideString { value }, 300 }, 301 merge_spans(&start_span, &end_span), 302 )) 303 } 304 (Token::Pragma(command), span) => Err(Error::new( 305 format!("unknown pragma command {command}"), 306 &span, 307 ))?, 308 (_, span) => Err(Error::new("unexpected token", &span))?, 309 } 310 } 311} 312 313#[derive(Debug)] 314pub enum Instruction<'s> { 315 /// Operation #0. 316 ConditionalMove { 317 destination: Register, 318 source: Register, 319 condition: Register, 320 }, 321 /// Operation #13. 322 Address { 323 destination: Register, 324 reference: Expr<'s>, 325 }, 326 /// Operation #13. 327 LiteralMove { 328 destination: Register, 329 literal: u32, 330 }, 331 Load { 332 destination: Register, 333 address: Location, 334 }, 335 Store { 336 source: Register, 337 address: Location, 338 }, 339 Add { 340 destination: Register, 341 a: Register, 342 b: Register, 343 }, 344 AddAssign { 345 destination: Register, 346 a: Register, 347 }, 348 AddSelf { 349 destination: Register, 350 }, 351 Mul { 352 destination: Register, 353 a: Register, 354 b: Register, 355 }, 356 MulAssign { 357 destination: Register, 358 a: Register, 359 }, 360 MulSelf { 361 destination: Register, 362 }, 363 Div { 364 destination: Register, 365 a: Register, 366 b: Register, 367 }, 368 DivAssign { 369 destination: Register, 370 a: Register, 371 }, 372 DivSelf { 373 destination: Register, 374 }, 375 Nand { 376 destination: Register, 377 a: Register, 378 b: Register, 379 }, 380 NandAssign { 381 destination: Register, 382 a: Register, 383 }, 384 NandSelf { 385 destination: Register, 386 }, 387 Halt, 388 Alloc { 389 destination: Register, 390 length: Register, 391 }, 392 Free { 393 block: Register, 394 }, 395 Out { 396 source: Register, 397 }, 398 In { 399 destination: Register, 400 }, 401 Jmp { 402 location: Location, 403 }, 404} 405 406impl<'s> Instruction<'s> { 407 pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 408 where 409 I: Iterator<Item = (Token<'s>, Range<usize>)>, 410 { 411 let ident = tokens.next().unwrap(); 412 match ident { 413 (Token::Ident("halt"), span) => Ok((Self::Halt, span)), 414 (Token::Ident("adr"), start_span) => { 415 let (destination, _) = consume_register(tokens)?; 416 let (identifier, end_span) = consume_ident(tokens)?; 417 Ok(( 418 Self::Address { 419 destination, 420 reference: Expr { label: identifier }, 421 }, 422 merge_spans(&start_span, &end_span), 423 )) 424 } 425 (Token::Ident("mov"), start_span) => { 426 let (destination, _) = consume_register(tokens)?; 427 if peek_register(tokens)?.is_some() { 428 let (source, _) = consume_register(tokens)?; 429 let (condition, end_span) = consume_register(tokens)?; 430 Ok(( 431 Self::ConditionalMove { 432 destination, 433 source, 434 condition, 435 }, 436 merge_spans(&start_span, &end_span), 437 )) 438 } else { 439 let (literal, end_span) = consume_number(tokens)?; 440 Ok(( 441 Self::LiteralMove { 442 destination, 443 literal, 444 }, 445 merge_spans(&start_span, &end_span), 446 )) 447 } 448 } 449 (Token::Ident("ldr"), start_span) => { 450 let (destination, _) = consume_register(tokens)?; 451 let (address, end_span) = Location::consume(tokens)?; 452 Ok(( 453 Self::Load { 454 destination, 455 address, 456 }, 457 merge_spans(&start_span, &end_span), 458 )) 459 } 460 (Token::Ident("str"), start_span) => { 461 let (source, _) = consume_register(tokens)?; 462 let (address, end_span) = Location::consume(tokens)?; 463 Ok(( 464 Self::Store { source, address }, 465 merge_spans(&start_span, &end_span), 466 )) 467 } 468 (Token::Ident("out"), start_span) => { 469 let (source, end_span) = consume_register(tokens)?; 470 Ok((Self::Out { source }, merge_spans(&start_span, &end_span))) 471 } 472 (Token::Ident("in"), start_span) => { 473 let (destination, end_span) = consume_register(tokens)?; 474 Ok(( 475 Self::In { destination }, 476 merge_spans(&start_span, &end_span), 477 )) 478 } 479 (Token::Ident("alloc"), start_span) => { 480 let (destination, _) = consume_register(tokens)?; 481 let (length, end_span) = consume_register(tokens)?; 482 Ok(( 483 Self::Alloc { 484 length, 485 destination, 486 }, 487 merge_spans(&start_span, &end_span), 488 )) 489 } 490 (Token::Ident("free"), start_span) => { 491 let (block, end_span) = consume_register(tokens)?; 492 Ok((Self::Free { block }, merge_spans(&start_span, &end_span))) 493 } 494 (Token::Ident("jmp"), start_span) => { 495 let (location, end_span) = Location::consume(tokens)?; 496 Ok((Self::Jmp { location }, merge_spans(&start_span, &end_span))) 497 } 498 (Token::Ident("add"), start_span) => { 499 let (destination, mid_span) = consume_register(tokens)?; 500 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 501 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 502 match (a, b) { 503 (Some((a, _)), Some((b, end_span))) => Ok(( 504 Self::Add { destination, a, b }, 505 merge_spans(&start_span, &end_span), 506 )), 507 (Some((a, end_span)), None) => Ok(( 508 Self::AddAssign { destination, a }, 509 merge_spans(&start_span, &end_span), 510 )), 511 (None, None) => Ok(( 512 Self::AddSelf { destination }, 513 merge_spans(&start_span, &mid_span), 514 )), 515 _ => unreachable!(), 516 } 517 } 518 (Token::Ident("mul"), start_span) => { 519 let (destination, mid_span) = consume_register(tokens)?; 520 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 521 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 522 match (a, b) { 523 (Some((a, _)), Some((b, end_span))) => Ok(( 524 Self::Mul { destination, a, b }, 525 merge_spans(&start_span, &end_span), 526 )), 527 (Some((a, end_span)), None) => Ok(( 528 Self::MulAssign { destination, a }, 529 merge_spans(&start_span, &end_span), 530 )), 531 (None, None) => Ok(( 532 Self::MulSelf { destination }, 533 merge_spans(&start_span, &mid_span), 534 )), 535 _ => unreachable!(), 536 } 537 } 538 (Token::Ident("div"), start_span) => { 539 let (destination, mid_span) = consume_register(tokens)?; 540 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 541 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 542 match (a, b) { 543 (Some((a, _)), Some((b, end_span))) => Ok(( 544 Self::Div { destination, a, b }, 545 merge_spans(&start_span, &end_span), 546 )), 547 (Some((a, end_span)), None) => Ok(( 548 Self::DivAssign { destination, a }, 549 merge_spans(&start_span, &end_span), 550 )), 551 (None, None) => Ok(( 552 Self::DivSelf { destination }, 553 merge_spans(&start_span, &mid_span), 554 )), 555 _ => unreachable!(), 556 } 557 } 558 (Token::Ident("nand"), start_span) => { 559 let (destination, mid_span) = consume_register(tokens)?; 560 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 561 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 562 match (a, b) { 563 (Some((a, _)), Some((b, end_span))) => Ok(( 564 Self::Nand { destination, a, b }, 565 merge_spans(&start_span, &end_span), 566 )), 567 (Some((a, end_span)), None) => Ok(( 568 Self::NandAssign { destination, a }, 569 merge_spans(&start_span, &end_span), 570 )), 571 (None, None) => Ok(( 572 Self::NandSelf { destination }, 573 merge_spans(&start_span, &mid_span), 574 )), 575 _ => unreachable!(), 576 } 577 } 578 (_, span) => Err(Error::new("unrecognised instruction", &span))?, 579 } 580 } 581} 582 583impl std::fmt::Display for Instruction<'_> { 584 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 585 match self { 586 Self::ConditionalMove { 587 destination, 588 source, 589 condition, 590 } => write!(f, "mov {destination}, {source}, {condition}"), 591 Self::Load { 592 destination, 593 address, 594 } => write!( 595 f, 596 "ldr {destination}, [{}, {}]", 597 address.block, address.offset 598 ), 599 Self::Store { source, address } => { 600 write!(f, "str {source}, [{}, {}]", address.block, address.offset) 601 } 602 Self::Add { destination, a, b } => write!(f, "add {destination}, {a}, {b}"), 603 Self::AddAssign { destination, a } => write!(f, "add {destination}, {a}"), 604 Self::AddSelf { destination } => write!(f, "add {destination}"), 605 Self::Mul { destination, a, b } => write!(f, "mul {destination}, {a}, {b}"), 606 Self::MulAssign { destination, a } => write!(f, "mul {destination}, {a}"), 607 Self::MulSelf { destination } => write!(f, "mul {destination}"), 608 Self::Div { destination, a, b } => write!(f, "div {destination}, {a}, {b}"), 609 Self::DivAssign { destination, a } => write!(f, "div {destination}, {a}"), 610 Self::DivSelf { destination } => write!(f, "div {destination}"), 611 Self::Nand { destination, a, b } => write!(f, "nand {destination}, {a}, {b}"), 612 Self::NandAssign { destination, a } => write!(f, "nand {destination}, {a}"), 613 Self::NandSelf { destination } => write!(f, "nand {destination}"), 614 Self::Halt => write!(f, "halt"), 615 Self::Out { source } => write!(f, "out {source}"), 616 Self::In { destination } => write!(f, "in {destination}"), 617 Self::Alloc { 618 length, 619 destination, 620 } => write!(f, "alloc {destination}, {length}"), 621 Self::Free { block } => { 622 write!(f, "free {block}") 623 } 624 Self::Jmp { location } => write!(f, "jmp [{}, {}]", location.block, location.offset), 625 Self::LiteralMove { 626 destination, 627 literal, 628 } => write!(f, "mov {destination}, {literal}"), 629 Self::Address { 630 destination, 631 reference, 632 } => write!(f, "adr {destination}, {}", reference.label), 633 } 634 } 635} 636 637/// Peeks at the next token and returns it iff it is a Register. 638fn peek_register<'s, I>(tokens: &mut Peekable<I>) -> Result<Option<Register>, Error> 639where 640 I: Iterator<Item = (Token<'s>, Range<usize>)>, 641{ 642 match tokens.peek() { 643 Some((Token::Register(r), _)) => Ok(Some(*r)), 644 Some(_) => Ok(None), 645 None => Err(Error::new("unexpected eof", &(0..0))), 646 } 647} 648 649fn consume_register<'s, I>(tokens: &mut I) -> Result<(Register, Range<usize>), Error> 650where 651 I: Iterator<Item = (Token<'s>, Range<usize>)>, 652{ 653 match tokens.next() { 654 Some((Token::Register(r), span)) => Ok((r, span)), 655 Some((token, span)) => Err(Error::new( 656 format!("expected a register, found: {token:?}"), 657 &span, 658 )), 659 None => Err(Error::eof()), 660 } 661} 662 663fn consume_ident<'s, I>(tokens: &mut I) -> Result<(&'s str, Range<usize>), Error> 664where 665 I: Iterator<Item = (Token<'s>, Range<usize>)>, 666{ 667 match tokens.next() { 668 Some((Token::Ident(ident), span)) => Ok((ident, span)), 669 Some((token, span)) => Err(Error::new( 670 format!("expected an identifier, found: {token:?}"), 671 &span, 672 )), 673 None => Err(Error::eof()), 674 } 675} 676 677fn consume_number<'s, I>(tokens: &mut I) -> Result<(u32, Range<usize>), Error> 678where 679 I: Iterator<Item = (Token<'s>, Range<usize>)>, 680{ 681 match tokens.next() { 682 Some((Token::Number(value), span)) => Ok((value, span)), 683 Some((token, span)) => Err(Error::new( 684 format!("expected a number literal, found: {token:?}"), 685 &span, 686 )), 687 None => Err(Error::eof()), 688 } 689} 690 691fn consume_string<'s, I>(tokens: &mut I) -> Result<(Cow<'s, str>, Range<usize>), Error> 692where 693 I: Iterator<Item = (Token<'s>, Range<usize>)>, 694{ 695 match tokens.next() { 696 Some((Token::String(value), span)) => { 697 let unescaped = unescape_str(value).map_err(|_| Error::eof())?; 698 Ok((unescaped, span)) 699 } 700 Some((token, span)) => Err(Error::new( 701 format!("expected a number literal, found: {token:?}"), 702 &span, 703 )), 704 None => Err(Error::eof()), 705 } 706} 707 708fn merge_spans(start: &Range<usize>, end: &Range<usize>) -> Range<usize> { 709 start.start..end.end 710} 711 712#[derive(Debug)] 713#[allow(unused)] 714pub struct InvalidCharacterEscape(pub char, pub usize); 715 716pub fn unescape_str(s: &str) -> Result<Cow<str>, InvalidCharacterEscape> { 717 fn escape_inner(c: &str, i: &mut CharIndices<'_>) -> Result<String, InvalidCharacterEscape> { 718 let mut buffer = c.to_owned(); 719 let mut in_escape = true; 720 721 for (index, c) in i { 722 match (in_escape, c) { 723 (false, '\\') => { 724 in_escape = true; 725 continue; 726 } 727 (false, c) => buffer.push(c), 728 (true, '\\') => buffer.push('\\'), 729 (true, 'n') => buffer.push('\n'), 730 (true, '0') => buffer.push('\0'), 731 (true, '"') => buffer.push('"'), 732 (true, '\'') => buffer.push('\''), 733 (true, 'r') => buffer.push('\r'), 734 (true, 't') => buffer.push('\t'), 735 (true, c) => Err(InvalidCharacterEscape(c, index))?, 736 } 737 738 in_escape = false; 739 } 740 741 Ok(buffer) 742 } 743 744 let mut char_indicies = s.char_indices(); 745 for (index, c) in &mut char_indicies { 746 let scanned = &s[..index]; 747 if c == '\\' { 748 return Ok(Cow::Owned(escape_inner(scanned, &mut char_indicies)?)); 749 } 750 } 751 752 Ok(Cow::Borrowed(s)) 753}