Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
1// Copyright (C) 2025 Thom Hayward.
2//
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the GNU General Public License as published by the Free Software
5// Foundation, version 3.
6//
7// This program is distributed in the hope that it will be useful, but WITHOUT
8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
10// details.
11//
12// You should have received a copy of the GNU General Public License along with
13// this program. If not, see <https://www.gnu.org/licenses/>.
14//
15use super::Token;
16use crate::Register;
17use logos::{Logos, Source};
18use std::{borrow::Cow, collections::HashMap, iter::Peekable, ops::Range, str::CharIndices};
19
20pub fn parse(_unit: impl std::fmt::Display, source: &str) -> Result<ParsedProgram, Error> {
21 Parser::new(source).parse()
22}
23
24#[derive(Debug)]
25pub enum NodeType<'s> {
26 Pragma(Pragma<'s>),
27 Instruction(Instruction<'s>),
28 Comment(#[allow(unused)] &'s str),
29}
30
31impl NodeType<'_> {
32 pub fn size(&self) -> usize {
33 match self {
34 Self::Pragma(pragma) => match &pragma.payload {
35 PragmaType::U32 { .. } => 1,
36 PragmaType::WideString { value } => value.len() + 1,
37 },
38 // Instructions are always one platter.
39 Self::Instruction(_) => 1,
40 Self::Comment(_) => 0,
41 }
42 }
43}
44
45#[derive(Debug)]
46pub struct Node<'s> {
47 pub labels: Vec<&'s str>,
48 pub entity: NodeType<'s>,
49 #[allow(unused)]
50 pub span: Range<usize>,
51}
52
53impl Node<'_> {
54 /// Compute encoded size of the node in platters.
55 #[inline]
56 pub fn size(&self) -> usize {
57 self.entity.size()
58 }
59}
60
61#[derive(Debug)]
62pub struct ParsedProgram<'s> {
63 #[allow(unused)]
64 pub source: &'s str,
65 nodes: Vec<Node<'s>>,
66}
67
68impl<'s> ParsedProgram<'s> {
69 pub fn nodes(&self) -> &[Node<'s>] {
70 &self.nodes
71 }
72}
73
74#[derive(Debug, Default)]
75pub struct Parser<'s> {
76 source: &'s str,
77 labels: HashMap<&'s str, Range<usize>>,
78 active_labels: Vec<&'s str>,
79}
80
81impl<'s> Parser<'s> {
82 fn new(source: &'s str) -> Self {
83 Self {
84 source,
85 ..Default::default()
86 }
87 }
88
89 fn parse(mut self) -> Result<ParsedProgram<'s>, Error> {
90 let mut lexer = Token::lexer(self.source);
91 let mut spanned = vec![];
92 while let Some(res) = lexer.next() {
93 match res {
94 Ok(token) => {
95 spanned.push((token, lexer.span()));
96 }
97 Err(error) => Err(Error::new(format!("lex: {error:?}"), &lexer.span()))?,
98 }
99 }
100
101 let mut nodes = vec![];
102 let mut tokens = spanned.into_iter().peekable();
103 while let Some((token, span)) = tokens.peek() {
104 let node = match token {
105 Token::Label(_) => {
106 self.consume_label(&mut tokens)?;
107 continue;
108 }
109 Token::Pragma(_) => self.consume_pragma(&mut tokens)?,
110 Token::Ident(_) => self.consume_instruction(&mut tokens)?,
111 Token::Comment(comment) => {
112 let node = Node {
113 labels: vec![],
114 entity: NodeType::Comment(comment),
115 span: span.clone(),
116 };
117 tokens.next();
118 node
119 }
120 Token::Newline => {
121 tokens.next();
122 continue;
123 }
124 _ => Err(Error::new(format!("unexpected token {token:?}"), span))?,
125 };
126
127 nodes.push(node);
128 }
129
130 Ok(ParsedProgram {
131 source: self.source,
132 nodes,
133 })
134 }
135
136 /// Consumes a label from the token stream.
137 fn consume_label<I>(&mut self, tokens: &mut I) -> Result<(), Error>
138 where
139 I: Iterator<Item = (Token<'s>, Range<usize>)>,
140 {
141 let Some((Token::Label(label_ident), span)) = tokens.next() else {
142 unreachable!("consume_label called on non-label token");
143 };
144
145 // Add the label to the set of observed labels.
146 let label_span = self
147 .labels
148 .entry(label_ident)
149 .or_insert_with(|| span.clone());
150
151 // If the span of the current token is not equal to
152 // `label_span`, then we have already seen label with the
153 // same identifier.
154 if label_span != &span {
155 return Err(Error::new(
156 format!("duplicate label '{label_ident}', original label span: {label_span:?}"),
157 &span,
158 ));
159 }
160
161 self.active_labels.push(label_ident);
162 Ok(())
163 }
164
165 fn consume_pragma<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error>
166 where
167 I: Iterator<Item = (Token<'s>, Range<usize>)>,
168 {
169 assert!(
170 matches!(tokens.peek(), Some((Token::Pragma(_), _))),
171 "consume_pragma called on non-pragma token"
172 );
173
174 let labels = std::mem::take(&mut self.active_labels);
175 let (pragma, span) = Pragma::consume(tokens)?;
176
177 Ok(Node {
178 labels,
179 entity: NodeType::Pragma(pragma),
180 span,
181 })
182 }
183
184 fn consume_instruction<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error>
185 where
186 I: Iterator<Item = (Token<'s>, Range<usize>)>,
187 {
188 assert!(
189 matches!(tokens.peek(), Some((Token::Ident(_), _))),
190 "consume_instruction called on non-ident token"
191 );
192
193 let labels = std::mem::take(&mut self.active_labels);
194 let (instr, span) = Instruction::consume(tokens)?;
195 Ok(Node {
196 labels,
197 entity: NodeType::Instruction(instr),
198 span,
199 })
200 }
201}
202
203/// An error encountered during parsing.
204#[derive(Debug)]
205#[allow(unused)]
206pub struct Error(pub String, pub Range<usize>);
207
208impl Error {
209 fn new(message: impl ToString, span: &Range<usize>) -> Self {
210 Self(message.to_string(), span.clone())
211 }
212
213 fn eof() -> Self {
214 Self("unexpected eof".into(), 0..0)
215 }
216}
217
218impl std::fmt::Display for Error {
219 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
220 write!(f, "{self:?}")
221 }
222}
223
224impl std::error::Error for Error {}
225
226#[derive(Debug, Default)]
227pub struct Location {
228 pub block: Register,
229 pub offset: Register,
230}
231
232impl Location {
233 pub fn consume<'s, I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error>
234 where
235 I: Iterator<Item = (Token<'s>, Range<usize>)>,
236 {
237 // Require a '[' token.
238 let start_span = match tokens.next() {
239 Some((Token::AddressOpen, span)) => span,
240 Some((_, span)) => Err(Error::new("expected an address opening bracket", &span))?,
241 _ => Err(Error::eof())?,
242 };
243
244 let (block, _) = consume_register(tokens)?;
245 let (offset, _) = consume_register(tokens)?;
246
247 // Require a ']' token.
248 let end_span = match tokens.next() {
249 Some((Token::AddressClose, span)) => span,
250 Some((_, span)) => Err(Error::new("expected an address closing bracket", &span))?,
251 _ => Err(Error::eof())?,
252 };
253
254 Ok((Self { block, offset }, merge_spans(&start_span, &end_span)))
255 }
256}
257
258#[derive(Debug)]
259pub struct Expr<'s> {
260 pub label: &'s str,
261}
262
263#[derive(Debug)]
264pub enum PragmaType<'s> {
265 U32 { value: u32 },
266 WideString { value: Cow<'s, str> },
267}
268
269#[derive(Debug)]
270pub struct Pragma<'s> {
271 #[allow(unused)]
272 relocatable: bool,
273 pub payload: PragmaType<'s>,
274}
275
276impl<'s> Pragma<'s> {
277 pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error>
278 where
279 I: Iterator<Item = (Token<'s>, Range<usize>)>,
280 {
281 let relocatable = true;
282 let token = tokens.next().ok_or(Error::eof())?;
283 match token {
284 (Token::Pragma("u32"), start_span) => {
285 let (value, end_span) = consume_number(tokens)?;
286 Ok((
287 Self {
288 relocatable,
289 payload: PragmaType::U32 { value },
290 },
291 merge_spans(&start_span, &end_span),
292 ))
293 }
294 (Token::Pragma("wstr"), start_span) => {
295 let (value, end_span) = consume_string(tokens)?;
296 Ok((
297 Self {
298 relocatable,
299 payload: PragmaType::WideString { value },
300 },
301 merge_spans(&start_span, &end_span),
302 ))
303 }
304 (Token::Pragma(command), span) => Err(Error::new(
305 format!("unknown pragma command {command}"),
306 &span,
307 ))?,
308 (_, span) => Err(Error::new("unexpected token", &span))?,
309 }
310 }
311}
312
313#[derive(Debug)]
314pub enum Instruction<'s> {
315 /// Operation #0.
316 ConditionalMove {
317 destination: Register,
318 source: Register,
319 condition: Register,
320 },
321 /// Operation #13.
322 Address {
323 destination: Register,
324 reference: Expr<'s>,
325 },
326 /// Operation #13.
327 LiteralMove {
328 destination: Register,
329 literal: u32,
330 },
331 Load {
332 destination: Register,
333 address: Location,
334 },
335 Store {
336 source: Register,
337 address: Location,
338 },
339 Add {
340 destination: Register,
341 a: Register,
342 b: Register,
343 },
344 AddAssign {
345 destination: Register,
346 a: Register,
347 },
348 AddSelf {
349 destination: Register,
350 },
351 Mul {
352 destination: Register,
353 a: Register,
354 b: Register,
355 },
356 MulAssign {
357 destination: Register,
358 a: Register,
359 },
360 MulSelf {
361 destination: Register,
362 },
363 Div {
364 destination: Register,
365 a: Register,
366 b: Register,
367 },
368 DivAssign {
369 destination: Register,
370 a: Register,
371 },
372 DivSelf {
373 destination: Register,
374 },
375 Nand {
376 destination: Register,
377 a: Register,
378 b: Register,
379 },
380 NandAssign {
381 destination: Register,
382 a: Register,
383 },
384 NandSelf {
385 destination: Register,
386 },
387 Halt,
388 Alloc {
389 destination: Register,
390 length: Register,
391 },
392 Free {
393 block: Register,
394 },
395 Out {
396 source: Register,
397 },
398 In {
399 destination: Register,
400 },
401 Jmp {
402 location: Location,
403 },
404}
405
406impl<'s> Instruction<'s> {
407 pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error>
408 where
409 I: Iterator<Item = (Token<'s>, Range<usize>)>,
410 {
411 let ident = tokens.next().unwrap();
412 match ident {
413 (Token::Ident("halt"), span) => Ok((Self::Halt, span)),
414 (Token::Ident("adr"), start_span) => {
415 let (destination, _) = consume_register(tokens)?;
416 let (identifier, end_span) = consume_ident(tokens)?;
417 Ok((
418 Self::Address {
419 destination,
420 reference: Expr { label: identifier },
421 },
422 merge_spans(&start_span, &end_span),
423 ))
424 }
425 (Token::Ident("mov"), start_span) => {
426 let (destination, _) = consume_register(tokens)?;
427 if peek_register(tokens)?.is_some() {
428 let (source, _) = consume_register(tokens)?;
429 let (condition, end_span) = consume_register(tokens)?;
430 Ok((
431 Self::ConditionalMove {
432 destination,
433 source,
434 condition,
435 },
436 merge_spans(&start_span, &end_span),
437 ))
438 } else {
439 let (literal, end_span) = consume_number(tokens)?;
440 Ok((
441 Self::LiteralMove {
442 destination,
443 literal,
444 },
445 merge_spans(&start_span, &end_span),
446 ))
447 }
448 }
449 (Token::Ident("ldr"), start_span) => {
450 let (destination, _) = consume_register(tokens)?;
451 let (address, end_span) = Location::consume(tokens)?;
452 Ok((
453 Self::Load {
454 destination,
455 address,
456 },
457 merge_spans(&start_span, &end_span),
458 ))
459 }
460 (Token::Ident("str"), start_span) => {
461 let (source, _) = consume_register(tokens)?;
462 let (address, end_span) = Location::consume(tokens)?;
463 Ok((
464 Self::Store { source, address },
465 merge_spans(&start_span, &end_span),
466 ))
467 }
468 (Token::Ident("out"), start_span) => {
469 let (source, end_span) = consume_register(tokens)?;
470 Ok((Self::Out { source }, merge_spans(&start_span, &end_span)))
471 }
472 (Token::Ident("in"), start_span) => {
473 let (destination, end_span) = consume_register(tokens)?;
474 Ok((
475 Self::In { destination },
476 merge_spans(&start_span, &end_span),
477 ))
478 }
479 (Token::Ident("alloc"), start_span) => {
480 let (destination, _) = consume_register(tokens)?;
481 let (length, end_span) = consume_register(tokens)?;
482 Ok((
483 Self::Alloc {
484 length,
485 destination,
486 },
487 merge_spans(&start_span, &end_span),
488 ))
489 }
490 (Token::Ident("free"), start_span) => {
491 let (block, end_span) = consume_register(tokens)?;
492 Ok((Self::Free { block }, merge_spans(&start_span, &end_span)))
493 }
494 (Token::Ident("jmp"), start_span) => {
495 let (location, end_span) = Location::consume(tokens)?;
496 Ok((Self::Jmp { location }, merge_spans(&start_span, &end_span)))
497 }
498 (Token::Ident("add"), start_span) => {
499 let (destination, mid_span) = consume_register(tokens)?;
500 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
501 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
502 match (a, b) {
503 (Some((a, _)), Some((b, end_span))) => Ok((
504 Self::Add { destination, a, b },
505 merge_spans(&start_span, &end_span),
506 )),
507 (Some((a, end_span)), None) => Ok((
508 Self::AddAssign { destination, a },
509 merge_spans(&start_span, &end_span),
510 )),
511 (None, None) => Ok((
512 Self::AddSelf { destination },
513 merge_spans(&start_span, &mid_span),
514 )),
515 _ => unreachable!(),
516 }
517 }
518 (Token::Ident("mul"), start_span) => {
519 let (destination, mid_span) = consume_register(tokens)?;
520 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
521 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
522 match (a, b) {
523 (Some((a, _)), Some((b, end_span))) => Ok((
524 Self::Mul { destination, a, b },
525 merge_spans(&start_span, &end_span),
526 )),
527 (Some((a, end_span)), None) => Ok((
528 Self::MulAssign { destination, a },
529 merge_spans(&start_span, &end_span),
530 )),
531 (None, None) => Ok((
532 Self::MulSelf { destination },
533 merge_spans(&start_span, &mid_span),
534 )),
535 _ => unreachable!(),
536 }
537 }
538 (Token::Ident("div"), start_span) => {
539 let (destination, mid_span) = consume_register(tokens)?;
540 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
541 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
542 match (a, b) {
543 (Some((a, _)), Some((b, end_span))) => Ok((
544 Self::Div { destination, a, b },
545 merge_spans(&start_span, &end_span),
546 )),
547 (Some((a, end_span)), None) => Ok((
548 Self::DivAssign { destination, a },
549 merge_spans(&start_span, &end_span),
550 )),
551 (None, None) => Ok((
552 Self::DivSelf { destination },
553 merge_spans(&start_span, &mid_span),
554 )),
555 _ => unreachable!(),
556 }
557 }
558 (Token::Ident("nand"), start_span) => {
559 let (destination, mid_span) = consume_register(tokens)?;
560 let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
561 let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
562 match (a, b) {
563 (Some((a, _)), Some((b, end_span))) => Ok((
564 Self::Nand { destination, a, b },
565 merge_spans(&start_span, &end_span),
566 )),
567 (Some((a, end_span)), None) => Ok((
568 Self::NandAssign { destination, a },
569 merge_spans(&start_span, &end_span),
570 )),
571 (None, None) => Ok((
572 Self::NandSelf { destination },
573 merge_spans(&start_span, &mid_span),
574 )),
575 _ => unreachable!(),
576 }
577 }
578 (_, span) => Err(Error::new("unrecognised instruction", &span))?,
579 }
580 }
581}
582
583impl std::fmt::Display for Instruction<'_> {
584 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
585 match self {
586 Self::ConditionalMove {
587 destination,
588 source,
589 condition,
590 } => write!(f, "mov {destination}, {source}, {condition}"),
591 Self::Load {
592 destination,
593 address,
594 } => write!(
595 f,
596 "ldr {destination}, [{}, {}]",
597 address.block, address.offset
598 ),
599 Self::Store { source, address } => {
600 write!(f, "str {source}, [{}, {}]", address.block, address.offset)
601 }
602 Self::Add { destination, a, b } => write!(f, "add {destination}, {a}, {b}"),
603 Self::AddAssign { destination, a } => write!(f, "add {destination}, {a}"),
604 Self::AddSelf { destination } => write!(f, "add {destination}"),
605 Self::Mul { destination, a, b } => write!(f, "mul {destination}, {a}, {b}"),
606 Self::MulAssign { destination, a } => write!(f, "mul {destination}, {a}"),
607 Self::MulSelf { destination } => write!(f, "mul {destination}"),
608 Self::Div { destination, a, b } => write!(f, "div {destination}, {a}, {b}"),
609 Self::DivAssign { destination, a } => write!(f, "div {destination}, {a}"),
610 Self::DivSelf { destination } => write!(f, "div {destination}"),
611 Self::Nand { destination, a, b } => write!(f, "nand {destination}, {a}, {b}"),
612 Self::NandAssign { destination, a } => write!(f, "nand {destination}, {a}"),
613 Self::NandSelf { destination } => write!(f, "nand {destination}"),
614 Self::Halt => write!(f, "halt"),
615 Self::Out { source } => write!(f, "out {source}"),
616 Self::In { destination } => write!(f, "in {destination}"),
617 Self::Alloc {
618 length,
619 destination,
620 } => write!(f, "alloc {destination}, {length}"),
621 Self::Free { block } => {
622 write!(f, "free {block}")
623 }
624 Self::Jmp { location } => write!(f, "jmp [{}, {}]", location.block, location.offset),
625 Self::LiteralMove {
626 destination,
627 literal,
628 } => write!(f, "mov {destination}, {literal}"),
629 Self::Address {
630 destination,
631 reference,
632 } => write!(f, "adr {destination}, {}", reference.label),
633 }
634 }
635}
636
637/// Peeks at the next token and returns it iff it is a Register.
638fn peek_register<'s, I>(tokens: &mut Peekable<I>) -> Result<Option<Register>, Error>
639where
640 I: Iterator<Item = (Token<'s>, Range<usize>)>,
641{
642 match tokens.peek() {
643 Some((Token::Register(r), _)) => Ok(Some(*r)),
644 Some(_) => Ok(None),
645 None => Err(Error::new("unexpected eof", &(0..0))),
646 }
647}
648
649fn consume_register<'s, I>(tokens: &mut I) -> Result<(Register, Range<usize>), Error>
650where
651 I: Iterator<Item = (Token<'s>, Range<usize>)>,
652{
653 match tokens.next() {
654 Some((Token::Register(r), span)) => Ok((r, span)),
655 Some((token, span)) => Err(Error::new(
656 format!("expected a register, found: {token:?}"),
657 &span,
658 )),
659 None => Err(Error::eof()),
660 }
661}
662
663fn consume_ident<'s, I>(tokens: &mut I) -> Result<(&'s str, Range<usize>), Error>
664where
665 I: Iterator<Item = (Token<'s>, Range<usize>)>,
666{
667 match tokens.next() {
668 Some((Token::Ident(ident), span)) => Ok((ident, span)),
669 Some((token, span)) => Err(Error::new(
670 format!("expected an identifier, found: {token:?}"),
671 &span,
672 )),
673 None => Err(Error::eof()),
674 }
675}
676
677fn consume_number<'s, I>(tokens: &mut I) -> Result<(u32, Range<usize>), Error>
678where
679 I: Iterator<Item = (Token<'s>, Range<usize>)>,
680{
681 match tokens.next() {
682 Some((Token::Number(value), span)) => Ok((value, span)),
683 Some((token, span)) => Err(Error::new(
684 format!("expected a number literal, found: {token:?}"),
685 &span,
686 )),
687 None => Err(Error::eof()),
688 }
689}
690
691fn consume_string<'s, I>(tokens: &mut I) -> Result<(Cow<'s, str>, Range<usize>), Error>
692where
693 I: Iterator<Item = (Token<'s>, Range<usize>)>,
694{
695 match tokens.next() {
696 Some((Token::String(value), span)) => {
697 let unescaped = unescape_str(value).map_err(|_| Error::eof())?;
698 Ok((unescaped, span))
699 }
700 Some((token, span)) => Err(Error::new(
701 format!("expected a number literal, found: {token:?}"),
702 &span,
703 )),
704 None => Err(Error::eof()),
705 }
706}
707
708fn merge_spans(start: &Range<usize>, end: &Range<usize>) -> Range<usize> {
709 start.start..end.end
710}
711
712#[derive(Debug)]
713#[allow(unused)]
714pub struct InvalidCharacterEscape(pub char, pub usize);
715
716pub fn unescape_str(s: &str) -> Result<Cow<str>, InvalidCharacterEscape> {
717 fn escape_inner(c: &str, i: &mut CharIndices<'_>) -> Result<String, InvalidCharacterEscape> {
718 let mut buffer = c.to_owned();
719 let mut in_escape = true;
720
721 for (index, c) in i {
722 match (in_escape, c) {
723 (false, '\\') => {
724 in_escape = true;
725 continue;
726 }
727 (false, c) => buffer.push(c),
728 (true, '\\') => buffer.push('\\'),
729 (true, 'n') => buffer.push('\n'),
730 (true, '0') => buffer.push('\0'),
731 (true, '"') => buffer.push('"'),
732 (true, '\'') => buffer.push('\''),
733 (true, 'r') => buffer.push('\r'),
734 (true, 't') => buffer.push('\t'),
735 (true, c) => Err(InvalidCharacterEscape(c, index))?,
736 }
737
738 in_escape = false;
739 }
740
741 Ok(buffer)
742 }
743
744 let mut char_indicies = s.char_indices();
745 for (index, c) in &mut char_indicies {
746 let scanned = &s[..index];
747 if c == '\\' {
748 return Ok(Cow::Owned(escape_inner(scanned, &mut char_indicies)?));
749 }
750 }
751
752 Ok(Cow::Borrowed(s))
753}