Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable

feat: um-32 assembler

tjh 1e30dd2b 4b25dfc9

+93
Cargo.lock
··· 3 3 version = 3 4 4 5 5 [[package]] 6 + name = "beef" 7 + version = "0.5.2" 8 + source = "registry+https://github.com/rust-lang/crates.io-index" 9 + checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" 10 + 11 + [[package]] 12 + name = "fnv" 13 + version = "1.0.7" 14 + source = "registry+https://github.com/rust-lang/crates.io-index" 15 + checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 16 + 17 + [[package]] 18 + name = "lazy_static" 19 + version = "1.5.0" 20 + source = "registry+https://github.com/rust-lang/crates.io-index" 21 + checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 22 + 23 + [[package]] 24 + name = "logos" 25 + version = "0.14.2" 26 + source = "registry+https://github.com/rust-lang/crates.io-index" 27 + checksum = "1c6b6e02facda28ca5fb8dbe4b152496ba3b1bd5a4b40bb2b1b2d8ad74e0f39b" 28 + dependencies = [ 29 + "logos-derive", 30 + ] 31 + 32 + [[package]] 33 + name = "logos-codegen" 34 + version = "0.14.2" 35 + source = "registry+https://github.com/rust-lang/crates.io-index" 36 + checksum = "b32eb6b5f26efacd015b000bfc562186472cd9b34bdba3f6b264e2a052676d10" 37 + dependencies = [ 38 + "beef", 39 + "fnv", 40 + "lazy_static", 41 + "proc-macro2", 42 + "quote", 43 + "regex-syntax", 44 + "syn", 45 + ] 46 + 47 + [[package]] 48 + name = "logos-derive" 49 + version = "0.14.2" 50 + source = "registry+https://github.com/rust-lang/crates.io-index" 51 + checksum = "3e5d0c5463c911ef55624739fc353238b4e310f0144be1f875dc42fec6bfd5ec" 52 + dependencies = [ 53 + "logos-codegen", 54 + ] 55 + 56 + [[package]] 57 + name = "proc-macro2" 58 + version = "1.0.92" 59 + source = "registry+https://github.com/rust-lang/crates.io-index" 60 + checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" 61 + dependencies = [ 62 + "unicode-ident", 63 + ] 64 + 65 + [[package]] 66 + name = "quote" 67 + version = "1.0.37" 68 + source = "registry+https://github.com/rust-lang/crates.io-index" 69 
+ checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" 70 + dependencies = [ 71 + "proc-macro2", 72 + ] 73 + 74 + [[package]] 75 + name = "regex-syntax" 76 + version = "0.8.5" 77 + source = "registry+https://github.com/rust-lang/crates.io-index" 78 + checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 79 + 80 + [[package]] 6 81 name = "smallvec" 7 82 version = "1.13.2" 8 83 source = "registry+https://github.com/rust-lang/crates.io-index" 9 84 checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 10 85 11 86 [[package]] 87 + name = "syn" 88 + version = "2.0.89" 89 + source = "registry+https://github.com/rust-lang/crates.io-index" 90 + checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" 91 + dependencies = [ 92 + "proc-macro2", 93 + "quote", 94 + "unicode-ident", 95 + ] 96 + 97 + [[package]] 12 98 name = "um" 13 99 version = "0.1.0" 14 100 dependencies = [ 101 + "logos", 15 102 "smallvec", 16 103 ] 104 + 105 + [[package]] 106 + name = "unicode-ident" 107 + version = "1.0.14" 108 + source = "registry+https://github.com/rust-lang/crates.io-index" 109 + checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
+7 -1
Cargo.toml
··· 2 2 name = "um" 3 3 version = "0.1.0" 4 4 edition = "2021" 5 + default-run = "um" 6 + rust-version = "1.74.1" 5 7 6 8 [dependencies] 7 9 smallvec = { version = "1.13.2" } 10 + logos = { version = "0.14.2" } 8 11 9 12 [features] 10 13 default = [] 11 - timing = [] 14 + 15 + [profile.release] 16 + lto = "fat" 17 + codegen-units = 1
+1 -1
README.md
··· 6 6 7 7 Run the benchmark: 8 8 ```sh 9 - ; cargo run --release --features timing -- files/sandmark.umz 9 + ; cargo run --release -- files/sandmark.umz 10 10 ```
+33
files/cat.asm
··· 1 + ; 2 + ; cat.asm 3 + ; 4 + ; Read from stdin and echo to stdout. 5 + ; 6 + main: 7 + ; set r2 to 0xffffffff 8 + nand r2 9 + 10 + ; setup branches 11 + adr r6, output 12 + adr r5, loop 13 + 14 + loop: 15 + ; read stdin, r1 will contain 0xffffffff if we've reached EOF. 16 + in r1 17 + 18 + ; set r3 to 0 if r2 == r1 19 + nand r3, r2, r1 20 + 21 + ; setup branch 22 + adr r4, end 23 + ; overwrite r4 with $output iff r3 != 0 (i.e. we have not reached EOF). 24 + mov r4, r6, r3 25 + jmp [r0, r4] 26 + 27 + output: 28 + ; write to stdout 29 + out r1 30 + jmp [r0, r5] 31 + 32 + end: 33 + halt
+24
files/hello-world.asm
··· 1 + ; 2 + ; hello-world.asm 3 + ; 4 + ; Prints "Hello, world!" to the stdout. 5 + ; 6 + message: 7 + .wstr "Hello, world!\n" 8 + 9 + adr r1, message 10 + adr r4, loop 11 + mov r3, 1 12 + loop: 13 + ldr r2, [r0, r1] 14 + adr r6, next 15 + adr r7, end 16 + mov r7, r6, r2 17 + jmp [r0, r7] 18 + next: 19 + out r2 20 + add r1, r3 21 + jmp [r0, r4] 22 + 23 + end: 24 + halt
+335
src/asm.rs
··· 1 + mod lexer; 2 + mod parse; 3 + 4 + use crate::{Platter, Register}; 5 + use lexer::Token; 6 + use parse::{Instruction, Node, NodeType, PragmaType}; 7 + use std::collections::HashMap; 8 + 9 + #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] 10 + enum Section { 11 + Text, 12 + Data, 13 + } 14 + 15 + pub fn assemble<'s>(source: &'s str) -> Vec<Platter> { 16 + let parsed = parse::parse("", source).unwrap(); 17 + 18 + let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new(); 19 + let mut offsets: HashMap<Section, usize> = HashMap::new(); 20 + let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new(); 21 + for node in parsed.nodes().iter() { 22 + match node.entity { 23 + NodeType::Pragma(_) => { 24 + let loc = *offsets 25 + .entry(Section::Data) 26 + .and_modify(|loc| *loc += node.size()) 27 + .or_default(); 28 + 29 + sections 30 + .entry(Section::Data) 31 + .and_modify(|section| section.push(node)) 32 + .or_insert(vec![node]); 33 + 34 + for label in &node.labels { 35 + label_locations.insert(label, (Section::Data, loc)); 36 + } 37 + } 38 + NodeType::Instruction(_) => { 39 + let loc = *offsets 40 + .entry(Section::Text) 41 + .and_modify(|loc| *loc += node.size()) 42 + .or_default(); 43 + 44 + sections 45 + .entry(Section::Text) 46 + .and_modify(|section| section.push(node)) 47 + .or_insert(vec![node]); 48 + 49 + for label in &node.labels { 50 + label_locations.insert(label, (Section::Text, loc)); 51 + } 52 + } 53 + _ => {} 54 + } 55 + } 56 + 57 + let text = sections.remove(&Section::Text).unwrap(); 58 + let data_offset = text.len(); 59 + 60 + let mut program = vec![]; 61 + for node in text.into_iter() { 62 + let NodeType::Instruction(instruction) = &node.entity else { 63 + panic!("invalid node in .text section"); 64 + }; 65 + 66 + let encoded = match instruction { 67 + Instruction::ConditionalMove { 68 + destination, 69 + source, 70 + condition, 71 + } => encode_standard(0x00, destination, source, condition), 72 + Instruction::Load { 
73 + destination, 74 + address, 75 + } => { 76 + let parse::Location { block, offset } = address; 77 + encode_standard(0x01, destination, block, offset) 78 + } 79 + Instruction::Store { source, address } => { 80 + let parse::Location { block, offset } = address; 81 + encode_standard(0x02, block, offset, source) 82 + } 83 + Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b), 84 + Instruction::AddAssign { destination, a } => { 85 + encode_standard(0x03, destination, destination, a) 86 + } 87 + Instruction::AddSelf { destination } => { 88 + encode_standard(0x03, destination, destination, destination) 89 + } 90 + Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b), 91 + Instruction::MulAssign { destination, a } => { 92 + encode_standard(0x04, destination, destination, a) 93 + } 94 + Instruction::MulSelf { destination } => { 95 + encode_standard(0x04, destination, destination, destination) 96 + } 97 + Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b), 98 + Instruction::DivAssign { destination, a } => { 99 + encode_standard(0x05, destination, destination, a) 100 + } 101 + Instruction::DivSelf { destination } => { 102 + encode_standard(0x05, destination, destination, destination) 103 + } 104 + Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b), 105 + Instruction::NandAssign { destination, a } => { 106 + encode_standard(0x06, destination, destination, a) 107 + } 108 + Instruction::NandSelf { destination } => { 109 + encode_standard(0x06, destination, destination, destination) 110 + } 111 + Instruction::Halt => encode_standard( 112 + 0x07, 113 + &Default::default(), 114 + &Default::default(), 115 + &Default::default(), 116 + ), 117 + Instruction::Alloc { 118 + destination, 119 + length, 120 + } => encode_standard(0x08, &Register::default(), destination, length), 121 + Instruction::Free { block } => { 122 + encode_standard(0x09, 
&Register::default(), &Register::default(), block) 123 + } 124 + Instruction::Out { source } => { 125 + encode_standard(0x0a, &Default::default(), &Default::default(), source) 126 + } 127 + Instruction::In { destination } => { 128 + encode_standard(0x0b, &Default::default(), &Default::default(), destination) 129 + } 130 + Instruction::Jmp { location } => { 131 + let parse::Location { block, offset } = location; 132 + encode_standard(0x0c, &Register::default(), block, offset) 133 + } 134 + Instruction::Address { 135 + destination, 136 + reference, 137 + } => { 138 + // lookup reference 139 + let Some((section, offset)) = label_locations.get(reference.label) else { 140 + panic!("failed to resolve {}", reference.label); 141 + }; 142 + 143 + let value = match section { 144 + Section::Text => *offset, 145 + Section::Data => data_offset + *offset, 146 + }; 147 + 148 + 0xd0000000 | destination.encode_a_ortho() | encode_literal(value as Platter) 149 + } 150 + Instruction::LiteralMove { 151 + destination, 152 + literal, 153 + } => 0xd0000000 | destination.encode_a_ortho() | encode_literal(*literal), 154 + }; 155 + 156 + program.push(encoded); 157 + } 158 + 159 + if let Some(data) = sections.remove(&Section::Data) { 160 + for node in data.into_iter() { 161 + let NodeType::Pragma(pragma) = &node.entity else { 162 + panic!("invalid node in .data section. {node:?}"); 163 + }; 164 + 165 + let encoded = match &pragma.payload { 166 + PragmaType::WideString { value } => { 167 + for byte in value.as_bytes() { 168 + program.push(*byte as Platter); 169 + } 170 + Some(0) // terminating byte. 171 + } 172 + PragmaType::U32 { value } => Some(*value), 173 + }; 174 + 175 + if let Some(encoded) = encoded { 176 + program.push(encoded); 177 + } 178 + } 179 + } 180 + 181 + program 182 + } 183 + 184 + fn encode_literal(value: Platter) -> Platter { 185 + const LITERAL_MAX: Platter = 0x1ffffff; 186 + assert!(value <= LITERAL_MAX, "literal value exceeds available bits. 
value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})"); 187 + value as Platter 188 + } 189 + 190 + fn encode_standard(op: Platter, a: &Register, b: &Register, c: &Register) -> Platter { 191 + (op << 28) | a.encode_a() | b.encode_b() | c.encode_c() 192 + } 193 + 194 + #[cfg(test)] 195 + mod tests { 196 + use super::*; 197 + use crate::{Operation, Register::*}; 198 + 199 + #[test] 200 + fn wide_str() { 201 + // Embed a wide string and get a reference to it. 202 + let program = assemble( 203 + r#" 204 + adr r0, msg 205 + msg: .wstr "Hello" 206 + "#, 207 + ); 208 + 209 + let ops = crate::decode_ops(&program); 210 + assert_eq!(ops[0], Operation::Orthography { a: R0, value: 1 }); 211 + 212 + let mut platters = program.into_iter().skip(1); 213 + assert_eq!(platters.next(), Some('H' as Platter)); 214 + assert_eq!(platters.next(), Some('e' as Platter)); 215 + assert_eq!(platters.next(), Some('l' as Platter)); 216 + assert_eq!(platters.next(), Some('l' as Platter)); 217 + assert_eq!(platters.next(), Some('o' as Platter)); 218 + assert_eq!(platters.next(), Some(0)); 219 + assert_eq!(platters.next(), None); 220 + } 221 + 222 + #[test] 223 + fn addresses() { 224 + let program = assemble( 225 + r#" 226 + halt 227 + start: 228 + ldr r2, [r0, r1] 229 + str r2, [r0, r1] 230 + adr r3, start 231 + halt 232 + "#, 233 + ); 234 + 235 + let mut ops = crate::decode_ops(&program).into_iter(); 236 + 237 + assert_eq!(ops.next(), Some(Operation::Halt)); 238 + assert_eq!( 239 + ops.next(), 240 + Some(Operation::ArrayIndex { 241 + a: R2, 242 + b: R0, 243 + c: R1 244 + }) 245 + ); 246 + assert_eq!( 247 + ops.next(), 248 + Some(Operation::ArrayAmendment { 249 + a: R0, 250 + b: R1, 251 + c: R2 252 + }) 253 + ); 254 + assert_eq!(ops.next(), Some(Operation::Orthography { a: R3, value: 1 })); 255 + assert_eq!(ops.next(), Some(Operation::Halt)); 256 + assert_eq!(ops.next(), None); 257 + } 258 + 259 + #[test] 260 + fn load_store() { 261 + let state = crate::Um::new(assemble( 262 + r#" 
263 + adr r1, loc 264 + ldr r2, [r0, r1] 265 + mov r3, 56 266 + str r3, [r0, r1] 267 + halt 268 + loc:.u32 42 269 + "#, 270 + )) 271 + .run(); 272 + assert_eq!(state.registers[R2], 42); 273 + assert_eq!(state.memory[0][5], 56); 274 + } 275 + 276 + #[test] 277 + fn addition() { 278 + let state = crate::Um::new(assemble( 279 + r#" 280 + mov r0, 42 281 + mov r1, 64 282 + mov r2, 8192 283 + 284 + add r3, r0, r1 ; r3 = r0 + r1 = 106 285 + add r1, r2 ; r1 = r1 + r2 = 8256 286 + add r0 ; r0 = r0 + r0 = 84 287 + 288 + halt 289 + "#, 290 + )) 291 + .run(); 292 + 293 + assert_eq!(state.registers[R0], 84); 294 + assert_eq!(state.registers[R1], 8256); 295 + assert_eq!(state.registers[R2], 8192); 296 + assert_eq!(state.registers[R3], 106); 297 + } 298 + 299 + #[test] 300 + fn alloc() { 301 + let state = crate::Um::new(assemble( 302 + r#" 303 + ; Allocate 1000 bytes. 304 + mov r0, 1000 305 + alloc r1, r0 306 + halt 307 + "#, 308 + )) 309 + .run(); 310 + assert_eq!(state.registers[R0], 1000); 311 + assert_ne!(state.registers[R1], 0); 312 + assert_eq!(state.memory[state.registers[R1] as usize].len(), 1000); 313 + } 314 + 315 + #[test] 316 + fn free() { 317 + let state = crate::Um::new(assemble( 318 + r#" 319 + ; Allocate 1000 bytes. 320 + mov r0, 1000 321 + alloc r1, r0 322 + free r1 323 + halt 324 + "#, 325 + )) 326 + .run(); 327 + assert_eq!(state.registers[R0], 1000); 328 + assert_ne!(state.registers[R1], 0); 329 + assert_eq!( 330 + state.memory[state.registers[R1] as usize].len(), 331 + 0, 332 + "memory not free'd" 333 + ); 334 + } 335 + }
+127
src/asm/lexer.rs
··· 1 + use crate::{Platter, Register}; 2 + use logos::{Lexer, Logos}; 3 + 4 + #[derive(Clone, Debug, Default, PartialEq, Eq)] 5 + pub struct Extras { 6 + pub line: usize, 7 + } 8 + 9 + #[derive(Logos, Debug, PartialEq)] 10 + #[logos(skip r"[ \t\f,]+", extras = Extras)] 11 + pub enum Token<'source> { 12 + #[token("\n", lex_newline)] 13 + Newline, 14 + 15 + #[regex("[a-zA-Z]+[a-zA-Z0-9_]*:", lex_label)] 16 + Label(&'source str), 17 + 18 + #[regex("[a-zA-Z_]+[a-zA-Z0-9_]*", |lexer| lexer.slice())] 19 + Ident(&'source str), 20 + 21 + #[regex(r#"\.([a-zA-Z0-9]+)"#, |lexer| &lexer.slice()[1..])] 22 + Pragma(&'source str), 23 + 24 + #[token("[")] 25 + AddressOpen, 26 + 27 + #[token("]")] 28 + AddressClose, 29 + 30 + #[regex("r[0-7]", lex_register, priority = 10)] 31 + Register(Register), 32 + 33 + #[token("#")] 34 + Pound, 35 + 36 + #[token("+")] 37 + Plus, 38 + 39 + #[token("-")] 40 + Minus, 41 + 42 + #[token(".")] 43 + Here, 44 + 45 + #[regex(r#"(0x[a-fA-F0-9]+)|([0-9]+)"#, lex_number)] 46 + Number(Platter), 47 + 48 + #[token("\"", lex_string_literal)] 49 + String(&'source str), 50 + 51 + #[token(";", lex_comment)] 52 + Comment(&'source str), 53 + } 54 + 55 + fn lex_newline<'source>(lexer: &mut Lexer<'source, Token<'source>>) { 56 + lexer.extras.line += 1; 57 + } 58 + 59 + fn lex_label<'source>(lex: &mut Lexer<'source, Token<'source>>) -> &'source str { 60 + let slice = lex.slice(); 61 + &slice[..slice.len() - 1] 62 + } 63 + 64 + fn lex_number<'source>(lex: &mut Lexer<'source, Token<'source>>) -> Platter { 65 + let slice = &lex.slice(); 66 + if slice.starts_with("0x") { 67 + Platter::from_str_radix(slice.trim_start_matches("0x"), 16).unwrap() 68 + } else { 69 + slice.parse().unwrap() 70 + } 71 + } 72 + 73 + fn lex_string_literal<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> &'source str { 74 + let remainder = lexer.remainder(); 75 + 76 + let mut in_escape = false; 77 + let mut complete = false; 78 + let mut final_index = 0; 79 + for (index, character) in 
remainder.char_indices() { 80 + if complete { 81 + lexer.bump(index); 82 + return &remainder[..final_index]; 83 + } 84 + 85 + if character == '\\' { 86 + in_escape = true; 87 + continue; 88 + } 89 + 90 + if character == '"' && in_escape { 91 + continue; 92 + } 93 + 94 + if character == '"' && !in_escape { 95 + complete = true; 96 + final_index = index; 97 + continue; 98 + } 99 + 100 + in_escape = false; 101 + } 102 + 103 + lexer.bump(remainder.len()); 104 + remainder 105 + } 106 + 107 + fn lex_register<'source>(lex: &mut Lexer<'source, Token<'source>>) -> Register { 108 + let slice = lex.slice(); 109 + let index = slice[1..] 110 + .parse() 111 + .expect("regex for register tokens should make the infallible"); 112 + 113 + Register::from_u8(index) 114 + } 115 + 116 + fn lex_comment<'source>(lex: &mut Lexer<'source, Token<'source>>) -> &'source str { 117 + let remainder = lex.remainder(); 118 + for (position, c) in remainder.char_indices() { 119 + if c == '\n' { 120 + lex.bump(position); 121 + return &remainder[..position]; 122 + } 123 + } 124 + 125 + lex.bump(remainder.len()); 126 + remainder 127 + }
+696
src/asm/parse.rs
··· 1 + use super::Token; 2 + use crate::{Platter, Register}; 3 + use logos::{Logos, Source}; 4 + use std::{borrow::Cow, collections::HashMap, iter::Peekable, ops::Range}; 5 + 6 + pub fn parse(_unit: impl std::fmt::Display, source: &str) -> Result<ParsedProgram, Error> { 7 + Parser::new(source).parse() 8 + } 9 + 10 + #[derive(Debug)] 11 + pub enum NodeType<'s> { 12 + Pragma(Pragma<'s>), 13 + Instruction(Instruction<'s>), 14 + Comment(#[allow(unused)] &'s str), 15 + } 16 + 17 + impl NodeType<'_> { 18 + pub fn size(&self) -> usize { 19 + match self { 20 + Self::Pragma(pragma) => match &pragma.payload { 21 + PragmaType::U32 { .. } => 1, 22 + PragmaType::WideString { value } => value.len() + 1, 23 + }, 24 + // Instructions are always one platter. 25 + Self::Instruction(_) => 1, 26 + Self::Comment(_) => 0, 27 + } 28 + } 29 + } 30 + 31 + #[derive(Debug)] 32 + pub struct Node<'s> { 33 + pub labels: Vec<&'s str>, 34 + pub entity: NodeType<'s>, 35 + #[allow(unused)] 36 + pub span: Range<usize>, 37 + } 38 + 39 + impl Node<'_> { 40 + /// Compute encoded size of the node in platters. 
41 + #[inline] 42 + pub fn size(&self) -> usize { 43 + self.entity.size() 44 + } 45 + } 46 + 47 + #[derive(Debug)] 48 + pub struct ParsedProgram<'s> { 49 + #[allow(unused)] 50 + pub source: &'s str, 51 + nodes: Vec<Node<'s>>, 52 + } 53 + 54 + impl<'s> ParsedProgram<'s> { 55 + pub fn nodes(&self) -> &[Node<'s>] { 56 + &self.nodes 57 + } 58 + } 59 + 60 + #[derive(Debug, Default)] 61 + pub struct Parser<'s> { 62 + source: &'s str, 63 + labels: HashMap<&'s str, Range<usize>>, 64 + active_labels: Vec<&'s str>, 65 + } 66 + 67 + impl<'s> Parser<'s> { 68 + fn new(source: &'s str) -> Self { 69 + Self { 70 + source, 71 + ..Default::default() 72 + } 73 + } 74 + 75 + fn parse(mut self) -> Result<ParsedProgram<'s>, Error> { 76 + let mut lexer = Token::lexer(self.source); 77 + let mut spanned = vec![]; 78 + while let Some(res) = lexer.next() { 79 + match res { 80 + Ok(token) => { 81 + spanned.push((token, lexer.span())); 82 + } 83 + Err(error) => Err(Error::new(format!("lex: {error:?}"), &lexer.span()))?, 84 + } 85 + } 86 + 87 + let mut nodes = vec![]; 88 + let mut tokens = spanned.into_iter().peekable(); 89 + while let Some((token, span)) = tokens.peek() { 90 + let node = match token { 91 + Token::Label(_) => { 92 + self.consume_label(&mut tokens)?; 93 + continue; 94 + } 95 + Token::Pragma(_) => self.consume_pragma(&mut tokens)?, 96 + Token::Ident(_) => self.consume_instruction(&mut tokens)?, 97 + Token::Comment(comment) => { 98 + let node = Node { 99 + labels: vec![], 100 + entity: NodeType::Comment(comment), 101 + span: span.clone(), 102 + }; 103 + tokens.next(); 104 + node 105 + } 106 + Token::Newline => { 107 + tokens.next(); 108 + continue; 109 + } 110 + _ => Err(Error::new(format!("unexpected token {token:?}"), span))?, 111 + }; 112 + 113 + nodes.push(node); 114 + } 115 + 116 + Ok(ParsedProgram { 117 + source: self.source, 118 + nodes, 119 + }) 120 + } 121 + 122 + /// Consumes a label from the token stream. 
123 + fn consume_label<I>(&mut self, tokens: &mut I) -> Result<(), Error> 124 + where 125 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 126 + { 127 + let Some((Token::Label(label_ident), span)) = tokens.next() else { 128 + unreachable!("consume_label called on non-label token"); 129 + }; 130 + 131 + // Add the label to the set of observed labels. 132 + let label_span = self 133 + .labels 134 + .entry(label_ident) 135 + .or_insert_with(|| span.clone()); 136 + 137 + // If the span of the current token is not equal to 138 + // `label_span`, then we have already seen label with the 139 + // same identifier. 140 + if label_span != &span { 141 + return Err(Error::new( 142 + format!("duplicate label '{label_ident}', original label span: {label_span:?}"), 143 + &span, 144 + )); 145 + } 146 + 147 + self.active_labels.push(label_ident); 148 + Ok(()) 149 + } 150 + 151 + fn consume_pragma<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error> 152 + where 153 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 154 + { 155 + assert!( 156 + matches!(tokens.peek(), Some((Token::Pragma(_), _))), 157 + "consume_pragma called on non-pragma token" 158 + ); 159 + 160 + let labels = std::mem::take(&mut self.active_labels); 161 + let (pragma, span) = Pragma::consume(tokens)?; 162 + 163 + Ok(Node { 164 + labels, 165 + entity: NodeType::Pragma(pragma), 166 + span, 167 + }) 168 + } 169 + 170 + fn consume_instruction<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error> 171 + where 172 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 173 + { 174 + assert!( 175 + matches!(tokens.peek(), Some((Token::Ident(_), _))), 176 + "consume_instruction called on non-ident token" 177 + ); 178 + 179 + let labels = std::mem::take(&mut self.active_labels); 180 + let (instr, span) = Instruction::consume(tokens)?; 181 + Ok(Node { 182 + labels, 183 + entity: NodeType::Instruction(instr), 184 + span, 185 + }) 186 + } 187 + } 188 + 189 + /// An error encountered during parsing. 
190 + #[derive(Debug)] 191 + #[allow(unused)] 192 + pub struct Error(pub String, pub Range<usize>); 193 + 194 + impl Error { 195 + fn new(message: impl ToString, span: &Range<usize>) -> Self { 196 + Self(message.to_string(), span.clone()) 197 + } 198 + 199 + fn eof() -> Self { 200 + Self("unexpected eof".into(), 0..0) 201 + } 202 + } 203 + 204 + impl std::fmt::Display for Error { 205 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 206 + write!(f, "{self:?}") 207 + } 208 + } 209 + 210 + impl std::error::Error for Error {} 211 + 212 + #[derive(Debug, Default)] 213 + pub struct Location { 214 + pub block: Register, 215 + pub offset: Register, 216 + } 217 + 218 + impl Location { 219 + pub fn consume<'s, I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 220 + where 221 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 222 + { 223 + // Require a '[' token. 224 + let start_span = match tokens.next() { 225 + Some((Token::AddressOpen, span)) => span, 226 + Some((_, span)) => Err(Error::new("expected an address opening bracket", &span))?, 227 + _ => Err(Error::eof())?, 228 + }; 229 + 230 + let (block, _) = consume_register(tokens)?; 231 + let (offset, _) = consume_register(tokens)?; 232 + 233 + // Require a ']' token. 
234 + let end_span = match tokens.next() { 235 + Some((Token::AddressClose, span)) => span, 236 + Some((_, span)) => Err(Error::new("expected an address closing bracket", &span))?, 237 + _ => Err(Error::eof())?, 238 + }; 239 + 240 + Ok((Self { block, offset }, merge_spans(&start_span, &end_span))) 241 + } 242 + } 243 + 244 + #[derive(Debug)] 245 + pub struct Expr<'s> { 246 + pub label: &'s str, 247 + } 248 + 249 + #[derive(Debug)] 250 + pub enum PragmaType<'s> { 251 + U32 { value: u32 }, 252 + WideString { value: Cow<'s, str> }, 253 + } 254 + 255 + #[derive(Debug)] 256 + pub struct Pragma<'s> { 257 + #[allow(unused)] 258 + relocatable: bool, 259 + pub payload: PragmaType<'s>, 260 + } 261 + 262 + impl<'s> Pragma<'s> { 263 + pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 264 + where 265 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 266 + { 267 + let relocatable = true; 268 + let token = tokens.next().ok_or(Error::eof())?; 269 + match token { 270 + (Token::Pragma("u32"), start_span) => { 271 + let (value, end_span) = consume_number(tokens)?; 272 + Ok(( 273 + Self { 274 + relocatable, 275 + payload: PragmaType::U32 { value }, 276 + }, 277 + merge_spans(&start_span, &end_span), 278 + )) 279 + } 280 + (Token::Pragma("wstr"), start_span) => { 281 + let (value, end_span) = consume_string(tokens)?; 282 + Ok(( 283 + Self { 284 + relocatable, 285 + payload: PragmaType::WideString { value }, 286 + }, 287 + merge_spans(&start_span, &end_span), 288 + )) 289 + } 290 + (Token::Pragma(command), span) => Err(Error::new( 291 + format!("unknown pragma command {command}"), 292 + &span, 293 + ))?, 294 + (_, span) => Err(Error::new("unexpected token", &span))?, 295 + } 296 + } 297 + } 298 + 299 + #[derive(Debug)] 300 + pub enum Instruction<'s> { 301 + /// Operation #0. 302 + ConditionalMove { 303 + destination: Register, 304 + source: Register, 305 + condition: Register, 306 + }, 307 + /// Operation #13. 
308 + Address { 309 + destination: Register, 310 + reference: Expr<'s>, 311 + }, 312 + /// Operation #13. 313 + LiteralMove { 314 + destination: Register, 315 + literal: Platter, 316 + }, 317 + Load { 318 + destination: Register, 319 + address: Location, 320 + }, 321 + Store { 322 + source: Register, 323 + address: Location, 324 + }, 325 + Add { 326 + destination: Register, 327 + a: Register, 328 + b: Register, 329 + }, 330 + AddAssign { 331 + destination: Register, 332 + a: Register, 333 + }, 334 + AddSelf { 335 + destination: Register, 336 + }, 337 + Mul { 338 + destination: Register, 339 + a: Register, 340 + b: Register, 341 + }, 342 + MulAssign { 343 + destination: Register, 344 + a: Register, 345 + }, 346 + MulSelf { 347 + destination: Register, 348 + }, 349 + Div { 350 + destination: Register, 351 + a: Register, 352 + b: Register, 353 + }, 354 + DivAssign { 355 + destination: Register, 356 + a: Register, 357 + }, 358 + DivSelf { 359 + destination: Register, 360 + }, 361 + Nand { 362 + destination: Register, 363 + a: Register, 364 + b: Register, 365 + }, 366 + NandAssign { 367 + destination: Register, 368 + a: Register, 369 + }, 370 + NandSelf { 371 + destination: Register, 372 + }, 373 + Halt, 374 + Alloc { 375 + destination: Register, 376 + length: Register, 377 + }, 378 + Free { 379 + block: Register, 380 + }, 381 + Out { 382 + source: Register, 383 + }, 384 + In { 385 + destination: Register, 386 + }, 387 + Jmp { 388 + location: Location, 389 + }, 390 + } 391 + 392 + impl<'s> Instruction<'s> { 393 + pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 394 + where 395 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 396 + { 397 + let ident = tokens.next().unwrap(); 398 + match ident { 399 + (Token::Ident("halt"), span) => Ok((Self::Halt, span)), 400 + (Token::Ident("adr"), start_span) => { 401 + let (destination, _) = consume_register(tokens)?; 402 + let (identifier, end_span) = consume_ident(tokens)?; 403 + Ok(( 404 + 
Self::Address { 405 + destination, 406 + reference: Expr { label: identifier }, 407 + }, 408 + merge_spans(&start_span, &end_span), 409 + )) 410 + } 411 + (Token::Ident("mov"), start_span) => { 412 + let (destination, _) = consume_register(tokens)?; 413 + if peek_register(tokens)?.is_some() { 414 + let (source, _) = consume_register(tokens)?; 415 + let (condition, end_span) = consume_register(tokens)?; 416 + Ok(( 417 + Self::ConditionalMove { 418 + destination, 419 + source, 420 + condition, 421 + }, 422 + merge_spans(&start_span, &end_span), 423 + )) 424 + } else { 425 + let (literal, end_span) = consume_number(tokens)?; 426 + Ok(( 427 + Self::LiteralMove { 428 + destination, 429 + literal, 430 + }, 431 + merge_spans(&start_span, &end_span), 432 + )) 433 + } 434 + } 435 + (Token::Ident("ldr"), start_span) => { 436 + let (destination, _) = consume_register(tokens)?; 437 + let (address, end_span) = Location::consume(tokens)?; 438 + Ok(( 439 + Self::Load { 440 + destination, 441 + address, 442 + }, 443 + merge_spans(&start_span, &end_span), 444 + )) 445 + } 446 + (Token::Ident("str"), start_span) => { 447 + let (source, _) = consume_register(tokens)?; 448 + let (address, end_span) = Location::consume(tokens)?; 449 + Ok(( 450 + Self::Store { source, address }, 451 + merge_spans(&start_span, &end_span), 452 + )) 453 + } 454 + (Token::Ident("out"), start_span) => { 455 + let (source, end_span) = consume_register(tokens)?; 456 + Ok((Self::Out { source }, merge_spans(&start_span, &end_span))) 457 + } 458 + (Token::Ident("in"), start_span) => { 459 + let (destination, end_span) = consume_register(tokens)?; 460 + Ok(( 461 + Self::In { destination }, 462 + merge_spans(&start_span, &end_span), 463 + )) 464 + } 465 + (Token::Ident("alloc"), start_span) => { 466 + let (destination, _) = consume_register(tokens)?; 467 + let (length, end_span) = consume_register(tokens)?; 468 + Ok(( 469 + Self::Alloc { 470 + length, 471 + destination, 472 + }, 473 + merge_spans(&start_span, 
&end_span), 474 + )) 475 + } 476 + (Token::Ident("free"), start_span) => { 477 + let (block, end_span) = consume_register(tokens)?; 478 + Ok((Self::Free { block }, merge_spans(&start_span, &end_span))) 479 + } 480 + (Token::Ident("jmp"), start_span) => { 481 + let (location, end_span) = Location::consume(tokens)?; 482 + Ok((Self::Jmp { location }, merge_spans(&start_span, &end_span))) 483 + } 484 + (Token::Ident("add"), start_span) => { 485 + let (destination, mid_span) = consume_register(tokens)?; 486 + let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 487 + let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 488 + match (a, b) { 489 + (Some((a, _)), Some((b, end_span))) => Ok(( 490 + Self::Add { destination, a, b }, 491 + merge_spans(&start_span, &end_span), 492 + )), 493 + (Some((a, end_span)), None) => Ok(( 494 + Self::AddAssign { destination, a }, 495 + merge_spans(&start_span, &end_span), 496 + )), 497 + (None, None) => Ok(( 498 + Self::AddSelf { destination }, 499 + merge_spans(&start_span, &mid_span), 500 + )), 501 + _ => unreachable!(), 502 + } 503 + } 504 + (Token::Ident("mul"), start_span) => { 505 + let (destination, mid_span) = consume_register(tokens)?; 506 + let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 507 + let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 508 + match (a, b) { 509 + (Some((a, _)), Some((b, end_span))) => Ok(( 510 + Self::Mul { destination, a, b }, 511 + merge_spans(&start_span, &end_span), 512 + )), 513 + (Some((a, end_span)), None) => Ok(( 514 + Self::MulAssign { destination, a }, 515 + merge_spans(&start_span, &end_span), 516 + )), 517 + (None, None) => Ok(( 518 + Self::MulSelf { destination }, 519 + merge_spans(&start_span, &mid_span), 520 + )), 521 + _ => unreachable!(), 522 + } 523 + } 524 + (Token::Ident("div"), start_span) => { 525 + let (destination, mid_span) = consume_register(tokens)?; 526 + let a = 
peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 527 + let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 528 + match (a, b) { 529 + (Some((a, _)), Some((b, end_span))) => Ok(( 530 + Self::Div { destination, a, b }, 531 + merge_spans(&start_span, &end_span), 532 + )), 533 + (Some((a, end_span)), None) => Ok(( 534 + Self::DivAssign { destination, a }, 535 + merge_spans(&start_span, &end_span), 536 + )), 537 + (None, None) => Ok(( 538 + Self::DivSelf { destination }, 539 + merge_spans(&start_span, &mid_span), 540 + )), 541 + _ => unreachable!(), 542 + } 543 + } 544 + (Token::Ident("nand"), start_span) => { 545 + let (destination, mid_span) = consume_register(tokens)?; 546 + let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 547 + let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok()); 548 + match (a, b) { 549 + (Some((a, _)), Some((b, end_span))) => Ok(( 550 + Self::Nand { destination, a, b }, 551 + merge_spans(&start_span, &end_span), 552 + )), 553 + (Some((a, end_span)), None) => Ok(( 554 + Self::NandAssign { destination, a }, 555 + merge_spans(&start_span, &end_span), 556 + )), 557 + (None, None) => Ok(( 558 + Self::NandSelf { destination }, 559 + merge_spans(&start_span, &mid_span), 560 + )), 561 + _ => unreachable!(), 562 + } 563 + } 564 + (_, span) => Err(Error::new("unrecognised instruction", &span))?, 565 + } 566 + } 567 + } 568 + 569 + impl std::fmt::Display for Instruction<'_> { 570 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 571 + match self { 572 + Self::ConditionalMove { 573 + destination, 574 + source, 575 + condition, 576 + } => write!(f, "mov {destination}, {source}, {condition}"), 577 + Self::Load { 578 + destination, 579 + address, 580 + } => write!( 581 + f, 582 + "ldr {destination}, [{}, {}]", 583 + address.block, address.offset 584 + ), 585 + Self::Store { source, address } => { 586 + write!(f, "str {source}, [{}, {}]", 
address.block, address.offset) 587 + } 588 + Self::Add { destination, a, b } => write!(f, "add {destination}, {a}, {b}"), 589 + Self::AddAssign { destination, a } => write!(f, "add {destination}, {a}"), 590 + Self::AddSelf { destination } => write!(f, "add {destination}"), 591 + Self::Mul { destination, a, b } => write!(f, "mul {destination}, {a}, {b}"), 592 + Self::MulAssign { destination, a } => write!(f, "mul {destination}, {a}"), 593 + Self::MulSelf { destination } => write!(f, "mul {destination}"), 594 + Self::Div { destination, a, b } => write!(f, "div {destination}, {a}, {b}"), 595 + Self::DivAssign { destination, a } => write!(f, "div {destination}, {a}"), 596 + Self::DivSelf { destination } => write!(f, "div {destination}"), 597 + Self::Nand { destination, a, b } => write!(f, "nand {destination}, {a}, {b}"), 598 + Self::NandAssign { destination, a } => write!(f, "nand {destination}, {a}"), 599 + Self::NandSelf { destination } => write!(f, "nand {destination}"), 600 + Self::Halt => write!(f, "halt"), 601 + Self::Out { source } => write!(f, "out {source}"), 602 + Self::In { destination } => write!(f, "in {destination}"), 603 + Self::Alloc { 604 + length, 605 + destination, 606 + } => write!(f, "alloc {destination}, {length}"), 607 + Self::Free { block } => { 608 + write!(f, "free {block}") 609 + } 610 + Self::Jmp { location } => write!(f, "jmp [{}, {}]", location.block, location.offset), 611 + Self::LiteralMove { 612 + destination, 613 + literal, 614 + } => write!(f, "mov {destination}, {literal}"), 615 + Self::Address { 616 + destination, 617 + reference, 618 + } => write!(f, "adr {destination}, {}", reference.label), 619 + } 620 + } 621 + } 622 + 623 + /// Peeks at the next token and returns it iff it is a Register. 
624 + fn peek_register<'s, I>(tokens: &mut Peekable<I>) -> Result<Option<Register>, Error> 625 + where 626 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 627 + { 628 + match tokens.peek() { 629 + Some((Token::Register(r), _)) => Ok(Some(*r)), 630 + Some(_) => Ok(None), 631 + None => Err(Error::new("unexpected eof", &(0..0))), 632 + } 633 + } 634 + 635 + fn consume_register<'s, I>(tokens: &mut I) -> Result<(Register, Range<usize>), Error> 636 + where 637 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 638 + { 639 + match tokens.next() { 640 + Some((Token::Register(r), span)) => Ok((r, span)), 641 + Some((token, span)) => Err(Error::new( 642 + format!("expected a register, found: {token:?}"), 643 + &span, 644 + )), 645 + None => Err(Error::eof()), 646 + } 647 + } 648 + 649 + fn consume_ident<'s, I>(tokens: &mut I) -> Result<(&'s str, Range<usize>), Error> 650 + where 651 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 652 + { 653 + match tokens.next() { 654 + Some((Token::Ident(ident), span)) => Ok((ident, span)), 655 + Some((token, span)) => Err(Error::new( 656 + format!("expected an identifier, found: {token:?}"), 657 + &span, 658 + )), 659 + None => Err(Error::eof()), 660 + } 661 + } 662 + 663 + fn consume_number<'s, I>(tokens: &mut I) -> Result<(Platter, Range<usize>), Error> 664 + where 665 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 666 + { 667 + match tokens.next() { 668 + Some((Token::Number(value), span)) => Ok((value, span)), 669 + Some((token, span)) => Err(Error::new( 670 + format!("expected a number literal, found: {token:?}"), 671 + &span, 672 + )), 673 + None => Err(Error::eof()), 674 + } 675 + } 676 + 677 + fn consume_string<'s, I>(tokens: &mut I) -> Result<(Cow<'s, str>, Range<usize>), Error> 678 + where 679 + I: Iterator<Item = (Token<'s>, Range<usize>)>, 680 + { 681 + match tokens.next() { 682 + Some((Token::String(value), span)) => { 683 + let unescaped = crate::str::unescape_str(value).map_err(|_| Error::eof())?; 684 + Ok((unescaped, 
span)) 685 + } 686 + Some((token, span)) => Err(Error::new( 687 + format!("expected a number literal, found: {token:?}"), 688 + &span, 689 + )), 690 + None => Err(Error::eof()), 691 + } 692 + } 693 + 694 + fn merge_spans(start: &Range<usize>, end: &Range<usize>) -> Range<usize> { 695 + start.start..end.end 696 + }
+51
src/bin/uasm.rs
··· 1 + use std::path::{Path, PathBuf}; 2 + use um::Platter; 3 + 4 + fn main() { 5 + let mut output = PathBuf::from("./a.um"); 6 + 7 + let mut program = Vec::new(); 8 + let mut args = std::env::args().skip(1); 9 + while let Some(arg) = args.next() { 10 + match arg.as_str() { 11 + "-o" | "--out" => { 12 + output = PathBuf::from(args.next().expect("expected output path")); 13 + } 14 + _ => { 15 + let path = Path::new(&arg); 16 + program.extend_from_slice(&match load_program(path) { 17 + Ok(p) => p, 18 + Err(error) => { 19 + eprintln!("{error}"); 20 + std::process::exit(1); 21 + } 22 + }); 23 + } 24 + } 25 + } 26 + 27 + // Convert the program to bytes. 28 + let bytes: Vec<_> = program 29 + .into_iter() 30 + .flat_map(|word| word.to_be_bytes()) 31 + .collect(); 32 + 33 + std::fs::write(&output, bytes).unwrap(); 34 + } 35 + 36 + fn load_program(path: &Path) -> std::io::Result<Vec<Platter>> { 37 + match path.extension().map(|ext| ext.as_encoded_bytes()) { 38 + Some(b"uasm") | Some(b"asm") => { 39 + let source = std::fs::read_to_string(path)?; 40 + let program = um::asm::assemble(&source); 41 + Ok(program) 42 + } 43 + _ => { 44 + let program = std::fs::read(path)?; 45 + Ok(program 46 + .chunks_exact(std::mem::size_of::<Platter>()) 47 + .map(|pl| Platter::from_be_bytes(pl.try_into().unwrap())) 48 + .collect()) 49 + } 50 + } 51 + }
+49
src/bin/um.rs
··· 1 + use std::{path::Path, time::Instant}; 2 + use um::{Platter, Um}; 3 + 4 + fn main() { 5 + let mut program = Vec::new(); 6 + let mut time = false; 7 + 8 + for arg in std::env::args().skip(1) { 9 + if arg == "--time" { 10 + time = true; 11 + continue; 12 + } 13 + 14 + let path = Path::new(&arg); 15 + program.extend_from_slice(&match load_program(path) { 16 + Ok(p) => p, 17 + Err(error) => { 18 + eprintln!("{error}"); 19 + std::process::exit(1); 20 + } 21 + }); 22 + } 23 + 24 + let start = Instant::now(); 25 + Um::new(program) 26 + .stdout(&mut std::io::stdout()) 27 + .stdin(&mut std::io::stdin()) 28 + .run(); 29 + 30 + if time { 31 + eprintln!("{:?}", start.elapsed()); 32 + } 33 + } 34 + 35 + fn load_program(path: &Path) -> std::io::Result<Vec<Platter>> { 36 + match path.extension().map(|ext| ext.as_encoded_bytes()) { 37 + Some(b"uasm") | Some(b"asm") => { 38 + let source = std::fs::read_to_string(path)?; 39 + Ok(um::asm::assemble(&source)) 40 + } 41 + _ => { 42 + let program = std::fs::read(path)?; 43 + Ok(program 44 + .chunks_exact(std::mem::size_of::<Platter>()) 45 + .map(|pl| Platter::from_be_bytes(pl.try_into().unwrap())) 46 + .collect()) 47 + } 48 + } 49 + }
+486 -38
src/lib.rs
··· 1 + use smallvec::SmallVec; 2 + use std::{ 3 + io::{Read, Write}, 4 + ops, 5 + }; 6 + 7 + pub mod asm; 8 + pub mod str; 9 + 1 10 pub type Platter = u32; 2 - pub type Parameter = u8; 11 + 12 + /// A reference to a register of the UM-32. 13 + #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] 14 + pub enum Register { 15 + #[default] 16 + R0, 17 + R1, 18 + R2, 19 + R3, 20 + R4, 21 + R5, 22 + R6, 23 + R7, 24 + } 25 + 26 + impl std::fmt::Display for Register { 27 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 28 + write!(f, "r{}", *self as u8) 29 + } 30 + } 31 + 32 + impl Register { 33 + /// Encodes the register as the 'a' parameter of an encoded 34 + /// instruction (bits 6..=8). 35 + fn encode_a(self) -> Platter { 36 + ((self as Platter) & 0x7) << 6 37 + } 38 + 39 + /// Encodes the register as the 'b' parameter of an encoded 40 + /// instruction (bits 3..=5). 41 + fn encode_b(self) -> Platter { 42 + ((self as Platter) & 0x7) << 3 43 + } 44 + 45 + /// Encodes the register as the 'c' parameter of an encoded 46 + /// instruction (bits 0..=2). 47 + fn encode_c(self) -> Platter { 48 + (self as Platter) & 0x7 49 + } 50 + 51 + /// Encodes the register as the 'a' parameter of an `Orthography` 52 + /// operation. 53 + /// 54 + /// This is *only* valid for `Orthography` operations. 55 + fn encode_a_ortho(self) -> Platter { 56 + ((self as Platter) & 0x7) << 25 57 + } 58 + 59 + fn from_u8(index: u8) -> Self { 60 + match index { 61 + 0 => Register::R0, 62 + 1 => Register::R1, 63 + 2 => Register::R2, 64 + 3 => Register::R3, 65 + 4 => Register::R4, 66 + 5 => Register::R5, 67 + 6 => Register::R6, 68 + 7 => Register::R7, 69 + _ => unreachable!(), 70 + } 71 + } 72 + } 73 + 74 + /// A set of registers. 
75 + #[derive(Debug, Default)] 76 + struct Page([Platter; 8]); 77 + 78 + impl ops::Index<Register> for Page { 79 + type Output = Platter; 80 + #[inline(always)] 81 + fn index(&self, index: Register) -> &Self::Output { 82 + &self.0[index as usize] 83 + } 84 + } 85 + 86 + impl ops::IndexMut<Register> for Page { 87 + #[inline(always)] 88 + fn index_mut(&mut self, index: Register) -> &mut Self::Output { 89 + &mut self.0[index as usize] 90 + } 91 + } 92 + 93 + impl From<[Platter; 8]> for Page { 94 + fn from(value: [Platter; 8]) -> Self { 95 + Self(value) 96 + } 97 + } 3 98 4 99 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 5 - pub enum Operation { 100 + enum Operation { 6 101 /// Operator #0. Conditional Move. 7 102 /// 8 103 /// The register A receives the value in register B, 9 104 /// unless the register C contains 0. 10 105 ConditionalMove { 11 - a: Parameter, 12 - b: Parameter, 13 - c: Parameter, 106 + a: Register, 107 + b: Register, 108 + c: Register, 14 109 }, 15 110 /// Operator #1: Array Index. 16 111 /// 17 112 /// The register A receives the value stored at offset 18 113 /// in register C in the array identified by B. 19 114 ArrayIndex { 20 - a: Parameter, 21 - b: Parameter, 22 - c: Parameter, 115 + a: Register, 116 + b: Register, 117 + c: Register, 23 118 }, 24 119 /// Operator #2. Array Amendment. 25 120 /// 26 121 /// The array identified by A is amended at the offset 27 122 /// in register B to store the value in register C. 28 123 ArrayAmendment { 29 - a: Parameter, 30 - b: Parameter, 31 - c: Parameter, 124 + a: Register, 125 + b: Register, 126 + c: Register, 32 127 }, 33 128 /// Operator #3. Addition. 34 129 /// 35 130 /// The register A receives the value in register B plus 36 131 /// the value in register C, modulo 2^32. 37 132 Addition { 38 - a: Parameter, 39 - b: Parameter, 40 - c: Parameter, 133 + a: Register, 134 + b: Register, 135 + c: Register, 41 136 }, 42 137 /// Operator #4. Multiplication. 
43 138 /// 44 139 /// The register A receives the value in register B times 45 140 /// the value in register C, modulo 2^32. 46 141 Multiplication { 47 - a: Parameter, 48 - b: Parameter, 49 - c: Parameter, 142 + a: Register, 143 + b: Register, 144 + c: Register, 50 145 }, 51 146 /// Operator #5. Division. 52 147 /// ··· 54 149 /// divided by the value in register C, if any, where 55 150 /// each quantity is treated as an unsigned 32 bit number. 56 151 Division { 57 - a: Parameter, 58 - b: Parameter, 59 - c: Parameter, 152 + a: Register, 153 + b: Register, 154 + c: Register, 60 155 }, 61 156 /// Operator #6. Not-And. 62 157 /// ··· 65 160 /// position. Otherwise the bit in register A receives 66 161 /// the 0 bit. 67 162 NotAnd { 68 - a: Parameter, 69 - b: Parameter, 70 - c: Parameter, 163 + a: Register, 164 + b: Register, 165 + c: Register, 71 166 }, 72 167 /// Operator #7. Halt. 73 168 /// ··· 82 177 /// exclusively the 0 bit, and that identifies no other 83 178 /// active allocated array, is placed in the B register. 84 179 Allocation { 85 - b: Parameter, 86 - c: Parameter, 180 + b: Register, 181 + c: Register, 87 182 }, 88 183 /// Operator #9. Abandonment. 89 184 /// 90 185 /// The array identified by the register C is abandoned. 91 186 /// Future allocations may then reuse that identifier. 92 187 Abandonment { 93 - c: Parameter, 188 + c: Register, 94 189 }, 95 190 /// Operator #10. Output. 96 191 /// ··· 98 193 /// immediately. Only values between and including 0 and 255 99 194 /// are allowed. 100 195 Output { 101 - c: Parameter, 196 + c: Register, 102 197 }, 103 198 /// Operator #11. Input. 104 199 /// ··· 109 204 /// register C is endowed with a uniform value pattern 110 205 /// where every place is pregnant with the 1 bit. 111 206 Input { 112 - c: Parameter, 207 + c: Register, 113 208 }, 114 209 /// Operator #12. Load Program. 115 210 /// ··· 125 220 /// loading, and shall be handled with the utmost 126 221 /// velocity. 
127 222 LoadProgram { 128 - b: Parameter, 129 - c: Parameter, 223 + b: Register, 224 + c: Register, 130 225 }, 131 226 /// Operator #13. Orthography. 132 227 /// 133 228 /// The value indicated is loaded into the register A 134 229 /// forthwith. 135 230 Orthography { 136 - a: Parameter, 231 + a: Register, 137 232 value: u32, 138 233 }, 139 234 IllegalInstruction, ··· 142 237 impl From<Platter> for Operation { 143 238 #[inline] 144 239 fn from(value: Platter) -> Self { 145 - let a = ((value >> 6) & 0x07) as Parameter; 146 - let b = ((value >> 3) & 0x07) as Parameter; 147 - let c = (value & 0x07) as Parameter; 148 - 240 + let a = Register::from_u8(((value >> 6) & 0x07) as u8); 241 + let b = Register::from_u8(((value >> 3) & 0x07) as u8); 242 + let c = Register::from_u8((value & 0x07) as u8); 149 243 match value & 0xf0000000 { 150 244 0x00000000 => Self::ConditionalMove { a, b, c }, 151 245 0x10000000 => Self::ArrayIndex { a, b, c }, ··· 161 255 0xb0000000 => Self::Input { c }, 162 256 0xc0000000 => Self::LoadProgram { b, c }, 163 257 0xd0000000 => { 164 - let a = ((value >> 25) & 0x07) as Parameter; 258 + let a = Register::from_u8(((value >> 25) & 0x07) as u8); 165 259 let value = value & 0x01ffffff; 166 260 Self::Orthography { a, value } 167 261 } ··· 170 264 } 171 265 } 172 266 173 - #[inline] 174 - pub fn decode_ops(ops: &[Platter]) -> Vec<Operation> { 267 + fn decode_ops(ops: &[Platter]) -> Vec<Operation> { 175 268 ops.iter() 176 269 .map(|&encoded| Operation::from(encoded)) 177 270 .collect() 178 271 } 272 + 273 + const SMALLVEC_SIZE: usize = 24; 274 + 275 + /// Lossless conversion to `usize`. 276 + /// 277 + /// This should only be implemented on types which can be losslessly 278 + /// cast to a `usize`. 279 + trait IntoIndex: Sized + Copy { 280 + fn into_index(self) -> usize; 281 + } 282 + 283 + macro_rules! 
impl_into_index { 284 + ($t:ty) => { 285 + impl IntoIndex for $t { 286 + fn into_index(self) -> usize { 287 + self as usize 288 + } 289 + } 290 + }; 291 + } 292 + 293 + #[cfg(target_pointer_width = "16")] 294 + compile_error!("16 bit architectures are unsupported"); 295 + 296 + // usize *may* be 16 bits, so only implement if it is 32 or 64 bits. 297 + #[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] 298 + impl_into_index!(Platter); 299 + 300 + #[derive(Default)] 301 + pub struct Um<'a> { 302 + pub program_counter: Platter, 303 + registers: Page, 304 + /// Program memory, modelled as a `Vec` of `SmallVec`. 305 + /// 306 + /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory 307 + /// indirection penalty for every memory access within that block. 308 + memory: Vec<SmallVec<[Platter; SMALLVEC_SIZE]>>, 309 + free_blocks: Vec<Platter>, 310 + /// Partially decoded operations cache. 311 + ops: Vec<Operation>, 312 + stdin: Option<&'a mut dyn Read>, 313 + stdout: Option<&'a mut dyn Write>, 314 + } 315 + 316 + impl<'a> Um<'a> { 317 + /// Initialise a Universal Machine with the specified program scroll. 318 + pub fn new(program: Vec<Platter>) -> Self { 319 + let ops = decode_ops(&program); 320 + Self { 321 + memory: vec![program.into()], 322 + ops, 323 + ..Default::default() 324 + } 325 + } 326 + 327 + /// Initialise a Universal Machine with a program read from a legacy 328 + /// unsigned 8-bit character scroll. 329 + pub fn from_bytes(program: impl AsRef<[u8]> + 'a) -> Self { 330 + fn inner<'a>(bytes: &[u8]) -> Um<'a> { 331 + let mut program = 332 + Vec::with_capacity(bytes.len().div_ceil(std::mem::size_of::<Platter>())); 333 + 334 + // Split the program into platters. 335 + let mut chunks = bytes.chunks_exact(std::mem::size_of::<Platter>()); 336 + for word in &mut chunks { 337 + program.push(Platter::from_be_bytes(unsafe { 338 + // SAFETY: The `chunks_exact` iterator will *always* emit 339 + // a slice of the correct length. 
340 + word.try_into().unwrap_unchecked() 341 + })); 342 + } 343 + 344 + if !chunks.remainder().is_empty() { 345 + eprintln!( 346 + "WARNING: program may be corrupt; {} bytes remain after platter conversion.", 347 + chunks.remainder().len() 348 + ); 349 + } 350 + 351 + Um::new(program) 352 + } 353 + 354 + inner(program.as_ref()) 355 + } 356 + 357 + /// Sets the output for the universal machine. 358 + pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self { 359 + self.stdout.replace(stdout); 360 + self 361 + } 362 + 363 + /// Sets the input for the universal machine. 364 + pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self { 365 + self.stdin.replace(stdin); 366 + self 367 + } 368 + 369 + /// Begins the spin-cycle of the universal machine. 370 + #[inline(never)] 371 + pub fn run(mut self) -> Self { 372 + loop { 373 + // println!( 374 + // "{:?}, pc: {:08x}, r: {:08x?}", 375 + // self.ops[self.program_counter as usize], self.program_counter, self.registers 376 + // ); 377 + match self.ops[self.program_counter as usize] { 378 + Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 379 + Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 380 + Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 381 + Operation::Addition { a, b, c } => self.addition(a, b, c), 382 + Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 383 + Operation::Division { a, b, c } => self.division(a, b, c), 384 + Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 385 + Operation::Halt => break, 386 + Operation::Allocation { b, c } => self.allocation(b, c), 387 + Operation::Abandonment { c } => self.abandonment(c), 388 + Operation::Output { c } => self.output(c), 389 + Operation::Input { c } => self.input(c), 390 + Operation::LoadProgram { b, c } => { 391 + self.load_program(b, c); 392 + continue; 393 + } 394 + Operation::Orthography { a, value } => self.orthography(a, value), 395 + Operation::IllegalInstruction 
=> self.illegal_instruction(), 396 + } 397 + self.program_counter += 1; 398 + } 399 + 400 + self 401 + } 402 + 403 + // Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even 404 + // though it has *no* interaction with the code path in Um::run(). 405 + // 406 + // /// Steps one instruction. 407 + // #[inline(never)] 408 + // pub fn step(&mut self) -> bool { 409 + // match self.ops[self.program_counter as usize] { 410 + // Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 411 + // Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 412 + // Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 413 + // Operation::Addition { a, b, c } => self.addition(a, b, c), 414 + // Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 415 + // Operation::Division { a, b, c } => self.division(a, b, c), 416 + // Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 417 + // Operation::Halt => return false, 418 + // Operation::Allocation { b, c } => self.allocation(b, c), 419 + // Operation::Abandonment { c } => self.abandonment(c), 420 + // Operation::Output { c } => self.output(c), 421 + // Operation::Input { c } => self.input(c), 422 + // Operation::LoadProgram { b, c } => { 423 + // self.load_program(b, c); 424 + // return true; 425 + // } 426 + // Operation::Orthography { a, value } => self.orthography(a, value), 427 + // Operation::IllegalInstruction => self.illegal_instruction(), 428 + // } 429 + // self.program_counter += 1; 430 + // true 431 + // } 432 + 433 + /// Loads the value from the specified register. 434 + fn load_register(&self, register: Register) -> Platter { 435 + self.registers[register] 436 + } 437 + 438 + /// Saves a value to the specified register. 
439 + fn save_register(&mut self, register: Register, value: Platter) { 440 + self.registers[register] = value; 441 + } 442 + 443 + fn conditional_move(&mut self, a: Register, b: Register, c: Register) { 444 + if self.load_register(c) != 0 { 445 + self.save_register(a, self.load_register(b)); 446 + } 447 + } 448 + 449 + fn array_index(&mut self, a: Register, b: Register, c: Register) { 450 + let block = self.load_register(b); 451 + let offset = self.load_register(c); 452 + self.save_register(a, self.load_memory(block, offset)); 453 + } 454 + 455 + fn array_amendment(&mut self, a: Register, b: Register, c: Register) { 456 + let block = self.load_register(a); 457 + let offset = self.load_register(b); 458 + let value = self.load_register(c); 459 + self.store_memory(block, offset, value); 460 + } 461 + 462 + fn addition(&mut self, a: Register, b: Register, c: Register) { 463 + self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c))); 464 + } 465 + 466 + fn multiplication(&mut self, a: Register, b: Register, c: Register) { 467 + self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c))); 468 + } 469 + 470 + fn division(&mut self, a: Register, b: Register, c: Register) { 471 + self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c))); 472 + } 473 + 474 + fn not_and(&mut self, a: Register, b: Register, c: Register) { 475 + self.save_register(a, !(self.load_register(b) & self.load_register(c))); 476 + } 477 + 478 + fn allocation(&mut self, b: Register, c: Register) { 479 + let length = self.load_register(c); 480 + let index = self.allocate_memory(length); 481 + self.save_register(b, index); 482 + } 483 + 484 + fn abandonment(&mut self, c: Register) { 485 + let block = self.load_register(c); 486 + self.free_memory(block); 487 + } 488 + 489 + fn output(&mut self, c: Register) { 490 + let value = self.load_register(c); 491 + if let Some(stdout) = self.stdout.as_mut() { 492 + let buffer = [(value & 0xff) as 
u8]; 493 + stdout.write_all(&buffer).unwrap(); 494 + } 495 + } 496 + 497 + fn input(&mut self, c: Register) { 498 + if let Some(stdin) = self.stdin.as_mut() { 499 + let mut buffer = vec![0]; 500 + match stdin.read_exact(&mut buffer) { 501 + Ok(()) => self.save_register(c, buffer[0] as u32), 502 + Err(_) => self.save_register(c, Platter::MAX), 503 + } 504 + } else { 505 + self.save_register(c, Platter::MAX); 506 + } 507 + } 508 + 509 + fn load_program(&mut self, b: Register, c: Register) { 510 + let block = self.load_register(b); 511 + 512 + // Source array is always copied to array[0], but there 513 + // is no point copying array[0] to array[0]. 514 + if block != 0 { 515 + let duplicated = self.duplicate_memory(block); 516 + let ops = decode_ops(duplicated); 517 + self.ops = ops; 518 + } 519 + 520 + self.program_counter = self.load_register(c); 521 + } 522 + 523 + fn orthography(&mut self, a: Register, value: Platter) { 524 + self.save_register(a, value); 525 + } 526 + 527 + #[cold] 528 + #[inline(never)] 529 + fn illegal_instruction(&self) -> ! { 530 + panic!( 531 + "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}", 532 + self.memory[0][self.program_counter.into_index()], 533 + self.program_counter, 534 + self.registers 535 + ) 536 + } 537 + 538 + fn load_memory(&self, block: Platter, offset: Platter) -> Platter { 539 + let block = block.into_index(); 540 + let offset = offset.into_index(); 541 + assert!(block < self.memory.len() && offset < self.memory[block].len()); 542 + self.memory[block][offset] 543 + } 544 + 545 + fn store_memory(&mut self, block: Platter, offset: Platter, value: Platter) { 546 + let block = block.into_index(); 547 + let offset = offset.into_index(); 548 + assert!(block < self.memory.len() && offset < self.memory[block].len()); 549 + self.memory[block][offset] = value 550 + } 551 + 552 + /// Duplicates a block of memory. 553 + /// 554 + /// The block is copied to the first block of memory. 
555 + fn duplicate_memory(&mut self, block: Platter) -> &[Platter] { 556 + let block = block.into_index(); 557 + assert!(block < self.memory.len()); 558 + self.memory[0] = self.memory[block].clone(); 559 + &self.memory[0] 560 + } 561 + 562 + /// Allocates a block of memory of the specified length. 563 + fn allocate_memory(&mut self, length: Platter) -> Platter { 564 + if let Some(index) = self.free_blocks.pop() { 565 + self.memory[index.into_index()] = Self::new_block(length.into_index()); 566 + index as Platter 567 + } else { 568 + self.memory.push(Self::new_block(length.into_index())); 569 + (self.memory.len() - 1) as Platter 570 + } 571 + } 572 + 573 + /// Frees a block of memory. 574 + fn free_memory(&mut self, block: Platter) { 575 + assert!(block.into_index() < self.memory.len()); 576 + self.free_blocks.push(block); 577 + self.memory[block.into_index()] = Self::new_block(0); 578 + } 579 + 580 + /// Creates a new block of memory. 581 + /// 582 + /// The block is initialised with `len` zeroes. 
583 + fn new_block(len: usize) -> SmallVec<[Platter; SMALLVEC_SIZE]> { 584 + smallvec::smallvec![0; len] 585 + } 586 + } 587 + 588 + #[cfg(test)] 589 + mod tests { 590 + use super::*; 591 + 592 + #[test] 593 + #[should_panic] 594 + fn empty_program() { 595 + Um::new(vec![]).run(); 596 + } 597 + 598 + #[test] 599 + fn just_halt() { 600 + Um::new(vec![0x70000000]).run(); 601 + } 602 + 603 + #[test] 604 + fn hello_world() { 605 + let program = asm::assemble(include_str!("../files/hello-world.asm")); 606 + let mut buffer = Vec::new(); 607 + Um::new(program).stdout(&mut buffer).run(); 608 + assert_eq!(&buffer, b"Hello, world!\n"); 609 + } 610 + 611 + #[test] 612 + fn cat() { 613 + let program = asm::assemble(include_str!("../files/cat.asm")); 614 + let input = include_bytes!("lib.rs"); 615 + 616 + let mut reader = std::io::Cursor::new(input); 617 + let mut buffer = Vec::new(); 618 + 619 + Um::new(program) 620 + .stdin(&mut reader) 621 + .stdout(&mut buffer) 622 + .run(); 623 + 624 + assert_eq!(&buffer, &input); 625 + } 626 + }
-290
src/main.rs
··· 1 - use smallvec::SmallVec; 2 - use std::io::{Read, Write}; 3 - #[cfg(feature = "timing")] 4 - use std::time::Instant; 5 - use um::{Operation, Parameter, Platter}; 6 - 7 - const SMALLVEC_SIZE: usize = 24; 8 - 9 - fn main() { 10 - let mut program = Vec::new(); 11 - for arg in std::env::args().skip(1) { 12 - let p = std::fs::read(arg).unwrap(); 13 - program.extend_from_slice(&p); 14 - } 15 - 16 - Um::from_bytes(program) 17 - .stdout(&mut std::io::stdout()) 18 - .stdin(&mut std::io::stdin()) 19 - .run(); 20 - } 21 - 22 - /// Lossless conversion to `usize`. 23 - /// 24 - /// This should only be implemented on types which can be losslessly 25 - /// cast to a `usize`. 26 - trait IntoIndex: Sized + Copy { 27 - fn into_index(self) -> usize; 28 - } 29 - 30 - macro_rules! impl_into_index { 31 - ($t:ty) => { 32 - impl IntoIndex for $t { 33 - fn into_index(self) -> usize { 34 - self as usize 35 - } 36 - } 37 - }; 38 - } 39 - 40 - #[cfg(target_pointer_width = "16")] 41 - compile_error!("16 bit architectures are unsupported"); 42 - 43 - // usize *may* be 16 bits, so only implement if it is 32 or 64 bits. 44 - #[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] 45 - impl_into_index!(Platter); 46 - impl_into_index!(Parameter); 47 - 48 - #[derive(Default)] 49 - pub struct Um<'a> { 50 - program_counter: Platter, 51 - registers: [Platter; 8], 52 - memory: Vec<SmallVec<[Platter; SMALLVEC_SIZE]>>, 53 - free_blocks: Vec<Platter>, 54 - ops: Vec<Operation>, 55 - stdin: Option<&'a mut dyn Read>, 56 - stdout: Option<&'a mut dyn Write>, 57 - } 58 - 59 - impl<'a> Um<'a> { 60 - /// Initialise a Universal Machine with the specified program scroll. 61 - pub fn new(program: Vec<Platter>) -> Self { 62 - let ops = um::decode_ops(&program); 63 - Self { 64 - memory: vec![program.into()], 65 - ops, 66 - ..Default::default() 67 - } 68 - } 69 - 70 - /// Initialise a Universal Machine with a program read from a legacy 71 - /// unsigned 8-bit character scroll. 
72 - pub fn from_bytes(program: impl AsRef<[u8]>) -> Self { 73 - let bytes = program.as_ref(); 74 - let mut program = Vec::with_capacity(bytes.len().div_ceil(size_of::<Platter>())); 75 - 76 - // Split the program into platters. 77 - let mut chunks = bytes.chunks_exact(size_of::<Platter>()); 78 - for word in &mut chunks { 79 - program.push(Platter::from_be_bytes([word[0], word[1], word[2], word[3]])); 80 - } 81 - 82 - if !chunks.remainder().is_empty() { 83 - eprintln!( 84 - "WARNING: program may be corrupt; {} bytes remain after platter conversion.", 85 - chunks.remainder().len() 86 - ); 87 - } 88 - 89 - Self::new(program) 90 - } 91 - 92 - /// Sets the output for the universal machine. 93 - pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self { 94 - self.stdout.replace(stdout); 95 - self 96 - } 97 - 98 - /// Sets the input for the universal machine. 99 - pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self { 100 - self.stdin.replace(stdin); 101 - self 102 - } 103 - 104 - /// Begins the spin-cycle of the universal machine. 105 - pub fn run(mut self) -> Self { 106 - #[cfg(feature = "timing")] 107 - let start = Instant::now(); 108 - 109 - while self.step() {} 110 - 111 - #[cfg(feature = "timing")] 112 - eprintln!("um complete: {:?}", start.elapsed()); 113 - 114 - self 115 - } 116 - 117 - /// Steps one instruction. 
118 - pub fn step(&mut self) -> bool { 119 - match self.ops[self.program_counter as usize] { 120 - Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 121 - Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 122 - Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 123 - Operation::Addition { a, b, c } => self.addition(a, b, c), 124 - Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 125 - Operation::Division { a, b, c } => self.division(a, b, c), 126 - Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 127 - Operation::Halt => return false, 128 - Operation::Allocation { b, c } => self.allocation(b, c), 129 - Operation::Abandonment { c } => self.abandonment(c), 130 - Operation::Output { c } => self.output(c), 131 - Operation::Input { c } => self.input(c), 132 - Operation::LoadProgram { b, c } => { 133 - self.load_program(b, c); 134 - return true; 135 - } 136 - Operation::Orthography { a, value } => self.orthography(a, value), 137 - Operation::IllegalInstruction => self.illegal_instruction(), 138 - } 139 - self.program_counter += 1; 140 - true 141 - } 142 - 143 - /// Loads the value from the specified register. 144 - fn load_register(&self, index: Parameter) -> Platter { 145 - assert!(index < 8, "register index out of bounds"); 146 - self.registers[index.into_index()] 147 - } 148 - 149 - /// Saves a value to the specified register. 
150 - fn save_register(&mut self, index: Parameter, value: Platter) { 151 - assert!(index < 8, "register index out of bounds"); 152 - self.registers[index.into_index()] = value; 153 - } 154 - 155 - pub fn conditional_move(&mut self, a: Parameter, b: Parameter, c: Parameter) { 156 - if self.load_register(c) != 0 { 157 - self.save_register(a, self.load_register(b)); 158 - } 159 - } 160 - 161 - pub fn array_index(&mut self, a: Parameter, b: Parameter, c: Parameter) { 162 - let block = self.load_register(b); 163 - let offset = self.load_register(c); 164 - self.save_register(a, self.load_memory(block, offset)); 165 - } 166 - 167 - pub fn array_amendment(&mut self, a: Parameter, b: Parameter, c: Parameter) { 168 - let block = self.load_register(a); 169 - let offset = self.load_register(b); 170 - let value = self.load_register(c); 171 - self.store_memory(block, offset, value); 172 - } 173 - 174 - pub fn addition(&mut self, a: Parameter, b: Parameter, c: Parameter) { 175 - self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c))); 176 - } 177 - 178 - pub fn multiplication(&mut self, a: Parameter, b: Parameter, c: Parameter) { 179 - self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c))); 180 - } 181 - 182 - pub fn division(&mut self, a: Parameter, b: Parameter, c: Parameter) { 183 - self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c))); 184 - } 185 - 186 - pub fn not_and(&mut self, a: Parameter, b: Parameter, c: Parameter) { 187 - self.save_register(a, !(self.load_register(b) & self.load_register(c))); 188 - } 189 - 190 - pub fn allocation(&mut self, b: Parameter, c: Parameter) { 191 - let length = self.load_register(c); 192 - let index = self.allocate_memory(length); 193 - self.save_register(b, index); 194 - } 195 - 196 - pub fn abandonment(&mut self, c: Parameter) { 197 - let block = self.load_register(c); 198 - self.free_memory(block); 199 - } 200 - 201 - pub fn output(&mut self, c: Parameter) { 
202 - let value = self.load_register(c); 203 - if let Some(stdout) = self.stdout.as_mut() { 204 - let buffer = [(value & 0xff) as u8]; 205 - stdout.write_all(&buffer).unwrap(); 206 - } 207 - } 208 - 209 - pub fn input(&mut self, c: Parameter) { 210 - if let Some(stdin) = self.stdin.as_mut() { 211 - let mut buffer = vec![0]; 212 - match stdin.read_exact(&mut buffer) { 213 - Ok(()) => self.save_register(c, buffer[0] as u32), 214 - Err(_) => self.save_register(c, 0xff), 215 - } 216 - } else { 217 - self.save_register(c, 0xff); 218 - } 219 - } 220 - 221 - pub fn load_program(&mut self, b: Parameter, c: Parameter) { 222 - let block = self.load_register(b); 223 - 224 - // Source array is always copied to array[0], but there 225 - // is no point copying array[0] to array[0]. 226 - if block != 0 { 227 - let duplicated = self.duplicate_memory(block); 228 - let ops = um::decode_ops(duplicated); 229 - self.ops = ops; 230 - } 231 - 232 - self.program_counter = self.load_register(c); 233 - } 234 - 235 - pub fn orthography(&mut self, a: Parameter, value: Platter) { 236 - self.save_register(a, value); 237 - } 238 - 239 - #[cold] 240 - #[inline(never)] 241 - fn illegal_instruction(&self) -> ! 
{ 242 - panic!( 243 - "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}", 244 - self.memory[0][self.program_counter.into_index()], 245 - self.program_counter, 246 - self.registers 247 - ) 248 - } 249 - 250 - fn load_memory(&self, block: Platter, offset: Platter) -> Platter { 251 - let block = block.into_index(); 252 - let offset = offset.into_index(); 253 - assert!(block < self.memory.len() && offset < self.memory[block].len()); 254 - self.memory[block][offset] 255 - } 256 - 257 - fn store_memory(&mut self, block: Platter, offset: Platter, value: Platter) { 258 - let block = block.into_index(); 259 - let offset = offset.into_index(); 260 - assert!(block < self.memory.len() && offset < self.memory[block].len()); 261 - self.memory[block][offset] = value 262 - } 263 - 264 - fn duplicate_memory(&mut self, block: Platter) -> &[Platter] { 265 - let block = block.into_index(); 266 - assert!(block < self.memory.len()); 267 - self.memory[0] = self.memory[block].clone(); 268 - &self.memory[0] 269 - } 270 - 271 - fn allocate_memory(&mut self, length: Platter) -> Platter { 272 - if let Some(index) = self.free_blocks.pop() { 273 - self.memory[index.into_index()] = Self::new_block(length.into_index()); 274 - index as Platter 275 - } else { 276 - self.memory.push(Self::new_block(length.into_index())); 277 - (self.memory.len() - 1) as Platter 278 - } 279 - } 280 - 281 - fn free_memory(&mut self, block: Platter) { 282 - assert!(block.into_index() < self.memory.len()); 283 - self.free_blocks.push(block); 284 - self.memory[block.into_index()] = Self::new_block(0); 285 - } 286 - 287 - fn new_block(len: usize) -> SmallVec<[Platter; SMALLVEC_SIZE]> { 288 - smallvec::smallvec![0; len] 289 - } 290 - }
+59
src/str.rs
use std::{borrow::Cow, str::CharIndices};

/// Error returned by [`unescape_str`] for a malformed escape sequence.
///
/// The first field is the offending character and the second is its byte
/// offset within the input string. For a backslash that ends the input with
/// nothing after it, the character is `'\\'` and the offset is that of the
/// backslash itself.
#[derive(Debug)]
pub struct InvalidCharacterEscape(pub char, pub usize);

/// Replaces backslash escape sequences in `s` with the characters they denote.
///
/// Recognised escapes are `\\`, `\n`, `\0`, `\"`, `\'`, `\r` and `\t`.
/// When `s` contains no backslash it is returned borrowed, without allocating.
///
/// # Errors
///
/// Returns [`InvalidCharacterEscape`] when a backslash is followed by an
/// unrecognised character, or when the input ends with a dangling backslash.
pub fn unescape_str(s: &str) -> Result<Cow<'_, str>, InvalidCharacterEscape> {
    let mut chars = s.char_indices();

    // `find` consumes the backslash, leaving `chars` on the escaped character.
    match chars.find(|&(_, c)| c == '\\') {
        None => Ok(Cow::Borrowed(s)),
        Some((first, _)) => unescape_tail(s, first, &mut chars).map(Cow::Owned),
    }
}

/// Unescapes the remainder of `s`, given that the backslash at byte offset
/// `first_backslash` has just been consumed from `chars`.
fn unescape_tail(
    s: &str,
    first_backslash: usize,
    chars: &mut CharIndices<'_>,
) -> Result<String, InvalidCharacterEscape> {
    // Unescaping never grows the text, so one allocation is enough.
    let mut buffer = String::with_capacity(s.len());
    buffer.push_str(&s[..first_backslash]);

    // `Some(offset)` while the character following the backslash at `offset`
    // has not been seen yet.
    let mut pending = Some(first_backslash);

    for (index, c) in chars {
        if pending.take().is_some() {
            let unescaped = match c {
                '\\' => '\\',
                'n' => '\n',
                '0' => '\0',
                '"' => '"',
                '\'' => '\'',
                'r' => '\r',
                't' => '\t',
                other => return Err(InvalidCharacterEscape(other, index)),
            };
            buffer.push(unescaped);
        } else if c == '\\' {
            pending = Some(index);
        } else {
            buffer.push(c);
        }
    }

    // A backslash with nothing after it is malformed; do not silently drop it.
    match pending {
        Some(index) => Err(InvalidCharacterEscape('\\', index)),
        None => Ok(buffer),
    }
}

#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::{unescape_str, InvalidCharacterEscape};

    #[test]
    fn no_unescapes() {
        let s = "Hello, this string should have no characters that need unescaping.";
        let u = unescape_str(s).unwrap();

        assert!(matches!(u, Cow::Borrowed(_)));
        assert_eq!(s, u);
    }

    #[test]
    fn known_escapes() {
        let u = unescape_str(r#"a\n\t\r\0\'\"b\\"#).unwrap();

        assert!(matches!(u, Cow::Owned(_)));
        assert_eq!(u, "a\n\t\r\0'\"b\\");
    }

    #[test]
    fn unknown_escape_is_rejected() {
        assert!(matches!(
            unescape_str(r"ab\q"),
            Err(InvalidCharacterEscape('q', 3))
        ));
    }

    #[test]
    fn trailing_backslash_is_rejected() {
        assert!(matches!(
            unescape_str("ab\\"),
            Err(InvalidCharacterEscape('\\', 2))
        ));
    }
}