Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
at main 12 kB view raw
1// Copyright (C) 2025 Thom Hayward. 2// 3// This program is free software: you can redistribute it and/or modify it under 4// the terms of the GNU General Public License as published by the Free Software 5// Foundation, version 3. 6// 7// This program is distributed in the hope that it will be useful, but WITHOUT 8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 10// details. 11// 12// You should have received a copy of the GNU General Public License along with 13// this program. If not, see <https://www.gnu.org/licenses/>. 14// 15use smallvec::SmallVec; 16use std::io::{Read, Write}; 17 18#[cfg(feature = "asm")] 19pub mod asm; 20pub mod conv; 21pub mod ops; 22pub mod reg; 23 24use ops::Operation; 25use reg::{Page, Register}; 26 27const SMALLVEC_SIZE: usize = 24; 28 29/// Lossless conversion to `usize`. 30/// 31/// This should only be implemented on types which can be losslessly 32/// cast to a `usize`. 33trait IntoIndex: Sized + Copy { 34 fn into_index(self) -> usize; 35} 36 37macro_rules! impl_into_index { 38 ($t:ty) => { 39 impl IntoIndex for $t { 40 fn into_index(self) -> usize { 41 self as usize 42 } 43 } 44 }; 45} 46 47#[cfg(target_pointer_width = "16")] 48compile_error!("16 bit architectures are unsupported"); 49 50// usize *may* be 16 bits, so only implement if it is 32 or 64 bits. 51#[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] 52impl_into_index!(u32); 53 54#[derive(Default)] 55pub struct Um<'a> { 56 pub program_counter: u32, 57 registers: Page, 58 /// Program memory, modelled as a `Vec` of `SmallVec`. 59 /// 60 /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory 61 /// indirection penalty for every memory access within that block. 62 memory: Vec<SmallVec<[u32; SMALLVEC_SIZE]>>, 63 free_blocks: Vec<u32>, 64 /// Partially decoded operations cache. 65 ops: Vec<Operation>, 66 stdin: Option<&'a mut dyn Read>, 67 stdout: Option<&'a mut dyn Write>, 68} 69 70impl<'a> Um<'a> { 71 /// Initialise a Universal Machine with the specified program scroll. 72 pub fn new(program: Vec<u32>) -> Self { 73 let ops = ops::decode(&program); 74 Self { 75 memory: vec![program.into()], 76 ops, 77 ..Default::default() 78 } 79 } 80 81 /// Sets the output for the universal machine. 82 pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self { 83 self.stdout.replace(stdout); 84 self 85 } 86 87 /// Sets the input for the universal machine. 88 pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self { 89 self.stdin.replace(stdin); 90 self 91 } 92 93 /// Begins the spin-cycle of the universal machine. 94 #[inline(never)] 95 pub fn run(mut self) -> Self { 96 loop { 97 // println!( 98 // "{:?}, pc: {:08x}, r: {:08x?}", 99 // self.ops[self.program_counter as usize], self.program_counter, self.registers 100 // ); 101 match self.ops[self.program_counter as usize] { 102 Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 103 Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 104 Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 105 Operation::Addition { a, b, c } => self.addition(a, b, c), 106 Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 107 Operation::Division { a, b, c } => self.division(a, b, c), 108 Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 109 Operation::Halt => break, 110 Operation::Allocation { b, c } => self.allocation(b, c), 111 Operation::Abandonment { c } => self.abandonment(c), 112 Operation::Output { c } => self.output(c), 113 Operation::Input { c } => self.input(c), 114 Operation::LoadProgram { b, c } => { 115 self.load_program(b, c); 116 continue; 117 } 118 Operation::Orthography { a, value } => self.orthography(a, value), 119 Operation::IllegalInstruction => self.illegal_instruction(), 120 } 121 self.program_counter += 1; 122 } 123 124 self 125 } 126 127 // Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even 128 // though it has *no* interaction with the code path in Um::run(). 129 // 130 // /// Steps one instruction. 131 // #[inline(never)] 132 // pub fn step(&mut self) -> bool { 133 // match self.ops[self.program_counter as usize] { 134 // Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 135 // Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 136 // Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 137 // Operation::Addition { a, b, c } => self.addition(a, b, c), 138 // Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 139 // Operation::Division { a, b, c } => self.division(a, b, c), 140 // Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 141 // Operation::Halt => return false, 142 // Operation::Allocation { b, c } => self.allocation(b, c), 143 // Operation::Abandonment { c } => self.abandonment(c), 144 // Operation::Output { c } => self.output(c), 145 // Operation::Input { c } => self.input(c), 146 // Operation::LoadProgram { b, c } => { 147 // self.load_program(b, c); 148 // return true; 149 // } 150 // Operation::Orthography { a, value } => self.orthography(a, value), 151 // Operation::IllegalInstruction => self.illegal_instruction(), 152 // } 153 // self.program_counter += 1; 154 // true 155 // } 156 157 /// Loads the value from the specified register. 158 fn load_register(&self, register: Register) -> u32 { 159 self.registers[register] 160 } 161 162 /// Saves a value to the specified register. 163 fn save_register(&mut self, register: Register, value: u32) { 164 self.registers[register] = value; 165 } 166 167 fn conditional_move(&mut self, a: Register, b: Register, c: Register) { 168 if self.load_register(c) != 0 { 169 self.save_register(a, self.load_register(b)); 170 } 171 } 172 173 fn array_index(&mut self, a: Register, b: Register, c: Register) { 174 let block = self.load_register(b); 175 let offset = self.load_register(c); 176 self.save_register(a, self.load_memory(block, offset)); 177 } 178 179 fn array_amendment(&mut self, a: Register, b: Register, c: Register) { 180 let block = self.load_register(a); 181 let offset = self.load_register(b); 182 let value = self.load_register(c); 183 self.store_memory(block, offset, value); 184 } 185 186 fn addition(&mut self, a: Register, b: Register, c: Register) { 187 self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c))); 188 } 189 190 fn multiplication(&mut self, a: Register, b: Register, c: Register) { 191 self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c))); 192 } 193 194 fn division(&mut self, a: Register, b: Register, c: Register) { 195 self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c))); 196 } 197 198 fn not_and(&mut self, a: Register, b: Register, c: Register) { 199 self.save_register(a, !(self.load_register(b) & self.load_register(c))); 200 } 201 202 fn allocation(&mut self, b: Register, c: Register) { 203 let length = self.load_register(c); 204 let index = self.allocate_memory(length); 205 self.save_register(b, index); 206 } 207 208 fn abandonment(&mut self, c: Register) { 209 let block = self.load_register(c); 210 self.free_memory(block); 211 } 212 213 fn output(&mut self, c: Register) { 214 let value = self.load_register(c); 215 if let Some(stdout) = self.stdout.as_mut() { 216 let buffer = [(value & 0xff) as u8]; 217 stdout.write_all(&buffer).unwrap(); 218 } 219 } 220 221 fn input(&mut self, c: Register) { 222 if let Some(stdin) = self.stdin.as_mut() { 223 let mut buffer = vec![0]; 224 match stdin.read_exact(&mut buffer) { 225 Ok(()) => self.save_register(c, buffer[0] as u32), 226 Err(_) => self.save_register(c, u32::MAX), 227 } 228 } else { 229 self.save_register(c, u32::MAX); 230 } 231 } 232 233 fn load_program(&mut self, b: Register, c: Register) { 234 let block = self.load_register(b); 235 236 // Source array is always copied to array[0], but there 237 // is no point copying array[0] to array[0]. 238 if block != 0 { 239 let duplicated = self.duplicate_memory(block); 240 let ops = ops::decode(duplicated); 241 self.ops = ops; 242 } 243 244 self.program_counter = self.load_register(c); 245 } 246 247 fn orthography(&mut self, a: Register, value: u32) { 248 self.save_register(a, value); 249 } 250 251 #[cold] 252 #[inline(never)] 253 fn illegal_instruction(&self) -> ! { 254 panic!( 255 "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}", 256 self.memory[0][self.program_counter.into_index()], 257 self.program_counter, 258 self.registers 259 ) 260 } 261 262 fn load_memory(&self, block: u32, offset: u32) -> u32 { 263 let block = block.into_index(); 264 let offset = offset.into_index(); 265 assert!(block < self.memory.len() && offset < self.memory[block].len()); 266 self.memory[block][offset] 267 } 268 269 fn store_memory(&mut self, block: u32, offset: u32, value: u32) { 270 let block = block.into_index(); 271 let offset = offset.into_index(); 272 assert!(block < self.memory.len() && offset < self.memory[block].len()); 273 self.memory[block][offset] = value 274 } 275 276 /// Duplicates a block of memory. 277 /// 278 /// The block is copied to the first block of memory. 279 fn duplicate_memory(&mut self, block: u32) -> &[u32] { 280 let block = block.into_index(); 281 assert!(block < self.memory.len()); 282 self.memory[0] = self.memory[block].clone(); 283 &self.memory[0] 284 } 285 286 /// Allocates a block of memory of the specified length. 287 fn allocate_memory(&mut self, length: u32) -> u32 { 288 if let Some(index) = self.free_blocks.pop() { 289 self.memory[index.into_index()] = Self::new_block(length.into_index()); 290 index 291 } else { 292 self.memory.push(Self::new_block(length.into_index())); 293 (self.memory.len() - 1) as u32 294 } 295 } 296 297 /// Frees a block of memory. 298 fn free_memory(&mut self, block: u32) { 299 assert!(block.into_index() < self.memory.len()); 300 self.free_blocks.push(block); 301 self.memory[block.into_index()] = Self::new_block(0); 302 } 303 304 /// Creates a new block of memory. 305 /// 306 /// The block is initialised with `len` zeroes. 307 fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> { 308 smallvec::smallvec![0; len] 309 } 310} 311 312#[cfg(test)] 313mod tests { 314 use super::*; 315 316 #[test] 317 #[should_panic] 318 fn empty_program() { 319 Um::new(vec![]).run(); 320 } 321 322 #[test] 323 fn just_halt() { 324 Um::new(vec![0x70000000]).run(); 325 } 326 327 #[test] 328 #[cfg(feature = "asm")] 329 fn hello_world() { 330 let program = asm::assemble(include_str!("../files/hello-world.asm")); 331 let mut buffer = Vec::new(); 332 Um::new(program).stdout(&mut buffer).run(); 333 assert_eq!(&buffer, b"Hello, world!\n"); 334 } 335 336 #[test] 337 #[cfg(feature = "asm")] 338 fn cat() { 339 let program = asm::assemble(include_str!("../files/cat.asm")); 340 let input = include_bytes!("lib.rs"); 341 342 let mut reader = std::io::Cursor::new(input); 343 let mut buffer = Vec::new(); 344 345 Um::new(program) 346 .stdin(&mut reader) 347 .stdout(&mut buffer) 348 .run(); 349 350 assert_eq!(&buffer, &input); 351 } 352}