// Copyright (C) 2025 Thom Hayward. // // This program is free software: you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free Software // Foundation, version 3. // // This program is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more // details. // // You should have received a copy of the GNU General Public License along with // this program. If not, see . // use smallvec::SmallVec; use std::io::{Read, Write}; #[cfg(feature = "asm")] pub mod asm; pub mod conv; pub mod ops; pub mod reg; use ops::Operation; use reg::{Page, Register}; const SMALLVEC_SIZE: usize = 24; /// Lossless conversion to `usize`. /// /// This should only be implemented on types which can be losslessly /// cast to a `usize`. trait IntoIndex: Sized + Copy { fn into_index(self) -> usize; } macro_rules! impl_into_index { ($t:ty) => { impl IntoIndex for $t { fn into_index(self) -> usize { self as usize } } }; } #[cfg(target_pointer_width = "16")] compile_error!("16 bit architectures are unsupported"); // usize *may* be 16 bits, so only implement if it is 32 or 64 bits. #[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] impl_into_index!(u32); #[derive(Default)] pub struct Um<'a> { pub program_counter: u32, registers: Page, /// Program memory, modelled as a `Vec` of `SmallVec`. /// /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory /// indirection penalty for every memory access within that block. memory: Vec>, free_blocks: Vec, /// Partially decoded operations cache. ops: Vec, stdin: Option<&'a mut dyn Read>, stdout: Option<&'a mut dyn Write>, } impl<'a> Um<'a> { /// Initialise a Universal Machine with the specified program scroll. pub fn new(program: Vec) -> Self { let ops = ops::decode(&program); Self { memory: vec![program.into()], ops, ..Default::default() } } /// Sets the output for the universal machine. pub fn stdout(mut self, stdout: &'a mut T) -> Self { self.stdout.replace(stdout); self } /// Sets the input for the universal machine. pub fn stdin(mut self, stdin: &'a mut T) -> Self { self.stdin.replace(stdin); self } /// Begins the spin-cycle of the universal machine. #[inline(never)] pub fn run(mut self) -> Self { loop { // println!( // "{:?}, pc: {:08x}, r: {:08x?}", // self.ops[self.program_counter as usize], self.program_counter, self.registers // ); match self.ops[self.program_counter as usize] { Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), Operation::Addition { a, b, c } => self.addition(a, b, c), Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), Operation::Division { a, b, c } => self.division(a, b, c), Operation::NotAnd { a, b, c } => self.not_and(a, b, c), Operation::Halt => break, Operation::Allocation { b, c } => self.allocation(b, c), Operation::Abandonment { c } => self.abandonment(c), Operation::Output { c } => self.output(c), Operation::Input { c } => self.input(c), Operation::LoadProgram { b, c } => { self.load_program(b, c); continue; } Operation::Orthography { a, value } => self.orthography(a, value), Operation::IllegalInstruction => self.illegal_instruction(), } self.program_counter += 1; } self } // Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even // though it has *no* interaction with the code path in Um::run(). // // /// Steps one instruction. // #[inline(never)] // pub fn step(&mut self) -> bool { // match self.ops[self.program_counter as usize] { // Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), // Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), // Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), // Operation::Addition { a, b, c } => self.addition(a, b, c), // Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), // Operation::Division { a, b, c } => self.division(a, b, c), // Operation::NotAnd { a, b, c } => self.not_and(a, b, c), // Operation::Halt => return false, // Operation::Allocation { b, c } => self.allocation(b, c), // Operation::Abandonment { c } => self.abandonment(c), // Operation::Output { c } => self.output(c), // Operation::Input { c } => self.input(c), // Operation::LoadProgram { b, c } => { // self.load_program(b, c); // return true; // } // Operation::Orthography { a, value } => self.orthography(a, value), // Operation::IllegalInstruction => self.illegal_instruction(), // } // self.program_counter += 1; // true // } /// Loads the value from the specified register. fn load_register(&self, register: Register) -> u32 { self.registers[register] } /// Saves a value to the specified register. fn save_register(&mut self, register: Register, value: u32) { self.registers[register] = value; } fn conditional_move(&mut self, a: Register, b: Register, c: Register) { if self.load_register(c) != 0 { self.save_register(a, self.load_register(b)); } } fn array_index(&mut self, a: Register, b: Register, c: Register) { let block = self.load_register(b); let offset = self.load_register(c); self.save_register(a, self.load_memory(block, offset)); } fn array_amendment(&mut self, a: Register, b: Register, c: Register) { let block = self.load_register(a); let offset = self.load_register(b); let value = self.load_register(c); self.store_memory(block, offset, value); } fn addition(&mut self, a: Register, b: Register, c: Register) { self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c))); } fn multiplication(&mut self, a: Register, b: Register, c: Register) { self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c))); } fn division(&mut self, a: Register, b: Register, c: Register) { self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c))); } fn not_and(&mut self, a: Register, b: Register, c: Register) { self.save_register(a, !(self.load_register(b) & self.load_register(c))); } fn allocation(&mut self, b: Register, c: Register) { let length = self.load_register(c); let index = self.allocate_memory(length); self.save_register(b, index); } fn abandonment(&mut self, c: Register) { let block = self.load_register(c); self.free_memory(block); } fn output(&mut self, c: Register) { let value = self.load_register(c); if let Some(stdout) = self.stdout.as_mut() { let buffer = [(value & 0xff) as u8]; stdout.write_all(&buffer).unwrap(); } } fn input(&mut self, c: Register) { if let Some(stdin) = self.stdin.as_mut() { let mut buffer = vec![0]; match stdin.read_exact(&mut buffer) { Ok(()) => self.save_register(c, buffer[0] as u32), Err(_) => self.save_register(c, u32::MAX), } } else { self.save_register(c, u32::MAX); } } fn load_program(&mut self, b: Register, c: Register) { let block = self.load_register(b); // Source array is always copied to array[0], but there // is no point copying array[0] to array[0]. if block != 0 { let duplicated = self.duplicate_memory(block); let ops = ops::decode(duplicated); self.ops = ops; } self.program_counter = self.load_register(c); } fn orthography(&mut self, a: Register, value: u32) { self.save_register(a, value); } #[cold] #[inline(never)] fn illegal_instruction(&self) -> ! { panic!( "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}", self.memory[0][self.program_counter.into_index()], self.program_counter, self.registers ) } fn load_memory(&self, block: u32, offset: u32) -> u32 { let block = block.into_index(); let offset = offset.into_index(); assert!(block < self.memory.len() && offset < self.memory[block].len()); self.memory[block][offset] } fn store_memory(&mut self, block: u32, offset: u32, value: u32) { let block = block.into_index(); let offset = offset.into_index(); assert!(block < self.memory.len() && offset < self.memory[block].len()); self.memory[block][offset] = value } /// Duplicates a block of memory. /// /// The block is copied to the first block of memory. fn duplicate_memory(&mut self, block: u32) -> &[u32] { let block = block.into_index(); assert!(block < self.memory.len()); self.memory[0] = self.memory[block].clone(); &self.memory[0] } /// Allocates a block of memory of the specified length. fn allocate_memory(&mut self, length: u32) -> u32 { if let Some(index) = self.free_blocks.pop() { self.memory[index.into_index()] = Self::new_block(length.into_index()); index } else { self.memory.push(Self::new_block(length.into_index())); (self.memory.len() - 1) as u32 } } /// Frees a block of memory. fn free_memory(&mut self, block: u32) { assert!(block.into_index() < self.memory.len()); self.free_blocks.push(block); self.memory[block.into_index()] = Self::new_block(0); } /// Creates a new block of memory. /// /// The block is initialised with `len` zeroes. fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> { smallvec::smallvec![0; len] } } #[cfg(test)] mod tests { use super::*; #[test] #[should_panic] fn empty_program() { Um::new(vec![]).run(); } #[test] fn just_halt() { Um::new(vec![0x70000000]).run(); } #[test] #[cfg(feature = "asm")] fn hello_world() { let program = asm::assemble(include_str!("../files/hello-world.asm")); let mut buffer = Vec::new(); Um::new(program).stdout(&mut buffer).run(); assert_eq!(&buffer, b"Hello, world!\n"); } #[test] #[cfg(feature = "asm")] fn cat() { let program = asm::assemble(include_str!("../files/cat.asm")); let input = include_bytes!("lib.rs"); let mut reader = std::io::Cursor::new(input); let mut buffer = Vec::new(); Um::new(program) .stdin(&mut reader) .stdout(&mut buffer) .run(); assert_eq!(&buffer, &input); } }