Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
1// Copyright (C) 2025 Thom Hayward.
2//
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the GNU General Public License as published by the Free Software
5// Foundation, version 3.
6//
7// This program is distributed in the hope that it will be useful, but WITHOUT
8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
10// details.
11//
12// You should have received a copy of the GNU General Public License along with
13// this program. If not, see <https://www.gnu.org/licenses/>.
14//
15use smallvec::SmallVec;
16use std::io::{Read, Write};
17
18#[cfg(feature = "asm")]
19pub mod asm;
20pub mod conv;
21pub mod ops;
22pub mod reg;
23
24use ops::Operation;
25use reg::{Page, Register};
26
27const SMALLVEC_SIZE: usize = 24;
28
29/// Lossless conversion to `usize`.
30///
31/// This should only be implemented on types which can be losslessly
32/// cast to a `usize`.
33trait IntoIndex: Sized + Copy {
34 fn into_index(self) -> usize;
35}
36
37macro_rules! impl_into_index {
38 ($t:ty) => {
39 impl IntoIndex for $t {
40 fn into_index(self) -> usize {
41 self as usize
42 }
43 }
44 };
45}
46
47#[cfg(target_pointer_width = "16")]
48compile_error!("16 bit architectures are unsupported");
49
50// usize *may* be 16 bits, so only implement if it is 32 or 64 bits.
51#[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))]
52impl_into_index!(u32);
53
54#[derive(Default)]
55pub struct Um<'a> {
56 pub program_counter: u32,
57 registers: Page,
58 /// Program memory, modelled as a `Vec` of `SmallVec`.
59 ///
60 /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory
61 /// indirection penalty for every memory access within that block.
62 memory: Vec<SmallVec<[u32; SMALLVEC_SIZE]>>,
63 free_blocks: Vec<u32>,
64 /// Partially decoded operations cache.
65 ops: Vec<Operation>,
66 stdin: Option<&'a mut dyn Read>,
67 stdout: Option<&'a mut dyn Write>,
68}
69
70impl<'a> Um<'a> {
71 /// Initialise a Universal Machine with the specified program scroll.
72 pub fn new(program: Vec<u32>) -> Self {
73 let ops = ops::decode(&program);
74 Self {
75 memory: vec![program.into()],
76 ops,
77 ..Default::default()
78 }
79 }
80
81 /// Sets the output for the universal machine.
82 pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self {
83 self.stdout.replace(stdout);
84 self
85 }
86
87 /// Sets the input for the universal machine.
88 pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self {
89 self.stdin.replace(stdin);
90 self
91 }
92
93 /// Begins the spin-cycle of the universal machine.
94 #[inline(never)]
95 pub fn run(mut self) -> Self {
96 loop {
97 // println!(
98 // "{:?}, pc: {:08x}, r: {:08x?}",
99 // self.ops[self.program_counter as usize], self.program_counter, self.registers
100 // );
101 match self.ops[self.program_counter as usize] {
102 Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
103 Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
104 Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
105 Operation::Addition { a, b, c } => self.addition(a, b, c),
106 Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
107 Operation::Division { a, b, c } => self.division(a, b, c),
108 Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
109 Operation::Halt => break,
110 Operation::Allocation { b, c } => self.allocation(b, c),
111 Operation::Abandonment { c } => self.abandonment(c),
112 Operation::Output { c } => self.output(c),
113 Operation::Input { c } => self.input(c),
114 Operation::LoadProgram { b, c } => {
115 self.load_program(b, c);
116 continue;
117 }
118 Operation::Orthography { a, value } => self.orthography(a, value),
119 Operation::IllegalInstruction => self.illegal_instruction(),
120 }
121 self.program_counter += 1;
122 }
123
124 self
125 }
126
127 // Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even
128 // though it has *no* interaction with the code path in Um::run().
129 //
130 // /// Steps one instruction.
131 // #[inline(never)]
132 // pub fn step(&mut self) -> bool {
133 // match self.ops[self.program_counter as usize] {
134 // Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
135 // Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
136 // Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
137 // Operation::Addition { a, b, c } => self.addition(a, b, c),
138 // Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
139 // Operation::Division { a, b, c } => self.division(a, b, c),
140 // Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
141 // Operation::Halt => return false,
142 // Operation::Allocation { b, c } => self.allocation(b, c),
143 // Operation::Abandonment { c } => self.abandonment(c),
144 // Operation::Output { c } => self.output(c),
145 // Operation::Input { c } => self.input(c),
146 // Operation::LoadProgram { b, c } => {
147 // self.load_program(b, c);
148 // return true;
149 // }
150 // Operation::Orthography { a, value } => self.orthography(a, value),
151 // Operation::IllegalInstruction => self.illegal_instruction(),
152 // }
153 // self.program_counter += 1;
154 // true
155 // }
156
157 /// Loads the value from the specified register.
158 fn load_register(&self, register: Register) -> u32 {
159 self.registers[register]
160 }
161
162 /// Saves a value to the specified register.
163 fn save_register(&mut self, register: Register, value: u32) {
164 self.registers[register] = value;
165 }
166
167 fn conditional_move(&mut self, a: Register, b: Register, c: Register) {
168 if self.load_register(c) != 0 {
169 self.save_register(a, self.load_register(b));
170 }
171 }
172
173 fn array_index(&mut self, a: Register, b: Register, c: Register) {
174 let block = self.load_register(b);
175 let offset = self.load_register(c);
176 self.save_register(a, self.load_memory(block, offset));
177 }
178
179 fn array_amendment(&mut self, a: Register, b: Register, c: Register) {
180 let block = self.load_register(a);
181 let offset = self.load_register(b);
182 let value = self.load_register(c);
183 self.store_memory(block, offset, value);
184 }
185
186 fn addition(&mut self, a: Register, b: Register, c: Register) {
187 self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c)));
188 }
189
190 fn multiplication(&mut self, a: Register, b: Register, c: Register) {
191 self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c)));
192 }
193
194 fn division(&mut self, a: Register, b: Register, c: Register) {
195 self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c)));
196 }
197
198 fn not_and(&mut self, a: Register, b: Register, c: Register) {
199 self.save_register(a, !(self.load_register(b) & self.load_register(c)));
200 }
201
202 fn allocation(&mut self, b: Register, c: Register) {
203 let length = self.load_register(c);
204 let index = self.allocate_memory(length);
205 self.save_register(b, index);
206 }
207
208 fn abandonment(&mut self, c: Register) {
209 let block = self.load_register(c);
210 self.free_memory(block);
211 }
212
213 fn output(&mut self, c: Register) {
214 let value = self.load_register(c);
215 if let Some(stdout) = self.stdout.as_mut() {
216 let buffer = [(value & 0xff) as u8];
217 stdout.write_all(&buffer).unwrap();
218 }
219 }
220
221 fn input(&mut self, c: Register) {
222 if let Some(stdin) = self.stdin.as_mut() {
223 let mut buffer = vec![0];
224 match stdin.read_exact(&mut buffer) {
225 Ok(()) => self.save_register(c, buffer[0] as u32),
226 Err(_) => self.save_register(c, u32::MAX),
227 }
228 } else {
229 self.save_register(c, u32::MAX);
230 }
231 }
232
233 fn load_program(&mut self, b: Register, c: Register) {
234 let block = self.load_register(b);
235
236 // Source array is always copied to array[0], but there
237 // is no point copying array[0] to array[0].
238 if block != 0 {
239 let duplicated = self.duplicate_memory(block);
240 let ops = ops::decode(duplicated);
241 self.ops = ops;
242 }
243
244 self.program_counter = self.load_register(c);
245 }
246
247 fn orthography(&mut self, a: Register, value: u32) {
248 self.save_register(a, value);
249 }
250
251 #[cold]
252 #[inline(never)]
253 fn illegal_instruction(&self) -> ! {
254 panic!(
255 "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}",
256 self.memory[0][self.program_counter.into_index()],
257 self.program_counter,
258 self.registers
259 )
260 }
261
262 fn load_memory(&self, block: u32, offset: u32) -> u32 {
263 let block = block.into_index();
264 let offset = offset.into_index();
265 assert!(block < self.memory.len() && offset < self.memory[block].len());
266 self.memory[block][offset]
267 }
268
269 fn store_memory(&mut self, block: u32, offset: u32, value: u32) {
270 let block = block.into_index();
271 let offset = offset.into_index();
272 assert!(block < self.memory.len() && offset < self.memory[block].len());
273 self.memory[block][offset] = value
274 }
275
276 /// Duplicates a block of memory.
277 ///
278 /// The block is copied to the first block of memory.
279 fn duplicate_memory(&mut self, block: u32) -> &[u32] {
280 let block = block.into_index();
281 assert!(block < self.memory.len());
282 self.memory[0] = self.memory[block].clone();
283 &self.memory[0]
284 }
285
286 /// Allocates a block of memory of the specified length.
287 fn allocate_memory(&mut self, length: u32) -> u32 {
288 if let Some(index) = self.free_blocks.pop() {
289 self.memory[index.into_index()] = Self::new_block(length.into_index());
290 index
291 } else {
292 self.memory.push(Self::new_block(length.into_index()));
293 (self.memory.len() - 1) as u32
294 }
295 }
296
297 /// Frees a block of memory.
298 fn free_memory(&mut self, block: u32) {
299 assert!(block.into_index() < self.memory.len());
300 self.free_blocks.push(block);
301 self.memory[block.into_index()] = Self::new_block(0);
302 }
303
304 /// Creates a new block of memory.
305 ///
306 /// The block is initialised with `len` zeroes.
307 fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> {
308 smallvec::smallvec![0; len]
309 }
310}
311
312#[cfg(test)]
313mod tests {
314 use super::*;
315
316 #[test]
317 #[should_panic]
318 fn empty_program() {
319 Um::new(vec![]).run();
320 }
321
322 #[test]
323 fn just_halt() {
324 Um::new(vec![0x70000000]).run();
325 }
326
327 #[test]
328 #[cfg(feature = "asm")]
329 fn hello_world() {
330 let program = asm::assemble(include_str!("../files/hello-world.asm"));
331 let mut buffer = Vec::new();
332 Um::new(program).stdout(&mut buffer).run();
333 assert_eq!(&buffer, b"Hello, world!\n");
334 }
335
336 #[test]
337 #[cfg(feature = "asm")]
338 fn cat() {
339 let program = asm::assemble(include_str!("../files/cat.asm"));
340 let input = include_bytes!("lib.rs");
341
342 let mut reader = std::io::Cursor::new(input);
343 let mut buffer = Vec::new();
344
345 Um::new(program)
346 .stdin(&mut reader)
347 .stdout(&mut buffer)
348 .run();
349
350 assert_eq!(&buffer, &input);
351 }
352}