Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
at main 12 kB view raw
1// Copyright (C) 2025 Thom Hayward. 2// 3// This program is free software: you can redistribute it and/or modify it under 4// the terms of the GNU General Public License as published by the Free Software 5// Foundation, version 3. 6// 7// This program is distributed in the hope that it will be useful, but WITHOUT 8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 10// details. 11// 12// You should have received a copy of the GNU General Public License along with 13// this program. If not, see <https://www.gnu.org/licenses/>. 14// 15mod lexer; 16mod parse; 17 18use crate::Register; 19use lexer::Token; 20use parse::{Instruction, Node, NodeType, PragmaType}; 21use std::collections::HashMap; 22 23#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] 24enum Section { 25 Text, 26 Data, 27} 28 29pub fn assemble<'s>(source: &'s str) -> Vec<u32> { 30 let parsed = parse::parse("", source).unwrap(); 31 32 let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new(); 33 let mut offsets: HashMap<Section, usize> = HashMap::new(); 34 let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new(); 35 for node in parsed.nodes().iter() { 36 match node.entity { 37 NodeType::Pragma(_) => { 38 let loc = *offsets 39 .entry(Section::Data) 40 .and_modify(|loc| *loc += node.size()) 41 .or_default(); 42 43 sections 44 .entry(Section::Data) 45 .and_modify(|section| section.push(node)) 46 .or_insert(vec![node]); 47 48 for label in &node.labels { 49 label_locations.insert(label, (Section::Data, loc)); 50 } 51 } 52 NodeType::Instruction(_) => { 53 let loc = *offsets 54 .entry(Section::Text) 55 .and_modify(|loc| *loc += node.size()) 56 .or_default(); 57 58 sections 59 .entry(Section::Text) 60 .and_modify(|section| section.push(node)) 61 .or_insert(vec![node]); 62 63 for label in &node.labels { 64 label_locations.insert(label, (Section::Text, loc)); 65 } 66 } 67 _ => {} 68 } 69 } 70 71 let text = sections.remove(&Section::Text).unwrap(); 72 let data_offset = text.len(); 73 74 let mut program = vec![]; 75 for node in text.into_iter() { 76 let NodeType::Instruction(instruction) = &node.entity else { 77 panic!("invalid node in .text section"); 78 }; 79 80 let encoded = match instruction { 81 Instruction::ConditionalMove { 82 destination, 83 source, 84 condition, 85 } => encode_standard(0x00, destination, source, condition), 86 Instruction::Load { 87 destination, 88 address, 89 } => { 90 let parse::Location { block, offset } = address; 91 encode_standard(0x01, destination, block, offset) 92 } 93 Instruction::Store { source, address } => { 94 let parse::Location { block, offset } = address; 95 encode_standard(0x02, block, offset, source) 96 } 97 Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b), 98 Instruction::AddAssign { destination, a } => { 99 encode_standard(0x03, destination, destination, a) 100 } 101 Instruction::AddSelf { destination } => { 102 encode_standard(0x03, destination, destination, destination) 103 } 104 Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b), 105 Instruction::MulAssign { destination, a } => { 106 encode_standard(0x04, destination, destination, a) 107 } 108 Instruction::MulSelf { destination } => { 109 encode_standard(0x04, destination, destination, destination) 110 } 111 Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b), 112 Instruction::DivAssign { destination, a } => { 113 encode_standard(0x05, destination, destination, a) 114 } 115 Instruction::DivSelf { destination } => { 116 encode_standard(0x05, destination, destination, destination) 117 } 118 Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b), 119 Instruction::NandAssign { destination, a } => { 120 encode_standard(0x06, destination, destination, a) 121 } 122 Instruction::NandSelf { destination } => { 123 encode_standard(0x06, destination, destination, destination) 124 } 125 Instruction::Halt => encode_standard( 126 0x07, 127 &Default::default(), 128 &Default::default(), 129 &Default::default(), 130 ), 131 Instruction::Alloc { 132 destination, 133 length, 134 } => encode_standard(0x08, &Register::default(), destination, length), 135 Instruction::Free { block } => { 136 encode_standard(0x09, &Register::default(), &Register::default(), block) 137 } 138 Instruction::Out { source } => { 139 encode_standard(0x0a, &Default::default(), &Default::default(), source) 140 } 141 Instruction::In { destination } => { 142 encode_standard(0x0b, &Default::default(), &Default::default(), destination) 143 } 144 Instruction::Jmp { location } => { 145 let parse::Location { block, offset } = location; 146 encode_standard(0x0c, &Register::default(), block, offset) 147 } 148 Instruction::Address { 149 destination, 150 reference, 151 } => { 152 // lookup reference 153 let Some((section, offset)) = label_locations.get(reference.label) else { 154 panic!("failed to resolve {}", reference.label); 155 }; 156 157 let value = match section { 158 Section::Text => *offset, 159 Section::Data => data_offset + *offset, 160 }; 161 162 0xd0000000 | destination.encode_a_ortho() | encode_literal(value as u32) 163 } 164 Instruction::LiteralMove { 165 destination, 166 literal, 167 } => 0xd0000000 | destination.encode_a_ortho() | encode_literal(*literal), 168 }; 169 170 program.push(encoded); 171 } 172 173 if let Some(data) = sections.remove(&Section::Data) { 174 for node in data.into_iter() { 175 let NodeType::Pragma(pragma) = &node.entity else { 176 panic!("invalid node in .data section. {node:?}"); 177 }; 178 179 let encoded = match &pragma.payload { 180 PragmaType::WideString { value } => { 181 for byte in value.as_bytes() { 182 program.push(*byte as u32); 183 } 184 Some(0) // terminating byte. 185 } 186 PragmaType::U32 { value } => Some(*value), 187 }; 188 189 if let Some(encoded) = encoded { 190 program.push(encoded); 191 } 192 } 193 } 194 195 program 196} 197 198fn encode_literal(value: u32) -> u32 { 199 const LITERAL_MAX: u32 = 0x1ffffff; 200 assert!(value <= LITERAL_MAX, "literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})"); 201 value 202} 203 204fn encode_standard(op: u32, a: &Register, b: &Register, c: &Register) -> u32 { 205 (op << 28) | a.encode_a() | b.encode_b() | c.encode_c() 206} 207 208#[cfg(test)] 209mod tests { 210 use super::*; 211 use crate::{Operation, Register::*}; 212 213 #[test] 214 fn wide_str() { 215 // Embed a wide string and get a reference to it. 216 let program = assemble( 217 r#" 218 adr r0, msg 219 msg: .wstr "Hello" 220 "#, 221 ); 222 223 let ops = crate::ops::decode(&program); 224 assert_eq!(ops[0], Operation::Orthography { a: R0, value: 1 }); 225 226 let mut platters = program.into_iter().skip(1); 227 assert_eq!(platters.next(), Some('H' as u32)); 228 assert_eq!(platters.next(), Some('e' as u32)); 229 assert_eq!(platters.next(), Some('l' as u32)); 230 assert_eq!(platters.next(), Some('l' as u32)); 231 assert_eq!(platters.next(), Some('o' as u32)); 232 assert_eq!(platters.next(), Some(0)); 233 assert_eq!(platters.next(), None); 234 } 235 236 #[test] 237 fn addresses() { 238 let program = assemble( 239 r#" 240 halt 241 start: 242 ldr r2, [r0, r1] 243 str r2, [r0, r1] 244 adr r3, start 245 halt 246 "#, 247 ); 248 249 let mut ops = crate::ops::decode(&program).into_iter(); 250 251 assert_eq!(ops.next(), Some(Operation::Halt)); 252 assert_eq!( 253 ops.next(), 254 Some(Operation::ArrayIndex { 255 a: R2, 256 b: R0, 257 c: R1 258 }) 259 ); 260 assert_eq!( 261 ops.next(), 262 Some(Operation::ArrayAmendment { 263 a: R0, 264 b: R1, 265 c: R2 266 }) 267 ); 268 assert_eq!(ops.next(), Some(Operation::Orthography { a: R3, value: 1 })); 269 assert_eq!(ops.next(), Some(Operation::Halt)); 270 assert_eq!(ops.next(), None); 271 } 272 273 #[test] 274 fn load_store() { 275 let state = crate::Um::new(assemble( 276 r#" 277 adr r1, loc 278 ldr r2, [r0, r1] 279 mov r3, 56 280 str r3, [r0, r1] 281 halt 282 loc:.u32 42 283 "#, 284 )) 285 .run(); 286 assert_eq!(state.registers[R2], 42); 287 assert_eq!(state.memory[0][5], 56); 288 } 289 290 #[test] 291 fn addition() { 292 let state = crate::Um::new(assemble( 293 r#" 294 mov r0, 42 295 mov r1, 64 296 mov r2, 8192 297 298 add r3, r0, r1 ; r3 = r0 + r1 = 106 299 add r1, r2 ; r1 = r1 + r2 = 8256 300 add r0 ; r0 = r0 + r0 = 84 301 302 halt 303 "#, 304 )) 305 .run(); 306 307 assert_eq!(state.registers[R0], 84); 308 assert_eq!(state.registers[R1], 8256); 309 assert_eq!(state.registers[R2], 8192); 310 assert_eq!(state.registers[R3], 106); 311 } 312 313 #[test] 314 fn alloc() { 315 let state = crate::Um::new(assemble( 316 r#" 317 ; Allocate 1000 bytes. 318 mov r0, 1000 319 alloc r1, r0 320 halt 321 "#, 322 )) 323 .run(); 324 assert_eq!(state.registers[R0], 1000); 325 assert_ne!(state.registers[R1], 0); 326 assert_eq!(state.memory[state.registers[R1] as usize].len(), 1000); 327 } 328 329 #[test] 330 fn free() { 331 let state = crate::Um::new(assemble( 332 r#" 333 ; Allocate 1000 bytes. 334 mov r0, 1000 335 alloc r1, r0 336 free r1 337 halt 338 "#, 339 )) 340 .run(); 341 assert_eq!(state.registers[R0], 1000); 342 assert_ne!(state.registers[R1], 0); 343 assert_eq!( 344 state.memory[state.registers[R1] as usize].len(), 345 0, 346 "memory not free'd" 347 ); 348 } 349}