Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
1// Copyright (C) 2025 Thom Hayward.
2//
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the GNU General Public License as published by the Free Software
5// Foundation, version 3.
6//
7// This program is distributed in the hope that it will be useful, but WITHOUT
8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
10// details.
11//
12// You should have received a copy of the GNU General Public License along with
13// this program. If not, see <https://www.gnu.org/licenses/>.
14//
15mod lexer;
16mod parse;
17
18use crate::Register;
19use lexer::Token;
20use parse::{Instruction, Node, NodeType, PragmaType};
21use std::collections::HashMap;
22
/// Region of the assembled program image that a parsed node is emitted into.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Section {
    /// Encoded instructions; emitted first.
    Text,
    /// Pragma payloads (strings, literal words); emitted after the text.
    Data,
}
28
29pub fn assemble<'s>(source: &'s str) -> Vec<u32> {
30 let parsed = parse::parse("", source).unwrap();
31
32 let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new();
33 let mut offsets: HashMap<Section, usize> = HashMap::new();
34 let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new();
35 for node in parsed.nodes().iter() {
36 match node.entity {
37 NodeType::Pragma(_) => {
38 let loc = *offsets
39 .entry(Section::Data)
40 .and_modify(|loc| *loc += node.size())
41 .or_default();
42
43 sections
44 .entry(Section::Data)
45 .and_modify(|section| section.push(node))
46 .or_insert(vec![node]);
47
48 for label in &node.labels {
49 label_locations.insert(label, (Section::Data, loc));
50 }
51 }
52 NodeType::Instruction(_) => {
53 let loc = *offsets
54 .entry(Section::Text)
55 .and_modify(|loc| *loc += node.size())
56 .or_default();
57
58 sections
59 .entry(Section::Text)
60 .and_modify(|section| section.push(node))
61 .or_insert(vec![node]);
62
63 for label in &node.labels {
64 label_locations.insert(label, (Section::Text, loc));
65 }
66 }
67 _ => {}
68 }
69 }
70
71 let text = sections.remove(&Section::Text).unwrap();
72 let data_offset = text.len();
73
74 let mut program = vec![];
75 for node in text.into_iter() {
76 let NodeType::Instruction(instruction) = &node.entity else {
77 panic!("invalid node in .text section");
78 };
79
80 let encoded = match instruction {
81 Instruction::ConditionalMove {
82 destination,
83 source,
84 condition,
85 } => encode_standard(0x00, destination, source, condition),
86 Instruction::Load {
87 destination,
88 address,
89 } => {
90 let parse::Location { block, offset } = address;
91 encode_standard(0x01, destination, block, offset)
92 }
93 Instruction::Store { source, address } => {
94 let parse::Location { block, offset } = address;
95 encode_standard(0x02, block, offset, source)
96 }
97 Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b),
98 Instruction::AddAssign { destination, a } => {
99 encode_standard(0x03, destination, destination, a)
100 }
101 Instruction::AddSelf { destination } => {
102 encode_standard(0x03, destination, destination, destination)
103 }
104 Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b),
105 Instruction::MulAssign { destination, a } => {
106 encode_standard(0x04, destination, destination, a)
107 }
108 Instruction::MulSelf { destination } => {
109 encode_standard(0x04, destination, destination, destination)
110 }
111 Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b),
112 Instruction::DivAssign { destination, a } => {
113 encode_standard(0x05, destination, destination, a)
114 }
115 Instruction::DivSelf { destination } => {
116 encode_standard(0x05, destination, destination, destination)
117 }
118 Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b),
119 Instruction::NandAssign { destination, a } => {
120 encode_standard(0x06, destination, destination, a)
121 }
122 Instruction::NandSelf { destination } => {
123 encode_standard(0x06, destination, destination, destination)
124 }
125 Instruction::Halt => encode_standard(
126 0x07,
127 &Default::default(),
128 &Default::default(),
129 &Default::default(),
130 ),
131 Instruction::Alloc {
132 destination,
133 length,
134 } => encode_standard(0x08, &Register::default(), destination, length),
135 Instruction::Free { block } => {
136 encode_standard(0x09, &Register::default(), &Register::default(), block)
137 }
138 Instruction::Out { source } => {
139 encode_standard(0x0a, &Default::default(), &Default::default(), source)
140 }
141 Instruction::In { destination } => {
142 encode_standard(0x0b, &Default::default(), &Default::default(), destination)
143 }
144 Instruction::Jmp { location } => {
145 let parse::Location { block, offset } = location;
146 encode_standard(0x0c, &Register::default(), block, offset)
147 }
148 Instruction::Address {
149 destination,
150 reference,
151 } => {
152 // lookup reference
153 let Some((section, offset)) = label_locations.get(reference.label) else {
154 panic!("failed to resolve {}", reference.label);
155 };
156
157 let value = match section {
158 Section::Text => *offset,
159 Section::Data => data_offset + *offset,
160 };
161
162 0xd0000000 | destination.encode_a_ortho() | encode_literal(value as u32)
163 }
164 Instruction::LiteralMove {
165 destination,
166 literal,
167 } => 0xd0000000 | destination.encode_a_ortho() | encode_literal(*literal),
168 };
169
170 program.push(encoded);
171 }
172
173 if let Some(data) = sections.remove(&Section::Data) {
174 for node in data.into_iter() {
175 let NodeType::Pragma(pragma) = &node.entity else {
176 panic!("invalid node in .data section. {node:?}");
177 };
178
179 let encoded = match &pragma.payload {
180 PragmaType::WideString { value } => {
181 for byte in value.as_bytes() {
182 program.push(*byte as u32);
183 }
184 Some(0) // terminating byte.
185 }
186 PragmaType::U32 { value } => Some(*value),
187 };
188
189 if let Some(encoded) = encoded {
190 program.push(encoded);
191 }
192 }
193 }
194
195 program
196}
197
/// Checks that `value` fits in the immediate field and returns it unchanged.
///
/// # Panics
///
/// Panics when `value` is larger than `0x1ffffff`.
fn encode_literal(value: u32) -> u32 {
    // Width, in bits, of the immediate field.
    const LITERAL_BITS: u32 = 25;
    const LITERAL_MAX: u32 = (1 << LITERAL_BITS) - 1;

    if value > LITERAL_MAX {
        panic!("literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})");
    }

    value
}
203
204fn encode_standard(op: u32, a: &Register, b: &Register, c: &Register) -> u32 {
205 (op << 28) | a.encode_a() | b.encode_b() | c.encode_c()
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Operation, Register::*};

    /// A wide string is embedded after the code and `adr` yields its platter index.
    #[test]
    fn wide_str() {
        let source = r#"
            adr r0, msg
            msg: .wstr "Hello"
        "#;
        let program = assemble(source);

        let decoded = crate::ops::decode(&program);
        assert_eq!(decoded[0], Operation::Orthography { a: R0, value: 1 });

        // Everything after the single instruction is the string plus terminator.
        let expected: Vec<u32> = "Hello"
            .chars()
            .map(u32::from)
            .chain(std::iter::once(0))
            .collect();
        assert_eq!(&program[1..], expected.as_slice());
    }

    /// Loads, stores and text-label addresses encode to the expected operations.
    #[test]
    fn addresses() {
        let source = r#"
            halt
            start:
            ldr r2, [r0, r1]
            str r2, [r0, r1]
            adr r3, start
            halt
        "#;
        let program = assemble(source);

        let decoded: Vec<_> = crate::ops::decode(&program).into_iter().collect();
        let expected = vec![
            Operation::Halt,
            Operation::ArrayIndex {
                a: R2,
                b: R0,
                c: R1,
            },
            Operation::ArrayAmendment {
                a: R0,
                b: R1,
                c: R2,
            },
            Operation::Orthography { a: R3, value: 1 },
            Operation::Halt,
        ];
        assert_eq!(decoded, expected);
    }

    /// A data word can be read through its label and then overwritten in place.
    #[test]
    fn load_store() {
        let source = r#"
            adr r1, loc
            ldr r2, [r0, r1]
            mov r3, 56
            str r3, [r0, r1]
            halt
            loc:.u32 42
        "#;
        let machine = crate::Um::new(assemble(source));
        let state = machine.run();

        assert_eq!(state.registers[R2], 42);
        assert_eq!(state.memory[0][5], 56);
    }

    /// All three operand forms of `add` compute the expected sums.
    #[test]
    fn addition() {
        let source = r#"
            mov r0, 42
            mov r1, 64
            mov r2, 8192

            add r3, r0, r1 ; r3 = r0 + r1 = 106
            add r1, r2 ; r1 = r1 + r2 = 8256
            add r0 ; r0 = r0 + r0 = 84

            halt
        "#;
        let machine = crate::Um::new(assemble(source));
        let state = machine.run();

        let actual = [
            state.registers[R0],
            state.registers[R1],
            state.registers[R2],
            state.registers[R3],
        ];
        assert_eq!(actual, [84, 8256, 8192, 106]);
    }

    /// `alloc` hands back a non-zero block of the requested length.
    #[test]
    fn alloc() {
        let source = r#"
            ; Allocate 1000 bytes.
            mov r0, 1000
            alloc r1, r0
            halt
        "#;
        let machine = crate::Um::new(assemble(source));
        let state = machine.run();

        assert_eq!(state.registers[R0], 1000);
        assert_ne!(state.registers[R1], 0);

        let block = state.registers[R1] as usize;
        assert_eq!(state.memory[block].len(), 1000);
    }

    /// `free` leaves the previously allocated block empty.
    #[test]
    fn free() {
        let source = r#"
            ; Allocate 1000 bytes.
            mov r0, 1000
            alloc r1, r0
            free r1
            halt
        "#;
        let machine = crate::Um::new(assemble(source));
        let state = machine.run();

        assert_eq!(state.registers[R0], 1000);
        assert_ne!(state.registers[R1], 0);

        let block = state.registers[R1] as usize;
        assert_eq!(state.memory[block].len(), 0, "memory not free'd");
    }
}
349}