+93
Cargo.lock
+93
Cargo.lock
···
3
3
version = 3
4
4
5
5
[[package]]
6
+
name = "beef"
7
+
version = "0.5.2"
8
+
source = "registry+https://github.com/rust-lang/crates.io-index"
9
+
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
10
+
11
+
[[package]]
12
+
name = "fnv"
13
+
version = "1.0.7"
14
+
source = "registry+https://github.com/rust-lang/crates.io-index"
15
+
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
16
+
17
+
[[package]]
18
+
name = "lazy_static"
19
+
version = "1.5.0"
20
+
source = "registry+https://github.com/rust-lang/crates.io-index"
21
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
22
+
23
+
[[package]]
24
+
name = "logos"
25
+
version = "0.14.2"
26
+
source = "registry+https://github.com/rust-lang/crates.io-index"
27
+
checksum = "1c6b6e02facda28ca5fb8dbe4b152496ba3b1bd5a4b40bb2b1b2d8ad74e0f39b"
28
+
dependencies = [
29
+
"logos-derive",
30
+
]
31
+
32
+
[[package]]
33
+
name = "logos-codegen"
34
+
version = "0.14.2"
35
+
source = "registry+https://github.com/rust-lang/crates.io-index"
36
+
checksum = "b32eb6b5f26efacd015b000bfc562186472cd9b34bdba3f6b264e2a052676d10"
37
+
dependencies = [
38
+
"beef",
39
+
"fnv",
40
+
"lazy_static",
41
+
"proc-macro2",
42
+
"quote",
43
+
"regex-syntax",
44
+
"syn",
45
+
]
46
+
47
+
[[package]]
48
+
name = "logos-derive"
49
+
version = "0.14.2"
50
+
source = "registry+https://github.com/rust-lang/crates.io-index"
51
+
checksum = "3e5d0c5463c911ef55624739fc353238b4e310f0144be1f875dc42fec6bfd5ec"
52
+
dependencies = [
53
+
"logos-codegen",
54
+
]
55
+
56
+
[[package]]
57
+
name = "proc-macro2"
58
+
version = "1.0.92"
59
+
source = "registry+https://github.com/rust-lang/crates.io-index"
60
+
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
61
+
dependencies = [
62
+
"unicode-ident",
63
+
]
64
+
65
+
[[package]]
66
+
name = "quote"
67
+
version = "1.0.37"
68
+
source = "registry+https://github.com/rust-lang/crates.io-index"
69
+
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
70
+
dependencies = [
71
+
"proc-macro2",
72
+
]
73
+
74
+
[[package]]
75
+
name = "regex-syntax"
76
+
version = "0.8.5"
77
+
source = "registry+https://github.com/rust-lang/crates.io-index"
78
+
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
79
+
80
+
[[package]]
6
81
name = "smallvec"
7
82
version = "1.13.2"
8
83
source = "registry+https://github.com/rust-lang/crates.io-index"
9
84
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
10
85
11
86
[[package]]
87
+
name = "syn"
88
+
version = "2.0.89"
89
+
source = "registry+https://github.com/rust-lang/crates.io-index"
90
+
checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e"
91
+
dependencies = [
92
+
"proc-macro2",
93
+
"quote",
94
+
"unicode-ident",
95
+
]
96
+
97
+
[[package]]
12
98
name = "um"
13
99
version = "0.1.0"
14
100
dependencies = [
101
+
"logos",
15
102
"smallvec",
16
103
]
104
+
105
+
[[package]]
106
+
name = "unicode-ident"
107
+
version = "1.0.14"
108
+
source = "registry+https://github.com/rust-lang/crates.io-index"
109
+
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
+7
-1
Cargo.toml
+7
-1
Cargo.toml
···
2
2
name = "um"
3
3
version = "0.1.0"
4
4
edition = "2021"
5
+
default-run = "um"
6
+
rust-version = "1.74.1"
5
7
6
8
[dependencies]
7
9
smallvec = { version = "1.13.2" }
10
+
logos = { version = "0.14.2" }
8
11
9
12
[features]
10
13
default = []
11
-
timing = []
14
+
15
+
[profile.release]
16
+
lto = "fat"
17
+
codegen-units = 1
+1
-1
README.md
+1
-1
README.md
+33
files/cat.asm
+33
files/cat.asm
···
1
+
;
2
+
; cat.asm
3
+
;
4
+
; Read from stdin and echo to stdout.
5
+
;
6
+
main:
7
+
; set r2 to 0xffffffff
8
+
nand r2
9
+
10
+
; setup branches
11
+
adr r6, output
12
+
adr r5, loop
13
+
14
+
loop:
15
+
; read stdin, r1 will contain 0xffffffff if we've reached EOF.
16
+
in r1
17
+
18
+
; set r3 to 0 if r2 == r1
19
+
nand r3, r2, r1
20
+
21
+
; setup branch
22
+
adr r4, end
23
+
; overwrite r4 with $output iff r3 != 0 (i.e. we have not reached EOF).
24
+
mov r4, r6, r3
25
+
jmp [r0, r4]
26
+
27
+
output:
28
+
; write to stdout
29
+
out r1
30
+
jmp [r0, r5]
31
+
32
+
end:
33
+
halt
+24
files/hello-world.asm
+24
files/hello-world.asm
···
1
+
;
2
+
; hello-world.asm
3
+
;
4
+
; Prints "Hello, world!" to stdout.
5
+
;
6
+
message:
7
+
.wstr "Hello, world!\n"
8
+
9
+
adr r1, message
10
+
adr r4, loop
11
+
mov r3, 1
12
+
loop:
13
+
ldr r2, [r0, r1]
14
+
adr r6, next
15
+
adr r7, end
16
+
mov r7, r6, r2
17
+
jmp [r0, r7]
18
+
next:
19
+
out r2
20
+
add r1, r3
21
+
jmp [r0, r4]
22
+
23
+
end:
24
+
halt
+335
src/asm.rs
+335
src/asm.rs
···
1
+
mod lexer;
2
+
mod parse;
3
+
4
+
use crate::{Platter, Register};
5
+
use lexer::Token;
6
+
use parse::{Instruction, Node, NodeType, PragmaType};
7
+
use std::collections::HashMap;
8
+
9
+
/// Output section a parsed node is placed in while assembling.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum Section {
    /// Receives every instruction node.
    Text,
    /// Receives every pragma node (embedded data).
    Data,
}
14
+
15
+
pub fn assemble<'s>(source: &'s str) -> Vec<Platter> {
16
+
let parsed = parse::parse("", source).unwrap();
17
+
18
+
let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new();
19
+
let mut offsets: HashMap<Section, usize> = HashMap::new();
20
+
let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new();
21
+
for node in parsed.nodes().iter() {
22
+
match node.entity {
23
+
NodeType::Pragma(_) => {
24
+
let loc = *offsets
25
+
.entry(Section::Data)
26
+
.and_modify(|loc| *loc += node.size())
27
+
.or_default();
28
+
29
+
sections
30
+
.entry(Section::Data)
31
+
.and_modify(|section| section.push(node))
32
+
.or_insert(vec![node]);
33
+
34
+
for label in &node.labels {
35
+
label_locations.insert(label, (Section::Data, loc));
36
+
}
37
+
}
38
+
NodeType::Instruction(_) => {
39
+
let loc = *offsets
40
+
.entry(Section::Text)
41
+
.and_modify(|loc| *loc += node.size())
42
+
.or_default();
43
+
44
+
sections
45
+
.entry(Section::Text)
46
+
.and_modify(|section| section.push(node))
47
+
.or_insert(vec![node]);
48
+
49
+
for label in &node.labels {
50
+
label_locations.insert(label, (Section::Text, loc));
51
+
}
52
+
}
53
+
_ => {}
54
+
}
55
+
}
56
+
57
+
let text = sections.remove(&Section::Text).unwrap();
58
+
let data_offset = text.len();
59
+
60
+
let mut program = vec![];
61
+
for node in text.into_iter() {
62
+
let NodeType::Instruction(instruction) = &node.entity else {
63
+
panic!("invalid node in .text section");
64
+
};
65
+
66
+
let encoded = match instruction {
67
+
Instruction::ConditionalMove {
68
+
destination,
69
+
source,
70
+
condition,
71
+
} => encode_standard(0x00, destination, source, condition),
72
+
Instruction::Load {
73
+
destination,
74
+
address,
75
+
} => {
76
+
let parse::Location { block, offset } = address;
77
+
encode_standard(0x01, destination, block, offset)
78
+
}
79
+
Instruction::Store { source, address } => {
80
+
let parse::Location { block, offset } = address;
81
+
encode_standard(0x02, block, offset, source)
82
+
}
83
+
Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b),
84
+
Instruction::AddAssign { destination, a } => {
85
+
encode_standard(0x03, destination, destination, a)
86
+
}
87
+
Instruction::AddSelf { destination } => {
88
+
encode_standard(0x03, destination, destination, destination)
89
+
}
90
+
Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b),
91
+
Instruction::MulAssign { destination, a } => {
92
+
encode_standard(0x04, destination, destination, a)
93
+
}
94
+
Instruction::MulSelf { destination } => {
95
+
encode_standard(0x04, destination, destination, destination)
96
+
}
97
+
Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b),
98
+
Instruction::DivAssign { destination, a } => {
99
+
encode_standard(0x05, destination, destination, a)
100
+
}
101
+
Instruction::DivSelf { destination } => {
102
+
encode_standard(0x05, destination, destination, destination)
103
+
}
104
+
Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b),
105
+
Instruction::NandAssign { destination, a } => {
106
+
encode_standard(0x06, destination, destination, a)
107
+
}
108
+
Instruction::NandSelf { destination } => {
109
+
encode_standard(0x06, destination, destination, destination)
110
+
}
111
+
Instruction::Halt => encode_standard(
112
+
0x07,
113
+
&Default::default(),
114
+
&Default::default(),
115
+
&Default::default(),
116
+
),
117
+
Instruction::Alloc {
118
+
destination,
119
+
length,
120
+
} => encode_standard(0x08, &Register::default(), destination, length),
121
+
Instruction::Free { block } => {
122
+
encode_standard(0x09, &Register::default(), &Register::default(), block)
123
+
}
124
+
Instruction::Out { source } => {
125
+
encode_standard(0x0a, &Default::default(), &Default::default(), source)
126
+
}
127
+
Instruction::In { destination } => {
128
+
encode_standard(0x0b, &Default::default(), &Default::default(), destination)
129
+
}
130
+
Instruction::Jmp { location } => {
131
+
let parse::Location { block, offset } = location;
132
+
encode_standard(0x0c, &Register::default(), block, offset)
133
+
}
134
+
Instruction::Address {
135
+
destination,
136
+
reference,
137
+
} => {
138
+
// lookup reference
139
+
let Some((section, offset)) = label_locations.get(reference.label) else {
140
+
panic!("failed to resolve {}", reference.label);
141
+
};
142
+
143
+
let value = match section {
144
+
Section::Text => *offset,
145
+
Section::Data => data_offset + *offset,
146
+
};
147
+
148
+
0xd0000000 | destination.encode_a_ortho() | encode_literal(value as Platter)
149
+
}
150
+
Instruction::LiteralMove {
151
+
destination,
152
+
literal,
153
+
} => 0xd0000000 | destination.encode_a_ortho() | encode_literal(*literal),
154
+
};
155
+
156
+
program.push(encoded);
157
+
}
158
+
159
+
if let Some(data) = sections.remove(&Section::Data) {
160
+
for node in data.into_iter() {
161
+
let NodeType::Pragma(pragma) = &node.entity else {
162
+
panic!("invalid node in .data section. {node:?}");
163
+
};
164
+
165
+
let encoded = match &pragma.payload {
166
+
PragmaType::WideString { value } => {
167
+
for byte in value.as_bytes() {
168
+
program.push(*byte as Platter);
169
+
}
170
+
Some(0) // terminating byte.
171
+
}
172
+
PragmaType::U32 { value } => Some(*value),
173
+
};
174
+
175
+
if let Some(encoded) = encoded {
176
+
program.push(encoded);
177
+
}
178
+
}
179
+
}
180
+
181
+
program
182
+
}
183
+
184
+
/// Validates that `value` fits in the literal field of an orthography
/// instruction and returns it unchanged.
///
/// # Panics
///
/// Panics when `value` is greater than `0x1ffffff`.
fn encode_literal(value: Platter) -> Platter {
    const LITERAL_MAX: Platter = 0x1ffffff;
    if value > LITERAL_MAX {
        panic!("literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})");
    }
    value
}
189
+
190
+
/// Builds a three-register instruction: `op` is shifted into the top four
/// bits and OR-ed with the encodings of the `a`, `b` and `c` operands.
fn encode_standard(op: Platter, a: &Register, b: &Register, c: &Register) -> Platter {
    let opcode = op << 28;
    let operands = a.encode_a() | b.encode_b() | c.encode_c();
    opcode | operands
}
193
+
194
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Operation, Register::*};

    /// A wide string is embedded after the code and can be referenced by label.
    #[test]
    fn wide_str() {
        let program = assemble(
            r#"
            adr r0, msg
            msg: .wstr "Hello"
            "#,
        );

        let ops = crate::decode_ops(&program);
        assert_eq!(ops[0], Operation::Orthography { a: R0, value: 1 });

        // Everything after the single instruction is the zero-terminated string.
        let expected: Vec<Platter> = "Hello"
            .chars()
            .map(|c| c as Platter)
            .chain([0])
            .collect();
        assert_eq!(program[1..], expected[..]);
    }

    /// Labels on instructions resolve to the instruction's index.
    #[test]
    fn addresses() {
        let program = assemble(
            r#"
            halt
            start:
            ldr r2, [r0, r1]
            str r2, [r0, r1]
            adr r3, start
            halt
            "#,
        );

        let decoded: Vec<_> = crate::decode_ops(&program).into_iter().collect();
        let expected = vec![
            Operation::Halt,
            Operation::ArrayIndex {
                a: R2,
                b: R0,
                c: R1,
            },
            Operation::ArrayAmendment {
                a: R0,
                b: R1,
                c: R2,
            },
            Operation::Orthography { a: R3, value: 1 },
            Operation::Halt,
        ];
        assert_eq!(decoded, expected);
    }

    /// `ldr` reads embedded data and `str` overwrites it in place.
    #[test]
    fn load_store() {
        let program = assemble(
            r#"
            adr r1, loc
            ldr r2, [r0, r1]
            mov r3, 56
            str r3, [r0, r1]
            halt
            loc:.u32 42
            "#,
        );
        let state = crate::Um::new(program).run();

        assert_eq!(state.registers[R2], 42);
        assert_eq!(state.memory[0][5], 56);
    }

    /// All three operand forms of `add` produce the expected sums.
    #[test]
    fn addition() {
        let program = assemble(
            r#"
            mov r0, 42
            mov r1, 64
            mov r2, 8192

            add r3, r0, r1 ; r3 = r0 + r1 = 106
            add r1, r2 ; r1 = r1 + r2 = 8256
            add r0 ; r0 = r0 + r0 = 84

            halt
            "#,
        );
        let state = crate::Um::new(program).run();

        for (register, expected) in [(R0, 84), (R1, 8256), (R2, 8192), (R3, 106)] {
            assert_eq!(state.registers[register], expected);
        }
    }

    /// `alloc` yields a non-zero array identifier with the requested length.
    #[test]
    fn alloc() {
        let program = assemble(
            r#"
            ; Allocate 1000 bytes.
            mov r0, 1000
            alloc r1, r0
            halt
            "#,
        );
        let state = crate::Um::new(program).run();

        assert_eq!(state.registers[R0], 1000);
        assert_ne!(state.registers[R1], 0);

        let array = state.registers[R1] as usize;
        assert_eq!(state.memory[array].len(), 1000);
    }

    /// `free` releases the storage of a previously allocated array.
    #[test]
    fn free() {
        let program = assemble(
            r#"
            ; Allocate 1000 bytes.
            mov r0, 1000
            alloc r1, r0
            free r1
            halt
            "#,
        );
        let state = crate::Um::new(program).run();

        assert_eq!(state.registers[R0], 1000);
        assert_ne!(state.registers[R1], 0);

        let array = state.registers[R1] as usize;
        assert_eq!(state.memory[array].len(), 0, "memory not free'd");
    }
}
+127
src/asm/lexer.rs
+127
src/asm/lexer.rs
···
1
+
use crate::{Platter, Register};
2
+
use logos::{Lexer, Logos};
3
+
4
+
/// State carried by the lexer alongside the token stream.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Extras {
    /// Count of newline tokens lexed so far.
    pub line: usize,
}
8
+
9
+
#[derive(Logos, Debug, PartialEq)]
10
+
#[logos(skip r"[ \t\f,]+", extras = Extras)]
11
+
pub enum Token<'source> {
12
+
#[token("\n", lex_newline)]
13
+
Newline,
14
+
15
+
#[regex("[a-zA-Z]+[a-zA-Z0-9_]*:", lex_label)]
16
+
Label(&'source str),
17
+
18
+
#[regex("[a-zA-Z_]+[a-zA-Z0-9_]*", |lexer| lexer.slice())]
19
+
Ident(&'source str),
20
+
21
+
#[regex(r#"\.([a-zA-Z0-9]+)"#, |lexer| &lexer.slice()[1..])]
22
+
Pragma(&'source str),
23
+
24
+
#[token("[")]
25
+
AddressOpen,
26
+
27
+
#[token("]")]
28
+
AddressClose,
29
+
30
+
#[regex("r[0-7]", lex_register, priority = 10)]
31
+
Register(Register),
32
+
33
+
#[token("#")]
34
+
Pound,
35
+
36
+
#[token("+")]
37
+
Plus,
38
+
39
+
#[token("-")]
40
+
Minus,
41
+
42
+
#[token(".")]
43
+
Here,
44
+
45
+
#[regex(r#"(0x[a-fA-F0-9]+)|([0-9]+)"#, lex_number)]
46
+
Number(Platter),
47
+
48
+
#[token("\"", lex_string_literal)]
49
+
String(&'source str),
50
+
51
+
#[token(";", lex_comment)]
52
+
Comment(&'source str),
53
+
}
54
+
55
+
fn lex_newline<'source>(lexer: &mut Lexer<'source, Token<'source>>) {
56
+
lexer.extras.line += 1;
57
+
}
58
+
59
+
/// Callback for label tokens: returns the matched text without its final
/// character (the `:` that terminates a label definition).
fn lex_label<'source>(lex: &mut Lexer<'source, Token<'source>>) -> &'source str {
    let matched = lex.slice();
    let (name, _colon) = matched.split_at(matched.len() - 1);
    name
}
63
+
64
+
fn lex_number<'source>(lex: &mut Lexer<'source, Token<'source>>) -> Platter {
65
+
let slice = &lex.slice();
66
+
if slice.starts_with("0x") {
67
+
Platter::from_str_radix(slice.trim_start_matches("0x"), 16).unwrap()
68
+
} else {
69
+
slice.parse().unwrap()
70
+
}
71
+
}
72
+
73
+
/// Callback for string tokens, invoked after the opening `"` has matched.
///
/// Scans forward to the first unescaped `"`, consumes everything up to and
/// including it, and returns the raw text in between (escape sequences are
/// not decoded here). A backslash escapes exactly the one character that
/// follows it, so `\\` is an escaped backslash and `\"` an escaped quote.
///
/// If no closing quote is found, the rest of the input is consumed and
/// returned.
fn lex_string_literal<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> &'source str {
    let remainder = lexer.remainder();

    let mut in_escape = false;
    for (index, character) in remainder.char_indices() {
        match character {
            // The character after a backslash is always taken literally, and
            // the escape applies to that single character only.
            _ if in_escape => in_escape = false,
            '\\' => in_escape = true,
            '"' => {
                // Consume the closing quote too, but keep it out of the value.
                // Returning here also covers a quote that is the very last
                // character of the input.
                lexer.bump(index + character.len_utf8());
                return &remainder[..index];
            }
            _ => {}
        }
    }

    // Unterminated string: swallow the rest of the input.
    lexer.bump(remainder.len());
    remainder
}
106
+
107
+
fn lex_register<'source>(lex: &mut Lexer<'source, Token<'source>>) -> Register {
108
+
let slice = lex.slice();
109
+
let index = slice[1..]
110
+
.parse()
111
+
.expect("regex for register tokens should make the infallible");
112
+
113
+
Register::from_u8(index)
114
+
}
115
+
116
+
/// Callback for comment tokens, invoked after the `;` has matched.
///
/// Consumes and returns everything up to (but not including) the next line
/// feed, or the rest of the input when there is none.
fn lex_comment<'source>(lex: &mut Lexer<'source, Token<'source>>) -> &'source str {
    let remainder = lex.remainder();
    let end = remainder.find('\n').unwrap_or(remainder.len());
    lex.bump(end);
    &remainder[..end]
}
+696
src/asm/parse.rs
+696
src/asm/parse.rs
···
1
+
use super::Token;
2
+
use crate::{Platter, Register};
3
+
use logos::{Logos, Source};
4
+
use std::{borrow::Cow, collections::HashMap, iter::Peekable, ops::Range};
5
+
6
+
pub fn parse(_unit: impl std::fmt::Display, source: &str) -> Result<ParsedProgram, Error> {
7
+
Parser::new(source).parse()
8
+
}
9
+
10
+
#[derive(Debug)]
11
+
pub enum NodeType<'s> {
12
+
Pragma(Pragma<'s>),
13
+
Instruction(Instruction<'s>),
14
+
Comment(#[allow(unused)] &'s str),
15
+
}
16
+
17
+
impl NodeType<'_> {
18
+
pub fn size(&self) -> usize {
19
+
match self {
20
+
Self::Pragma(pragma) => match &pragma.payload {
21
+
PragmaType::U32 { .. } => 1,
22
+
PragmaType::WideString { value } => value.len() + 1,
23
+
},
24
+
// Instructions are always one platter.
25
+
Self::Instruction(_) => 1,
26
+
Self::Comment(_) => 0,
27
+
}
28
+
}
29
+
}
30
+
31
+
#[derive(Debug)]
32
+
pub struct Node<'s> {
33
+
pub labels: Vec<&'s str>,
34
+
pub entity: NodeType<'s>,
35
+
#[allow(unused)]
36
+
pub span: Range<usize>,
37
+
}
38
+
39
+
impl Node<'_> {
40
+
/// Compute encoded size of the node in platters.
41
+
#[inline]
42
+
pub fn size(&self) -> usize {
43
+
self.entity.size()
44
+
}
45
+
}
46
+
47
+
#[derive(Debug)]
48
+
pub struct ParsedProgram<'s> {
49
+
#[allow(unused)]
50
+
pub source: &'s str,
51
+
nodes: Vec<Node<'s>>,
52
+
}
53
+
54
+
impl<'s> ParsedProgram<'s> {
55
+
pub fn nodes(&self) -> &[Node<'s>] {
56
+
&self.nodes
57
+
}
58
+
}
59
+
60
+
/// Parser state for a single source text.
#[derive(Debug, Default)]
pub struct Parser<'s> {
    /// The text being parsed.
    source: &'s str,
    /// Every label seen so far, mapped to the span of its first definition;
    /// used to reject duplicates.
    labels: HashMap<&'s str, Range<usize>>,
    /// Labels seen since the last pragma/instruction; they are attached to
    /// the next such node.
    active_labels: Vec<&'s str>,
}
66
+
67
+
impl<'s> Parser<'s> {
68
+
fn new(source: &'s str) -> Self {
69
+
Self {
70
+
source,
71
+
..Default::default()
72
+
}
73
+
}
74
+
75
+
fn parse(mut self) -> Result<ParsedProgram<'s>, Error> {
76
+
let mut lexer = Token::lexer(self.source);
77
+
let mut spanned = vec![];
78
+
while let Some(res) = lexer.next() {
79
+
match res {
80
+
Ok(token) => {
81
+
spanned.push((token, lexer.span()));
82
+
}
83
+
Err(error) => Err(Error::new(format!("lex: {error:?}"), &lexer.span()))?,
84
+
}
85
+
}
86
+
87
+
let mut nodes = vec![];
88
+
let mut tokens = spanned.into_iter().peekable();
89
+
while let Some((token, span)) = tokens.peek() {
90
+
let node = match token {
91
+
Token::Label(_) => {
92
+
self.consume_label(&mut tokens)?;
93
+
continue;
94
+
}
95
+
Token::Pragma(_) => self.consume_pragma(&mut tokens)?,
96
+
Token::Ident(_) => self.consume_instruction(&mut tokens)?,
97
+
Token::Comment(comment) => {
98
+
let node = Node {
99
+
labels: vec![],
100
+
entity: NodeType::Comment(comment),
101
+
span: span.clone(),
102
+
};
103
+
tokens.next();
104
+
node
105
+
}
106
+
Token::Newline => {
107
+
tokens.next();
108
+
continue;
109
+
}
110
+
_ => Err(Error::new(format!("unexpected token {token:?}"), span))?,
111
+
};
112
+
113
+
nodes.push(node);
114
+
}
115
+
116
+
Ok(ParsedProgram {
117
+
source: self.source,
118
+
nodes,
119
+
})
120
+
}
121
+
122
+
/// Consumes a label from the token stream.
123
+
fn consume_label<I>(&mut self, tokens: &mut I) -> Result<(), Error>
124
+
where
125
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
126
+
{
127
+
let Some((Token::Label(label_ident), span)) = tokens.next() else {
128
+
unreachable!("consume_label called on non-label token");
129
+
};
130
+
131
+
// Add the label to the set of observed labels.
132
+
let label_span = self
133
+
.labels
134
+
.entry(label_ident)
135
+
.or_insert_with(|| span.clone());
136
+
137
+
// If the span of the current token is not equal to
138
+
// `label_span`, then we have already seen label with the
139
+
// same identifier.
140
+
if label_span != &span {
141
+
return Err(Error::new(
142
+
format!("duplicate label '{label_ident}', original label span: {label_span:?}"),
143
+
&span,
144
+
));
145
+
}
146
+
147
+
self.active_labels.push(label_ident);
148
+
Ok(())
149
+
}
150
+
151
+
fn consume_pragma<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error>
152
+
where
153
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
154
+
{
155
+
assert!(
156
+
matches!(tokens.peek(), Some((Token::Pragma(_), _))),
157
+
"consume_pragma called on non-pragma token"
158
+
);
159
+
160
+
let labels = std::mem::take(&mut self.active_labels);
161
+
let (pragma, span) = Pragma::consume(tokens)?;
162
+
163
+
Ok(Node {
164
+
labels,
165
+
entity: NodeType::Pragma(pragma),
166
+
span,
167
+
})
168
+
}
169
+
170
+
fn consume_instruction<I>(&mut self, tokens: &mut Peekable<I>) -> Result<Node<'s>, Error>
171
+
where
172
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
173
+
{
174
+
assert!(
175
+
matches!(tokens.peek(), Some((Token::Ident(_), _))),
176
+
"consume_instruction called on non-ident token"
177
+
);
178
+
179
+
let labels = std::mem::take(&mut self.active_labels);
180
+
let (instr, span) = Instruction::consume(tokens)?;
181
+
Ok(Node {
182
+
labels,
183
+
entity: NodeType::Instruction(instr),
184
+
span,
185
+
})
186
+
}
187
+
}
188
+
189
+
/// An error encountered during parsing: a message (field `0`) and the
/// source span it refers to (field `1`).
#[derive(Debug)]
#[allow(unused)]
pub struct Error(pub String, pub Range<usize>);
193
+
194
+
impl Error {
195
+
fn new(message: impl ToString, span: &Range<usize>) -> Self {
196
+
Self(message.to_string(), span.clone())
197
+
}
198
+
199
+
fn eof() -> Self {
200
+
Self("unexpected eof".into(), 0..0)
201
+
}
202
+
}
203
+
204
+
impl std::fmt::Display for Error {
205
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
206
+
write!(f, "{self:?}")
207
+
}
208
+
}
209
+
210
+
impl std::error::Error for Error {}
211
+
212
+
#[derive(Debug, Default)]
213
+
pub struct Location {
214
+
pub block: Register,
215
+
pub offset: Register,
216
+
}
217
+
218
+
impl Location {
219
+
pub fn consume<'s, I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error>
220
+
where
221
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
222
+
{
223
+
// Require a '[' token.
224
+
let start_span = match tokens.next() {
225
+
Some((Token::AddressOpen, span)) => span,
226
+
Some((_, span)) => Err(Error::new("expected an address opening bracket", &span))?,
227
+
_ => Err(Error::eof())?,
228
+
};
229
+
230
+
let (block, _) = consume_register(tokens)?;
231
+
let (offset, _) = consume_register(tokens)?;
232
+
233
+
// Require a ']' token.
234
+
let end_span = match tokens.next() {
235
+
Some((Token::AddressClose, span)) => span,
236
+
Some((_, span)) => Err(Error::new("expected an address closing bracket", &span))?,
237
+
_ => Err(Error::eof())?,
238
+
};
239
+
240
+
Ok((Self { block, offset }, merge_spans(&start_span, &end_span)))
241
+
}
242
+
}
243
+
244
+
/// Operand of an `adr` instruction: a reference to a label.
#[derive(Debug)]
pub struct Expr<'s> {
    /// Name of the referenced label.
    pub label: &'s str,
}
248
+
249
+
/// Payload of a pragma.
#[derive(Debug)]
pub enum PragmaType<'s> {
    /// A single 32-bit value (`.u32`).
    U32 { value: u32 },
    /// A string (`.wstr`).
    WideString { value: Cow<'s, str> },
}
254
+
255
+
#[derive(Debug)]
256
+
pub struct Pragma<'s> {
257
+
#[allow(unused)]
258
+
relocatable: bool,
259
+
pub payload: PragmaType<'s>,
260
+
}
261
+
262
+
impl<'s> Pragma<'s> {
263
+
pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error>
264
+
where
265
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
266
+
{
267
+
let relocatable = true;
268
+
let token = tokens.next().ok_or(Error::eof())?;
269
+
match token {
270
+
(Token::Pragma("u32"), start_span) => {
271
+
let (value, end_span) = consume_number(tokens)?;
272
+
Ok((
273
+
Self {
274
+
relocatable,
275
+
payload: PragmaType::U32 { value },
276
+
},
277
+
merge_spans(&start_span, &end_span),
278
+
))
279
+
}
280
+
(Token::Pragma("wstr"), start_span) => {
281
+
let (value, end_span) = consume_string(tokens)?;
282
+
Ok((
283
+
Self {
284
+
relocatable,
285
+
payload: PragmaType::WideString { value },
286
+
},
287
+
merge_spans(&start_span, &end_span),
288
+
))
289
+
}
290
+
(Token::Pragma(command), span) => Err(Error::new(
291
+
format!("unknown pragma command {command}"),
292
+
&span,
293
+
))?,
294
+
(_, span) => Err(Error::new("unexpected token", &span))?,
295
+
}
296
+
}
297
+
}
298
+
299
+
#[derive(Debug)]
300
+
pub enum Instruction<'s> {
301
+
/// Operation #0.
302
+
ConditionalMove {
303
+
destination: Register,
304
+
source: Register,
305
+
condition: Register,
306
+
},
307
+
/// Operation #13.
308
+
Address {
309
+
destination: Register,
310
+
reference: Expr<'s>,
311
+
},
312
+
/// Operation #13.
313
+
LiteralMove {
314
+
destination: Register,
315
+
literal: Platter,
316
+
},
317
+
Load {
318
+
destination: Register,
319
+
address: Location,
320
+
},
321
+
Store {
322
+
source: Register,
323
+
address: Location,
324
+
},
325
+
Add {
326
+
destination: Register,
327
+
a: Register,
328
+
b: Register,
329
+
},
330
+
AddAssign {
331
+
destination: Register,
332
+
a: Register,
333
+
},
334
+
AddSelf {
335
+
destination: Register,
336
+
},
337
+
Mul {
338
+
destination: Register,
339
+
a: Register,
340
+
b: Register,
341
+
},
342
+
MulAssign {
343
+
destination: Register,
344
+
a: Register,
345
+
},
346
+
MulSelf {
347
+
destination: Register,
348
+
},
349
+
Div {
350
+
destination: Register,
351
+
a: Register,
352
+
b: Register,
353
+
},
354
+
DivAssign {
355
+
destination: Register,
356
+
a: Register,
357
+
},
358
+
DivSelf {
359
+
destination: Register,
360
+
},
361
+
Nand {
362
+
destination: Register,
363
+
a: Register,
364
+
b: Register,
365
+
},
366
+
NandAssign {
367
+
destination: Register,
368
+
a: Register,
369
+
},
370
+
NandSelf {
371
+
destination: Register,
372
+
},
373
+
Halt,
374
+
Alloc {
375
+
destination: Register,
376
+
length: Register,
377
+
},
378
+
Free {
379
+
block: Register,
380
+
},
381
+
Out {
382
+
source: Register,
383
+
},
384
+
In {
385
+
destination: Register,
386
+
},
387
+
Jmp {
388
+
location: Location,
389
+
},
390
+
}
391
+
392
+
impl<'s> Instruction<'s> {
393
+
pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error>
394
+
where
395
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
396
+
{
397
+
let ident = tokens.next().unwrap();
398
+
match ident {
399
+
(Token::Ident("halt"), span) => Ok((Self::Halt, span)),
400
+
(Token::Ident("adr"), start_span) => {
401
+
let (destination, _) = consume_register(tokens)?;
402
+
let (identifier, end_span) = consume_ident(tokens)?;
403
+
Ok((
404
+
Self::Address {
405
+
destination,
406
+
reference: Expr { label: identifier },
407
+
},
408
+
merge_spans(&start_span, &end_span),
409
+
))
410
+
}
411
+
(Token::Ident("mov"), start_span) => {
412
+
let (destination, _) = consume_register(tokens)?;
413
+
if peek_register(tokens)?.is_some() {
414
+
let (source, _) = consume_register(tokens)?;
415
+
let (condition, end_span) = consume_register(tokens)?;
416
+
Ok((
417
+
Self::ConditionalMove {
418
+
destination,
419
+
source,
420
+
condition,
421
+
},
422
+
merge_spans(&start_span, &end_span),
423
+
))
424
+
} else {
425
+
let (literal, end_span) = consume_number(tokens)?;
426
+
Ok((
427
+
Self::LiteralMove {
428
+
destination,
429
+
literal,
430
+
},
431
+
merge_spans(&start_span, &end_span),
432
+
))
433
+
}
434
+
}
435
+
(Token::Ident("ldr"), start_span) => {
436
+
let (destination, _) = consume_register(tokens)?;
437
+
let (address, end_span) = Location::consume(tokens)?;
438
+
Ok((
439
+
Self::Load {
440
+
destination,
441
+
address,
442
+
},
443
+
merge_spans(&start_span, &end_span),
444
+
))
445
+
}
446
+
(Token::Ident("str"), start_span) => {
447
+
let (source, _) = consume_register(tokens)?;
448
+
let (address, end_span) = Location::consume(tokens)?;
449
+
Ok((
450
+
Self::Store { source, address },
451
+
merge_spans(&start_span, &end_span),
452
+
))
453
+
}
454
+
(Token::Ident("out"), start_span) => {
455
+
let (source, end_span) = consume_register(tokens)?;
456
+
Ok((Self::Out { source }, merge_spans(&start_span, &end_span)))
457
+
}
458
+
(Token::Ident("in"), start_span) => {
459
+
let (destination, end_span) = consume_register(tokens)?;
460
+
Ok((
461
+
Self::In { destination },
462
+
merge_spans(&start_span, &end_span),
463
+
))
464
+
}
465
+
(Token::Ident("alloc"), start_span) => {
466
+
let (destination, _) = consume_register(tokens)?;
467
+
let (length, end_span) = consume_register(tokens)?;
468
+
Ok((
469
+
Self::Alloc {
470
+
length,
471
+
destination,
472
+
},
473
+
merge_spans(&start_span, &end_span),
474
+
))
475
+
}
476
+
(Token::Ident("free"), start_span) => {
477
+
let (block, end_span) = consume_register(tokens)?;
478
+
Ok((Self::Free { block }, merge_spans(&start_span, &end_span)))
479
+
}
480
+
(Token::Ident("jmp"), start_span) => {
481
+
let (location, end_span) = Location::consume(tokens)?;
482
+
Ok((Self::Jmp { location }, merge_spans(&start_span, &end_span)))
483
+
}
484
+
(Token::Ident("add"), start_span) => {
485
+
let (destination, mid_span) = consume_register(tokens)?;
486
+
let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
487
+
let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
488
+
match (a, b) {
489
+
(Some((a, _)), Some((b, end_span))) => Ok((
490
+
Self::Add { destination, a, b },
491
+
merge_spans(&start_span, &end_span),
492
+
)),
493
+
(Some((a, end_span)), None) => Ok((
494
+
Self::AddAssign { destination, a },
495
+
merge_spans(&start_span, &end_span),
496
+
)),
497
+
(None, None) => Ok((
498
+
Self::AddSelf { destination },
499
+
merge_spans(&start_span, &mid_span),
500
+
)),
501
+
_ => unreachable!(),
502
+
}
503
+
}
504
+
(Token::Ident("mul"), start_span) => {
505
+
let (destination, mid_span) = consume_register(tokens)?;
506
+
let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
507
+
let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
508
+
match (a, b) {
509
+
(Some((a, _)), Some((b, end_span))) => Ok((
510
+
Self::Mul { destination, a, b },
511
+
merge_spans(&start_span, &end_span),
512
+
)),
513
+
(Some((a, end_span)), None) => Ok((
514
+
Self::MulAssign { destination, a },
515
+
merge_spans(&start_span, &end_span),
516
+
)),
517
+
(None, None) => Ok((
518
+
Self::MulSelf { destination },
519
+
merge_spans(&start_span, &mid_span),
520
+
)),
521
+
_ => unreachable!(),
522
+
}
523
+
}
524
+
(Token::Ident("div"), start_span) => {
525
+
let (destination, mid_span) = consume_register(tokens)?;
526
+
let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
527
+
let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
528
+
match (a, b) {
529
+
(Some((a, _)), Some((b, end_span))) => Ok((
530
+
Self::Div { destination, a, b },
531
+
merge_spans(&start_span, &end_span),
532
+
)),
533
+
(Some((a, end_span)), None) => Ok((
534
+
Self::DivAssign { destination, a },
535
+
merge_spans(&start_span, &end_span),
536
+
)),
537
+
(None, None) => Ok((
538
+
Self::DivSelf { destination },
539
+
merge_spans(&start_span, &mid_span),
540
+
)),
541
+
_ => unreachable!(),
542
+
}
543
+
}
544
+
(Token::Ident("nand"), start_span) => {
545
+
let (destination, mid_span) = consume_register(tokens)?;
546
+
let a = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
547
+
let b = peek_register(tokens)?.and_then(|_| consume_register(tokens).ok());
548
+
match (a, b) {
549
+
(Some((a, _)), Some((b, end_span))) => Ok((
550
+
Self::Nand { destination, a, b },
551
+
merge_spans(&start_span, &end_span),
552
+
)),
553
+
(Some((a, end_span)), None) => Ok((
554
+
Self::NandAssign { destination, a },
555
+
merge_spans(&start_span, &end_span),
556
+
)),
557
+
(None, None) => Ok((
558
+
Self::NandSelf { destination },
559
+
merge_spans(&start_span, &mid_span),
560
+
)),
561
+
_ => unreachable!(),
562
+
}
563
+
}
564
+
(_, span) => Err(Error::new("unrecognised instruction", &span))?,
565
+
}
566
+
}
567
+
}
568
+
569
+
impl std::fmt::Display for Instruction<'_> {
570
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
571
+
match self {
572
+
Self::ConditionalMove {
573
+
destination,
574
+
source,
575
+
condition,
576
+
} => write!(f, "mov {destination}, {source}, {condition}"),
577
+
Self::Load {
578
+
destination,
579
+
address,
580
+
} => write!(
581
+
f,
582
+
"ldr {destination}, [{}, {}]",
583
+
address.block, address.offset
584
+
),
585
+
Self::Store { source, address } => {
586
+
write!(f, "str {source}, [{}, {}]", address.block, address.offset)
587
+
}
588
+
Self::Add { destination, a, b } => write!(f, "add {destination}, {a}, {b}"),
589
+
Self::AddAssign { destination, a } => write!(f, "add {destination}, {a}"),
590
+
Self::AddSelf { destination } => write!(f, "add {destination}"),
591
+
Self::Mul { destination, a, b } => write!(f, "mul {destination}, {a}, {b}"),
592
+
Self::MulAssign { destination, a } => write!(f, "mul {destination}, {a}"),
593
+
Self::MulSelf { destination } => write!(f, "mul {destination}"),
594
+
Self::Div { destination, a, b } => write!(f, "div {destination}, {a}, {b}"),
595
+
Self::DivAssign { destination, a } => write!(f, "div {destination}, {a}"),
596
+
Self::DivSelf { destination } => write!(f, "div {destination}"),
597
+
Self::Nand { destination, a, b } => write!(f, "nand {destination}, {a}, {b}"),
598
+
Self::NandAssign { destination, a } => write!(f, "nand {destination}, {a}"),
599
+
Self::NandSelf { destination } => write!(f, "nand {destination}"),
600
+
Self::Halt => write!(f, "halt"),
601
+
Self::Out { source } => write!(f, "out {source}"),
602
+
Self::In { destination } => write!(f, "in {destination}"),
603
+
Self::Alloc {
604
+
length,
605
+
destination,
606
+
} => write!(f, "alloc {destination}, {length}"),
607
+
Self::Free { block } => {
608
+
write!(f, "free {block}")
609
+
}
610
+
Self::Jmp { location } => write!(f, "jmp [{}, {}]", location.block, location.offset),
611
+
Self::LiteralMove {
612
+
destination,
613
+
literal,
614
+
} => write!(f, "mov {destination}, {literal}"),
615
+
Self::Address {
616
+
destination,
617
+
reference,
618
+
} => write!(f, "adr {destination}, {}", reference.label),
619
+
}
620
+
}
621
+
}
622
+
623
+
/// Peeks at the next token and returns it iff it is a Register.
624
+
fn peek_register<'s, I>(tokens: &mut Peekable<I>) -> Result<Option<Register>, Error>
625
+
where
626
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
627
+
{
628
+
match tokens.peek() {
629
+
Some((Token::Register(r), _)) => Ok(Some(*r)),
630
+
Some(_) => Ok(None),
631
+
None => Err(Error::new("unexpected eof", &(0..0))),
632
+
}
633
+
}
634
+
635
+
fn consume_register<'s, I>(tokens: &mut I) -> Result<(Register, Range<usize>), Error>
636
+
where
637
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
638
+
{
639
+
match tokens.next() {
640
+
Some((Token::Register(r), span)) => Ok((r, span)),
641
+
Some((token, span)) => Err(Error::new(
642
+
format!("expected a register, found: {token:?}"),
643
+
&span,
644
+
)),
645
+
None => Err(Error::eof()),
646
+
}
647
+
}
648
+
649
+
fn consume_ident<'s, I>(tokens: &mut I) -> Result<(&'s str, Range<usize>), Error>
650
+
where
651
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
652
+
{
653
+
match tokens.next() {
654
+
Some((Token::Ident(ident), span)) => Ok((ident, span)),
655
+
Some((token, span)) => Err(Error::new(
656
+
format!("expected an identifier, found: {token:?}"),
657
+
&span,
658
+
)),
659
+
None => Err(Error::eof()),
660
+
}
661
+
}
662
+
663
+
fn consume_number<'s, I>(tokens: &mut I) -> Result<(Platter, Range<usize>), Error>
664
+
where
665
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
666
+
{
667
+
match tokens.next() {
668
+
Some((Token::Number(value), span)) => Ok((value, span)),
669
+
Some((token, span)) => Err(Error::new(
670
+
format!("expected a number literal, found: {token:?}"),
671
+
&span,
672
+
)),
673
+
None => Err(Error::eof()),
674
+
}
675
+
}
676
+
677
+
fn consume_string<'s, I>(tokens: &mut I) -> Result<(Cow<'s, str>, Range<usize>), Error>
678
+
where
679
+
I: Iterator<Item = (Token<'s>, Range<usize>)>,
680
+
{
681
+
match tokens.next() {
682
+
Some((Token::String(value), span)) => {
683
+
let unescaped = crate::str::unescape_str(value).map_err(|_| Error::eof())?;
684
+
Ok((unescaped, span))
685
+
}
686
+
Some((token, span)) => Err(Error::new(
687
+
format!("expected a number literal, found: {token:?}"),
688
+
&span,
689
+
)),
690
+
None => Err(Error::eof()),
691
+
}
692
+
}
693
+
694
+
/// Builds the span that runs from the beginning of `start` to the end of `end`.
///
/// The two inputs are not validated against each other: the result simply
/// pairs `start.start` with `end.end`.
fn merge_spans(start: &Range<usize>, end: &Range<usize>) -> Range<usize> {
    Range {
        start: start.start,
        end: end.end,
    }
}
+51
src/bin/uasm.rs
+51
src/bin/uasm.rs
···
1
+
use std::path::{Path, PathBuf};
2
+
use um::Platter;
3
+
4
+
fn main() {
5
+
let mut output = PathBuf::from("./a.um");
6
+
7
+
let mut program = Vec::new();
8
+
let mut args = std::env::args().skip(1);
9
+
while let Some(arg) = args.next() {
10
+
match arg.as_str() {
11
+
"-o" | "--out" => {
12
+
output = PathBuf::from(args.next().expect("expected output path"));
13
+
}
14
+
_ => {
15
+
let path = Path::new(&arg);
16
+
program.extend_from_slice(&match load_program(path) {
17
+
Ok(p) => p,
18
+
Err(error) => {
19
+
eprintln!("{error}");
20
+
std::process::exit(1);
21
+
}
22
+
});
23
+
}
24
+
}
25
+
}
26
+
27
+
// Convert the program to bytes.
28
+
let bytes: Vec<_> = program
29
+
.into_iter()
30
+
.flat_map(|word| word.to_be_bytes())
31
+
.collect();
32
+
33
+
std::fs::write(&output, bytes).unwrap();
34
+
}
35
+
36
+
fn load_program(path: &Path) -> std::io::Result<Vec<Platter>> {
37
+
match path.extension().map(|ext| ext.as_encoded_bytes()) {
38
+
Some(b"uasm") | Some(b"asm") => {
39
+
let source = std::fs::read_to_string(path)?;
40
+
let program = um::asm::assemble(&source);
41
+
Ok(program)
42
+
}
43
+
_ => {
44
+
let program = std::fs::read(path)?;
45
+
Ok(program
46
+
.chunks_exact(std::mem::size_of::<Platter>())
47
+
.map(|pl| Platter::from_be_bytes(pl.try_into().unwrap()))
48
+
.collect())
49
+
}
50
+
}
51
+
}
+49
src/bin/um.rs
+49
src/bin/um.rs
···
1
+
use std::{path::Path, time::Instant};
2
+
use um::{Platter, Um};
3
+
4
+
fn main() {
5
+
let mut program = Vec::new();
6
+
let mut time = false;
7
+
8
+
for arg in std::env::args().skip(1) {
9
+
if arg == "--time" {
10
+
time = true;
11
+
continue;
12
+
}
13
+
14
+
let path = Path::new(&arg);
15
+
program.extend_from_slice(&match load_program(path) {
16
+
Ok(p) => p,
17
+
Err(error) => {
18
+
eprintln!("{error}");
19
+
std::process::exit(1);
20
+
}
21
+
});
22
+
}
23
+
24
+
let start = Instant::now();
25
+
Um::new(program)
26
+
.stdout(&mut std::io::stdout())
27
+
.stdin(&mut std::io::stdin())
28
+
.run();
29
+
30
+
if time {
31
+
eprintln!("{:?}", start.elapsed());
32
+
}
33
+
}
34
+
35
+
fn load_program(path: &Path) -> std::io::Result<Vec<Platter>> {
36
+
match path.extension().map(|ext| ext.as_encoded_bytes()) {
37
+
Some(b"uasm") | Some(b"asm") => {
38
+
let source = std::fs::read_to_string(path)?;
39
+
Ok(um::asm::assemble(&source))
40
+
}
41
+
_ => {
42
+
let program = std::fs::read(path)?;
43
+
Ok(program
44
+
.chunks_exact(std::mem::size_of::<Platter>())
45
+
.map(|pl| Platter::from_be_bytes(pl.try_into().unwrap()))
46
+
.collect())
47
+
}
48
+
}
49
+
}
+486
-38
src/lib.rs
+486
-38
src/lib.rs
···
1
+
use smallvec::SmallVec;
2
+
use std::{
3
+
io::{Read, Write},
4
+
ops,
5
+
};
6
+
7
+
pub mod asm;
8
+
pub mod str;
9
+
1
10
pub type Platter = u32;
2
-
pub type Parameter = u8;
11
+
12
+
/// A reference to a register of the UM-32.
///
/// The discriminant of each variant is the register's index, which is what
/// the `as` casts elsewhere in this module rely on.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum Register {
    /// Register 0; also the default register.
    #[default]
    R0 = 0,
    /// Register 1.
    R1 = 1,
    /// Register 2.
    R2 = 2,
    /// Register 3.
    R3 = 3,
    /// Register 4.
    R4 = 4,
    /// Register 5.
    R5 = 5,
    /// Register 6.
    R6 = 6,
    /// Register 7.
    R7 = 7,
}
25
+
26
+
impl std::fmt::Display for Register {
27
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28
+
write!(f, "r{}", *self as u8)
29
+
}
30
+
}
31
+
32
+
impl Register {
33
+
/// Encodes the register as the 'a' parameter of an encoded
34
+
/// instruction (bits 6..=8).
35
+
fn encode_a(self) -> Platter {
36
+
((self as Platter) & 0x7) << 6
37
+
}
38
+
39
+
/// Encodes the register as the 'b' parameter of an encoded
40
+
/// instruction (bits 3..=5).
41
+
fn encode_b(self) -> Platter {
42
+
((self as Platter) & 0x7) << 3
43
+
}
44
+
45
+
/// Encodes the register as the 'c' parameter of an encoded
46
+
/// instruction (bits 0..=2).
47
+
fn encode_c(self) -> Platter {
48
+
(self as Platter) & 0x7
49
+
}
50
+
51
+
/// Encodes the register as the 'a' parameter of an `Orthography`
52
+
/// operation.
53
+
///
54
+
/// This is *only* valid for `Orthography` operations.
55
+
fn encode_a_ortho(self) -> Platter {
56
+
((self as Platter) & 0x7) << 25
57
+
}
58
+
59
+
fn from_u8(index: u8) -> Self {
60
+
match index {
61
+
0 => Register::R0,
62
+
1 => Register::R1,
63
+
2 => Register::R2,
64
+
3 => Register::R3,
65
+
4 => Register::R4,
66
+
5 => Register::R5,
67
+
6 => Register::R6,
68
+
7 => Register::R7,
69
+
_ => unreachable!(),
70
+
}
71
+
}
72
+
}
73
+
74
+
/// A set of registers.
75
+
#[derive(Debug, Default)]
76
+
struct Page([Platter; 8]);
77
+
78
+
impl ops::Index<Register> for Page {
79
+
type Output = Platter;
80
+
#[inline(always)]
81
+
fn index(&self, index: Register) -> &Self::Output {
82
+
&self.0[index as usize]
83
+
}
84
+
}
85
+
86
+
impl ops::IndexMut<Register> for Page {
87
+
#[inline(always)]
88
+
fn index_mut(&mut self, index: Register) -> &mut Self::Output {
89
+
&mut self.0[index as usize]
90
+
}
91
+
}
92
+
93
+
impl From<[Platter; 8]> for Page {
94
+
fn from(value: [Platter; 8]) -> Self {
95
+
Self(value)
96
+
}
97
+
}
3
98
4
99
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
5
-
pub enum Operation {
100
+
enum Operation {
6
101
/// Operator #0. Conditional Move.
7
102
///
8
103
/// The register A receives the value in register B,
9
104
/// unless the register C contains 0.
10
105
ConditionalMove {
11
-
a: Parameter,
12
-
b: Parameter,
13
-
c: Parameter,
106
+
a: Register,
107
+
b: Register,
108
+
c: Register,
14
109
},
15
110
/// Operator #1: Array Index.
16
111
///
17
112
/// The register A receives the value stored at offset
18
113
/// in register C in the array identified by B.
19
114
ArrayIndex {
20
-
a: Parameter,
21
-
b: Parameter,
22
-
c: Parameter,
115
+
a: Register,
116
+
b: Register,
117
+
c: Register,
23
118
},
24
119
/// Operator #2. Array Amendment.
25
120
///
26
121
/// The array identified by A is amended at the offset
27
122
/// in register B to store the value in register C.
28
123
ArrayAmendment {
29
-
a: Parameter,
30
-
b: Parameter,
31
-
c: Parameter,
124
+
a: Register,
125
+
b: Register,
126
+
c: Register,
32
127
},
33
128
/// Operator #3. Addition.
34
129
///
35
130
/// The register A receives the value in register B plus
36
131
/// the value in register C, modulo 2^32.
37
132
Addition {
38
-
a: Parameter,
39
-
b: Parameter,
40
-
c: Parameter,
133
+
a: Register,
134
+
b: Register,
135
+
c: Register,
41
136
},
42
137
/// Operator #4. Multiplication.
43
138
///
44
139
/// The register A receives the value in register B times
45
140
/// the value in register C, modulo 2^32.
46
141
Multiplication {
47
-
a: Parameter,
48
-
b: Parameter,
49
-
c: Parameter,
142
+
a: Register,
143
+
b: Register,
144
+
c: Register,
50
145
},
51
146
/// Operator #5. Division.
52
147
///
···
54
149
/// divided by the value in register C, if any, where
55
150
/// each quantity is treated as an unsigned 32 bit number.
56
151
Division {
57
-
a: Parameter,
58
-
b: Parameter,
59
-
c: Parameter,
152
+
a: Register,
153
+
b: Register,
154
+
c: Register,
60
155
},
61
156
/// Operator #6. Not-And.
62
157
///
···
65
160
/// position. Otherwise the bit in register A receives
66
161
/// the 0 bit.
67
162
NotAnd {
68
-
a: Parameter,
69
-
b: Parameter,
70
-
c: Parameter,
163
+
a: Register,
164
+
b: Register,
165
+
c: Register,
71
166
},
72
167
/// Operator #7. Halt.
73
168
///
···
82
177
/// exclusively the 0 bit, and that identifies no other
83
178
/// active allocated array, is placed in the B register.
84
179
Allocation {
85
-
b: Parameter,
86
-
c: Parameter,
180
+
b: Register,
181
+
c: Register,
87
182
},
88
183
/// Operator #9. Abandonment.
89
184
///
90
185
/// The array identified by the register C is abandoned.
91
186
/// Future allocations may then reuse that identifier.
92
187
Abandonment {
93
-
c: Parameter,
188
+
c: Register,
94
189
},
95
190
/// Operator #10. Output.
96
191
///
···
98
193
/// immediately. Only values between and including 0 and 255
99
194
/// are allowed.
100
195
Output {
101
-
c: Parameter,
196
+
c: Register,
102
197
},
103
198
/// Operator #11. Input.
104
199
///
···
109
204
/// register C is endowed with a uniform value pattern
110
205
/// where every place is pregnant with the 1 bit.
111
206
Input {
112
-
c: Parameter,
207
+
c: Register,
113
208
},
114
209
/// Operator #12. Load Program.
115
210
///
···
125
220
/// loading, and shall be handled with the utmost
126
221
/// velocity.
127
222
LoadProgram {
128
-
b: Parameter,
129
-
c: Parameter,
223
+
b: Register,
224
+
c: Register,
130
225
},
131
226
/// Operator #13. Orthography.
132
227
///
133
228
/// The value indicated is loaded into the register A
134
229
/// forthwith.
135
230
Orthography {
136
-
a: Parameter,
231
+
a: Register,
137
232
value: u32,
138
233
},
139
234
IllegalInstruction,
···
142
237
impl From<Platter> for Operation {
143
238
#[inline]
144
239
fn from(value: Platter) -> Self {
145
-
let a = ((value >> 6) & 0x07) as Parameter;
146
-
let b = ((value >> 3) & 0x07) as Parameter;
147
-
let c = (value & 0x07) as Parameter;
148
-
240
+
let a = Register::from_u8(((value >> 6) & 0x07) as u8);
241
+
let b = Register::from_u8(((value >> 3) & 0x07) as u8);
242
+
let c = Register::from_u8((value & 0x07) as u8);
149
243
match value & 0xf0000000 {
150
244
0x00000000 => Self::ConditionalMove { a, b, c },
151
245
0x10000000 => Self::ArrayIndex { a, b, c },
···
161
255
0xb0000000 => Self::Input { c },
162
256
0xc0000000 => Self::LoadProgram { b, c },
163
257
0xd0000000 => {
164
-
let a = ((value >> 25) & 0x07) as Parameter;
258
+
let a = Register::from_u8(((value >> 25) & 0x07) as u8);
165
259
let value = value & 0x01ffffff;
166
260
Self::Orthography { a, value }
167
261
}
···
170
264
}
171
265
}
172
266
173
-
#[inline]
174
-
pub fn decode_ops(ops: &[Platter]) -> Vec<Operation> {
267
+
fn decode_ops(ops: &[Platter]) -> Vec<Operation> {
175
268
ops.iter()
176
269
.map(|&encoded| Operation::from(encoded))
177
270
.collect()
178
271
}
272
+
273
+
/// Inline capacity, in platters, of the `SmallVec` that backs each memory block.
const SMALLVEC_SIZE: usize = 24;
274
+
275
+
/// Lossless conversion to `usize`.
276
+
///
277
+
/// This should only be implemented on types which can be losslessly
278
+
/// cast to a `usize`.
279
+
trait IntoIndex: Sized + Copy {
280
+
fn into_index(self) -> usize;
281
+
}
282
+
283
+
macro_rules! impl_into_index {
284
+
($t:ty) => {
285
+
impl IntoIndex for $t {
286
+
fn into_index(self) -> usize {
287
+
self as usize
288
+
}
289
+
}
290
+
};
291
+
}
292
+
293
+
#[cfg(target_pointer_width = "16")]
294
+
compile_error!("16 bit architectures are unsupported");
295
+
296
+
// usize *may* be 16 bits, so only implement if it is 32 or 64 bits.
297
+
#[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))]
298
+
impl_into_index!(Platter);
299
+
300
+
/// The state of a Universal Machine: its registers, its memory blocks, the
/// decoded form of the running program and the optional console streams.
#[derive(Default)]
pub struct Um<'a> {
    /// Index of the operation that `run` executes next.
    pub program_counter: Platter,
    /// The eight registers.
    registers: Page,
    /// Program memory, modelled as a `Vec` of `SmallVec`.
    ///
    /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory
    /// indirection penalty for every memory access within that block.
    memory: Vec<SmallVec<[Platter; SMALLVEC_SIZE]>>,
    /// Identifiers of freed blocks, handed out again by later allocations.
    free_blocks: Vec<Platter>,
    /// Partially decoded operations cache.
    ops: Vec<Operation>,
    /// Source for the input operation; when unset, input yields `Platter::MAX`.
    stdin: Option<&'a mut dyn Read>,
    /// Sink for the output operation; when unset, output is discarded.
    stdout: Option<&'a mut dyn Write>,
}
315
+
316
+
impl<'a> Um<'a> {
317
+
/// Initialise a Universal Machine with the specified program scroll.
318
+
pub fn new(program: Vec<Platter>) -> Self {
319
+
let ops = decode_ops(&program);
320
+
Self {
321
+
memory: vec![program.into()],
322
+
ops,
323
+
..Default::default()
324
+
}
325
+
}
326
+
327
+
/// Initialise a Universal Machine with a program read from a legacy
328
+
/// unsigned 8-bit character scroll.
329
+
pub fn from_bytes(program: impl AsRef<[u8]> + 'a) -> Self {
330
+
fn inner<'a>(bytes: &[u8]) -> Um<'a> {
331
+
let mut program =
332
+
Vec::with_capacity(bytes.len().div_ceil(std::mem::size_of::<Platter>()));
333
+
334
+
// Split the program into platters.
335
+
let mut chunks = bytes.chunks_exact(std::mem::size_of::<Platter>());
336
+
for word in &mut chunks {
337
+
program.push(Platter::from_be_bytes(unsafe {
338
+
// SAFETY: The `chunks_exact` iterator will *always* emit
339
+
// a slice of the correct length.
340
+
word.try_into().unwrap_unchecked()
341
+
}));
342
+
}
343
+
344
+
if !chunks.remainder().is_empty() {
345
+
eprintln!(
346
+
"WARNING: program may be corrupt; {} bytes remain after platter conversion.",
347
+
chunks.remainder().len()
348
+
);
349
+
}
350
+
351
+
Um::new(program)
352
+
}
353
+
354
+
inner(program.as_ref())
355
+
}
356
+
357
+
/// Sets the output for the universal machine.
358
+
pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self {
359
+
self.stdout.replace(stdout);
360
+
self
361
+
}
362
+
363
+
/// Sets the input for the universal machine.
364
+
pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self {
365
+
self.stdin.replace(stdin);
366
+
self
367
+
}
368
+
369
+
/// Begins the spin-cycle of the universal machine.
370
+
#[inline(never)]
371
+
pub fn run(mut self) -> Self {
372
+
loop {
373
+
// println!(
374
+
// "{:?}, pc: {:08x}, r: {:08x?}",
375
+
// self.ops[self.program_counter as usize], self.program_counter, self.registers
376
+
// );
377
+
match self.ops[self.program_counter as usize] {
378
+
Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
379
+
Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
380
+
Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
381
+
Operation::Addition { a, b, c } => self.addition(a, b, c),
382
+
Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
383
+
Operation::Division { a, b, c } => self.division(a, b, c),
384
+
Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
385
+
Operation::Halt => break,
386
+
Operation::Allocation { b, c } => self.allocation(b, c),
387
+
Operation::Abandonment { c } => self.abandonment(c),
388
+
Operation::Output { c } => self.output(c),
389
+
Operation::Input { c } => self.input(c),
390
+
Operation::LoadProgram { b, c } => {
391
+
self.load_program(b, c);
392
+
continue;
393
+
}
394
+
Operation::Orthography { a, value } => self.orthography(a, value),
395
+
Operation::IllegalInstruction => self.illegal_instruction(),
396
+
}
397
+
self.program_counter += 1;
398
+
}
399
+
400
+
self
401
+
}
402
+
403
+
// Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even
404
+
// though it has *no* interaction with the code path in Um::run().
405
+
//
406
+
// /// Steps one instruction.
407
+
// #[inline(never)]
408
+
// pub fn step(&mut self) -> bool {
409
+
// match self.ops[self.program_counter as usize] {
410
+
// Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
411
+
// Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
412
+
// Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
413
+
// Operation::Addition { a, b, c } => self.addition(a, b, c),
414
+
// Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
415
+
// Operation::Division { a, b, c } => self.division(a, b, c),
416
+
// Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
417
+
// Operation::Halt => return false,
418
+
// Operation::Allocation { b, c } => self.allocation(b, c),
419
+
// Operation::Abandonment { c } => self.abandonment(c),
420
+
// Operation::Output { c } => self.output(c),
421
+
// Operation::Input { c } => self.input(c),
422
+
// Operation::LoadProgram { b, c } => {
423
+
// self.load_program(b, c);
424
+
// return true;
425
+
// }
426
+
// Operation::Orthography { a, value } => self.orthography(a, value),
427
+
// Operation::IllegalInstruction => self.illegal_instruction(),
428
+
// }
429
+
// self.program_counter += 1;
430
+
// true
431
+
// }
432
+
433
+
/// Loads the value from the specified register.
434
+
fn load_register(&self, register: Register) -> Platter {
435
+
self.registers[register]
436
+
}
437
+
438
+
/// Saves a value to the specified register.
439
+
fn save_register(&mut self, register: Register, value: Platter) {
440
+
self.registers[register] = value;
441
+
}
442
+
443
+
fn conditional_move(&mut self, a: Register, b: Register, c: Register) {
444
+
if self.load_register(c) != 0 {
445
+
self.save_register(a, self.load_register(b));
446
+
}
447
+
}
448
+
449
+
fn array_index(&mut self, a: Register, b: Register, c: Register) {
450
+
let block = self.load_register(b);
451
+
let offset = self.load_register(c);
452
+
self.save_register(a, self.load_memory(block, offset));
453
+
}
454
+
455
+
fn array_amendment(&mut self, a: Register, b: Register, c: Register) {
456
+
let block = self.load_register(a);
457
+
let offset = self.load_register(b);
458
+
let value = self.load_register(c);
459
+
self.store_memory(block, offset, value);
460
+
}
461
+
462
+
fn addition(&mut self, a: Register, b: Register, c: Register) {
463
+
self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c)));
464
+
}
465
+
466
+
fn multiplication(&mut self, a: Register, b: Register, c: Register) {
467
+
self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c)));
468
+
}
469
+
470
+
fn division(&mut self, a: Register, b: Register, c: Register) {
471
+
self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c)));
472
+
}
473
+
474
+
fn not_and(&mut self, a: Register, b: Register, c: Register) {
475
+
self.save_register(a, !(self.load_register(b) & self.load_register(c)));
476
+
}
477
+
478
+
fn allocation(&mut self, b: Register, c: Register) {
479
+
let length = self.load_register(c);
480
+
let index = self.allocate_memory(length);
481
+
self.save_register(b, index);
482
+
}
483
+
484
+
fn abandonment(&mut self, c: Register) {
485
+
let block = self.load_register(c);
486
+
self.free_memory(block);
487
+
}
488
+
489
+
fn output(&mut self, c: Register) {
490
+
let value = self.load_register(c);
491
+
if let Some(stdout) = self.stdout.as_mut() {
492
+
let buffer = [(value & 0xff) as u8];
493
+
stdout.write_all(&buffer).unwrap();
494
+
}
495
+
}
496
+
497
+
fn input(&mut self, c: Register) {
498
+
if let Some(stdin) = self.stdin.as_mut() {
499
+
let mut buffer = vec![0];
500
+
match stdin.read_exact(&mut buffer) {
501
+
Ok(()) => self.save_register(c, buffer[0] as u32),
502
+
Err(_) => self.save_register(c, Platter::MAX),
503
+
}
504
+
} else {
505
+
self.save_register(c, Platter::MAX);
506
+
}
507
+
}
508
+
509
+
fn load_program(&mut self, b: Register, c: Register) {
510
+
let block = self.load_register(b);
511
+
512
+
// Source array is always copied to array[0], but there
513
+
// is no point copying array[0] to array[0].
514
+
if block != 0 {
515
+
let duplicated = self.duplicate_memory(block);
516
+
let ops = decode_ops(duplicated);
517
+
self.ops = ops;
518
+
}
519
+
520
+
self.program_counter = self.load_register(c);
521
+
}
522
+
523
+
fn orthography(&mut self, a: Register, value: Platter) {
524
+
self.save_register(a, value);
525
+
}
526
+
527
+
#[cold]
528
+
#[inline(never)]
529
+
fn illegal_instruction(&self) -> ! {
530
+
panic!(
531
+
"illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}",
532
+
self.memory[0][self.program_counter.into_index()],
533
+
self.program_counter,
534
+
self.registers
535
+
)
536
+
}
537
+
538
+
fn load_memory(&self, block: Platter, offset: Platter) -> Platter {
539
+
let block = block.into_index();
540
+
let offset = offset.into_index();
541
+
assert!(block < self.memory.len() && offset < self.memory[block].len());
542
+
self.memory[block][offset]
543
+
}
544
+
545
+
fn store_memory(&mut self, block: Platter, offset: Platter, value: Platter) {
546
+
let block = block.into_index();
547
+
let offset = offset.into_index();
548
+
assert!(block < self.memory.len() && offset < self.memory[block].len());
549
+
self.memory[block][offset] = value
550
+
}
551
+
552
+
/// Duplicates a block of memory.
553
+
///
554
+
/// The block is copied to the first block of memory.
555
+
fn duplicate_memory(&mut self, block: Platter) -> &[Platter] {
556
+
let block = block.into_index();
557
+
assert!(block < self.memory.len());
558
+
self.memory[0] = self.memory[block].clone();
559
+
&self.memory[0]
560
+
}
561
+
562
+
/// Allocates a block of memory of the specified length.
563
+
fn allocate_memory(&mut self, length: Platter) -> Platter {
564
+
if let Some(index) = self.free_blocks.pop() {
565
+
self.memory[index.into_index()] = Self::new_block(length.into_index());
566
+
index as Platter
567
+
} else {
568
+
self.memory.push(Self::new_block(length.into_index()));
569
+
(self.memory.len() - 1) as Platter
570
+
}
571
+
}
572
+
573
+
/// Frees a block of memory.
574
+
fn free_memory(&mut self, block: Platter) {
575
+
assert!(block.into_index() < self.memory.len());
576
+
self.free_blocks.push(block);
577
+
self.memory[block.into_index()] = Self::new_block(0);
578
+
}
579
+
580
+
/// Creates a new block of memory.
581
+
///
582
+
/// The block is initialised with `len` zeroes.
583
+
fn new_block(len: usize) -> SmallVec<[Platter; SMALLVEC_SIZE]> {
584
+
smallvec::smallvec![0; len]
585
+
}
586
+
}
587
+
588
+
#[cfg(test)]
mod tests {
    use super::*;

    // An empty scroll has no operation to execute, so running it must panic.
    #[test]
    #[should_panic]
    fn empty_program() {
        Um::new(vec![]).run();
    }

    // 0x70000000 carries operator number 7 (Halt) in its top four bits.
    #[test]
    fn just_halt() {
        Um::new(vec![0x70000000]).run();
    }

    // Assembles the bundled hello-world program and checks its exact output.
    #[test]
    fn hello_world() {
        let program = asm::assemble(include_str!("../files/hello-world.asm"));
        let mut buffer = Vec::new();
        Um::new(program).stdout(&mut buffer).run();
        assert_eq!(&buffer, b"Hello, world!\n");
    }

    // Runs the bundled cat program with this source file as input and
    // expects the output to equal the input.
    #[test]
    fn cat() {
        let program = asm::assemble(include_str!("../files/cat.asm"));
        let input = include_bytes!("lib.rs");

        let mut reader = std::io::Cursor::new(input);
        let mut buffer = Vec::new();

        Um::new(program)
            .stdin(&mut reader)
            .stdout(&mut buffer)
            .run();

        assert_eq!(&buffer, &input);
    }
}
-290
src/main.rs
-290
src/main.rs
···
1
-
use smallvec::SmallVec;
2
-
use std::io::{Read, Write};
3
-
#[cfg(feature = "timing")]
4
-
use std::time::Instant;
5
-
use um::{Operation, Parameter, Platter};
6
-
7
-
const SMALLVEC_SIZE: usize = 24;
8
-
9
-
fn main() {
10
-
let mut program = Vec::new();
11
-
for arg in std::env::args().skip(1) {
12
-
let p = std::fs::read(arg).unwrap();
13
-
program.extend_from_slice(&p);
14
-
}
15
-
16
-
Um::from_bytes(program)
17
-
.stdout(&mut std::io::stdout())
18
-
.stdin(&mut std::io::stdin())
19
-
.run();
20
-
}
21
-
22
-
/// Lossless conversion to `usize`.
23
-
///
24
-
/// This should only be implemented on types which can be losslessly
25
-
/// cast to a `usize`.
26
-
trait IntoIndex: Sized + Copy {
27
-
fn into_index(self) -> usize;
28
-
}
29
-
30
-
macro_rules! impl_into_index {
31
-
($t:ty) => {
32
-
impl IntoIndex for $t {
33
-
fn into_index(self) -> usize {
34
-
self as usize
35
-
}
36
-
}
37
-
};
38
-
}
39
-
40
-
#[cfg(target_pointer_width = "16")]
41
-
compile_error!("16 bit architectures are unsupported");
42
-
43
-
// usize *may* be 16 bits, so only implement if it is 32 or 64 bits.
44
-
#[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))]
45
-
impl_into_index!(Platter);
46
-
impl_into_index!(Parameter);
47
-
48
-
#[derive(Default)]
49
-
pub struct Um<'a> {
50
-
program_counter: Platter,
51
-
registers: [Platter; 8],
52
-
memory: Vec<SmallVec<[Platter; SMALLVEC_SIZE]>>,
53
-
free_blocks: Vec<Platter>,
54
-
ops: Vec<Operation>,
55
-
stdin: Option<&'a mut dyn Read>,
56
-
stdout: Option<&'a mut dyn Write>,
57
-
}
58
-
59
-
impl<'a> Um<'a> {
60
-
/// Initialise a Universal Machine with the specified program scroll.
61
-
pub fn new(program: Vec<Platter>) -> Self {
62
-
let ops = um::decode_ops(&program);
63
-
Self {
64
-
memory: vec![program.into()],
65
-
ops,
66
-
..Default::default()
67
-
}
68
-
}
69
-
70
-
/// Initialise a Universal Machine with a program read from a legacy
71
-
/// unsigned 8-bit character scroll.
72
-
pub fn from_bytes(program: impl AsRef<[u8]>) -> Self {
73
-
let bytes = program.as_ref();
74
-
let mut program = Vec::with_capacity(bytes.len().div_ceil(size_of::<Platter>()));
75
-
76
-
// Split the program into platters.
77
-
let mut chunks = bytes.chunks_exact(size_of::<Platter>());
78
-
for word in &mut chunks {
79
-
program.push(Platter::from_be_bytes([word[0], word[1], word[2], word[3]]));
80
-
}
81
-
82
-
if !chunks.remainder().is_empty() {
83
-
eprintln!(
84
-
"WARNING: program may be corrupt; {} bytes remain after platter conversion.",
85
-
chunks.remainder().len()
86
-
);
87
-
}
88
-
89
-
Self::new(program)
90
-
}
91
-
92
-
/// Sets the output for the universal machine.
93
-
pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self {
94
-
self.stdout.replace(stdout);
95
-
self
96
-
}
97
-
98
-
/// Sets the input for the universal machine.
99
-
pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self {
100
-
self.stdin.replace(stdin);
101
-
self
102
-
}
103
-
104
-
/// Begins the spin-cycle of the universal machine.
105
-
pub fn run(mut self) -> Self {
106
-
#[cfg(feature = "timing")]
107
-
let start = Instant::now();
108
-
109
-
while self.step() {}
110
-
111
-
#[cfg(feature = "timing")]
112
-
eprintln!("um complete: {:?}", start.elapsed());
113
-
114
-
self
115
-
}
116
-
117
-
/// Steps one instruction.
118
-
pub fn step(&mut self) -> bool {
119
-
match self.ops[self.program_counter as usize] {
120
-
Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
121
-
Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
122
-
Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
123
-
Operation::Addition { a, b, c } => self.addition(a, b, c),
124
-
Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
125
-
Operation::Division { a, b, c } => self.division(a, b, c),
126
-
Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
127
-
Operation::Halt => return false,
128
-
Operation::Allocation { b, c } => self.allocation(b, c),
129
-
Operation::Abandonment { c } => self.abandonment(c),
130
-
Operation::Output { c } => self.output(c),
131
-
Operation::Input { c } => self.input(c),
132
-
Operation::LoadProgram { b, c } => {
133
-
self.load_program(b, c);
134
-
return true;
135
-
}
136
-
Operation::Orthography { a, value } => self.orthography(a, value),
137
-
Operation::IllegalInstruction => self.illegal_instruction(),
138
-
}
139
-
self.program_counter += 1;
140
-
true
141
-
}
142
-
143
-
/// Loads the value from the specified register.
144
-
fn load_register(&self, index: Parameter) -> Platter {
145
-
assert!(index < 8, "register index out of bounds");
146
-
self.registers[index.into_index()]
147
-
}
148
-
149
-
/// Saves a value to the specified register.
150
-
fn save_register(&mut self, index: Parameter, value: Platter) {
151
-
assert!(index < 8, "register index out of bounds");
152
-
self.registers[index.into_index()] = value;
153
-
}
154
-
155
-
pub fn conditional_move(&mut self, a: Parameter, b: Parameter, c: Parameter) {
156
-
if self.load_register(c) != 0 {
157
-
self.save_register(a, self.load_register(b));
158
-
}
159
-
}
160
-
161
-
pub fn array_index(&mut self, a: Parameter, b: Parameter, c: Parameter) {
162
-
let block = self.load_register(b);
163
-
let offset = self.load_register(c);
164
-
self.save_register(a, self.load_memory(block, offset));
165
-
}
166
-
167
-
pub fn array_amendment(&mut self, a: Parameter, b: Parameter, c: Parameter) {
168
-
let block = self.load_register(a);
169
-
let offset = self.load_register(b);
170
-
let value = self.load_register(c);
171
-
self.store_memory(block, offset, value);
172
-
}
173
-
174
-
pub fn addition(&mut self, a: Parameter, b: Parameter, c: Parameter) {
175
-
self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c)));
176
-
}
177
-
178
-
pub fn multiplication(&mut self, a: Parameter, b: Parameter, c: Parameter) {
179
-
self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c)));
180
-
}
181
-
182
-
pub fn division(&mut self, a: Parameter, b: Parameter, c: Parameter) {
183
-
self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c)));
184
-
}
185
-
186
-
pub fn not_and(&mut self, a: Parameter, b: Parameter, c: Parameter) {
187
-
self.save_register(a, !(self.load_register(b) & self.load_register(c)));
188
-
}
189
-
190
-
pub fn allocation(&mut self, b: Parameter, c: Parameter) {
191
-
let length = self.load_register(c);
192
-
let index = self.allocate_memory(length);
193
-
self.save_register(b, index);
194
-
}
195
-
196
-
pub fn abandonment(&mut self, c: Parameter) {
197
-
let block = self.load_register(c);
198
-
self.free_memory(block);
199
-
}
200
-
201
-
pub fn output(&mut self, c: Parameter) {
202
-
let value = self.load_register(c);
203
-
if let Some(stdout) = self.stdout.as_mut() {
204
-
let buffer = [(value & 0xff) as u8];
205
-
stdout.write_all(&buffer).unwrap();
206
-
}
207
-
}
208
-
209
-
pub fn input(&mut self, c: Parameter) {
210
-
if let Some(stdin) = self.stdin.as_mut() {
211
-
let mut buffer = vec![0];
212
-
match stdin.read_exact(&mut buffer) {
213
-
Ok(()) => self.save_register(c, buffer[0] as u32),
214
-
Err(_) => self.save_register(c, 0xff),
215
-
}
216
-
} else {
217
-
self.save_register(c, 0xff);
218
-
}
219
-
}
220
-
221
-
pub fn load_program(&mut self, b: Parameter, c: Parameter) {
222
-
let block = self.load_register(b);
223
-
224
-
// Source array is always copied to array[0], but there
225
-
// is no point copying array[0] to array[0].
226
-
if block != 0 {
227
-
let duplicated = self.duplicate_memory(block);
228
-
let ops = um::decode_ops(duplicated);
229
-
self.ops = ops;
230
-
}
231
-
232
-
self.program_counter = self.load_register(c);
233
-
}
234
-
235
-
pub fn orthography(&mut self, a: Parameter, value: Platter) {
236
-
self.save_register(a, value);
237
-
}
238
-
239
-
#[cold]
240
-
#[inline(never)]
241
-
fn illegal_instruction(&self) -> ! {
242
-
panic!(
243
-
"illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}",
244
-
self.memory[0][self.program_counter.into_index()],
245
-
self.program_counter,
246
-
self.registers
247
-
)
248
-
}
249
-
250
-
fn load_memory(&self, block: Platter, offset: Platter) -> Platter {
251
-
let block = block.into_index();
252
-
let offset = offset.into_index();
253
-
assert!(block < self.memory.len() && offset < self.memory[block].len());
254
-
self.memory[block][offset]
255
-
}
256
-
257
-
fn store_memory(&mut self, block: Platter, offset: Platter, value: Platter) {
258
-
let block = block.into_index();
259
-
let offset = offset.into_index();
260
-
assert!(block < self.memory.len() && offset < self.memory[block].len());
261
-
self.memory[block][offset] = value
262
-
}
263
-
264
-
fn duplicate_memory(&mut self, block: Platter) -> &[Platter] {
265
-
let block = block.into_index();
266
-
assert!(block < self.memory.len());
267
-
self.memory[0] = self.memory[block].clone();
268
-
&self.memory[0]
269
-
}
270
-
271
-
fn allocate_memory(&mut self, length: Platter) -> Platter {
272
-
if let Some(index) = self.free_blocks.pop() {
273
-
self.memory[index.into_index()] = Self::new_block(length.into_index());
274
-
index as Platter
275
-
} else {
276
-
self.memory.push(Self::new_block(length.into_index()));
277
-
(self.memory.len() - 1) as Platter
278
-
}
279
-
}
280
-
281
-
fn free_memory(&mut self, block: Platter) {
282
-
assert!(block.into_index() < self.memory.len());
283
-
self.free_blocks.push(block);
284
-
self.memory[block.into_index()] = Self::new_block(0);
285
-
}
286
-
287
-
fn new_block(len: usize) -> SmallVec<[Platter; SMALLVEC_SIZE]> {
288
-
smallvec::smallvec![0; len]
289
-
}
290
-
}
+59
src/str.rs
+59
src/str.rs
···
1
+
use std::{borrow::Cow, str::CharIndices};
2
+
3
+
/// Error returned by [`unescape_str`] for a malformed escape sequence.
///
/// Field `0` is the character that followed the backslash and field `1` is
/// the byte offset of that character within the input. For a backslash at the
/// very end of the input, where nothing follows it, the fields hold `'\\'`
/// and the byte offset of the backslash itself.
#[derive(Debug)]
pub struct InvalidCharacterEscape(pub char, pub usize);

/// Resolves the backslash escape sequences in `s`.
///
/// The recognised escapes are `\\`, `\n`, `\0`, `\"`, `\'`, `\r` and `\t`.
/// Input that contains no backslash is returned borrowed, without
/// allocating; otherwise a new string is built.
///
/// # Errors
///
/// Returns [`InvalidCharacterEscape`] when a backslash is followed by any
/// other character, or when the input ends directly after a backslash.
pub fn unescape_str(s: &str) -> Result<Cow<'_, str>, InvalidCharacterEscape> {
    /// Appends the unescaped form of every character `chars` yields.
    fn unescape_into(
        buffer: &mut String,
        mut chars: CharIndices<'_>,
    ) -> Result<(), InvalidCharacterEscape> {
        while let Some((index, c)) = chars.next() {
            if c != '\\' {
                buffer.push(c);
                continue;
            }

            // A backslash must be followed by the character it escapes; a
            // dangling one is an error instead of being silently dropped.
            let Some((escaped_index, escaped)) = chars.next() else {
                return Err(InvalidCharacterEscape('\\', index));
            };

            buffer.push(match escaped {
                '\\' => '\\',
                'n' => '\n',
                '0' => '\0',
                '"' => '"',
                '\'' => '\'',
                'r' => '\r',
                't' => '\t',
                other => return Err(InvalidCharacterEscape(other, escaped_index)),
            });
        }

        Ok(())
    }

    // Fast path: nothing to unescape, so hand the input back untouched.
    if !s.contains('\\') {
        return Ok(Cow::Borrowed(s));
    }

    // Unescaping never makes the text longer, so one allocation suffices.
    let mut buffer = String::with_capacity(s.len());
    unescape_into(&mut buffer, s.char_indices())?;
    Ok(Cow::Owned(buffer))
}
44
+
45
+
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::unescape_str;

    /// Input without any escape sequence must come back borrowed and with
    /// its text unchanged.
    #[test]
    fn no_unescapes() {
        let input = "Hello, this string should have no characters that need unescaping.";

        let unescaped = unescape_str(input).unwrap();

        assert!(matches!(unescaped, Cow::Borrowed(_)));
        assert_eq!(input, unescaped);
    }
}