// Copyright (C) 2025 Thom Hayward.
//
// This program is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software
// Foundation, version 3.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// this program. If not, see .
//
mod lexer;
mod parse;
use crate::reg::Register;
use lexer::Token;
use parse::{Instruction, Node, NodeType, PragmaType};
use std::collections::HashMap;
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum Section {
Text,
Data,
}
/// Assemble a Universal Machine program.
///
/// # Panics
///
/// Panics if `source` cannot be parsed.
///
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn assemble<'s>(source: &'s str) -> Vec {
let parsed = parse::parse("", source).unwrap();
let mut sections: HashMap>> = HashMap::new();
let mut offsets: HashMap = HashMap::new();
let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new();
for node in parsed.nodes() {
match node.entity {
NodeType::Pragma(_) => {
let loc = *offsets
.entry(Section::Data)
.and_modify(|loc| *loc += node.size())
.or_default();
sections
.entry(Section::Data)
.and_modify(|section| section.push(node))
.or_insert_with(|| vec![node]);
for label in &node.labels {
label_locations.insert(label, (Section::Data, loc));
}
}
NodeType::Instruction(_) => {
let loc = *offsets
.entry(Section::Text)
.and_modify(|loc| *loc += node.size())
.or_default();
sections
.entry(Section::Text)
.and_modify(|section| section.push(node))
.or_insert_with(|| vec![node]);
for label in &node.labels {
label_locations.insert(label, (Section::Text, loc));
}
}
NodeType::Comment(_) => {}
}
}
let text = sections.remove(&Section::Text).unwrap();
let data_offset = text.len();
let mut program = vec![];
for node in text {
let NodeType::Instruction(instruction) = &node.entity else {
panic!("invalid node in .text section");
};
let encoded = match *instruction {
Instruction::ConditionalMove {
destination,
source,
condition,
} => encode_standard(0x00, destination, source, condition),
Instruction::Load {
destination,
address,
} => {
let parse::Location { block, offset } = address;
encode_standard(0x01, destination, block, offset)
}
Instruction::Store { source, address } => {
let parse::Location { block, offset } = address;
encode_standard(0x02, block, offset, source)
}
Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b),
Instruction::AddAssign { destination, a } => {
encode_standard(0x03, destination, destination, a)
}
Instruction::AddSelf { destination } => {
encode_standard(0x03, destination, destination, destination)
}
Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b),
Instruction::MulAssign { destination, a } => {
encode_standard(0x04, destination, destination, a)
}
Instruction::MulSelf { destination } => {
encode_standard(0x04, destination, destination, destination)
}
Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b),
Instruction::DivAssign { destination, a } => {
encode_standard(0x05, destination, destination, a)
}
Instruction::DivSelf { destination } => {
encode_standard(0x05, destination, destination, destination)
}
Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b),
Instruction::NandAssign { destination, a } => {
encode_standard(0x06, destination, destination, a)
}
Instruction::NandSelf { destination } => {
encode_standard(0x06, destination, destination, destination)
}
Instruction::Halt => encode_standard(
0x07,
Register::default(),
Register::default(),
Register::default(),
),
Instruction::Alloc {
destination,
length,
} => encode_standard(0x08, Register::default(), destination, length),
Instruction::Free { block } => {
encode_standard(0x09, Register::default(), Register::default(), block)
}
Instruction::Out { source } => {
encode_standard(0x0a, Register::default(), Register::default(), source)
}
Instruction::In { destination } => {
encode_standard(0x0b, Register::default(), Register::default(), destination)
}
Instruction::Jmp { location } => {
let parse::Location { block, offset } = location;
encode_standard(0x0c, Register::default(), block, offset)
}
Instruction::Address {
destination,
ref reference,
} => {
// lookup reference
let Some((section, offset)) = label_locations.get(reference.label) else {
panic!("failed to resolve {}", reference.label);
};
let value = match section {
Section::Text => *offset,
Section::Data => data_offset + *offset,
};
0xd000_0000
| destination.encode_a_ortho()
| encode_literal(u32::try_from(value).unwrap())
}
Instruction::LiteralMove {
destination,
literal,
} => 0xd000_0000 | destination.encode_a_ortho() | encode_literal(literal),
};
program.push(encoded);
}
if let Some(data) = sections.remove(&Section::Data) {
for node in data {
let NodeType::Pragma(pragma) = &node.entity else {
panic!("invalid node in .data section. {node:?}");
};
let encoded = match &pragma.payload {
PragmaType::WideString { value } => {
for &byte in value.as_bytes() {
program.push(u32::from(byte));
}
Some(0) // terminating byte.
}
PragmaType::U32 { value } => Some(*value),
};
if let Some(encoded) = encoded {
program.push(encoded);
}
}
}
program
}
fn encode_literal(value: u32) -> u32 {
const LITERAL_MAX: u32 = 0x1ff_ffff;
assert!(value <= LITERAL_MAX, "literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})");
value
}
const fn encode_standard(op: u32, a: Register, b: Register, c: Register) -> u32 {
(op << 28) | a.encode_a() | b.encode_b() | c.encode_c()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ops::Operation;
use crate::reg::Register::*;
#[test]
fn wide_str() {
// Embed a wide string and get a reference to it.
let program = assemble(
r#"
adr r0, msg
msg: .wstr "Hello"
"#,
);
let ops = crate::ops::decode(&program);
assert_eq!(ops[0], Operation::Orthography { a: R0, value: 1 });
let mut platters = program.into_iter().skip(1);
assert_eq!(platters.next(), Some('H' as u32));
assert_eq!(platters.next(), Some('e' as u32));
assert_eq!(platters.next(), Some('l' as u32));
assert_eq!(platters.next(), Some('l' as u32));
assert_eq!(platters.next(), Some('o' as u32));
assert_eq!(platters.next(), Some(0));
assert_eq!(platters.next(), None);
}
#[test]
fn addresses() {
let program = assemble(
r#"
halt
start:
ldr r2, [r0, r1]
str r2, [r0, r1]
adr r3, start
halt
"#,
);
let mut ops = crate::ops::decode(&program).into_iter();
assert_eq!(ops.next(), Some(Operation::Halt));
assert_eq!(
ops.next(),
Some(Operation::ArrayIndex {
a: R2,
b: R0,
c: R1
})
);
assert_eq!(
ops.next(),
Some(Operation::ArrayAmendment {
a: R0,
b: R1,
c: R2
})
);
assert_eq!(ops.next(), Some(Operation::Orthography { a: R3, value: 1 }));
assert_eq!(ops.next(), Some(Operation::Halt));
assert_eq!(ops.next(), None);
}
#[test]
fn load_store() {
let state = crate::Um::new(assemble(
r#"
adr r1, loc
ldr r2, [r0, r1]
mov r3, 56
str r3, [r0, r1]
halt
loc:.u32 42
"#,
))
.run();
assert_eq!(state.registers[R2], 42);
assert_eq!(state.memory[0][5], 56);
}
#[test]
fn addition() {
let state = crate::Um::new(assemble(
r#"
mov r0, 42
mov r1, 64
mov r2, 8192
add r3, r0, r1 ; r3 = r0 + r1 = 106
add r1, r2 ; r1 = r1 + r2 = 8256
add r0 ; r0 = r0 + r0 = 84
halt
"#,
))
.run();
assert_eq!(state.registers[R0], 84);
assert_eq!(state.registers[R1], 8256);
assert_eq!(state.registers[R2], 8192);
assert_eq!(state.registers[R3], 106);
}
#[test]
fn alloc() {
let state = crate::Um::new(assemble(
r#"
; Allocate 1000 bytes.
mov r0, 1000
alloc r1, r0
halt
"#,
))
.run();
assert_eq!(state.registers[R0], 1000);
assert_ne!(state.registers[R1], 0);
assert_eq!(state.memory[state.registers[R1] as usize].len(), 1000);
}
#[test]
fn free() {
let state = crate::Um::new(assemble(
r#"
; Allocate 1000 bytes.
mov r0, 1000
alloc r1, r0
free r1
halt
"#,
))
.run();
assert_eq!(state.registers[R0], 1000);
assert_ne!(state.registers[R1], 0);
assert_eq!(
state.memory[state.registers[R1] as usize].len(),
0,
"memory not free'd"
);
}
}