Next Generation WASM Microkernel Operating System
wasm os rust microkernel
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: bring back multicore support (#252)

* feat(loader): make multicore ready

* fix(loader): don't zero out BSS from secondary harts

* feat(loader): start secondary harts

* feat(loader): map kernel stacks & TLS regions

* fix(sync): don't deadlock in `Once::poll`

* fix(kernel): use global shared logger for prettier printing

* feat(kernel): make initialization multicore ready

* clippy

* docs

Authored by Jonas Kruckenberg and committed by GitHub.
08acac3f c5663208

+695 -266
+1
Cargo.lock
··· 523 523 "rand 0.8.5", 524 524 "rand_chacha 0.3.1", 525 525 "riscv", 526 + "sync", 526 527 "xmas-elf", 527 528 ] 528 529
+23 -13
kernel/src/arch/riscv64/mod.rs
··· 6 6 // copied, modified, or distributed except according to those terms. 7 7 8 8 mod setjmp_longjmp; 9 - mod start; 10 9 mod trap_handler; 11 10 mod utils; 12 11 mod vm; ··· 24 23 KERNEL_ASPACE_BASE, PAGE_SHIFT, PAGE_SIZE, USER_ASPACE_BASE, 25 24 }; 26 25 26 + /// Global RISC-V specific initialization. 27 27 #[cold] 28 28 pub fn init() { 29 29 let supported = riscv::sbi::supported_extensions().unwrap(); 30 30 log::trace!("Supported SBI extensions: {supported:?}"); 31 31 32 - log::trace!("BOOT STACK {:?}", start::BOOT_STACK.0.as_ptr_range()); 33 - 34 32 vm::init(); 33 + } 35 34 36 - // TODO riscv64_mmu_early_init_percpu 35 + /// Early per-hart and RISC-V specific initialization. 36 + /// 37 + /// This function will be called before global initialization is done, notably this function 38 + /// cannot call logging functions, cannot allocate memory, cannot access hart-local state and should 39 + /// not panic as the panic handler is not initialized yet. 40 + #[cold] 41 + pub fn per_hart_init_early() { 42 + // Safety: register access 43 + unsafe { 44 + // enable counters 45 + scounteren::set_cy(); 46 + scounteren::set_tm(); 47 + scounteren::set_ir(); 48 + 49 + // Set the FPU state to initial 50 + sstatus::set_fs(FS::Initial); 51 + } 37 52 } 38 53 54 + /// Late per-hart and RISC-V specific initialization. 55 + /// 56 + /// This function will be called after all global initialization is done. 39 57 #[cold] 40 - pub fn per_hart_init() { 58 + pub fn per_hart_init_late() { 41 59 // Safety: register access 42 60 unsafe { 43 61 // Initialize the trap handler ··· 49 67 // Enable supervisor timer and external interrupts 50 68 sie::set_stie(); 51 69 sie::set_seie(); 52 - 53 - // enable counters 54 - scounteren::set_cy(); 55 - scounteren::set_tm(); 56 - scounteren::set_ir(); 57 - 58 - // Set the FPU state to initial 59 - sstatus::set_fs(FS::Initial); 60 70 } 61 71 } 62 72
-58
kernel/src/arch/riscv64/start.rs
··· 1 - // Copyright 2025 Jonas Kruckenberg 2 - // 3 - // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or 4 - // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or 5 - // http://opensource.org/licenses/MIT>, at your option. This file may not be 6 - // copied, modified, or distributed except according to those terms. 7 - 8 - use crate::arch::PAGE_SIZE; 9 - use crate::STACK_SIZE_PAGES; 10 - use core::arch::naked_asm; 11 - use loader_api::LoaderConfig; 12 - 13 - #[unsafe(link_section = ".bss.uninit")] 14 - pub static BOOT_STACK: Stack = Stack([0; STACK_SIZE_PAGES * PAGE_SIZE]); 15 - 16 - #[repr(C, align(128))] 17 - pub struct Stack(pub [u8; STACK_SIZE_PAGES * PAGE_SIZE]); 18 - 19 - #[used(linker)] 20 - #[unsafe(link_section = ".loader_config")] 21 - static LOADER_CONFIG: LoaderConfig = LoaderConfig::new_default(); 22 - 23 - #[unsafe(no_mangle)] 24 - #[naked] 25 - unsafe extern "C" fn _start(hartid: usize, boot_info: &'static loader_api::BootInfo) -> ! { 26 - // Safety: inline assembly 27 - unsafe { 28 - naked_asm! { 29 - // Setup the stack pointer 30 - "la t0, {boot_stack_start}", // set the stack pointer to the bottom of the stack 31 - "li t1, {boot_stack_size}", // load the stack size 32 - "add sp, t0, t1", // add both to get the top of the stack 33 - 34 - // Fill the stack with a canary pattern (0xACE0BACE) so that we can identify unused stack memory 35 - // in dumps & calculate stack usage. This is also really great (don't ask my why I know this) to identify 36 - // when we tried executing stack memory. 37 - "li t1, 0xACE0BACE", 38 - "1:", 39 - " sw t1, 0(t0)", // write the canary as u64 40 - " addi t0, t0, 8", // move to the next u64 41 - " bltu t0, sp, 1b", // loop until we reach the top of the stack 42 - 43 - // Call the rust entry point 44 - "call {start_rust}", 45 - 46 - // Loop forever. 
47 - // `start_rust` should never return, but in case it does prevent the hart from executing 48 - // random code 49 - "2:", 50 - " wfi", 51 - " j 2b", 52 - 53 - boot_stack_start = sym BOOT_STACK, 54 - boot_stack_size = const STACK_SIZE_PAGES * PAGE_SIZE, 55 - start_rust = sym crate::main, 56 - } 57 - } 58 - }
+2 -2
kernel/src/arch/riscv64/trap_handler.rs
··· 27 27 .byte_add(TRAP_STACK_SIZE_PAGES * PAGE_SIZE) as *mut u8 28 28 }; 29 29 30 - log::debug!("setting sscratch to {:p}", trap_stack_top); 30 + log::trace!("setting sscratch to {:p}", trap_stack_top); 31 31 // Safety: inline assembly 32 32 unsafe { 33 33 asm!( ··· 36 36 ); 37 37 } 38 38 39 - log::debug!("setting trap vec to {:#x}", trap_vec as usize); 39 + log::trace!("setting trap vec to {:#x}", trap_vec as usize); 40 40 // Safety: register access 41 41 unsafe { stvec::write(trap_vec as usize, stvec::Mode::Vectored) }; 42 42 }
+24 -25
kernel/src/logger.rs
··· 5 5 // http://opensource.org/licenses/MIT>, at your option. This file may not be 6 6 // copied, modified, or distributed except according to those terms. 7 7 8 - use core::cell::RefCell; 9 - use core::fmt::Write; 8 + use core::cell::Cell; 10 9 use log::{LevelFilter, Metadata, Record}; 11 10 use thread_local::thread_local; 12 11 13 - /// Initializes the global logger with the semihosting logger. 14 - /// 15 - /// # Panics 16 - /// 17 - /// This function will panic if it is called more than once, or if another library has already initialized a global logger. 12 + thread_local!( 13 + static HARTID: Cell<usize> = Cell::new(usize::MAX); 14 + ); 15 + 18 16 pub fn init(lvl: LevelFilter) { 19 17 static LOGGER: Logger = Logger; 20 18 ··· 22 20 log::set_max_level(lvl); 23 21 } 24 22 25 - pub fn init_hart(hartid: usize) { 26 - STATE.with_borrow_mut(|state| state.1 = hartid); 23 + pub fn per_hart_init(hartid: usize) { 24 + HARTID.set(hartid); 27 25 } 28 - 29 - thread_local!( 30 - static STATE: RefCell<(riscv::hio::HostStream, usize)> = 31 - RefCell::new((riscv::hio::HostStream::new_stdout(), 0)); 32 - ); 33 26 34 27 struct Logger; 35 28 ··· 40 33 41 34 fn log(&self, record: &Record) { 42 35 if self.enabled(record.metadata()) { 43 - let _ = STATE.try_with(|state| { 44 - // Safety: state is always initialized 45 - let (stdout, hartid) = unsafe { &mut *state.as_ptr() }; 46 - let _ = stdout.write_fmt(format_args!( 47 - "[{:<5} HART {} {}] {}\n", 48 - record.level(), 49 - *hartid, 50 - record.module_path_static().unwrap_or_default(), 51 - record.args() 52 - )); 53 - }); 36 + print(format_args!( 37 + "[{:<5} HART {} {}] {}\n", 38 + record.level(), 39 + HARTID.get(), 40 + record.module_path_static().unwrap_or_default(), 41 + record.args() 42 + )); 54 43 } 55 44 } 56 45 57 46 fn flush(&self) {} 58 47 } 48 + 49 + fn print(args: core::fmt::Arguments) { 50 + cfg_if::cfg_if! 
{ 51 + if #[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))] { 52 + riscv::hio::_print(args); 53 + } else { 54 + compile_error!("unsupported target architecture"); 55 + } 56 + } 57 + }
+67 -84
kernel/src/main.rs
··· 38 38 use crate::time::Instant; 39 39 use crate::vm::bootstrap_alloc::BootstrapAllocator; 40 40 use arrayvec::ArrayVec; 41 - use core::alloc::Layout; 42 41 use core::cell::RefCell; 43 42 use core::range::Range; 44 - use core::{cmp, slice}; 45 - use loader_api::{BootInfo, MemoryRegionKind, TlsTemplate}; 46 - use sync::OnceLock; 43 + use loader_api::{BootInfo, LoaderConfig, MemoryRegionKind}; 44 + use sync::{Once, OnceLock}; 47 45 use thread_local::thread_local; 48 46 use vm::frame_alloc; 49 - use vm::{PhysicalAddress, VirtualAddress}; 47 + use vm::PhysicalAddress; 50 48 51 49 /// The log level for the kernel 52 50 pub const LOG_LEVEL: log::Level = log::Level::Trace; 53 51 /// The size of the stack in pages 54 - pub const STACK_SIZE_PAGES: usize = 128; // TODO find a lower more appropriate value 52 + pub const STACK_SIZE_PAGES: u32 = 128; // TODO find a lower more appropriate value 55 53 /// The size of the trap handler stack in pages 56 54 pub const TRAP_STACK_SIZE_PAGES: usize = 64; // TODO find a lower more appropriate value 57 55 /// The initial size of the kernel heap in pages. ··· 71 69 RefCell::new(HartLocalMachineInfo::default()); 72 70 ); 73 71 74 - fn main(hartid: usize, boot_info: &'static BootInfo) -> ! 
{ 75 - // initialize a simple bump allocator for allocating memory before our virtual memory subsystem 76 - // is available 77 - let allocatable_memories = allocatable_memory_regions(boot_info); 78 - let mut boot_alloc = BootstrapAllocator::new(&allocatable_memories); 72 + #[used(linker)] 73 + #[unsafe(link_section = ".loader_config")] 74 + static LOADER_CONFIG: LoaderConfig = { 75 + let mut cfg = LoaderConfig::new_default(); 76 + cfg.kernel_stack_size_pages = STACK_SIZE_PAGES; 77 + cfg 78 + }; 79 79 80 - // initializing the global allocator 81 - allocator::init(&mut boot_alloc, boot_info); 80 + // | hart | stack | tls | 81 + // |------|----------------------------------------|----------------------------------------| 82 + // | 0 | 0xffffffc0c008c000..0xffffffc0c00a0000 | 0xffffffc040000000..0xffffffc0400400c2 | 83 + // | 1 | 0xffffffc0c0078000..0xffffffc0c008c000 | 0xffffffc0400400c2..0xffffffc040080184 | 84 + // | 2 | 0xffffffc0c0064000..0xffffffc0c0078000 | 0xffffffc040080184..0xffffffc0400c0246 | 85 + // | 3 | 0xffffffc0c0050000..0xffffffc0c0064000 | 0xffffffc0400c0246..0xffffffc040100308 | 86 + // | 4 | 0xffffffc0c003c000..0xffffffc0c0050000 | 0xffffffc040100308..0xffffffc0401403ca | 87 + // | 5 | 0xffffffc0c0028000..0xffffffc0c003c000 | 0xffffffc0401403ca..0xffffffc04018048c | 88 + // | 6 | 0xffffffc0c0014000..0xffffffc0c0028000 | 0xffffffc04018048c..0xffffffc0401c054e | 89 + // | 7 | 0xffffffc0c0000000..0xffffffc0c0014000 | 0xffffffc0401c054e..0xffffffc040200610 | 82 90 83 - // initialize the panic backtracing subsystem after the allocator has been set up 84 - // since setting up the symbolization context requires allocation 85 - panic::init(boot_info); 91 + #[unsafe(no_mangle)] 92 + fn _start(hartid: usize, boot_info: &'static BootInfo, boot_ticks: u64) -> ! 
{ 93 + // initialize the hart local state of the logger before enabling it, so it is ready as soon as 94 + // logging is turned on 95 + logger::per_hart_init(hartid); 86 96 87 - // initialize thread-local storage 88 - // done after global allocator initialization since TLS destructors are registered in a heap 89 - // allocated Vec 90 - let tls = init_tls(&mut boot_alloc, &boot_info.tls_template); 97 + // perform EARLY per-hart, architecture-specific initialization 98 + // (e.g. resetting the FPU) 99 + arch::per_hart_init_early(); 91 100 92 - // initialize the logger 93 - // done after TLS initialization since we maintain per-hart host stdio channels 94 - logger::init_hart(hartid); 95 - logger::init(LOG_LEVEL.to_level_filter()); 101 + let fdt = locate_device_tree(boot_info); 96 102 97 - log::debug!("\n{boot_info}"); 98 - log::trace!("Allocatable memory regions: {allocatable_memories:?}"); 99 - log::trace!("Thread pointer: {tls:?}"); 103 + static SYNC: Once = Once::new(); 104 + SYNC.call_once(|| { 105 + // initialize the global logger as early as possible 106 + logger::init(LOG_LEVEL.to_level_filter()); 100 107 101 - // perform per-hart, architecture-specific initialization 102 - // (e.g. 
setting the trap vector and resetting the FPU) 103 - arch::per_hart_init(); 108 + // initialize a simple bump allocator for allocating memory before our virtual memory subsystem 109 + // is available 110 + let allocatable_memories = allocatable_memory_regions(boot_info); 111 + let mut boot_alloc = BootstrapAllocator::new(&allocatable_memories); 112 + 113 + // initializing the global allocator 114 + allocator::init(&mut boot_alloc, boot_info); 115 + 116 + // initialize the panic backtracing subsystem after the allocator has been set up 117 + // since setting up the symbolization context requires allocation 118 + panic::init(boot_info); 119 + 120 + // perform global, architecture-specific initialization 121 + arch::init(); 104 122 105 - // perform global, architecture-specific initialization 106 - arch::init(); 123 + // // TODO move this into a init function 124 + let minfo = MACHINE_INFO 125 + .get_or_try_init(|| { 126 + // Safety: we have to trust the loader mapped the fdt correctly 127 + unsafe { MachineInfo::from_dtb(fdt) } 128 + }) 129 + .unwrap(); 130 + log::debug!("\n{minfo}"); 107 131 108 - let fdt = locate_device_tree(boot_info); 132 + // initialize the global frame allocator 133 + frame_alloc::init(boot_alloc); 109 134 110 - // TODO move this into a init function 111 - let minfo = MACHINE_INFO 112 - .get_or_try_init(|| { 113 - // Safety: we have to trust the loader mapped the fdt correctly 114 - unsafe { MachineInfo::from_dtb(fdt) } 115 - }) 116 - .unwrap(); 117 - log::debug!("\n{minfo}"); 135 + // initialize the virtual memory subsystem 136 + vm::init(boot_info, minfo).unwrap(); 137 + }); 118 138 119 - // Safety: we have to trust the loader mapped the fdt correctly 139 + // // Safety: we have to trust the loader mapped the fdt correctly 120 140 let hart_local_minfo = unsafe { HartLocalMachineInfo::from_dtb(hartid, fdt).unwrap() }; 121 141 log::debug!("\n{hart_local_minfo}"); 122 142 HART_LOCAL_MACHINE_INFO.set(hart_local_minfo); 123 143 124 - 
frame_alloc::init(boot_alloc); 125 - 126 - // TODO init kernel address space (requires global allocator) 127 - 128 - vm::init(boot_info, minfo).unwrap(); 144 + // perform EARLY per-hart, architecture-specific initialization 145 + // (e.g. setting the trap vector and enabling interrupts) 146 + arch::per_hart_init_late(); 129 147 130 148 log::info!( 131 149 "Booted in ~{:?} ({:?} in k23)", 132 150 Instant::now().duration_since(Instant::ZERO), 133 - Instant::from_ticks(boot_info.boot_ticks).elapsed() 151 + Instant::from_ticks(boot_ticks).elapsed() 134 152 ); 153 + 135 154 // wasm::test(); 136 155 137 156 // - [all][global] parse cmdline ··· 154 173 } 155 174 156 175 arch::exit(0); 157 - } 158 - 159 - fn init_tls( 160 - boot_alloc: &mut BootstrapAllocator, 161 - maybe_tls_template: &Option<TlsTemplate>, 162 - ) -> Option<VirtualAddress> { 163 - if let Some(template) = &maybe_tls_template { 164 - let layout = 165 - Layout::from_size_align(template.mem_size, cmp::max(template.align, arch::PAGE_SIZE)) 166 - .unwrap(); 167 - let phys = boot_alloc.allocate_contiguous_zeroed(layout).unwrap(); 168 - 169 - // Use the phys_map to access the newly allocated TLS region 170 - let virt = VirtualAddress::from_phys(phys).unwrap(); 171 - 172 - if template.file_size != 0 { 173 - // Safety: We have to trust the loaders BootInfo here 174 - unsafe { 175 - let src: &[u8] = 176 - slice::from_raw_parts(template.start_addr as *const u8, template.file_size); 177 - let dst: &mut [u8] = 178 - slice::from_raw_parts_mut(virt.as_mut_ptr(), template.file_size); 179 - 180 - // sanity check to ensure our destination allocated memory is actually zeroed. 
181 - // if it's not, that likely means we're about to override something important 182 - debug_assert!(dst.iter().all(|&x| x == 0)); 183 - 184 - dst.copy_from_slice(src); 185 - } 186 - } 187 - 188 - arch::set_thread_ptr(virt); 189 - Some(virt) 190 - } else { 191 - None 192 - } 193 176 } 194 177 195 178 /// Builds a list of memory regions from the boot info that are usable for allocation.
+63
libs/sync/src/barrier.rs
··· 1 + // Copyright 2025 Jonas Kruckenberg 2 + // 3 + // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or 4 + // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or 5 + // http://opensource.org/licenses/MIT>, at your option. This file may not be 6 + // copied, modified, or distributed except according to those terms. 7 + 8 + use crate::Mutex; 9 + use core::hint; 10 + 11 + pub struct Barrier { 12 + lock: Mutex<BarrierState>, 13 + num_threads: usize, 14 + } 15 + 16 + // The inner state of a double barrier 17 + struct BarrierState { 18 + count: usize, 19 + generation_id: usize, 20 + } 21 + 22 + pub struct BarrierWaitResult(bool); 23 + 24 + impl Barrier { 25 + pub const fn new(n: usize) -> Self { 26 + Self { 27 + lock: Mutex::new(BarrierState { 28 + count: 0, 29 + generation_id: 0, 30 + }), 31 + num_threads: n, 32 + } 33 + } 34 + 35 + pub fn wait(&self) -> BarrierWaitResult { 36 + let mut lock = self.lock.lock(); 37 + lock.count += 1; 38 + 39 + if lock.count < self.num_threads { 40 + // not the leader 41 + let local_gen = lock.generation_id; 42 + 43 + while local_gen == lock.generation_id && lock.count < self.num_threads { 44 + drop(lock); 45 + hint::spin_loop(); 46 + lock = self.lock.lock(); 47 + } 48 + BarrierWaitResult(false) 49 + } else { 50 + // this thread is the leader, 51 + // and is responsible for incrementing the generation 52 + lock.count = 0; 53 + lock.generation_id = lock.generation_id.wrapping_add(1); 54 + BarrierWaitResult(true) 55 + } 56 + } 57 + } 58 + 59 + impl BarrierWaitResult { 60 + pub fn is_leader(&self) -> bool { 61 + self.0 62 + } 63 + }
+2
libs/sync/src/lib.rs
··· 9 9 #![no_std] 10 10 #![cfg_attr(feature = "thread-local", feature(thread_local))] 11 11 12 + mod barrier; 12 13 mod lazy_lock; 13 14 mod once; 14 15 mod once_lock; ··· 20 21 pub use raw_mutex::RawMutex; 21 22 pub use raw_rwlock::RawRwLock; 22 23 24 + pub use barrier::{Barrier, BarrierWaitResult}; 23 25 pub use lazy_lock::LazyLock; 24 26 pub use once::Once; 25 27 pub use once_lock::OnceLock;
+3 -5
libs/sync/src/once.rs
··· 120 120 } 121 121 } 122 122 123 - fn wait(&self) { 124 - loop { 125 - if !self.poll() { 126 - core::hint::spin_loop(); 127 - } 123 + pub fn wait(&self) { 124 + while !self.poll() { 125 + core::hint::spin_loop(); 128 126 } 129 127 } 130 128 }
+1
loader/Cargo.toml
··· 11 11 [dependencies] 12 12 dtb-parser.workspace = true 13 13 loader-api.workspace = true 14 + sync.workspace = true 14 15 15 16 log.workspace = true 16 17 cfg-if.workspace = true
+12 -3
loader/api/src/config.rs
··· 12 12 #[repr(C)] 13 13 pub struct LoaderConfig { 14 14 magic: u32, 15 + /// The size of the stack that the loader should allocate for the kernel (in pages). 16 + /// 17 + /// The loader starts the kernel with a valid stack pointer. This setting defines 18 + /// the stack size that the loader should allocate and map. 19 + /// 20 + /// The stack is created with an additional guard page, so a stack overflow will lead to 21 + /// a page fault. 22 + pub kernel_stack_size_pages: u32, 15 23 } 16 24 17 25 impl LoaderConfig { 18 26 /// Creates a new default configuration with the following values: 19 27 /// 20 28 /// - `kernel_stack_size_pages`: 20 21 - /// - `kernel_heap_size_pages`: None 22 - /// - `memory_mode`: The default memory mode for the target architecture (Sv39 for Risc-V). 23 29 #[must_use] 24 30 pub const fn new_default() -> Self { 25 - Self { magic: CFG_MAGIC } 31 + Self { 32 + magic: CFG_MAGIC, 33 + kernel_stack_size_pages: 20, 34 + } 26 35 } 27 36 28 37 /// Asserts that the configuration is valid.
+2 -2
loader/api/src/info.rs
··· 12 12 #[derive(Debug)] 13 13 #[non_exhaustive] 14 14 pub struct BootInfo { 15 + pub hart_mask: usize, 15 16 /// A map of the physical memory regions of the underlying machine. 16 17 /// 17 18 /// The loader parses this information from the firmware and also reports regions used ··· 37 38 /// 38 39 /// This field can be used by the kernel to perform introspection of its own ELF file. 39 40 pub kernel_phys: Range<usize>, // PhysicalAddress 40 - pub boot_ticks: u64, 41 41 } 42 42 43 43 impl BootInfo { ··· 47 47 pub fn new(memory_regions: MemoryRegions) -> Self { 48 48 Self { 49 49 memory_regions, 50 + hart_mask: 0, 50 51 physical_address_offset: Default::default(), 51 52 physical_memory_map: Default::default(), 52 53 tls_template: None, 53 54 kernel_virt: Default::default(), 54 55 kernel_phys: Default::default(), 55 - boot_ticks: 0, 56 56 } 57 57 } 58 58 }
+2
loader/riscv64-qemu.ld
··· 45 45 __data_end = .; 46 46 } 47 47 48 + __stack_start = .; 49 + 48 50 /DISCARD/ : { 49 51 *(.comment*) 50 52 *(.gcc_except_table*)
+170 -33
loader/src/arch/riscv64.rs
··· 7 7 8 8 use crate::error::Error; 9 9 use crate::frame_alloc::FrameAllocator; 10 + use crate::machine_info::MachineInfo; 10 11 use crate::mapping::Flags; 12 + use crate::GlobalInitResult; 11 13 use bitflags::bitflags; 12 14 use core::arch::{asm, naked_asm}; 13 15 use core::fmt; 14 16 use core::num::NonZero; 15 17 use core::ptr::NonNull; 16 - use loader_api::BootInfo; 17 18 use riscv::satp; 18 19 19 20 pub const DEFAULT_ASID: u16 = 0; ··· 30 31 /// On `RiscV` targets the page table entry's physical address bits are shifted 2 bits to the right. 31 32 const PTE_PPN_SHIFT: usize = 2; 32 33 33 - const BOOT_STACK_SIZE: usize = 32 * PAGE_SIZE; 34 - 35 - #[unsafe(link_section = ".bss.uninit")] 36 - static BOOT_STACK: Stack = Stack([0; BOOT_STACK_SIZE]); 37 - 38 - #[repr(C, align(128))] 39 - struct Stack([u8; BOOT_STACK_SIZE]); 40 - 34 + /// Entry point for the initializing hart, this will set up the CPU environment for Rust and then 35 + /// transfer control to [`crate::main`]. 36 + /// 37 + /// For the entry point of all secondary harts see [`_start_secondary`]. 41 38 #[unsafe(link_section = ".text.start")] 42 39 #[unsafe(no_mangle)] 43 40 #[naked] ··· 49 46 "rdtime a2", 50 47 51 48 // Clear return address and frame pointer 52 - "mv ra, zero", 53 - "mv s0, zero", 49 + "mv ra, zero", 50 + "mv s0, zero", 54 51 55 52 // Clear the gp register in case anything tries to use it. 56 - "mv gp, zero", 53 + "mv gp, zero", 57 54 58 55 // Mask all interrupts in case the previous stage left them on. 59 - "csrc sstatus, 1 << 1", 60 - "csrw sie, zero", 56 + "csrc sstatus, 1 << 1", 57 + "csrw sie, zero", 61 58 62 59 // Reset the trap vector in case the previous stage left one installed. 63 - "csrw stvec, zero", 60 + "csrw stvec, zero", 64 61 65 62 // Disable the MMU in case it was left on. 
66 - "csrw satp, zero", 63 + "csrw satp, zero", 67 64 68 65 // Setup the stack pointer 69 - "la t0, {boot_stack_start}", // set the stack pointer to the bottom of the stack 70 - "li t1, {boot_stack_size}", // load the stack size 71 - "add sp, t0, t1", // add both to get the top of the stack 66 + "la t0, __stack_start", // set the stack pointer to the bottom of the stack 67 + "li t1, {stack_size}", // load the stack size 68 + "mul sp, a0, t1", // multiply the stack size by the hart id to get the relative stack bottom offset 69 + "add t0, t0, sp", // add the relative stack bottom offset to the absolute stack region offset to get 70 + // the absolute stack bottom 71 + "add sp, t0, t1", // add one stack size again to get to the top of the stack. This is our final stack pointer. 72 + 73 + // fill stack with canary pattern 74 + // $sp is set to stack top above, $t0 as well 75 + "call {fill_stack}", 76 + 77 + // Clear .bss. The linker script ensures these are aligned to 16 bytes. 78 + "lla a3, __bss_zero_start", 79 + "lla a4, __bss_end", 80 + "0:", 81 + " sd zero, (a3)", 82 + " sd zero, 8(a3)", 83 + " add a3, a3, 16", 84 + " blt a3, a4, 0b", 85 + 86 + // Call the rust entry point 87 + "call {start_rust}", 88 + 89 + // Loop forever. 90 + // `start_rust` should never return, but in case it does prevent the hart from executing 91 + // random code 92 + "2:", 93 + " wfi", 94 + " j 2b", 95 + 96 + stack_size = const crate::STACK_SIZE, 97 + start_rust = sym crate::main, 98 + fill_stack = sym fill_stack 99 + } 100 + } 101 + } 102 + 103 + /// Entry point for all secondary harts, this is essentially the same as [`_start`] but it doesn't 104 + /// attempt to zero out the BSS. 105 + /// 106 + /// It will however transfer control to the common [`crate::main`] routine. 107 + #[naked] 108 + unsafe extern "C" fn _start_secondary() -> ! { 109 + // Safety: inline assembly 110 + unsafe { 111 + naked_asm! 
{ 112 + // read boot time stamp as early as possible 113 + "rdtime a2", 114 + 115 + // Clear return address and frame pointer 116 + "mv ra, zero", 117 + "mv s0, zero", 118 + 119 + // Clear the gp register in case anything tries to use it. 120 + "mv gp, zero", 121 + 122 + // Mask all interrupts in case the previous stage left them on. 123 + "csrc sstatus, 1 << 1", 124 + "csrw sie, zero", 125 + 126 + // Reset the trap vector in case the previous stage left one installed. 127 + "csrw stvec, zero", 128 + 129 + // Disable the MMU in case it was left on. 130 + "csrw satp, zero", 131 + 132 + // Setup the stack pointer 133 + "la t0, __stack_start", // set the stack pointer to the bottom of the stack 134 + "li t1, {stack_size}", // load the stack size 135 + "mul sp, a0, t1", // multiply the stack size by the hart id to get the relative stack bottom offset 136 + "add t0, t0, sp", // add the relative stack bottom offset to the absolute stack region offset to get 137 + // the absolute stack bottom 138 + "add sp, t0, t1", // add one stack size again to get to the top of the stack. This is our final stack pointer. 72 139 73 - // Fill the stack with a canary pattern (0xACE0BACE) so that we can identify unused stack memory 74 - // in dumps & calculate stack usage. This is also really great (don't ask my why I know this) to identify 75 - // when we tried executing stack memory. 
76 - "li t1, 0xACE0BACE", 77 - "1:", 78 - " sw t1, 0(t0)", // write the canary as u64 79 - " addi t0, t0, 8", // move to the next u64 80 - " bltu t0, sp, 1b", // loop until we reach the top of the stack 140 + // fill stack with canary pattern 141 + // $sp is set to stack top above, $t0 as well 142 + "call {fill_stack}", 81 143 82 144 // Call the rust entry point 83 145 "call {start_rust}", ··· 89 151 " wfi", 90 152 " j 2b", 91 153 92 - boot_stack_start = sym BOOT_STACK, 93 - boot_stack_size = const BOOT_STACK_SIZE, 154 + stack_size = const crate::STACK_SIZE, 94 155 start_rust = sym crate::main, 156 + fill_stack = sym fill_stack 95 157 } 96 158 } 97 159 } 98 160 99 - pub unsafe fn handoff_to_kernel(hartid: usize, boot_info: *mut BootInfo, entry: usize) -> ! { 161 + /// Fill the stack with a canary pattern (0xACE0BACE) so that we can identify unused stack memory 162 + /// in dumps & calculate stack usage. This is also really great (don't ask my why I know this) to identify 163 + /// when we tried executing stack memory. 164 + /// 165 + /// # Safety 166 + /// 167 + /// expects the bottom of the stack in `t0` and the top of stack in `sp` 168 + #[naked] 169 + unsafe extern "C" fn fill_stack() { 170 + // Safety: inline assembly 171 + unsafe { 172 + naked_asm! { 173 + // Fill the stack with a canary pattern (0xACE0BACE) so that we can identify unused stack memory 174 + // in dumps & calculate stack usage. This is also really great (don't ask my why I know this) to identify 175 + // when we tried executing stack memory. 176 + "li t1, 0xACE0BACE", 177 + "1:", 178 + " sw t1, 0(t0)", // write the canary as u64 179 + " addi t0, t0, 8", // move to the next u64 180 + " bltu t0, sp, 1b", // loop until we reach the top of the stack 181 + "ret" 182 + } 183 + } 184 + } 185 + 186 + /// This will hand off control over this CPU to the kernel. This is the last function executed in 187 + /// the loader and will never return. 
188 + pub unsafe fn handoff_to_kernel(hartid: usize, boot_ticks: u64, init: &GlobalInitResult) -> ! { 189 + let stack = init.stacks_alloc.region_for_hart(hartid); 190 + let tls = init 191 + .maybe_tls_alloc 192 + .as_ref() 193 + .map(|tls| tls.region_for_hart(hartid)) 194 + .unwrap_or_default(); 195 + 100 196 log::debug!("Hart {hartid} Jumping to kernel..."); 101 - log::trace!("Hart {hartid} entry: {entry:#x}, arguments: a0={hartid} a1={boot_info:?}"); 197 + log::trace!("Hart {hartid} entry: {:#x}, arguments: a0={hartid} a1={:?} stack={stack:#x?} tls={tls:#x?}", init.kernel_entry, init.boot_info); 198 + 199 + // Synchronize all harts before jumping to the kernel. 200 + // Technically this isn't really necessary, but debugging output gets horribly mangled if we don't 201 + // and that's terrible for this critical transition 202 + init.barrier.wait(); 102 203 103 204 // Safety: inline assembly 104 205 unsafe { 105 206 asm! { 207 + "mv sp, {stack_top}", // Set the kernel stack ptr 208 + "mv tp, {tls_start}", // Set the kernel thread ptr 209 + 210 + // fill stack with canary pattern 211 + // $sp is set to stack top above, $t0 is set to stack bottom by the asm args below 212 + "call {fill_stack}", 213 + 106 214 "mv ra, zero", // Reset return address 107 215 108 216 "jalr zero, {kernel_entry}", ··· 114 222 " wfi", 115 223 " j 1b", 116 224 in("a0") hartid, 117 - in("a1") boot_info, 118 - kernel_entry = in(reg) entry, 225 + in("a1") init.boot_info, 226 + in("a2") boot_ticks, 227 + in("t0") stack.start, 228 + stack_top = in(reg) stack.end, 229 + tls_start = in(reg) tls.start, 230 + kernel_entry = in(reg) init.kernel_entry, 231 + fill_stack = sym fill_stack, 119 232 options(noreturn) 120 233 } 121 234 } 235 + } 236 + 237 + /// Start all secondary harts on the system as reported by [`MachineInfo`]. 
238 + pub fn start_secondary_harts(boot_hart: usize, minfo: &MachineInfo) -> crate::Result<()> { 239 + let start = minfo.hart_mask.trailing_zeros() as usize; 240 + let end = (usize::BITS - minfo.hart_mask.leading_zeros()) as usize; 241 + log::trace!("{start}..{end}"); 242 + 243 + for hartid in start..end { 244 + // Don't try to start ourselves 245 + if hartid == boot_hart { 246 + continue; 247 + } 248 + 249 + log::trace!("[{boot_hart}] starting hart {hartid}..."); 250 + riscv::sbi::hsm::start_hart( 251 + hartid, 252 + _start_secondary as usize, 253 + minfo.fdt.as_ptr() as usize, 254 + ) 255 + .map_err(Error::FailedToStartSecondaryHart)?; 256 + } 257 + 258 + Ok(()) 122 259 } 123 260 124 261 pub unsafe fn map_contiguous(
+2 -2
loader/src/boot_info.rs
··· 24 24 loader_phys: Range<usize>, 25 25 kernel_phys: Range<usize>, 26 26 fdt_phys: Range<usize>, 27 - boot_ticks: u64, 27 + hart_mask: usize, 28 28 ) -> crate::Result<*mut BootInfo> { 29 29 let frame = frame_alloc 30 30 .allocate_contiguous_zeroed( ··· 42 42 boot_info.tls_template = maybe_tls_template; 43 43 boot_info.kernel_virt = kernel_virt; 44 44 boot_info.kernel_phys = kernel_phys; 45 - boot_info.boot_ticks = boot_ticks; 45 + boot_info.hart_mask = hart_mask; 46 46 47 47 let boot_info_ptr = page as *mut BootInfo; 48 48 // Safety: we just allocated the boot info frame
+7
loader/src/error.rs
··· 17 17 Elf(&'static str), 18 18 /// The system was not able to allocate memory needed for the operation. 19 19 NoMemory, 20 + /// Failed to start secondary hart 21 + #[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))] 22 + FailedToStartSecondaryHart(riscv::sbi::Error), 20 23 } 21 24 22 25 impl From<core::num::TryFromIntError> for Error { ··· 41 44 Error::TryFromInt(_) => write!(f, "Failed to convert number"), 42 45 Error::Dtb(err) => write!(f, "Failed to parse device tree blob: {err}"), 43 46 Error::Elf(err) => write!(f, "Failed to parse kernel elf: {err}"), 47 + #[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))] 48 + Error::FailedToStartSecondaryHart(err) => { 49 + write!(f, "Failed to start secondary hart: {err}") 50 + } 44 51 } 45 52 } 46 53 }
+79 -4
loader/src/machine_info.rs
··· 9 9 use crate::mapping::{align_down, checked_align_up}; 10 10 use arrayvec::ArrayVec; 11 11 use core::cmp::Ordering; 12 - use core::ffi::c_void; 12 + use core::ffi::{c_void, CStr}; 13 13 use core::fmt; 14 14 use core::fmt::Formatter; 15 15 use core::range::Range; ··· 25 25 pub memories: ArrayVec<Range<usize>, 16>, 26 26 /// The RNG seed passed to us by the previous stage loader. 27 27 pub rng_seed: Option<&'dt [u8]>, 28 + /// A bitfield where each bit corresponds to a CPU in the system. 29 + /// A `1` bit indicates the CPU is "online" and can be used, 30 + /// while a `0` bit indicates the CPU is "offline" and can't be used by the system. 31 + /// This is used across SBI calls to dispatch IPIs to the correct CPUs. 32 + pub hart_mask: usize, 28 33 } 29 34 30 35 impl MachineInfo<'_> { ··· 40 45 let mut info = MachineInfo { 41 46 fdt: fdt_slice, 42 47 memories: v.memory_regions, 43 - rng_seed: v.chosen_visitor.rng_seed, 48 + rng_seed: v.chosen.rng_seed, 49 + hart_mask: v.cpus.hart_mask, 44 50 }; 45 51 46 52 let mut exclude_region = |entry: Range<usize>| { ··· 144 150 } else { 145 151 writeln!(f, "{:<17} : None", "PRNG SEED")?; 146 152 } 153 + writeln!(f, "{:<17} : {:b}", "HART MASK", self.hart_mask)?; 147 154 148 155 for (idx, r) in self.memories.iter().enumerate() { 149 156 writeln!(f, "MEMORY REGION {:<4}: {}..{}", idx, r.start, r.end)?; ··· 164 171 #[derive(Default)] 165 172 struct MachineInfoVisitor<'dt> { 166 173 reservations: ReservationsVisitor<'dt>, 167 - chosen_visitor: ChosenVisitor<'dt>, 174 + chosen: ChosenVisitor<'dt>, 175 + cpus: CpusVisitor, 168 176 memory_regions: ArrayVec<Range<usize>, 16>, 169 177 address_size: usize, 170 178 width_size: usize, ··· 187 195 } else if name == "reserved-memory" { 188 196 node.visit(&mut self.reservations)?; 189 197 } else if name == "chosen" { 190 - node.visit(&mut self.chosen_visitor)?; 198 + node.visit(&mut self.chosen)?; 199 + } else if name == "cpus" { 200 + node.visit(&mut self.cpus)?; 191 201 } 192 202 193 203 
Ok(()) ··· 370 380 Ok(()) 371 381 } 372 382 } 383 + 384 + #[derive(Default)] 385 + struct CpusVisitor { 386 + hart_mask: usize, 387 + } 388 + 389 + impl CpusVisitor { 390 + fn cpu_visitor(&self) -> CpuVisitor { 391 + CpuVisitor::default() 392 + } 393 + } 394 + 395 + impl<'dt> Visitor<'dt> for CpusVisitor { 396 + type Error = dtb_parser::Error; 397 + 398 + fn visit_subnode(&mut self, name: &'dt str, node: Node<'dt>) -> Result<(), Self::Error> { 399 + if name.starts_with("cpu@") { 400 + let mut v = self.cpu_visitor(); 401 + node.visit(&mut v)?; 402 + let (hartid, enabled) = v.result(); 403 + 404 + if enabled { 405 + self.hart_mask |= 1 << hartid; 406 + } 407 + } 408 + 409 + Ok(()) 410 + } 411 + } 412 + 413 + #[derive(Default)] 414 + struct CpuVisitor<'dt> { 415 + status: Option<&'dt CStr>, 416 + hartid: usize, 417 + } 418 + 419 + impl CpuVisitor<'_> { 420 + fn result(self) -> (usize, bool) { 421 + let enabled = self.status.unwrap() != c"disabled"; 422 + 423 + (self.hartid, enabled) 424 + } 425 + } 426 + 427 + impl<'dt> Visitor<'dt> for CpuVisitor<'dt> { 428 + type Error = dtb_parser::Error; 429 + 430 + fn visit_reg(&mut self, reg: &'dt [u8]) -> Result<(), Self::Error> { 431 + self.hartid = match reg.len() { 432 + 4 => usize::try_from(u32::from_be_bytes(reg.try_into()?))?, 433 + 8 => usize::try_from(u64::from_be_bytes(reg.try_into()?))?, 434 + _ => unreachable!(), 435 + }; 436 + 437 + Ok(()) 438 + } 439 + 440 + fn visit_property(&mut self, name: &'dt str, value: &'dt [u8]) -> Result<(), Self::Error> { 441 + if name == "status" { 442 + self.status = Some(CStr::from_bytes_until_nul(value)?); 443 + } 444 + 445 + Ok(()) 446 + } 447 + }
+68 -23
loader/src/main.rs
··· 10 10 #![feature(naked_functions)] 11 11 #![feature(new_range_api)] 12 12 #![feature(maybe_uninit_slice)] 13 + #![feature(alloc_layout_extra)] 13 14 14 15 use crate::boot_info::prepare_boot_info; 15 16 use crate::error::Error; 16 17 use crate::frame_alloc::FrameAllocator; 17 18 use crate::kernel::{parse_kernel, INLINED_KERNEL_BYTES}; 18 19 use crate::machine_info::MachineInfo; 19 - use crate::mapping::{identity_map_self, map_kernel, map_physical_memory}; 20 + use crate::mapping::{ 21 + identity_map_self, map_kernel, map_kernel_stacks, map_physical_memory, StacksAllocation, 22 + TlsAllocation, 23 + }; 20 24 use arrayvec::ArrayVec; 21 25 use core::alloc::Layout; 22 26 use core::ffi::c_void; 23 27 use core::range::Range; 24 28 use core::{ptr, slice}; 29 + use sync::{Barrier, OnceLock}; 25 30 26 31 mod arch; 27 32 mod boot_info; ··· 36 41 37 42 pub const ENABLE_KASLR: bool = false; 38 43 pub const LOG_LEVEL: log::Level = log::Level::Trace; 44 + pub const STACK_SIZE: usize = 32 * arch::PAGE_SIZE; 39 45 40 46 pub type Result<T> = core::result::Result<T, Error>; 41 47 ··· 43 49 /// 44 50 /// The passed `opaque` ptr must point to a valid memory region. 45 51 unsafe fn main(hartid: usize, opaque: *const c_void, boot_ticks: u64) -> ! { 46 - // zero out the BSS section 47 - unsafe extern "C" { 48 - static mut __bss_zero_start: u64; 49 - static mut __bss_end: u64; 50 - } 51 - // Safety: Zero BSS section 52 + static GLOBAL_INIT: OnceLock<GlobalInitResult> = OnceLock::new(); 53 + let res = GLOBAL_INIT.get_or_init(|| do_global_init(hartid, opaque)); 54 + 55 + // Enable the MMU on all harts. Note that this technically reenables it on the initializing hart 56 + // but there is no harm in that. 
57 + // Safety: `res.root_pgtable` was fully constructed by `do_global_init` above, so it is valid to activate on every hart 52 58 unsafe { 53 - let mut ptr = &raw mut __bss_zero_start; 54 - let end = &raw mut __bss_end; 55 - while ptr < end { 56 - ptr.write_volatile(0); 57 - ptr = ptr.offset(1); 58 - } 59 + log::trace!("activating MMU..."); 60 + arch::activate_aspace(res.root_pgtable); 61 + log::trace!("activated."); 59 62 } 60 63 64 + if let Some(alloc) = &res.maybe_tls_alloc { 65 + alloc.initialize_for_hart(hartid); 66 + } 67 + 68 + // Safety: this will jump to the kernel entry 69 + unsafe { arch::handoff_to_kernel(hartid, boot_ticks, res) } 70 + } 71 + 72 + pub struct GlobalInitResult { 73 + boot_info: *mut loader_api::BootInfo, 74 + kernel_entry: usize, 75 + root_pgtable: usize, 76 + stacks_alloc: StacksAllocation, 77 + maybe_tls_alloc: Option<TlsAllocation>, 78 + barrier: Barrier, 79 + } 80 + 81 + // Safety: *mut BootInfo isn't Send but `GlobalInitResult` will only ever be read from, so this is fine. 82 + unsafe impl Send for GlobalInitResult {} 83 + // Safety: *mut BootInfo isn't Sync but `GlobalInitResult` will only ever be read from, so this is fine. 
84 + unsafe impl Sync for GlobalInitResult {} 85 + 86 + fn do_global_init(hartid: usize, opaque: *const c_void) -> GlobalInitResult { 61 87 logger::init(LOG_LEVEL.to_level_filter()); 62 - 63 88 // Safety: TODO 64 89 let minfo = unsafe { MachineInfo::from_dtb(opaque).expect("failed to parse machine info") }; 65 90 log::debug!("\n{minfo}"); 66 91 67 - let self_regions = SelfRegions::collect(); 92 + arch::start_secondary_harts(hartid, &minfo).unwrap(); 93 + 94 + let self_regions = SelfRegions::collect(&minfo); 68 95 log::debug!("{self_regions:#x?}"); 69 96 70 97 // Initialize the frame allocator ··· 121 148 // print the elf sections for debugging purposes 122 149 log::debug!("\n{kernel}"); 123 150 124 - let (kernel_virt, maybe_tls_template) = map_kernel( 151 + let (kernel_virt, maybe_tls_alloc) = map_kernel( 125 152 root_pgtable, 126 153 &mut frame_alloc, 127 154 &mut page_alloc, 128 155 &kernel, 156 + &minfo, 129 157 phys_off, 130 158 ) 131 159 .unwrap(); 132 160 133 161 log::trace!("KASLR: Kernel image at {:#x}", kernel_virt.start); 134 162 163 + let stacks_alloc = map_kernel_stacks( 164 + root_pgtable, 165 + &mut frame_alloc, 166 + &mut page_alloc, 167 + &minfo, 168 + usize::try_from(kernel._loader_config.kernel_stack_size_pages).unwrap(), 169 + phys_off, 170 + ) 171 + .unwrap(); 172 + 135 173 let frame_usage = frame_alloc.frame_usage(); 136 174 log::debug!( 137 175 "Mapping complete, permanently used {} KiB.", ··· 143 181 phys_off, 144 182 phys_map, 145 183 kernel_virt, 146 - maybe_tls_template, 184 + maybe_tls_alloc.as_ref().map(|alloc| alloc.template.clone()), 147 185 Range::from(self_regions.executable.start..self_regions.read_write.end), 148 186 kernel_phys, 149 187 fdt_phys, 150 - boot_ticks, 188 + minfo.hart_mask, 151 189 ) 152 190 .unwrap(); 153 191 ··· 156 194 .checked_add(usize::try_from(kernel.elf_file.header.pt2.entry_point()).unwrap()) 157 195 .unwrap(); 158 196 159 - // Safety: this will jump to the kernel entry 160 - unsafe { 
arch::handoff_to_kernel(hartid, boot_info, kernel_entry) } 197 + GlobalInitResult { 198 + boot_info, 199 + kernel_entry, 200 + root_pgtable, 201 + maybe_tls_alloc, 202 + stacks_alloc, 203 + barrier: Barrier::new(minfo.hart_mask.count_ones() as usize), 204 + } 161 205 } 162 206 163 207 #[derive(Debug)] ··· 168 212 } 169 213 170 214 impl SelfRegions { 171 - pub fn collect() -> Self { 215 + pub fn collect(minfo: &MachineInfo) -> Self { 172 216 unsafe extern "C" { 173 217 static __text_start: u8; 174 218 static __text_end: u8; 175 219 static __rodata_start: u8; 176 220 static __rodata_end: u8; 177 221 static __bss_start: u8; 178 - static __data_end: u8; 222 + static __stack_start: u8; 179 223 } 180 224 181 225 SelfRegions { ··· 189 233 }, 190 234 read_write: Range { 191 235 start: &raw const __bss_start as usize, 192 - end: &raw const __data_end as usize, 236 + end: (&raw const __stack_start as usize) 237 + + (minfo.hart_mask.count_ones() as usize * STACK_SIZE), 193 238 }, 194 239 } 195 240 }
+167 -12
loader/src/mapping.rs
··· 13 13 use crate::{arch, SelfRegions}; 14 14 use bitflags::bitflags; 15 15 use core::alloc::Layout; 16 - use core::num::NonZeroUsize; 16 + use core::num::{NonZero, NonZeroUsize}; 17 17 use core::range::Range; 18 - use core::{ptr, slice}; 18 + use core::{cmp, ptr, slice}; 19 19 use loader_api::TlsTemplate; 20 20 use xmas_elf::dynamic::Tag; 21 21 use xmas_elf::program::{SegmentData, Type}; ··· 142 142 frame_alloc: &mut FrameAllocator, 143 143 page_alloc: &mut PageAllocator, 144 144 kernel: &Kernel, 145 + minfo: &MachineInfo, 145 146 phys_off: usize, 146 - ) -> crate::Result<(Range<usize>, Option<TlsTemplate>)> { 147 + ) -> crate::Result<(Range<usize>, Option<TlsAllocation>)> { 147 148 let kernel_virt = page_alloc.allocate( 148 149 Layout::from_size_align( 149 150 usize::try_from(kernel.mem_size())?, ··· 173 174 )?, 174 175 Type::Tls => { 175 176 let ph = ProgramHeader::try_from(ph)?; 176 - let old = maybe_tls_allocation.replace(TlsTemplate { 177 - start_addr: kernel_virt.start.checked_add(ph.virtual_address).unwrap(), 178 - mem_size: ph.mem_size, 179 - file_size: ph.file_size, 180 - align: ph.align, 181 - }); 177 + let old = maybe_tls_allocation.replace(handle_tls_segment( 178 + root_pgtable, 179 + frame_alloc, 180 + page_alloc, 181 + &ph, 182 + kernel_virt.start, 183 + minfo, 184 + phys_off, 185 + )?); 182 186 log::trace!("{maybe_tls_allocation:?}"); 183 187 assert!(old.is_none(), "multiple TLS segments not supported"); 184 188 } ··· 224 228 ) -> crate::Result<()> { 225 229 let flags = flags_for_segment(ph); 226 230 227 - log::debug!( 231 + log::trace!( 228 232 "Handling Segment: LOAD off {offset:#016x} vaddr {vaddr:#016x} align {align} filesz {filesz:#016x} memsz {memsz:#016x} flags {flags:?}", 229 233 offset = ph.offset, 230 234 vaddr = ph.virtual_address, ··· 306 310 307 311 let data_bytes_before_zero = zero_start & 0xfff; 308 312 309 - log::debug!( 313 + log::trace!( 310 314 "handling BSS {:#x?}, data bytes before {data_bytes_before_zero}", 311 315 
zero_start..zero_end 312 316 ); ··· 342 346 data_bytes_before_zero, 343 347 ); 344 348 345 - log::debug!("copying {data_bytes_before_zero} bytes from {src:p} to {dst:p}..."); 349 + log::trace!("copying {data_bytes_before_zero} bytes from {src:p} to {dst:p}..."); 346 350 ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), dst.len()); 347 351 } 348 352 ··· 459 463 } 460 464 } 461 465 _ => unimplemented!("unsupported relocation type {}", rela.get_type()), 466 + } 467 + } 468 + 469 + /// Map the kernel thread-local storage (TLS) memory regions. 470 + fn handle_tls_segment( 471 + root_pgtable: usize, 472 + frame_alloc: &mut FrameAllocator, 473 + page_alloc: &mut PageAllocator, 474 + ph: &ProgramHeader, 475 + virt_base: usize, 476 + minfo: &MachineInfo, 477 + phys_off: usize, 478 + ) -> crate::Result<TlsAllocation> { 479 + let layout = Layout::from_size_align(ph.mem_size, cmp::max(ph.align, arch::PAGE_SIZE)) 480 + .unwrap() 481 + .repeat(minfo.hart_mask.count_ones() as usize) 482 + .unwrap() 483 + .0 484 + .pad_to_align(); 485 + 486 + let phys = frame_alloc 487 + .allocate_contiguous_zeroed(layout, phys_off) 488 + .unwrap(); 489 + let virt = page_alloc.allocate(layout); 490 + 491 + log::trace!("Mapping TLS region {virt:#x?}..."); 492 + // Safety: Leaving the address space in an invalid state here is fine since on panic we'll 493 + // abort startup anyway 494 + unsafe { 495 + arch::map_contiguous( 496 + root_pgtable, 497 + frame_alloc, 498 + virt.start, 499 + phys, 500 + NonZero::new(layout.size()).unwrap(), 501 + Flags::READ | Flags::WRITE, 502 + phys_off, 503 + )?; 504 + } 505 + log::trace!("here"); 506 + 507 + Ok(TlsAllocation { 508 + virt, 509 + template: TlsTemplate { 510 + start_addr: virt_base + ph.virtual_address, 511 + mem_size: ph.mem_size, 512 + file_size: ph.file_size, 513 + align: ph.align, 514 + }, 515 + }) 516 + } 517 + 518 + #[derive(Debug)] 519 + pub struct TlsAllocation { 520 + /// The TLS region in virtual memory 521 + virt: Range<usize>, 522 + /// 
The template we allocated for 523 + pub template: TlsTemplate, 524 + } 525 + 526 + impl TlsAllocation { 527 + pub fn region_for_hart(&self, hartid: usize) -> Range<usize> { 528 + let aligned_size = checked_align_up( 529 + self.template.mem_size, 530 + cmp::max(self.template.align, arch::PAGE_SIZE), 531 + ) 532 + .unwrap(); 533 + let start = self.virt.start + (aligned_size * hartid); 534 + 535 + Range::from(start..start + self.template.mem_size) 536 + } 537 + 538 + pub fn initialize_for_hart(&self, hartid: usize) { 539 + if self.template.file_size != 0 { 540 + // Safety: We have to trust the loaders BootInfo here 541 + unsafe { 542 + let src: &[u8] = slice::from_raw_parts( 543 + self.template.start_addr as *const u8, 544 + self.template.file_size, 545 + ); 546 + let dst: &mut [u8] = slice::from_raw_parts_mut( 547 + self.region_for_hart(hartid).start as *mut u8, 548 + self.template.file_size, 549 + ); 550 + 551 + // sanity check to ensure our destination allocated memory is actually zeroed. 
 552 + // if it's not, that likely means we're about to overwrite something important 553 + debug_assert!(dst.iter().all(|&x| x == 0)); 554 + 555 + dst.copy_from_slice(src); 556 + } 557 + } 558 + } 559 + } 560 + 561 + pub fn map_kernel_stacks( 562 + root_pgtable: usize, 563 + frame_alloc: &mut FrameAllocator, 564 + page_alloc: &mut PageAllocator, 565 + minfo: &MachineInfo, 566 + per_hart_size_pages: usize, 567 + phys_off: usize, 568 + ) -> crate::Result<StacksAllocation> { 569 + let per_hart_size = per_hart_size_pages * arch::PAGE_SIZE; 570 + let layout = Layout::from_size_align(per_hart_size, arch::PAGE_SIZE) 571 + .unwrap() 572 + .repeat(minfo.hart_mask.count_ones() as usize) 573 + .unwrap() 574 + .0; 575 + 576 + log::trace!("Allocating stack region {layout:?}..."); 577 + 578 + // The stacks region doesn't need to be zeroed, since we will be filling it with 579 + // the canary pattern anyway 580 + let phys = frame_alloc 581 + .allocate_contiguous(layout) 582 + .ok_or(Error::NoMemory)?; 583 + let virt = page_alloc.allocate(layout); 584 + 585 + log::trace!("Mapping stack region {virt:#x?}..."); 586 + // Safety: Leaving the address space in an invalid state here is fine since on panic we'll 587 + // abort startup anyway 588 + unsafe { 589 + arch::map_contiguous( 590 + root_pgtable, 591 + frame_alloc, 592 + virt.start, 593 + phys, 594 + NonZero::new(layout.size()).unwrap(), 595 + Flags::READ | Flags::WRITE, 596 + phys_off, 597 + )?; 598 + } 599 + 600 + Ok(StacksAllocation { 601 + virt, 602 + per_hart_size, 603 + }) 604 + } 605 + 606 + pub struct StacksAllocation { 607 + /// The kernel stacks region in virtual memory 608 + virt: Range<usize>, 609 + per_hart_size: usize, 610 + } 611 + 612 + impl StacksAllocation { 613 + pub fn region_for_hart(&self, hartid: usize) -> Range<usize> { 614 + let end = self.virt.end - (self.per_hart_size * hartid); 615 + 616 + Range::from((end - self.per_hart_size)..end) 462 617 } 463 618 }