Next Generation WASM Microkernel Operating System
at trap_handler 503 lines 21 kB view raw
// Copyright 2025 Jonas Kruckenberg
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use crate::wasm::cranelift::code_translator::Reachability;
use crate::wasm::cranelift::env::TranslationEnvironment;
use crate::wasm::cranelift::utils::index_type_to_ir_type;
use crate::wasm::translate::IndexType;
use crate::wasm::trap::TRAP_HEAP_MISALIGNED;
use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir;
use cranelift_codegen::ir::InstBuilder;
use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::{Expr, Fact, MemFlags, RelSourceLoc, TrapCode, Type, Value};
use cranelift_frontend::FunctionBuilder;
use wasmparser::MemArg;

/// Compile-time description of one Wasm linear memory ("heap"), consumed
/// while lowering Wasm memory accesses to Cranelift IR.
#[derive(Debug, Clone)]
pub struct CraneliftMemory {
    /// The address of the start of the heap's storage.
    pub base_gv: ir::GlobalValue,
    /// The index type for the heap.
    pub index_type: IndexType,
    /// The memory type for the pointed-to memory, if using proof-carrying code.
    pub memory_type: Option<ir::MemoryType>,
    /// Heap bound in bytes. The offset-guard pages are allocated after the
    /// bound.
    pub bound: u64,
    /// Guaranteed minimum heap size in bytes. Heap accesses before `min_size`
    /// don't need bounds checking.
    pub min_size: u64,
    /// The maximum heap size in bytes.
    ///
    /// Heap accesses larger than this will always trap.
    pub max_size: Option<u64>,
    /// Size in bytes of the offset-guard pages following the heap.
    pub offset_guard_size: u64,
    /// The log2 of this memory's page size.
    pub page_size_log2: u8,
}

impl CraneliftMemory {
    /// Returns `Reachability::Unreachable` when the Wasm access will
    /// unconditionally trap.
    ///
    /// Returns `(flags, wasm_addr, native_addr)`.
49 pub fn prepare_addr( 50 &self, 51 builder: &mut FunctionBuilder, 52 index: Value, 53 access_size: u8, 54 memarg: &MemArg, 55 env: &mut TranslationEnvironment, 56 ) -> Reachability<(MemFlags, Value, Value)> { 57 let addr = if let Ok(offset) = u32::try_from(memarg.offset) { 58 // If our offset fits within a u32, then we can place it into the 59 // offset immediate of the `heap_addr` instruction. 60 self.bounds_check_and_compute_addr(builder, index, offset, access_size, env) 61 } else { 62 // If the offset doesn't fit within a u32, then we can't pass it 63 // directly into `heap_addr`. 64 let offset = builder.ins().iconst( 65 index_type_to_ir_type(self.index_type), 66 i64::try_from(memarg.offset).unwrap(), 67 ); 68 let adjusted_index = 69 builder 70 .ins() 71 .uadd_overflow_trap(index, offset, TrapCode::HEAP_OUT_OF_BOUNDS); 72 self.bounds_check_and_compute_addr(builder, adjusted_index, 0, access_size, env) 73 }; 74 75 match addr { 76 Reachability::Unreachable => Reachability::Unreachable, 77 Reachability::Reachable(addr) => { 78 // Note that we don't set `is_aligned` here, even if the load instruction's 79 // alignment immediate may say it's aligned, because WebAssembly's 80 // immediate field is just a hint, while Cranelift's aligned flag needs a 81 // guarantee. WebAssembly memory accesses are always little-endian. 82 let mut flags = MemFlags::new(); 83 flags.set_endianness(ir::Endianness::Little); 84 85 if self.memory_type.is_some() { 86 // Proof-carrying code is enabled; check this memory access. 87 flags.set_checked(); 88 } 89 90 // The access occurs to the `heap` disjoint category of abstract 91 // state. This may allow alias analysis to merge redundant loads, 92 // etc. when heap accesses occur interleaved with other (table, 93 // vmctx, stack) accesses. 94 flags.set_alias_region(Some(ir::AliasRegion::Heap)); 95 96 Reachability::Reachable((flags, index, addr)) 97 } 98 } 99 } 100 101 /// Like `prepare_addr` but for atomic accesses. 
102 /// 103 /// Returns `None` when the Wasm access will unconditionally trap. 104 pub fn prepare_atomic_addr( 105 &self, 106 builder: &mut FunctionBuilder, 107 index: Value, 108 loaded_bytes: u8, 109 memarg: &MemArg, 110 env: &mut TranslationEnvironment, 111 ) -> Reachability<(MemFlags, Value, Value)> { 112 // Atomic addresses must all be aligned correctly, and for now we check 113 // alignment before we check out-of-bounds-ness. The order of this check may 114 // need to be updated depending on the outcome of the official threads 115 // proposal itself. 116 // 117 // Note that with an offset>0 we generate an `iadd_imm` where the result is 118 // thrown away after the offset check. This may truncate the offset and the 119 // result may overflow as well, but those conditions won't affect the 120 // alignment check itself. This can probably be optimized better and we 121 // should do so in the future as well. 122 if loaded_bytes > 1 { 123 let effective_addr = if memarg.offset == 0 { 124 index 125 } else { 126 builder 127 .ins() 128 .iadd_imm(index, i64::try_from(memarg.offset).unwrap()) 129 }; 130 debug_assert!(loaded_bytes.is_power_of_two()); 131 let misalignment = builder.ins().band_imm( 132 effective_addr, 133 i64::from(loaded_bytes.checked_sub(1).unwrap()), 134 ); 135 let f = builder.ins().icmp_imm(IntCC::NotEqual, misalignment, 0); 136 builder.ins().trapnz(f, TRAP_HEAP_MISALIGNED); 137 } 138 139 self.prepare_addr(builder, index, loaded_bytes, memarg, env) 140 } 141 142 fn bounds_check_and_compute_addr( 143 &self, 144 builder: &mut FunctionBuilder, 145 // Dynamic operand indexing into the memory. 146 index: Value, 147 // Static immediate added to the index. 148 offset: u32, 149 // Static size of the heap access. 
150 access_size: u8, 151 env: &mut TranslationEnvironment, 152 ) -> Reachability<Value> { 153 let pointer_bit_width = u16::try_from(env.pointer_type().bits()).unwrap(); 154 let orig_index = index; 155 let index = cast_index_to_pointer_ty( 156 index, 157 index_type_to_ir_type(self.index_type), 158 env.pointer_type(), 159 self.memory_type.is_some(), 160 &mut builder.cursor(), 161 ); 162 163 let spectre_mitigations_enabled = env.heap_access_spectre_mitigation(); 164 let pcc = env.proof_carrying_code(); 165 // Cannot overflow because we are widening to `u64`. 166 // TODO when memory64 is supported this needs to be handles correctly 167 let offset_and_size = u64::from(offset) + u64::from(access_size); 168 169 let host_page_size_log2 = env.target_isa().page_size_align_log2(); 170 let can_use_virtual_memory = self.page_size_log2 >= host_page_size_log2; 171 assert!( 172 can_use_virtual_memory, 173 "k23's memories require the ability to use virtual memory" 174 ); 175 176 let make_compare = 177 |builder: &mut FunctionBuilder, compare_kind: IntCC, lhs: Value, rhs: Value| { 178 let result = builder.ins().icmp(compare_kind, lhs, rhs); 179 if pcc { 180 // Name the original value as a def of the SSA value; 181 // if the value was extended, name that as well with a 182 // dynamic range, overwriting the basic full-range 183 // fact that we previously put on the uextend. 184 builder.func.dfg.facts[orig_index] = Some(Fact::Def { value: orig_index }); 185 if index != orig_index { 186 builder.func.dfg.facts[index] = 187 Some(Fact::value(pointer_bit_width, orig_index)); 188 } 189 190 // Create a fact on the LHS that is a "trivial symbolic 191 // fact": v1 has range v1+LHS_off..=v1+LHS_off 192 builder.func.dfg.facts[lhs] = 193 Some(Fact::value_offset(pointer_bit_width, orig_index, 0)); 194 // If the RHS is a symbolic value (v1 or gv1), we can 195 // emit a Compare fact. 
196 if let Some(rhs) = builder.func.dfg.facts[rhs] 197 .as_ref() 198 .and_then(|f| f.as_symbol()) 199 { 200 builder.func.dfg.facts[result] = Some(Fact::Compare { 201 kind: compare_kind, 202 lhs: Expr::offset(&Expr::value(orig_index), 0).unwrap(), 203 rhs: Expr::offset(rhs, 0).unwrap(), 204 }); 205 } 206 // Likewise, if the RHS is a constant, we can emit a 207 // Compare fact. 208 if let Some(k) = builder.func.dfg.facts[rhs] 209 .as_ref() 210 .and_then(|f| f.as_const(pointer_bit_width)) 211 { 212 builder.func.dfg.facts[result] = Some(Fact::Compare { 213 kind: compare_kind, 214 lhs: Expr::value(orig_index), 215 rhs: Expr::constant(i64::try_from(k).unwrap()), 216 }); 217 } 218 } 219 result 220 }; 221 222 if offset_and_size > self.bound { 223 // 1. First special case: trap immediately if `offset + access_size > 224 // bound`, since we will end up being out-of-bounds regardless of the 225 // given `index`. 226 builder.ins().trap(TrapCode::HEAP_OUT_OF_BOUNDS); 227 Reachability::Unreachable 228 } else if index_type_to_ir_type(self.index_type) == ir::types::I32 229 && u64::from(u32::MAX) 230 <= self 231 .bound 232 .saturating_add(self.offset_guard_size) 233 .saturating_add(offset_and_size) 234 { 235 // 2. Second special case for when we can completely omit explicit 236 // bounds checks for 32-bit static memories. 237 // 238 // First, let's rewrite our comparison to move all the constants 239 // to one side: 240 // 241 // index + offset + access_size > bound 242 // ==> index > bound - (offset + access_size) 243 // 244 // We know the subtraction on the right-hand side won't wrap because 245 // we didn't hit the first special case. 246 // 247 // Additionally, we add our guard pages (if any) to the right-hand 248 // side, since we can rely on the virtual memory subsystem at runtime 249 // to catch out-of-bound accesses within the range `bound .. bound + 250 // guard_size`. 
So now we are dealing with 251 // 252 // index > bound + guard_size - (offset + access_size) 253 // 254 // Note that `bound + guard_size` cannot overflow for 255 // correctly-configured heaps, as otherwise the heap wouldn't fit in 256 // a 64-bit memory space. 257 // 258 // The complement of our should-this-trap comparison expression is 259 // the should-this-not-trap comparison expression: 260 // 261 // index <= bound + guard_size - (offset + access_size) 262 // 263 // If we know the right-hand side is greater than or equal to 264 // `u32::MAX`, then 265 // 266 // index <= u32::MAX <= bound + guard_size - (offset + access_size) 267 // 268 // This expression is always true when the heap is indexed with 269 // 32-bit integers because `index` cannot be larger than 270 // `u32::MAX`. This means that `index` is always either in bounds or 271 // within the guard page region, neither of which require emitting an 272 // explicit bounds check. 273 274 Reachability::Reachable( 275 self.compute_addr( 276 &mut builder.cursor(), 277 env.pointer_type(), 278 index, 279 offset, 280 self.memory_type 281 .map(|ty| (ty, self.bound + self.offset_guard_size)), 282 ), 283 ) 284 } else { 285 // 3. General case for static memories. 286 // 287 // We have to explicitly test whether 288 // 289 // index > bound - (offset + access_size) 290 // 291 // and trap if so. 292 // 293 // Since we have to emit explicit bounds checks, we might as well be 294 // precise, not rely on the virtual memory subsystem at all, and not 295 // factor in the guard pages here. 296 // NB: this subtraction cannot wrap because we didn't hit the first 297 // special case. 
298 let adjusted_bound = self.bound.checked_sub(offset_and_size).unwrap(); 299 let adjusted_bound_value = builder 300 .ins() 301 .iconst(env.pointer_type(), i64::try_from(adjusted_bound).unwrap()); 302 if pcc { 303 builder.func.dfg.facts[adjusted_bound_value] = 304 Some(Fact::constant(pointer_bit_width, adjusted_bound)); 305 } 306 let oob = make_compare( 307 builder, 308 IntCC::UnsignedGreaterThan, 309 index, 310 adjusted_bound_value, 311 ); 312 Reachability::Reachable(self.explicit_check_oob_condition_and_compute_addr( 313 builder, 314 env.pointer_type(), 315 index, 316 offset, 317 access_size, 318 spectre_mitigations_enabled, 319 self.memory_type.map(|ty| (ty, self.bound)), 320 oob, 321 )) 322 } 323 } 324 325 #[expect(clippy::too_many_arguments, reason = "")] 326 fn explicit_check_oob_condition_and_compute_addr( 327 &self, 328 builder: &mut FunctionBuilder, 329 addr_ty: Type, 330 index: Value, 331 offset: u32, 332 access_size: u8, 333 // Whether Spectre mitigations are enabled for heap accesses. 334 spectre_mitigations_enabled: bool, 335 // Whether we're emitting PCC facts. 336 pcc: Option<(ir::MemoryType, u64)>, 337 // The `i8` boolean value that is non-zero when the heap access is out of 338 // bounds (and therefore we should trap) and is zero when the heap access is 339 // in bounds (and therefore we can proceed). 
        oob_condition: Value,
    ) -> Value {
        // Without Spectre mitigations we can trap eagerly on the OOB condition
        // before computing the address at all.
        if !spectre_mitigations_enabled {
            builder
                .ins()
                .trapnz(oob_condition, TrapCode::HEAP_OUT_OF_BOUNDS);
        }
        let mut addr = self.compute_addr(&mut builder.cursor(), addr_ty, index, offset, pcc);

        if spectre_mitigations_enabled {
            // With Spectre mitigations enabled, replace the computed address
            // with null when the access is out of bounds, so a mispredicted
            // bounds-check branch cannot speculatively dereference an
            // attacker-controlled address.
            let null = builder.ins().iconst(addr_ty, 0);
            addr = builder
                .ins()
                .select_spectre_guard(oob_condition, null, addr);

            if let Some((ty, size)) = pcc {
                builder.func.dfg.facts[null] =
                    Some(Fact::constant(u16::try_from(addr_ty.bits()).unwrap(), 0));
                // `nullable: true` because the select above may yield the null
                // value on the out-of-bounds path.
                builder.func.dfg.facts[addr] = Some(Fact::Mem {
                    ty,
                    min_offset: 0,
                    max_offset: size.checked_sub(u64::from(access_size)).unwrap(),
                    nullable: true,
                });
            }
        }

        addr
    }

    /// Emits the final address computation `base_gv + index (+ offset)`,
    /// attaching PCC facts to each intermediate value when `pcc` is set.
    fn compute_addr(
        &self,
        pos: &mut FuncCursor,
        addr_ty: Type,
        index: Value,
        offset: u32,
        pcc: Option<(ir::MemoryType, u64)>,
    ) -> Value {
        debug_assert_eq!(pos.func.dfg.value_type(index), addr_ty);

        let heap_base = pos.ins().global_value(addr_ty, self.base_gv);

        if let Some((ty, _size)) = pcc {
            // The base points exactly at offset 0 of the memory type.
            pos.func.dfg.facts[heap_base] = Some(Fact::Mem {
                ty,
                min_offset: 0,
                max_offset: 0,
                nullable: false,
            });
        }

        let base_and_index = pos.ins().iadd(heap_base, index);

        if let Some((ty, _)) = pcc {
            if let Some(idx) = pos.func.dfg.facts[index]
                .as_ref()
                .and_then(|f| f.as_symbol())
                .cloned()
            {
                pos.func.dfg.facts[base_and_index] = Some(Fact::DynamicMem {
                    ty,
                    min: idx.clone(),
                    max: idx,
                    nullable: false,
                });
            } else {
                pos.func.dfg.facts[base_and_index] = Some(Fact::Mem {
                    ty,
                    min_offset: 0,
                    max_offset: u64::from(u32::MAX),
                    nullable: false,
                });
            }
        }

        if offset == 0 {
            base_and_index
        } else {
            // NB: The addition of the offset immediate must happen *before* the
            // `select_spectre_guard`, if any. If it happens after, then we
            // potentially are letting speculative execution read the whole first
            // 4GiB of memory.
            let offset_val = pos.ins().iconst(addr_ty, i64::from(offset));

            if pcc.is_some() {
                pos.func.dfg.facts[offset_val] = Some(Fact::constant(
                    u16::try_from(addr_ty.bits()).unwrap(),
                    u64::from(offset),
                ));
            }

            let result = pos.ins().iadd(base_and_index, offset_val);

            if let Some((ty, _)) = pcc {
                if let Some(idx) = pos.func.dfg.facts[index]
                    .as_ref()
                    .and_then(|f| f.as_symbol())
                {
                    pos.func.dfg.facts[result] = Some(Fact::DynamicMem {
                        ty,
                        min: idx.clone(),
                        // Safety: adding an offset to an expression with
                        // zero offset -- add cannot wrap, so `unwrap()`
                        // cannot fail.
                        max: Expr::offset(idx, i64::from(offset)).unwrap(),
                        nullable: false,
                    });
                } else {
                    pos.func.dfg.facts[result] = Some(Fact::Mem {
                        ty,
                        min_offset: u64::from(offset),
                        // Safety: can't overflow -- two u32s summed in a
                        // 64-bit add. TODO: when memory64 is supported here,
                        // `u32::MAX` is no longer true, and we'll need to
                        // handle overflow here.
                        max_offset: u64::from(u32::MAX) + u64::from(offset),
                        nullable: false,
                    });
                }
            }

            result
        }
    }
}

/// Zero-extends `index` from the heap's index type to the host pointer type,
/// returning it unchanged when the two types already match.
fn cast_index_to_pointer_ty(
    index: Value,
    index_ty: Type,
    pointer_ty: Type,
    pcc: bool,
    pos: &mut FuncCursor,
) -> Value {
    if index_ty == pointer_ty {
        return index;
    }
    // Note that using 64-bit heaps on a 32-bit host is not currently supported,
    // would require at least a bounds check here to ensure that the truncation
    // from 64-to-32 bits doesn't lose any upper bits. For now though we're
    // mostly interested in the 32-bit-heaps-on-64-bit-hosts cast.
    assert!(index_ty.bits() < pointer_ty.bits());

    // Convert `index` to `addr_ty`.
    let extended_index = pos.ins().uextend(pointer_ty, index);

    // Add a range fact on the extended value.
    if pcc {
        pos.func.dfg.facts[extended_index] = Some(Fact::max_range_for_width_extended(
            u16::try_from(index_ty.bits()).unwrap(),
            u16::try_from(pointer_ty.bits()).unwrap(),
        ));
    }

    // Add debug value-label alias so that debuginfo can name the extended
    // value as the address
    let loc = pos.srcloc();
    let loc = RelSourceLoc::from_base_offset(pos.func.params.base_srcloc(), loc);
    pos.func
        .stencil
        .dfg
        .add_value_label_alias(extended_index, loc, index);

    extended_index
}