//! Next Generation WASM Microkernel Operating System
1// Copyright 2025 Jonas Kruckenberg
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use crate::wasm::cranelift::code_translator::Reachability;
9use crate::wasm::cranelift::env::TranslationEnvironment;
10use crate::wasm::cranelift::utils::index_type_to_ir_type;
11use crate::wasm::translate::IndexType;
12use crate::wasm::trap::TRAP_HEAP_MISALIGNED;
13use cranelift_codegen::cursor::{Cursor, FuncCursor};
14use cranelift_codegen::ir;
15use cranelift_codegen::ir::InstBuilder;
16use cranelift_codegen::ir::condcodes::IntCC;
17use cranelift_codegen::ir::{Expr, Fact, MemFlags, RelSourceLoc, TrapCode, Type, Value};
18use cranelift_frontend::FunctionBuilder;
19use wasmparser::MemArg;
20
/// Compile-time description of a single Wasm linear memory (heap), as seen by
/// the Cranelift code generator when lowering memory accesses.
///
/// The bounds-checking strategy in the `impl` below reads `bound`,
/// `offset_guard_size`, `index_type`, and (for proof-carrying code)
/// `memory_type`; the remaining fields describe the memory's configured limits.
#[derive(Debug, Clone)]
pub struct CraneliftMemory {
    /// The address of the start of the heap's storage.
    pub base_gv: ir::GlobalValue,
    /// The index type for the heap.
    pub index_type: IndexType,
    /// The memory type for the pointed-to memory, if using proof-carrying code.
    /// `Some` here is what enables PCC fact emission throughout the lowering.
    pub memory_type: Option<ir::MemoryType>,
    /// Heap bound in bytes. The offset-guard pages are allocated after the
    /// bound.
    pub bound: u64,
    /// Guaranteed minimum heap size in bytes. Heap accesses before `min_size`
    /// don't need bounds checking.
    pub min_size: u64,
    /// The maximum heap size in bytes.
    ///
    /// Heap accesses larger than this will always trap.
    pub max_size: Option<u64>,
    /// Size in bytes of the offset-guard pages following the heap.
    pub offset_guard_size: u64,
    /// The log2 of this memory's page size.
    pub page_size_log2: u8,
}
44
45impl CraneliftMemory {
46 /// Returns `None` when the Wasm access will unconditionally trap.
47 ///
48 /// Returns `(flags, wasm_addr, native_addr)`.
49 pub fn prepare_addr(
50 &self,
51 builder: &mut FunctionBuilder,
52 index: Value,
53 access_size: u8,
54 memarg: &MemArg,
55 env: &mut TranslationEnvironment,
56 ) -> Reachability<(MemFlags, Value, Value)> {
57 let addr = if let Ok(offset) = u32::try_from(memarg.offset) {
58 // If our offset fits within a u32, then we can place it into the
59 // offset immediate of the `heap_addr` instruction.
60 self.bounds_check_and_compute_addr(builder, index, offset, access_size, env)
61 } else {
62 // If the offset doesn't fit within a u32, then we can't pass it
63 // directly into `heap_addr`.
64 let offset = builder.ins().iconst(
65 index_type_to_ir_type(self.index_type),
66 i64::try_from(memarg.offset).unwrap(),
67 );
68 let adjusted_index =
69 builder
70 .ins()
71 .uadd_overflow_trap(index, offset, TrapCode::HEAP_OUT_OF_BOUNDS);
72 self.bounds_check_and_compute_addr(builder, adjusted_index, 0, access_size, env)
73 };
74
75 match addr {
76 Reachability::Unreachable => Reachability::Unreachable,
77 Reachability::Reachable(addr) => {
78 // Note that we don't set `is_aligned` here, even if the load instruction's
79 // alignment immediate may say it's aligned, because WebAssembly's
80 // immediate field is just a hint, while Cranelift's aligned flag needs a
81 // guarantee. WebAssembly memory accesses are always little-endian.
82 let mut flags = MemFlags::new();
83 flags.set_endianness(ir::Endianness::Little);
84
85 if self.memory_type.is_some() {
86 // Proof-carrying code is enabled; check this memory access.
87 flags.set_checked();
88 }
89
90 // The access occurs to the `heap` disjoint category of abstract
91 // state. This may allow alias analysis to merge redundant loads,
92 // etc. when heap accesses occur interleaved with other (table,
93 // vmctx, stack) accesses.
94 flags.set_alias_region(Some(ir::AliasRegion::Heap));
95
96 Reachability::Reachable((flags, index, addr))
97 }
98 }
99 }
100
101 /// Like `prepare_addr` but for atomic accesses.
102 ///
103 /// Returns `None` when the Wasm access will unconditionally trap.
104 pub fn prepare_atomic_addr(
105 &self,
106 builder: &mut FunctionBuilder,
107 index: Value,
108 loaded_bytes: u8,
109 memarg: &MemArg,
110 env: &mut TranslationEnvironment,
111 ) -> Reachability<(MemFlags, Value, Value)> {
112 // Atomic addresses must all be aligned correctly, and for now we check
113 // alignment before we check out-of-bounds-ness. The order of this check may
114 // need to be updated depending on the outcome of the official threads
115 // proposal itself.
116 //
117 // Note that with an offset>0 we generate an `iadd_imm` where the result is
118 // thrown away after the offset check. This may truncate the offset and the
119 // result may overflow as well, but those conditions won't affect the
120 // alignment check itself. This can probably be optimized better and we
121 // should do so in the future as well.
122 if loaded_bytes > 1 {
123 let effective_addr = if memarg.offset == 0 {
124 index
125 } else {
126 builder
127 .ins()
128 .iadd_imm(index, i64::try_from(memarg.offset).unwrap())
129 };
130 debug_assert!(loaded_bytes.is_power_of_two());
131 let misalignment = builder.ins().band_imm(
132 effective_addr,
133 i64::from(loaded_bytes.checked_sub(1).unwrap()),
134 );
135 let f = builder.ins().icmp_imm(IntCC::NotEqual, misalignment, 0);
136 builder.ins().trapnz(f, TRAP_HEAP_MISALIGNED);
137 }
138
139 self.prepare_addr(builder, index, loaded_bytes, memarg, env)
140 }
141
142 fn bounds_check_and_compute_addr(
143 &self,
144 builder: &mut FunctionBuilder,
145 // Dynamic operand indexing into the memory.
146 index: Value,
147 // Static immediate added to the index.
148 offset: u32,
149 // Static size of the heap access.
150 access_size: u8,
151 env: &mut TranslationEnvironment,
152 ) -> Reachability<Value> {
153 let pointer_bit_width = u16::try_from(env.pointer_type().bits()).unwrap();
154 let orig_index = index;
155 let index = cast_index_to_pointer_ty(
156 index,
157 index_type_to_ir_type(self.index_type),
158 env.pointer_type(),
159 self.memory_type.is_some(),
160 &mut builder.cursor(),
161 );
162
163 let spectre_mitigations_enabled = env.heap_access_spectre_mitigation();
164 let pcc = env.proof_carrying_code();
165 // Cannot overflow because we are widening to `u64`.
166 // TODO when memory64 is supported this needs to be handles correctly
167 let offset_and_size = u64::from(offset) + u64::from(access_size);
168
169 let host_page_size_log2 = env.target_isa().page_size_align_log2();
170 let can_use_virtual_memory = self.page_size_log2 >= host_page_size_log2;
171 assert!(
172 can_use_virtual_memory,
173 "k23's memories require the ability to use virtual memory"
174 );
175
176 let make_compare =
177 |builder: &mut FunctionBuilder, compare_kind: IntCC, lhs: Value, rhs: Value| {
178 let result = builder.ins().icmp(compare_kind, lhs, rhs);
179 if pcc {
180 // Name the original value as a def of the SSA value;
181 // if the value was extended, name that as well with a
182 // dynamic range, overwriting the basic full-range
183 // fact that we previously put on the uextend.
184 builder.func.dfg.facts[orig_index] = Some(Fact::Def { value: orig_index });
185 if index != orig_index {
186 builder.func.dfg.facts[index] =
187 Some(Fact::value(pointer_bit_width, orig_index));
188 }
189
190 // Create a fact on the LHS that is a "trivial symbolic
191 // fact": v1 has range v1+LHS_off..=v1+LHS_off
192 builder.func.dfg.facts[lhs] =
193 Some(Fact::value_offset(pointer_bit_width, orig_index, 0));
194 // If the RHS is a symbolic value (v1 or gv1), we can
195 // emit a Compare fact.
196 if let Some(rhs) = builder.func.dfg.facts[rhs]
197 .as_ref()
198 .and_then(|f| f.as_symbol())
199 {
200 builder.func.dfg.facts[result] = Some(Fact::Compare {
201 kind: compare_kind,
202 lhs: Expr::offset(&Expr::value(orig_index), 0).unwrap(),
203 rhs: Expr::offset(rhs, 0).unwrap(),
204 });
205 }
206 // Likewise, if the RHS is a constant, we can emit a
207 // Compare fact.
208 if let Some(k) = builder.func.dfg.facts[rhs]
209 .as_ref()
210 .and_then(|f| f.as_const(pointer_bit_width))
211 {
212 builder.func.dfg.facts[result] = Some(Fact::Compare {
213 kind: compare_kind,
214 lhs: Expr::value(orig_index),
215 rhs: Expr::constant(i64::try_from(k).unwrap()),
216 });
217 }
218 }
219 result
220 };
221
222 if offset_and_size > self.bound {
223 // 1. First special case: trap immediately if `offset + access_size >
224 // bound`, since we will end up being out-of-bounds regardless of the
225 // given `index`.
226 builder.ins().trap(TrapCode::HEAP_OUT_OF_BOUNDS);
227 Reachability::Unreachable
228 } else if index_type_to_ir_type(self.index_type) == ir::types::I32
229 && u64::from(u32::MAX)
230 <= self
231 .bound
232 .saturating_add(self.offset_guard_size)
233 .saturating_add(offset_and_size)
234 {
235 // 2. Second special case for when we can completely omit explicit
236 // bounds checks for 32-bit static memories.
237 //
238 // First, let's rewrite our comparison to move all the constants
239 // to one side:
240 //
241 // index + offset + access_size > bound
242 // ==> index > bound - (offset + access_size)
243 //
244 // We know the subtraction on the right-hand side won't wrap because
245 // we didn't hit the first special case.
246 //
247 // Additionally, we add our guard pages (if any) to the right-hand
248 // side, since we can rely on the virtual memory subsystem at runtime
249 // to catch out-of-bound accesses within the range `bound .. bound +
250 // guard_size`. So now we are dealing with
251 //
252 // index > bound + guard_size - (offset + access_size)
253 //
254 // Note that `bound + guard_size` cannot overflow for
255 // correctly-configured heaps, as otherwise the heap wouldn't fit in
256 // a 64-bit memory space.
257 //
258 // The complement of our should-this-trap comparison expression is
259 // the should-this-not-trap comparison expression:
260 //
261 // index <= bound + guard_size - (offset + access_size)
262 //
263 // If we know the right-hand side is greater than or equal to
264 // `u32::MAX`, then
265 //
266 // index <= u32::MAX <= bound + guard_size - (offset + access_size)
267 //
268 // This expression is always true when the heap is indexed with
269 // 32-bit integers because `index` cannot be larger than
270 // `u32::MAX`. This means that `index` is always either in bounds or
271 // within the guard page region, neither of which require emitting an
272 // explicit bounds check.
273
274 Reachability::Reachable(
275 self.compute_addr(
276 &mut builder.cursor(),
277 env.pointer_type(),
278 index,
279 offset,
280 self.memory_type
281 .map(|ty| (ty, self.bound + self.offset_guard_size)),
282 ),
283 )
284 } else {
285 // 3. General case for static memories.
286 //
287 // We have to explicitly test whether
288 //
289 // index > bound - (offset + access_size)
290 //
291 // and trap if so.
292 //
293 // Since we have to emit explicit bounds checks, we might as well be
294 // precise, not rely on the virtual memory subsystem at all, and not
295 // factor in the guard pages here.
296 // NB: this subtraction cannot wrap because we didn't hit the first
297 // special case.
298 let adjusted_bound = self.bound.checked_sub(offset_and_size).unwrap();
299 let adjusted_bound_value = builder
300 .ins()
301 .iconst(env.pointer_type(), i64::try_from(adjusted_bound).unwrap());
302 if pcc {
303 builder.func.dfg.facts[adjusted_bound_value] =
304 Some(Fact::constant(pointer_bit_width, adjusted_bound));
305 }
306 let oob = make_compare(
307 builder,
308 IntCC::UnsignedGreaterThan,
309 index,
310 adjusted_bound_value,
311 );
312 Reachability::Reachable(self.explicit_check_oob_condition_and_compute_addr(
313 builder,
314 env.pointer_type(),
315 index,
316 offset,
317 access_size,
318 spectre_mitigations_enabled,
319 self.memory_type.map(|ty| (ty, self.bound)),
320 oob,
321 ))
322 }
323 }
324
325 #[expect(clippy::too_many_arguments, reason = "")]
326 fn explicit_check_oob_condition_and_compute_addr(
327 &self,
328 builder: &mut FunctionBuilder,
329 addr_ty: Type,
330 index: Value,
331 offset: u32,
332 access_size: u8,
333 // Whether Spectre mitigations are enabled for heap accesses.
334 spectre_mitigations_enabled: bool,
335 // Whether we're emitting PCC facts.
336 pcc: Option<(ir::MemoryType, u64)>,
337 // The `i8` boolean value that is non-zero when the heap access is out of
338 // bounds (and therefore we should trap) and is zero when the heap access is
339 // in bounds (and therefore we can proceed).
340 oob_condition: Value,
341 ) -> Value {
342 if !spectre_mitigations_enabled {
343 builder
344 .ins()
345 .trapnz(oob_condition, TrapCode::HEAP_OUT_OF_BOUNDS);
346 }
347 let mut addr = self.compute_addr(&mut builder.cursor(), addr_ty, index, offset, pcc);
348
349 if spectre_mitigations_enabled {
350 let null = builder.ins().iconst(addr_ty, 0);
351 addr = builder
352 .ins()
353 .select_spectre_guard(oob_condition, null, addr);
354
355 if let Some((ty, size)) = pcc {
356 builder.func.dfg.facts[null] =
357 Some(Fact::constant(u16::try_from(addr_ty.bits()).unwrap(), 0));
358 builder.func.dfg.facts[addr] = Some(Fact::Mem {
359 ty,
360 min_offset: 0,
361 max_offset: size.checked_sub(u64::from(access_size)).unwrap(),
362 nullable: true,
363 });
364 }
365 }
366
367 addr
368 }
369
370 fn compute_addr(
371 &self,
372 pos: &mut FuncCursor,
373 addr_ty: Type,
374 index: Value,
375 offset: u32,
376 pcc: Option<(ir::MemoryType, u64)>,
377 ) -> Value {
378 debug_assert_eq!(pos.func.dfg.value_type(index), addr_ty);
379
380 let heap_base = pos.ins().global_value(addr_ty, self.base_gv);
381
382 if let Some((ty, _size)) = pcc {
383 pos.func.dfg.facts[heap_base] = Some(Fact::Mem {
384 ty,
385 min_offset: 0,
386 max_offset: 0,
387 nullable: false,
388 });
389 }
390
391 let base_and_index = pos.ins().iadd(heap_base, index);
392
393 if let Some((ty, _)) = pcc {
394 if let Some(idx) = pos.func.dfg.facts[index]
395 .as_ref()
396 .and_then(|f| f.as_symbol())
397 .cloned()
398 {
399 pos.func.dfg.facts[base_and_index] = Some(Fact::DynamicMem {
400 ty,
401 min: idx.clone(),
402 max: idx,
403 nullable: false,
404 });
405 } else {
406 pos.func.dfg.facts[base_and_index] = Some(Fact::Mem {
407 ty,
408 min_offset: 0,
409 max_offset: u64::from(u32::MAX),
410 nullable: false,
411 });
412 }
413 }
414
415 if offset == 0 {
416 base_and_index
417 } else {
418 // NB: The addition of the offset immediate must happen *before* the
419 // `select_spectre_guard`, if any. If it happens after, then we
420 // potentially are letting speculative execution read the whole first
421 // 4GiB of memory.
422 let offset_val = pos.ins().iconst(addr_ty, i64::from(offset));
423
424 if pcc.is_some() {
425 pos.func.dfg.facts[offset_val] = Some(Fact::constant(
426 u16::try_from(addr_ty.bits()).unwrap(),
427 u64::from(offset),
428 ));
429 }
430
431 let result = pos.ins().iadd(base_and_index, offset_val);
432
433 if let Some((ty, _)) = pcc {
434 if let Some(idx) = pos.func.dfg.facts[index]
435 .as_ref()
436 .and_then(|f| f.as_symbol())
437 {
438 pos.func.dfg.facts[result] = Some(Fact::DynamicMem {
439 ty,
440 min: idx.clone(),
441 // Safety: adding an offset to an expression with
442 // zero offset -- add cannot wrap, so `unwrap()`
443 // cannot fail.
444 max: Expr::offset(idx, i64::from(offset)).unwrap(),
445 nullable: false,
446 });
447 } else {
448 pos.func.dfg.facts[result] = Some(Fact::Mem {
449 ty,
450 min_offset: u64::from(offset),
451 // Safety: can't overflow -- two u32s summed in a
452 // 64-bit add. TODO: when memory64 is supported here,
453 // `u32::MAX` is no longer true, and we'll need to
454 // handle overflow here.
455 max_offset: u64::from(u32::MAX) + u64::from(offset),
456 nullable: false,
457 });
458 }
459 }
460
461 result
462 }
463 }
464}
465
466fn cast_index_to_pointer_ty(
467 index: Value,
468 index_ty: Type,
469 pointer_ty: Type,
470 pcc: bool,
471 pos: &mut FuncCursor,
472) -> Value {
473 if index_ty == pointer_ty {
474 return index;
475 }
476 // Note that using 64-bit heaps on a 32-bit host is not currently supported,
477 // would require at least a bounds check here to ensure that the truncation
478 // from 64-to-32 bits doesn't lose any upper bits. For now though we're
479 // mostly interested in the 32-bit-heaps-on-64-bit-hosts cast.
480 assert!(index_ty.bits() < pointer_ty.bits());
481
482 // Convert `index` to `addr_ty`.
483 let extended_index = pos.ins().uextend(pointer_ty, index);
484
485 // Add a range fact on the extended value.
486 if pcc {
487 pos.func.dfg.facts[extended_index] = Some(Fact::max_range_for_width_extended(
488 u16::try_from(index_ty.bits()).unwrap(),
489 u16::try_from(pointer_ty.bits()).unwrap(),
490 ));
491 }
492
493 // Add debug value-label alias so that debuginfo can name the extended
494 // value as the address
495 let loc = pos.srcloc();
496 let loc = RelSourceLoc::from_base_offset(pos.func.params.base_srcloc(), loc);
497 pos.func
498 .stencil
499 .dfg
500 .add_value_label_alias(extended_index, loc, index);
501
502 extended_index
503}