//! Next Generation WASM Microkernel Operating System
// Copyright 2025 Jonas Kruckenberg
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

8use alloc::vec::Vec;
9use core::cell::RefCell;
10use core::sync::atomic;
11use core::sync::atomic::{AtomicUsize, Ordering};
12use cpu_local::collection::CpuLocal;
13use ksharded_slab::Pool;
14use ksharded_slab::pool::Ref;
15use tracing_core::field::FieldSet;
16use tracing_core::span::{Attributes, Current, Id, Record};
17use tracing_core::{Collect, Dispatch, Event, Interest, Metadata, dispatch};
18
/// A shared, reusable store for spans.
///
/// This registry is implemented using a [lock-free sharded slab][slab], and is
/// highly optimized for concurrent access.
///
/// # Span ID Generation
///
/// Span IDs are not globally unique, but the registry ensures that
/// no two currently active spans have the same ID within a process.
///
/// One of the primary responsibilities of the registry is to generate [span
/// IDs]. Therefore, it's important for other code that interacts with the
/// registry, such as subscribers, to understand the guarantees of the
/// span IDs that are generated.
///
/// The registry's span IDs are guaranteed to be unique **at a given point
/// in time**. This means that an active span will never be assigned the
/// same ID as another **currently active** span. However, the registry
/// **will** eventually reuse the IDs of [closed] spans, although an ID
/// will never be reassigned immediately after a span has closed.
///
/// Spans are not [considered closed] by the `Registry` until *every*
/// [`Span`] reference with that ID has been dropped.
///
/// Thus: span IDs generated by the registry should be considered unique
/// only at a given point in time, and only relative to other spans
/// generated by the same process. Two spans with the same ID will not exist
/// in the same process concurrently. However, if historical span data is
/// being stored, the same ID may occur for multiple spans in that
/// data. If spans must be uniquely identified in historical data, the user
/// code storing this data must assign its own unique identifiers to those
/// spans. A counter is generally sufficient for this.
///
/// Similarly, span IDs generated by the registry are not unique outside of
/// a given process. Distributed tracing systems may require identifiers
/// that are unique across multiple processes on multiple machines (for
/// example, [OpenTelemetry's `SpanId`s and `TraceId`s][opentelemetry]). `tracing` span
/// IDs generated by the registry should **not** be used for this purpose.
/// Instead, code which integrates with a distributed tracing system should
/// generate and propagate its own IDs according to the rules specified by
/// the distributed tracing system. These IDs can be associated with
/// `tracing` spans using [fields] and/or stored span data.
///
/// [span IDs]: https://docs.rs/tracing-core/latest/tracing_core/span/struct.Id.html
/// [slab]: https://docs.rs/crate/sharded-slab/
/// [closed]: https://docs.rs/tracing/latest/tracing/span/index.html#closing-spans
/// [considered closed]: https://docs.rs/tracing-core/latest/tracing_core/subscriber/trait.Subscriber.html#method.try_close
/// [`Span`]: https://docs.rs/tracing/latest/tracing/span/struct.Span.html
/// [opentelemetry]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#spancontext
/// [fields]: https://docs.rs/tracing-core/latest/tracing-core/field/index.html
#[derive(Debug)]
pub struct Registry {
    // Pooled per-span data; a slot is cleared in place and reused after the
    // span it held closes (see `DataInner`'s `Clear` impl).
    spans: Pool<DataInner>,
    // One stack of currently-entered spans per CPU; used to resolve
    // contextual parents in `new_span` and to answer `current_span`.
    current_spans: CpuLocal<RefCell<SpanStack>>,
}
74
/// Span data stored in a [`Registry`].
///
/// The registry stores well-known data defined by tracing: span relationships,
/// metadata and reference counts.
#[derive(Debug)]
pub struct Data<'a> {
    /// Immutable, guard-style reference to the pooled `DataInner` entry for
    /// this span.
    inner: Ref<'a, DataInner>,
}
84
/// Stored data associated with a span.
///
/// This type is pooled using `sharded_slab::Pool`; when a span is dropped, the
/// `DataInner` entry at that span's slab index is cleared in place and reused
/// by a future span. Thus, the `Default` and `sharded_slab::Clear`
/// implementations for this type are load-bearing.
#[derive(Debug)]
struct DataInner {
    // Callsite metadata for the span. Overwritten with the real metadata in
    // `new_span`; the `Default` value is a placeholder that is never read.
    metadata: &'static Metadata<'static>,
    // ID of this span's parent, if any. Holding this ID represents a
    // reference on the parent, released when this slot is cleared.
    parent: Option<Id>,
    // Outstanding references to this span: `clone_span` increments it,
    // `try_close` decrements it and reports closure when it reaches zero.
    ref_count: AtomicUsize,
}
97
/// A reference to [`Data`] and the associated [`Registry`].
///
/// This type implements all the same methods as [`Data`], and provides
/// additional methods for traversing the span's ancestry through the
/// registry (see [`SpanRef::parent`] and [`SpanRef::scope`]).
#[derive(Debug)]
pub struct SpanRef<'a> {
    // Registry used to resolve parent IDs into further span data.
    registry: &'a Registry,
    // The referenced span's stored data.
    data: Data<'a>,
}
106
/// An iterator over the parents of a span, ordered from leaf to root.
///
/// This is returned by the [`SpanRef::scope`] method.
#[derive(Debug)]
pub struct Scope<'a> {
    // Registry used to look up each span in the chain.
    registry: &'a Registry,
    // ID of the next span in the chain, starting at the leaf span itself
    // (see `SpanRef::scope`); `None` once the chain is exhausted.
    next: Option<Id>,
}
115
/// An entry on a [`SpanStack`]: a span ID plus whether the same ID was
/// already present on the stack when this entry was pushed.
///
/// Duplicate entries do not own a span reference — `Registry::enter` only
/// clones the span for the first occurrence, and `Registry::exit` only
/// releases a reference when popping a non-duplicate entry.
#[derive(Debug)]
struct ContextId {
    id: Id,
    // `true` if `id` was already on the stack when this entry was pushed.
    duplicate: bool,
}
121
/// `SpanStack` tracks which spans are currently executing, with one stack
/// kept per CPU (see `Registry::current_spans`).
///
/// A separate "current span" per execution context is a semantic choice, as
/// each span can be executing in a different context at the same time.
#[derive(Debug, Default)]
pub(crate) struct SpanStack {
    // Entered spans, outermost first. Re-entered spans are pushed again but
    // flagged as duplicates so that enter/exit pairs balance correctly.
    stack: Vec<ContextId>,
}
130
131// === impl Registry ===
132
133impl Default for Registry {
134 fn default() -> Self {
135 Self {
136 spans: Pool::new(),
137 current_spans: CpuLocal::new(),
138 }
139 }
140}
141
142impl Registry {
143 fn get(&self, id: &Id) -> Option<Ref<'_, DataInner>> {
144 self.spans.get(id_to_idx(id))
145 }
146
147 fn span_data(&self, id: &Id) -> Option<Data> {
148 let inner = self.get(id)?;
149 Some(Data { inner })
150 }
151
152 fn span(&self, id: &Id) -> Option<SpanRef>
153 where
154 Self: Sized,
155 {
156 let data = self.span_data(id)?;
157 Some(SpanRef {
158 registry: self,
159 data,
160 })
161 }
162}
163
impl Collect for Registry {
    /// The registry performs no filtering; callsite registration is handled
    /// by the wrapping subscriber layers, so this must never be reached.
    fn register_callsite(&self, _metadata: &'static Metadata<'static>) -> Interest {
        unreachable!("Registry::register_callsite should never be called")
    }

    /// See `register_callsite`: filtering is not the registry's job.
    fn enabled(&self, _metadata: &Metadata<'_>) -> bool {
        unreachable!("Registry::enabled should never be called")
    }

    /// Allocates a pooled `DataInner` slot for the new span and returns its ID.
    ///
    /// The parent is resolved from the attributes: explicit root → no parent,
    /// contextual → the current span on this CPU, otherwise the explicitly
    /// specified parent. A resolved parent is cloned (ref count incremented)
    /// so it stays open at least as long as this child.
    fn new_span(&self, attrs: &Attributes<'_>) -> Id {
        let parent = if attrs.is_root() {
            None
        } else if attrs.is_contextual() {
            self.current_span().id().map(|id| self.clone_span(id))
        } else {
            attrs.parent().map(|id| self.clone_span(id))
        };

        let id = self
            .spans
            // Check out a `DataInner` entry from the pool for the new span. If
            // there are free entries already allocated in the pool, this will
            // preferentially reuse one; otherwise, a new `DataInner` is
            // allocated and added to the pool.
            .create_with(|data| {
                data.metadata = attrs.metadata();
                data.parent = parent;

                // We have exclusive access to the freshly checked-out entry,
                // so the non-atomic `get_mut` is fine; a cleared slot must
                // start with zero references.
                let refs = data.ref_count.get_mut();
                debug_assert_eq!(*refs, 0);
                *refs = 1;
            })
            .expect("Unable to allocate another span");
        idx_to_id(id)
    }

    /// The registry does not store field values; recorded values are dropped.
    fn record(&self, _span: &Id, _values: &Record<'_>) {}

    /// Follows-from relationships are not tracked by the registry.
    fn record_follows_from(&self, _span: &Id, _follows: &Id) {}

    /// Events are not stored by the registry.
    fn event(&self, _event: &Event<'_>) {}

    /// Pushes the span onto this CPU's stack of entered spans. The first
    /// (non-duplicate) entry for an ID also takes a reference on the span.
    fn enter(&self, id: &Id) {
        if self
            .current_spans
            .get_or_default()
            .borrow_mut()
            .push(id.clone())
        {
            self.clone_span(id);
        }
    }

    /// Pops the span from this CPU's stack. If a non-duplicate entry was
    /// removed, the reference taken in `enter` is released through the whole
    /// dispatcher stack (not just this registry) so subscribers see it.
    fn exit(&self, id: &Id) {
        if let Some(spans) = self.current_spans.get() {
            if spans.borrow_mut().pop(id) {
                dispatch::get_default(|dispatch| dispatch.try_close(id.clone()));
            }
        }
    }

    /// Increments the span's reference count and returns the same ID.
    ///
    /// # Panics
    ///
    /// Panics if no live span exists with this ID, or if the span's count was
    /// already zero (the span has closed).
    fn clone_span(&self, id: &Id) -> Id {
        let span = self
            .get(id)
            .unwrap_or_else(|| panic!("tried to clone {:?}, but no span exists with that ID", id));
        // Like `std::sync::Arc`, adds to the ref count (on clone) don't require
        // a strong ordering; if we call `clone_span`, the reference count must
        // always be at least 1. The only synchronization necessary is between
        // calls to `try_close`: we have to ensure that all threads have
        // dropped their refs to the span before the span is closed.
        let refs = span.ref_count.fetch_add(1, Ordering::Relaxed);
        assert_ne!(
            refs, 0,
            "tried to clone a span ({:?}) that already closed",
            id
        );
        id.clone()
    }

    /// Drops one reference to the span, returning `true` if this was the
    /// last reference and the span may now be closed.
    ///
    /// # Panics
    ///
    /// Panics if no live span exists with this ID, or on ref-count underflow.
    fn try_close(&self, id: Id) -> bool {
        let Some(span) = self.get(&id) else {
            panic!("tried to drop a ref to {:?}, but no such span exists!", id);
        };

        // `fetch_sub` returns the *previous* value; a previous value of
        // `usize::MAX` means the count was already 0 and has now wrapped.
        let refs = span.ref_count.fetch_sub(1, Ordering::Release);
        assert!(refs < usize::MAX, "reference count overflow!");
        if refs > 1 {
            return false;
        }

        // Synchronize if we are actually removing the span (stolen
        // from std::Arc); this ensures that all other `try_close` calls on
        // other threads happen-before we actually remove the span.
        atomic::fence(Ordering::Acquire);
        true
    }

    /// Returns the innermost non-duplicate span entered on this CPU, or
    /// `Current::none()` if no span is active.
    fn current_span(&self) -> Current {
        self.current_spans
            .get()
            .and_then(|spans| {
                let spans = spans.borrow();
                let id = spans.current()?;
                let span = self.get(id)?;
                Some(Current::new(id.clone(), span.metadata))
            })
            .unwrap_or_else(Current::none)
    }
}
273
274// === impl DataInner ===
275
impl Default for DataInner {
    /// Creates an empty placeholder entry for the pool.
    ///
    /// The placeholder metadata points at a callsite whose methods all panic;
    /// this is sound because a pooled `DataInner` is never read before
    /// `new_span` has overwritten `metadata` with real callsite metadata.
    fn default() -> Self {
        // Since `DataInner` owns a `&'static Callsite` pointer, we need
        // something to use as the initial default value for that callsite.
        // Since we can't access a `DataInner` until it has had actual span data
        // inserted into it, the null metadata will never actually be accessed.
        struct NullCallsite;
        impl tracing_core::callsite::Callsite for NullCallsite {
            fn set_interest(&self, _: Interest) {
                unreachable!(
                    "/!\\ Tried to register the null callsite /!\\\n \
                    This should never have happened and is definitely a bug. \
                    A `tracing` bug report would be appreciated."
                )
            }

            fn metadata(&self) -> &Metadata<'_> {
                unreachable!(
                    "/!\\ Tried to access the null callsite's metadata /!\\\n \
                    This should never have happened and is definitely a bug. \
                    A `tracing` bug report would be appreciated."
                )
            }
        }

        static NULL_CALLSITE: NullCallsite = NullCallsite;
        static NULL_METADATA: Metadata<'static> = tracing_core::metadata! {
            name: "",
            target: "",
            level: tracing_core::Level::TRACE,
            fields: &[],
            callsite: &NULL_CALLSITE,
            kind: tracing_core::metadata::Kind::SPAN,
        };

        Self {
            metadata: &NULL_METADATA,
            parent: None,
            ref_count: AtomicUsize::new(0),
        }
    }
}
318
319impl ksharded_slab::Clear for DataInner {
320 /// Clears the span's data in place, dropping the parent's reference count.
321 fn clear(&mut self) {
322 // A span is not considered closed until all of its children have closed.
323 // Therefore, each span's `DataInner` holds a "reference" to the parent
324 // span, keeping the parent span open until all its children have closed.
325 // When we close a span, we must then decrement the parent's ref count
326 // (potentially, allowing it to close, if this child is the last reference
327 // to that span).
328 // We have to actually unpack the option inside the `get_default`
329 // closure, since it is a `FnMut`, but testing that there _is_ a value
330 // here lets us avoid the thread-local access if we don't need the
331 // dispatcher at all.
332 if self.parent.is_some() {
333 // Note that --- because `Layered::try_close` works by calling
334 // `try_close` on the inner subscriber and using the return value to
335 // determine whether to call the subscriber's `on_close` callback ---
336 // we must call `try_close` on the entire subscriber stack, rather
337 // than just on the registry. If the registry called `try_close` on
338 // itself directly, the subscribers wouldn't see the close notification.
339 let subscriber = dispatch::get_default(Dispatch::clone);
340 if let Some(parent) = self.parent.take() {
341 let _ = subscriber.try_close(parent);
342 }
343 }
344 }
345}
346
347impl Data<'_> {
348 fn id(&self) -> Id {
349 idx_to_id(self.inner.key())
350 }
351
352 fn metadata(&self) -> &'static Metadata<'static> {
353 self.inner.metadata
354 }
355
356 fn parent(&self) -> Option<&Id> {
357 self.inner.parent.as_ref()
358 }
359}
360
361impl<'a> SpanRef<'a> {
362 /// Returns this span's ID.
363 pub fn id(&self) -> Id {
364 self.data.id()
365 }
366
367 /// Returns a static reference to the span's metadata.
368 pub fn metadata(&self) -> &'static Metadata<'static> {
369 self.data.metadata()
370 }
371
372 /// Returns the span's name,
373 pub fn name(&self) -> &'static str {
374 self.data.metadata().name()
375 }
376
377 /// Returns a list of [fields] defined by the span.
378 ///
379 /// [fields]: tracing_core::field
380 pub fn fields(&self) -> &FieldSet {
381 self.data.metadata().fields()
382 }
383
384 /// Returns the ID of this span's parent, or `None` if this span is the root
385 /// of its trace tree.
386 pub fn parent_id(&self) -> Option<&Id> {
387 self.data.parent()
388 }
389
390 /// Returns a `SpanRef` describing this span's parent, or `None` if this
391 /// span is the root of its trace tree.
392 pub fn parent(&self) -> Option<Self> {
393 let id = self.data.parent()?;
394 let data = self.registry.span_data(id)?;
395
396 Some(Self {
397 registry: self.registry,
398 data,
399 })
400 }
401
402 /// Returns an iterator over all parents of this span, starting with this span,
403 /// ordered from leaf to root.
404 ///
405 /// The iterator will first return the span, then the span's immediate parent,
406 /// followed by that span's parent, and so on, until it reaches a root span.
407 pub fn scope(&self) -> Scope<'a> {
408 Scope {
409 registry: self.registry,
410 next: Some(self.id()),
411 }
412 }
413}
414
415// === impl SpanStack ===
416
417impl SpanStack {
418 #[inline]
419 pub(crate) fn push(&mut self, id: Id) -> bool {
420 let duplicate = self.stack.iter().any(|i| i.id == id);
421 self.stack.push(ContextId { id, duplicate });
422 !duplicate
423 }
424
425 #[inline]
426 pub(crate) fn pop(&mut self, expected_id: &Id) -> bool {
427 if let Some((idx, _)) = self
428 .stack
429 .iter()
430 .enumerate()
431 .rev()
432 .find(|(_, ctx_id)| ctx_id.id == *expected_id)
433 {
434 let ContextId { id: _, duplicate } = self.stack.remove(idx);
435 return !duplicate;
436 }
437 false
438 }
439
440 #[inline]
441 pub(crate) fn iter(&self) -> impl Iterator<Item = &Id> {
442 self.stack
443 .iter()
444 .rev()
445 .filter_map(|ContextId { id, duplicate }| if !*duplicate { Some(id) } else { None })
446 }
447
448 #[inline]
449 pub(crate) fn current(&self) -> Option<&Id> {
450 self.iter().next()
451 }
452}
453
454#[inline]
455fn idx_to_id(idx: usize) -> Id {
456 Id::from_u64(idx as u64 + 1)
457}
458
459#[inline]
460fn id_to_idx(id: &Id) -> usize {
461 usize::try_from(id.into_u64()).unwrap() - 1
462}