// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

use core::mem::{size_of, size_of_val, MaybeUninit};
use core::ops::Range;

use kernel::{
    bindings,
    fs::file::{File, FileDescriptorReservation},
    prelude::*,
    sync::{aref::ARef, Arc},
    transmute::{AsBytes, FromBytes},
    uaccess::UserSliceReader,
    uapi,
};

use crate::{
    deferred_close::DeferredFdCloser,
    defs::*,
    node::{Node, NodeRef},
    process::Process,
    DArc,
};

#[derive(Default)]
pub(crate) struct AllocationInfo {
    /// Range within the allocation where we can find the offsets to the object descriptors.
    pub(crate) offsets: Option<Range<usize>>,
    /// The target node of the transaction this allocation is associated with.
    /// Not set for replies.
    pub(crate) target_node: Option<NodeRef>,
    /// When this allocation is dropped, call `pending_oneway_finished` on the node.
    ///
    /// This is used to serialize oneway transactions on the same node. Binder guarantees that
    /// oneway transactions to the same node are delivered sequentially in the order they are sent.
    pub(crate) oneway_node: Option<DArc<Node>>,
    /// Zero the data in the buffer on free.
    pub(crate) clear_on_free: bool,
    /// List of files embedded in this transaction.
    file_list: FileList,
}

/// Represents an allocation that the kernel is currently using.
///
/// When allocations are idle, the range allocator holds the data related to them.
///
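/// Typical lifecycle (an illustrative sketch, not a complete description): the process's range
/// allocator hands out an `Allocation`, the caller fills it with `copy_into`/`write`, and then
/// either passes ownership on with `keep_alive` so that the buffer is freed later, or drops it,
/// which runs the cleanup in the `Drop` implementation below.
///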
/// # Invariants
///
/// This allocation corresponds to an allocation in the range allocator, so the relevant pages are
/// marked in use in the page range.
pub(crate) struct Allocation {
    pub(crate) offset: usize,
    size: usize,
    pub(crate) ptr: usize,
    pub(crate) process: Arc<Process>,
    allocation_info: Option<AllocationInfo>,
    free_on_drop: bool,
    pub(crate) oneway_spam_detected: bool,
    #[allow(dead_code)]
    pub(crate) debug_id: usize,
}

impl Allocation {
    pub(crate) fn new(
        process: Arc<Process>,
        debug_id: usize,
        offset: usize,
        size: usize,
        ptr: usize,
        oneway_spam_detected: bool,
    ) -> Self {
        Self {
            process,
            offset,
            size,
            ptr,
            debug_id,
            oneway_spam_detected,
            allocation_info: None,
            free_on_drop: true,
        }
    }

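    /// Check that the range `offset..offset + size` lies within this allocation, returning
    /// `EFAULT` if the addition overflows or the range extends past the end of the allocation.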
    fn size_check(&self, offset: usize, size: usize) -> Result {
        let overflow_fail = offset.checked_add(size).is_none();
        let cmp_size_fail = offset.wrapping_add(size) > self.size;
        if overflow_fail || cmp_size_fail {
            return Err(EFAULT);
        }
        Ok(())
    }

    pub(crate) fn copy_into(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        self.size_check(offset, size)?;

        // SAFETY: While this object exists, the range allocator will keep the range allocated, and
        // in turn, the pages will be marked as in use.
        unsafe {
            self.process
                .pages
                .copy_from_user_slice(reader, self.offset + offset, size)
        }
    }

    pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        self.size_check(offset, size_of::<T>())?;

        // SAFETY: While this object exists, the range allocator will keep the range allocated, and
        // in turn, the pages will be marked as in use.
        unsafe { self.process.pages.read(self.offset + offset) }
    }

    pub(crate) fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        self.size_check(offset, size_of_val::<T>(obj))?;

        // SAFETY: While this object exists, the range allocator will keep the range allocated, and
        // in turn, the pages will be marked as in use.
        unsafe { self.process.pages.write(self.offset + offset, obj) }
    }

    pub(crate) fn fill_zero(&self) -> Result {
        // SAFETY: While this object exists, the range allocator will keep the range allocated, and
        // in turn, the pages will be marked as in use.
        unsafe { self.process.pages.fill_zero(self.offset, self.size) }
    }

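    /// Consume this object without freeing the underlying range: hand the buffer (together with
    /// its `AllocationInfo`) back to the process so that it can be freed later, for example when
    /// userspace releases the buffer.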
    pub(crate) fn keep_alive(mut self) {
        self.process
            .buffer_make_freeable(self.offset, self.allocation_info.take());
        self.free_on_drop = false;
    }

    pub(crate) fn set_info(&mut self, info: AllocationInfo) {
        self.allocation_info = Some(info);
    }

    pub(crate) fn get_or_init_info(&mut self) -> &mut AllocationInfo {
        self.allocation_info.get_or_insert_with(Default::default)
    }

    pub(crate) fn set_info_offsets(&mut self, offsets: Range<usize>) {
        self.get_or_init_info().offsets = Some(offsets);
    }

    pub(crate) fn set_info_oneway_node(&mut self, oneway_node: DArc<Node>) {
        self.get_or_init_info().oneway_node = Some(oneway_node);
    }

    pub(crate) fn set_info_clear_on_drop(&mut self) {
        self.get_or_init_info().clear_on_free = true;
    }

    pub(crate) fn set_info_target_node(&mut self, target_node: NodeRef) {
        self.get_or_init_info().target_node = Some(target_node);
    }

    /// Reserve enough space to push at least `num_fds` fds.
    pub(crate) fn info_add_fd_reserve(&mut self, num_fds: usize) -> Result {
        self.get_or_init_info()
            .file_list
            .files_to_translate
            .reserve(num_fds, GFP_KERNEL)?;

        Ok(())
    }

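    /// Record that `file` should be translated into a file descriptor that is stored at
    /// `buffer_offset` in this allocation.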
    pub(crate) fn info_add_fd(
        &mut self,
        file: ARef<File>,
        buffer_offset: usize,
        close_on_free: bool,
    ) -> Result {
        self.get_or_init_info().file_list.files_to_translate.push(
            FileEntry {
                file,
                buffer_offset,
                close_on_free,
            },
            GFP_KERNEL,
        )?;

        Ok(())
    }

    pub(crate) fn set_info_close_on_free(&mut self, cof: FdsCloseOnFree) {
        self.get_or_init_info().file_list.close_on_free = cof.0;
    }

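    /// Reserve a file descriptor for each file recorded in this allocation and write the reserved
    /// fd number into the buffer at the offset recorded for that file. The descriptors are not
    /// installed until `TranslatedFds::commit` is called.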
    pub(crate) fn translate_fds(&mut self) -> Result<TranslatedFds> {
        let file_list = match self.allocation_info.as_mut() {
            Some(info) => &mut info.file_list,
            None => return Ok(TranslatedFds::new()),
        };

        let files = core::mem::take(&mut file_list.files_to_translate);

        let num_close_on_free = files.iter().filter(|entry| entry.close_on_free).count();
        let mut close_on_free = KVec::with_capacity(num_close_on_free, GFP_KERNEL)?;

        let mut reservations = KVec::with_capacity(files.len(), GFP_KERNEL)?;
        for file_info in files {
            let res = FileDescriptorReservation::get_unused_fd_flags(bindings::O_CLOEXEC)?;
            let fd = res.reserved_fd();
            self.write::<u32>(file_info.buffer_offset, &fd)?;

            reservations.push(
                Reservation {
                    res,
                    file: file_info.file,
                },
                GFP_KERNEL,
            )?;
            if file_info.close_on_free {
                close_on_free.push(fd, GFP_KERNEL)?;
            }
        }

        Ok(TranslatedFds {
            reservations,
            close_on_free: FdsCloseOnFree(close_on_free),
        })
    }

    /// Should the looper return to userspace when freeing this allocation?
    pub(crate) fn looper_need_return_on_free(&self) -> bool {
        // Closing fds involves pushing task_work for execution when we return to userspace. Hence,
        // we should return to userspace as soon as possible if we are closing fds.
        match self.allocation_info {
            Some(ref info) => !info.file_list.close_on_free.is_empty(),
            None => false,
        }
    }
}

impl Drop for Allocation {
    fn drop(&mut self) {
        if !self.free_on_drop {
            return;
        }

        if let Some(mut info) = self.allocation_info.take() {
            if let Some(oneway_node) = info.oneway_node.as_ref() {
                oneway_node.pending_oneway_finished();
            }

            info.target_node = None;

            if let Some(offsets) = info.offsets.clone() {
                let view = AllocationView::new(self, offsets.start);
                for i in offsets.step_by(size_of::<usize>()) {
                    if view.cleanup_object(i).is_err() {
                        pr_warn!("Error cleaning up object at offset {}\n", i)
                    }
                }
            }

            for &fd in &info.file_list.close_on_free {
                let closer = match DeferredFdCloser::new(GFP_KERNEL) {
                    Ok(closer) => closer,
                    Err(kernel::alloc::AllocError) => {
                        // Ignore allocation failures.
                        break;
                    }
                };

                // Here, we ignore errors. The operation can fail if the fd is not valid, or if the
                // method is called from a kthread. However, this is always called from a syscall,
                // so the latter case cannot happen, and we don't care about the first case.
                let _ = closer.close_fd(fd);
            }

            if info.clear_on_free {
                if let Err(e) = self.fill_zero() {
                    pr_warn!("Failed to clear data on free: {:?}\n", e);
                }
            }
        }

        self.process.buffer_raw_free(self.ptr);
    }
}

/// A wrapper around `Allocation` that is being created.
///
/// If the allocation is destroyed while wrapped in this wrapper, then the allocation will be
/// considered to be part of a failed transaction. Successful transactions avoid that by calling
/// `success`, which skips the destructor.
#[repr(transparent)]
pub(crate) struct NewAllocation(pub(crate) Allocation);

impl NewAllocation {
    pub(crate) fn success(self) -> Allocation {
        // This skips the destructor.
        //
        // SAFETY: This type is `#[repr(transparent)]`, so the layout matches.
        unsafe { core::mem::transmute(self) }
    }
}

impl core::ops::Deref for NewAllocation {
    type Target = Allocation;
    fn deref(&self) -> &Allocation {
        &self.0
    }
}

impl core::ops::DerefMut for NewAllocation {
    fn deref_mut(&mut self) -> &mut Allocation {
        &mut self.0
    }
}

/// A view into the beginning of an allocation.
///
/// All attempts to read or write outside of the view will fail. To intentionally access outside of
/// this view, use the `alloc` field of this struct directly.
pub(crate) struct AllocationView<'a> {
    pub(crate) alloc: &'a mut Allocation,
    limit: usize,
}

impl<'a> AllocationView<'a> {
    pub(crate) fn new(alloc: &'a mut Allocation, limit: usize) -> Self {
        AllocationView { alloc, limit }
    }

    pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit {
            return Err(EINVAL);
        }
        self.alloc.read(offset)
    }

    pub(crate) fn write<T: AsBytes>(&self, offset: usize, obj: &T) -> Result {
        if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit {
            return Err(EINVAL);
        }
        self.alloc.write(offset, obj)
    }

    pub(crate) fn copy_into(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        if offset.checked_add(size).ok_or(EINVAL)? > self.limit {
            return Err(EINVAL);
        }
        self.alloc.copy_into(reader, offset, size)
    }

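    /// Translate a binder or weak-binder object from the sender into the representation expected
    /// by the recipient: the node itself if the recipient owns it, or a (weak) handle otherwise,
    /// writing the resulting object at `offset`.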
    pub(crate) fn transfer_binder_object(
        &self,
        offset: usize,
        obj: &uapi::flat_binder_object,
        strong: bool,
        node_ref: NodeRef,
    ) -> Result {
        let mut newobj = FlatBinderObject::default();
        let node = node_ref.node.clone();
        if Arc::ptr_eq(&node_ref.node.owner, &self.alloc.process) {
            // The receiving process is the owner of the node, so send it a binder object (instead
            // of a handle).
            let (ptr, cookie) = node.get_id();
            newobj.hdr.type_ = if strong {
                BINDER_TYPE_BINDER
            } else {
                BINDER_TYPE_WEAK_BINDER
            };
            newobj.flags = obj.flags;
            newobj.__bindgen_anon_1.binder = ptr as _;
            newobj.cookie = cookie as _;
            self.write(offset, &newobj)?;
            // Increment the user ref count on the node. It will be decremented as part of the
            // destruction of the buffer, when we see a binder or weak-binder object.
            node.update_refcount(true, 1, strong);
        } else {
            // The receiving process is different from the owner, so we need to insert a handle to
            // the binder object.
            let handle = self
                .alloc
                .process
                .as_arc_borrow()
                .insert_or_update_handle(node_ref, false)?;
            newobj.hdr.type_ = if strong {
                BINDER_TYPE_HANDLE
            } else {
                BINDER_TYPE_WEAK_HANDLE
            };
            newobj.flags = obj.flags;
            newobj.__bindgen_anon_1.handle = handle;
            if self.write(offset, &newobj).is_err() {
                // Decrement ref count on the handle we just created.
                let _ = self
                    .alloc
                    .process
                    .as_arc_borrow()
                    .update_ref(handle, false, strong);
                return Err(EINVAL);
            }
        }

        Ok(())
    }

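    /// Clean up a single serialized object while the buffer is being freed. The object's position
    /// is read from the offset entry at `index_offset`, and the node or handle reference that was
    /// taken when the object was translated is released again.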
    fn cleanup_object(&self, index_offset: usize) -> Result {
        let offset = self.alloc.read(index_offset)?;
        let header = self.read::<BinderObjectHeader>(offset)?;
        match header.type_ {
            BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => {
                let obj = self.read::<FlatBinderObject>(offset)?;
                let strong = header.type_ == BINDER_TYPE_BINDER;
                // SAFETY: The type is `BINDER_TYPE_{WEAK_}BINDER`, so the `binder` field is
                // populated.
                let ptr = unsafe { obj.__bindgen_anon_1.binder };
                let cookie = obj.cookie;
                self.alloc.process.update_node(ptr, cookie, strong);
                Ok(())
            }
            BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => {
                let obj = self.read::<FlatBinderObject>(offset)?;
                let strong = header.type_ == BINDER_TYPE_HANDLE;
                // SAFETY: The type is `BINDER_TYPE_{WEAK_}HANDLE`, so the `handle` field is
                // populated.
                let handle = unsafe { obj.__bindgen_anon_1.handle };
                self.alloc
                    .process
                    .as_arc_borrow()
                    .update_ref(handle, false, strong)
            }
            _ => Ok(()),
        }
    }
}

/// A binder object as it is serialized.
///
/// # Invariants
///
/// All bytes must be initialized, and the value of `self.hdr.type_` must be one of the allowed
/// types.
#[repr(C)]
pub(crate) union BinderObject {
    hdr: uapi::binder_object_header,
    fbo: uapi::flat_binder_object,
    fdo: uapi::binder_fd_object,
    bbo: uapi::binder_buffer_object,
    fdao: uapi::binder_fd_array_object,
}

/// A view into a `BinderObject` that can be used in a match statement.
pub(crate) enum BinderObjectRef<'a> {
    Binder(&'a mut uapi::flat_binder_object),
    Handle(&'a mut uapi::flat_binder_object),
    Fd(&'a mut uapi::binder_fd_object),
    Ptr(&'a mut uapi::binder_buffer_object),
    Fda(&'a mut uapi::binder_fd_array_object),
}

impl BinderObject {
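    /// Read a serialized `BinderObject` from userspace, advancing the reader past exactly as many
    /// bytes as the object occupies (based on the type in its header).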
    pub(crate) fn read_from(reader: &mut UserSliceReader) -> Result<BinderObject> {
        let object = Self::read_from_inner(|slice| {
            let read_len = usize::min(slice.len(), reader.len());
            reader.clone_reader().read_slice(&mut slice[..read_len])?;
            Ok(())
        })?;

        // If we used an object type smaller than the largest object size, then we've read more
        // bytes than we needed to. However, we used `.clone_reader()` to avoid advancing the
        // original reader. Now, we call `skip` so that the caller's reader is advanced by the
        // right amount.
        //
        // The `skip` call fails if the reader doesn't have `size` bytes available. This could
        // happen if the type header corresponds to an object type that is larger than the rest of
        // the reader.
        //
        // Any extra bytes beyond the size of the object are inaccessible after this call, so
        // reading them again from the `reader` later does not result in TOCTOU bugs.
        reader.skip(object.size())?;

        Ok(object)
    }

    /// Use the provided reader closure to construct a `BinderObject`.
    ///
    /// The closure should write the bytes for the object into the provided slice.
    pub(crate) fn read_from_inner<R>(reader: R) -> Result<BinderObject>
    where
        R: FnOnce(&mut [u8; size_of::<BinderObject>()]) -> Result<()>,
    {
        let mut obj = MaybeUninit::<BinderObject>::zeroed();

        // SAFETY: The lengths of `BinderObject` and `[u8; size_of::<BinderObject>()]` are equal,
        // and the byte array has an alignment requirement of one, so the pointer cast is okay.
        // Additionally, `obj` was initialized to zeros, so the byte array will not be
        // uninitialized.
        (reader)(unsafe { &mut *obj.as_mut_ptr().cast() })?;

        // SAFETY: The entire object is initialized, so accessing this field is safe.
        let type_ = unsafe { obj.assume_init_ref().hdr.type_ };
        if Self::type_to_size(type_).is_none() {
            // The value of `obj.hdr.type_` was invalid.
            return Err(EINVAL);
        }

        // SAFETY: All bytes are initialized (since we zeroed them at the start) and we checked
        // that `self.hdr.type_` is one of the allowed types, so the type invariants are satisfied.
        unsafe { Ok(obj.assume_init()) }
    }

    pub(crate) fn as_ref(&mut self) -> BinderObjectRef<'_> {
        use BinderObjectRef::*;
        // SAFETY: The constructor ensures that all bytes of `self` are initialized, and all
        // variants of this union accept all initialized bit patterns.
        unsafe {
            match self.hdr.type_ {
                BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => Binder(&mut self.fbo),
                BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => Handle(&mut self.fbo),
                BINDER_TYPE_FD => Fd(&mut self.fdo),
                BINDER_TYPE_PTR => Ptr(&mut self.bbo),
                BINDER_TYPE_FDA => Fda(&mut self.fdao),
                // SAFETY: By the type invariant, the value of `self.hdr.type_` cannot have any
                // other value than the ones checked above.
                _ => core::hint::unreachable_unchecked(),
            }
        }
    }

    pub(crate) fn size(&self) -> usize {
        // SAFETY: The entire object is initialized, so accessing this field is safe.
        let type_ = unsafe { self.hdr.type_ };

        // SAFETY: The type invariants guarantee that the type field is correct.
        unsafe { Self::type_to_size(type_).unwrap_unchecked() }
    }

    fn type_to_size(type_: u32) -> Option<usize> {
        match type_ {
            BINDER_TYPE_WEAK_BINDER => Some(size_of::<uapi::flat_binder_object>()),
            BINDER_TYPE_BINDER => Some(size_of::<uapi::flat_binder_object>()),
            BINDER_TYPE_WEAK_HANDLE => Some(size_of::<uapi::flat_binder_object>()),
            BINDER_TYPE_HANDLE => Some(size_of::<uapi::flat_binder_object>()),
            BINDER_TYPE_FD => Some(size_of::<uapi::binder_fd_object>()),
            BINDER_TYPE_PTR => Some(size_of::<uapi::binder_buffer_object>()),
            BINDER_TYPE_FDA => Some(size_of::<uapi::binder_fd_array_object>()),
            _ => None,
        }
    }
}

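/// The files embedded in a transaction, together with the fds that should be closed when the
/// allocation carrying them is freed.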
#[derive(Default)]
struct FileList {
    files_to_translate: KVec<FileEntry>,
    close_on_free: KVec<u32>,
}

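/// A file that is part of a transaction and still needs to be translated into a file descriptor
/// in the recipient process.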
struct FileEntry {
    /// The file for which a descriptor will be created in the recipient process.
    file: ARef<File>,
    /// The offset in the buffer where the file descriptor is stored.
    buffer_offset: usize,
    /// Whether this fd should be closed when the allocation is freed.
    close_on_free: bool,
}

pub(crate) struct TranslatedFds {
    reservations: KVec<Reservation>,
    /// If commit is called, then these fds should be closed. (If commit is not called, then they
    /// shouldn't be closed.)
    close_on_free: FdsCloseOnFree,
}

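/// A file descriptor reservation that has not yet been installed, together with the file that
/// will be installed into it when `TranslatedFds::commit` is called.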
struct Reservation {
    res: FileDescriptorReservation,
    file: ARef<File>,
}

impl TranslatedFds {
    pub(crate) fn new() -> Self {
        Self {
            reservations: KVec::new(),
            close_on_free: FdsCloseOnFree(KVec::new()),
        }
    }

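    /// Install the reserved file descriptors, making them visible to userspace, and return the
    /// list of fds that should be closed when the allocation is freed.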
    pub(crate) fn commit(self) -> FdsCloseOnFree {
        for entry in self.reservations {
            entry.res.fd_install(entry.file);
        }

        self.close_on_free
    }
}

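/// The file descriptors that should be closed when the allocation that references them is freed.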
pub(crate) struct FdsCloseOnFree(KVec<u32>);