
rust_binder: add Rust Binder driver

We're generally not proponents of rewrites (nasty uncomfortable things
that make you late for dinner!). So why rewrite Binder?

Binder has been evolving over the past 15+ years to meet the evolving
needs of Android. Its responsibilities, expectations, and complexity
have grown considerably during that time. While we expect Binder to
continue to evolve along with Android, there are a number of factors
that currently constrain our ability to develop/maintain it. Briefly
those are:

1. Complexity: Binder is at the intersection of everything in Android and
fulfills many responsibilities beyond IPC. It has become many things
to many people, and due to its many features and their interactions
with each other, its complexity is quite high. In just 6kLOC it must
deliver transactions to the right threads. It must correctly parse
and translate the contents of transactions, which can contain several
objects of different types (e.g., pointers, fds) that can interact
with each other. It controls the size of thread pools in userspace,
and ensures that transactions are assigned to threads in ways that
avoid deadlocks where the threadpool has run out of threads. It must
track refcounts of objects that are shared by several processes by
forwarding refcount changes between the processes correctly. It must
handle numerous error scenarios and it combines/nests 13 different
locks, 7 reference counters, and atomic variables. Finally, it must
do all of this as quickly and efficiently as possible. Minor performance
regressions can cause a noticeably degraded user experience.

2. Things to improve: Thousand-line functions [1], error-prone error
handling [2], and confusing structure can occur as a code base grows
organically. After more than a decade of development, this codebase
could use an overhaul.

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/android/binder.c?h=v6.5#n2896
[2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/android/binder.c?h=v6.5#n3658

3. Security critical: Binder is a critical part of Android's sandboxing
strategy. Even Android's most de-privileged sandboxes (e.g. the
Chrome renderer, or SW Codec) have direct access to Binder. More than
just about any other component, it's important that Binder provide
robust security, and itself be robust against security
vulnerabilities.

It's #1 (high complexity) that has made continuing to evolve Binder and
resolving #2 (tech debt) exceptionally difficult without causing #3
(security issues). For Binder to continue to meet Android's needs, we
need better ways to manage (and reduce!) complexity without increasing
the risk.

The biggest change is obviously the choice of programming language. We
decided to use Rust because it directly addresses a number of the
challenges within Binder that we have faced in recent years. It
prevents mistakes with ref counting, locking, bounds checking, and also
does a lot to reduce the complexity of error handling. Additionally,
we've been able to use the more expressive type system to encode the
ownership semantics of the various structs and pointers, which takes the
complexity of managing object lifetimes out of the hands of the
programmer, reducing the risk of use-after-frees and similar problems.

Rust has many different pointer types that it uses to encode ownership
semantics into the type system, and this is probably one of the most
important aspects of how it helps in Binder. The Binder driver has a lot
of different objects that have complex ownership semantics; some
pointers own a refcount, some pointers have exclusive ownership, and
some pointers just reference the object and it is kept alive in some
other manner. With Rust, we can use a different pointer type for each
kind of pointer, which enables the compiler to enforce that the
ownership semantics are implemented correctly.
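
As a standalone illustration (plain userspace Rust, not code from the
driver; the standard library's Box, Arc, and shared references stand in
for the kernel crate's KBox, Arc, and borrowed types):

    use std::sync::Arc;

    struct Node { debug_id: usize }

    // Exclusive ownership: dropping the Box frees the Node, and the compiler
    // rejects any use of the value after it has been moved or dropped.
    fn consume(node: Box<Node>) -> usize {
        node.debug_id
    }

    // Shared, refcounted ownership: each clone owns one reference, and the
    // Node is freed only when the last reference is dropped.
    fn share(node: Arc<Node>) -> Arc<Node> {
        Arc::clone(&node)
    }

    // Borrowed access: the caller keeps the Node alive, and the borrow
    // checker ensures this reference cannot outlive it.
    fn peek(node: &Node) -> usize {
        node.debug_id
    }

    fn main() {
        let owned = Box::new(Node { debug_id: 1 });
        assert_eq!(consume(owned), 1);
        // Using `owned` again here would be a compile-time error.

        let shared = Arc::new(Node { debug_id: 2 });
        let also_shared = share(shared);
        assert_eq!(peek(&also_shared), 2);
    }

Because each kind of pointer is a distinct type, mixing them up (for
example, dropping a refcount that a plain reference never owned) simply
does not compile.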

Another useful feature is Rust's error handling. Rust enables simpler
error handling with features such as destructors, and you get
compilation failures if errors are not properly handled. This means that
even though Rust requires you to spend more lines of code than C on
things such as writing down invariants that are left implicit in C, the
Rust driver is still slightly smaller than C binder: Rust is 5.5kLOC and
C is 5.8kLOC. (These numbers exclude blank lines, comments,
binderfs, and any debugging facilities in C that are not yet implemented
in the Rust driver. The numbers include abstractions in rust/kernel/
that are unlikely to be used by other drivers than Binder.)
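
A rough sketch of the pattern (plain userspace Rust, not driver code;
the Result type and the ? operator are standard, but the names Buffer,
parse_code, and handle below are made up):

    struct Buffer(Vec<u8>);

    impl Drop for Buffer {
        // Destructor: runs on every exit path, success or error, so there is
        // no hand-written `goto err_free`-style cleanup chain.
        fn drop(&mut self) {
            println!("releasing {} bytes", self.0.len());
        }
    }

    fn parse_code(data: &[u8]) -> Result<u32, &'static str> {
        let header: [u8; 4] = data
            .get(..4)
            .ok_or("buffer too short")?
            .try_into()
            .map_err(|_| "buffer too short")?;
        Ok(u32::from_le_bytes(header))
    }

    fn handle(data: &[u8]) -> Result<u32, &'static str> {
        let buf = Buffer(data.to_vec());
        // Each `?` either yields the Ok value or returns the error at once;
        // `buf` is dropped automatically on both paths, and the inner u32
        // cannot be used at all without going through this error handling.
        let code = parse_code(&buf.0)?;
        Ok(code)
    }

    fn main() {
        assert_eq!(handle(&[7, 0, 0, 0, 99]), Ok(7));
        assert!(handle(&[1, 2]).is_err());
    }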

Although this rewrite completely rethinks how the code is structured and
how assumptions are enforced, we do not fundamentally change *how* the
driver does the things it does. A lot of careful thought has gone into
the existing design. The rewrite is aimed rather at improving code
health, structure, readability, robustness, security, maintainability
and extensibility. We also include more inline documentation, and
improve how assumptions in the code are enforced. Furthermore, all
unsafe code is annotated with a SAFETY comment that explains why it is
correct.
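
For example, a SAFETY comment on an unsafe block (illustrative, not
taken from the driver) looks like this:

    fn first_byte(data: &[u8]) -> Option<u8> {
        if data.is_empty() {
            return None;
        }
        // SAFETY: We checked above that `data` is non-empty, so index 0 is in
        // bounds and `get_unchecked(0)` cannot read past the end of the slice.
        Some(unsafe { *data.get_unchecked(0) })
    }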

We have left the binderfs filesystem component in C. Rewriting it in
Rust would be a large amount of work and requires a lot of bindings to
the file system interfaces. Binderfs has not historically had the same
challenges with security and complexity, so rewriting binderfs seems to
have lower value than the rest of Binder.

Correctness and feature parity
------------------------------

Rust binder passes all tests that validate the correctness of Binder in
the Android Open Source Project. We can boot a device, and run a variety
of apps and functionality without issues. We have performed this both on
the Cuttlefish Android emulator device, and on a Pixel 6 Pro.

As for feature parity, Rust binder currently implements all features
that C binder supports, with the exception of some debugging facilities.
The missing debugging facilities will be added before we submit the Rust
implementation upstream.

Tracepoints
-----------

I did not include all of the tracepoints, as I felt that the mechanism
for letting C code access fields of Rust structs should be discussed
separately on the list. I also did not include support for building
Rust Binder as a module, since that requires exporting a number of
additional symbols on the C side.

Original RFC Link with old benchmark numbers:
https://lore.kernel.org/r/20231101-rust-binder-v1-0-08ba9197f637@google.com

Co-developed-by: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Wedson Almeida Filho <wedsonaf@gmail.com>
Co-developed-by: Matt Gilbride <mattgilbride@google.com>
Signed-off-by: Matt Gilbride <mattgilbride@google.com>
Acked-by: Carlos Llamas <cmllamas@google.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Link: https://lore.kernel.org/r/20250919-rust-binder-v2-1-a384b09f28dd@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by Alice Ryhl, committed by Greg Kroah-Hartman
eafedbc7 55f6ac44

+10382 -2 overall, across the following files:
+14 -1  drivers/android/Kconfig

···
 	  Android process, using Binder to identify, invoke and pass arguments
 	  between said processes.

+config ANDROID_BINDER_IPC_RUST
+	bool "Rust version of Android Binder IPC Driver"
+	depends on RUST && MMU && !ANDROID_BINDER_IPC
+	help
+	  This enables the Rust implementation of the Binder driver.
+
+	  Binder is used in Android for both communication between processes,
+	  and remote method invocation.
+
+	  This means one Android process can call a method/routine in another
+	  Android process, using Binder to identify, invoke and pass arguments
+	  between said processes.
+
 config ANDROID_BINDERFS
 	bool "Android Binderfs filesystem"
 	depends on ANDROID_BINDER_IPC
···
 config ANDROID_BINDER_DEVICES
 	string "Android Binder devices"
-	depends on ANDROID_BINDER_IPC
+	depends on ANDROID_BINDER_IPC || ANDROID_BINDER_IPC_RUST
 	default "binder,hwbinder,vndbinder"
 	help
 	  Default value for the binder.devices parameter.
+1  drivers/android/Makefile

···
 obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o
 obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o binder_netlink.o
 obj-$(CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST) += tests/
+obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += binder/
+9  drivers/android/binder/Makefile (new file)

+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y += -I$(src) # needed for trace events
+
+obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += rust_binder.o
+rust_binder-y := \
+	rust_binder_main.o \
+	rust_binderfs.o \
+	rust_binder_events.o \
+	page_range_helper.o
+602  drivers/android/binder/allocation.rs (new file)
      Implements `Allocation` and `AllocationInfo`, which track a range of a
      process's mapped buffer while the kernel is using it, `AllocationView`
      for bounds-checked reads/writes and for translating and cleaning up
      binder objects embedded in a transaction, the `BinderObject` union with
      its typed `BinderObjectRef` view, and the file-descriptor translation
      machinery (`FileList`, `TranslatedFds`, close-on-free handling via
      `DeferredFdCloser`).
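      For instance, every read or write into an allocation is preceded by an
      overflow-safe bounds check along these lines (simplified; the real
      helper is a method on `Allocation` and fails with EFAULT):

    fn size_check(buffer_size: usize, offset: usize, size: usize) -> Result<(), ()> {
        // Reject arithmetic overflow of `offset + size` ...
        let overflow_fail = offset.checked_add(size).is_none();
        // ... and ranges whose end lies past the end of the buffer.
        let cmp_size_fail = offset.wrapping_add(size) > buffer_size;
        if overflow_fail || cmp_size_fail {
            return Err(());
        }
        Ok(())
    }

    fn main() {
        assert!(size_check(128, 0, 128).is_ok());
        assert!(size_check(128, 64, 65).is_err());
        assert!(size_check(128, usize::MAX, 2).is_err()); // overflow case
    }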
+180  drivers/android/binder/context.rs (new file)
      Implements the global `CONTEXTS` list and the `Context` type (one per
      binder device file, e.g. /dev/binder and /dev/hwbinder), including
      registration of processes with a context and management of the context
      manager node set via BINDER_SET_CONTEXT_MGR.
+204  drivers/android/binder/deferred_close.rs (new file)
      Implements `DeferredFdCloser`, which closes a file descriptor from task
      work that runs when the current task returns to userspace, so that the
      close is safe even while the file is held via fdget() (see commit
      80cd795630d6, "binder: fix use-after-free due to ksys_close() during
      fdget()"), plus the `DeferredFdCloseError` type.
+182  drivers/android/binder/defs.rs (new file)
      Re-exports the binder UAPI constants (BR_*/BC_* protocol codes, object
      types, transaction flags) and declares transparent wrappers around the
      UAPI structs (e.g. `BinderWriteRead`, `FlatBinderObject`,
      `BinderTransactionData`) that implement `FromBytes`/`AsBytes` while
      preserving padding bytes.
+99  drivers/android/binder/error.rs (new file)
     Implements `BinderError` and `BinderResult`, which carry the BR_* reply
     code (and optionally the source errno) reported to userspace via the
     BINDER_WRITE_READ ioctl rather than via errno.
+388  drivers/android/binder/freeze.rs (new file)
      Implements freeze notifications: `FreezeCookie`, `FreezeListener`, and
      `FreezeMessage` (a `DeliverToRead` work item), together with the
      `Process` methods that handle BC_REQUEST_FREEZE_NOTIFICATION,
      BC_FREEZE_NOTIFICATION_DONE, and BC_CLEAR_FREEZE_NOTIFICATION and that
      deliver BR_FROZEN_BINDER / BR_CLEAR_FREEZE_NOTIFICATION_DONE.
This is ok because we don't guarantee the 337 + // inclusion of nodes that are added or removed in parallel with this operation. 338 + curr = inner.nodes.cursor_lower_bound(&key); 339 + continue; 340 + } 341 + 342 + for proc in list { 343 + node_proc_pair = (node.clone(), proc.clone()); 344 + recipients 345 + .push_within_capacity(node_proc_pair) 346 + .map_err(|_| { 347 + pr_err!( 348 + "push_within_capacity failed even though we checked the capacity\n" 349 + ); 350 + AllocError 351 + })?; 352 + } 353 + 354 + curr = cursor.move_next(); 355 + } 356 + Ok(recipients) 357 + } 358 + 359 + /// Prepare allocations for sending freeze messages. 360 + pub(crate) fn prepare_freeze_messages(&self) -> Result<FreezeMessages, AllocError> { 361 + let recipients = self.find_freeze_recipients()?; 362 + let mut batch = KVVec::with_capacity(recipients.len(), GFP_KERNEL)?; 363 + for (node, proc) in recipients { 364 + let Some(cookie) = proc.get_freeze_cookie(&node) else { 365 + // If the freeze listener was removed in the meantime, just discard the 366 + // notification. 367 + continue; 368 + }; 369 + let msg_alloc = FreezeMessage::new(GFP_KERNEL)?; 370 + let msg = FreezeMessage::init(msg_alloc, cookie); 371 + batch.push((proc, msg), GFP_KERNEL)?; 372 + } 373 + 374 + Ok(FreezeMessages { batch }) 375 + } 376 + } 377 + 378 + pub(crate) struct FreezeMessages { 379 + batch: KVVec<(Arc<Process>, DLArc<FreezeMessage>)>, 380 + } 381 + 382 + impl FreezeMessages { 383 + pub(crate) fn send_messages(self) { 384 + for (proc, msg) in self.batch { 385 + let _ = proc.push_work(msg); 386 + } 387 + } 388 + }
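The file above implements the kernel side of the freeze-notification handshake: BC_REQUEST_FREEZE_NOTIFICATION registers a listener, BR_FROZEN_BINDER reports frozen-state changes, BC_FREEZE_NOTIFICATION_DONE acknowledges them, and BC_CLEAR_FREEZE_NOTIFICATION / BR_CLEAR_FREEZE_NOTIFICATION_DONE tear the listener down. As a reading aid, the stand-alone sketch below models only the ordering rules encoded in the `is_pending` and `is_clearing` flags; the type and method names are illustrative and are not part of the driver.

#[derive(Default)]
struct ListenerModel {
    /// A BR_FROZEN_BINDER is outstanding and not yet acknowledged.
    is_pending: bool,
    /// Userspace asked to clear the listener.
    is_clearing: bool,
}

impl ListenerModel {
    /// The target's frozen state changed; returns the command to deliver, if any.
    fn on_state_change(&mut self) -> Option<&'static str> {
        if self.is_pending || self.is_clearing {
            // Nothing else is sent until BC_FREEZE_NOTIFICATION_DONE arrives.
            return None;
        }
        self.is_pending = true;
        Some("BR_FROZEN_BINDER")
    }

    /// Userspace sent BC_FREEZE_NOTIFICATION_DONE.
    fn on_notification_done(&mut self) -> Option<&'static str> {
        self.is_pending = false;
        // A clear request that was deferred can now be answered.
        self.is_clearing.then_some("BR_CLEAR_FREEZE_NOTIFICATION_DONE")
    }

    /// Userspace sent BC_CLEAR_FREEZE_NOTIFICATION.
    fn on_clear(&mut self) -> Option<&'static str> {
        self.is_clearing = true;
        if self.is_pending {
            // Reply is deferred until the pending BR_FROZEN_BINDER is acknowledged.
            None
        } else {
            Some("BR_CLEAR_FREEZE_NOTIFICATION_DONE")
        }
    }
}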
drivers/android/binder/node.rs (+1131 lines)
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + use kernel::{ 6 + list::{AtomicTracker, List, ListArc, ListLinks, TryNewListArc}, 7 + prelude::*, 8 + seq_file::SeqFile, 9 + seq_print, 10 + sync::lock::{spinlock::SpinLockBackend, Guard}, 11 + sync::{Arc, LockedBy, SpinLock}, 12 + }; 13 + 14 + use crate::{ 15 + defs::*, 16 + error::BinderError, 17 + process::{NodeRefInfo, Process, ProcessInner}, 18 + thread::Thread, 19 + transaction::Transaction, 20 + BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead, 21 + }; 22 + 23 + use core::mem; 24 + 25 + mod wrapper; 26 + pub(crate) use self::wrapper::CritIncrWrapper; 27 + 28 + #[derive(Debug)] 29 + pub(crate) struct CouldNotDeliverCriticalIncrement; 30 + 31 + /// Keeps track of how this node is scheduled. 32 + /// 33 + /// There are two ways to schedule a node to a work list. Just schedule the node itself, or 34 + /// allocate a wrapper that references the node and schedule the wrapper. These wrappers exists to 35 + /// make it possible to "move" a node from one list to another - when `do_work` is called directly 36 + /// on the `Node`, then it's a no-op if there's also a pending wrapper. 37 + /// 38 + /// Wrappers are generally only needed for zero-to-one refcount increments, and there are two cases 39 + /// of this: weak increments and strong increments. We call such increments "critical" because it 40 + /// is critical that they are delivered to the thread doing the increment. Some examples: 41 + /// 42 + /// * One thread makes a zero-to-one strong increment, and another thread makes a zero-to-one weak 43 + /// increment. Delivering the node to the thread doing the weak increment is wrong, since the 44 + /// thread doing the strong increment may have ended a long time ago when the command is actually 45 + /// processed by userspace. 46 + /// 47 + /// * We have a weak reference and are about to drop it on one thread. But then another thread does 48 + /// a zero-to-one strong increment. If the strong increment gets sent to the thread that was 49 + /// about to drop the weak reference, then the strong increment could be processed after the 50 + /// other thread has already exited, which would be too late. 51 + /// 52 + /// Note that trying to create a `ListArc` to the node can succeed even if `has_normal_push` is 53 + /// set. This is because another thread might just have popped the node from a todo list, but not 54 + /// yet called `do_work`. However, if `has_normal_push` is false, then creating a `ListArc` should 55 + /// always succeed. 56 + /// 57 + /// Like the other fields in `NodeInner`, the delivery state is protected by the process lock. 58 + struct DeliveryState { 59 + /// Is the `Node` currently scheduled? 60 + has_pushed_node: bool, 61 + 62 + /// Is a wrapper currently scheduled? 63 + /// 64 + /// The wrapper is used only for strong zero2one increments. 65 + has_pushed_wrapper: bool, 66 + 67 + /// Is the currently scheduled `Node` scheduled due to a weak zero2one increment? 68 + /// 69 + /// Weak zero2one operations are always scheduled using the `Node`. 70 + has_weak_zero2one: bool, 71 + 72 + /// Is the currently scheduled wrapper/`Node` scheduled due to a strong zero2one increment? 73 + /// 74 + /// If `has_pushed_wrapper` is set, then the strong zero2one increment was scheduled using the 75 + /// wrapper. Otherwise, `has_pushed_node` must be set and it was scheduled using the `Node`. 
76 + has_strong_zero2one: bool, 77 + } 78 + 79 + impl DeliveryState { 80 + fn should_normal_push(&self) -> bool { 81 + !self.has_pushed_node && !self.has_pushed_wrapper 82 + } 83 + 84 + fn did_normal_push(&mut self) { 85 + assert!(self.should_normal_push()); 86 + self.has_pushed_node = true; 87 + } 88 + 89 + fn should_push_weak_zero2one(&self) -> bool { 90 + !self.has_weak_zero2one && !self.has_strong_zero2one 91 + } 92 + 93 + fn can_push_weak_zero2one_normally(&self) -> bool { 94 + !self.has_pushed_node 95 + } 96 + 97 + fn did_push_weak_zero2one(&mut self) { 98 + assert!(self.should_push_weak_zero2one()); 99 + assert!(self.can_push_weak_zero2one_normally()); 100 + self.has_pushed_node = true; 101 + self.has_weak_zero2one = true; 102 + } 103 + 104 + fn should_push_strong_zero2one(&self) -> bool { 105 + !self.has_strong_zero2one 106 + } 107 + 108 + fn can_push_strong_zero2one_normally(&self) -> bool { 109 + !self.has_pushed_node 110 + } 111 + 112 + fn did_push_strong_zero2one(&mut self) { 113 + assert!(self.should_push_strong_zero2one()); 114 + assert!(self.can_push_strong_zero2one_normally()); 115 + self.has_pushed_node = true; 116 + self.has_strong_zero2one = true; 117 + } 118 + 119 + fn did_push_strong_zero2one_wrapper(&mut self) { 120 + assert!(self.should_push_strong_zero2one()); 121 + assert!(!self.can_push_strong_zero2one_normally()); 122 + self.has_pushed_wrapper = true; 123 + self.has_strong_zero2one = true; 124 + } 125 + } 126 + 127 + struct CountState { 128 + /// The reference count. 129 + count: usize, 130 + /// Whether the process that owns this node thinks that we hold a refcount on it. (Note that 131 + /// even if count is greater than one, we only increment it once in the owning process.) 132 + has_count: bool, 133 + } 134 + 135 + impl CountState { 136 + fn new() -> Self { 137 + Self { 138 + count: 0, 139 + has_count: false, 140 + } 141 + } 142 + } 143 + 144 + struct NodeInner { 145 + /// Strong refcounts held on this node by `NodeRef` objects. 146 + strong: CountState, 147 + /// Weak refcounts held on this node by `NodeRef` objects. 148 + weak: CountState, 149 + delivery_state: DeliveryState, 150 + /// The binder driver guarantees that oneway transactions sent to the same node are serialized, 151 + /// that is, userspace will not be given the next one until it has finished processing the 152 + /// previous oneway transaction. This is done to avoid the case where two oneway transactions 153 + /// arrive in opposite order from the order in which they were sent. (E.g., they could be 154 + /// delivered to two different threads, which could appear as-if they were sent in opposite 155 + /// order.) 156 + /// 157 + /// To fix that, we store pending oneway transactions in a separate list in the node, and don't 158 + /// deliver the next oneway transaction until userspace signals that it has finished processing 159 + /// the previous oneway transaction by calling the `BC_FREE_BUFFER` ioctl. 160 + oneway_todo: List<DTRWrap<Transaction>>, 161 + /// Keeps track of whether this node has a pending oneway transaction. 162 + /// 163 + /// When this is true, incoming oneway transactions are stored in `oneway_todo`, instead of 164 + /// being delivered directly to the process. 165 + has_oneway_transaction: bool, 166 + /// List of processes to deliver a notification to when this node is destroyed (usually due to 167 + /// the process dying). 168 + death_list: List<DTRWrap<NodeDeath>, 1>, 169 + /// List of processes to deliver freeze notifications to. 
170 + freeze_list: KVVec<Arc<Process>>, 171 + /// The number of active BR_INCREFS or BR_ACQUIRE operations. (should be maximum two) 172 + /// 173 + /// If this is non-zero, then we postpone any BR_RELEASE or BR_DECREFS notifications until the 174 + /// active operations have ended. This avoids the situation an increment and decrement get 175 + /// reordered from userspace's perspective. 176 + active_inc_refs: u8, 177 + /// List of `NodeRefInfo` objects that reference this node. 178 + refs: List<NodeRefInfo, { NodeRefInfo::LIST_NODE }>, 179 + } 180 + 181 + #[pin_data] 182 + pub(crate) struct Node { 183 + pub(crate) debug_id: usize, 184 + ptr: u64, 185 + pub(crate) cookie: u64, 186 + pub(crate) flags: u32, 187 + pub(crate) owner: Arc<Process>, 188 + inner: LockedBy<NodeInner, ProcessInner>, 189 + #[pin] 190 + links_track: AtomicTracker, 191 + } 192 + 193 + kernel::list::impl_list_arc_safe! { 194 + impl ListArcSafe<0> for Node { 195 + tracked_by links_track: AtomicTracker; 196 + } 197 + } 198 + 199 + // Make `oneway_todo` work. 200 + kernel::list::impl_list_item! { 201 + impl ListItem<0> for DTRWrap<Transaction> { 202 + using ListLinks { self.links.inner }; 203 + } 204 + } 205 + 206 + impl Node { 207 + pub(crate) fn new( 208 + ptr: u64, 209 + cookie: u64, 210 + flags: u32, 211 + owner: Arc<Process>, 212 + ) -> impl PinInit<Self> { 213 + pin_init!(Self { 214 + inner: LockedBy::new( 215 + &owner.inner, 216 + NodeInner { 217 + strong: CountState::new(), 218 + weak: CountState::new(), 219 + delivery_state: DeliveryState { 220 + has_pushed_node: false, 221 + has_pushed_wrapper: false, 222 + has_weak_zero2one: false, 223 + has_strong_zero2one: false, 224 + }, 225 + death_list: List::new(), 226 + oneway_todo: List::new(), 227 + freeze_list: KVVec::new(), 228 + has_oneway_transaction: false, 229 + active_inc_refs: 0, 230 + refs: List::new(), 231 + }, 232 + ), 233 + debug_id: super::next_debug_id(), 234 + ptr, 235 + cookie, 236 + flags, 237 + owner, 238 + links_track <- AtomicTracker::new(), 239 + }) 240 + } 241 + 242 + pub(crate) fn has_oneway_transaction(&self, owner_inner: &mut ProcessInner) -> bool { 243 + let inner = self.inner.access_mut(owner_inner); 244 + inner.has_oneway_transaction 245 + } 246 + 247 + #[inline(never)] 248 + pub(crate) fn full_debug_print( 249 + &self, 250 + m: &SeqFile, 251 + owner_inner: &mut ProcessInner, 252 + ) -> Result<()> { 253 + let inner = self.inner.access_mut(owner_inner); 254 + seq_print!( 255 + m, 256 + " node {}: u{:016x} c{:016x} hs {} hw {} cs {} cw {}", 257 + self.debug_id, 258 + self.ptr, 259 + self.cookie, 260 + inner.strong.has_count, 261 + inner.weak.has_count, 262 + inner.strong.count, 263 + inner.weak.count, 264 + ); 265 + if !inner.refs.is_empty() { 266 + seq_print!(m, " proc"); 267 + for node_ref in &inner.refs { 268 + seq_print!(m, " {}", node_ref.process.task.pid()); 269 + } 270 + } 271 + seq_print!(m, "\n"); 272 + for t in &inner.oneway_todo { 273 + t.debug_print_inner(m, " pending async transaction "); 274 + } 275 + Ok(()) 276 + } 277 + 278 + /// Insert the `NodeRef` into this `refs` list. 279 + /// 280 + /// # Safety 281 + /// 282 + /// It must be the case that `info.node_ref.node` is this node. 283 + pub(crate) unsafe fn insert_node_info( 284 + &self, 285 + info: ListArc<NodeRefInfo, { NodeRefInfo::LIST_NODE }>, 286 + ) { 287 + self.inner 288 + .access_mut(&mut self.owner.inner.lock()) 289 + .refs 290 + .push_front(info); 291 + } 292 + 293 + /// Insert the `NodeRef` into this `refs` list. 
294 + /// 295 + /// # Safety 296 + /// 297 + /// It must be the case that `info.node_ref.node` is this node. 298 + pub(crate) unsafe fn remove_node_info( 299 + &self, 300 + info: &NodeRefInfo, 301 + ) -> Option<ListArc<NodeRefInfo, { NodeRefInfo::LIST_NODE }>> { 302 + // SAFETY: We always insert `NodeRefInfo` objects into the `refs` list of the node that it 303 + // references in `info.node_ref.node`. That is this node, so `info` cannot possibly be in 304 + // the `refs` list of another node. 305 + unsafe { 306 + self.inner 307 + .access_mut(&mut self.owner.inner.lock()) 308 + .refs 309 + .remove(info) 310 + } 311 + } 312 + 313 + /// An id that is unique across all binder nodes on the system. Used as the key in the 314 + /// `by_node` map. 315 + pub(crate) fn global_id(&self) -> usize { 316 + self as *const Node as usize 317 + } 318 + 319 + pub(crate) fn get_id(&self) -> (u64, u64) { 320 + (self.ptr, self.cookie) 321 + } 322 + 323 + pub(crate) fn add_death( 324 + &self, 325 + death: ListArc<DTRWrap<NodeDeath>, 1>, 326 + guard: &mut Guard<'_, ProcessInner, SpinLockBackend>, 327 + ) { 328 + self.inner.access_mut(guard).death_list.push_back(death); 329 + } 330 + 331 + pub(crate) fn inc_ref_done_locked( 332 + self: &DArc<Node>, 333 + _strong: bool, 334 + owner_inner: &mut ProcessInner, 335 + ) -> Option<DLArc<Node>> { 336 + let inner = self.inner.access_mut(owner_inner); 337 + if inner.active_inc_refs == 0 { 338 + pr_err!("inc_ref_done called when no active inc_refs"); 339 + return None; 340 + } 341 + 342 + inner.active_inc_refs -= 1; 343 + if inner.active_inc_refs == 0 { 344 + // Having active inc_refs can inhibit dropping of ref-counts. Calculate whether we 345 + // would send a refcount decrement, and if so, tell the caller to schedule us. 346 + let strong = inner.strong.count > 0; 347 + let has_strong = inner.strong.has_count; 348 + let weak = strong || inner.weak.count > 0; 349 + let has_weak = inner.weak.has_count; 350 + 351 + let should_drop_weak = !weak && has_weak; 352 + let should_drop_strong = !strong && has_strong; 353 + 354 + // If we want to drop the ref-count again, tell the caller to schedule a work node for 355 + // that. 356 + let need_push = should_drop_weak || should_drop_strong; 357 + 358 + if need_push && inner.delivery_state.should_normal_push() { 359 + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); 360 + inner.delivery_state.did_normal_push(); 361 + Some(list_arc) 362 + } else { 363 + None 364 + } 365 + } else { 366 + None 367 + } 368 + } 369 + 370 + pub(crate) fn update_refcount_locked( 371 + self: &DArc<Node>, 372 + inc: bool, 373 + strong: bool, 374 + count: usize, 375 + owner_inner: &mut ProcessInner, 376 + ) -> Option<DLArc<Node>> { 377 + let is_dead = owner_inner.is_dead; 378 + let inner = self.inner.access_mut(owner_inner); 379 + 380 + // Get a reference to the state we'll update. 381 + let state = if strong { 382 + &mut inner.strong 383 + } else { 384 + &mut inner.weak 385 + }; 386 + 387 + // Update the count and determine whether we need to push work. 388 + let need_push = if inc { 389 + state.count += count; 390 + // TODO: This method shouldn't be used for zero-to-one increments. 
391 + !is_dead && !state.has_count 392 + } else { 393 + if state.count < count { 394 + pr_err!("Failure: refcount underflow!"); 395 + return None; 396 + } 397 + state.count -= count; 398 + !is_dead && state.count == 0 && state.has_count 399 + }; 400 + 401 + if need_push && inner.delivery_state.should_normal_push() { 402 + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); 403 + inner.delivery_state.did_normal_push(); 404 + Some(list_arc) 405 + } else { 406 + None 407 + } 408 + } 409 + 410 + pub(crate) fn incr_refcount_allow_zero2one( 411 + self: &DArc<Self>, 412 + strong: bool, 413 + owner_inner: &mut ProcessInner, 414 + ) -> Result<Option<DLArc<Node>>, CouldNotDeliverCriticalIncrement> { 415 + let is_dead = owner_inner.is_dead; 416 + let inner = self.inner.access_mut(owner_inner); 417 + 418 + // Get a reference to the state we'll update. 419 + let state = if strong { 420 + &mut inner.strong 421 + } else { 422 + &mut inner.weak 423 + }; 424 + 425 + // Update the count and determine whether we need to push work. 426 + state.count += 1; 427 + if is_dead || state.has_count { 428 + return Ok(None); 429 + } 430 + 431 + // Userspace needs to be notified of this. 432 + if !strong && inner.delivery_state.should_push_weak_zero2one() { 433 + assert!(inner.delivery_state.can_push_weak_zero2one_normally()); 434 + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); 435 + inner.delivery_state.did_push_weak_zero2one(); 436 + Ok(Some(list_arc)) 437 + } else if strong && inner.delivery_state.should_push_strong_zero2one() { 438 + if inner.delivery_state.can_push_strong_zero2one_normally() { 439 + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); 440 + inner.delivery_state.did_push_strong_zero2one(); 441 + Ok(Some(list_arc)) 442 + } else { 443 + state.count -= 1; 444 + Err(CouldNotDeliverCriticalIncrement) 445 + } 446 + } else { 447 + // Work is already pushed, and we don't need to push again. 
448 + Ok(None) 449 + } 450 + } 451 + 452 + pub(crate) fn incr_refcount_allow_zero2one_with_wrapper( 453 + self: &DArc<Self>, 454 + strong: bool, 455 + wrapper: CritIncrWrapper, 456 + owner_inner: &mut ProcessInner, 457 + ) -> Option<DLArc<dyn DeliverToRead>> { 458 + match self.incr_refcount_allow_zero2one(strong, owner_inner) { 459 + Ok(Some(node)) => Some(node as _), 460 + Ok(None) => None, 461 + Err(CouldNotDeliverCriticalIncrement) => { 462 + assert!(strong); 463 + let inner = self.inner.access_mut(owner_inner); 464 + inner.strong.count += 1; 465 + inner.delivery_state.did_push_strong_zero2one_wrapper(); 466 + Some(wrapper.init(self.clone())) 467 + } 468 + } 469 + } 470 + 471 + pub(crate) fn update_refcount(self: &DArc<Self>, inc: bool, count: usize, strong: bool) { 472 + self.owner 473 + .inner 474 + .lock() 475 + .update_node_refcount(self, inc, strong, count, None); 476 + } 477 + 478 + pub(crate) fn populate_counts( 479 + &self, 480 + out: &mut BinderNodeInfoForRef, 481 + guard: &Guard<'_, ProcessInner, SpinLockBackend>, 482 + ) { 483 + let inner = self.inner.access(guard); 484 + out.strong_count = inner.strong.count as _; 485 + out.weak_count = inner.weak.count as _; 486 + } 487 + 488 + pub(crate) fn populate_debug_info( 489 + &self, 490 + out: &mut BinderNodeDebugInfo, 491 + guard: &Guard<'_, ProcessInner, SpinLockBackend>, 492 + ) { 493 + out.ptr = self.ptr as _; 494 + out.cookie = self.cookie as _; 495 + let inner = self.inner.access(guard); 496 + if inner.strong.has_count { 497 + out.has_strong_ref = 1; 498 + } 499 + if inner.weak.has_count { 500 + out.has_weak_ref = 1; 501 + } 502 + } 503 + 504 + pub(crate) fn force_has_count(&self, guard: &mut Guard<'_, ProcessInner, SpinLockBackend>) { 505 + let inner = self.inner.access_mut(guard); 506 + inner.strong.has_count = true; 507 + inner.weak.has_count = true; 508 + } 509 + 510 + fn write(&self, writer: &mut BinderReturnWriter<'_>, code: u32) -> Result { 511 + writer.write_code(code)?; 512 + writer.write_payload(&self.ptr)?; 513 + writer.write_payload(&self.cookie)?; 514 + Ok(()) 515 + } 516 + 517 + pub(crate) fn submit_oneway( 518 + &self, 519 + transaction: DLArc<Transaction>, 520 + guard: &mut Guard<'_, ProcessInner, SpinLockBackend>, 521 + ) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> { 522 + if guard.is_dead { 523 + return Err((BinderError::new_dead(), transaction)); 524 + } 525 + 526 + let inner = self.inner.access_mut(guard); 527 + if inner.has_oneway_transaction { 528 + inner.oneway_todo.push_back(transaction); 529 + } else { 530 + inner.has_oneway_transaction = true; 531 + guard.push_work(transaction)?; 532 + } 533 + Ok(()) 534 + } 535 + 536 + pub(crate) fn release(&self) { 537 + let mut guard = self.owner.inner.lock(); 538 + while let Some(work) = self.inner.access_mut(&mut guard).oneway_todo.pop_front() { 539 + drop(guard); 540 + work.into_arc().cancel(); 541 + guard = self.owner.inner.lock(); 542 + } 543 + 544 + let death_list = core::mem::take(&mut self.inner.access_mut(&mut guard).death_list); 545 + drop(guard); 546 + for death in death_list { 547 + death.into_arc().set_dead(); 548 + } 549 + } 550 + 551 + pub(crate) fn pending_oneway_finished(&self) { 552 + let mut guard = self.owner.inner.lock(); 553 + if guard.is_dead { 554 + // Cleanup will happen in `Process::deferred_release`. 
555 + return; 556 + } 557 + 558 + let inner = self.inner.access_mut(&mut guard); 559 + 560 + let transaction = inner.oneway_todo.pop_front(); 561 + inner.has_oneway_transaction = transaction.is_some(); 562 + if let Some(transaction) = transaction { 563 + match guard.push_work(transaction) { 564 + Ok(()) => {} 565 + Err((_err, work)) => { 566 + // Process is dead. 567 + // This shouldn't happen due to the `is_dead` check, but if it does, just drop 568 + // the transaction and return. 569 + drop(guard); 570 + drop(work); 571 + } 572 + } 573 + } 574 + } 575 + 576 + /// Finds an outdated transaction that the given transaction can replace. 577 + /// 578 + /// If one is found, it is removed from the list and returned. 579 + pub(crate) fn take_outdated_transaction( 580 + &self, 581 + new: &Transaction, 582 + guard: &mut Guard<'_, ProcessInner, SpinLockBackend>, 583 + ) -> Option<DLArc<Transaction>> { 584 + let inner = self.inner.access_mut(guard); 585 + let mut cursor = inner.oneway_todo.cursor_front(); 586 + while let Some(next) = cursor.peek_next() { 587 + if new.can_replace(&next) { 588 + return Some(next.remove()); 589 + } 590 + cursor.move_next(); 591 + } 592 + None 593 + } 594 + 595 + /// This is split into a separate function since it's called by both `Node::do_work` and 596 + /// `NodeWrapper::do_work`. 597 + fn do_work_locked( 598 + &self, 599 + writer: &mut BinderReturnWriter<'_>, 600 + mut guard: Guard<'_, ProcessInner, SpinLockBackend>, 601 + ) -> Result<bool> { 602 + let inner = self.inner.access_mut(&mut guard); 603 + let strong = inner.strong.count > 0; 604 + let has_strong = inner.strong.has_count; 605 + let weak = strong || inner.weak.count > 0; 606 + let has_weak = inner.weak.has_count; 607 + 608 + if weak && !has_weak { 609 + inner.weak.has_count = true; 610 + inner.active_inc_refs += 1; 611 + } 612 + 613 + if strong && !has_strong { 614 + inner.strong.has_count = true; 615 + inner.active_inc_refs += 1; 616 + } 617 + 618 + let no_active_inc_refs = inner.active_inc_refs == 0; 619 + let should_drop_weak = no_active_inc_refs && (!weak && has_weak); 620 + let should_drop_strong = no_active_inc_refs && (!strong && has_strong); 621 + if should_drop_weak { 622 + inner.weak.has_count = false; 623 + } 624 + if should_drop_strong { 625 + inner.strong.has_count = false; 626 + } 627 + if no_active_inc_refs && !weak { 628 + // Remove the node if there are no references to it. 629 + guard.remove_node(self.ptr); 630 + } 631 + drop(guard); 632 + 633 + if weak && !has_weak { 634 + self.write(writer, BR_INCREFS)?; 635 + } 636 + if strong && !has_strong { 637 + self.write(writer, BR_ACQUIRE)?; 638 + } 639 + if should_drop_strong { 640 + self.write(writer, BR_RELEASE)?; 641 + } 642 + if should_drop_weak { 643 + self.write(writer, BR_DECREFS)?; 644 + } 645 + 646 + Ok(true) 647 + } 648 + 649 + pub(crate) fn add_freeze_listener( 650 + &self, 651 + process: &Arc<Process>, 652 + flags: kernel::alloc::Flags, 653 + ) -> Result { 654 + let mut vec_alloc = KVVec::<Arc<Process>>::new(); 655 + loop { 656 + let mut guard = self.owner.inner.lock(); 657 + // Do not check for `guard.dead`. The `dead` flag that matters here is the owner of the 658 + // listener, no the target. 
659 + let inner = self.inner.access_mut(&mut guard); 660 + let len = inner.freeze_list.len(); 661 + if len >= inner.freeze_list.capacity() { 662 + if len >= vec_alloc.capacity() { 663 + drop(guard); 664 + vec_alloc = KVVec::with_capacity((1 + len).next_power_of_two(), flags)?; 665 + continue; 666 + } 667 + mem::swap(&mut inner.freeze_list, &mut vec_alloc); 668 + for elem in vec_alloc.drain_all() { 669 + inner.freeze_list.push_within_capacity(elem)?; 670 + } 671 + } 672 + inner.freeze_list.push_within_capacity(process.clone())?; 673 + return Ok(()); 674 + } 675 + } 676 + 677 + pub(crate) fn remove_freeze_listener(&self, p: &Arc<Process>) { 678 + let _unused_capacity; 679 + let mut guard = self.owner.inner.lock(); 680 + let inner = self.inner.access_mut(&mut guard); 681 + let len = inner.freeze_list.len(); 682 + inner.freeze_list.retain(|proc| !Arc::ptr_eq(proc, p)); 683 + if len == inner.freeze_list.len() { 684 + pr_warn!( 685 + "Could not remove freeze listener for {}\n", 686 + p.pid_in_current_ns() 687 + ); 688 + } 689 + if inner.freeze_list.is_empty() { 690 + _unused_capacity = mem::replace(&mut inner.freeze_list, KVVec::new()); 691 + } 692 + } 693 + 694 + pub(crate) fn freeze_list<'a>(&'a self, guard: &'a ProcessInner) -> &'a [Arc<Process>] { 695 + &self.inner.access(guard).freeze_list 696 + } 697 + } 698 + 699 + impl DeliverToRead for Node { 700 + fn do_work( 701 + self: DArc<Self>, 702 + _thread: &Thread, 703 + writer: &mut BinderReturnWriter<'_>, 704 + ) -> Result<bool> { 705 + let mut owner_inner = self.owner.inner.lock(); 706 + let inner = self.inner.access_mut(&mut owner_inner); 707 + 708 + assert!(inner.delivery_state.has_pushed_node); 709 + if inner.delivery_state.has_pushed_wrapper { 710 + // If the wrapper is scheduled, then we are either a normal push or weak zero2one 711 + // increment, and the wrapper is a strong zero2one increment, so the wrapper always 712 + // takes precedence over us. 713 + assert!(inner.delivery_state.has_strong_zero2one); 714 + inner.delivery_state.has_pushed_node = false; 715 + inner.delivery_state.has_weak_zero2one = false; 716 + return Ok(true); 717 + } 718 + 719 + inner.delivery_state.has_pushed_node = false; 720 + inner.delivery_state.has_weak_zero2one = false; 721 + inner.delivery_state.has_strong_zero2one = false; 722 + 723 + self.do_work_locked(writer, owner_inner) 724 + } 725 + 726 + fn cancel(self: DArc<Self>) {} 727 + 728 + fn should_sync_wakeup(&self) -> bool { 729 + false 730 + } 731 + 732 + #[inline(never)] 733 + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { 734 + seq_print!( 735 + m, 736 + "{}node work {}: u{:016x} c{:016x}\n", 737 + prefix, 738 + self.debug_id, 739 + self.ptr, 740 + self.cookie, 741 + ); 742 + Ok(()) 743 + } 744 + } 745 + 746 + /// Represents something that holds one or more ref-counts to a `Node`. 747 + /// 748 + /// Whenever process A holds a refcount to a node owned by a different process B, then process A 749 + /// will store a `NodeRef` that refers to the `Node` in process B. When process A releases the 750 + /// refcount, we destroy the NodeRef, which decrements the ref-count in process A. 751 + /// 752 + /// This type is also used for some other cases. For example, a transaction allocation holds a 753 + /// refcount on the target node, and this is implemented by storing a `NodeRef` in the allocation 754 + /// so that the destructor of the allocation will drop a refcount of the `Node`. 
755 + pub(crate) struct NodeRef { 756 + pub(crate) node: DArc<Node>, 757 + /// How many times does this NodeRef hold a refcount on the Node? 758 + strong_node_count: usize, 759 + weak_node_count: usize, 760 + /// How many times does userspace hold a refcount on this NodeRef? 761 + strong_count: usize, 762 + weak_count: usize, 763 + } 764 + 765 + impl NodeRef { 766 + pub(crate) fn new(node: DArc<Node>, strong_count: usize, weak_count: usize) -> Self { 767 + Self { 768 + node, 769 + strong_node_count: strong_count, 770 + weak_node_count: weak_count, 771 + strong_count, 772 + weak_count, 773 + } 774 + } 775 + 776 + pub(crate) fn absorb(&mut self, mut other: Self) { 777 + assert!( 778 + Arc::ptr_eq(&self.node, &other.node), 779 + "absorb called with differing nodes" 780 + ); 781 + self.strong_node_count += other.strong_node_count; 782 + self.weak_node_count += other.weak_node_count; 783 + self.strong_count += other.strong_count; 784 + self.weak_count += other.weak_count; 785 + other.strong_count = 0; 786 + other.weak_count = 0; 787 + other.strong_node_count = 0; 788 + other.weak_node_count = 0; 789 + 790 + if self.strong_node_count >= 2 || self.weak_node_count >= 2 { 791 + let mut guard = self.node.owner.inner.lock(); 792 + let inner = self.node.inner.access_mut(&mut guard); 793 + 794 + if self.strong_node_count >= 2 { 795 + inner.strong.count -= self.strong_node_count - 1; 796 + self.strong_node_count = 1; 797 + assert_ne!(inner.strong.count, 0); 798 + } 799 + if self.weak_node_count >= 2 { 800 + inner.weak.count -= self.weak_node_count - 1; 801 + self.weak_node_count = 1; 802 + assert_ne!(inner.weak.count, 0); 803 + } 804 + } 805 + } 806 + 807 + pub(crate) fn get_count(&self) -> (usize, usize) { 808 + (self.strong_count, self.weak_count) 809 + } 810 + 811 + pub(crate) fn clone(&self, strong: bool) -> Result<NodeRef> { 812 + if strong && self.strong_count == 0 { 813 + return Err(EINVAL); 814 + } 815 + Ok(self 816 + .node 817 + .owner 818 + .inner 819 + .lock() 820 + .new_node_ref(self.node.clone(), strong, None)) 821 + } 822 + 823 + /// Updates (increments or decrements) the number of references held against the node. If the 824 + /// count being updated transitions from 0 to 1 or from 1 to 0, the node is notified by having 825 + /// its `update_refcount` function called. 826 + /// 827 + /// Returns whether `self` should be removed (when both counts are zero). 828 + pub(crate) fn update(&mut self, inc: bool, strong: bool) -> bool { 829 + if strong && self.strong_count == 0 { 830 + return false; 831 + } 832 + let (count, node_count, other_count) = if strong { 833 + ( 834 + &mut self.strong_count, 835 + &mut self.strong_node_count, 836 + self.weak_count, 837 + ) 838 + } else { 839 + ( 840 + &mut self.weak_count, 841 + &mut self.weak_node_count, 842 + self.strong_count, 843 + ) 844 + }; 845 + if inc { 846 + if *count == 0 { 847 + *node_count = 1; 848 + self.node.update_refcount(true, 1, strong); 849 + } 850 + *count += 1; 851 + } else { 852 + if *count == 0 { 853 + pr_warn!( 854 + "pid {} performed invalid decrement on ref\n", 855 + kernel::current!().pid() 856 + ); 857 + return false; 858 + } 859 + *count -= 1; 860 + if *count == 0 { 861 + self.node.update_refcount(false, *node_count, strong); 862 + *node_count = 0; 863 + return other_count == 0; 864 + } 865 + } 866 + false 867 + } 868 + } 869 + 870 + impl Drop for NodeRef { 871 + // This destructor is called conditionally from `Allocation::drop`. That branch is often 872 + // mispredicted. 
Inlining this method call reduces the cost of those branch mispredictions. 873 + #[inline(always)] 874 + fn drop(&mut self) { 875 + if self.strong_node_count > 0 { 876 + self.node 877 + .update_refcount(false, self.strong_node_count, true); 878 + } 879 + if self.weak_node_count > 0 { 880 + self.node 881 + .update_refcount(false, self.weak_node_count, false); 882 + } 883 + } 884 + } 885 + 886 + struct NodeDeathInner { 887 + dead: bool, 888 + cleared: bool, 889 + notification_done: bool, 890 + /// Indicates whether the normal flow was interrupted by removing the handle. In this case, we 891 + /// need behave as if the death notification didn't exist (i.e., we don't deliver anything to 892 + /// the user. 893 + aborted: bool, 894 + } 895 + 896 + /// Used to deliver notifications when a process dies. 897 + /// 898 + /// A process can request to be notified when a process dies using `BC_REQUEST_DEATH_NOTIFICATION`. 899 + /// This will make the driver send a `BR_DEAD_BINDER` to userspace when the process dies (or 900 + /// immediately if it is already dead). Userspace is supposed to respond with `BC_DEAD_BINDER_DONE` 901 + /// once it has processed the notification. 902 + /// 903 + /// Userspace can unregister from death notifications using the `BC_CLEAR_DEATH_NOTIFICATION` 904 + /// command. In this case, the kernel will respond with `BR_CLEAR_DEATH_NOTIFICATION_DONE` once the 905 + /// notification has been removed. Note that if the remote process dies before the kernel has 906 + /// responded with `BR_CLEAR_DEATH_NOTIFICATION_DONE`, then the kernel will still send a 907 + /// `BR_DEAD_BINDER`, which userspace must be able to process. In this case, the kernel will wait 908 + /// for the `BC_DEAD_BINDER_DONE` command before it sends `BR_CLEAR_DEATH_NOTIFICATION_DONE`. 909 + /// 910 + /// Note that even if the kernel sends a `BR_DEAD_BINDER`, this does not remove the death 911 + /// notification. Userspace must still remove it manually using `BC_CLEAR_DEATH_NOTIFICATION`. 912 + /// 913 + /// If a process uses `BC_RELEASE` to destroy its last refcount on a node that has an active death 914 + /// registration, then the death registration is immediately deleted (we implement this using the 915 + /// `aborted` field). However, userspace is not supposed to delete a `NodeRef` without first 916 + /// deregistering death notifications, so this codepath is not executed under normal circumstances. 917 + #[pin_data] 918 + pub(crate) struct NodeDeath { 919 + node: DArc<Node>, 920 + process: Arc<Process>, 921 + pub(crate) cookie: u64, 922 + #[pin] 923 + links_track: AtomicTracker<0>, 924 + /// Used by the owner `Node` to store a list of registered death notifications. 925 + /// 926 + /// # Invariants 927 + /// 928 + /// Only ever used with the `death_list` list of `self.node`. 929 + #[pin] 930 + death_links: ListLinks<1>, 931 + /// Used by the process to keep track of the death notifications for which we have sent a 932 + /// `BR_DEAD_BINDER` but not yet received a `BC_DEAD_BINDER_DONE`. 933 + /// 934 + /// # Invariants 935 + /// 936 + /// Only ever used with the `delivered_deaths` list of `self.process`. 937 + #[pin] 938 + delivered_links: ListLinks<2>, 939 + #[pin] 940 + delivered_links_track: AtomicTracker<2>, 941 + #[pin] 942 + inner: SpinLock<NodeDeathInner>, 943 + } 944 + 945 + impl NodeDeath { 946 + /// Constructs a new node death notification object. 
947 + pub(crate) fn new( 948 + node: DArc<Node>, 949 + process: Arc<Process>, 950 + cookie: u64, 951 + ) -> impl PinInit<DTRWrap<Self>> { 952 + DTRWrap::new(pin_init!( 953 + Self { 954 + node, 955 + process, 956 + cookie, 957 + links_track <- AtomicTracker::new(), 958 + death_links <- ListLinks::new(), 959 + delivered_links <- ListLinks::new(), 960 + delivered_links_track <- AtomicTracker::new(), 961 + inner <- kernel::new_spinlock!(NodeDeathInner { 962 + dead: false, 963 + cleared: false, 964 + notification_done: false, 965 + aborted: false, 966 + }, "NodeDeath::inner"), 967 + } 968 + )) 969 + } 970 + 971 + /// Sets the cleared flag to `true`. 972 + /// 973 + /// It removes `self` from the node's death notification list if needed. 974 + /// 975 + /// Returns whether it needs to be queued. 976 + pub(crate) fn set_cleared(self: &DArc<Self>, abort: bool) -> bool { 977 + let (needs_removal, needs_queueing) = { 978 + // Update state and determine if we need to queue a work item. We only need to do it 979 + // when the node is not dead or if the user already completed the death notification. 980 + let mut inner = self.inner.lock(); 981 + if abort { 982 + inner.aborted = true; 983 + } 984 + if inner.cleared { 985 + // Already cleared. 986 + return false; 987 + } 988 + inner.cleared = true; 989 + (!inner.dead, !inner.dead || inner.notification_done) 990 + }; 991 + 992 + // Remove death notification from node. 993 + if needs_removal { 994 + let mut owner_inner = self.node.owner.inner.lock(); 995 + let node_inner = self.node.inner.access_mut(&mut owner_inner); 996 + // SAFETY: A `NodeDeath` is never inserted into the death list of any node other than 997 + // its owner, so it is either in this death list or in no death list. 998 + unsafe { node_inner.death_list.remove(self) }; 999 + } 1000 + needs_queueing 1001 + } 1002 + 1003 + /// Sets the 'notification done' flag to `true`. 1004 + pub(crate) fn set_notification_done(self: DArc<Self>, thread: &Thread) { 1005 + let needs_queueing = { 1006 + let mut inner = self.inner.lock(); 1007 + inner.notification_done = true; 1008 + inner.cleared 1009 + }; 1010 + if needs_queueing { 1011 + if let Some(death) = ListArc::try_from_arc_or_drop(self) { 1012 + let _ = thread.push_work_if_looper(death); 1013 + } 1014 + } 1015 + } 1016 + 1017 + /// Sets the 'dead' flag to `true` and queues work item if needed. 1018 + pub(crate) fn set_dead(self: DArc<Self>) { 1019 + let needs_queueing = { 1020 + let mut inner = self.inner.lock(); 1021 + if inner.cleared { 1022 + false 1023 + } else { 1024 + inner.dead = true; 1025 + true 1026 + } 1027 + }; 1028 + if needs_queueing { 1029 + // Push the death notification to the target process. There is nothing else to do if 1030 + // it's already dead. 1031 + if let Some(death) = ListArc::try_from_arc_or_drop(self) { 1032 + let process = death.process.clone(); 1033 + let _ = process.push_work(death); 1034 + } 1035 + } 1036 + } 1037 + } 1038 + 1039 + kernel::list::impl_list_arc_safe! { 1040 + impl ListArcSafe<0> for NodeDeath { 1041 + tracked_by links_track: AtomicTracker; 1042 + } 1043 + } 1044 + 1045 + kernel::list::impl_list_arc_safe! { 1046 + impl ListArcSafe<1> for DTRWrap<NodeDeath> { untracked; } 1047 + } 1048 + kernel::list::impl_list_item! { 1049 + impl ListItem<1> for DTRWrap<NodeDeath> { 1050 + using ListLinks { self.wrapped.death_links }; 1051 + } 1052 + } 1053 + 1054 + kernel::list::impl_list_arc_safe! 
{ 1055 + impl ListArcSafe<2> for DTRWrap<NodeDeath> { 1056 + tracked_by wrapped: NodeDeath; 1057 + } 1058 + } 1059 + kernel::list::impl_list_arc_safe! { 1060 + impl ListArcSafe<2> for NodeDeath { 1061 + tracked_by delivered_links_track: AtomicTracker<2>; 1062 + } 1063 + } 1064 + kernel::list::impl_list_item! { 1065 + impl ListItem<2> for DTRWrap<NodeDeath> { 1066 + using ListLinks { self.wrapped.delivered_links }; 1067 + } 1068 + } 1069 + 1070 + impl DeliverToRead for NodeDeath { 1071 + fn do_work( 1072 + self: DArc<Self>, 1073 + _thread: &Thread, 1074 + writer: &mut BinderReturnWriter<'_>, 1075 + ) -> Result<bool> { 1076 + let done = { 1077 + let inner = self.inner.lock(); 1078 + if inner.aborted { 1079 + return Ok(true); 1080 + } 1081 + inner.cleared && (!inner.dead || inner.notification_done) 1082 + }; 1083 + 1084 + let cookie = self.cookie; 1085 + let cmd = if done { 1086 + BR_CLEAR_DEATH_NOTIFICATION_DONE 1087 + } else { 1088 + let process = self.process.clone(); 1089 + let mut process_inner = process.inner.lock(); 1090 + let inner = self.inner.lock(); 1091 + if inner.aborted { 1092 + return Ok(true); 1093 + } 1094 + // We're still holding the inner lock, so it cannot be aborted while we insert it into 1095 + // the delivered list. 1096 + process_inner.death_delivered(self.clone()); 1097 + BR_DEAD_BINDER 1098 + }; 1099 + 1100 + writer.write_code(cmd)?; 1101 + writer.write_payload(&cookie)?; 1102 + // DEAD_BINDER notifications can cause transactions, so stop processing work items when we 1103 + // get to a death notification. 1104 + Ok(cmd != BR_DEAD_BINDER) 1105 + } 1106 + 1107 + fn cancel(self: DArc<Self>) {} 1108 + 1109 + fn should_sync_wakeup(&self) -> bool { 1110 + false 1111 + } 1112 + 1113 + #[inline(never)] 1114 + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { 1115 + let inner = self.inner.lock(); 1116 + 1117 + let dead_binder = inner.dead && !inner.notification_done; 1118 + 1119 + if dead_binder { 1120 + if inner.cleared { 1121 + seq_print!(m, "{}has cleared dead binder\n", prefix); 1122 + } else { 1123 + seq_print!(m, "{}has dead binder\n", prefix); 1124 + } 1125 + } else { 1126 + seq_print!(m, "{}has cleared death notification\n", prefix); 1127 + } 1128 + 1129 + Ok(()) 1130 + } 1131 + }
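node.rs above also carries the death-notification protocol described in the `NodeDeath` documentation. The stand-alone sketch below (illustrative names only, not driver code) models the reply ordering between BR_DEAD_BINDER, BC_DEAD_BINDER_DONE, BC_CLEAR_DEATH_NOTIFICATION and BR_CLEAR_DEATH_NOTIFICATION_DONE that the `dead`, `cleared`, and `notification_done` flags implement.

#[derive(Default)]
struct DeathModel {
    dead: bool,
    cleared: bool,
    notification_done: bool,
}

impl DeathModel {
    /// The remote process died; returns the command to deliver, if any.
    fn on_dead(&mut self) -> Option<&'static str> {
        if self.cleared {
            // The registration was already cleared; nothing more is delivered.
            return None;
        }
        self.dead = true;
        Some("BR_DEAD_BINDER")
    }

    /// Userspace sent BC_DEAD_BINDER_DONE.
    fn on_dead_binder_done(&mut self) -> Option<&'static str> {
        self.notification_done = true;
        // A clear request that raced with the death can now be answered.
        self.cleared.then_some("BR_CLEAR_DEATH_NOTIFICATION_DONE")
    }

    /// Userspace sent BC_CLEAR_DEATH_NOTIFICATION.
    fn on_clear(&mut self) -> Option<&'static str> {
        self.cleared = true;
        if self.dead && !self.notification_done {
            // The process died first, so BR_DEAD_BINDER is delivered and the
            // DONE reply waits for BC_DEAD_BINDER_DONE.
            None
        } else {
            Some("BR_CLEAR_DEATH_NOTIFICATION_DONE")
        }
    }
}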
drivers/android/binder/node/wrapper.rs (+78 lines)
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + use kernel::{list::ListArc, prelude::*, seq_file::SeqFile, seq_print, sync::UniqueArc}; 6 + 7 + use crate::{node::Node, thread::Thread, BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead}; 8 + 9 + use core::mem::MaybeUninit; 10 + 11 + pub(crate) struct CritIncrWrapper { 12 + inner: UniqueArc<MaybeUninit<DTRWrap<NodeWrapper>>>, 13 + } 14 + 15 + impl CritIncrWrapper { 16 + pub(crate) fn new() -> Result<Self> { 17 + Ok(CritIncrWrapper { 18 + inner: UniqueArc::new_uninit(GFP_KERNEL)?, 19 + }) 20 + } 21 + 22 + pub(super) fn init(self, node: DArc<Node>) -> DLArc<dyn DeliverToRead> { 23 + match self.inner.pin_init_with(DTRWrap::new(NodeWrapper { node })) { 24 + Ok(initialized) => ListArc::from(initialized) as _, 25 + Err(err) => match err {}, 26 + } 27 + } 28 + } 29 + 30 + struct NodeWrapper { 31 + node: DArc<Node>, 32 + } 33 + 34 + kernel::list::impl_list_arc_safe! { 35 + impl ListArcSafe<0> for NodeWrapper { 36 + untracked; 37 + } 38 + } 39 + 40 + impl DeliverToRead for NodeWrapper { 41 + fn do_work( 42 + self: DArc<Self>, 43 + _thread: &Thread, 44 + writer: &mut BinderReturnWriter<'_>, 45 + ) -> Result<bool> { 46 + let node = &self.node; 47 + let mut owner_inner = node.owner.inner.lock(); 48 + let inner = node.inner.access_mut(&mut owner_inner); 49 + 50 + let ds = &mut inner.delivery_state; 51 + 52 + assert!(ds.has_pushed_wrapper); 53 + assert!(ds.has_strong_zero2one); 54 + ds.has_pushed_wrapper = false; 55 + ds.has_strong_zero2one = false; 56 + 57 + node.do_work_locked(writer, owner_inner) 58 + } 59 + 60 + fn cancel(self: DArc<Self>) {} 61 + 62 + fn should_sync_wakeup(&self) -> bool { 63 + false 64 + } 65 + 66 + #[inline(never)] 67 + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { 68 + seq_print!( 69 + m, 70 + "{}node work {}: u{:016x} c{:016x}\n", 71 + prefix, 72 + self.node.debug_id, 73 + self.node.ptr, 74 + self.node.cookie, 75 + ); 76 + Ok(()) 77 + } 78 + }
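The wrapper above exists so that a strong zero-to-one increment can still be queued when the `Node` itself is already sitting on a todo list for some other push. A rough stand-alone model of that precedence is sketched below; the names are illustrative, it is not driver code, and it ignores the weak/strong bookkeeping details.

#[derive(Default)]
struct DeliveryModel {
    /// The `Node` itself is already on a todo list.
    node_queued: bool,
    /// A strong zero-to-one increment is already waiting to be delivered,
    /// either on the node or on a wrapper.
    strong_pending: bool,
}

enum Push {
    /// Queue the node itself.
    Node,
    /// Allocate a wrapper and queue that instead.
    Wrapper,
    /// An equivalent command is already queued; nothing to do.
    Nothing,
}

impl DeliveryModel {
    /// A strong zero-to-one increment prefers the node, but must fall back to
    /// a wrapper when the node is already queued, so that the increment is
    /// still delivered to the thread that performed it.
    fn strong_zero2one(&mut self) -> Push {
        if self.strong_pending {
            return Push::Nothing;
        }
        self.strong_pending = true;
        if !self.node_queued {
            self.node_queued = true;
            Push::Node
        } else {
            Push::Wrapper
        }
    }
}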
drivers/android/binder/page_range.rs (+734 lines)
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + //! This module has utilities for managing a page range where unused pages may be reclaimed by a 6 + //! vma shrinker. 7 + 8 + // To avoid deadlocks, locks are taken in the order: 9 + // 10 + // 1. mmap lock 11 + // 2. spinlock 12 + // 3. lru spinlock 13 + // 14 + // The shrinker will use trylock methods because it locks them in a different order. 15 + 16 + use core::{ 17 + marker::PhantomPinned, 18 + mem::{size_of, size_of_val, MaybeUninit}, 19 + ptr, 20 + }; 21 + 22 + use kernel::{ 23 + bindings, 24 + error::Result, 25 + ffi::{c_ulong, c_void}, 26 + mm::{virt, Mm, MmWithUser}, 27 + new_mutex, new_spinlock, 28 + page::{Page, PAGE_SHIFT, PAGE_SIZE}, 29 + prelude::*, 30 + str::CStr, 31 + sync::{aref::ARef, Mutex, SpinLock}, 32 + task::Pid, 33 + transmute::FromBytes, 34 + types::Opaque, 35 + uaccess::UserSliceReader, 36 + }; 37 + 38 + /// Represents a shrinker that can be registered with the kernel. 39 + /// 40 + /// Each shrinker can be used by many `ShrinkablePageRange` objects. 41 + #[repr(C)] 42 + pub(crate) struct Shrinker { 43 + inner: Opaque<*mut bindings::shrinker>, 44 + list_lru: Opaque<bindings::list_lru>, 45 + } 46 + 47 + // SAFETY: The shrinker and list_lru are thread safe. 48 + unsafe impl Send for Shrinker {} 49 + // SAFETY: The shrinker and list_lru are thread safe. 50 + unsafe impl Sync for Shrinker {} 51 + 52 + impl Shrinker { 53 + /// Create a new shrinker. 54 + /// 55 + /// # Safety 56 + /// 57 + /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have 58 + /// been called exactly once, and it must not have returned an error. 59 + pub(crate) const unsafe fn new() -> Self { 60 + Self { 61 + inner: Opaque::uninit(), 62 + list_lru: Opaque::uninit(), 63 + } 64 + } 65 + 66 + /// Register this shrinker with the kernel. 67 + pub(crate) fn register(&'static self, name: &CStr) -> Result<()> { 68 + // SAFETY: These fields are not yet used, so it's okay to zero them. 69 + unsafe { 70 + self.inner.get().write(ptr::null_mut()); 71 + self.list_lru.get().write_bytes(0, 1); 72 + } 73 + 74 + // SAFETY: The field is not yet used, so we can initialize it. 75 + let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) }; 76 + if ret != 0 { 77 + return Err(Error::from_errno(ret)); 78 + } 79 + 80 + // SAFETY: The `name` points at a valid c string. 81 + let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) }; 82 + if shrinker.is_null() { 83 + // SAFETY: We initialized it, so its okay to destroy it. 84 + unsafe { bindings::list_lru_destroy(self.list_lru.get()) }; 85 + return Err(Error::from_errno(ret)); 86 + } 87 + 88 + // SAFETY: We're about to register the shrinker, and these are the fields we need to 89 + // initialize. (All other fields are already zeroed.) 90 + unsafe { 91 + (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count)); 92 + (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan)); 93 + (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast()); 94 + } 95 + 96 + // SAFETY: The new shrinker has been fully initialized, so we can register it. 97 + unsafe { bindings::shrinker_register(shrinker) }; 98 + 99 + // SAFETY: This initializes the pointer to the shrinker so that we can use it. 100 + unsafe { self.inner.get().write(shrinker) }; 101 + 102 + Ok(()) 103 + } 104 + } 105 + 106 + /// A container that manages a page range in a vma. 
107 + /// 108 + /// The pages can be thought of as an array of booleans of whether the pages are usable. The 109 + /// methods `use_range` and `stop_using_range` set all booleans in a range to true or false 110 + /// respectively. Initially, no pages are allocated. When a page is not used, it is not freed 111 + /// immediately. Instead, it is made available to the memory shrinker to free it if the device is 112 + /// under memory pressure. 113 + /// 114 + /// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no 115 + /// way to know whether an index ends up with true or false if a call to `use_range` races with 116 + /// another call to `stop_using_range` on a given index. 117 + /// 118 + /// It's also okay for the two methods to race with themselves, e.g. if two threads call 119 + /// `use_range` on the same index, then that's fine and neither call will return until the page is 120 + /// allocated and mapped. 121 + /// 122 + /// The methods that read or write to a range require that the page is marked as in use. So it is 123 + /// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or 124 + /// write to the page. 125 + #[pin_data(PinnedDrop)] 126 + pub(crate) struct ShrinkablePageRange { 127 + /// Shrinker object registered with the kernel. 128 + shrinker: &'static Shrinker, 129 + /// Pid using this page range. Only used as debugging information. 130 + pid: Pid, 131 + /// The mm for the relevant process. 132 + mm: ARef<Mm>, 133 + /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`. 134 + #[pin] 135 + mm_lock: Mutex<()>, 136 + /// Spinlock protecting changes to pages. 137 + #[pin] 138 + lock: SpinLock<Inner>, 139 + 140 + /// Must not move, since page info has pointers back. 141 + #[pin] 142 + _pin: PhantomPinned, 143 + } 144 + 145 + struct Inner { 146 + /// Array of pages. 147 + /// 148 + /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive 149 + /// ownership. To deal with that, we manage it using raw pointers. 150 + pages: *mut PageInfo, 151 + /// Length of the `pages` array. 152 + size: usize, 153 + /// The address of the vma to insert the pages into. 154 + vma_addr: usize, 155 + } 156 + 157 + // SAFETY: proper locking is in place for `Inner` 158 + unsafe impl Send for Inner {} 159 + 160 + type StableMmGuard = 161 + kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>; 162 + 163 + /// An array element that describes the current state of a page. 164 + /// 165 + /// There are three states: 166 + /// 167 + /// * Free. The page is None. The `lru` element is not queued. 168 + /// * Available. The page is Some. The `lru` element is queued to the shrinker's lru. 169 + /// * Used. The page is Some. The `lru` element is not queued. 170 + /// 171 + /// When an element is available, the shrinker is able to free the page. 172 + #[repr(C)] 173 + struct PageInfo { 174 + lru: bindings::list_head, 175 + page: Option<Page>, 176 + range: *const ShrinkablePageRange, 177 + } 178 + 179 + impl PageInfo { 180 + /// # Safety 181 + /// 182 + /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set. 183 + unsafe fn set_page(me: *mut PageInfo, page: Page) { 184 + // SAFETY: This pointer offset is in bounds. 185 + let ptr = unsafe { &raw mut (*me).page }; 186 + 187 + // SAFETY: The pointer is valid for writing, so also valid for reading. 
188 + if unsafe { (*ptr).is_some() } { 189 + pr_err!("set_page called when there is already a page"); 190 + // SAFETY: We will initialize the page again below. 191 + unsafe { ptr::drop_in_place(ptr) }; 192 + } 193 + 194 + // SAFETY: The pointer is valid for writing. 195 + unsafe { ptr::write(ptr, Some(page)) }; 196 + } 197 + 198 + /// # Safety 199 + /// 200 + /// The caller ensures that reading from `me.page` is ok for the duration of 'a. 201 + unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> { 202 + // SAFETY: This pointer offset is in bounds. 203 + let ptr = unsafe { &raw const (*me).page }; 204 + 205 + // SAFETY: The pointer is valid for reading. 206 + unsafe { (*ptr).as_ref() } 207 + } 208 + 209 + /// # Safety 210 + /// 211 + /// The caller ensures that writing to `me.page` is ok for the duration of 'a. 212 + unsafe fn take_page(me: *mut PageInfo) -> Option<Page> { 213 + // SAFETY: This pointer offset is in bounds. 214 + let ptr = unsafe { &raw mut (*me).page }; 215 + 216 + // SAFETY: The pointer is valid for reading. 217 + unsafe { (*ptr).take() } 218 + } 219 + 220 + /// Add this page to the lru list, if not already in the list. 221 + /// 222 + /// # Safety 223 + /// 224 + /// The pointer must be valid, and it must be the right shrinker and nid. 225 + unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) { 226 + // SAFETY: This pointer offset is in bounds. 227 + let lru_ptr = unsafe { &raw mut (*me).lru }; 228 + // SAFETY: The lru pointer is valid, and we're not using it with any other lru list. 229 + unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) }; 230 + } 231 + 232 + /// Remove this page from the lru list, if it is in the list. 233 + /// 234 + /// # Safety 235 + /// 236 + /// The pointer must be valid, and it must be the right shrinker and nid. 237 + unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) { 238 + // SAFETY: This pointer offset is in bounds. 239 + let lru_ptr = unsafe { &raw mut (*me).lru }; 240 + // SAFETY: The lru pointer is valid, and we're not using it with any other lru list. 241 + unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) }; 242 + } 243 + } 244 + 245 + impl ShrinkablePageRange { 246 + /// Create a new `ShrinkablePageRange` using the given shrinker. 247 + pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> { 248 + try_pin_init!(Self { 249 + shrinker, 250 + pid: kernel::current!().pid(), 251 + mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?), 252 + mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"), 253 + lock <- new_spinlock!(Inner { 254 + pages: ptr::null_mut(), 255 + size: 0, 256 + vma_addr: 0, 257 + }, "ShrinkablePageRange"), 258 + _pin: PhantomPinned, 259 + }) 260 + } 261 + 262 + pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> { 263 + // SAFETY: This extends the duration of the reference. Since this call happens before 264 + // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block 265 + // until the returned guard is dropped. This ensures that the guard is valid until dropped. 266 + let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) }; 267 + 268 + mm_lock.try_lock() 269 + } 270 + 271 + /// Register a vma with this page range. Returns the size of the region. 
272 + pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> { 273 + let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize); 274 + let num_pages = num_bytes >> PAGE_SHIFT; 275 + 276 + if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) { 277 + pr_debug!("Failed to register with vma: invalid vma->vm_mm"); 278 + return Err(EINVAL); 279 + } 280 + if num_pages == 0 { 281 + pr_debug!("Failed to register with vma: size zero"); 282 + return Err(EINVAL); 283 + } 284 + 285 + let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?; 286 + 287 + // SAFETY: This just initializes the pages array. 288 + unsafe { 289 + let self_ptr = self as *const ShrinkablePageRange; 290 + for i in 0..num_pages { 291 + let info = pages.as_mut_ptr().add(i); 292 + (&raw mut (*info).range).write(self_ptr); 293 + (&raw mut (*info).page).write(None); 294 + let lru = &raw mut (*info).lru; 295 + (&raw mut (*lru).next).write(lru); 296 + (&raw mut (*lru).prev).write(lru); 297 + } 298 + } 299 + 300 + let mut inner = self.lock.lock(); 301 + if inner.size > 0 { 302 + pr_debug!("Failed to register with vma: already registered"); 303 + drop(inner); 304 + return Err(EBUSY); 305 + } 306 + 307 + inner.pages = pages.into_raw_parts().0; 308 + inner.size = num_pages; 309 + inner.vma_addr = vma.start(); 310 + 311 + Ok(num_pages) 312 + } 313 + 314 + /// Make sure that the given pages are allocated and mapped. 315 + /// 316 + /// Must not be called from an atomic context. 317 + pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> { 318 + if start >= end { 319 + return Ok(()); 320 + } 321 + let mut inner = self.lock.lock(); 322 + assert!(end <= inner.size); 323 + 324 + for i in start..end { 325 + // SAFETY: This pointer offset is in bounds. 326 + let page_info = unsafe { inner.pages.add(i) }; 327 + 328 + // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay. 329 + if let Some(page) = unsafe { PageInfo::get_page(page_info) } { 330 + // Since we're going to use the page, we should remove it from the lru list so that 331 + // the shrinker will not free it. 332 + // 333 + // SAFETY: The pointer is valid, and this is the right shrinker. 334 + // 335 + // The shrinker can't free the page between the check and this call to 336 + // `list_lru_del` because we hold the lock. 337 + unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) }; 338 + } else { 339 + // We have to allocate a new page. Use the slow path. 340 + drop(inner); 341 + // SAFETY: `i < end <= inner.size` so `i` is in bounds. 342 + match unsafe { self.use_page_slow(i) } { 343 + Ok(()) => {} 344 + Err(err) => { 345 + pr_warn!("Error in use_page_slow: {:?}", err); 346 + return Err(err); 347 + } 348 + } 349 + inner = self.lock.lock(); 350 + } 351 + } 352 + Ok(()) 353 + } 354 + 355 + /// Mark the given page as in use, slow path. 356 + /// 357 + /// Must not be called from an atomic context. 358 + /// 359 + /// # Safety 360 + /// 361 + /// Assumes that `i` is in bounds. 362 + #[cold] 363 + unsafe fn use_page_slow(&self, i: usize) -> Result<()> { 364 + let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?; 365 + 366 + let mm_mutex = self.mm_lock.lock(); 367 + let inner = self.lock.lock(); 368 + 369 + // SAFETY: This pointer offset is in bounds. 370 + let page_info = unsafe { inner.pages.add(i) }; 371 + 372 + // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay. 
373 + if let Some(page) = unsafe { PageInfo::get_page(page_info) } { 374 + // The page was already there, or someone else added the page while we didn't hold the 375 + // spinlock. 376 + // 377 + // SAFETY: The pointer is valid, and this is the right shrinker. 378 + // 379 + // The shrinker can't free the page between the check and this call to 380 + // `list_lru_del` because we hold the lock. 381 + unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) }; 382 + return Ok(()); 383 + } 384 + 385 + let vma_addr = inner.vma_addr; 386 + // Release the spinlock while we insert the page into the vma. 387 + drop(inner); 388 + 389 + // No overflow since we stay in bounds of the vma. 390 + let user_page_addr = vma_addr + (i << PAGE_SHIFT); 391 + 392 + // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from 393 + // a remote process. If the call to `mmput` races with the process shutting down, then the 394 + // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't 395 + // happen until it returns to userspace. However, the caller might instead go to sleep and 396 + // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the 397 + // middle of a shutdown process that won't complete until the `mm` is dropped. This can 398 + // amount to a deadlock. 399 + // 400 + // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a 401 + // workqueue. 402 + MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?) 403 + .mmap_read_lock() 404 + .vma_lookup(vma_addr) 405 + .ok_or(ESRCH)? 406 + .as_mixedmap_vma() 407 + .ok_or(ESRCH)? 408 + .vm_insert_page(user_page_addr, &new_page) 409 + .inspect_err(|err| { 410 + pr_warn!( 411 + "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}", 412 + user_page_addr, 413 + vma_addr, 414 + i, 415 + err 416 + ) 417 + })?; 418 + 419 + let inner = self.lock.lock(); 420 + 421 + // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page 422 + // can be written to since we hold the lock. 423 + // 424 + // We released and reacquired the spinlock since we checked that the page is null, but we 425 + // always hold the mm_lock mutex when setting the page to a non-null value, so it's not 426 + // possible for someone else to have changed it since our check. 427 + unsafe { PageInfo::set_page(page_info, new_page) }; 428 + 429 + drop(inner); 430 + drop(mm_mutex); 431 + 432 + Ok(()) 433 + } 434 + 435 + /// If the given page is in use, then mark it as available so that the shrinker can free it. 436 + /// 437 + /// May be called from an atomic context. 438 + pub(crate) fn stop_using_range(&self, start: usize, end: usize) { 439 + if start >= end { 440 + return; 441 + } 442 + let inner = self.lock.lock(); 443 + assert!(end <= inner.size); 444 + 445 + for i in (start..end).rev() { 446 + // SAFETY: The pointer is in bounds. 447 + let page_info = unsafe { inner.pages.add(i) }; 448 + 449 + // SAFETY: Okay for reading since we have the lock. 450 + if let Some(page) = unsafe { PageInfo::get_page(page_info) } { 451 + // SAFETY: The pointer is valid, and it's the right shrinker. 452 + unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) }; 453 + } 454 + } 455 + } 456 + 457 + /// Helper for reading or writing to a range of bytes that may overlap with several pages. 458 + /// 459 + /// # Safety 460 + /// 461 + /// All pages touched by this operation must be in use for the duration of this call. 
462 + unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result 463 + where 464 + T: FnMut(&Page, usize, usize) -> Result, 465 + { 466 + if size == 0 { 467 + return Ok(()); 468 + } 469 + 470 + let (pages, num_pages) = { 471 + let inner = self.lock.lock(); 472 + (inner.pages, inner.size) 473 + }; 474 + let num_bytes = num_pages << PAGE_SHIFT; 475 + 476 + // Check that the request is within the buffer. 477 + if offset.checked_add(size).ok_or(EFAULT)? > num_bytes { 478 + return Err(EFAULT); 479 + } 480 + 481 + let mut page_index = offset >> PAGE_SHIFT; 482 + offset &= PAGE_SIZE - 1; 483 + while size > 0 { 484 + let available = usize::min(size, PAGE_SIZE - offset); 485 + // SAFETY: The pointer is in bounds. 486 + let page_info = unsafe { pages.add(page_index) }; 487 + // SAFETY: The caller guarantees that this page is in the "in use" state for the 488 + // duration of this call to `iterate`, so nobody will change the page. 489 + let page = unsafe { PageInfo::get_page(page_info) }; 490 + if page.is_none() { 491 + pr_warn!("Page is null!"); 492 + } 493 + let page = page.ok_or(EFAULT)?; 494 + cb(page, offset, available)?; 495 + size -= available; 496 + page_index += 1; 497 + offset = 0; 498 + } 499 + Ok(()) 500 + } 501 + 502 + /// Copy from userspace into this page range. 503 + /// 504 + /// # Safety 505 + /// 506 + /// All pages touched by this operation must be in use for the duration of this call. 507 + pub(crate) unsafe fn copy_from_user_slice( 508 + &self, 509 + reader: &mut UserSliceReader, 510 + offset: usize, 511 + size: usize, 512 + ) -> Result { 513 + // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`. 514 + unsafe { 515 + self.iterate(offset, size, |page, offset, to_copy| { 516 + page.copy_from_user_slice_raw(reader, offset, to_copy) 517 + }) 518 + } 519 + } 520 + 521 + /// Copy from this page range into kernel space. 522 + /// 523 + /// # Safety 524 + /// 525 + /// All pages touched by this operation must be in use for the duration of this call. 526 + pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> { 527 + let mut out = MaybeUninit::<T>::uninit(); 528 + let mut out_offset = 0; 529 + // SAFETY: `self.iterate` has the same safety requirements as `read`. 530 + unsafe { 531 + self.iterate(offset, size_of::<T>(), |page, offset, to_copy| { 532 + // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T. 533 + let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset); 534 + // SAFETY: The pointer points is in-bounds of the `out` variable, so it is valid. 535 + page.read_raw(obj_ptr, offset, to_copy)?; 536 + out_offset += to_copy; 537 + Ok(()) 538 + })?; 539 + } 540 + // SAFETY: We just initialised the data. 541 + Ok(unsafe { out.assume_init() }) 542 + } 543 + 544 + /// Copy from kernel space into this page range. 545 + /// 546 + /// # Safety 547 + /// 548 + /// All pages touched by this operation must be in use for the duration of this call. 549 + pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result { 550 + let mut obj_offset = 0; 551 + // SAFETY: `self.iterate` has the same safety requirements as `write`. 552 + unsafe { 553 + self.iterate(offset, size_of_val(obj), |page, offset, to_copy| { 554 + // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T. 555 + let obj_ptr = (obj as *const T as *const u8).add(obj_offset); 556 + // SAFETY: We have a reference to the object, so the pointer is valid. 
557 + page.write_raw(obj_ptr, offset, to_copy)?; 558 + obj_offset += to_copy; 559 + Ok(()) 560 + }) 561 + } 562 + } 563 + 564 + /// Write zeroes to the given range. 565 + /// 566 + /// # Safety 567 + /// 568 + /// All pages touched by this operation must be in use for the duration of this call. 569 + pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result { 570 + // SAFETY: `self.iterate` has the same safety requirements as `copy_into`. 571 + unsafe { 572 + self.iterate(offset, size, |page, offset, len| { 573 + page.fill_zero_raw(offset, len) 574 + }) 575 + } 576 + } 577 + } 578 + 579 + #[pinned_drop] 580 + impl PinnedDrop for ShrinkablePageRange { 581 + fn drop(self: Pin<&mut Self>) { 582 + let (pages, size) = { 583 + let lock = self.lock.lock(); 584 + (lock.pages, lock.size) 585 + }; 586 + 587 + if size == 0 { 588 + return; 589 + } 590 + 591 + // Note: This call is also necessary for the safety of `stable_trylock_mm`. 592 + let mm_lock = self.mm_lock.lock(); 593 + 594 + // This is the destructor, so unlike the other methods, we only need to worry about races 595 + // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the 596 + // shrinker, and after this loop, the shrinker will not access any of our pages since we 597 + // removed them from the lru list. 598 + for i in 0..size { 599 + // SAFETY: Loop is in-bounds of the size. 600 + let p_ptr = unsafe { pages.add(i) }; 601 + // SAFETY: No other readers, so we can read. 602 + if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } { 603 + // SAFETY: The pointer is valid and it's the right shrinker. 604 + unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) }; 605 + } 606 + } 607 + 608 + drop(mm_lock); 609 + 610 + // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore, 611 + // all `size` elements are initialized. Also, the array is no longer shared with the 612 + // shrinker due to the above loop. 613 + drop(unsafe { KVVec::from_raw_parts(pages, size, size) }); 614 + } 615 + } 616 + 617 + /// # Safety 618 + /// Called by the shrinker. 619 + #[no_mangle] 620 + unsafe extern "C" fn rust_shrink_count( 621 + shrink: *mut bindings::shrinker, 622 + _sc: *mut bindings::shrink_control, 623 + ) -> c_ulong { 624 + // SAFETY: We can access our own private data. 625 + let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() }; 626 + // SAFETY: Accessing the lru list is okay. Just an FFI call. 627 + unsafe { bindings::list_lru_count(list_lru) } 628 + } 629 + 630 + /// # Safety 631 + /// Called by the shrinker. 632 + #[no_mangle] 633 + unsafe extern "C" fn rust_shrink_scan( 634 + shrink: *mut bindings::shrinker, 635 + sc: *mut bindings::shrink_control, 636 + ) -> c_ulong { 637 + // SAFETY: We can access our own private data. 638 + let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() }; 639 + // SAFETY: Caller guarantees that it is safe to read this field. 640 + let nr_to_scan = unsafe { (*sc).nr_to_scan }; 641 + // SAFETY: Accessing the lru list is okay. Just an FFI call. 642 + unsafe { 643 + bindings::list_lru_walk( 644 + list_lru, 645 + Some(bindings::rust_shrink_free_page_wrap), 646 + ptr::null_mut(), 647 + nr_to_scan, 648 + ) 649 + } 650 + } 651 + 652 + const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP; 653 + const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY; 654 + 655 + /// # Safety 656 + /// Called by the shrinker. 
657 + #[no_mangle] 658 + unsafe extern "C" fn rust_shrink_free_page( 659 + item: *mut bindings::list_head, 660 + lru: *mut bindings::list_lru_one, 661 + _cb_arg: *mut c_void, 662 + ) -> bindings::lru_status { 663 + // Fields that should survive after unlocking the lru lock. 664 + let page; 665 + let page_index; 666 + let mm; 667 + let mmap_read; 668 + let mm_mutex; 669 + let vma_addr; 670 + 671 + { 672 + // CAST: The `list_head` field is first in `PageInfo`. 673 + let info = item as *mut PageInfo; 674 + // SAFETY: The `range` field of `PageInfo` is immutable. 675 + let range = unsafe { &*((*info).range) }; 676 + 677 + mm = match range.mm.mmget_not_zero() { 678 + Some(mm) => MmWithUser::into_mmput_async(mm), 679 + None => return LRU_SKIP, 680 + }; 681 + 682 + mm_mutex = match range.stable_trylock_mm() { 683 + Some(guard) => guard, 684 + None => return LRU_SKIP, 685 + }; 686 + 687 + mmap_read = match mm.mmap_read_trylock() { 688 + Some(guard) => guard, 689 + None => return LRU_SKIP, 690 + }; 691 + 692 + // We can't lock it normally here, since we hold the lru lock. 693 + let inner = match range.lock.try_lock() { 694 + Some(inner) => inner, 695 + None => return LRU_SKIP, 696 + }; 697 + 698 + // SAFETY: The item is in this lru list, so it's okay to remove it. 699 + unsafe { bindings::list_lru_isolate(lru, item) }; 700 + 701 + // SAFETY: Both pointers are in bounds of the same allocation. 702 + page_index = unsafe { info.offset_from(inner.pages) } as usize; 703 + 704 + // SAFETY: We hold the spinlock, so we can take the page. 705 + // 706 + // This sets the page pointer to zero before we unmap it from the vma. However, we call 707 + // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to 708 + // insert a new page until after our call to `zap_page_range`. 709 + page = unsafe { PageInfo::take_page(info) }; 710 + vma_addr = inner.vma_addr; 711 + 712 + // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because 713 + // they can be freed at any point after we unlock `lru_lock`. This is with the exception of 714 + // `mm_mutex` which is kept alive by holding the lock. 715 + } 716 + 717 + // SAFETY: The lru lock is locked when this method is called. 718 + unsafe { bindings::spin_unlock(&raw mut (*lru).lock) }; 719 + 720 + if let Some(vma) = mmap_read.vma_lookup(vma_addr) { 721 + let user_page_addr = vma_addr + (page_index << PAGE_SHIFT); 722 + vma.zap_page_range_single(user_page_addr, PAGE_SIZE); 723 + } 724 + 725 + drop(mmap_read); 726 + drop(mm_mutex); 727 + drop(mm); 728 + drop(page); 729 + 730 + // SAFETY: We just unlocked the lru lock, but it should be locked when we return. 731 + unsafe { bindings::spin_lock(&raw mut (*lru).lock) }; 732 + 733 + LRU_REMOVED_ENTRY 734 + }
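For orientation, here is a rough usage sketch of the API above (illustration only, not part of this patch): a hypothetical caller, similar to what process.rs further below does, marks a page range as in use, copies data into it while it is in use, and hands the pages back to the shrinker once the buffer is freed. The function name and parameters are invented for the example; the calls mirror page_range.rs as written.

// Illustration only -- not part of this patch.
fn example_fill_buffer(
    pages: &ShrinkablePageRange,
    reader: &mut UserSliceReader,
    offset: usize,
    size: usize,
) -> Result {
    use kernel::page::PAGE_SIZE;

    let first_page = offset / PAGE_SIZE;
    let last_page = (offset + size).div_ceil(PAGE_SIZE);

    // Mark the pages as in use. Pages the shrinker already reclaimed are
    // allocated and mapped again; resident pages are just removed from the
    // shrinker's lru list.
    pages.use_range(first_page, last_page)?;

    // SAFETY: The range was marked as in use above and `stop_using_range`
    // is not called on it until the copy below has completed.
    unsafe { pages.copy_from_user_slice(reader, offset, size)? };

    // ... the buffer is used and eventually freed ...

    // Make the pages available to the shrinker again. They are not freed
    // here; they are only reclaimed under memory pressure.
    pages.stop_using_range(first_page, last_page);
    Ok(())
}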
+24
drivers/android/binder/page_range_helper.c
+ // SPDX-License-Identifier: GPL-2.0
+
+ /* C helper for page_range.rs to work around a CFI violation.
+  *
+  * Bindgen currently pretends that `enum lru_status` is the same as an integer.
+  * This assumption is fine ABI-wise, but once you add CFI to the mix, it
+  * triggers a CFI violation because `enum lru_status` gets a different CFI tag.
+  *
+  * This file contains a workaround until bindgen can be fixed.
+  *
+  * Copyright (C) 2025 Google LLC.
+  */
+ #include "page_range_helper.h"
+
+ unsigned int rust_shrink_free_page(struct list_head *item,
+ 				    struct list_lru_one *list,
+ 				    void *cb_arg);
+
+ enum lru_status
+ rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list,
+ 			    void *cb_arg)
+ {
+ 	return rust_shrink_free_page(item, list, cb_arg);
+ }
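For readers who have not run into kCFI before, the following spells out the mismatch described in the comment above (illustration only, not part of this patch; shrink_scan_example is a hypothetical function). Under kCFI, the indirect call inside list_lru_walk() checks that the callee's type hash matches list_lru_walk_cb, i.e. a function returning enum lru_status. The Rust export is seen as returning a plain unsigned int, so its hash differs, while the wrapper's prototype matches the call site exactly; the wrapper's own call into Rust is a direct call, which kCFI does not instrument.

/* Illustration only -- not part of this patch. */
#include <linux/list_lru.h>
#include "page_range_helper.h"

/* The Rust export, as C sees it after bindgen's enum-to-integer mapping. */
unsigned int rust_shrink_free_page(struct list_head *item,
				   struct list_lru_one *list, void *cb_arg);

static unsigned long shrink_scan_example(struct list_lru *lru,
					 unsigned long nr_to_scan)
{
	/*
	 * Would trap under kCFI: the call site expects a callee typed
	 * `enum lru_status (*)(...)`, but the Rust symbol hashes as
	 * `unsigned int (*)(...)`:
	 *
	 *	return list_lru_walk(lru,
	 *			     (list_lru_walk_cb)rust_shrink_free_page,
	 *			     NULL, nr_to_scan);
	 */

	/* Fine: the wrapper's prototype matches list_lru_walk_cb exactly. */
	return list_lru_walk(lru, rust_shrink_free_page_wrap, NULL,
			     nr_to_scan);
}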
+15
drivers/android/binder/page_range_helper.h
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+  * Copyright (C) 2025 Google, Inc.
+  */
+
+ #ifndef _LINUX_PAGE_RANGE_HELPER_H
+ #define _LINUX_PAGE_RANGE_HELPER_H
+
+ #include <linux/list_lru.h>
+
+ enum lru_status
+ rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list,
+ 			    void *cb_arg);
+
+ #endif /* _LINUX_PAGE_RANGE_HELPER_H */
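The other two shrinker callbacks in page_range.rs, rust_shrink_count and rust_shrink_scan, already return unsigned long and therefore need no such wrapper. For orientation only, this is roughly how they would be attached on the C side with the dynamically allocated shrinker API; it is a sketch with invented names (binder_example_lru, the init function, the shrinker name), and the actual registration in this series happens from Rust through the Shrinker abstraction that ShrinkablePageRange holds a reference to.

/* Sketch only -- not how this patch registers its shrinker. */
#include <linux/init.h>
#include <linux/list_lru.h>
#include <linux/shrinker.h>

/* The callbacks defined in page_range.rs above. */
unsigned long rust_shrink_count(struct shrinker *shrink,
				struct shrink_control *sc);
unsigned long rust_shrink_scan(struct shrinker *shrink,
			       struct shrink_control *sc);

static struct list_lru binder_example_lru;

static int __init binder_shrinker_example_init(void)
{
	struct shrinker *shrinker;
	int ret;

	ret = list_lru_init(&binder_example_lru);
	if (ret)
		return ret;

	shrinker = shrinker_alloc(0, "android-binder-pages");
	if (!shrinker) {
		list_lru_destroy(&binder_example_lru);
		return -ENOMEM;
	}

	shrinker->count_objects = rust_shrink_count;
	shrinker->scan_objects = rust_shrink_scan;
	/* Both callbacks recover the lru list from private_data. */
	shrinker->private_data = &binder_example_lru;

	shrinker_register(shrinker);
	return 0;
}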
+1696
drivers/android/binder/process.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + //! This module defines the `Process` type, which represents a process using a particular binder 6 + //! context. 7 + //! 8 + //! The `Process` object keeps track of all of the resources that this process owns in the binder 9 + //! context. 10 + //! 11 + //! There is one `Process` object for each binder fd that a process has opened, so processes using 12 + //! several binder contexts have several `Process` objects. This ensures that the contexts are 13 + //! fully separated. 14 + 15 + use core::mem::take; 16 + 17 + use kernel::{ 18 + bindings, 19 + cred::Credential, 20 + error::Error, 21 + fs::file::{self, File}, 22 + list::{List, ListArc, ListArcField, ListLinks}, 23 + mm, 24 + prelude::*, 25 + rbtree::{self, RBTree, RBTreeNode, RBTreeNodeReservation}, 26 + seq_file::SeqFile, 27 + seq_print, 28 + sync::poll::PollTable, 29 + sync::{ 30 + lock::{spinlock::SpinLockBackend, Guard}, 31 + Arc, ArcBorrow, CondVar, CondVarTimeoutResult, Mutex, SpinLock, UniqueArc, 32 + }, 33 + task::Task, 34 + types::ARef, 35 + uaccess::{UserSlice, UserSliceReader}, 36 + uapi, 37 + workqueue::{self, Work}, 38 + }; 39 + 40 + use crate::{ 41 + allocation::{Allocation, AllocationInfo, NewAllocation}, 42 + context::Context, 43 + defs::*, 44 + error::{BinderError, BinderResult}, 45 + node::{CouldNotDeliverCriticalIncrement, CritIncrWrapper, Node, NodeDeath, NodeRef}, 46 + page_range::ShrinkablePageRange, 47 + range_alloc::{RangeAllocator, ReserveNew, ReserveNewArgs}, 48 + stats::BinderStats, 49 + thread::{PushWorkRes, Thread}, 50 + BinderfsProcFile, DArc, DLArc, DTRWrap, DeliverToRead, 51 + }; 52 + 53 + #[path = "freeze.rs"] 54 + mod freeze; 55 + use self::freeze::{FreezeCookie, FreezeListener}; 56 + 57 + struct Mapping { 58 + address: usize, 59 + alloc: RangeAllocator<AllocationInfo>, 60 + } 61 + 62 + impl Mapping { 63 + fn new(address: usize, size: usize) -> Self { 64 + Self { 65 + address, 66 + alloc: RangeAllocator::new(size), 67 + } 68 + } 69 + } 70 + 71 + // bitflags for defer_work. 72 + const PROC_DEFER_FLUSH: u8 = 1; 73 + const PROC_DEFER_RELEASE: u8 = 2; 74 + 75 + /// The fields of `Process` protected by the spinlock. 76 + pub(crate) struct ProcessInner { 77 + is_manager: bool, 78 + pub(crate) is_dead: bool, 79 + threads: RBTree<i32, Arc<Thread>>, 80 + /// INVARIANT: Threads pushed to this list must be owned by this process. 81 + ready_threads: List<Thread>, 82 + nodes: RBTree<u64, DArc<Node>>, 83 + mapping: Option<Mapping>, 84 + work: List<DTRWrap<dyn DeliverToRead>>, 85 + delivered_deaths: List<DTRWrap<NodeDeath>, 2>, 86 + 87 + /// The number of requested threads that haven't registered yet. 88 + requested_thread_count: u32, 89 + /// The maximum number of threads used by the process thread pool. 90 + max_threads: u32, 91 + /// The number of threads the started and registered with the thread pool. 92 + started_thread_count: u32, 93 + 94 + /// Bitmap of deferred work to do. 95 + defer_work: u8, 96 + 97 + /// Number of transactions to be transmitted before processes in freeze_wait 98 + /// are woken up. 99 + outstanding_txns: u32, 100 + /// Process is frozen and unable to service binder transactions. 101 + pub(crate) is_frozen: bool, 102 + /// Process received sync transactions since last frozen. 103 + pub(crate) sync_recv: bool, 104 + /// Process received async transactions since last frozen. 
105 + pub(crate) async_recv: bool, 106 + pub(crate) binderfs_file: Option<BinderfsProcFile>, 107 + /// Check for oneway spam 108 + oneway_spam_detection_enabled: bool, 109 + } 110 + 111 + impl ProcessInner { 112 + fn new() -> Self { 113 + Self { 114 + is_manager: false, 115 + is_dead: false, 116 + threads: RBTree::new(), 117 + ready_threads: List::new(), 118 + mapping: None, 119 + nodes: RBTree::new(), 120 + work: List::new(), 121 + delivered_deaths: List::new(), 122 + requested_thread_count: 0, 123 + max_threads: 0, 124 + started_thread_count: 0, 125 + defer_work: 0, 126 + outstanding_txns: 0, 127 + is_frozen: false, 128 + sync_recv: false, 129 + async_recv: false, 130 + binderfs_file: None, 131 + oneway_spam_detection_enabled: false, 132 + } 133 + } 134 + 135 + /// Schedule the work item for execution on this process. 136 + /// 137 + /// If any threads are ready for work, then the work item is given directly to that thread and 138 + /// it is woken up. Otherwise, it is pushed to the process work list. 139 + /// 140 + /// This call can fail only if the process is dead. In this case, the work item is returned to 141 + /// the caller so that the caller can drop it after releasing the inner process lock. This is 142 + /// necessary since the destructor of `Transaction` will take locks that can't necessarily be 143 + /// taken while holding the inner process lock. 144 + pub(crate) fn push_work( 145 + &mut self, 146 + work: DLArc<dyn DeliverToRead>, 147 + ) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> { 148 + // Try to find a ready thread to which to push the work. 149 + if let Some(thread) = self.ready_threads.pop_front() { 150 + // Push to thread while holding state lock. This prevents the thread from giving up 151 + // (for example, because of a signal) when we're about to deliver work. 152 + match thread.push_work(work) { 153 + PushWorkRes::Ok => Ok(()), 154 + PushWorkRes::FailedDead(work) => Err((BinderError::new_dead(), work)), 155 + } 156 + } else if self.is_dead { 157 + Err((BinderError::new_dead(), work)) 158 + } else { 159 + let sync = work.should_sync_wakeup(); 160 + 161 + // Didn't find a thread waiting for proc work; this can happen 162 + // in two scenarios: 163 + // 1. All threads are busy handling transactions 164 + // In that case, one of those threads should call back into 165 + // the kernel driver soon and pick up this work. 166 + // 2. Threads are using the (e)poll interface, in which case 167 + // they may be blocked on the waitqueue without having been 168 + // added to waiting_threads. For this case, we just iterate 169 + // over all threads not handling transaction work, and 170 + // wake them all up. We wake all because we don't know whether 171 + // a thread that called into (e)poll is handling non-binder 172 + // work currently. 173 + self.work.push_back(work); 174 + 175 + // Wake up polling threads, if any. 176 + for thread in self.threads.values() { 177 + thread.notify_if_poll_ready(sync); 178 + } 179 + 180 + Ok(()) 181 + } 182 + } 183 + 184 + pub(crate) fn remove_node(&mut self, ptr: u64) { 185 + self.nodes.remove(&ptr); 186 + } 187 + 188 + /// Updates the reference count on the given node. 
189 + pub(crate) fn update_node_refcount( 190 + &mut self, 191 + node: &DArc<Node>, 192 + inc: bool, 193 + strong: bool, 194 + count: usize, 195 + othread: Option<&Thread>, 196 + ) { 197 + let push = node.update_refcount_locked(inc, strong, count, self); 198 + 199 + // If we decided that we need to push work, push either to the process or to a thread if 200 + // one is specified. 201 + if let Some(node) = push { 202 + if let Some(thread) = othread { 203 + thread.push_work_deferred(node); 204 + } else { 205 + let _ = self.push_work(node); 206 + // Nothing to do: `push_work` may fail if the process is dead, but that's ok as in 207 + // that case, it doesn't care about the notification. 208 + } 209 + } 210 + } 211 + 212 + pub(crate) fn new_node_ref( 213 + &mut self, 214 + node: DArc<Node>, 215 + strong: bool, 216 + thread: Option<&Thread>, 217 + ) -> NodeRef { 218 + self.update_node_refcount(&node, true, strong, 1, thread); 219 + let strong_count = if strong { 1 } else { 0 }; 220 + NodeRef::new(node, strong_count, 1 - strong_count) 221 + } 222 + 223 + pub(crate) fn new_node_ref_with_thread( 224 + &mut self, 225 + node: DArc<Node>, 226 + strong: bool, 227 + thread: &Thread, 228 + wrapper: Option<CritIncrWrapper>, 229 + ) -> Result<NodeRef, CouldNotDeliverCriticalIncrement> { 230 + let push = match wrapper { 231 + None => node 232 + .incr_refcount_allow_zero2one(strong, self)? 233 + .map(|node| node as _), 234 + Some(wrapper) => node.incr_refcount_allow_zero2one_with_wrapper(strong, wrapper, self), 235 + }; 236 + if let Some(node) = push { 237 + thread.push_work_deferred(node); 238 + } 239 + let strong_count = if strong { 1 } else { 0 }; 240 + Ok(NodeRef::new(node, strong_count, 1 - strong_count)) 241 + } 242 + 243 + /// Returns an existing node with the given pointer and cookie, if one exists. 244 + /// 245 + /// Returns an error if a node with the given pointer but a different cookie exists. 246 + fn get_existing_node(&self, ptr: u64, cookie: u64) -> Result<Option<DArc<Node>>> { 247 + match self.nodes.get(&ptr) { 248 + None => Ok(None), 249 + Some(node) => { 250 + let (_, node_cookie) = node.get_id(); 251 + if node_cookie == cookie { 252 + Ok(Some(node.clone())) 253 + } else { 254 + Err(EINVAL) 255 + } 256 + } 257 + } 258 + } 259 + 260 + fn register_thread(&mut self) -> bool { 261 + if self.requested_thread_count == 0 { 262 + return false; 263 + } 264 + 265 + self.requested_thread_count -= 1; 266 + self.started_thread_count += 1; 267 + true 268 + } 269 + 270 + /// Finds a delivered death notification with the given cookie, removes it from the thread's 271 + /// delivered list, and returns it. 
272 + fn pull_delivered_death(&mut self, cookie: u64) -> Option<DArc<NodeDeath>> { 273 + let mut cursor = self.delivered_deaths.cursor_front(); 274 + while let Some(next) = cursor.peek_next() { 275 + if next.cookie == cookie { 276 + return Some(next.remove().into_arc()); 277 + } 278 + cursor.move_next(); 279 + } 280 + None 281 + } 282 + 283 + pub(crate) fn death_delivered(&mut self, death: DArc<NodeDeath>) { 284 + if let Some(death) = ListArc::try_from_arc_or_drop(death) { 285 + self.delivered_deaths.push_back(death); 286 + } else { 287 + pr_warn!("Notification added to `delivered_deaths` twice."); 288 + } 289 + } 290 + 291 + pub(crate) fn add_outstanding_txn(&mut self) { 292 + self.outstanding_txns += 1; 293 + } 294 + 295 + fn txns_pending_locked(&self) -> bool { 296 + if self.outstanding_txns > 0 { 297 + return true; 298 + } 299 + for thread in self.threads.values() { 300 + if thread.has_current_transaction() { 301 + return true; 302 + } 303 + } 304 + false 305 + } 306 + } 307 + 308 + /// Used to keep track of a node that this process has a handle to. 309 + #[pin_data] 310 + pub(crate) struct NodeRefInfo { 311 + debug_id: usize, 312 + /// The refcount that this process owns to the node. 313 + node_ref: ListArcField<NodeRef, { Self::LIST_PROC }>, 314 + death: ListArcField<Option<DArc<NodeDeath>>, { Self::LIST_PROC }>, 315 + /// Cookie of the active freeze listener for this node. 316 + freeze: ListArcField<Option<FreezeCookie>, { Self::LIST_PROC }>, 317 + /// Used to store this `NodeRefInfo` in the node's `refs` list. 318 + #[pin] 319 + links: ListLinks<{ Self::LIST_NODE }>, 320 + /// The handle for this `NodeRefInfo`. 321 + handle: u32, 322 + /// The process that has a handle to the node. 323 + pub(crate) process: Arc<Process>, 324 + } 325 + 326 + impl NodeRefInfo { 327 + /// The id used for the `Node::refs` list. 328 + pub(crate) const LIST_NODE: u64 = 0x2da16350fb724a10; 329 + /// The id used for the `ListArc` in `ProcessNodeRefs`. 330 + const LIST_PROC: u64 = 0xd703a5263dcc8650; 331 + 332 + fn new(node_ref: NodeRef, handle: u32, process: Arc<Process>) -> impl PinInit<Self> { 333 + pin_init!(Self { 334 + debug_id: super::next_debug_id(), 335 + node_ref: ListArcField::new(node_ref), 336 + death: ListArcField::new(None), 337 + freeze: ListArcField::new(None), 338 + links <- ListLinks::new(), 339 + handle, 340 + process, 341 + }) 342 + } 343 + 344 + kernel::list::define_list_arc_field_getter! { 345 + pub(crate) fn death(&mut self<{Self::LIST_PROC}>) -> &mut Option<DArc<NodeDeath>> { death } 346 + pub(crate) fn freeze(&mut self<{Self::LIST_PROC}>) -> &mut Option<FreezeCookie> { freeze } 347 + pub(crate) fn node_ref(&mut self<{Self::LIST_PROC}>) -> &mut NodeRef { node_ref } 348 + pub(crate) fn node_ref2(&self<{Self::LIST_PROC}>) -> &NodeRef { node_ref } 349 + } 350 + } 351 + 352 + kernel::list::impl_list_arc_safe! { 353 + impl ListArcSafe<{Self::LIST_NODE}> for NodeRefInfo { untracked; } 354 + impl ListArcSafe<{Self::LIST_PROC}> for NodeRefInfo { untracked; } 355 + } 356 + kernel::list::impl_list_item! { 357 + impl ListItem<{Self::LIST_NODE}> for NodeRefInfo { 358 + using ListLinks { self.links }; 359 + } 360 + } 361 + 362 + /// Keeps track of references this process has to nodes owned by other processes. 363 + /// 364 + /// TODO: Currently, the rbtree requires two allocations per node reference, and two tree 365 + /// traversals to look up a node by `Node::global_id`. Once the rbtree is more powerful, these 366 + /// extra costs should be eliminated. 
367 + struct ProcessNodeRefs { 368 + /// Used to look up nodes using the 32-bit id that this process knows it by. 369 + by_handle: RBTree<u32, ListArc<NodeRefInfo, { NodeRefInfo::LIST_PROC }>>, 370 + /// Used to look up nodes without knowing their local 32-bit id. The usize is the address of 371 + /// the underlying `Node` struct as returned by `Node::global_id`. 372 + by_node: RBTree<usize, u32>, 373 + /// Used to look up a `FreezeListener` by cookie. 374 + /// 375 + /// There might be multiple freeze listeners for the same node, but at most one of them is 376 + /// active. 377 + freeze_listeners: RBTree<FreezeCookie, FreezeListener>, 378 + } 379 + 380 + impl ProcessNodeRefs { 381 + fn new() -> Self { 382 + Self { 383 + by_handle: RBTree::new(), 384 + by_node: RBTree::new(), 385 + freeze_listeners: RBTree::new(), 386 + } 387 + } 388 + } 389 + 390 + /// A process using binder. 391 + /// 392 + /// Strictly speaking, there can be multiple of these per process. There is one for each binder fd 393 + /// that a process has opened, so processes using several binder contexts have several `Process` 394 + /// objects. This ensures that the contexts are fully separated. 395 + #[pin_data] 396 + pub(crate) struct Process { 397 + pub(crate) ctx: Arc<Context>, 398 + 399 + // The task leader (process). 400 + pub(crate) task: ARef<Task>, 401 + 402 + // Credential associated with file when `Process` is created. 403 + pub(crate) cred: ARef<Credential>, 404 + 405 + #[pin] 406 + pub(crate) inner: SpinLock<ProcessInner>, 407 + 408 + #[pin] 409 + pub(crate) pages: ShrinkablePageRange, 410 + 411 + // Waitqueue of processes waiting for all outstanding transactions to be 412 + // processed. 413 + #[pin] 414 + freeze_wait: CondVar, 415 + 416 + // Node references are in a different lock to avoid recursive acquisition when 417 + // incrementing/decrementing a node in another process. 418 + #[pin] 419 + node_refs: Mutex<ProcessNodeRefs>, 420 + 421 + // Work node for deferred work item. 422 + #[pin] 423 + defer_work: Work<Process>, 424 + 425 + // Links for process list in Context. 426 + #[pin] 427 + links: ListLinks, 428 + 429 + pub(crate) stats: BinderStats, 430 + } 431 + 432 + kernel::impl_has_work! { 433 + impl HasWork<Process> for Process { self.defer_work } 434 + } 435 + 436 + kernel::list::impl_list_arc_safe! { 437 + impl ListArcSafe<0> for Process { untracked; } 438 + } 439 + kernel::list::impl_list_item! 
{ 440 + impl ListItem<0> for Process { 441 + using ListLinks { self.links }; 442 + } 443 + } 444 + 445 + impl workqueue::WorkItem for Process { 446 + type Pointer = Arc<Process>; 447 + 448 + fn run(me: Arc<Self>) { 449 + let defer; 450 + { 451 + let mut inner = me.inner.lock(); 452 + defer = inner.defer_work; 453 + inner.defer_work = 0; 454 + } 455 + 456 + if defer & PROC_DEFER_FLUSH != 0 { 457 + me.deferred_flush(); 458 + } 459 + if defer & PROC_DEFER_RELEASE != 0 { 460 + me.deferred_release(); 461 + } 462 + } 463 + } 464 + 465 + impl Process { 466 + fn new(ctx: Arc<Context>, cred: ARef<Credential>) -> Result<Arc<Self>> { 467 + let current = kernel::current!(); 468 + let list_process = ListArc::pin_init::<Error>( 469 + try_pin_init!(Process { 470 + ctx, 471 + cred, 472 + inner <- kernel::new_spinlock!(ProcessInner::new(), "Process::inner"), 473 + pages <- ShrinkablePageRange::new(&super::BINDER_SHRINKER), 474 + node_refs <- kernel::new_mutex!(ProcessNodeRefs::new(), "Process::node_refs"), 475 + freeze_wait <- kernel::new_condvar!("Process::freeze_wait"), 476 + task: current.group_leader().into(), 477 + defer_work <- kernel::new_work!("Process::defer_work"), 478 + links <- ListLinks::new(), 479 + stats: BinderStats::new(), 480 + }), 481 + GFP_KERNEL, 482 + )?; 483 + 484 + let process = list_process.clone_arc(); 485 + process.ctx.register_process(list_process); 486 + 487 + Ok(process) 488 + } 489 + 490 + pub(crate) fn pid_in_current_ns(&self) -> kernel::task::Pid { 491 + self.task.tgid_nr_ns(None) 492 + } 493 + 494 + #[inline(never)] 495 + pub(crate) fn debug_print_stats(&self, m: &SeqFile, ctx: &Context) -> Result<()> { 496 + seq_print!(m, "proc {}\n", self.pid_in_current_ns()); 497 + seq_print!(m, "context {}\n", &*ctx.name); 498 + 499 + let inner = self.inner.lock(); 500 + seq_print!(m, " threads: {}\n", inner.threads.iter().count()); 501 + seq_print!( 502 + m, 503 + " requested threads: {}+{}/{}\n", 504 + inner.requested_thread_count, 505 + inner.started_thread_count, 506 + inner.max_threads, 507 + ); 508 + if let Some(mapping) = &inner.mapping { 509 + seq_print!( 510 + m, 511 + " free oneway space: {}\n", 512 + mapping.alloc.free_oneway_space() 513 + ); 514 + seq_print!(m, " buffers: {}\n", mapping.alloc.count_buffers()); 515 + } 516 + seq_print!( 517 + m, 518 + " outstanding transactions: {}\n", 519 + inner.outstanding_txns 520 + ); 521 + seq_print!(m, " nodes: {}\n", inner.nodes.iter().count()); 522 + drop(inner); 523 + 524 + { 525 + let mut refs = self.node_refs.lock(); 526 + let (mut count, mut weak, mut strong) = (0, 0, 0); 527 + for r in refs.by_handle.values_mut() { 528 + let node_ref = r.node_ref(); 529 + let (nstrong, nweak) = node_ref.get_count(); 530 + count += 1; 531 + weak += nweak; 532 + strong += nstrong; 533 + } 534 + seq_print!(m, " refs: {count} s {strong} w {weak}\n"); 535 + } 536 + 537 + self.stats.debug_print(" ", m); 538 + 539 + Ok(()) 540 + } 541 + 542 + #[inline(never)] 543 + pub(crate) fn debug_print(&self, m: &SeqFile, ctx: &Context, print_all: bool) -> Result<()> { 544 + seq_print!(m, "proc {}\n", self.pid_in_current_ns()); 545 + seq_print!(m, "context {}\n", &*ctx.name); 546 + 547 + let mut all_threads = KVec::new(); 548 + let mut all_nodes = KVec::new(); 549 + loop { 550 + let inner = self.inner.lock(); 551 + let num_threads = inner.threads.iter().count(); 552 + let num_nodes = inner.nodes.iter().count(); 553 + 554 + if all_threads.capacity() < num_threads || all_nodes.capacity() < num_nodes { 555 + drop(inner); 556 + all_threads.reserve(num_threads, 
GFP_KERNEL)?; 557 + all_nodes.reserve(num_nodes, GFP_KERNEL)?; 558 + continue; 559 + } 560 + 561 + for thread in inner.threads.values() { 562 + assert!(all_threads.len() < all_threads.capacity()); 563 + let _ = all_threads.push(thread.clone(), GFP_ATOMIC); 564 + } 565 + 566 + for node in inner.nodes.values() { 567 + assert!(all_nodes.len() < all_nodes.capacity()); 568 + let _ = all_nodes.push(node.clone(), GFP_ATOMIC); 569 + } 570 + 571 + break; 572 + } 573 + 574 + for thread in all_threads { 575 + thread.debug_print(m, print_all)?; 576 + } 577 + 578 + let mut inner = self.inner.lock(); 579 + for node in all_nodes { 580 + if print_all || node.has_oneway_transaction(&mut inner) { 581 + node.full_debug_print(m, &mut inner)?; 582 + } 583 + } 584 + drop(inner); 585 + 586 + if print_all { 587 + let mut refs = self.node_refs.lock(); 588 + for r in refs.by_handle.values_mut() { 589 + let node_ref = r.node_ref(); 590 + let dead = node_ref.node.owner.inner.lock().is_dead; 591 + let (strong, weak) = node_ref.get_count(); 592 + let debug_id = node_ref.node.debug_id; 593 + 594 + seq_print!( 595 + m, 596 + " ref {}: desc {} {}node {debug_id} s {strong} w {weak}", 597 + r.debug_id, 598 + r.handle, 599 + if dead { "dead " } else { "" }, 600 + ); 601 + } 602 + } 603 + 604 + let inner = self.inner.lock(); 605 + for work in &inner.work { 606 + work.debug_print(m, " ", " pending transaction ")?; 607 + } 608 + for _death in &inner.delivered_deaths { 609 + seq_print!(m, " has delivered dead binder\n"); 610 + } 611 + if let Some(mapping) = &inner.mapping { 612 + mapping.alloc.debug_print(m)?; 613 + } 614 + drop(inner); 615 + 616 + Ok(()) 617 + } 618 + 619 + /// Attempts to fetch a work item from the process queue. 620 + pub(crate) fn get_work(&self) -> Option<DLArc<dyn DeliverToRead>> { 621 + self.inner.lock().work.pop_front() 622 + } 623 + 624 + /// Attempts to fetch a work item from the process queue. If none is available, it registers the 625 + /// given thread as ready to receive work directly. 626 + /// 627 + /// This must only be called when the thread is not participating in a transaction chain; when 628 + /// it is, work will always be delivered directly to the thread (and not through the process 629 + /// queue). 630 + pub(crate) fn get_work_or_register<'a>( 631 + &'a self, 632 + thread: &'a Arc<Thread>, 633 + ) -> GetWorkOrRegister<'a> { 634 + let mut inner = self.inner.lock(); 635 + // Try to get work from the process queue. 636 + if let Some(work) = inner.work.pop_front() { 637 + return GetWorkOrRegister::Work(work); 638 + } 639 + 640 + // Register the thread as ready. 641 + GetWorkOrRegister::Register(Registration::new(thread, &mut inner)) 642 + } 643 + 644 + fn get_current_thread(self: ArcBorrow<'_, Self>) -> Result<Arc<Thread>> { 645 + let id = { 646 + let current = kernel::current!(); 647 + if !core::ptr::eq(current.group_leader(), &*self.task) { 648 + pr_err!("get_current_thread was called from the wrong process."); 649 + return Err(EINVAL); 650 + } 651 + current.pid() 652 + }; 653 + 654 + { 655 + let inner = self.inner.lock(); 656 + if let Some(thread) = inner.threads.get(&id) { 657 + return Ok(thread.clone()); 658 + } 659 + } 660 + 661 + // Allocate a new `Thread` without holding any locks. 
662 + let reservation = RBTreeNodeReservation::new(GFP_KERNEL)?; 663 + let ta: Arc<Thread> = Thread::new(id, self.into())?; 664 + 665 + let mut inner = self.inner.lock(); 666 + match inner.threads.entry(id) { 667 + rbtree::Entry::Vacant(entry) => { 668 + entry.insert(ta.clone(), reservation); 669 + Ok(ta) 670 + } 671 + rbtree::Entry::Occupied(_entry) => { 672 + pr_err!("Cannot create two threads with the same id."); 673 + Err(EINVAL) 674 + } 675 + } 676 + } 677 + 678 + pub(crate) fn push_work(&self, work: DLArc<dyn DeliverToRead>) -> BinderResult { 679 + // If push_work fails, drop the work item outside the lock. 680 + let res = self.inner.lock().push_work(work); 681 + match res { 682 + Ok(()) => Ok(()), 683 + Err((err, work)) => { 684 + drop(work); 685 + Err(err) 686 + } 687 + } 688 + } 689 + 690 + fn set_as_manager( 691 + self: ArcBorrow<'_, Self>, 692 + info: Option<FlatBinderObject>, 693 + thread: &Thread, 694 + ) -> Result { 695 + let (ptr, cookie, flags) = if let Some(obj) = info { 696 + ( 697 + // SAFETY: The object type for this ioctl is implicitly `BINDER_TYPE_BINDER`, so it 698 + // is safe to access the `binder` field. 699 + unsafe { obj.__bindgen_anon_1.binder }, 700 + obj.cookie, 701 + obj.flags, 702 + ) 703 + } else { 704 + (0, 0, 0) 705 + }; 706 + let node_ref = self.get_node(ptr, cookie, flags as _, true, thread)?; 707 + let node = node_ref.node.clone(); 708 + self.ctx.set_manager_node(node_ref)?; 709 + self.inner.lock().is_manager = true; 710 + 711 + // Force the state of the node to prevent the delivery of acquire/increfs. 712 + let mut owner_inner = node.owner.inner.lock(); 713 + node.force_has_count(&mut owner_inner); 714 + Ok(()) 715 + } 716 + 717 + fn get_node_inner( 718 + self: ArcBorrow<'_, Self>, 719 + ptr: u64, 720 + cookie: u64, 721 + flags: u32, 722 + strong: bool, 723 + thread: &Thread, 724 + wrapper: Option<CritIncrWrapper>, 725 + ) -> Result<Result<NodeRef, CouldNotDeliverCriticalIncrement>> { 726 + // Try to find an existing node. 727 + { 728 + let mut inner = self.inner.lock(); 729 + if let Some(node) = inner.get_existing_node(ptr, cookie)? { 730 + return Ok(inner.new_node_ref_with_thread(node, strong, thread, wrapper)); 731 + } 732 + } 733 + 734 + // Allocate the node before reacquiring the lock. 735 + let node = DTRWrap::arc_pin_init(Node::new(ptr, cookie, flags, self.into()))?.into_arc(); 736 + let rbnode = RBTreeNode::new(ptr, node.clone(), GFP_KERNEL)?; 737 + let mut inner = self.inner.lock(); 738 + if let Some(node) = inner.get_existing_node(ptr, cookie)? { 739 + return Ok(inner.new_node_ref_with_thread(node, strong, thread, wrapper)); 740 + } 741 + 742 + inner.nodes.insert(rbnode); 743 + // This can only fail if someone has already pushed the node to a list, but we just created 744 + // it and still hold the lock, so it can't fail right now. 
745 + let node_ref = inner 746 + .new_node_ref_with_thread(node, strong, thread, wrapper) 747 + .unwrap(); 748 + 749 + Ok(Ok(node_ref)) 750 + } 751 + 752 + pub(crate) fn get_node( 753 + self: ArcBorrow<'_, Self>, 754 + ptr: u64, 755 + cookie: u64, 756 + flags: u32, 757 + strong: bool, 758 + thread: &Thread, 759 + ) -> Result<NodeRef> { 760 + let mut wrapper = None; 761 + for _ in 0..2 { 762 + match self.get_node_inner(ptr, cookie, flags, strong, thread, wrapper) { 763 + Err(err) => return Err(err), 764 + Ok(Ok(node_ref)) => return Ok(node_ref), 765 + Ok(Err(CouldNotDeliverCriticalIncrement)) => { 766 + wrapper = Some(CritIncrWrapper::new()?); 767 + } 768 + } 769 + } 770 + // We only get a `CouldNotDeliverCriticalIncrement` error if `wrapper` is `None`, so the 771 + // loop should run at most twice. 772 + unreachable!() 773 + } 774 + 775 + pub(crate) fn insert_or_update_handle( 776 + self: ArcBorrow<'_, Process>, 777 + node_ref: NodeRef, 778 + is_mananger: bool, 779 + ) -> Result<u32> { 780 + { 781 + let mut refs = self.node_refs.lock(); 782 + 783 + // Do a lookup before inserting. 784 + if let Some(handle_ref) = refs.by_node.get(&node_ref.node.global_id()) { 785 + let handle = *handle_ref; 786 + let info = refs.by_handle.get_mut(&handle).unwrap(); 787 + info.node_ref().absorb(node_ref); 788 + return Ok(handle); 789 + } 790 + } 791 + 792 + // Reserve memory for tree nodes. 793 + let reserve1 = RBTreeNodeReservation::new(GFP_KERNEL)?; 794 + let reserve2 = RBTreeNodeReservation::new(GFP_KERNEL)?; 795 + let info = UniqueArc::new_uninit(GFP_KERNEL)?; 796 + 797 + let mut refs = self.node_refs.lock(); 798 + 799 + // Do a lookup again as node may have been inserted before the lock was reacquired. 800 + if let Some(handle_ref) = refs.by_node.get(&node_ref.node.global_id()) { 801 + let handle = *handle_ref; 802 + let info = refs.by_handle.get_mut(&handle).unwrap(); 803 + info.node_ref().absorb(node_ref); 804 + return Ok(handle); 805 + } 806 + 807 + // Find id. 808 + let mut target: u32 = if is_mananger { 0 } else { 1 }; 809 + for handle in refs.by_handle.keys() { 810 + if *handle > target { 811 + break; 812 + } 813 + if *handle == target { 814 + target = target.checked_add(1).ok_or(ENOMEM)?; 815 + } 816 + } 817 + 818 + let gid = node_ref.node.global_id(); 819 + let (info_proc, info_node) = { 820 + let info_init = NodeRefInfo::new(node_ref, target, self.into()); 821 + match info.pin_init_with(info_init) { 822 + Ok(info) => ListArc::pair_from_pin_unique(info), 823 + // error is infallible 824 + Err(err) => match err {}, 825 + } 826 + }; 827 + 828 + // Ensure the process is still alive while we insert a new reference. 829 + // 830 + // This releases the lock before inserting the nodes, but since `is_dead` is set as the 831 + // first thing in `deferred_release`, process cleanup will not miss the items inserted into 832 + // `refs` below. 833 + if self.inner.lock().is_dead { 834 + return Err(ESRCH); 835 + } 836 + 837 + // SAFETY: `info_proc` and `info_node` reference the same node, so we are inserting 838 + // `info_node` into the right node's `refs` list. 839 + unsafe { info_proc.node_ref2().node.insert_node_info(info_node) }; 840 + 841 + refs.by_node.insert(reserve1.into_node(gid, target)); 842 + refs.by_handle.insert(reserve2.into_node(target, info_proc)); 843 + Ok(target) 844 + } 845 + 846 + pub(crate) fn get_transaction_node(&self, handle: u32) -> BinderResult<NodeRef> { 847 + // When handle is zero, try to get the context manager. 848 + if handle == 0 { 849 + Ok(self.ctx.get_manager_node(true)?) 
850 + } else { 851 + Ok(self.get_node_from_handle(handle, true)?) 852 + } 853 + } 854 + 855 + pub(crate) fn get_node_from_handle(&self, handle: u32, strong: bool) -> Result<NodeRef> { 856 + self.node_refs 857 + .lock() 858 + .by_handle 859 + .get_mut(&handle) 860 + .ok_or(ENOENT)? 861 + .node_ref() 862 + .clone(strong) 863 + } 864 + 865 + pub(crate) fn remove_from_delivered_deaths(&self, death: &DArc<NodeDeath>) { 866 + let mut inner = self.inner.lock(); 867 + // SAFETY: By the invariant on the `delivered_links` field, this is the right linked list. 868 + let removed = unsafe { inner.delivered_deaths.remove(death) }; 869 + drop(inner); 870 + drop(removed); 871 + } 872 + 873 + pub(crate) fn update_ref( 874 + self: ArcBorrow<'_, Process>, 875 + handle: u32, 876 + inc: bool, 877 + strong: bool, 878 + ) -> Result { 879 + if inc && handle == 0 { 880 + if let Ok(node_ref) = self.ctx.get_manager_node(strong) { 881 + if core::ptr::eq(&*self, &*node_ref.node.owner) { 882 + return Err(EINVAL); 883 + } 884 + let _ = self.insert_or_update_handle(node_ref, true); 885 + return Ok(()); 886 + } 887 + } 888 + 889 + // To preserve original binder behaviour, we only fail requests where the manager tries to 890 + // increment references on itself. 891 + let mut refs = self.node_refs.lock(); 892 + if let Some(info) = refs.by_handle.get_mut(&handle) { 893 + if info.node_ref().update(inc, strong) { 894 + // Clean up death if there is one attached to this node reference. 895 + if let Some(death) = info.death().take() { 896 + death.set_cleared(true); 897 + self.remove_from_delivered_deaths(&death); 898 + } 899 + 900 + // Remove reference from process tables, and from the node's `refs` list. 901 + 902 + // SAFETY: We are removing the `NodeRefInfo` from the right node. 903 + unsafe { info.node_ref2().node.remove_node_info(info) }; 904 + 905 + let id = info.node_ref().node.global_id(); 906 + refs.by_handle.remove(&handle); 907 + refs.by_node.remove(&id); 908 + } 909 + } else { 910 + // All refs are cleared in process exit, so this warning is expected in that case. 911 + if !self.inner.lock().is_dead { 912 + pr_warn!("{}: no such ref {handle}\n", self.pid_in_current_ns()); 913 + } 914 + } 915 + Ok(()) 916 + } 917 + 918 + /// Decrements the refcount of the given node, if one exists. 919 + pub(crate) fn update_node(&self, ptr: u64, cookie: u64, strong: bool) { 920 + let mut inner = self.inner.lock(); 921 + if let Ok(Some(node)) = inner.get_existing_node(ptr, cookie) { 922 + inner.update_node_refcount(&node, false, strong, 1, None); 923 + } 924 + } 925 + 926 + pub(crate) fn inc_ref_done(&self, reader: &mut UserSliceReader, strong: bool) -> Result { 927 + let ptr = reader.read::<u64>()?; 928 + let cookie = reader.read::<u64>()?; 929 + let mut inner = self.inner.lock(); 930 + if let Ok(Some(node)) = inner.get_existing_node(ptr, cookie) { 931 + if let Some(node) = node.inc_ref_done_locked(strong, &mut inner) { 932 + // This only fails if the process is dead. 
933 + let _ = inner.push_work(node); 934 + } 935 + } 936 + Ok(()) 937 + } 938 + 939 + pub(crate) fn buffer_alloc( 940 + self: &Arc<Self>, 941 + debug_id: usize, 942 + size: usize, 943 + is_oneway: bool, 944 + from_pid: i32, 945 + ) -> BinderResult<NewAllocation> { 946 + use kernel::page::PAGE_SIZE; 947 + 948 + let mut reserve_new_args = ReserveNewArgs { 949 + debug_id, 950 + size, 951 + is_oneway, 952 + pid: from_pid, 953 + ..ReserveNewArgs::default() 954 + }; 955 + 956 + let (new_alloc, addr) = loop { 957 + let mut inner = self.inner.lock(); 958 + let mapping = inner.mapping.as_mut().ok_or_else(BinderError::new_dead)?; 959 + let alloc_request = match mapping.alloc.reserve_new(reserve_new_args)? { 960 + ReserveNew::Success(new_alloc) => break (new_alloc, mapping.address), 961 + ReserveNew::NeedAlloc(request) => request, 962 + }; 963 + drop(inner); 964 + // We need to allocate memory and then call `reserve_new` again. 965 + reserve_new_args = alloc_request.make_alloc()?; 966 + }; 967 + 968 + let res = Allocation::new( 969 + self.clone(), 970 + debug_id, 971 + new_alloc.offset, 972 + size, 973 + addr + new_alloc.offset, 974 + new_alloc.oneway_spam_detected, 975 + ); 976 + 977 + // This allocation will be marked as in use until the `Allocation` is used to free it. 978 + // 979 + // This method can't be called while holding a lock, so we release the lock first. It's 980 + // okay for several threads to use the method on the same index at the same time. In that 981 + // case, one of the calls will allocate the given page (if missing), and the other call 982 + // will wait for the other call to finish allocating the page. 983 + // 984 + // We will not call `stop_using_range` in parallel with this on the same page, because the 985 + // allocation can only be removed via the destructor of the `Allocation` object that we 986 + // currently own. 987 + match self.pages.use_range( 988 + new_alloc.offset / PAGE_SIZE, 989 + (new_alloc.offset + size).div_ceil(PAGE_SIZE), 990 + ) { 991 + Ok(()) => {} 992 + Err(err) => { 993 + pr_warn!("use_range failure {:?}", err); 994 + return Err(err.into()); 995 + } 996 + } 997 + 998 + Ok(NewAllocation(res)) 999 + } 1000 + 1001 + pub(crate) fn buffer_get(self: &Arc<Self>, ptr: usize) -> Option<Allocation> { 1002 + let mut inner = self.inner.lock(); 1003 + let mapping = inner.mapping.as_mut()?; 1004 + let offset = ptr.checked_sub(mapping.address)?; 1005 + let (size, debug_id, odata) = mapping.alloc.reserve_existing(offset).ok()?; 1006 + let mut alloc = Allocation::new(self.clone(), debug_id, offset, size, ptr, false); 1007 + if let Some(data) = odata { 1008 + alloc.set_info(data); 1009 + } 1010 + Some(alloc) 1011 + } 1012 + 1013 + pub(crate) fn buffer_raw_free(&self, ptr: usize) { 1014 + let mut inner = self.inner.lock(); 1015 + if let Some(ref mut mapping) = &mut inner.mapping { 1016 + let offset = match ptr.checked_sub(mapping.address) { 1017 + Some(offset) => offset, 1018 + None => return, 1019 + }; 1020 + 1021 + let freed_range = match mapping.alloc.reservation_abort(offset) { 1022 + Ok(freed_range) => freed_range, 1023 + Err(_) => { 1024 + pr_warn!( 1025 + "Pointer {:x} failed to free, base = {:x}\n", 1026 + ptr, 1027 + mapping.address 1028 + ); 1029 + return; 1030 + } 1031 + }; 1032 + 1033 + // No more allocations in this range. Mark them as not in use. 1034 + // 1035 + // Must be done before we release the lock so that `use_range` is not used on these 1036 + // indices until `stop_using_range` returns. 
1037 + self.pages 1038 + .stop_using_range(freed_range.start_page_idx, freed_range.end_page_idx); 1039 + } 1040 + } 1041 + 1042 + pub(crate) fn buffer_make_freeable(&self, offset: usize, mut data: Option<AllocationInfo>) { 1043 + let mut inner = self.inner.lock(); 1044 + if let Some(ref mut mapping) = &mut inner.mapping { 1045 + if mapping.alloc.reservation_commit(offset, &mut data).is_err() { 1046 + pr_warn!("Offset {} failed to be marked freeable\n", offset); 1047 + } 1048 + } 1049 + } 1050 + 1051 + fn create_mapping(&self, vma: &mm::virt::VmaNew) -> Result { 1052 + use kernel::page::PAGE_SIZE; 1053 + let size = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize); 1054 + let mapping = Mapping::new(vma.start(), size); 1055 + let page_count = self.pages.register_with_vma(vma)?; 1056 + if page_count * PAGE_SIZE != size { 1057 + return Err(EINVAL); 1058 + } 1059 + 1060 + // Save range allocator for later. 1061 + self.inner.lock().mapping = Some(mapping); 1062 + 1063 + Ok(()) 1064 + } 1065 + 1066 + fn version(&self, data: UserSlice) -> Result { 1067 + data.writer().write(&BinderVersion::current()) 1068 + } 1069 + 1070 + pub(crate) fn register_thread(&self) -> bool { 1071 + self.inner.lock().register_thread() 1072 + } 1073 + 1074 + fn remove_thread(&self, thread: Arc<Thread>) { 1075 + self.inner.lock().threads.remove(&thread.id); 1076 + thread.release(); 1077 + } 1078 + 1079 + fn set_max_threads(&self, max: u32) { 1080 + self.inner.lock().max_threads = max; 1081 + } 1082 + 1083 + fn set_oneway_spam_detection_enabled(&self, enabled: u32) { 1084 + self.inner.lock().oneway_spam_detection_enabled = enabled != 0; 1085 + } 1086 + 1087 + pub(crate) fn is_oneway_spam_detection_enabled(&self) -> bool { 1088 + self.inner.lock().oneway_spam_detection_enabled 1089 + } 1090 + 1091 + fn get_node_debug_info(&self, data: UserSlice) -> Result { 1092 + let (mut reader, mut writer) = data.reader_writer(); 1093 + 1094 + // Read the starting point. 1095 + let ptr = reader.read::<BinderNodeDebugInfo>()?.ptr; 1096 + let mut out = BinderNodeDebugInfo::default(); 1097 + 1098 + { 1099 + let inner = self.inner.lock(); 1100 + for (node_ptr, node) in &inner.nodes { 1101 + if *node_ptr > ptr { 1102 + node.populate_debug_info(&mut out, &inner); 1103 + break; 1104 + } 1105 + } 1106 + } 1107 + 1108 + writer.write(&out) 1109 + } 1110 + 1111 + fn get_node_info_from_ref(&self, data: UserSlice) -> Result { 1112 + let (mut reader, mut writer) = data.reader_writer(); 1113 + let mut out = reader.read::<BinderNodeInfoForRef>()?; 1114 + 1115 + if out.strong_count != 0 1116 + || out.weak_count != 0 1117 + || out.reserved1 != 0 1118 + || out.reserved2 != 0 1119 + || out.reserved3 != 0 1120 + { 1121 + return Err(EINVAL); 1122 + } 1123 + 1124 + // Only the context manager is allowed to use this ioctl. 1125 + if !self.inner.lock().is_manager { 1126 + return Err(EPERM); 1127 + } 1128 + 1129 + { 1130 + let mut node_refs = self.node_refs.lock(); 1131 + let node_info = node_refs.by_handle.get_mut(&out.handle).ok_or(ENOENT)?; 1132 + let node_ref = node_info.node_ref(); 1133 + let owner_inner = node_ref.node.owner.inner.lock(); 1134 + node_ref.node.populate_counts(&mut out, &owner_inner); 1135 + } 1136 + 1137 + // Write the result back. 
1138 + writer.write(&out) 1139 + } 1140 + 1141 + pub(crate) fn needs_thread(&self) -> bool { 1142 + let mut inner = self.inner.lock(); 1143 + let ret = inner.requested_thread_count == 0 1144 + && inner.ready_threads.is_empty() 1145 + && inner.started_thread_count < inner.max_threads; 1146 + if ret { 1147 + inner.requested_thread_count += 1 1148 + } 1149 + ret 1150 + } 1151 + 1152 + pub(crate) fn request_death( 1153 + self: &Arc<Self>, 1154 + reader: &mut UserSliceReader, 1155 + thread: &Thread, 1156 + ) -> Result { 1157 + let handle: u32 = reader.read()?; 1158 + let cookie: u64 = reader.read()?; 1159 + 1160 + // Queue BR_ERROR if we can't allocate memory for the death notification. 1161 + let death = UniqueArc::new_uninit(GFP_KERNEL).inspect_err(|_| { 1162 + thread.push_return_work(BR_ERROR); 1163 + })?; 1164 + let mut refs = self.node_refs.lock(); 1165 + let Some(info) = refs.by_handle.get_mut(&handle) else { 1166 + pr_warn!("BC_REQUEST_DEATH_NOTIFICATION invalid ref {handle}\n"); 1167 + return Ok(()); 1168 + }; 1169 + 1170 + // Nothing to do if there is already a death notification request for this handle. 1171 + if info.death().is_some() { 1172 + pr_warn!("BC_REQUEST_DEATH_NOTIFICATION death notification already set\n"); 1173 + return Ok(()); 1174 + } 1175 + 1176 + let death = { 1177 + let death_init = NodeDeath::new(info.node_ref().node.clone(), self.clone(), cookie); 1178 + match death.pin_init_with(death_init) { 1179 + Ok(death) => death, 1180 + // error is infallible 1181 + Err(err) => match err {}, 1182 + } 1183 + }; 1184 + 1185 + // Register the death notification. 1186 + { 1187 + let owner = info.node_ref2().node.owner.clone(); 1188 + let mut owner_inner = owner.inner.lock(); 1189 + if owner_inner.is_dead { 1190 + let death = Arc::from(death); 1191 + *info.death() = Some(death.clone()); 1192 + drop(owner_inner); 1193 + death.set_dead(); 1194 + } else { 1195 + let death = ListArc::from(death); 1196 + *info.death() = Some(death.clone_arc()); 1197 + info.node_ref().node.add_death(death, &mut owner_inner); 1198 + } 1199 + } 1200 + Ok(()) 1201 + } 1202 + 1203 + pub(crate) fn clear_death(&self, reader: &mut UserSliceReader, thread: &Thread) -> Result { 1204 + let handle: u32 = reader.read()?; 1205 + let cookie: u64 = reader.read()?; 1206 + 1207 + let mut refs = self.node_refs.lock(); 1208 + let Some(info) = refs.by_handle.get_mut(&handle) else { 1209 + pr_warn!("BC_CLEAR_DEATH_NOTIFICATION invalid ref {handle}\n"); 1210 + return Ok(()); 1211 + }; 1212 + 1213 + let Some(death) = info.death().take() else { 1214 + pr_warn!("BC_CLEAR_DEATH_NOTIFICATION death notification not active\n"); 1215 + return Ok(()); 1216 + }; 1217 + if death.cookie != cookie { 1218 + *info.death() = Some(death); 1219 + pr_warn!("BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch\n"); 1220 + return Ok(()); 1221 + } 1222 + 1223 + // Update state and determine if we need to queue a work item. We only need to do it when 1224 + // the node is not dead or if the user already completed the death notification. 1225 + if death.set_cleared(false) { 1226 + if let Some(death) = ListArc::try_from_arc_or_drop(death) { 1227 + let _ = thread.push_work_if_looper(death); 1228 + } 1229 + } 1230 + 1231 + Ok(()) 1232 + } 1233 + 1234 + pub(crate) fn dead_binder_done(&self, cookie: u64, thread: &Thread) { 1235 + if let Some(death) = self.inner.lock().pull_delivered_death(cookie) { 1236 + death.set_notification_done(thread); 1237 + } 1238 + } 1239 + 1240 + /// Locks the spinlock and move the `nodes` rbtree out. 
1241 + /// 1242 + /// This allows you to iterate through `nodes` while also allowing you to give other parts of 1243 + /// the codebase exclusive access to `ProcessInner`. 1244 + pub(crate) fn lock_with_nodes(&self) -> WithNodes<'_> { 1245 + let mut inner = self.inner.lock(); 1246 + WithNodes { 1247 + nodes: take(&mut inner.nodes), 1248 + inner, 1249 + } 1250 + } 1251 + 1252 + fn deferred_flush(&self) { 1253 + let inner = self.inner.lock(); 1254 + for thread in inner.threads.values() { 1255 + thread.exit_looper(); 1256 + } 1257 + } 1258 + 1259 + fn deferred_release(self: Arc<Self>) { 1260 + let is_manager = { 1261 + let mut inner = self.inner.lock(); 1262 + inner.is_dead = true; 1263 + inner.is_frozen = false; 1264 + inner.sync_recv = false; 1265 + inner.async_recv = false; 1266 + inner.is_manager 1267 + }; 1268 + 1269 + if is_manager { 1270 + self.ctx.unset_manager_node(); 1271 + } 1272 + 1273 + self.ctx.deregister_process(&self); 1274 + 1275 + let binderfs_file = self.inner.lock().binderfs_file.take(); 1276 + drop(binderfs_file); 1277 + 1278 + // Release threads. 1279 + let threads = { 1280 + let mut inner = self.inner.lock(); 1281 + let threads = take(&mut inner.threads); 1282 + let ready = take(&mut inner.ready_threads); 1283 + drop(inner); 1284 + drop(ready); 1285 + 1286 + for thread in threads.values() { 1287 + thread.release(); 1288 + } 1289 + threads 1290 + }; 1291 + 1292 + // Release nodes. 1293 + { 1294 + while let Some(node) = { 1295 + let mut lock = self.inner.lock(); 1296 + lock.nodes.cursor_front().map(|c| c.remove_current().1) 1297 + } { 1298 + node.to_key_value().1.release(); 1299 + } 1300 + } 1301 + 1302 + // Clean up death listeners and remove nodes from external node info lists. 1303 + for info in self.node_refs.lock().by_handle.values_mut() { 1304 + // SAFETY: We are removing the `NodeRefInfo` from the right node. 1305 + unsafe { info.node_ref2().node.remove_node_info(info) }; 1306 + 1307 + // Remove all death notifications from the nodes (that belong to a different process). 1308 + let death = if let Some(existing) = info.death().take() { 1309 + existing 1310 + } else { 1311 + continue; 1312 + }; 1313 + death.set_cleared(false); 1314 + } 1315 + 1316 + // Clean up freeze listeners. 1317 + let freeze_listeners = take(&mut self.node_refs.lock().freeze_listeners); 1318 + for listener in freeze_listeners.values() { 1319 + listener.on_process_exit(&self); 1320 + } 1321 + drop(freeze_listeners); 1322 + 1323 + // Release refs on foreign nodes. 1324 + { 1325 + let mut refs = self.node_refs.lock(); 1326 + let by_handle = take(&mut refs.by_handle); 1327 + let by_node = take(&mut refs.by_node); 1328 + drop(refs); 1329 + drop(by_node); 1330 + drop(by_handle); 1331 + } 1332 + 1333 + // Cancel all pending work items. 1334 + while let Some(work) = self.get_work() { 1335 + work.into_arc().cancel(); 1336 + } 1337 + 1338 + let delivered_deaths = take(&mut self.inner.lock().delivered_deaths); 1339 + drop(delivered_deaths); 1340 + 1341 + // Free any resources kept alive by allocated buffers. 
1342 + let omapping = self.inner.lock().mapping.take(); 1343 + if let Some(mut mapping) = omapping { 1344 + let address = mapping.address; 1345 + mapping 1346 + .alloc 1347 + .take_for_each(|offset, size, debug_id, odata| { 1348 + let ptr = offset + address; 1349 + pr_warn!( 1350 + "{}: removing orphan mapping {offset}:{size}\n", 1351 + self.pid_in_current_ns() 1352 + ); 1353 + let mut alloc = 1354 + Allocation::new(self.clone(), debug_id, offset, size, ptr, false); 1355 + if let Some(data) = odata { 1356 + alloc.set_info(data); 1357 + } 1358 + drop(alloc) 1359 + }); 1360 + } 1361 + 1362 + // calls to synchronize_rcu() in thread drop will happen here 1363 + drop(threads); 1364 + } 1365 + 1366 + pub(crate) fn drop_outstanding_txn(&self) { 1367 + let wake = { 1368 + let mut inner = self.inner.lock(); 1369 + if inner.outstanding_txns == 0 { 1370 + pr_err!("outstanding_txns underflow"); 1371 + return; 1372 + } 1373 + inner.outstanding_txns -= 1; 1374 + inner.is_frozen && inner.outstanding_txns == 0 1375 + }; 1376 + 1377 + if wake { 1378 + self.freeze_wait.notify_all(); 1379 + } 1380 + } 1381 + 1382 + pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { 1383 + if info.enable == 0 { 1384 + let msgs = self.prepare_freeze_messages()?; 1385 + let mut inner = self.inner.lock(); 1386 + inner.sync_recv = false; 1387 + inner.async_recv = false; 1388 + inner.is_frozen = false; 1389 + drop(inner); 1390 + msgs.send_messages(); 1391 + return Ok(()); 1392 + } 1393 + 1394 + let mut inner = self.inner.lock(); 1395 + inner.sync_recv = false; 1396 + inner.async_recv = false; 1397 + inner.is_frozen = true; 1398 + 1399 + if info.timeout_ms > 0 { 1400 + let mut jiffies = kernel::time::msecs_to_jiffies(info.timeout_ms); 1401 + while jiffies > 0 { 1402 + if inner.outstanding_txns == 0 { 1403 + break; 1404 + } 1405 + 1406 + match self 1407 + .freeze_wait 1408 + .wait_interruptible_timeout(&mut inner, jiffies) 1409 + { 1410 + CondVarTimeoutResult::Signal { .. } => { 1411 + inner.is_frozen = false; 1412 + return Err(ERESTARTSYS); 1413 + } 1414 + CondVarTimeoutResult::Woken { jiffies: remaining } => { 1415 + jiffies = remaining; 1416 + } 1417 + CondVarTimeoutResult::Timeout => { 1418 + jiffies = 0; 1419 + } 1420 + } 1421 + } 1422 + } 1423 + 1424 + if inner.txns_pending_locked() { 1425 + inner.is_frozen = false; 1426 + Err(EAGAIN) 1427 + } else { 1428 + drop(inner); 1429 + match self.prepare_freeze_messages() { 1430 + Ok(batch) => { 1431 + batch.send_messages(); 1432 + Ok(()) 1433 + } 1434 + Err(kernel::alloc::AllocError) => { 1435 + self.inner.lock().is_frozen = false; 1436 + Err(ENOMEM) 1437 + } 1438 + } 1439 + } 1440 + } 1441 + } 1442 + 1443 + fn get_frozen_status(data: UserSlice) -> Result { 1444 + let (mut reader, mut writer) = data.reader_writer(); 1445 + 1446 + let mut info = reader.read::<BinderFrozenStatusInfo>()?; 1447 + info.sync_recv = 0; 1448 + info.async_recv = 0; 1449 + let mut found = false; 1450 + 1451 + for ctx in crate::context::get_all_contexts()? 
{ 1452 + ctx.for_each_proc(|proc| { 1453 + if proc.task.pid() == info.pid as _ { 1454 + found = true; 1455 + let inner = proc.inner.lock(); 1456 + let txns_pending = inner.txns_pending_locked(); 1457 + info.async_recv |= inner.async_recv as u32; 1458 + info.sync_recv |= inner.sync_recv as u32; 1459 + info.sync_recv |= (txns_pending as u32) << 1; 1460 + } 1461 + }); 1462 + } 1463 + 1464 + if found { 1465 + writer.write(&info)?; 1466 + Ok(()) 1467 + } else { 1468 + Err(EINVAL) 1469 + } 1470 + } 1471 + 1472 + fn ioctl_freeze(reader: &mut UserSliceReader) -> Result { 1473 + let info = reader.read::<BinderFreezeInfo>()?; 1474 + 1475 + // Very unlikely for there to be more than 3, since a process normally uses at most binder and 1476 + // hwbinder. 1477 + let mut procs = KVec::with_capacity(3, GFP_KERNEL)?; 1478 + 1479 + let ctxs = crate::context::get_all_contexts()?; 1480 + for ctx in ctxs { 1481 + for proc in ctx.get_procs_with_pid(info.pid as i32)? { 1482 + procs.push(proc, GFP_KERNEL)?; 1483 + } 1484 + } 1485 + 1486 + for proc in procs { 1487 + proc.ioctl_freeze(&info)?; 1488 + } 1489 + Ok(()) 1490 + } 1491 + 1492 + /// The ioctl handler. 1493 + impl Process { 1494 + /// Ioctls that are write-only from the perspective of userspace. 1495 + /// 1496 + /// The kernel will only read from the pointer that userspace provided to us. 1497 + fn ioctl_write_only( 1498 + this: ArcBorrow<'_, Process>, 1499 + _file: &File, 1500 + cmd: u32, 1501 + reader: &mut UserSliceReader, 1502 + ) -> Result { 1503 + let thread = this.get_current_thread()?; 1504 + match cmd { 1505 + uapi::BINDER_SET_MAX_THREADS => this.set_max_threads(reader.read()?), 1506 + uapi::BINDER_THREAD_EXIT => this.remove_thread(thread), 1507 + uapi::BINDER_SET_CONTEXT_MGR => this.set_as_manager(None, &thread)?, 1508 + uapi::BINDER_SET_CONTEXT_MGR_EXT => { 1509 + this.set_as_manager(Some(reader.read()?), &thread)? 1510 + } 1511 + uapi::BINDER_ENABLE_ONEWAY_SPAM_DETECTION => { 1512 + this.set_oneway_spam_detection_enabled(reader.read()?) 1513 + } 1514 + uapi::BINDER_FREEZE => ioctl_freeze(reader)?, 1515 + _ => return Err(EINVAL), 1516 + } 1517 + Ok(()) 1518 + } 1519 + 1520 + /// Ioctls that are read/write from the perspective of userspace. 1521 + /// 1522 + /// The kernel will both read from and write to the pointer that userspace provided to us. 1523 + fn ioctl_write_read( 1524 + this: ArcBorrow<'_, Process>, 1525 + file: &File, 1526 + cmd: u32, 1527 + data: UserSlice, 1528 + ) -> Result { 1529 + let thread = this.get_current_thread()?; 1530 + let blocking = (file.flags() & file::flags::O_NONBLOCK) == 0; 1531 + match cmd { 1532 + uapi::BINDER_WRITE_READ => thread.write_read(data, blocking)?, 1533 + uapi::BINDER_GET_NODE_DEBUG_INFO => this.get_node_debug_info(data)?, 1534 + uapi::BINDER_GET_NODE_INFO_FOR_REF => this.get_node_info_from_ref(data)?, 1535 + uapi::BINDER_VERSION => this.version(data)?, 1536 + uapi::BINDER_GET_FROZEN_INFO => get_frozen_status(data)?, 1537 + uapi::BINDER_GET_EXTENDED_ERROR => thread.get_extended_error(data)?, 1538 + _ => return Err(EINVAL), 1539 + } 1540 + Ok(()) 1541 + } 1542 + } 1543 + 1544 + /// The file operations supported by `Process`. 
1545 + impl Process { 1546 + pub(crate) fn open(ctx: ArcBorrow<'_, Context>, file: &File) -> Result<Arc<Process>> { 1547 + Self::new(ctx.into(), ARef::from(file.cred())) 1548 + } 1549 + 1550 + pub(crate) fn release(this: Arc<Process>, _file: &File) { 1551 + let binderfs_file; 1552 + let should_schedule; 1553 + { 1554 + let mut inner = this.inner.lock(); 1555 + should_schedule = inner.defer_work == 0; 1556 + inner.defer_work |= PROC_DEFER_RELEASE; 1557 + binderfs_file = inner.binderfs_file.take(); 1558 + } 1559 + 1560 + if should_schedule { 1561 + // Ignore failures to schedule to the workqueue. Those just mean that we're already 1562 + // scheduled for execution. 1563 + let _ = workqueue::system().enqueue(this); 1564 + } 1565 + 1566 + drop(binderfs_file); 1567 + } 1568 + 1569 + pub(crate) fn flush(this: ArcBorrow<'_, Process>) -> Result { 1570 + let should_schedule; 1571 + { 1572 + let mut inner = this.inner.lock(); 1573 + should_schedule = inner.defer_work == 0; 1574 + inner.defer_work |= PROC_DEFER_FLUSH; 1575 + } 1576 + 1577 + if should_schedule { 1578 + // Ignore failures to schedule to the workqueue. Those just mean that we're already 1579 + // scheduled for execution. 1580 + let _ = workqueue::system().enqueue(Arc::from(this)); 1581 + } 1582 + Ok(()) 1583 + } 1584 + 1585 + pub(crate) fn ioctl(this: ArcBorrow<'_, Process>, file: &File, cmd: u32, arg: usize) -> Result { 1586 + use kernel::ioctl::{_IOC_DIR, _IOC_SIZE}; 1587 + use kernel::uapi::{_IOC_READ, _IOC_WRITE}; 1588 + 1589 + crate::trace::trace_ioctl(cmd, arg); 1590 + 1591 + let user_slice = UserSlice::new(UserPtr::from_addr(arg), _IOC_SIZE(cmd)); 1592 + 1593 + const _IOC_READ_WRITE: u32 = _IOC_READ | _IOC_WRITE; 1594 + 1595 + match _IOC_DIR(cmd) { 1596 + _IOC_WRITE => Self::ioctl_write_only(this, file, cmd, &mut user_slice.reader()), 1597 + _IOC_READ_WRITE => Self::ioctl_write_read(this, file, cmd, user_slice), 1598 + _ => Err(EINVAL), 1599 + } 1600 + } 1601 + 1602 + pub(crate) fn compat_ioctl( 1603 + this: ArcBorrow<'_, Process>, 1604 + file: &File, 1605 + cmd: u32, 1606 + arg: usize, 1607 + ) -> Result { 1608 + Self::ioctl(this, file, cmd, arg) 1609 + } 1610 + 1611 + pub(crate) fn mmap( 1612 + this: ArcBorrow<'_, Process>, 1613 + _file: &File, 1614 + vma: &mm::virt::VmaNew, 1615 + ) -> Result { 1616 + // We don't allow mmap to be used in a different process. 1617 + if !core::ptr::eq(kernel::current!().group_leader(), &*this.task) { 1618 + return Err(EINVAL); 1619 + } 1620 + if vma.start() == 0 { 1621 + return Err(EINVAL); 1622 + } 1623 + 1624 + vma.try_clear_maywrite().map_err(|_| EPERM)?; 1625 + vma.set_dontcopy(); 1626 + vma.set_mixedmap(); 1627 + 1628 + // TODO: Set ops. We need to learn when the user unmaps so that we can stop using it. 1629 + this.create_mapping(vma) 1630 + } 1631 + 1632 + pub(crate) fn poll( 1633 + this: ArcBorrow<'_, Process>, 1634 + file: &File, 1635 + table: PollTable<'_>, 1636 + ) -> Result<u32> { 1637 + let thread = this.get_current_thread()?; 1638 + let (from_proc, mut mask) = thread.poll(file, table); 1639 + if mask == 0 && from_proc && !this.inner.lock().work.is_empty() { 1640 + mask |= bindings::POLLIN; 1641 + } 1642 + Ok(mask) 1643 + } 1644 + } 1645 + 1646 + /// Represents that a thread has registered with the `ready_threads` list of its process. 1647 + /// 1648 + /// The destructor of this type will unregister the thread from the list of ready threads. 
1649 + pub(crate) struct Registration<'a> { 1650 + thread: &'a Arc<Thread>, 1651 + } 1652 + 1653 + impl<'a> Registration<'a> { 1654 + fn new(thread: &'a Arc<Thread>, guard: &mut Guard<'_, ProcessInner, SpinLockBackend>) -> Self { 1655 + assert!(core::ptr::eq(&thread.process.inner, guard.lock_ref())); 1656 + // INVARIANT: We are pushing this thread to the right `ready_threads` list. 1657 + if let Ok(list_arc) = ListArc::try_from_arc(thread.clone()) { 1658 + guard.ready_threads.push_front(list_arc); 1659 + } else { 1660 + // It is an error to hit this branch, and it should not be reachable. We try to do 1661 + // something reasonable when the failure path happens. Most likely, the thread in 1662 + // question will sleep forever. 1663 + pr_err!("Same thread registered with `ready_threads` twice."); 1664 + } 1665 + Self { thread } 1666 + } 1667 + } 1668 + 1669 + impl Drop for Registration<'_> { 1670 + fn drop(&mut self) { 1671 + let mut inner = self.thread.process.inner.lock(); 1672 + // SAFETY: The thread has the invariant that we never push it to any other linked list than 1673 + // the `ready_threads` list of its parent process. Therefore, the thread is either in that 1674 + // list, or in no list. 1675 + unsafe { inner.ready_threads.remove(self.thread) }; 1676 + } 1677 + } 1678 + 1679 + pub(crate) struct WithNodes<'a> { 1680 + pub(crate) inner: Guard<'a, ProcessInner, SpinLockBackend>, 1681 + pub(crate) nodes: RBTree<u64, DArc<Node>>, 1682 + } 1683 + 1684 + impl Drop for WithNodes<'_> { 1685 + fn drop(&mut self) { 1686 + core::mem::swap(&mut self.nodes, &mut self.inner.nodes); 1687 + if self.nodes.iter().next().is_some() { 1688 + pr_err!("nodes array was modified while using lock_with_nodes\n"); 1689 + } 1690 + } 1691 + } 1692 + 1693 + pub(crate) enum GetWorkOrRegister<'a> { 1694 + Work(DLArc<dyn DeliverToRead>), 1695 + Register(Registration<'a>), 1696 + }
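Process::ioctl() above dispatches purely on the direction bits of the ioctl command word: commands that only pass data into the kernel are routed to ioctl_write_only() and get a UserSliceReader, while commands that also return data are handed the whole UserSlice through ioctl_write_read(). The standalone sketch below (plain userspace Rust, not driver code) restates that decoding; the bit layout is the common asm-generic one and the constants are repeated here as assumptions, not values taken from this patch.

    // Standalone sketch of the _IOC_DIR/_IOC_SIZE decoding used by
    // Process::ioctl(). Assumes the asm-generic ioctl bit layout; a few
    // architectures use a different one.
    const _IOC_NRBITS: u32 = 8;
    const _IOC_TYPEBITS: u32 = 8;
    const _IOC_SIZEBITS: u32 = 14;

    const _IOC_NRSHIFT: u32 = 0;
    const _IOC_TYPESHIFT: u32 = _IOC_NRSHIFT + _IOC_NRBITS;
    const _IOC_SIZESHIFT: u32 = _IOC_TYPESHIFT + _IOC_TYPEBITS;
    const _IOC_DIRSHIFT: u32 = _IOC_SIZESHIFT + _IOC_SIZEBITS;

    const _IOC_WRITE: u32 = 1;
    const _IOC_READ: u32 = 2;

    fn ioc_dir(cmd: u32) -> u32 {
        (cmd >> _IOC_DIRSHIFT) & 0x3
    }

    fn ioc_size(cmd: u32) -> usize {
        ((cmd >> _IOC_SIZESHIFT) & ((1 << _IOC_SIZEBITS) - 1)) as usize
    }

    fn main() {
        // A write-only command (BINDER_SET_MAX_THREADS and friends) carries
        // only _IOC_WRITE, so the driver builds a reader over the user slice.
        let write_only = (_IOC_WRITE << _IOC_DIRSHIFT) | (4 << _IOC_SIZESHIFT);
        assert_eq!(ioc_dir(write_only), _IOC_WRITE);
        assert_eq!(ioc_size(write_only), 4);

        // A write/read command (BINDER_WRITE_READ and friends) carries both
        // bits, so the handler gets the whole slice to read from and write to.
        let write_read = ((_IOC_READ | _IOC_WRITE) << _IOC_DIRSHIFT) | (24 << _IOC_SIZESHIFT);
        assert_eq!(ioc_dir(write_read), _IOC_READ | _IOC_WRITE);
    }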
+251
drivers/android/binder/range_alloc/array.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + use kernel::{ 6 + page::{PAGE_MASK, PAGE_SIZE}, 7 + prelude::*, 8 + seq_file::SeqFile, 9 + seq_print, 10 + task::Pid, 11 + }; 12 + 13 + use crate::range_alloc::{DescriptorState, FreedRange, Range}; 14 + 15 + /// Keeps track of allocations in a process' mmap. 16 + /// 17 + /// Each process has an mmap where the data for incoming transactions will be placed. This struct 18 + /// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that 19 + /// has metadata related to the allocation. We also keep track of available free space. 20 + pub(super) struct ArrayRangeAllocator<T> { 21 + /// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not* 22 + /// store the free ranges. 23 + /// 24 + /// Sorted by offset. 25 + pub(super) ranges: KVec<Range<T>>, 26 + size: usize, 27 + free_oneway_space: usize, 28 + } 29 + 30 + struct FindEmptyRes { 31 + /// Which index in `ranges` should we insert the new range at? 32 + /// 33 + /// Inserting the new range at this index keeps `ranges` sorted. 34 + insert_at_idx: usize, 35 + /// Which offset should we insert the new range at? 36 + insert_at_offset: usize, 37 + } 38 + 39 + impl<T> ArrayRangeAllocator<T> { 40 + pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self { 41 + Self { 42 + ranges: alloc.ranges, 43 + size, 44 + free_oneway_space: size / 2, 45 + } 46 + } 47 + 48 + pub(crate) fn free_oneway_space(&self) -> usize { 49 + self.free_oneway_space 50 + } 51 + 52 + pub(crate) fn count_buffers(&self) -> usize { 53 + self.ranges.len() 54 + } 55 + 56 + pub(crate) fn total_size(&self) -> usize { 57 + self.size 58 + } 59 + 60 + pub(crate) fn is_full(&self) -> bool { 61 + self.ranges.len() == self.ranges.capacity() 62 + } 63 + 64 + pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { 65 + for range in &self.ranges { 66 + seq_print!( 67 + m, 68 + " buffer {}: {} size {} pid {} oneway {}", 69 + 0, 70 + range.offset, 71 + range.size, 72 + range.state.pid(), 73 + range.state.is_oneway(), 74 + ); 75 + if let DescriptorState::Reserved(_) = range.state { 76 + seq_print!(m, " reserved\n"); 77 + } else { 78 + seq_print!(m, " allocated\n"); 79 + } 80 + } 81 + Ok(()) 82 + } 83 + 84 + /// Find somewhere to put a new range. 85 + /// 86 + /// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that 87 + /// fragmentation isn't a big issue when we don't have many ranges. 88 + /// 89 + /// Returns the index that the new range should have in `self.ranges` after insertion. 90 + fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> { 91 + let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0); 92 + 93 + if size <= self.total_size() - after_last_range { 94 + // We can put the range at the end, so just do that. 95 + Some(FindEmptyRes { 96 + insert_at_idx: self.ranges.len(), 97 + insert_at_offset: after_last_range, 98 + }) 99 + } else { 100 + let mut end_of_prev = 0; 101 + for (i, range) in self.ranges.iter().enumerate() { 102 + // Does it fit before the i'th range? 
103 + if size <= range.offset - end_of_prev { 104 + return Some(FindEmptyRes { 105 + insert_at_idx: i, 106 + insert_at_offset: end_of_prev, 107 + }); 108 + } 109 + end_of_prev = range.endpoint(); 110 + } 111 + None 112 + } 113 + } 114 + 115 + pub(crate) fn reserve_new( 116 + &mut self, 117 + debug_id: usize, 118 + size: usize, 119 + is_oneway: bool, 120 + pid: Pid, 121 + ) -> Result<usize> { 122 + // Compute new value of free_oneway_space, which is set only on success. 123 + let new_oneway_space = if is_oneway { 124 + match self.free_oneway_space.checked_sub(size) { 125 + Some(new_oneway_space) => new_oneway_space, 126 + None => return Err(ENOSPC), 127 + } 128 + } else { 129 + self.free_oneway_space 130 + }; 131 + 132 + let FindEmptyRes { 133 + insert_at_idx, 134 + insert_at_offset, 135 + } = self.find_empty_range(size).ok_or(ENOSPC)?; 136 + self.free_oneway_space = new_oneway_space; 137 + 138 + let new_range = Range { 139 + offset: insert_at_offset, 140 + size, 141 + state: DescriptorState::new(is_oneway, debug_id, pid), 142 + }; 143 + // Insert the value at the given index to keep the array sorted. 144 + self.ranges 145 + .insert_within_capacity(insert_at_idx, new_range) 146 + .ok() 147 + .unwrap(); 148 + 149 + Ok(insert_at_offset) 150 + } 151 + 152 + pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { 153 + // This could use a binary search, but linear scans are usually faster for small arrays. 154 + let i = self 155 + .ranges 156 + .iter() 157 + .position(|range| range.offset == offset) 158 + .ok_or(EINVAL)?; 159 + let range = &self.ranges[i]; 160 + 161 + if let DescriptorState::Allocated(_) = range.state { 162 + return Err(EPERM); 163 + } 164 + 165 + let size = range.size; 166 + let offset = range.offset; 167 + 168 + if range.state.is_oneway() { 169 + self.free_oneway_space += size; 170 + } 171 + 172 + // This computes the range of pages that are no longer used by *any* allocated range. The 173 + // caller will mark them as unused, which means that they can be freed if the system comes 174 + // under memory pressure. 175 + let mut freed_range = FreedRange::interior_pages(offset, size); 176 + #[expect(clippy::collapsible_if)] // reads better like this 177 + if offset % PAGE_SIZE != 0 { 178 + if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { 179 + freed_range.start_page_idx -= 1; 180 + } 181 + } 182 + if range.endpoint() % PAGE_SIZE != 0 { 183 + let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE; 184 + if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset { 185 + freed_range.end_page_idx += 1; 186 + } 187 + } 188 + 189 + self.ranges.remove(i)?; 190 + Ok(freed_range) 191 + } 192 + 193 + pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { 194 + // This could use a binary search, but linear scans are usually faster for small arrays. 195 + let range = self 196 + .ranges 197 + .iter_mut() 198 + .find(|range| range.offset == offset) 199 + .ok_or(ENOENT)?; 200 + 201 + let DescriptorState::Reserved(reservation) = &range.state else { 202 + return Err(ENOENT); 203 + }; 204 + 205 + range.state = DescriptorState::Allocated(reservation.clone().allocate(data.take())); 206 + Ok(()) 207 + } 208 + 209 + pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { 210 + // This could use a binary search, but linear scans are usually faster for small arrays. 
211 + let range = self 212 + .ranges 213 + .iter_mut() 214 + .find(|range| range.offset == offset) 215 + .ok_or(ENOENT)?; 216 + 217 + let DescriptorState::Allocated(allocation) = &mut range.state else { 218 + return Err(ENOENT); 219 + }; 220 + 221 + let data = allocation.take(); 222 + let debug_id = allocation.reservation.debug_id; 223 + range.state = DescriptorState::Reserved(allocation.reservation.clone()); 224 + Ok((range.size, debug_id, data)) 225 + } 226 + 227 + pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { 228 + for range in self.ranges.iter_mut() { 229 + if let DescriptorState::Allocated(allocation) = &mut range.state { 230 + callback( 231 + range.offset, 232 + range.size, 233 + allocation.reservation.debug_id, 234 + allocation.data.take(), 235 + ); 236 + } 237 + } 238 + } 239 + } 240 + 241 + pub(crate) struct EmptyArrayAlloc<T> { 242 + ranges: KVec<Range<T>>, 243 + } 244 + 245 + impl<T> EmptyArrayAlloc<T> { 246 + pub(crate) fn try_new(capacity: usize) -> Result<Self> { 247 + Ok(Self { 248 + ranges: KVec::with_capacity(capacity, GFP_KERNEL)?, 249 + }) 250 + } 251 + }
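find_empty_range() above is a deliberately simple first-fit scan over an offset-sorted array: append after the last range if there is room, otherwise take the first inter-range gap that is large enough. Since the array implementation only ever holds a handful of ranges, the fragmentation that a best-fit policy would avoid is not worth the extra bookkeeping. A standalone restatement of the scan (plain Rust slices instead of KVec; the helper name is ours, not the driver's):

    struct Range {
        offset: usize,
        size: usize,
    }

    /// Returns `(index to insert at, offset to place the new range at)`.
    fn find_gap(ranges: &[Range], total: usize, size: usize) -> Option<(usize, usize)> {
        // Prefer appending after the last range, as find_empty_range() does.
        let after_last = ranges.last().map(|r| r.offset + r.size).unwrap_or(0);
        if size <= total - after_last {
            return Some((ranges.len(), after_last));
        }
        // Otherwise take the first gap between existing ranges that fits.
        let mut end_of_prev = 0;
        for (i, r) in ranges.iter().enumerate() {
            if size <= r.offset - end_of_prev {
                return Some((i, end_of_prev));
            }
            end_of_prev = r.offset + r.size;
        }
        None
    }

    fn main() {
        let ranges = [Range { offset: 0, size: 16 }, Range { offset: 64, size: 8 }];
        // 8 bytes still fit after the last range (offsets 72..80).
        assert_eq!(find_gap(&ranges, 80, 8), Some((2, 72)));
        // 32 bytes do not fit at the end, but the gap [16, 64) holds them.
        assert_eq!(find_gap(&ranges, 80, 32), Some((1, 16)));
        // 60 bytes fit nowhere.
        assert_eq!(find_gap(&ranges, 80, 60), None);
    }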
+329
drivers/android/binder/range_alloc/mod.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + use kernel::{page::PAGE_SIZE, prelude::*, seq_file::SeqFile, task::Pid}; 6 + 7 + mod tree; 8 + use self::tree::{FromArrayAllocs, ReserveNewTreeAlloc, TreeRangeAllocator}; 9 + 10 + mod array; 11 + use self::array::{ArrayRangeAllocator, EmptyArrayAlloc}; 12 + 13 + enum DescriptorState<T> { 14 + Reserved(Reservation), 15 + Allocated(Allocation<T>), 16 + } 17 + 18 + impl<T> DescriptorState<T> { 19 + fn new(is_oneway: bool, debug_id: usize, pid: Pid) -> Self { 20 + DescriptorState::Reserved(Reservation { 21 + debug_id, 22 + is_oneway, 23 + pid, 24 + }) 25 + } 26 + 27 + fn pid(&self) -> Pid { 28 + match self { 29 + DescriptorState::Reserved(inner) => inner.pid, 30 + DescriptorState::Allocated(inner) => inner.reservation.pid, 31 + } 32 + } 33 + 34 + fn is_oneway(&self) -> bool { 35 + match self { 36 + DescriptorState::Reserved(inner) => inner.is_oneway, 37 + DescriptorState::Allocated(inner) => inner.reservation.is_oneway, 38 + } 39 + } 40 + } 41 + 42 + #[derive(Clone)] 43 + struct Reservation { 44 + debug_id: usize, 45 + is_oneway: bool, 46 + pid: Pid, 47 + } 48 + 49 + impl Reservation { 50 + fn allocate<T>(self, data: Option<T>) -> Allocation<T> { 51 + Allocation { 52 + data, 53 + reservation: self, 54 + } 55 + } 56 + } 57 + 58 + struct Allocation<T> { 59 + reservation: Reservation, 60 + data: Option<T>, 61 + } 62 + 63 + impl<T> Allocation<T> { 64 + fn deallocate(self) -> (Reservation, Option<T>) { 65 + (self.reservation, self.data) 66 + } 67 + 68 + fn debug_id(&self) -> usize { 69 + self.reservation.debug_id 70 + } 71 + 72 + fn take(&mut self) -> Option<T> { 73 + self.data.take() 74 + } 75 + } 76 + 77 + /// The array implementation must switch to the tree if it wants to go beyond this number of 78 + /// ranges. 79 + const TREE_THRESHOLD: usize = 8; 80 + 81 + /// Represents a range of pages that have just become completely free. 
82 + #[derive(Copy, Clone)] 83 + pub(crate) struct FreedRange { 84 + pub(crate) start_page_idx: usize, 85 + pub(crate) end_page_idx: usize, 86 + } 87 + 88 + impl FreedRange { 89 + fn interior_pages(offset: usize, size: usize) -> FreedRange { 90 + FreedRange { 91 + // Divide round up 92 + start_page_idx: offset.div_ceil(PAGE_SIZE), 93 + // Divide round down 94 + end_page_idx: (offset + size) / PAGE_SIZE, 95 + } 96 + } 97 + } 98 + 99 + struct Range<T> { 100 + offset: usize, 101 + size: usize, 102 + state: DescriptorState<T>, 103 + } 104 + 105 + impl<T> Range<T> { 106 + fn endpoint(&self) -> usize { 107 + self.offset + self.size 108 + } 109 + } 110 + 111 + pub(crate) struct RangeAllocator<T> { 112 + inner: Impl<T>, 113 + } 114 + 115 + enum Impl<T> { 116 + Empty(usize), 117 + Array(ArrayRangeAllocator<T>), 118 + Tree(TreeRangeAllocator<T>), 119 + } 120 + 121 + impl<T> RangeAllocator<T> { 122 + pub(crate) fn new(size: usize) -> Self { 123 + Self { 124 + inner: Impl::Empty(size), 125 + } 126 + } 127 + 128 + pub(crate) fn free_oneway_space(&self) -> usize { 129 + match &self.inner { 130 + Impl::Empty(size) => size / 2, 131 + Impl::Array(array) => array.free_oneway_space(), 132 + Impl::Tree(tree) => tree.free_oneway_space(), 133 + } 134 + } 135 + 136 + pub(crate) fn count_buffers(&self) -> usize { 137 + match &self.inner { 138 + Impl::Empty(_size) => 0, 139 + Impl::Array(array) => array.count_buffers(), 140 + Impl::Tree(tree) => tree.count_buffers(), 141 + } 142 + } 143 + 144 + pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { 145 + match &self.inner { 146 + Impl::Empty(_size) => Ok(()), 147 + Impl::Array(array) => array.debug_print(m), 148 + Impl::Tree(tree) => tree.debug_print(m), 149 + } 150 + } 151 + 152 + /// Try to reserve a new buffer, using the provided allocation if necessary. 
153 + pub(crate) fn reserve_new(&mut self, mut args: ReserveNewArgs<T>) -> Result<ReserveNew<T>> { 154 + match &mut self.inner { 155 + Impl::Empty(size) => { 156 + let empty_array = match args.empty_array_alloc.take() { 157 + Some(empty_array) => ArrayRangeAllocator::new(*size, empty_array), 158 + None => { 159 + return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc { 160 + args, 161 + need_empty_array_alloc: true, 162 + need_new_tree_alloc: false, 163 + need_tree_alloc: false, 164 + })) 165 + } 166 + }; 167 + 168 + self.inner = Impl::Array(empty_array); 169 + self.reserve_new(args) 170 + } 171 + Impl::Array(array) if array.is_full() => { 172 + let allocs = match args.new_tree_alloc { 173 + Some(ref mut allocs) => allocs, 174 + None => { 175 + return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc { 176 + args, 177 + need_empty_array_alloc: false, 178 + need_new_tree_alloc: true, 179 + need_tree_alloc: true, 180 + })) 181 + } 182 + }; 183 + 184 + let new_tree = 185 + TreeRangeAllocator::from_array(array.total_size(), &mut array.ranges, allocs); 186 + 187 + self.inner = Impl::Tree(new_tree); 188 + self.reserve_new(args) 189 + } 190 + Impl::Array(array) => { 191 + let offset = 192 + array.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid)?; 193 + Ok(ReserveNew::Success(ReserveNewSuccess { 194 + offset, 195 + oneway_spam_detected: false, 196 + _empty_array_alloc: args.empty_array_alloc, 197 + _new_tree_alloc: args.new_tree_alloc, 198 + _tree_alloc: args.tree_alloc, 199 + })) 200 + } 201 + Impl::Tree(tree) => { 202 + let alloc = match args.tree_alloc { 203 + Some(alloc) => alloc, 204 + None => { 205 + return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc { 206 + args, 207 + need_empty_array_alloc: false, 208 + need_new_tree_alloc: false, 209 + need_tree_alloc: true, 210 + })); 211 + } 212 + }; 213 + let (offset, oneway_spam_detected) = 214 + tree.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid, alloc)?; 215 + Ok(ReserveNew::Success(ReserveNewSuccess { 216 + offset, 217 + oneway_spam_detected, 218 + _empty_array_alloc: args.empty_array_alloc, 219 + _new_tree_alloc: args.new_tree_alloc, 220 + _tree_alloc: None, 221 + })) 222 + } 223 + } 224 + } 225 + 226 + /// Deletes the allocations at `offset`. 227 + pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { 228 + match &mut self.inner { 229 + Impl::Empty(_size) => Err(EINVAL), 230 + Impl::Array(array) => array.reservation_abort(offset), 231 + Impl::Tree(tree) => { 232 + let freed_range = tree.reservation_abort(offset)?; 233 + if tree.is_empty() { 234 + self.inner = Impl::Empty(tree.total_size()); 235 + } 236 + Ok(freed_range) 237 + } 238 + } 239 + } 240 + 241 + /// Called when an allocation is no longer in use by the kernel. 242 + /// 243 + /// The value in `data` will be stored, if any. A mutable reference is used to avoid dropping 244 + /// the `T` when an error is returned. 245 + pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { 246 + match &mut self.inner { 247 + Impl::Empty(_size) => Err(EINVAL), 248 + Impl::Array(array) => array.reservation_commit(offset, data), 249 + Impl::Tree(tree) => tree.reservation_commit(offset, data), 250 + } 251 + } 252 + 253 + /// Called when the kernel starts using an allocation. 254 + /// 255 + /// Returns the size of the existing entry and the data associated with it. 
256 + pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { 257 + match &mut self.inner { 258 + Impl::Empty(_size) => Err(EINVAL), 259 + Impl::Array(array) => array.reserve_existing(offset), 260 + Impl::Tree(tree) => tree.reserve_existing(offset), 261 + } 262 + } 263 + 264 + /// Call the provided callback at every allocated region. 265 + /// 266 + /// This destroys the range allocator. Used only during shutdown. 267 + pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { 268 + match &mut self.inner { 269 + Impl::Empty(_size) => {} 270 + Impl::Array(array) => array.take_for_each(callback), 271 + Impl::Tree(tree) => tree.take_for_each(callback), 272 + } 273 + } 274 + } 275 + 276 + /// The arguments for `reserve_new`. 277 + #[derive(Default)] 278 + pub(crate) struct ReserveNewArgs<T> { 279 + pub(crate) size: usize, 280 + pub(crate) is_oneway: bool, 281 + pub(crate) debug_id: usize, 282 + pub(crate) pid: Pid, 283 + pub(crate) empty_array_alloc: Option<EmptyArrayAlloc<T>>, 284 + pub(crate) new_tree_alloc: Option<FromArrayAllocs<T>>, 285 + pub(crate) tree_alloc: Option<ReserveNewTreeAlloc<T>>, 286 + } 287 + 288 + /// The return type of `ReserveNew`. 289 + pub(crate) enum ReserveNew<T> { 290 + Success(ReserveNewSuccess<T>), 291 + NeedAlloc(ReserveNewNeedAlloc<T>), 292 + } 293 + 294 + /// Returned by `reserve_new` when the reservation was successul. 295 + pub(crate) struct ReserveNewSuccess<T> { 296 + pub(crate) offset: usize, 297 + pub(crate) oneway_spam_detected: bool, 298 + 299 + // If the user supplied an allocation that we did not end up using, then we return it here. 300 + // The caller will kfree it outside of the lock. 301 + _empty_array_alloc: Option<EmptyArrayAlloc<T>>, 302 + _new_tree_alloc: Option<FromArrayAllocs<T>>, 303 + _tree_alloc: Option<ReserveNewTreeAlloc<T>>, 304 + } 305 + 306 + /// Returned by `reserve_new` to request the caller to make an allocation before calling the method 307 + /// again. 308 + pub(crate) struct ReserveNewNeedAlloc<T> { 309 + args: ReserveNewArgs<T>, 310 + need_empty_array_alloc: bool, 311 + need_new_tree_alloc: bool, 312 + need_tree_alloc: bool, 313 + } 314 + 315 + impl<T> ReserveNewNeedAlloc<T> { 316 + /// Make the necessary allocations for another call to `reserve_new`. 317 + pub(crate) fn make_alloc(mut self) -> Result<ReserveNewArgs<T>> { 318 + if self.need_empty_array_alloc && self.args.empty_array_alloc.is_none() { 319 + self.args.empty_array_alloc = Some(EmptyArrayAlloc::try_new(TREE_THRESHOLD)?); 320 + } 321 + if self.need_new_tree_alloc && self.args.new_tree_alloc.is_none() { 322 + self.args.new_tree_alloc = Some(FromArrayAllocs::try_new(TREE_THRESHOLD)?); 323 + } 324 + if self.need_tree_alloc && self.args.tree_alloc.is_none() { 325 + self.args.tree_alloc = Some(ReserveNewTreeAlloc::try_new()?); 326 + } 327 + Ok(self.args) 328 + } 329 + }
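A subtlety of reserve_new() above is that it never allocates memory itself: it runs with the process' spinlock held, so whenever it needs backing storage (the initial array, the array-to-tree conversion, or new tree nodes) it returns ReserveNew::NeedAlloc instead. The caller is expected to drop the lock, call make_alloc() to perform the GFP_KERNEL allocations, and retry with the returned arguments; any allocation that turns out to be unnecessary rides back in the Success value so it can be freed outside the lock. The toy below (plain std Rust with stand-in types, not the driver's API) models only that retry shape:

    struct Args {
        size: usize,
        // Stands in for EmptyArrayAlloc / FromArrayAllocs / ReserveNewTreeAlloc.
        prealloc: Option<Vec<u8>>,
    }

    enum Reserve {
        Success { offset: usize },
        NeedAlloc { args: Args, bytes_needed: usize },
    }

    struct Allocator {
        next_offset: usize,
        spare: Option<Vec<u8>>,
    }

    impl Allocator {
        /// Never allocates: it either consumes memory the caller passed in via
        /// `args.prealloc`, or asks the caller to go allocate it.
        fn reserve_new(&mut self, mut args: Args) -> Reserve {
            if self.spare.is_none() {
                match args.prealloc.take() {
                    Some(buf) => self.spare = Some(buf),
                    None => {
                        let bytes_needed = args.size;
                        return Reserve::NeedAlloc { args, bytes_needed };
                    }
                }
            }
            let offset = self.next_offset;
            self.next_offset += args.size;
            Reserve::Success { offset }
        }
    }

    fn main() {
        let mut alloc = Allocator { next_offset: 0, spare: None };
        let mut args = Args { size: 128, prealloc: None };
        let offset = loop {
            // In the driver this call is made under the ProcessInner spinlock.
            match alloc.reserve_new(args) {
                Reserve::Success { offset } => break offset,
                Reserve::NeedAlloc { args: mut a, bytes_needed } => {
                    // In the driver the lock is dropped here and make_alloc()
                    // performs the GFP_KERNEL allocations before retrying.
                    a.prealloc = Some(vec![0u8; bytes_needed]);
                    args = a;
                }
            }
        };
        assert_eq!(offset, 0);
    }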
+488
drivers/android/binder/range_alloc/tree.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + use kernel::{ 6 + page::PAGE_SIZE, 7 + prelude::*, 8 + rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation}, 9 + seq_file::SeqFile, 10 + seq_print, 11 + task::Pid, 12 + }; 13 + 14 + use crate::range_alloc::{DescriptorState, FreedRange, Range}; 15 + 16 + /// Keeps track of allocations in a process' mmap. 17 + /// 18 + /// Each process has an mmap where the data for incoming transactions will be placed. This struct 19 + /// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that 20 + /// has metadata related to the allocation. We also keep track of available free space. 21 + pub(super) struct TreeRangeAllocator<T> { 22 + /// This collection contains descriptors for *both* ranges containing an allocation, *and* free 23 + /// ranges between allocations. The free ranges get merged, so there are never two free ranges 24 + /// next to each other. 25 + tree: RBTree<usize, Descriptor<T>>, 26 + /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size, 27 + /// letting us look up the smallest range whose size is at least some lower bound. 28 + free_tree: RBTree<FreeKey, ()>, 29 + size: usize, 30 + free_oneway_space: usize, 31 + } 32 + 33 + impl<T> TreeRangeAllocator<T> { 34 + pub(crate) fn from_array( 35 + size: usize, 36 + ranges: &mut KVec<Range<T>>, 37 + alloc: &mut FromArrayAllocs<T>, 38 + ) -> Self { 39 + let mut tree = TreeRangeAllocator { 40 + tree: RBTree::new(), 41 + free_tree: RBTree::new(), 42 + size, 43 + free_oneway_space: size / 2, 44 + }; 45 + 46 + let mut free_offset = 0; 47 + for range in ranges.drain_all() { 48 + let free_size = range.offset - free_offset; 49 + if free_size > 0 { 50 + let free_node = alloc.free_tree.pop().unwrap(); 51 + tree.free_tree 52 + .insert(free_node.into_node((free_size, free_offset), ())); 53 + let tree_node = alloc.tree.pop().unwrap(); 54 + tree.tree.insert( 55 + tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)), 56 + ); 57 + } 58 + free_offset = range.endpoint(); 59 + 60 + if range.state.is_oneway() { 61 + tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size); 62 + } 63 + 64 + let free_res = alloc.free_tree.pop().unwrap(); 65 + let tree_node = alloc.tree.pop().unwrap(); 66 + let mut desc = Descriptor::new(range.offset, range.size); 67 + desc.state = Some((range.state, free_res)); 68 + tree.tree.insert(tree_node.into_node(range.offset, desc)); 69 + } 70 + 71 + // After the last range, we may need a free range. 72 + if free_offset < size { 73 + let free_size = size - free_offset; 74 + let free_node = alloc.free_tree.pop().unwrap(); 75 + tree.free_tree 76 + .insert(free_node.into_node((free_size, free_offset), ())); 77 + let tree_node = alloc.tree.pop().unwrap(); 78 + tree.tree 79 + .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size))); 80 + } 81 + 82 + tree 83 + } 84 + 85 + pub(crate) fn is_empty(&self) -> bool { 86 + let mut tree_iter = self.tree.values(); 87 + // There's always at least one range, because index zero is either the start of a free or 88 + // allocated range. 89 + let first_value = tree_iter.next().unwrap(); 90 + if tree_iter.next().is_some() { 91 + // There are never two free ranges next to each other, so if there is more than one 92 + // descriptor, then at least one of them must hold an allocated range. 93 + return false; 94 + } 95 + // There is only one descriptor. 
Return true if it is for a free range. 96 + first_value.state.is_none() 97 + } 98 + 99 + pub(crate) fn total_size(&self) -> usize { 100 + self.size 101 + } 102 + 103 + pub(crate) fn free_oneway_space(&self) -> usize { 104 + self.free_oneway_space 105 + } 106 + 107 + pub(crate) fn count_buffers(&self) -> usize { 108 + self.tree 109 + .values() 110 + .filter(|desc| desc.state.is_some()) 111 + .count() 112 + } 113 + 114 + pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { 115 + for desc in self.tree.values() { 116 + let state = match &desc.state { 117 + Some(state) => &state.0, 118 + None => continue, 119 + }; 120 + seq_print!( 121 + m, 122 + " buffer: {} size {} pid {}", 123 + desc.offset, 124 + desc.size, 125 + state.pid(), 126 + ); 127 + if state.is_oneway() { 128 + seq_print!(m, " oneway"); 129 + } 130 + match state { 131 + DescriptorState::Reserved(_res) => { 132 + seq_print!(m, " reserved\n"); 133 + } 134 + DescriptorState::Allocated(_alloc) => { 135 + seq_print!(m, " allocated\n"); 136 + } 137 + } 138 + } 139 + Ok(()) 140 + } 141 + 142 + fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> { 143 + let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?; 144 + let ((_, offset), ()) = free_cursor.current(); 145 + self.tree.get_mut(offset) 146 + } 147 + 148 + /// Try to reserve a new buffer, using the provided allocation if necessary. 149 + pub(crate) fn reserve_new( 150 + &mut self, 151 + debug_id: usize, 152 + size: usize, 153 + is_oneway: bool, 154 + pid: Pid, 155 + alloc: ReserveNewTreeAlloc<T>, 156 + ) -> Result<(usize, bool)> { 157 + // Compute new value of free_oneway_space, which is set only on success. 158 + let new_oneway_space = if is_oneway { 159 + match self.free_oneway_space.checked_sub(size) { 160 + Some(new_oneway_space) => new_oneway_space, 161 + None => return Err(ENOSPC), 162 + } 163 + } else { 164 + self.free_oneway_space 165 + }; 166 + 167 + // Start detecting spammers once we have less than 20% 168 + // of async space left (which is less than 10% of total 169 + // buffer size). 170 + // 171 + // (This will short-circut, so `low_oneway_space` is 172 + // only called when necessary.) 
173 + let oneway_spam_detected = 174 + is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); 175 + 176 + let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) { 177 + None => { 178 + pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size); 179 + return Err(ENOSPC); 180 + } 181 + Some(desc) => { 182 + let found_size = desc.size; 183 + let found_offset = desc.offset; 184 + 185 + // In case we need to break up the descriptor 186 + let new_desc = Descriptor::new(found_offset + size, found_size - size); 187 + let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc); 188 + 189 + desc.state = Some(( 190 + DescriptorState::new(is_oneway, debug_id, pid), 191 + desc_node_res, 192 + )); 193 + desc.size = size; 194 + 195 + (found_size, found_offset, tree_node, free_tree_node) 196 + } 197 + }; 198 + self.free_oneway_space = new_oneway_space; 199 + self.free_tree.remove(&(found_size, found_off)); 200 + 201 + if found_size != size { 202 + self.tree.insert(tree_node); 203 + self.free_tree.insert(free_tree_node); 204 + } 205 + 206 + Ok((found_off, oneway_spam_detected)) 207 + } 208 + 209 + pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { 210 + let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| { 211 + pr_warn!( 212 + "EINVAL from range_alloc.reservation_abort - offset: {}", 213 + offset 214 + ); 215 + EINVAL 216 + })?; 217 + 218 + let (_, desc) = cursor.current_mut(); 219 + 220 + if desc.offset != offset { 221 + pr_warn!( 222 + "EINVAL from range_alloc.reservation_abort - offset: {}", 223 + offset 224 + ); 225 + return Err(EINVAL); 226 + } 227 + 228 + let (reservation, free_node_res) = desc.try_change_state(|state| match state { 229 + Some((DescriptorState::Reserved(reservation), free_node_res)) => { 230 + (None, Ok((reservation, free_node_res))) 231 + } 232 + None => { 233 + pr_warn!( 234 + "EINVAL from range_alloc.reservation_abort - offset: {}", 235 + offset 236 + ); 237 + (None, Err(EINVAL)) 238 + } 239 + allocated => { 240 + pr_warn!( 241 + "EPERM from range_alloc.reservation_abort - offset: {}", 242 + offset 243 + ); 244 + (allocated, Err(EPERM)) 245 + } 246 + })?; 247 + 248 + let mut size = desc.size; 249 + let mut offset = desc.offset; 250 + let free_oneway_space_add = if reservation.is_oneway { size } else { 0 }; 251 + 252 + self.free_oneway_space += free_oneway_space_add; 253 + 254 + let mut freed_range = FreedRange::interior_pages(offset, size); 255 + // Compute how large the next free region needs to be to include one more page in 256 + // the newly freed range. 257 + let add_next_page_needed = match (offset + size) % PAGE_SIZE { 258 + 0 => usize::MAX, 259 + unalign => PAGE_SIZE - unalign, 260 + }; 261 + // Compute how large the previous free region needs to be to include one more page 262 + // in the newly freed range. 
263 + let add_prev_page_needed = match offset % PAGE_SIZE { 264 + 0 => usize::MAX, 265 + unalign => unalign, 266 + }; 267 + 268 + // Merge next into current if next is free 269 + let remove_next = match cursor.peek_next() { 270 + Some((_, next)) if next.state.is_none() => { 271 + if next.size >= add_next_page_needed { 272 + freed_range.end_page_idx += 1; 273 + } 274 + self.free_tree.remove(&(next.size, next.offset)); 275 + size += next.size; 276 + true 277 + } 278 + _ => false, 279 + }; 280 + 281 + if remove_next { 282 + let (_, desc) = cursor.current_mut(); 283 + desc.size = size; 284 + cursor.remove_next(); 285 + } 286 + 287 + // Merge current into prev if prev is free 288 + match cursor.peek_prev_mut() { 289 + Some((_, prev)) if prev.state.is_none() => { 290 + if prev.size >= add_prev_page_needed { 291 + freed_range.start_page_idx -= 1; 292 + } 293 + // merge previous with current, remove current 294 + self.free_tree.remove(&(prev.size, prev.offset)); 295 + offset = prev.offset; 296 + size += prev.size; 297 + prev.size = size; 298 + cursor.remove_current(); 299 + } 300 + _ => {} 301 + }; 302 + 303 + self.free_tree 304 + .insert(free_node_res.into_node((size, offset), ())); 305 + 306 + Ok(freed_range) 307 + } 308 + 309 + pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { 310 + let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?; 311 + 312 + desc.try_change_state(|state| match state { 313 + Some((DescriptorState::Reserved(reservation), free_node_res)) => ( 314 + Some(( 315 + DescriptorState::Allocated(reservation.allocate(data.take())), 316 + free_node_res, 317 + )), 318 + Ok(()), 319 + ), 320 + other => (other, Err(ENOENT)), 321 + }) 322 + } 323 + 324 + /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to 325 + /// [`DescriptorState::Reserved`]. 326 + /// 327 + /// Returns the size of the existing entry and the data associated with it. 328 + pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { 329 + let desc = self.tree.get_mut(&offset).ok_or_else(|| { 330 + pr_warn!( 331 + "ENOENT from range_alloc.reserve_existing - offset: {}", 332 + offset 333 + ); 334 + ENOENT 335 + })?; 336 + 337 + let (debug_id, data) = desc.try_change_state(|state| match state { 338 + Some((DescriptorState::Allocated(allocation), free_node_res)) => { 339 + let (reservation, data) = allocation.deallocate(); 340 + let debug_id = reservation.debug_id; 341 + ( 342 + Some((DescriptorState::Reserved(reservation), free_node_res)), 343 + Ok((debug_id, data)), 344 + ) 345 + } 346 + other => { 347 + pr_warn!( 348 + "ENOENT from range_alloc.reserve_existing - offset: {}", 349 + offset 350 + ); 351 + (other, Err(ENOENT)) 352 + } 353 + })?; 354 + 355 + Ok((desc.size, debug_id, data)) 356 + } 357 + 358 + /// Call the provided callback at every allocated region. 359 + /// 360 + /// This destroys the range allocator. Used only during shutdown. 361 + pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { 362 + for (_, desc) in self.tree.iter_mut() { 363 + if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state { 364 + callback( 365 + desc.offset, 366 + desc.size, 367 + allocation.debug_id(), 368 + allocation.take(), 369 + ); 370 + } 371 + } 372 + } 373 + 374 + /// Find the amount and size of buffers allocated by the current caller. 
375 + /// 376 + /// The idea is that once we cross the threshold, whoever is responsible 377 + /// for the low async space is likely to try to send another async transaction, 378 + /// and at some point we'll catch them in the act. This is more efficient 379 + /// than keeping a map per pid. 380 + fn low_oneway_space(&self, calling_pid: Pid) -> bool { 381 + let mut total_alloc_size = 0; 382 + let mut num_buffers = 0; 383 + for (_, desc) in self.tree.iter() { 384 + if let Some((state, _)) = &desc.state { 385 + if state.is_oneway() && state.pid() == calling_pid { 386 + total_alloc_size += desc.size; 387 + num_buffers += 1; 388 + } 389 + } 390 + } 391 + 392 + // Warn if this pid has more than 50 transactions, or more than 50% of 393 + // async space (which is 25% of total buffer size). Oneway spam is only 394 + // detected when the threshold is exceeded. 395 + num_buffers > 50 || total_alloc_size > self.size / 4 396 + } 397 + } 398 + 399 + type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes); 400 + struct Descriptor<T> { 401 + size: usize, 402 + offset: usize, 403 + state: Option<TreeDescriptorState<T>>, 404 + } 405 + 406 + impl<T> Descriptor<T> { 407 + fn new(offset: usize, size: usize) -> Self { 408 + Self { 409 + size, 410 + offset, 411 + state: None, 412 + } 413 + } 414 + 415 + fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data> 416 + where 417 + F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>), 418 + { 419 + let (new_state, result) = f(self.state.take()); 420 + self.state = new_state; 421 + result 422 + } 423 + } 424 + 425 + // (Descriptor.size, Descriptor.offset) 426 + type FreeKey = (usize, usize); 427 + type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>; 428 + 429 + /// An allocation for use by `reserve_new`. 430 + pub(crate) struct ReserveNewTreeAlloc<T> { 431 + tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>, 432 + free_tree_node_res: FreeNodeRes, 433 + desc_node_res: FreeNodeRes, 434 + } 435 + 436 + impl<T> ReserveNewTreeAlloc<T> { 437 + pub(crate) fn try_new() -> Result<Self> { 438 + let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?; 439 + let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?; 440 + let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?; 441 + Ok(Self { 442 + tree_node_res, 443 + free_tree_node_res, 444 + desc_node_res, 445 + }) 446 + } 447 + 448 + fn initialize( 449 + self, 450 + desc: Descriptor<T>, 451 + ) -> ( 452 + RBTreeNode<usize, Descriptor<T>>, 453 + RBTreeNode<FreeKey, ()>, 454 + FreeNodeRes, 455 + ) { 456 + let size = desc.size; 457 + let offset = desc.offset; 458 + ( 459 + self.tree_node_res.into_node(offset, desc), 460 + self.free_tree_node_res.into_node((size, offset), ()), 461 + self.desc_node_res, 462 + ) 463 + } 464 + } 465 + 466 + /// An allocation for creating a tree from an `ArrayRangeAllocator`. 
467 + pub(crate) struct FromArrayAllocs<T> { 468 + tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>, 469 + free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>, 470 + } 471 + 472 + impl<T> FromArrayAllocs<T> { 473 + pub(crate) fn try_new(len: usize) -> Result<Self> { 474 + let num_descriptors = 2 * len + 1; 475 + 476 + let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?; 477 + for _ in 0..num_descriptors { 478 + tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?; 479 + } 480 + 481 + let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?; 482 + for _ in 0..num_descriptors { 483 + free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?; 484 + } 485 + 486 + Ok(Self { tree, free_tree }) 487 + } 488 + }
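The second tree above, free_tree, is what keeps the best-fit lookup cheap: every free range is indexed by the pair (size, offset), so cursor_lower_bound(&(size, 0)) lands directly on the smallest free range able to hold the request. The same idea expressed with std's BTreeMap (standalone, illustrative values only):

    use std::collections::BTreeMap;

    fn main() {
        // (size, offset) -> (), mirroring free_tree in TreeRangeAllocator.
        let mut free_tree: BTreeMap<(usize, usize), ()> = BTreeMap::new();
        free_tree.insert((32, 4096), ());
        free_tree.insert((64, 0), ());
        free_tree.insert((128, 8192), ());

        // Lower-bound search at (48, 0): the 32-byte range sorts before it,
        // so the first hit is the 64-byte range, the smallest one that fits.
        let requested = 48;
        let best = free_tree.range((requested, 0)..).next().map(|(&key, _)| key);
        assert_eq!(best, Some((64, 0)));
    }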
+23
drivers/android/binder/rust_binder.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2025 Google, Inc. 4 + */ 5 + 6 + #ifndef _LINUX_RUST_BINDER_H 7 + #define _LINUX_RUST_BINDER_H 8 + 9 + #include <uapi/linux/android/binder.h> 10 + #include <uapi/linux/android/binderfs.h> 11 + 12 + /* 13 + * These symbols are exposed by `rust_binderfs.c` and exist here so that Rust 14 + * Binder can call them. 15 + */ 16 + int init_rust_binderfs(void); 17 + 18 + struct dentry; 19 + struct inode; 20 + struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid); 21 + void rust_binderfs_remove_file(struct dentry *dentry); 22 + 23 + #endif
+59
drivers/android/binder/rust_binder_events.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* rust_binder_events.c 3 + * 4 + * Rust Binder tracepoints. 5 + * 6 + * Copyright 2025 Google LLC 7 + */ 8 + 9 + #include "rust_binder.h" 10 + 11 + const char * const binder_command_strings[] = { 12 + "BC_TRANSACTION", 13 + "BC_REPLY", 14 + "BC_ACQUIRE_RESULT", 15 + "BC_FREE_BUFFER", 16 + "BC_INCREFS", 17 + "BC_ACQUIRE", 18 + "BC_RELEASE", 19 + "BC_DECREFS", 20 + "BC_INCREFS_DONE", 21 + "BC_ACQUIRE_DONE", 22 + "BC_ATTEMPT_ACQUIRE", 23 + "BC_REGISTER_LOOPER", 24 + "BC_ENTER_LOOPER", 25 + "BC_EXIT_LOOPER", 26 + "BC_REQUEST_DEATH_NOTIFICATION", 27 + "BC_CLEAR_DEATH_NOTIFICATION", 28 + "BC_DEAD_BINDER_DONE", 29 + "BC_TRANSACTION_SG", 30 + "BC_REPLY_SG", 31 + }; 32 + 33 + const char * const binder_return_strings[] = { 34 + "BR_ERROR", 35 + "BR_OK", 36 + "BR_TRANSACTION", 37 + "BR_REPLY", 38 + "BR_ACQUIRE_RESULT", 39 + "BR_DEAD_REPLY", 40 + "BR_TRANSACTION_COMPLETE", 41 + "BR_INCREFS", 42 + "BR_ACQUIRE", 43 + "BR_RELEASE", 44 + "BR_DECREFS", 45 + "BR_ATTEMPT_ACQUIRE", 46 + "BR_NOOP", 47 + "BR_SPAWN_LOOPER", 48 + "BR_FINISHED", 49 + "BR_DEAD_BINDER", 50 + "BR_CLEAR_DEATH_NOTIFICATION_DONE", 51 + "BR_FAILED_REPLY", 52 + "BR_FROZEN_REPLY", 53 + "BR_ONEWAY_SPAM_SUSPECT", 54 + "BR_TRANSACTION_PENDING_FROZEN" 55 + }; 56 + 57 + #define CREATE_TRACE_POINTS 58 + #define CREATE_RUST_TRACE_POINTS 59 + #include "rust_binder_events.h"
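Both tables are ordered by command number, so a BC_* or BR_* word maps to its name via the ioctl 'nr' byte (the low 8 bits of the command word), mirroring how the existing C driver's stats code indexes its copies of these tables. A trivial illustration (truncated table; the helper is ours, not part of the patch):

    fn bc_name<'a>(cmd: u32, table: &'a [&'a str]) -> Option<&'a str> {
        // _IOC_NR(cmd): the low 8 bits of an ioctl command word.
        let nr = (cmd & 0xff) as usize;
        table.get(nr).copied()
    }

    fn main() {
        // Truncated copy of binder_command_strings, for illustration only.
        let table = ["BC_TRANSACTION", "BC_REPLY", "BC_ACQUIRE_RESULT", "BC_FREE_BUFFER"];
        // BC_FREE_BUFFER is command number 3 in the binder UAPI.
        assert_eq!(bc_name(3, &table), Some("BC_FREE_BUFFER"));
    }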
+36
drivers/android/binder/rust_binder_events.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2025 Google, Inc. 4 + */ 5 + 6 + #undef TRACE_SYSTEM 7 + #undef TRACE_INCLUDE_FILE 8 + #undef TRACE_INCLUDE_PATH 9 + #define TRACE_SYSTEM rust_binder 10 + #define TRACE_INCLUDE_FILE rust_binder_events 11 + #define TRACE_INCLUDE_PATH ../drivers/android/binder 12 + 13 + #if !defined(_RUST_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) 14 + #define _RUST_BINDER_TRACE_H 15 + 16 + #include <linux/tracepoint.h> 17 + 18 + TRACE_EVENT(rust_binder_ioctl, 19 + TP_PROTO(unsigned int cmd, unsigned long arg), 20 + TP_ARGS(cmd, arg), 21 + 22 + TP_STRUCT__entry( 23 + __field(unsigned int, cmd) 24 + __field(unsigned long, arg) 25 + ), 26 + TP_fast_assign( 27 + __entry->cmd = cmd; 28 + __entry->arg = arg; 29 + ), 30 + TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg) 31 + ); 32 + 33 + #endif /* _RUST_BINDER_TRACE_H */ 34 + 35 + /* This part must be outside protection */ 36 + #include <trace/define_trace.h>
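With CREATE_TRACE_POINTS defined in rust_binder_events.c, this event is registered under the rust_binder trace system, and every hit is rendered with the TP_printk format above. A userspace sketch of enabling and reading it (assumes tracefs is mounted at its usual /sys/kernel/tracing location and that the kernel was built with tracing support):

    use std::fs;

    fn main() -> std::io::Result<()> {
        // Enable just this event: events/<system>/<event>/enable in tracefs.
        fs::write(
            "/sys/kernel/tracing/events/rust_binder/rust_binder_ioctl/enable",
            "1",
        )?;

        // Each hit appears in the trace buffer formatted by TP_printk above,
        // i.e. ending in "cmd=0x... arg=0x...".
        let trace = fs::read_to_string("/sys/kernel/tracing/trace")?;
        println!("{trace}");
        Ok(())
    }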
+87
drivers/android/binder/rust_binder_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* rust_binder_internal.h 3 + * 4 + * This file contains internal data structures used by Rust Binder. Mostly, 5 + * these are type definitions used only by binderfs or things that Rust Binder 6 + * define and export to binderfs. 7 + * 8 + * It does not include things exported by binderfs to Rust Binder since this 9 + * file is not included as input to bindgen. 10 + * 11 + * Copyright (C) 2025 Google LLC. 12 + */ 13 + 14 + #ifndef _LINUX_RUST_BINDER_INTERNAL_H 15 + #define _LINUX_RUST_BINDER_INTERNAL_H 16 + 17 + #define RUST_BINDERFS_SUPER_MAGIC 0x6c6f6f71 18 + 19 + #include <linux/seq_file.h> 20 + #include <uapi/linux/android/binder.h> 21 + #include <uapi/linux/android/binderfs.h> 22 + 23 + /* 24 + * The internal data types in the Rust Binder driver are opaque to C, so we use 25 + * void pointer typedefs for these types. 26 + */ 27 + typedef void *rust_binder_context; 28 + 29 + /** 30 + * struct binder_device - information about a binder device node 31 + * @minor: the minor number used by this device 32 + * @ctx: the Rust Context used by this device, or null for binder-control 33 + * 34 + * This is used as the private data for files directly in binderfs, but not 35 + * files in the binder_logs subdirectory. This struct owns a refcount on `ctx` 36 + * and the entry for `minor` in `binderfs_minors`. For binder-control `ctx` is 37 + * null. 38 + */ 39 + struct binder_device { 40 + int minor; 41 + rust_binder_context ctx; 42 + }; 43 + 44 + int rust_binder_stats_show(struct seq_file *m, void *unused); 45 + int rust_binder_state_show(struct seq_file *m, void *unused); 46 + int rust_binder_transactions_show(struct seq_file *m, void *unused); 47 + int rust_binder_proc_show(struct seq_file *m, void *pid); 48 + 49 + extern const struct file_operations rust_binder_fops; 50 + rust_binder_context rust_binder_new_context(char *name); 51 + void rust_binder_remove_context(rust_binder_context device); 52 + 53 + /** 54 + * binderfs_mount_opts - mount options for binderfs 55 + * @max: maximum number of allocatable binderfs binder devices 56 + * @stats_mode: enable binder stats in binderfs. 57 + */ 58 + struct binderfs_mount_opts { 59 + int max; 60 + int stats_mode; 61 + }; 62 + 63 + /** 64 + * binderfs_info - information about a binderfs mount 65 + * @ipc_ns: The ipc namespace the binderfs mount belongs to. 66 + * @control_dentry: This records the dentry of this binderfs mount 67 + * binder-control device. 68 + * @root_uid: uid that needs to be used when a new binder device is 69 + * created. 70 + * @root_gid: gid that needs to be used when a new binder device is 71 + * created. 72 + * @mount_opts: The mount options in use. 73 + * @device_count: The current number of allocated binder devices. 74 + * @proc_log_dir: Pointer to the directory dentry containing process-specific 75 + * logs. 76 + */ 77 + struct binderfs_info { 78 + struct ipc_namespace *ipc_ns; 79 + struct dentry *control_dentry; 80 + kuid_t root_uid; 81 + kgid_t root_gid; 82 + struct binderfs_mount_opts mount_opts; 83 + int device_count; 84 + struct dentry *proc_log_dir; 85 + }; 86 + 87 + #endif /* _LINUX_RUST_BINDER_INTERNAL_H */
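The rust_binder_context typedef is opaque to C on purpose: the pointer binderfs stores in struct binder_device is an Arc<Context> that the Rust side leaked with Arc::into_foreign() in rust_binder_new_context(), which is what the 'owns a refcount on ctx' comment above refers to. A hedged sketch of the teardown half of that contract follows; the actual rust_binder_remove_context() in this series may differ in detail.

    /// # Safety
    /// Only called by binderfs, with a pointer previously returned by
    /// `rust_binder_new_context()` that has not been reclaimed yet.
    #[no_mangle]
    unsafe extern "C" fn rust_binder_remove_context(device: *mut kernel::ffi::c_void) {
        if !device.is_null() {
            // SAFETY: Per this function's contract, a non-null pointer was
            // produced by `Arc::into_foreign()` and is reclaimed exactly once.
            drop(unsafe { Arc::<Context>::from_foreign(device) });
        }
    }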
+627
drivers/android/binder/rust_binder_main.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + //! Binder -- the Android IPC mechanism. 6 + #![recursion_limit = "256"] 7 + #![allow( 8 + clippy::as_underscore, 9 + clippy::ref_as_ptr, 10 + clippy::ptr_as_ptr, 11 + clippy::cast_lossless 12 + )] 13 + 14 + use kernel::{ 15 + bindings::{self, seq_file}, 16 + fs::File, 17 + list::{ListArc, ListArcSafe, ListLinksSelfPtr, TryNewListArc}, 18 + prelude::*, 19 + seq_file::SeqFile, 20 + seq_print, 21 + sync::poll::PollTable, 22 + sync::Arc, 23 + task::Pid, 24 + transmute::AsBytes, 25 + types::ForeignOwnable, 26 + uaccess::UserSliceWriter, 27 + }; 28 + 29 + use crate::{context::Context, page_range::Shrinker, process::Process, thread::Thread}; 30 + 31 + use core::{ 32 + ptr::NonNull, 33 + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, 34 + }; 35 + 36 + mod allocation; 37 + mod context; 38 + mod deferred_close; 39 + mod defs; 40 + mod error; 41 + mod node; 42 + mod page_range; 43 + mod process; 44 + mod range_alloc; 45 + mod stats; 46 + mod thread; 47 + mod trace; 48 + mod transaction; 49 + 50 + #[allow(warnings)] // generated bindgen code 51 + mod binderfs { 52 + use kernel::bindings::{dentry, inode}; 53 + 54 + extern "C" { 55 + pub fn init_rust_binderfs() -> kernel::ffi::c_int; 56 + } 57 + extern "C" { 58 + pub fn rust_binderfs_create_proc_file( 59 + nodp: *mut inode, 60 + pid: kernel::ffi::c_int, 61 + ) -> *mut dentry; 62 + } 63 + extern "C" { 64 + pub fn rust_binderfs_remove_file(dentry: *mut dentry); 65 + } 66 + pub type rust_binder_context = *mut kernel::ffi::c_void; 67 + #[repr(C)] 68 + #[derive(Copy, Clone)] 69 + pub struct binder_device { 70 + pub minor: kernel::ffi::c_int, 71 + pub ctx: rust_binder_context, 72 + } 73 + impl Default for binder_device { 74 + fn default() -> Self { 75 + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); 76 + unsafe { 77 + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); 78 + s.assume_init() 79 + } 80 + } 81 + } 82 + } 83 + 84 + module! { 85 + type: BinderModule, 86 + name: "rust_binder", 87 + authors: ["Wedson Almeida Filho", "Alice Ryhl"], 88 + description: "Android Binder", 89 + license: "GPL", 90 + } 91 + 92 + fn next_debug_id() -> usize { 93 + static NEXT_DEBUG_ID: AtomicUsize = AtomicUsize::new(0); 94 + 95 + NEXT_DEBUG_ID.fetch_add(1, Ordering::Relaxed) 96 + } 97 + 98 + /// Provides a single place to write Binder return values via the 99 + /// supplied `UserSliceWriter`. 100 + pub(crate) struct BinderReturnWriter<'a> { 101 + writer: UserSliceWriter, 102 + thread: &'a Thread, 103 + } 104 + 105 + impl<'a> BinderReturnWriter<'a> { 106 + fn new(writer: UserSliceWriter, thread: &'a Thread) -> Self { 107 + BinderReturnWriter { writer, thread } 108 + } 109 + 110 + /// Write a return code back to user space. 111 + /// Should be a `BR_` constant from [`defs`] e.g. [`defs::BR_TRANSACTION_COMPLETE`]. 112 + fn write_code(&mut self, code: u32) -> Result { 113 + stats::GLOBAL_STATS.inc_br(code); 114 + self.thread.process.stats.inc_br(code); 115 + self.writer.write(&code) 116 + } 117 + 118 + /// Write something *other than* a return code to user space. 119 + fn write_payload<T: AsBytes>(&mut self, payload: &T) -> Result { 120 + self.writer.write(payload) 121 + } 122 + 123 + fn len(&self) -> usize { 124 + self.writer.len() 125 + } 126 + } 127 + 128 + /// Specifies how a type should be delivered to the read part of a BINDER_WRITE_READ ioctl. 
129 + /// 130 + /// When a value is pushed to the todo list for a process or thread, it is stored as a trait object 131 + /// with the type `Arc<dyn DeliverToRead>`. Trait objects are a Rust feature that lets you 132 + /// implement dynamic dispatch over many different types. This lets us store many different types 133 + /// in the todo list. 134 + trait DeliverToRead: ListArcSafe + Send + Sync { 135 + /// Performs work. Returns true if remaining work items in the queue should be processed 136 + /// immediately, or false if it should return to caller before processing additional work 137 + /// items. 138 + fn do_work( 139 + self: DArc<Self>, 140 + thread: &Thread, 141 + writer: &mut BinderReturnWriter<'_>, 142 + ) -> Result<bool>; 143 + 144 + /// Cancels the given work item. This is called instead of [`DeliverToRead::do_work`] when work 145 + /// won't be delivered. 146 + fn cancel(self: DArc<Self>); 147 + 148 + /// Should we use `wake_up_interruptible_sync` or `wake_up_interruptible` when scheduling this 149 + /// work item? 150 + /// 151 + /// Generally only set to true for non-oneway transactions. 152 + fn should_sync_wakeup(&self) -> bool; 153 + 154 + fn debug_print(&self, m: &SeqFile, prefix: &str, transaction_prefix: &str) -> Result<()>; 155 + } 156 + 157 + // Wrapper around a `DeliverToRead` with linked list links. 158 + #[pin_data] 159 + struct DTRWrap<T: ?Sized> { 160 + #[pin] 161 + links: ListLinksSelfPtr<DTRWrap<dyn DeliverToRead>>, 162 + #[pin] 163 + wrapped: T, 164 + } 165 + kernel::list::impl_list_arc_safe! { 166 + impl{T: ListArcSafe + ?Sized} ListArcSafe<0> for DTRWrap<T> { 167 + tracked_by wrapped: T; 168 + } 169 + } 170 + kernel::list::impl_list_item! { 171 + impl ListItem<0> for DTRWrap<dyn DeliverToRead> { 172 + using ListLinksSelfPtr { self.links }; 173 + } 174 + } 175 + 176 + impl<T: ?Sized> core::ops::Deref for DTRWrap<T> { 177 + type Target = T; 178 + fn deref(&self) -> &T { 179 + &self.wrapped 180 + } 181 + } 182 + 183 + type DArc<T> = kernel::sync::Arc<DTRWrap<T>>; 184 + type DLArc<T> = kernel::list::ListArc<DTRWrap<T>>; 185 + 186 + impl<T: ListArcSafe> DTRWrap<T> { 187 + fn new(val: impl PinInit<T>) -> impl PinInit<Self> { 188 + pin_init!(Self { 189 + links <- ListLinksSelfPtr::new(), 190 + wrapped <- val, 191 + }) 192 + } 193 + 194 + fn arc_try_new(val: T) -> Result<DLArc<T>, kernel::alloc::AllocError> { 195 + ListArc::pin_init( 196 + try_pin_init!(Self { 197 + links <- ListLinksSelfPtr::new(), 198 + wrapped: val, 199 + }), 200 + GFP_KERNEL, 201 + ) 202 + .map_err(|_| kernel::alloc::AllocError) 203 + } 204 + 205 + fn arc_pin_init(init: impl PinInit<T>) -> Result<DLArc<T>, kernel::error::Error> { 206 + ListArc::pin_init( 207 + try_pin_init!(Self { 208 + links <- ListLinksSelfPtr::new(), 209 + wrapped <- init, 210 + }), 211 + GFP_KERNEL, 212 + ) 213 + } 214 + } 215 + 216 + struct DeliverCode { 217 + code: u32, 218 + skip: AtomicBool, 219 + } 220 + 221 + kernel::list::impl_list_arc_safe! { 222 + impl ListArcSafe<0> for DeliverCode { untracked; } 223 + } 224 + 225 + impl DeliverCode { 226 + fn new(code: u32) -> Self { 227 + Self { 228 + code, 229 + skip: AtomicBool::new(false), 230 + } 231 + } 232 + 233 + /// Disable this DeliverCode and make it do nothing. 234 + /// 235 + /// This is used instead of removing it from the work list, since `LinkedList::remove` is 236 + /// unsafe, whereas this method is not. 
237 + fn skip(&self) { 238 + self.skip.store(true, Ordering::Relaxed); 239 + } 240 + } 241 + 242 + impl DeliverToRead for DeliverCode { 243 + fn do_work( 244 + self: DArc<Self>, 245 + _thread: &Thread, 246 + writer: &mut BinderReturnWriter<'_>, 247 + ) -> Result<bool> { 248 + if !self.skip.load(Ordering::Relaxed) { 249 + writer.write_code(self.code)?; 250 + } 251 + Ok(true) 252 + } 253 + 254 + fn cancel(self: DArc<Self>) {} 255 + 256 + fn should_sync_wakeup(&self) -> bool { 257 + false 258 + } 259 + 260 + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { 261 + seq_print!(m, "{}", prefix); 262 + if self.skip.load(Ordering::Relaxed) { 263 + seq_print!(m, "(skipped) "); 264 + } 265 + if self.code == defs::BR_TRANSACTION_COMPLETE { 266 + seq_print!(m, "transaction complete\n"); 267 + } else { 268 + seq_print!(m, "transaction error: {}\n", self.code); 269 + } 270 + Ok(()) 271 + } 272 + } 273 + 274 + fn ptr_align(value: usize) -> Option<usize> { 275 + let size = core::mem::size_of::<usize>() - 1; 276 + Some(value.checked_add(size)? & !size) 277 + } 278 + 279 + // SAFETY: We call register in `init`. 280 + static BINDER_SHRINKER: Shrinker = unsafe { Shrinker::new() }; 281 + 282 + struct BinderModule {} 283 + 284 + impl kernel::Module for BinderModule { 285 + fn init(_module: &'static kernel::ThisModule) -> Result<Self> { 286 + // SAFETY: The module initializer never runs twice, so we only call this once. 287 + unsafe { crate::context::CONTEXTS.init() }; 288 + 289 + pr_warn!("Loaded Rust Binder."); 290 + 291 + BINDER_SHRINKER.register(kernel::c_str!("android-binder"))?; 292 + 293 + // SAFETY: The module is being loaded, so we can initialize binderfs. 294 + unsafe { kernel::error::to_result(binderfs::init_rust_binderfs())? }; 295 + 296 + Ok(Self {}) 297 + } 298 + } 299 + 300 + /// Makes the inner type Sync. 301 + #[repr(transparent)] 302 + pub struct AssertSync<T>(T); 303 + // SAFETY: Used only to insert `file_operations` into a global, which is safe. 304 + unsafe impl<T> Sync for AssertSync<T> {} 305 + 306 + /// File operations that rust_binderfs.c can use. 307 + #[no_mangle] 308 + #[used] 309 + pub static rust_binder_fops: AssertSync<kernel::bindings::file_operations> = { 310 + // SAFETY: All zeroes is safe for the `file_operations` type. 311 + let zeroed_ops = unsafe { core::mem::MaybeUninit::zeroed().assume_init() }; 312 + 313 + let ops = kernel::bindings::file_operations { 314 + owner: THIS_MODULE.as_ptr(), 315 + poll: Some(rust_binder_poll), 316 + unlocked_ioctl: Some(rust_binder_unlocked_ioctl), 317 + compat_ioctl: Some(rust_binder_compat_ioctl), 318 + mmap: Some(rust_binder_mmap), 319 + open: Some(rust_binder_open), 320 + release: Some(rust_binder_release), 321 + flush: Some(rust_binder_flush), 322 + ..zeroed_ops 323 + }; 324 + AssertSync(ops) 325 + }; 326 + 327 + /// # Safety 328 + /// Only called by binderfs. 329 + #[no_mangle] 330 + unsafe extern "C" fn rust_binder_new_context( 331 + name: *const kernel::ffi::c_char, 332 + ) -> *mut kernel::ffi::c_void { 333 + // SAFETY: The caller will always provide a valid c string here. 334 + let name = unsafe { kernel::str::CStr::from_char_ptr(name) }; 335 + match Context::new(name) { 336 + Ok(ctx) => Arc::into_foreign(ctx), 337 + Err(_err) => core::ptr::null_mut(), 338 + } 339 + } 340 + 341 + /// # Safety 342 + /// Only called by binderfs. 
343 + #[no_mangle] 344 + unsafe extern "C" fn rust_binder_remove_context(device: *mut kernel::ffi::c_void) { 345 + if !device.is_null() { 346 + // SAFETY: The caller ensures that the `device` pointer came from a previous call to 347 + // `rust_binder_new_device`. 348 + let ctx = unsafe { Arc::<Context>::from_foreign(device) }; 349 + ctx.deregister(); 350 + drop(ctx); 351 + } 352 + } 353 + 354 + /// # Safety 355 + /// Only called by binderfs. 356 + unsafe extern "C" fn rust_binder_open( 357 + inode: *mut bindings::inode, 358 + file_ptr: *mut bindings::file, 359 + ) -> kernel::ffi::c_int { 360 + // SAFETY: The `rust_binderfs.c` file ensures that `i_private` is set to a 361 + // `struct binder_device`. 362 + let device = unsafe { (*inode).i_private } as *const binderfs::binder_device; 363 + 364 + assert!(!device.is_null()); 365 + 366 + // SAFETY: The `rust_binderfs.c` file ensures that `device->ctx` holds a binder context when 367 + // using the rust binder fops. 368 + let ctx = unsafe { Arc::<Context>::borrow((*device).ctx) }; 369 + 370 + // SAFETY: The caller provides a valid file pointer to a new `struct file`. 371 + let file = unsafe { File::from_raw_file(file_ptr) }; 372 + let process = match Process::open(ctx, file) { 373 + Ok(process) => process, 374 + Err(err) => return err.to_errno(), 375 + }; 376 + 377 + // SAFETY: This is an `inode` for a newly created binder file. 378 + match unsafe { BinderfsProcFile::new(inode, process.task.pid()) } { 379 + Ok(Some(file)) => process.inner.lock().binderfs_file = Some(file), 380 + Ok(None) => { /* pid already exists */ } 381 + Err(err) => return err.to_errno(), 382 + } 383 + 384 + // SAFETY: This file is associated with Rust binder, so we own the `private_data` field. 385 + unsafe { (*file_ptr).private_data = process.into_foreign() }; 386 + 0 387 + } 388 + 389 + /// # Safety 390 + /// Only called by binderfs. 391 + unsafe extern "C" fn rust_binder_release( 392 + _inode: *mut bindings::inode, 393 + file: *mut bindings::file, 394 + ) -> kernel::ffi::c_int { 395 + // SAFETY: We previously set `private_data` in `rust_binder_open`. 396 + let process = unsafe { Arc::<Process>::from_foreign((*file).private_data) }; 397 + // SAFETY: The caller ensures that the file is valid. 398 + let file = unsafe { File::from_raw_file(file) }; 399 + Process::release(process, file); 400 + 0 401 + } 402 + 403 + /// # Safety 404 + /// Only called by binderfs. 405 + unsafe extern "C" fn rust_binder_compat_ioctl( 406 + file: *mut bindings::file, 407 + cmd: kernel::ffi::c_uint, 408 + arg: kernel::ffi::c_ulong, 409 + ) -> kernel::ffi::c_long { 410 + // SAFETY: We previously set `private_data` in `rust_binder_open`. 411 + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; 412 + // SAFETY: The caller ensures that the file is valid. 413 + match Process::compat_ioctl(f, unsafe { File::from_raw_file(file) }, cmd as _, arg as _) { 414 + Ok(()) => 0, 415 + Err(err) => err.to_errno() as isize, 416 + } 417 + } 418 + 419 + /// # Safety 420 + /// Only called by binderfs. 421 + unsafe extern "C" fn rust_binder_unlocked_ioctl( 422 + file: *mut bindings::file, 423 + cmd: kernel::ffi::c_uint, 424 + arg: kernel::ffi::c_ulong, 425 + ) -> kernel::ffi::c_long { 426 + // SAFETY: We previously set `private_data` in `rust_binder_open`. 427 + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; 428 + // SAFETY: The caller ensures that the file is valid. 
429 + match Process::ioctl(f, unsafe { File::from_raw_file(file) }, cmd as _, arg as _) { 430 + Ok(()) => 0, 431 + Err(err) => err.to_errno() as isize, 432 + } 433 + } 434 + 435 + /// # Safety 436 + /// Only called by binderfs. 437 + unsafe extern "C" fn rust_binder_mmap( 438 + file: *mut bindings::file, 439 + vma: *mut bindings::vm_area_struct, 440 + ) -> kernel::ffi::c_int { 441 + // SAFETY: We previously set `private_data` in `rust_binder_open`. 442 + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; 443 + // SAFETY: The caller ensures that the vma is valid. 444 + let area = unsafe { kernel::mm::virt::VmaNew::from_raw(vma) }; 445 + // SAFETY: The caller ensures that the file is valid. 446 + match Process::mmap(f, unsafe { File::from_raw_file(file) }, area) { 447 + Ok(()) => 0, 448 + Err(err) => err.to_errno(), 449 + } 450 + } 451 + 452 + /// # Safety 453 + /// Only called by binderfs. 454 + unsafe extern "C" fn rust_binder_poll( 455 + file: *mut bindings::file, 456 + wait: *mut bindings::poll_table_struct, 457 + ) -> bindings::__poll_t { 458 + // SAFETY: We previously set `private_data` in `rust_binder_open`. 459 + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; 460 + // SAFETY: The caller ensures that the file is valid. 461 + let fileref = unsafe { File::from_raw_file(file) }; 462 + // SAFETY: The caller ensures that the `PollTable` is valid. 463 + match Process::poll(f, fileref, unsafe { PollTable::from_raw(wait) }) { 464 + Ok(v) => v, 465 + Err(_) => bindings::POLLERR, 466 + } 467 + } 468 + 469 + /// # Safety 470 + /// Only called by binderfs. 471 + unsafe extern "C" fn rust_binder_flush( 472 + file: *mut bindings::file, 473 + _id: bindings::fl_owner_t, 474 + ) -> kernel::ffi::c_int { 475 + // SAFETY: We previously set `private_data` in `rust_binder_open`. 476 + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; 477 + match Process::flush(f) { 478 + Ok(()) => 0, 479 + Err(err) => err.to_errno(), 480 + } 481 + } 482 + 483 + /// # Safety 484 + /// Only called by binderfs. 485 + #[no_mangle] 486 + unsafe extern "C" fn rust_binder_stats_show( 487 + ptr: *mut seq_file, 488 + _: *mut kernel::ffi::c_void, 489 + ) -> kernel::ffi::c_int { 490 + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which 491 + // this method is called. 492 + let m = unsafe { SeqFile::from_raw(ptr) }; 493 + if let Err(err) = rust_binder_stats_show_impl(m) { 494 + seq_print!(m, "failed to generate state: {:?}\n", err); 495 + } 496 + 0 497 + } 498 + 499 + /// # Safety 500 + /// Only called by binderfs. 501 + #[no_mangle] 502 + unsafe extern "C" fn rust_binder_state_show( 503 + ptr: *mut seq_file, 504 + _: *mut kernel::ffi::c_void, 505 + ) -> kernel::ffi::c_int { 506 + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which 507 + // this method is called. 508 + let m = unsafe { SeqFile::from_raw(ptr) }; 509 + if let Err(err) = rust_binder_state_show_impl(m) { 510 + seq_print!(m, "failed to generate state: {:?}\n", err); 511 + } 512 + 0 513 + } 514 + 515 + /// # Safety 516 + /// Only called by binderfs. 517 + #[no_mangle] 518 + unsafe extern "C" fn rust_binder_proc_show( 519 + ptr: *mut seq_file, 520 + _: *mut kernel::ffi::c_void, 521 + ) -> kernel::ffi::c_int { 522 + // SAFETY: Accessing the private field of `seq_file` is okay. 
523 + let pid = (unsafe { (*ptr).private }) as usize as Pid; 524 + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which 525 + // this method is called. 526 + let m = unsafe { SeqFile::from_raw(ptr) }; 527 + if let Err(err) = rust_binder_proc_show_impl(m, pid) { 528 + seq_print!(m, "failed to generate state: {:?}\n", err); 529 + } 530 + 0 531 + } 532 + 533 + /// # Safety 534 + /// Only called by binderfs. 535 + #[no_mangle] 536 + unsafe extern "C" fn rust_binder_transactions_show( 537 + ptr: *mut seq_file, 538 + _: *mut kernel::ffi::c_void, 539 + ) -> kernel::ffi::c_int { 540 + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which 541 + // this method is called. 542 + let m = unsafe { SeqFile::from_raw(ptr) }; 543 + if let Err(err) = rust_binder_transactions_show_impl(m) { 544 + seq_print!(m, "failed to generate state: {:?}\n", err); 545 + } 546 + 0 547 + } 548 + 549 + fn rust_binder_transactions_show_impl(m: &SeqFile) -> Result<()> { 550 + seq_print!(m, "binder transactions:\n"); 551 + let contexts = context::get_all_contexts()?; 552 + for ctx in contexts { 553 + let procs = ctx.get_all_procs()?; 554 + for proc in procs { 555 + proc.debug_print(m, &ctx, false)?; 556 + seq_print!(m, "\n"); 557 + } 558 + } 559 + Ok(()) 560 + } 561 + 562 + fn rust_binder_stats_show_impl(m: &SeqFile) -> Result<()> { 563 + seq_print!(m, "binder stats:\n"); 564 + stats::GLOBAL_STATS.debug_print("", m); 565 + let contexts = context::get_all_contexts()?; 566 + for ctx in contexts { 567 + let procs = ctx.get_all_procs()?; 568 + for proc in procs { 569 + proc.debug_print_stats(m, &ctx)?; 570 + seq_print!(m, "\n"); 571 + } 572 + } 573 + Ok(()) 574 + } 575 + 576 + fn rust_binder_state_show_impl(m: &SeqFile) -> Result<()> { 577 + seq_print!(m, "binder state:\n"); 578 + let contexts = context::get_all_contexts()?; 579 + for ctx in contexts { 580 + let procs = ctx.get_all_procs()?; 581 + for proc in procs { 582 + proc.debug_print(m, &ctx, true)?; 583 + seq_print!(m, "\n"); 584 + } 585 + } 586 + Ok(()) 587 + } 588 + 589 + fn rust_binder_proc_show_impl(m: &SeqFile, pid: Pid) -> Result<()> { 590 + seq_print!(m, "binder proc state:\n"); 591 + let contexts = context::get_all_contexts()?; 592 + for ctx in contexts { 593 + let procs = ctx.get_procs_with_pid(pid)?; 594 + for proc in procs { 595 + proc.debug_print(m, &ctx, true)?; 596 + seq_print!(m, "\n"); 597 + } 598 + } 599 + Ok(()) 600 + } 601 + 602 + struct BinderfsProcFile(NonNull<bindings::dentry>); 603 + 604 + // SAFETY: Safe to drop from any thread. 605 + unsafe impl Send for BinderfsProcFile {} 606 + 607 + impl BinderfsProcFile { 608 + /// # Safety 609 + /// 610 + /// Takes an inode from a newly created binder file. 611 + unsafe fn new(nodp: *mut bindings::inode, pid: i32) -> Result<Option<Self>> { 612 + // SAFETY: The caller passes an `inode` for a newly created binder file. 613 + let dentry = unsafe { binderfs::rust_binderfs_create_proc_file(nodp, pid) }; 614 + match kernel::error::from_err_ptr(dentry) { 615 + Ok(dentry) => Ok(NonNull::new(dentry).map(Self)), 616 + Err(err) if err == EEXIST => Ok(None), 617 + Err(err) => Err(err), 618 + } 619 + } 620 + } 621 + 622 + impl Drop for BinderfsProcFile { 623 + fn drop(&mut self) { 624 + // SAFETY: This is a dentry from `rust_binderfs_create_proc_file` that has not been deleted yet. 625 + unsafe { binderfs::rust_binderfs_remove_file(self.0.as_ptr()) }; 626 + } 627 + }
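The trait-object scheme described in the DeliverToRead documentation above can be sketched in plain userspace Rust. The following is a simplified stand-in (std only, no kernel crate): DeliverCode and Transaction here are made-up item types, a Vec<u32> stands in for the BinderReturnWriter, and a VecDeque stands in for the thread/process todo lists. It only illustrates why storing heterogeneous work items behind Arc<dyn DeliverToRead> works.

    // Userspace sketch of a heterogeneous todo list via dynamic dispatch.
    use std::collections::VecDeque;
    use std::sync::Arc;

    trait DeliverToRead {
        // Returns true if the reader should keep processing the queue.
        fn do_work(self: Arc<Self>, out: &mut Vec<u32>) -> bool;
    }

    struct DeliverCode(u32);
    struct Transaction {
        code: u32,
        payload: Vec<u8>,
    }

    impl DeliverToRead for DeliverCode {
        fn do_work(self: Arc<Self>, out: &mut Vec<u32>) -> bool {
            out.push(self.0);
            true
        }
    }

    impl DeliverToRead for Transaction {
        fn do_work(self: Arc<Self>, out: &mut Vec<u32>) -> bool {
            out.push(self.code);
            out.push(self.payload.len() as u32);
            false // a transaction hands control back to the caller
        }
    }

    fn main() {
        // The "todo list" holds different item types behind one trait object.
        let mut todo: VecDeque<Arc<dyn DeliverToRead>> = VecDeque::new();
        todo.push_back(Arc::new(DeliverCode(1)));
        todo.push_back(Arc::new(Transaction { code: 2, payload: vec![1, 2, 3] }));

        let mut out = Vec::new();
        while let Some(item) = todo.pop_front() {
            if !item.do_work(&mut out) {
                break;
            }
        }
        println!("{out:?}");
    }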
+850
drivers/android/binder/rust_binderfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/compiler_types.h> 4 + #include <linux/errno.h> 5 + #include <linux/fs.h> 6 + #include <linux/fsnotify.h> 7 + #include <linux/gfp.h> 8 + #include <linux/idr.h> 9 + #include <linux/init.h> 10 + #include <linux/ipc_namespace.h> 11 + #include <linux/kdev_t.h> 12 + #include <linux/kernel.h> 13 + #include <linux/list.h> 14 + #include <linux/namei.h> 15 + #include <linux/magic.h> 16 + #include <linux/major.h> 17 + #include <linux/miscdevice.h> 18 + #include <linux/module.h> 19 + #include <linux/mutex.h> 20 + #include <linux/mount.h> 21 + #include <linux/fs_parser.h> 22 + #include <linux/sched.h> 23 + #include <linux/seq_file.h> 24 + #include <linux/slab.h> 25 + #include <linux/spinlock_types.h> 26 + #include <linux/stddef.h> 27 + #include <linux/string.h> 28 + #include <linux/types.h> 29 + #include <linux/uaccess.h> 30 + #include <linux/user_namespace.h> 31 + #include <linux/xarray.h> 32 + #include <uapi/asm-generic/errno-base.h> 33 + #include <uapi/linux/android/binder.h> 34 + #include <uapi/linux/android/binderfs.h> 35 + 36 + #include "rust_binder.h" 37 + #include "rust_binder_internal.h" 38 + 39 + #define FIRST_INODE 1 40 + #define SECOND_INODE 2 41 + #define INODE_OFFSET 3 42 + #define BINDERFS_MAX_MINOR (1U << MINORBITS) 43 + /* Ensure that the initial ipc namespace always has devices available. */ 44 + #define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) 45 + 46 + DEFINE_SHOW_ATTRIBUTE(rust_binder_stats); 47 + DEFINE_SHOW_ATTRIBUTE(rust_binder_state); 48 + DEFINE_SHOW_ATTRIBUTE(rust_binder_transactions); 49 + DEFINE_SHOW_ATTRIBUTE(rust_binder_proc); 50 + 51 + char *rust_binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; 52 + module_param_named(rust_devices, rust_binder_devices_param, charp, 0444); 53 + 54 + static dev_t binderfs_dev; 55 + static DEFINE_MUTEX(binderfs_minors_mutex); 56 + static DEFINE_IDA(binderfs_minors); 57 + 58 + enum binderfs_param { 59 + Opt_max, 60 + Opt_stats_mode, 61 + }; 62 + 63 + enum binderfs_stats_mode { 64 + binderfs_stats_mode_unset, 65 + binderfs_stats_mode_global, 66 + }; 67 + 68 + struct binder_features { 69 + bool oneway_spam_detection; 70 + bool extended_error; 71 + bool freeze_notification; 72 + }; 73 + 74 + static const struct constant_table binderfs_param_stats[] = { 75 + { "global", binderfs_stats_mode_global }, 76 + {} 77 + }; 78 + 79 + static const struct fs_parameter_spec binderfs_fs_parameters[] = { 80 + fsparam_u32("max", Opt_max), 81 + fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats), 82 + {} 83 + }; 84 + 85 + static struct binder_features binder_features = { 86 + .oneway_spam_detection = true, 87 + .extended_error = true, 88 + .freeze_notification = true, 89 + }; 90 + 91 + static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) 92 + { 93 + return sb->s_fs_info; 94 + } 95 + 96 + /** 97 + * binderfs_binder_device_create - allocate inode from super block of a 98 + * binderfs mount 99 + * @ref_inode: inode from wich the super block will be taken 100 + * @userp: buffer to copy information about new device for userspace to 101 + * @req: struct binderfs_device as copied from userspace 102 + * 103 + * This function allocates a new binder_device and reserves a new minor 104 + * number for it. 105 + * Minor numbers are limited and tracked globally in binderfs_minors. The 106 + * function will stash a struct binder_device for the specific binder 107 + * device in i_private of the inode. 
108 + * It will go on to allocate a new inode from the super block of the 109 + * filesystem mount, stash a struct binder_device in its i_private field 110 + * and attach a dentry to that inode. 111 + * 112 + * Return: 0 on success, negative errno on failure 113 + */ 114 + static int binderfs_binder_device_create(struct inode *ref_inode, 115 + struct binderfs_device __user *userp, 116 + struct binderfs_device *req) 117 + { 118 + int minor, ret; 119 + struct dentry *dentry, *root; 120 + struct binder_device *device = NULL; 121 + rust_binder_context ctx = NULL; 122 + struct inode *inode = NULL; 123 + struct super_block *sb = ref_inode->i_sb; 124 + struct binderfs_info *info = sb->s_fs_info; 125 + #if defined(CONFIG_IPC_NS) 126 + bool use_reserve = (info->ipc_ns == &init_ipc_ns); 127 + #else 128 + bool use_reserve = true; 129 + #endif 130 + 131 + /* Reserve new minor number for the new device. */ 132 + mutex_lock(&binderfs_minors_mutex); 133 + if (++info->device_count <= info->mount_opts.max) 134 + minor = ida_alloc_max(&binderfs_minors, 135 + use_reserve ? BINDERFS_MAX_MINOR : 136 + BINDERFS_MAX_MINOR_CAPPED, 137 + GFP_KERNEL); 138 + else 139 + minor = -ENOSPC; 140 + if (minor < 0) { 141 + --info->device_count; 142 + mutex_unlock(&binderfs_minors_mutex); 143 + return minor; 144 + } 145 + mutex_unlock(&binderfs_minors_mutex); 146 + 147 + ret = -ENOMEM; 148 + device = kzalloc(sizeof(*device), GFP_KERNEL); 149 + if (!device) 150 + goto err; 151 + 152 + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ 153 + 154 + ctx = rust_binder_new_context(req->name); 155 + if (!ctx) 156 + goto err; 157 + 158 + inode = new_inode(sb); 159 + if (!inode) 160 + goto err; 161 + 162 + inode->i_ino = minor + INODE_OFFSET; 163 + simple_inode_init_ts(inode); 164 + init_special_inode(inode, S_IFCHR | 0600, 165 + MKDEV(MAJOR(binderfs_dev), minor)); 166 + inode->i_fop = &rust_binder_fops; 167 + inode->i_uid = info->root_uid; 168 + inode->i_gid = info->root_gid; 169 + 170 + req->major = MAJOR(binderfs_dev); 171 + req->minor = minor; 172 + device->ctx = ctx; 173 + device->minor = minor; 174 + 175 + if (userp && copy_to_user(userp, req, sizeof(*req))) { 176 + ret = -EFAULT; 177 + goto err; 178 + } 179 + 180 + root = sb->s_root; 181 + inode_lock(d_inode(root)); 182 + 183 + /* look it up */ 184 + dentry = lookup_noperm(&QSTR(req->name), root); 185 + if (IS_ERR(dentry)) { 186 + inode_unlock(d_inode(root)); 187 + ret = PTR_ERR(dentry); 188 + goto err; 189 + } 190 + 191 + if (d_really_is_positive(dentry)) { 192 + /* already exists */ 193 + dput(dentry); 194 + inode_unlock(d_inode(root)); 195 + ret = -EEXIST; 196 + goto err; 197 + } 198 + 199 + inode->i_private = device; 200 + d_instantiate(dentry, inode); 201 + fsnotify_create(root->d_inode, dentry); 202 + inode_unlock(d_inode(root)); 203 + 204 + return 0; 205 + 206 + err: 207 + kfree(device); 208 + rust_binder_remove_context(ctx); 209 + mutex_lock(&binderfs_minors_mutex); 210 + --info->device_count; 211 + ida_free(&binderfs_minors, minor); 212 + mutex_unlock(&binderfs_minors_mutex); 213 + iput(inode); 214 + 215 + return ret; 216 + } 217 + 218 + /** 219 + * binder_ctl_ioctl - handle binder device node allocation requests 220 + * 221 + * The request handler for the binder-control device. All requests operate on 222 + * the binderfs mount the binder-control device resides in: 223 + * - BINDER_CTL_ADD 224 + * Allocate a new binder device. 225 + * 226 + * Return: %0 on success, negative errno on failure. 
227 + */ 228 + static long binder_ctl_ioctl(struct file *file, unsigned int cmd, 229 + unsigned long arg) 230 + { 231 + int ret = -EINVAL; 232 + struct inode *inode = file_inode(file); 233 + struct binderfs_device __user *device = (struct binderfs_device __user *)arg; 234 + struct binderfs_device device_req; 235 + 236 + switch (cmd) { 237 + case BINDER_CTL_ADD: 238 + ret = copy_from_user(&device_req, device, sizeof(device_req)); 239 + if (ret) { 240 + ret = -EFAULT; 241 + break; 242 + } 243 + 244 + ret = binderfs_binder_device_create(inode, device, &device_req); 245 + break; 246 + default: 247 + break; 248 + } 249 + 250 + return ret; 251 + } 252 + 253 + static void binderfs_evict_inode(struct inode *inode) 254 + { 255 + struct binder_device *device = inode->i_private; 256 + struct binderfs_info *info = BINDERFS_SB(inode->i_sb); 257 + 258 + clear_inode(inode); 259 + 260 + if (!S_ISCHR(inode->i_mode) || !device) 261 + return; 262 + 263 + mutex_lock(&binderfs_minors_mutex); 264 + --info->device_count; 265 + ida_free(&binderfs_minors, device->minor); 266 + mutex_unlock(&binderfs_minors_mutex); 267 + 268 + /* ctx is null for binder-control, but this function ignores null pointers */ 269 + rust_binder_remove_context(device->ctx); 270 + 271 + kfree(device); 272 + } 273 + 274 + static int binderfs_fs_context_parse_param(struct fs_context *fc, 275 + struct fs_parameter *param) 276 + { 277 + int opt; 278 + struct binderfs_mount_opts *ctx = fc->fs_private; 279 + struct fs_parse_result result; 280 + 281 + opt = fs_parse(fc, binderfs_fs_parameters, param, &result); 282 + if (opt < 0) 283 + return opt; 284 + 285 + switch (opt) { 286 + case Opt_max: 287 + if (result.uint_32 > BINDERFS_MAX_MINOR) 288 + return invalfc(fc, "Bad value for '%s'", param->key); 289 + 290 + ctx->max = result.uint_32; 291 + break; 292 + case Opt_stats_mode: 293 + if (!capable(CAP_SYS_ADMIN)) 294 + return -EPERM; 295 + 296 + ctx->stats_mode = result.uint_32; 297 + break; 298 + default: 299 + return invalfc(fc, "Unsupported parameter '%s'", param->key); 300 + } 301 + 302 + return 0; 303 + } 304 + 305 + static int binderfs_fs_context_reconfigure(struct fs_context *fc) 306 + { 307 + struct binderfs_mount_opts *ctx = fc->fs_private; 308 + struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb); 309 + 310 + if (info->mount_opts.stats_mode != ctx->stats_mode) 311 + return invalfc(fc, "Binderfs stats mode cannot be changed during a remount"); 312 + 313 + info->mount_opts.stats_mode = ctx->stats_mode; 314 + info->mount_opts.max = ctx->max; 315 + return 0; 316 + } 317 + 318 + static int binderfs_show_options(struct seq_file *seq, struct dentry *root) 319 + { 320 + struct binderfs_info *info = BINDERFS_SB(root->d_sb); 321 + 322 + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) 323 + seq_printf(seq, ",max=%d", info->mount_opts.max); 324 + 325 + switch (info->mount_opts.stats_mode) { 326 + case binderfs_stats_mode_unset: 327 + break; 328 + case binderfs_stats_mode_global: 329 + seq_puts(seq, ",stats=global"); 330 + break; 331 + } 332 + 333 + return 0; 334 + } 335 + 336 + static const struct super_operations binderfs_super_ops = { 337 + .evict_inode = binderfs_evict_inode, 338 + .show_options = binderfs_show_options, 339 + .statfs = simple_statfs, 340 + }; 341 + 342 + static inline bool is_binderfs_control_device(const struct dentry *dentry) 343 + { 344 + struct binderfs_info *info = dentry->d_sb->s_fs_info; 345 + 346 + return info->control_dentry == dentry; 347 + } 348 + 349 + static int binderfs_rename(struct mnt_idmap *idmap, 350 + struct 
inode *old_dir, struct dentry *old_dentry, 351 + struct inode *new_dir, struct dentry *new_dentry, 352 + unsigned int flags) 353 + { 354 + if (is_binderfs_control_device(old_dentry) || 355 + is_binderfs_control_device(new_dentry)) 356 + return -EPERM; 357 + 358 + return simple_rename(idmap, old_dir, old_dentry, new_dir, 359 + new_dentry, flags); 360 + } 361 + 362 + static int binderfs_unlink(struct inode *dir, struct dentry *dentry) 363 + { 364 + if (is_binderfs_control_device(dentry)) 365 + return -EPERM; 366 + 367 + return simple_unlink(dir, dentry); 368 + } 369 + 370 + static const struct file_operations binder_ctl_fops = { 371 + .owner = THIS_MODULE, 372 + .open = nonseekable_open, 373 + .unlocked_ioctl = binder_ctl_ioctl, 374 + .compat_ioctl = binder_ctl_ioctl, 375 + .llseek = noop_llseek, 376 + }; 377 + 378 + /** 379 + * binderfs_binder_ctl_create - create a new binder-control device 380 + * @sb: super block of the binderfs mount 381 + * 382 + * This function creates a new binder-control device node in the binderfs mount 383 + * referred to by @sb. 384 + * 385 + * Return: 0 on success, negative errno on failure 386 + */ 387 + static int binderfs_binder_ctl_create(struct super_block *sb) 388 + { 389 + int minor, ret; 390 + struct dentry *dentry; 391 + struct binder_device *device; 392 + struct inode *inode = NULL; 393 + struct dentry *root = sb->s_root; 394 + struct binderfs_info *info = sb->s_fs_info; 395 + #if defined(CONFIG_IPC_NS) 396 + bool use_reserve = (info->ipc_ns == &init_ipc_ns); 397 + #else 398 + bool use_reserve = true; 399 + #endif 400 + 401 + device = kzalloc(sizeof(*device), GFP_KERNEL); 402 + if (!device) 403 + return -ENOMEM; 404 + 405 + /* If we have already created a binder-control node, return. */ 406 + if (info->control_dentry) { 407 + ret = 0; 408 + goto out; 409 + } 410 + 411 + ret = -ENOMEM; 412 + inode = new_inode(sb); 413 + if (!inode) 414 + goto out; 415 + 416 + /* Reserve a new minor number for the new device. */ 417 + mutex_lock(&binderfs_minors_mutex); 418 + minor = ida_alloc_max(&binderfs_minors, 419 + use_reserve ? 
BINDERFS_MAX_MINOR : 420 + BINDERFS_MAX_MINOR_CAPPED, 421 + GFP_KERNEL); 422 + mutex_unlock(&binderfs_minors_mutex); 423 + if (minor < 0) { 424 + ret = minor; 425 + goto out; 426 + } 427 + 428 + inode->i_ino = SECOND_INODE; 429 + simple_inode_init_ts(inode); 430 + init_special_inode(inode, S_IFCHR | 0600, 431 + MKDEV(MAJOR(binderfs_dev), minor)); 432 + inode->i_fop = &binder_ctl_fops; 433 + inode->i_uid = info->root_uid; 434 + inode->i_gid = info->root_gid; 435 + 436 + device->minor = minor; 437 + device->ctx = NULL; 438 + 439 + dentry = d_alloc_name(root, "binder-control"); 440 + if (!dentry) 441 + goto out; 442 + 443 + inode->i_private = device; 444 + info->control_dentry = dentry; 445 + d_add(dentry, inode); 446 + 447 + return 0; 448 + 449 + out: 450 + kfree(device); 451 + iput(inode); 452 + 453 + return ret; 454 + } 455 + 456 + static const struct inode_operations binderfs_dir_inode_operations = { 457 + .lookup = simple_lookup, 458 + .rename = binderfs_rename, 459 + .unlink = binderfs_unlink, 460 + }; 461 + 462 + static struct inode *binderfs_make_inode(struct super_block *sb, int mode) 463 + { 464 + struct inode *ret; 465 + 466 + ret = new_inode(sb); 467 + if (ret) { 468 + ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); 469 + ret->i_mode = mode; 470 + simple_inode_init_ts(ret); 471 + } 472 + return ret; 473 + } 474 + 475 + static struct dentry *binderfs_create_dentry(struct dentry *parent, 476 + const char *name) 477 + { 478 + struct dentry *dentry; 479 + 480 + dentry = lookup_noperm(&QSTR(name), parent); 481 + if (IS_ERR(dentry)) 482 + return dentry; 483 + 484 + /* Return error if the file/dir already exists. */ 485 + if (d_really_is_positive(dentry)) { 486 + dput(dentry); 487 + return ERR_PTR(-EEXIST); 488 + } 489 + 490 + return dentry; 491 + } 492 + 493 + void rust_binderfs_remove_file(struct dentry *dentry) 494 + { 495 + struct inode *parent_inode; 496 + 497 + parent_inode = d_inode(dentry->d_parent); 498 + inode_lock(parent_inode); 499 + if (simple_positive(dentry)) { 500 + dget(dentry); 501 + simple_unlink(parent_inode, dentry); 502 + d_delete(dentry); 503 + dput(dentry); 504 + } 505 + inode_unlock(parent_inode); 506 + } 507 + 508 + static struct dentry *rust_binderfs_create_file(struct dentry *parent, const char *name, 509 + const struct file_operations *fops, 510 + void *data) 511 + { 512 + struct dentry *dentry; 513 + struct inode *new_inode, *parent_inode; 514 + struct super_block *sb; 515 + 516 + parent_inode = d_inode(parent); 517 + inode_lock(parent_inode); 518 + 519 + dentry = binderfs_create_dentry(parent, name); 520 + if (IS_ERR(dentry)) 521 + goto out; 522 + 523 + sb = parent_inode->i_sb; 524 + new_inode = binderfs_make_inode(sb, S_IFREG | 0444); 525 + if (!new_inode) { 526 + dput(dentry); 527 + dentry = ERR_PTR(-ENOMEM); 528 + goto out; 529 + } 530 + 531 + new_inode->i_fop = fops; 532 + new_inode->i_private = data; 533 + d_instantiate(dentry, new_inode); 534 + fsnotify_create(parent_inode, dentry); 535 + 536 + out: 537 + inode_unlock(parent_inode); 538 + return dentry; 539 + } 540 + 541 + struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid) 542 + { 543 + struct binderfs_info *info = nodp->i_sb->s_fs_info; 544 + struct dentry *dir = info->proc_log_dir; 545 + char strbuf[20 + 1]; 546 + void *data = (void *)(unsigned long) pid; 547 + 548 + if (!dir) 549 + return NULL; 550 + 551 + snprintf(strbuf, sizeof(strbuf), "%u", pid); 552 + return rust_binderfs_create_file(dir, strbuf, &rust_binder_proc_fops, data); 553 + } 554 + 555 + static 
struct dentry *binderfs_create_dir(struct dentry *parent, 556 + const char *name) 557 + { 558 + struct dentry *dentry; 559 + struct inode *new_inode, *parent_inode; 560 + struct super_block *sb; 561 + 562 + parent_inode = d_inode(parent); 563 + inode_lock(parent_inode); 564 + 565 + dentry = binderfs_create_dentry(parent, name); 566 + if (IS_ERR(dentry)) 567 + goto out; 568 + 569 + sb = parent_inode->i_sb; 570 + new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); 571 + if (!new_inode) { 572 + dput(dentry); 573 + dentry = ERR_PTR(-ENOMEM); 574 + goto out; 575 + } 576 + 577 + new_inode->i_fop = &simple_dir_operations; 578 + new_inode->i_op = &simple_dir_inode_operations; 579 + 580 + set_nlink(new_inode, 2); 581 + d_instantiate(dentry, new_inode); 582 + inc_nlink(parent_inode); 583 + fsnotify_mkdir(parent_inode, dentry); 584 + 585 + out: 586 + inode_unlock(parent_inode); 587 + return dentry; 588 + } 589 + 590 + static int binder_features_show(struct seq_file *m, void *unused) 591 + { 592 + bool *feature = m->private; 593 + 594 + seq_printf(m, "%d\n", *feature); 595 + 596 + return 0; 597 + } 598 + DEFINE_SHOW_ATTRIBUTE(binder_features); 599 + 600 + static int init_binder_features(struct super_block *sb) 601 + { 602 + struct dentry *dentry, *dir; 603 + 604 + dir = binderfs_create_dir(sb->s_root, "features"); 605 + if (IS_ERR(dir)) 606 + return PTR_ERR(dir); 607 + 608 + dentry = rust_binderfs_create_file(dir, "oneway_spam_detection", 609 + &binder_features_fops, 610 + &binder_features.oneway_spam_detection); 611 + if (IS_ERR(dentry)) 612 + return PTR_ERR(dentry); 613 + 614 + dentry = rust_binderfs_create_file(dir, "extended_error", 615 + &binder_features_fops, 616 + &binder_features.extended_error); 617 + if (IS_ERR(dentry)) 618 + return PTR_ERR(dentry); 619 + 620 + dentry = rust_binderfs_create_file(dir, "freeze_notification", 621 + &binder_features_fops, 622 + &binder_features.freeze_notification); 623 + if (IS_ERR(dentry)) 624 + return PTR_ERR(dentry); 625 + 626 + return 0; 627 + } 628 + 629 + static int init_binder_logs(struct super_block *sb) 630 + { 631 + struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; 632 + struct binderfs_info *info; 633 + int ret = 0; 634 + 635 + binder_logs_root_dir = binderfs_create_dir(sb->s_root, 636 + "binder_logs"); 637 + if (IS_ERR(binder_logs_root_dir)) { 638 + ret = PTR_ERR(binder_logs_root_dir); 639 + goto out; 640 + } 641 + 642 + dentry = rust_binderfs_create_file(binder_logs_root_dir, "stats", 643 + &rust_binder_stats_fops, NULL); 644 + if (IS_ERR(dentry)) { 645 + ret = PTR_ERR(dentry); 646 + goto out; 647 + } 648 + 649 + dentry = rust_binderfs_create_file(binder_logs_root_dir, "state", 650 + &rust_binder_state_fops, NULL); 651 + if (IS_ERR(dentry)) { 652 + ret = PTR_ERR(dentry); 653 + goto out; 654 + } 655 + 656 + dentry = rust_binderfs_create_file(binder_logs_root_dir, "transactions", 657 + &rust_binder_transactions_fops, NULL); 658 + if (IS_ERR(dentry)) { 659 + ret = PTR_ERR(dentry); 660 + goto out; 661 + } 662 + 663 + proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); 664 + if (IS_ERR(proc_log_dir)) { 665 + ret = PTR_ERR(proc_log_dir); 666 + goto out; 667 + } 668 + info = sb->s_fs_info; 669 + info->proc_log_dir = proc_log_dir; 670 + 671 + out: 672 + return ret; 673 + } 674 + 675 + static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc) 676 + { 677 + int ret; 678 + struct binderfs_info *info; 679 + struct binderfs_mount_opts *ctx = fc->fs_private; 680 + struct inode *inode = NULL; 681 + struct 
binderfs_device device_info = {}; 682 + const char *name; 683 + size_t len; 684 + 685 + sb->s_blocksize = PAGE_SIZE; 686 + sb->s_blocksize_bits = PAGE_SHIFT; 687 + 688 + /* 689 + * The binderfs filesystem can be mounted by userns root in a 690 + * non-initial userns. By default such mounts have the SB_I_NODEV flag 691 + * set in s_iflags to prevent security issues where userns root can 692 + * just create random device nodes via mknod() since it owns the 693 + * filesystem mount. But binderfs does not allow to create any files 694 + * including devices nodes. The only way to create binder devices nodes 695 + * is through the binder-control device which userns root is explicitly 696 + * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both 697 + * necessary and safe. 698 + */ 699 + sb->s_iflags &= ~SB_I_NODEV; 700 + sb->s_iflags |= SB_I_NOEXEC; 701 + sb->s_magic = RUST_BINDERFS_SUPER_MAGIC; 702 + sb->s_op = &binderfs_super_ops; 703 + sb->s_time_gran = 1; 704 + 705 + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); 706 + if (!sb->s_fs_info) 707 + return -ENOMEM; 708 + info = sb->s_fs_info; 709 + 710 + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); 711 + 712 + info->root_gid = make_kgid(sb->s_user_ns, 0); 713 + if (!gid_valid(info->root_gid)) 714 + info->root_gid = GLOBAL_ROOT_GID; 715 + info->root_uid = make_kuid(sb->s_user_ns, 0); 716 + if (!uid_valid(info->root_uid)) 717 + info->root_uid = GLOBAL_ROOT_UID; 718 + info->mount_opts.max = ctx->max; 719 + info->mount_opts.stats_mode = ctx->stats_mode; 720 + 721 + inode = new_inode(sb); 722 + if (!inode) 723 + return -ENOMEM; 724 + 725 + inode->i_ino = FIRST_INODE; 726 + inode->i_fop = &simple_dir_operations; 727 + inode->i_mode = S_IFDIR | 0755; 728 + simple_inode_init_ts(inode); 729 + inode->i_op = &binderfs_dir_inode_operations; 730 + set_nlink(inode, 2); 731 + 732 + sb->s_root = d_make_root(inode); 733 + if (!sb->s_root) 734 + return -ENOMEM; 735 + 736 + ret = binderfs_binder_ctl_create(sb); 737 + if (ret) 738 + return ret; 739 + 740 + name = rust_binder_devices_param; 741 + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { 742 + strscpy(device_info.name, name, len + 1); 743 + ret = binderfs_binder_device_create(inode, NULL, &device_info); 744 + if (ret) 745 + return ret; 746 + name += len; 747 + if (*name == ',') 748 + name++; 749 + } 750 + 751 + ret = init_binder_features(sb); 752 + if (ret) 753 + return ret; 754 + 755 + if (info->mount_opts.stats_mode == binderfs_stats_mode_global) 756 + return init_binder_logs(sb); 757 + 758 + return 0; 759 + } 760 + 761 + static int binderfs_fs_context_get_tree(struct fs_context *fc) 762 + { 763 + return get_tree_nodev(fc, binderfs_fill_super); 764 + } 765 + 766 + static void binderfs_fs_context_free(struct fs_context *fc) 767 + { 768 + struct binderfs_mount_opts *ctx = fc->fs_private; 769 + 770 + kfree(ctx); 771 + } 772 + 773 + static const struct fs_context_operations binderfs_fs_context_ops = { 774 + .free = binderfs_fs_context_free, 775 + .get_tree = binderfs_fs_context_get_tree, 776 + .parse_param = binderfs_fs_context_parse_param, 777 + .reconfigure = binderfs_fs_context_reconfigure, 778 + }; 779 + 780 + static int binderfs_init_fs_context(struct fs_context *fc) 781 + { 782 + struct binderfs_mount_opts *ctx; 783 + 784 + ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL); 785 + if (!ctx) 786 + return -ENOMEM; 787 + 788 + ctx->max = BINDERFS_MAX_MINOR; 789 + ctx->stats_mode = binderfs_stats_mode_unset; 790 + 791 + fc->fs_private = 
ctx; 792 + fc->ops = &binderfs_fs_context_ops; 793 + 794 + return 0; 795 + } 796 + 797 + static void binderfs_kill_super(struct super_block *sb) 798 + { 799 + struct binderfs_info *info = sb->s_fs_info; 800 + 801 + /* 802 + * During inode eviction struct binderfs_info is needed. 803 + * So first wipe the super_block then free struct binderfs_info. 804 + */ 805 + kill_litter_super(sb); 806 + 807 + if (info && info->ipc_ns) 808 + put_ipc_ns(info->ipc_ns); 809 + 810 + kfree(info); 811 + } 812 + 813 + static struct file_system_type binder_fs_type = { 814 + .name = "binder", 815 + .init_fs_context = binderfs_init_fs_context, 816 + .parameters = binderfs_fs_parameters, 817 + .kill_sb = binderfs_kill_super, 818 + .fs_flags = FS_USERNS_MOUNT, 819 + }; 820 + 821 + int init_rust_binderfs(void) 822 + { 823 + int ret; 824 + const char *name; 825 + size_t len; 826 + 827 + /* Verify that the default binderfs device names are valid. */ 828 + name = rust_binder_devices_param; 829 + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { 830 + if (len > BINDERFS_MAX_NAME) 831 + return -E2BIG; 832 + name += len; 833 + if (*name == ',') 834 + name++; 835 + } 836 + 837 + /* Allocate new major number for binderfs. */ 838 + ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, 839 + "rust_binder"); 840 + if (ret) 841 + return ret; 842 + 843 + ret = register_filesystem(&binder_fs_type); 844 + if (ret) { 845 + unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); 846 + return ret; 847 + } 848 + 849 + return ret; 850 + }
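For completeness, here is how userspace typically consumes the features directory that init_binder_features() creates. This is a userspace sketch, not part of the patch: the feature file names come from the file above, each file prints "0" or "1" via binder_features_show(), and the /dev/binderfs mount point is an assumption (a common convention, e.g. after mount -t binder binder /dev/binderfs) rather than something the driver enforces.

    // Userspace sketch: probe binderfs feature files.
    use std::fs;

    fn feature_enabled(mount: &str, feature: &str) -> bool {
        let path = format!("{mount}/features/{feature}");
        // Each feature file contains "1\n" when the feature is supported.
        matches!(fs::read_to_string(path), Ok(s) if s.trim() == "1")
    }

    fn main() {
        let mount = "/dev/binderfs"; // assumed mount point
        for f in ["oneway_spam_detection", "extended_error", "freeze_notification"] {
            println!("{f}: {}", feature_enabled(mount, f));
        }
    }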
+89
drivers/android/binder/stats.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + //! Keep track of statistics for binder_logs. 6 + 7 + use crate::defs::*; 8 + use core::sync::atomic::{AtomicU32, Ordering::Relaxed}; 9 + use kernel::{ioctl::_IOC_NR, seq_file::SeqFile, seq_print}; 10 + 11 + const BC_COUNT: usize = _IOC_NR(BC_REPLY_SG) as usize + 1; 12 + const BR_COUNT: usize = _IOC_NR(BR_TRANSACTION_PENDING_FROZEN) as usize + 1; 13 + 14 + pub(crate) static GLOBAL_STATS: BinderStats = BinderStats::new(); 15 + 16 + pub(crate) struct BinderStats { 17 + bc: [AtomicU32; BC_COUNT], 18 + br: [AtomicU32; BR_COUNT], 19 + } 20 + 21 + impl BinderStats { 22 + pub(crate) const fn new() -> Self { 23 + #[expect(clippy::declare_interior_mutable_const)] 24 + const ZERO: AtomicU32 = AtomicU32::new(0); 25 + 26 + Self { 27 + bc: [ZERO; BC_COUNT], 28 + br: [ZERO; BR_COUNT], 29 + } 30 + } 31 + 32 + pub(crate) fn inc_bc(&self, bc: u32) { 33 + let idx = _IOC_NR(bc) as usize; 34 + if let Some(bc_ref) = self.bc.get(idx) { 35 + bc_ref.fetch_add(1, Relaxed); 36 + } 37 + } 38 + 39 + pub(crate) fn inc_br(&self, br: u32) { 40 + let idx = _IOC_NR(br) as usize; 41 + if let Some(br_ref) = self.br.get(idx) { 42 + br_ref.fetch_add(1, Relaxed); 43 + } 44 + } 45 + 46 + pub(crate) fn debug_print(&self, prefix: &str, m: &SeqFile) { 47 + for (i, cnt) in self.bc.iter().enumerate() { 48 + let cnt = cnt.load(Relaxed); 49 + if cnt > 0 { 50 + seq_print!(m, "{}{}: {}\n", prefix, command_string(i), cnt); 51 + } 52 + } 53 + for (i, cnt) in self.br.iter().enumerate() { 54 + let cnt = cnt.load(Relaxed); 55 + if cnt > 0 { 56 + seq_print!(m, "{}{}: {}\n", prefix, return_string(i), cnt); 57 + } 58 + } 59 + } 60 + } 61 + 62 + mod strings { 63 + use core::str::from_utf8_unchecked; 64 + use kernel::str::CStr; 65 + 66 + extern "C" { 67 + static binder_command_strings: [*const u8; super::BC_COUNT]; 68 + static binder_return_strings: [*const u8; super::BR_COUNT]; 69 + } 70 + 71 + pub(super) fn command_string(i: usize) -> &'static str { 72 + // SAFETY: Accessing `binder_command_strings` is always safe. 73 + let c_str_ptr = unsafe { binder_command_strings[i] }; 74 + // SAFETY: The `binder_command_strings` array only contains nul-terminated strings. 75 + let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.as_bytes(); 76 + // SAFETY: The `binder_command_strings` array only contains strings with ascii-chars. 77 + unsafe { from_utf8_unchecked(bytes) } 78 + } 79 + 80 + pub(super) fn return_string(i: usize) -> &'static str { 81 + // SAFETY: Accessing `binder_return_strings` is always safe. 82 + let c_str_ptr = unsafe { binder_return_strings[i] }; 83 + // SAFETY: The `binder_return_strings` array only contains nul-terminated strings. 84 + let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.as_bytes(); 85 + // SAFETY: The `binder_return_strings` array only contains strings with ascii-chars. 86 + unsafe { from_utf8_unchecked(bytes) } 87 + } 88 + } 89 + use strings::{command_string, return_string};
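The array sizing and indexing above rely on _IOC_NR(), which on Linux extracts the low 8 bits of an ioctl code (the per-driver command number), so consecutive BC_*/BR_* values land in consecutive array slots. A minimal userspace sketch of that indexing follows; ioc_nr and the example codes are illustrative only, not the real BC_*/BR_* constants from uapi/linux/android/binder.h.

    // Userspace sketch of the per-command counter indexing used by BinderStats.
    const fn ioc_nr(cmd: u32) -> usize {
        // _IOC_NR(): the command number lives in bits 0..8 of the ioctl code.
        (cmd & 0xff) as usize
    }

    fn main() {
        let mut counts = [0u32; 256];
        // Arbitrary example ioctl codes; only the low byte selects the slot.
        for cmd in [0x4004_7201_u32, 0x4004_7201, 0x0000_7202] {
            counts[ioc_nr(cmd)] += 1;
        }
        assert_eq!(counts[0x01], 2);
        assert_eq!(counts[0x02], 1);
    }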
+1596
drivers/android/binder/thread.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + //! This module defines the `Thread` type, which represents a userspace thread that is using 6 + //! binder. 7 + //! 8 + //! The `Process` object stores all of the threads in an rb tree. 9 + 10 + use kernel::{ 11 + bindings, 12 + fs::{File, LocalFile}, 13 + list::{AtomicTracker, List, ListArc, ListLinks, TryNewListArc}, 14 + prelude::*, 15 + security, 16 + seq_file::SeqFile, 17 + seq_print, 18 + sync::poll::{PollCondVar, PollTable}, 19 + sync::{Arc, SpinLock}, 20 + task::Task, 21 + types::ARef, 22 + uaccess::UserSlice, 23 + uapi, 24 + }; 25 + 26 + use crate::{ 27 + allocation::{Allocation, AllocationView, BinderObject, BinderObjectRef, NewAllocation}, 28 + defs::*, 29 + error::BinderResult, 30 + process::{GetWorkOrRegister, Process}, 31 + ptr_align, 32 + stats::GLOBAL_STATS, 33 + transaction::Transaction, 34 + BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverCode, DeliverToRead, 35 + }; 36 + 37 + use core::{ 38 + mem::size_of, 39 + sync::atomic::{AtomicU32, Ordering}, 40 + }; 41 + 42 + /// Stores the layout of the scatter-gather entries. This is used during the `translate_objects` 43 + /// call and is discarded when it returns. 44 + struct ScatterGatherState { 45 + /// A struct that tracks the amount of unused buffer space. 46 + unused_buffer_space: UnusedBufferSpace, 47 + /// Scatter-gather entries to copy. 48 + sg_entries: KVec<ScatterGatherEntry>, 49 + /// Indexes into `sg_entries` corresponding to the last binder_buffer_object that 50 + /// was processed and all of its ancestors. The array is in sorted order. 51 + ancestors: KVec<usize>, 52 + } 53 + 54 + /// This entry specifies an additional buffer that should be copied using the scatter-gather 55 + /// mechanism. 56 + struct ScatterGatherEntry { 57 + /// The index in the offset array of the BINDER_TYPE_PTR that this entry originates from. 58 + obj_index: usize, 59 + /// Offset in target buffer. 60 + offset: usize, 61 + /// User address in source buffer. 62 + sender_uaddr: usize, 63 + /// Number of bytes to copy. 64 + length: usize, 65 + /// The minimum offset of the next fixup in this buffer. 66 + fixup_min_offset: usize, 67 + /// The offsets within this buffer that contain pointers which should be translated. 68 + pointer_fixups: KVec<PointerFixupEntry>, 69 + } 70 + 71 + /// This entry specifies that a fixup should happen at `target_offset` of the 72 + /// buffer. If `skip` is nonzero, then the fixup is a `binder_fd_array_object` 73 + /// and is applied later. Otherwise if `skip` is zero, then the size of the 74 + /// fixup is `sizeof::<u64>()` and `pointer_value` is written to the buffer. 75 + struct PointerFixupEntry { 76 + /// The number of bytes to skip, or zero for a `binder_buffer_object` fixup. 77 + skip: usize, 78 + /// The translated pointer to write when `skip` is zero. 79 + pointer_value: u64, 80 + /// The offset at which the value should be written. The offset is relative 81 + /// to the original buffer. 82 + target_offset: usize, 83 + } 84 + 85 + /// Return type of `apply_and_validate_fixup_in_parent`. 86 + struct ParentFixupInfo { 87 + /// The index of the parent buffer in `sg_entries`. 88 + parent_sg_index: usize, 89 + /// The number of ancestors of the buffer. 90 + /// 91 + /// The buffer is considered an ancestor of itself, so this is always at 92 + /// least one. 93 + num_ancestors: usize, 94 + /// New value of `fixup_min_offset` if this fixup is applied. 
95 + new_min_offset: usize, 96 + /// The offset of the fixup in the target buffer. 97 + target_offset: usize, 98 + } 99 + 100 + impl ScatterGatherState { 101 + /// Called when a `binder_buffer_object` or `binder_fd_array_object` tries 102 + /// to access a region in its parent buffer. These accesses have various 103 + /// restrictions, which this method verifies. 104 + /// 105 + /// The `parent_offset` and `length` arguments describe the offset and 106 + /// length of the access in the parent buffer. 107 + /// 108 + /// # Detailed restrictions 109 + /// 110 + /// Obviously the fixup must be in-bounds for the parent buffer. 111 + /// 112 + /// For safety reasons, we only allow fixups inside a buffer to happen 113 + /// at increasing offsets; additionally, we only allow fixup on the last 114 + /// buffer object that was verified, or one of its parents. 115 + /// 116 + /// Example of what is allowed: 117 + /// 118 + /// A 119 + /// B (parent = A, offset = 0) 120 + /// C (parent = A, offset = 16) 121 + /// D (parent = C, offset = 0) 122 + /// E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset) 123 + /// 124 + /// Examples of what is not allowed: 125 + /// 126 + /// Decreasing offsets within the same parent: 127 + /// A 128 + /// C (parent = A, offset = 16) 129 + /// B (parent = A, offset = 0) // decreasing offset within A 130 + /// 131 + /// Referring to a parent that wasn't the last object or any of its parents: 132 + /// A 133 + /// B (parent = A, offset = 0) 134 + /// C (parent = A, offset = 0) 135 + /// C (parent = A, offset = 16) 136 + /// D (parent = B, offset = 0) // B is not A or any of A's parents 137 + fn validate_parent_fixup( 138 + &self, 139 + parent: usize, 140 + parent_offset: usize, 141 + length: usize, 142 + ) -> Result<ParentFixupInfo> { 143 + // Using `position` would also be correct, but `rposition` avoids 144 + // quadratic running times. 145 + let ancestors_i = self 146 + .ancestors 147 + .iter() 148 + .copied() 149 + .rposition(|sg_idx| self.sg_entries[sg_idx].obj_index == parent) 150 + .ok_or(EINVAL)?; 151 + let sg_idx = self.ancestors[ancestors_i]; 152 + let sg_entry = match self.sg_entries.get(sg_idx) { 153 + Some(sg_entry) => sg_entry, 154 + None => { 155 + pr_err!( 156 + "self.ancestors[{}] is {}, but self.sg_entries.len() is {}", 157 + ancestors_i, 158 + sg_idx, 159 + self.sg_entries.len() 160 + ); 161 + return Err(EINVAL); 162 + } 163 + }; 164 + if sg_entry.fixup_min_offset > parent_offset { 165 + pr_warn!( 166 + "validate_parent_fixup: fixup_min_offset={}, parent_offset={}", 167 + sg_entry.fixup_min_offset, 168 + parent_offset 169 + ); 170 + return Err(EINVAL); 171 + } 172 + let new_min_offset = parent_offset.checked_add(length).ok_or(EINVAL)?; 173 + if new_min_offset > sg_entry.length { 174 + pr_warn!( 175 + "validate_parent_fixup: new_min_offset={}, sg_entry.length={}", 176 + new_min_offset, 177 + sg_entry.length 178 + ); 179 + return Err(EINVAL); 180 + } 181 + let target_offset = sg_entry.offset.checked_add(parent_offset).ok_or(EINVAL)?; 182 + // The `ancestors_i + 1` operation can't overflow since the output of the addition is at 183 + // most `self.ancestors.len()`, which also fits in a usize. 184 + Ok(ParentFixupInfo { 185 + parent_sg_index: sg_idx, 186 + num_ancestors: ancestors_i + 1, 187 + new_min_offset, 188 + target_offset, 189 + }) 190 + } 191 + } 192 + 193 + /// Keeps track of how much unused buffer space is left.
The initial amount is the number of bytes 194 + /// requested by the user using the `buffers_size` field of `binder_transaction_data_sg`. Each time 195 + /// we translate an object of type `BINDER_TYPE_PTR`, some of the unused buffer space is consumed. 196 + struct UnusedBufferSpace { 197 + /// The start of the remaining space. 198 + offset: usize, 199 + /// The end of the remaining space. 200 + limit: usize, 201 + } 202 + impl UnusedBufferSpace { 203 + /// Claim the next `size` bytes from the unused buffer space. The offset for the claimed chunk 204 + /// into the buffer is returned. 205 + fn claim_next(&mut self, size: usize) -> Result<usize> { 206 + // We require every chunk to be aligned. 207 + let size = ptr_align(size).ok_or(EINVAL)?; 208 + let new_offset = self.offset.checked_add(size).ok_or(EINVAL)?; 209 + 210 + if new_offset <= self.limit { 211 + let offset = self.offset; 212 + self.offset = new_offset; 213 + Ok(offset) 214 + } else { 215 + Err(EINVAL) 216 + } 217 + } 218 + } 219 + 220 + pub(crate) enum PushWorkRes { 221 + Ok, 222 + FailedDead(DLArc<dyn DeliverToRead>), 223 + } 224 + 225 + impl PushWorkRes { 226 + fn is_ok(&self) -> bool { 227 + match self { 228 + PushWorkRes::Ok => true, 229 + PushWorkRes::FailedDead(_) => false, 230 + } 231 + } 232 + } 233 + 234 + /// The fields of `Thread` protected by the spinlock. 235 + struct InnerThread { 236 + /// Determines the looper state of the thread. It is a bit-wise combination of the constants 237 + /// prefixed with `LOOPER_`. 238 + looper_flags: u32, 239 + 240 + /// Determines whether the looper should return. 241 + looper_need_return: bool, 242 + 243 + /// Determines if thread is dead. 244 + is_dead: bool, 245 + 246 + /// Work item used to deliver error codes to the thread that started a transaction. Stored here 247 + /// so that it can be reused. 248 + reply_work: DArc<ThreadError>, 249 + 250 + /// Work item used to deliver error codes to the current thread. Stored here so that it can be 251 + /// reused. 252 + return_work: DArc<ThreadError>, 253 + 254 + /// Determines whether the work list below should be processed. When set to false, `work_list` 255 + /// is treated as if it were empty. 256 + process_work_list: bool, 257 + /// List of work items to deliver to userspace. 258 + work_list: List<DTRWrap<dyn DeliverToRead>>, 259 + current_transaction: Option<DArc<Transaction>>, 260 + 261 + /// Extended error information for this thread. 
262 + extended_error: ExtendedError, 263 + } 264 + 265 + const LOOPER_REGISTERED: u32 = 0x01; 266 + const LOOPER_ENTERED: u32 = 0x02; 267 + const LOOPER_EXITED: u32 = 0x04; 268 + const LOOPER_INVALID: u32 = 0x08; 269 + const LOOPER_WAITING: u32 = 0x10; 270 + const LOOPER_WAITING_PROC: u32 = 0x20; 271 + const LOOPER_POLL: u32 = 0x40; 272 + 273 + impl InnerThread { 274 + fn new() -> Result<Self> { 275 + fn next_err_id() -> u32 { 276 + static EE_ID: AtomicU32 = AtomicU32::new(0); 277 + EE_ID.fetch_add(1, Ordering::Relaxed) 278 + } 279 + 280 + Ok(Self { 281 + looper_flags: 0, 282 + looper_need_return: false, 283 + is_dead: false, 284 + process_work_list: false, 285 + reply_work: ThreadError::try_new()?, 286 + return_work: ThreadError::try_new()?, 287 + work_list: List::new(), 288 + current_transaction: None, 289 + extended_error: ExtendedError::new(next_err_id(), BR_OK, 0), 290 + }) 291 + } 292 + 293 + fn pop_work(&mut self) -> Option<DLArc<dyn DeliverToRead>> { 294 + if !self.process_work_list { 295 + return None; 296 + } 297 + 298 + let ret = self.work_list.pop_front(); 299 + self.process_work_list = !self.work_list.is_empty(); 300 + ret 301 + } 302 + 303 + fn push_work(&mut self, work: DLArc<dyn DeliverToRead>) -> PushWorkRes { 304 + if self.is_dead { 305 + PushWorkRes::FailedDead(work) 306 + } else { 307 + self.work_list.push_back(work); 308 + self.process_work_list = true; 309 + PushWorkRes::Ok 310 + } 311 + } 312 + 313 + fn push_reply_work(&mut self, code: u32) { 314 + if let Ok(work) = ListArc::try_from_arc(self.reply_work.clone()) { 315 + work.set_error_code(code); 316 + self.push_work(work); 317 + } else { 318 + pr_warn!("Thread reply work is already in use."); 319 + } 320 + } 321 + 322 + fn push_return_work(&mut self, reply: u32) { 323 + if let Ok(work) = ListArc::try_from_arc(self.return_work.clone()) { 324 + work.set_error_code(reply); 325 + self.push_work(work); 326 + } else { 327 + pr_warn!("Thread return work is already in use."); 328 + } 329 + } 330 + 331 + /// Used to push work items that do not need to be processed immediately and can wait until the 332 + /// thread gets another work item. 333 + fn push_work_deferred(&mut self, work: DLArc<dyn DeliverToRead>) { 334 + self.work_list.push_back(work); 335 + } 336 + 337 + /// Fetches the transaction this thread can reply to. If the thread has a pending transaction 338 + /// (that it could respond to) but it has also issued a transaction, it must first wait for the 339 + /// previously-issued transaction to complete. 340 + /// 341 + /// The `thread` parameter should be the thread containing this `ThreadInner`. 342 + fn pop_transaction_to_reply(&mut self, thread: &Thread) -> Result<DArc<Transaction>> { 343 + let transaction = self.current_transaction.take().ok_or(EINVAL)?; 344 + if core::ptr::eq(thread, transaction.from.as_ref()) { 345 + self.current_transaction = Some(transaction); 346 + return Err(EINVAL); 347 + } 348 + // Find a new current transaction for this thread. 
349 + self.current_transaction = transaction.find_from(thread).cloned(); 350 + Ok(transaction) 351 + } 352 + 353 + fn pop_transaction_replied(&mut self, transaction: &DArc<Transaction>) -> bool { 354 + match self.current_transaction.take() { 355 + None => false, 356 + Some(old) => { 357 + if !Arc::ptr_eq(transaction, &old) { 358 + self.current_transaction = Some(old); 359 + return false; 360 + } 361 + self.current_transaction = old.clone_next(); 362 + true 363 + } 364 + } 365 + } 366 + 367 + fn looper_enter(&mut self) { 368 + self.looper_flags |= LOOPER_ENTERED; 369 + if self.looper_flags & LOOPER_REGISTERED != 0 { 370 + self.looper_flags |= LOOPER_INVALID; 371 + } 372 + } 373 + 374 + fn looper_register(&mut self, valid: bool) { 375 + self.looper_flags |= LOOPER_REGISTERED; 376 + if !valid || self.looper_flags & LOOPER_ENTERED != 0 { 377 + self.looper_flags |= LOOPER_INVALID; 378 + } 379 + } 380 + 381 + fn looper_exit(&mut self) { 382 + self.looper_flags |= LOOPER_EXITED; 383 + } 384 + 385 + /// Determines whether the thread is part of a pool, i.e., if it is a looper. 386 + fn is_looper(&self) -> bool { 387 + self.looper_flags & (LOOPER_ENTERED | LOOPER_REGISTERED) != 0 388 + } 389 + 390 + /// Determines whether the thread should attempt to fetch work items from the process queue. 391 + /// This is generally case when the thread is registered as a looper and not part of a 392 + /// transaction stack. But if there is local work, we want to return to userspace before we 393 + /// deliver any remote work. 394 + fn should_use_process_work_queue(&self) -> bool { 395 + self.current_transaction.is_none() && !self.process_work_list && self.is_looper() 396 + } 397 + 398 + fn poll(&mut self) -> u32 { 399 + self.looper_flags |= LOOPER_POLL; 400 + if self.process_work_list || self.looper_need_return { 401 + bindings::POLLIN 402 + } else { 403 + 0 404 + } 405 + } 406 + } 407 + 408 + /// This represents a thread that's used with binder. 409 + #[pin_data] 410 + pub(crate) struct Thread { 411 + pub(crate) id: i32, 412 + pub(crate) process: Arc<Process>, 413 + pub(crate) task: ARef<Task>, 414 + #[pin] 415 + inner: SpinLock<InnerThread>, 416 + #[pin] 417 + work_condvar: PollCondVar, 418 + /// Used to insert this thread into the process' `ready_threads` list. 419 + /// 420 + /// INVARIANT: May never be used for any other list than the `self.process.ready_threads`. 421 + #[pin] 422 + links: ListLinks, 423 + #[pin] 424 + links_track: AtomicTracker, 425 + } 426 + 427 + kernel::list::impl_list_arc_safe! { 428 + impl ListArcSafe<0> for Thread { 429 + tracked_by links_track: AtomicTracker; 430 + } 431 + } 432 + kernel::list::impl_list_item! 
{ 433 + impl ListItem<0> for Thread { 434 + using ListLinks { self.links }; 435 + } 436 + } 437 + 438 + impl Thread { 439 + pub(crate) fn new(id: i32, process: Arc<Process>) -> Result<Arc<Self>> { 440 + let inner = InnerThread::new()?; 441 + 442 + Arc::pin_init( 443 + try_pin_init!(Thread { 444 + id, 445 + process, 446 + task: ARef::from(&**kernel::current!()), 447 + inner <- kernel::new_spinlock!(inner, "Thread::inner"), 448 + work_condvar <- kernel::new_poll_condvar!("Thread::work_condvar"), 449 + links <- ListLinks::new(), 450 + links_track <- AtomicTracker::new(), 451 + }), 452 + GFP_KERNEL, 453 + ) 454 + } 455 + 456 + #[inline(never)] 457 + pub(crate) fn debug_print(self: &Arc<Self>, m: &SeqFile, print_all: bool) -> Result<()> { 458 + let inner = self.inner.lock(); 459 + 460 + if print_all || inner.current_transaction.is_some() || !inner.work_list.is_empty() { 461 + seq_print!( 462 + m, 463 + " thread {}: l {:02x} need_return {}\n", 464 + self.id, 465 + inner.looper_flags, 466 + inner.looper_need_return, 467 + ); 468 + } 469 + 470 + let mut t_opt = inner.current_transaction.as_ref(); 471 + while let Some(t) = t_opt { 472 + if Arc::ptr_eq(&t.from, self) { 473 + t.debug_print_inner(m, " outgoing transaction "); 474 + t_opt = t.from_parent.as_ref(); 475 + } else if Arc::ptr_eq(&t.to, &self.process) { 476 + t.debug_print_inner(m, " incoming transaction "); 477 + t_opt = t.find_from(self); 478 + } else { 479 + t.debug_print_inner(m, " bad transaction "); 480 + t_opt = None; 481 + } 482 + } 483 + 484 + for work in &inner.work_list { 485 + work.debug_print(m, " ", " pending transaction ")?; 486 + } 487 + Ok(()) 488 + } 489 + 490 + pub(crate) fn get_extended_error(&self, data: UserSlice) -> Result { 491 + let mut writer = data.writer(); 492 + let ee = self.inner.lock().extended_error; 493 + writer.write(&ee)?; 494 + Ok(()) 495 + } 496 + 497 + pub(crate) fn set_current_transaction(&self, transaction: DArc<Transaction>) { 498 + self.inner.lock().current_transaction = Some(transaction); 499 + } 500 + 501 + pub(crate) fn has_current_transaction(&self) -> bool { 502 + self.inner.lock().current_transaction.is_some() 503 + } 504 + 505 + /// Attempts to fetch a work item from the thread-local queue. The behaviour if the queue is 506 + /// empty depends on `wait`: if it is true, the function waits for some work to be queued (or a 507 + /// signal); otherwise it returns indicating that none is available. 508 + fn get_work_local(self: &Arc<Self>, wait: bool) -> Result<Option<DLArc<dyn DeliverToRead>>> { 509 + { 510 + let mut inner = self.inner.lock(); 511 + if inner.looper_need_return { 512 + return Ok(inner.pop_work()); 513 + } 514 + } 515 + 516 + // Try once if the caller does not want to wait. 517 + if !wait { 518 + return self.inner.lock().pop_work().ok_or(EAGAIN).map(Some); 519 + } 520 + 521 + // Loop waiting only on the local queue (i.e., not registering with the process queue). 522 + let mut inner = self.inner.lock(); 523 + loop { 524 + if let Some(work) = inner.pop_work() { 525 + return Ok(Some(work)); 526 + } 527 + 528 + inner.looper_flags |= LOOPER_WAITING; 529 + let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner); 530 + inner.looper_flags &= !LOOPER_WAITING; 531 + 532 + if signal_pending { 533 + return Err(EINTR); 534 + } 535 + if inner.looper_need_return { 536 + return Ok(None); 537 + } 538 + } 539 + } 540 + 541 + /// Attempts to fetch a work item from the thread-local queue, falling back to the process-wide 542 + /// queue if none is available locally. 
543 + /// 544 + /// This must only be called when the thread is not participating in a transaction chain. If it 545 + /// is, the local version (`get_work_local`) should be used instead. 546 + fn get_work(self: &Arc<Self>, wait: bool) -> Result<Option<DLArc<dyn DeliverToRead>>> { 547 + // Try to get work from the thread's work queue, using only a local lock. 548 + { 549 + let mut inner = self.inner.lock(); 550 + if let Some(work) = inner.pop_work() { 551 + return Ok(Some(work)); 552 + } 553 + if inner.looper_need_return { 554 + drop(inner); 555 + return Ok(self.process.get_work()); 556 + } 557 + } 558 + 559 + // If the caller doesn't want to wait, try to grab work from the process queue. 560 + // 561 + // We know nothing will have been queued directly to the thread queue because it is not in 562 + // a transaction and it is not in the process' ready list. 563 + if !wait { 564 + return self.process.get_work().ok_or(EAGAIN).map(Some); 565 + } 566 + 567 + // Get work from the process queue. If none is available, atomically register as ready. 568 + let reg = match self.process.get_work_or_register(self) { 569 + GetWorkOrRegister::Work(work) => return Ok(Some(work)), 570 + GetWorkOrRegister::Register(reg) => reg, 571 + }; 572 + 573 + let mut inner = self.inner.lock(); 574 + loop { 575 + if let Some(work) = inner.pop_work() { 576 + return Ok(Some(work)); 577 + } 578 + 579 + inner.looper_flags |= LOOPER_WAITING | LOOPER_WAITING_PROC; 580 + let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner); 581 + inner.looper_flags &= !(LOOPER_WAITING | LOOPER_WAITING_PROC); 582 + 583 + if signal_pending || inner.looper_need_return { 584 + // We need to return now. We need to pull the thread off the list of ready threads 585 + // (by dropping `reg`), then check the state again after it's off the list to 586 + // ensure that something was not queued in the meantime. If something has been 587 + // queued, we just return it (instead of the error). 588 + drop(inner); 589 + drop(reg); 590 + 591 + let res = match self.inner.lock().pop_work() { 592 + Some(work) => Ok(Some(work)), 593 + None if signal_pending => Err(EINTR), 594 + None => Ok(None), 595 + }; 596 + return res; 597 + } 598 + } 599 + } 600 + 601 + /// Push the provided work item to be delivered to user space via this thread. 602 + /// 603 + /// Returns whether the item was successfully pushed. This can only fail if the thread is dead. 604 + pub(crate) fn push_work(&self, work: DLArc<dyn DeliverToRead>) -> PushWorkRes { 605 + let sync = work.should_sync_wakeup(); 606 + 607 + let res = self.inner.lock().push_work(work); 608 + 609 + if res.is_ok() { 610 + if sync { 611 + self.work_condvar.notify_sync(); 612 + } else { 613 + self.work_condvar.notify_one(); 614 + } 615 + } 616 + 617 + res 618 + } 619 + 620 + /// Attempts to push to given work item to the thread if it's a looper thread (i.e., if it's 621 + /// part of a thread pool) and is alive. Otherwise, push the work item to the process instead. 
622 + pub(crate) fn push_work_if_looper(&self, work: DLArc<dyn DeliverToRead>) -> BinderResult { 623 + let mut inner = self.inner.lock(); 624 + if inner.is_looper() && !inner.is_dead { 625 + inner.push_work(work); 626 + Ok(()) 627 + } else { 628 + drop(inner); 629 + self.process.push_work(work) 630 + } 631 + } 632 + 633 + pub(crate) fn push_work_deferred(&self, work: DLArc<dyn DeliverToRead>) { 634 + self.inner.lock().push_work_deferred(work); 635 + } 636 + 637 + pub(crate) fn push_return_work(&self, reply: u32) { 638 + self.inner.lock().push_return_work(reply); 639 + } 640 + 641 + fn translate_object( 642 + &self, 643 + obj_index: usize, 644 + offset: usize, 645 + object: BinderObjectRef<'_>, 646 + view: &mut AllocationView<'_>, 647 + allow_fds: bool, 648 + sg_state: &mut ScatterGatherState, 649 + ) -> BinderResult { 650 + match object { 651 + BinderObjectRef::Binder(obj) => { 652 + let strong = obj.hdr.type_ == BINDER_TYPE_BINDER; 653 + // SAFETY: `binder` is a `binder_uintptr_t`; any bit pattern is a valid 654 + // representation. 655 + let ptr = unsafe { obj.__bindgen_anon_1.binder } as _; 656 + let cookie = obj.cookie as _; 657 + let flags = obj.flags as _; 658 + let node = self 659 + .process 660 + .as_arc_borrow() 661 + .get_node(ptr, cookie, flags, strong, self)?; 662 + security::binder_transfer_binder(&self.process.cred, &view.alloc.process.cred)?; 663 + view.transfer_binder_object(offset, obj, strong, node)?; 664 + } 665 + BinderObjectRef::Handle(obj) => { 666 + let strong = obj.hdr.type_ == BINDER_TYPE_HANDLE; 667 + // SAFETY: `handle` is a `u32`; any bit pattern is a valid representation. 668 + let handle = unsafe { obj.__bindgen_anon_1.handle } as _; 669 + let node = self.process.get_node_from_handle(handle, strong)?; 670 + security::binder_transfer_binder(&self.process.cred, &view.alloc.process.cred)?; 671 + view.transfer_binder_object(offset, obj, strong, node)?; 672 + } 673 + BinderObjectRef::Fd(obj) => { 674 + if !allow_fds { 675 + return Err(EPERM.into()); 676 + } 677 + 678 + // SAFETY: `fd` is a `u32`; any bit pattern is a valid representation. 679 + let fd = unsafe { obj.__bindgen_anon_1.fd }; 680 + let file = LocalFile::fget(fd)?; 681 + // SAFETY: The binder driver never calls `fdget_pos` and this code runs from an 682 + // ioctl, so there are no active calls to `fdget_pos` on this thread. 683 + let file = unsafe { LocalFile::assume_no_fdget_pos(file) }; 684 + security::binder_transfer_file( 685 + &self.process.cred, 686 + &view.alloc.process.cred, 687 + &file, 688 + )?; 689 + 690 + let mut obj_write = BinderFdObject::default(); 691 + obj_write.hdr.type_ = BINDER_TYPE_FD; 692 + // This will be overwritten with the actual fd when the transaction is received. 693 + obj_write.__bindgen_anon_1.fd = u32::MAX; 694 + obj_write.cookie = obj.cookie; 695 + view.write::<BinderFdObject>(offset, &obj_write)?; 696 + 697 + const FD_FIELD_OFFSET: usize = 698 + core::mem::offset_of!(uapi::binder_fd_object, __bindgen_anon_1.fd); 699 + 700 + let field_offset = offset + FD_FIELD_OFFSET; 701 + 702 + view.alloc.info_add_fd(file, field_offset, false)?; 703 + } 704 + BinderObjectRef::Ptr(obj) => { 705 + let obj_length = obj.length.try_into().map_err(|_| EINVAL)?; 706 + let alloc_offset = match sg_state.unused_buffer_space.claim_next(obj_length) { 707 + Ok(alloc_offset) => alloc_offset, 708 + Err(err) => { 709 + pr_warn!( 710 + "Failed to claim space for a BINDER_TYPE_PTR. 
(offset: {}, limit: {}, size: {})", 711 + sg_state.unused_buffer_space.offset, 712 + sg_state.unused_buffer_space.limit, 713 + obj_length, 714 + ); 715 + return Err(err.into()); 716 + } 717 + }; 718 + 719 + let sg_state_idx = sg_state.sg_entries.len(); 720 + sg_state.sg_entries.push( 721 + ScatterGatherEntry { 722 + obj_index, 723 + offset: alloc_offset, 724 + sender_uaddr: obj.buffer as _, 725 + length: obj_length, 726 + pointer_fixups: KVec::new(), 727 + fixup_min_offset: 0, 728 + }, 729 + GFP_KERNEL, 730 + )?; 731 + 732 + let buffer_ptr_in_user_space = (view.alloc.ptr + alloc_offset) as u64; 733 + 734 + if obj.flags & uapi::BINDER_BUFFER_FLAG_HAS_PARENT == 0 { 735 + sg_state.ancestors.clear(); 736 + sg_state.ancestors.push(sg_state_idx, GFP_KERNEL)?; 737 + } else { 738 + // Another buffer also has a pointer to this buffer, and we need to fixup that 739 + // pointer too. 740 + 741 + let parent_index = usize::try_from(obj.parent).map_err(|_| EINVAL)?; 742 + let parent_offset = usize::try_from(obj.parent_offset).map_err(|_| EINVAL)?; 743 + 744 + let info = sg_state.validate_parent_fixup( 745 + parent_index, 746 + parent_offset, 747 + size_of::<u64>(), 748 + )?; 749 + 750 + sg_state.ancestors.truncate(info.num_ancestors); 751 + sg_state.ancestors.push(sg_state_idx, GFP_KERNEL)?; 752 + 753 + let parent_entry = match sg_state.sg_entries.get_mut(info.parent_sg_index) { 754 + Some(parent_entry) => parent_entry, 755 + None => { 756 + pr_err!( 757 + "validate_parent_fixup returned index out of bounds for sg.entries" 758 + ); 759 + return Err(EINVAL.into()); 760 + } 761 + }; 762 + 763 + parent_entry.fixup_min_offset = info.new_min_offset; 764 + parent_entry.pointer_fixups.push( 765 + PointerFixupEntry { 766 + skip: 0, 767 + pointer_value: buffer_ptr_in_user_space, 768 + target_offset: info.target_offset, 769 + }, 770 + GFP_KERNEL, 771 + )?; 772 + } 773 + 774 + let mut obj_write = BinderBufferObject::default(); 775 + obj_write.hdr.type_ = BINDER_TYPE_PTR; 776 + obj_write.flags = obj.flags; 777 + obj_write.buffer = buffer_ptr_in_user_space; 778 + obj_write.length = obj.length; 779 + obj_write.parent = obj.parent; 780 + obj_write.parent_offset = obj.parent_offset; 781 + view.write::<BinderBufferObject>(offset, &obj_write)?; 782 + } 783 + BinderObjectRef::Fda(obj) => { 784 + if !allow_fds { 785 + return Err(EPERM.into()); 786 + } 787 + let parent_index = usize::try_from(obj.parent).map_err(|_| EINVAL)?; 788 + let parent_offset = usize::try_from(obj.parent_offset).map_err(|_| EINVAL)?; 789 + let num_fds = usize::try_from(obj.num_fds).map_err(|_| EINVAL)?; 790 + let fds_len = num_fds.checked_mul(size_of::<u32>()).ok_or(EINVAL)?; 791 + 792 + let info = sg_state.validate_parent_fixup(parent_index, parent_offset, fds_len)?; 793 + view.alloc.info_add_fd_reserve(num_fds)?; 794 + 795 + sg_state.ancestors.truncate(info.num_ancestors); 796 + let parent_entry = match sg_state.sg_entries.get_mut(info.parent_sg_index) { 797 + Some(parent_entry) => parent_entry, 798 + None => { 799 + pr_err!( 800 + "validate_parent_fixup returned index out of bounds for sg.entries" 801 + ); 802 + return Err(EINVAL.into()); 803 + } 804 + }; 805 + 806 + parent_entry.fixup_min_offset = info.new_min_offset; 807 + parent_entry 808 + .pointer_fixups 809 + .push( 810 + PointerFixupEntry { 811 + skip: fds_len, 812 + pointer_value: 0, 813 + target_offset: info.target_offset, 814 + }, 815 + GFP_KERNEL, 816 + ) 817 + .map_err(|_| ENOMEM)?; 818 + 819 + let fda_uaddr = parent_entry 820 + .sender_uaddr 821 + .checked_add(parent_offset) 822 + 
.ok_or(EINVAL)?; 823 + let mut fda_bytes = KVec::new(); 824 + UserSlice::new(UserPtr::from_addr(fda_uaddr as _), fds_len) 825 + .read_all(&mut fda_bytes, GFP_KERNEL)?; 826 + 827 + if fds_len != fda_bytes.len() { 828 + pr_err!("UserSlice::read_all returned wrong length in BINDER_TYPE_FDA"); 829 + return Err(EINVAL.into()); 830 + } 831 + 832 + for i in (0..fds_len).step_by(size_of::<u32>()) { 833 + let fd = { 834 + let mut fd_bytes = [0u8; size_of::<u32>()]; 835 + fd_bytes.copy_from_slice(&fda_bytes[i..i + size_of::<u32>()]); 836 + u32::from_ne_bytes(fd_bytes) 837 + }; 838 + 839 + let file = LocalFile::fget(fd)?; 840 + // SAFETY: The binder driver never calls `fdget_pos` and this code runs from an 841 + // ioctl, so there are no active calls to `fdget_pos` on this thread. 842 + let file = unsafe { LocalFile::assume_no_fdget_pos(file) }; 843 + security::binder_transfer_file( 844 + &self.process.cred, 845 + &view.alloc.process.cred, 846 + &file, 847 + )?; 848 + 849 + // The `validate_parent_fixup` call ensuers that this addition will not 850 + // overflow. 851 + view.alloc.info_add_fd(file, info.target_offset + i, true)?; 852 + } 853 + drop(fda_bytes); 854 + 855 + let mut obj_write = BinderFdArrayObject::default(); 856 + obj_write.hdr.type_ = BINDER_TYPE_FDA; 857 + obj_write.num_fds = obj.num_fds; 858 + obj_write.parent = obj.parent; 859 + obj_write.parent_offset = obj.parent_offset; 860 + view.write::<BinderFdArrayObject>(offset, &obj_write)?; 861 + } 862 + } 863 + Ok(()) 864 + } 865 + 866 + fn apply_sg(&self, alloc: &mut Allocation, sg_state: &mut ScatterGatherState) -> BinderResult { 867 + for sg_entry in &mut sg_state.sg_entries { 868 + let mut end_of_previous_fixup = sg_entry.offset; 869 + let offset_end = sg_entry.offset.checked_add(sg_entry.length).ok_or(EINVAL)?; 870 + 871 + let mut reader = 872 + UserSlice::new(UserPtr::from_addr(sg_entry.sender_uaddr), sg_entry.length).reader(); 873 + for fixup in &mut sg_entry.pointer_fixups { 874 + let fixup_len = if fixup.skip == 0 { 875 + size_of::<u64>() 876 + } else { 877 + fixup.skip 878 + }; 879 + 880 + let target_offset_end = fixup.target_offset.checked_add(fixup_len).ok_or(EINVAL)?; 881 + if fixup.target_offset < end_of_previous_fixup || offset_end < target_offset_end { 882 + pr_warn!( 883 + "Fixups oob {} {} {} {}", 884 + fixup.target_offset, 885 + end_of_previous_fixup, 886 + offset_end, 887 + target_offset_end 888 + ); 889 + return Err(EINVAL.into()); 890 + } 891 + 892 + let copy_off = end_of_previous_fixup; 893 + let copy_len = fixup.target_offset - end_of_previous_fixup; 894 + if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) { 895 + pr_warn!("Failed copying into alloc: {:?}", err); 896 + return Err(err.into()); 897 + } 898 + if fixup.skip == 0 { 899 + let res = alloc.write::<u64>(fixup.target_offset, &fixup.pointer_value); 900 + if let Err(err) = res { 901 + pr_warn!("Failed copying ptr into alloc: {:?}", err); 902 + return Err(err.into()); 903 + } 904 + } 905 + if let Err(err) = reader.skip(fixup_len) { 906 + pr_warn!("Failed skipping {} from reader: {:?}", fixup_len, err); 907 + return Err(err.into()); 908 + } 909 + end_of_previous_fixup = target_offset_end; 910 + } 911 + let copy_off = end_of_previous_fixup; 912 + let copy_len = offset_end - end_of_previous_fixup; 913 + if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) { 914 + pr_warn!("Failed copying remainder into alloc: {:?}", err); 915 + return Err(err.into()); 916 + } 917 + } 918 + Ok(()) 919 + } 920 + 921 + /// This method copies the payload 
of a transaction into the target process. 922 + /// 923 + /// The resulting payload will have several different components, which will be stored next to 924 + /// each other in the allocation. Furthermore, various objects can be embedded in the payload, 925 + /// and those objects have to be translated so that they make sense to the target transaction. 926 + pub(crate) fn copy_transaction_data( 927 + &self, 928 + to_process: Arc<Process>, 929 + tr: &BinderTransactionDataSg, 930 + debug_id: usize, 931 + allow_fds: bool, 932 + txn_security_ctx_offset: Option<&mut usize>, 933 + ) -> BinderResult<NewAllocation> { 934 + let trd = &tr.transaction_data; 935 + let is_oneway = trd.flags & TF_ONE_WAY != 0; 936 + let mut secctx = if let Some(offset) = txn_security_ctx_offset { 937 + let secid = self.process.cred.get_secid(); 938 + let ctx = match security::SecurityCtx::from_secid(secid) { 939 + Ok(ctx) => ctx, 940 + Err(err) => { 941 + pr_warn!("Failed to get security ctx for id {}: {:?}", secid, err); 942 + return Err(err.into()); 943 + } 944 + }; 945 + Some((offset, ctx)) 946 + } else { 947 + None 948 + }; 949 + 950 + let data_size = trd.data_size.try_into().map_err(|_| EINVAL)?; 951 + let aligned_data_size = ptr_align(data_size).ok_or(EINVAL)?; 952 + let offsets_size = trd.offsets_size.try_into().map_err(|_| EINVAL)?; 953 + let aligned_offsets_size = ptr_align(offsets_size).ok_or(EINVAL)?; 954 + let buffers_size = tr.buffers_size.try_into().map_err(|_| EINVAL)?; 955 + let aligned_buffers_size = ptr_align(buffers_size).ok_or(EINVAL)?; 956 + let aligned_secctx_size = match secctx.as_ref() { 957 + Some((_offset, ctx)) => ptr_align(ctx.len()).ok_or(EINVAL)?, 958 + None => 0, 959 + }; 960 + 961 + // This guarantees that at least `sizeof(usize)` bytes will be allocated. 962 + let len = usize::max( 963 + aligned_data_size 964 + .checked_add(aligned_offsets_size) 965 + .and_then(|sum| sum.checked_add(aligned_buffers_size)) 966 + .and_then(|sum| sum.checked_add(aligned_secctx_size)) 967 + .ok_or(ENOMEM)?, 968 + size_of::<usize>(), 969 + ); 970 + let secctx_off = aligned_data_size + aligned_offsets_size + aligned_buffers_size; 971 + let mut alloc = 972 + match to_process.buffer_alloc(debug_id, len, is_oneway, self.process.task.pid()) { 973 + Ok(alloc) => alloc, 974 + Err(err) => { 975 + pr_warn!( 976 + "Failed to allocate buffer. len:{}, is_oneway:{}", 977 + len, 978 + is_oneway 979 + ); 980 + return Err(err); 981 + } 982 + }; 983 + 984 + // SAFETY: This accesses a union field, but it's okay because the field's type is valid for 985 + // all bit-patterns. 986 + let trd_data_ptr = unsafe { &trd.data.ptr }; 987 + let mut buffer_reader = 988 + UserSlice::new(UserPtr::from_addr(trd_data_ptr.buffer as _), data_size).reader(); 989 + let mut end_of_previous_object = 0; 990 + let mut sg_state = None; 991 + 992 + // Copy offsets if there are any. 993 + if offsets_size > 0 { 994 + { 995 + let mut reader = 996 + UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size) 997 + .reader(); 998 + alloc.copy_into(&mut reader, aligned_data_size, offsets_size)?; 999 + } 1000 + 1001 + let offsets_start = aligned_data_size; 1002 + let offsets_end = aligned_data_size + aligned_offsets_size; 1003 + 1004 + // This state is used for BINDER_TYPE_PTR objects. 
1005 + let sg_state = sg_state.insert(ScatterGatherState { 1006 + unused_buffer_space: UnusedBufferSpace { 1007 + offset: offsets_end, 1008 + limit: len, 1009 + }, 1010 + sg_entries: KVec::new(), 1011 + ancestors: KVec::new(), 1012 + }); 1013 + 1014 + // Traverse the objects specified. 1015 + let mut view = AllocationView::new(&mut alloc, data_size); 1016 + for (index, index_offset) in (offsets_start..offsets_end) 1017 + .step_by(size_of::<usize>()) 1018 + .enumerate() 1019 + { 1020 + let offset = view.alloc.read(index_offset)?; 1021 + 1022 + if offset < end_of_previous_object { 1023 + pr_warn!("Got transaction with invalid offset."); 1024 + return Err(EINVAL.into()); 1025 + } 1026 + 1027 + // Copy data between two objects. 1028 + if end_of_previous_object < offset { 1029 + view.copy_into( 1030 + &mut buffer_reader, 1031 + end_of_previous_object, 1032 + offset - end_of_previous_object, 1033 + )?; 1034 + } 1035 + 1036 + let mut object = BinderObject::read_from(&mut buffer_reader)?; 1037 + 1038 + match self.translate_object( 1039 + index, 1040 + offset, 1041 + object.as_ref(), 1042 + &mut view, 1043 + allow_fds, 1044 + sg_state, 1045 + ) { 1046 + Ok(()) => end_of_previous_object = offset + object.size(), 1047 + Err(err) => { 1048 + pr_warn!("Error while translating object."); 1049 + return Err(err); 1050 + } 1051 + } 1052 + 1053 + // Update the indexes containing objects to clean up. 1054 + let offset_after_object = index_offset + size_of::<usize>(); 1055 + view.alloc 1056 + .set_info_offsets(offsets_start..offset_after_object); 1057 + } 1058 + } 1059 + 1060 + // Copy remaining raw data. 1061 + alloc.copy_into( 1062 + &mut buffer_reader, 1063 + end_of_previous_object, 1064 + data_size - end_of_previous_object, 1065 + )?; 1066 + 1067 + if let Some(sg_state) = sg_state.as_mut() { 1068 + if let Err(err) = self.apply_sg(&mut alloc, sg_state) { 1069 + pr_warn!("Failure in apply_sg: {:?}", err); 1070 + return Err(err); 1071 + } 1072 + } 1073 + 1074 + if let Some((off_out, secctx)) = secctx.as_mut() { 1075 + if let Err(err) = alloc.write(secctx_off, secctx.as_bytes()) { 1076 + pr_warn!("Failed to write security context: {:?}", err); 1077 + return Err(err.into()); 1078 + } 1079 + **off_out = secctx_off; 1080 + } 1081 + Ok(alloc) 1082 + } 1083 + 1084 + fn unwind_transaction_stack(self: &Arc<Self>) { 1085 + let mut thread = self.clone(); 1086 + while let Ok(transaction) = { 1087 + let mut inner = thread.inner.lock(); 1088 + inner.pop_transaction_to_reply(thread.as_ref()) 1089 + } { 1090 + let reply = Err(BR_DEAD_REPLY); 1091 + if !transaction.from.deliver_single_reply(reply, &transaction) { 1092 + break; 1093 + } 1094 + 1095 + thread = transaction.from.clone(); 1096 + } 1097 + } 1098 + 1099 + pub(crate) fn deliver_reply( 1100 + &self, 1101 + reply: Result<DLArc<Transaction>, u32>, 1102 + transaction: &DArc<Transaction>, 1103 + ) { 1104 + if self.deliver_single_reply(reply, transaction) { 1105 + transaction.from.unwind_transaction_stack(); 1106 + } 1107 + } 1108 + 1109 + /// Delivers a reply to the thread that started a transaction. The reply can either be a 1110 + /// reply-transaction or an error code to be delivered instead. 1111 + /// 1112 + /// Returns whether the thread is dead. If it is, the caller is expected to unwind the 1113 + /// transaction stack by completing transactions for threads that are dead. 
1114 + fn deliver_single_reply( 1115 + &self, 1116 + reply: Result<DLArc<Transaction>, u32>, 1117 + transaction: &DArc<Transaction>, 1118 + ) -> bool { 1119 + if let Ok(transaction) = &reply { 1120 + transaction.set_outstanding(&mut self.process.inner.lock()); 1121 + } 1122 + 1123 + { 1124 + let mut inner = self.inner.lock(); 1125 + if !inner.pop_transaction_replied(transaction) { 1126 + return false; 1127 + } 1128 + 1129 + if inner.is_dead { 1130 + return true; 1131 + } 1132 + 1133 + match reply { 1134 + Ok(work) => { 1135 + inner.push_work(work); 1136 + } 1137 + Err(code) => inner.push_reply_work(code), 1138 + } 1139 + } 1140 + 1141 + // Notify the thread now that we've released the inner lock. 1142 + self.work_condvar.notify_sync(); 1143 + false 1144 + } 1145 + 1146 + /// Determines if the given transaction is the current transaction for this thread. 1147 + fn is_current_transaction(&self, transaction: &DArc<Transaction>) -> bool { 1148 + let inner = self.inner.lock(); 1149 + match &inner.current_transaction { 1150 + None => false, 1151 + Some(current) => Arc::ptr_eq(current, transaction), 1152 + } 1153 + } 1154 + 1155 + /// Determines the current top of the transaction stack. It fails if the top is in another 1156 + /// thread (i.e., this thread belongs to a stack but it has called another thread). The top is 1157 + /// [`None`] if the thread is not currently participating in a transaction stack. 1158 + fn top_of_transaction_stack(&self) -> Result<Option<DArc<Transaction>>> { 1159 + let inner = self.inner.lock(); 1160 + if let Some(cur) = &inner.current_transaction { 1161 + if core::ptr::eq(self, cur.from.as_ref()) { 1162 + pr_warn!("got new transaction with bad transaction stack"); 1163 + return Err(EINVAL); 1164 + } 1165 + Ok(Some(cur.clone())) 1166 + } else { 1167 + Ok(None) 1168 + } 1169 + } 1170 + 1171 + fn transaction<T>(self: &Arc<Self>, tr: &BinderTransactionDataSg, inner: T) 1172 + where 1173 + T: FnOnce(&Arc<Self>, &BinderTransactionDataSg) -> BinderResult, 1174 + { 1175 + if let Err(err) = inner(self, tr) { 1176 + if err.should_pr_warn() { 1177 + let mut ee = self.inner.lock().extended_error; 1178 + ee.command = err.reply; 1179 + ee.param = err.as_errno(); 1180 + pr_warn!( 1181 + "Transaction failed: {:?} my_pid:{}", 1182 + err, 1183 + self.process.pid_in_current_ns() 1184 + ); 1185 + } 1186 + 1187 + self.push_return_work(err.reply); 1188 + } 1189 + } 1190 + 1191 + fn transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult { 1192 + // SAFETY: Handle's type has no invalid bit patterns. 1193 + let handle = unsafe { tr.transaction_data.target.handle }; 1194 + let node_ref = self.process.get_transaction_node(handle)?; 1195 + security::binder_transaction(&self.process.cred, &node_ref.node.owner.cred)?; 1196 + // TODO: We need to ensure that there isn't a pending transaction in the work queue. How 1197 + // could this happen? 1198 + let top = self.top_of_transaction_stack()?; 1199 + let list_completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?; 1200 + let completion = list_completion.clone_arc(); 1201 + let transaction = Transaction::new(node_ref, top, self, tr)?; 1202 + 1203 + // Check that the transaction stack hasn't changed while the lock was released, then update 1204 + // it with the new transaction. 
1205 + { 1206 + let mut inner = self.inner.lock(); 1207 + if !transaction.is_stacked_on(&inner.current_transaction) { 1208 + pr_warn!("Transaction stack changed during transaction!"); 1209 + return Err(EINVAL.into()); 1210 + } 1211 + inner.current_transaction = Some(transaction.clone_arc()); 1212 + // We push the completion as a deferred work so that we wait for the reply before 1213 + // returning to userland. 1214 + inner.push_work_deferred(list_completion); 1215 + } 1216 + 1217 + if let Err(e) = transaction.submit() { 1218 + completion.skip(); 1219 + // Define `transaction` first to drop it after `inner`. 1220 + let transaction; 1221 + let mut inner = self.inner.lock(); 1222 + transaction = inner.current_transaction.take().unwrap(); 1223 + inner.current_transaction = transaction.clone_next(); 1224 + Err(e) 1225 + } else { 1226 + Ok(()) 1227 + } 1228 + } 1229 + 1230 + fn reply_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult { 1231 + let orig = self.inner.lock().pop_transaction_to_reply(self)?; 1232 + if !orig.from.is_current_transaction(&orig) { 1233 + return Err(EINVAL.into()); 1234 + } 1235 + 1236 + // We need to complete the transaction even if we cannot complete building the reply. 1237 + let out = (|| -> BinderResult<_> { 1238 + let completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?; 1239 + let process = orig.from.process.clone(); 1240 + let allow_fds = orig.flags & TF_ACCEPT_FDS != 0; 1241 + let reply = Transaction::new_reply(self, process, tr, allow_fds)?; 1242 + self.inner.lock().push_work(completion); 1243 + orig.from.deliver_reply(Ok(reply), &orig); 1244 + Ok(()) 1245 + })() 1246 + .map_err(|mut err| { 1247 + // At this point we only return `BR_TRANSACTION_COMPLETE` to the caller, and we must let 1248 + // the sender know that the transaction has completed (with an error in this case). 1249 + pr_warn!( 1250 + "Failure {:?} during reply - delivering BR_FAILED_REPLY to sender.", 1251 + err 1252 + ); 1253 + let reply = Err(BR_FAILED_REPLY); 1254 + orig.from.deliver_reply(reply, &orig); 1255 + err.reply = BR_TRANSACTION_COMPLETE; 1256 + err 1257 + }); 1258 + 1259 + out 1260 + } 1261 + 1262 + fn oneway_transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult { 1263 + // SAFETY: The `handle` field is valid for all possible byte values, so reading from the 1264 + // union is okay. 
1265 + let handle = unsafe { tr.transaction_data.target.handle }; 1266 + let node_ref = self.process.get_transaction_node(handle)?; 1267 + security::binder_transaction(&self.process.cred, &node_ref.node.owner.cred)?; 1268 + let transaction = Transaction::new(node_ref, None, self, tr)?; 1269 + let code = if self.process.is_oneway_spam_detection_enabled() 1270 + && transaction.oneway_spam_detected 1271 + { 1272 + BR_ONEWAY_SPAM_SUSPECT 1273 + } else { 1274 + BR_TRANSACTION_COMPLETE 1275 + }; 1276 + let list_completion = DTRWrap::arc_try_new(DeliverCode::new(code))?; 1277 + let completion = list_completion.clone_arc(); 1278 + self.inner.lock().push_work(list_completion); 1279 + match transaction.submit() { 1280 + Ok(()) => Ok(()), 1281 + Err(err) => { 1282 + completion.skip(); 1283 + Err(err) 1284 + } 1285 + } 1286 + } 1287 + 1288 + fn write(self: &Arc<Self>, req: &mut BinderWriteRead) -> Result { 1289 + let write_start = req.write_buffer.wrapping_add(req.write_consumed); 1290 + let write_len = req.write_size.saturating_sub(req.write_consumed); 1291 + let mut reader = 1292 + UserSlice::new(UserPtr::from_addr(write_start as _), write_len as _).reader(); 1293 + 1294 + while reader.len() >= size_of::<u32>() && self.inner.lock().return_work.is_unused() { 1295 + let before = reader.len(); 1296 + let cmd = reader.read::<u32>()?; 1297 + GLOBAL_STATS.inc_bc(cmd); 1298 + self.process.stats.inc_bc(cmd); 1299 + match cmd { 1300 + BC_TRANSACTION => { 1301 + let tr = reader.read::<BinderTransactionData>()?.with_buffers_size(0); 1302 + if tr.transaction_data.flags & TF_ONE_WAY != 0 { 1303 + self.transaction(&tr, Self::oneway_transaction_inner); 1304 + } else { 1305 + self.transaction(&tr, Self::transaction_inner); 1306 + } 1307 + } 1308 + BC_TRANSACTION_SG => { 1309 + let tr = reader.read::<BinderTransactionDataSg>()?; 1310 + if tr.transaction_data.flags & TF_ONE_WAY != 0 { 1311 + self.transaction(&tr, Self::oneway_transaction_inner); 1312 + } else { 1313 + self.transaction(&tr, Self::transaction_inner); 1314 + } 1315 + } 1316 + BC_REPLY => { 1317 + let tr = reader.read::<BinderTransactionData>()?.with_buffers_size(0); 1318 + self.transaction(&tr, Self::reply_inner) 1319 + } 1320 + BC_REPLY_SG => { 1321 + let tr = reader.read::<BinderTransactionDataSg>()?; 1322 + self.transaction(&tr, Self::reply_inner) 1323 + } 1324 + BC_FREE_BUFFER => { 1325 + let buffer = self.process.buffer_get(reader.read()?); 1326 + if let Some(buffer) = &buffer { 1327 + if buffer.looper_need_return_on_free() { 1328 + self.inner.lock().looper_need_return = true; 1329 + } 1330 + } 1331 + drop(buffer); 1332 + } 1333 + BC_INCREFS => { 1334 + self.process 1335 + .as_arc_borrow() 1336 + .update_ref(reader.read()?, true, false)? 1337 + } 1338 + BC_ACQUIRE => { 1339 + self.process 1340 + .as_arc_borrow() 1341 + .update_ref(reader.read()?, true, true)? 1342 + } 1343 + BC_RELEASE => { 1344 + self.process 1345 + .as_arc_borrow() 1346 + .update_ref(reader.read()?, false, true)? 1347 + } 1348 + BC_DECREFS => { 1349 + self.process 1350 + .as_arc_borrow() 1351 + .update_ref(reader.read()?, false, false)? 
1352 + } 1353 + BC_INCREFS_DONE => self.process.inc_ref_done(&mut reader, false)?, 1354 + BC_ACQUIRE_DONE => self.process.inc_ref_done(&mut reader, true)?, 1355 + BC_REQUEST_DEATH_NOTIFICATION => self.process.request_death(&mut reader, self)?, 1356 + BC_CLEAR_DEATH_NOTIFICATION => self.process.clear_death(&mut reader, self)?, 1357 + BC_DEAD_BINDER_DONE => self.process.dead_binder_done(reader.read()?, self), 1358 + BC_REGISTER_LOOPER => { 1359 + let valid = self.process.register_thread(); 1360 + self.inner.lock().looper_register(valid); 1361 + } 1362 + BC_ENTER_LOOPER => self.inner.lock().looper_enter(), 1363 + BC_EXIT_LOOPER => self.inner.lock().looper_exit(), 1364 + BC_REQUEST_FREEZE_NOTIFICATION => self.process.request_freeze_notif(&mut reader)?, 1365 + BC_CLEAR_FREEZE_NOTIFICATION => self.process.clear_freeze_notif(&mut reader)?, 1366 + BC_FREEZE_NOTIFICATION_DONE => self.process.freeze_notif_done(&mut reader)?, 1367 + 1368 + // Fail if given an unknown error code. 1369 + // BC_ATTEMPT_ACQUIRE and BC_ACQUIRE_RESULT are no longer supported. 1370 + _ => return Err(EINVAL), 1371 + } 1372 + // Update the number of write bytes consumed. 1373 + req.write_consumed += (before - reader.len()) as u64; 1374 + } 1375 + 1376 + Ok(()) 1377 + } 1378 + 1379 + fn read(self: &Arc<Self>, req: &mut BinderWriteRead, wait: bool) -> Result { 1380 + let read_start = req.read_buffer.wrapping_add(req.read_consumed); 1381 + let read_len = req.read_size.saturating_sub(req.read_consumed); 1382 + let mut writer = BinderReturnWriter::new( 1383 + UserSlice::new(UserPtr::from_addr(read_start as _), read_len as _).writer(), 1384 + self, 1385 + ); 1386 + let (in_pool, use_proc_queue) = { 1387 + let inner = self.inner.lock(); 1388 + (inner.is_looper(), inner.should_use_process_work_queue()) 1389 + }; 1390 + 1391 + let getter = if use_proc_queue { 1392 + Self::get_work 1393 + } else { 1394 + Self::get_work_local 1395 + }; 1396 + 1397 + // Reserve some room at the beginning of the read buffer so that we can send a 1398 + // BR_SPAWN_LOOPER if we need to. 1399 + let mut has_noop_placeholder = false; 1400 + if req.read_consumed == 0 { 1401 + if let Err(err) = writer.write_code(BR_NOOP) { 1402 + pr_warn!("Failure when writing BR_NOOP at beginning of buffer."); 1403 + return Err(err); 1404 + } 1405 + has_noop_placeholder = true; 1406 + } 1407 + 1408 + // Loop doing work while there is room in the buffer. 1409 + let initial_len = writer.len(); 1410 + while writer.len() >= size_of::<uapi::binder_transaction_data_secctx>() + 4 { 1411 + match getter(self, wait && initial_len == writer.len()) { 1412 + Ok(Some(work)) => match work.into_arc().do_work(self, &mut writer) { 1413 + Ok(true) => {} 1414 + Ok(false) => break, 1415 + Err(err) => { 1416 + return Err(err); 1417 + } 1418 + }, 1419 + Ok(None) => { 1420 + break; 1421 + } 1422 + Err(err) => { 1423 + // Propagate the error if we haven't written anything else. 1424 + if err != EINTR && err != EAGAIN { 1425 + pr_warn!("Failure in work getter: {:?}", err); 1426 + } 1427 + if initial_len == writer.len() { 1428 + return Err(err); 1429 + } else { 1430 + break; 1431 + } 1432 + } 1433 + } 1434 + } 1435 + 1436 + req.read_consumed += read_len - writer.len() as u64; 1437 + 1438 + // Write BR_SPAWN_LOOPER if the process needs more threads for its pool. 
1439 + if has_noop_placeholder && in_pool && self.process.needs_thread() { 1440 + let mut writer = 1441 + UserSlice::new(UserPtr::from_addr(req.read_buffer as _), req.read_size as _) 1442 + .writer(); 1443 + writer.write(&BR_SPAWN_LOOPER)?; 1444 + } 1445 + Ok(()) 1446 + } 1447 + 1448 + pub(crate) fn write_read(self: &Arc<Self>, data: UserSlice, wait: bool) -> Result { 1449 + let (mut reader, mut writer) = data.reader_writer(); 1450 + let mut req = reader.read::<BinderWriteRead>()?; 1451 + 1452 + // Go through the write buffer. 1453 + let mut ret = Ok(()); 1454 + if req.write_size > 0 { 1455 + ret = self.write(&mut req); 1456 + if let Err(err) = ret { 1457 + pr_warn!( 1458 + "Write failure {:?} in pid:{}", 1459 + err, 1460 + self.process.pid_in_current_ns() 1461 + ); 1462 + req.read_consumed = 0; 1463 + writer.write(&req)?; 1464 + self.inner.lock().looper_need_return = false; 1465 + return ret; 1466 + } 1467 + } 1468 + 1469 + // Go through the work queue. 1470 + if req.read_size > 0 { 1471 + ret = self.read(&mut req, wait); 1472 + if ret.is_err() && ret != Err(EINTR) { 1473 + pr_warn!( 1474 + "Read failure {:?} in pid:{}", 1475 + ret, 1476 + self.process.pid_in_current_ns() 1477 + ); 1478 + } 1479 + } 1480 + 1481 + // Write the request back so that the consumed fields are visible to the caller. 1482 + writer.write(&req)?; 1483 + 1484 + self.inner.lock().looper_need_return = false; 1485 + 1486 + ret 1487 + } 1488 + 1489 + pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> (bool, u32) { 1490 + table.register_wait(file, &self.work_condvar); 1491 + let mut inner = self.inner.lock(); 1492 + (inner.should_use_process_work_queue(), inner.poll()) 1493 + } 1494 + 1495 + /// Make the call to `get_work` or `get_work_local` return immediately, if any. 1496 + pub(crate) fn exit_looper(&self) { 1497 + let mut inner = self.inner.lock(); 1498 + let should_notify = inner.looper_flags & LOOPER_WAITING != 0; 1499 + if should_notify { 1500 + inner.looper_need_return = true; 1501 + } 1502 + drop(inner); 1503 + 1504 + if should_notify { 1505 + self.work_condvar.notify_one(); 1506 + } 1507 + } 1508 + 1509 + pub(crate) fn notify_if_poll_ready(&self, sync: bool) { 1510 + // Determine if we need to notify. This requires the lock. 1511 + let inner = self.inner.lock(); 1512 + let notify = inner.looper_flags & LOOPER_POLL != 0 && inner.should_use_process_work_queue(); 1513 + drop(inner); 1514 + 1515 + // Now that the lock is no longer held, notify the waiters if we have to. 1516 + if notify { 1517 + if sync { 1518 + self.work_condvar.notify_sync(); 1519 + } else { 1520 + self.work_condvar.notify_one(); 1521 + } 1522 + } 1523 + } 1524 + 1525 + pub(crate) fn release(self: &Arc<Self>) { 1526 + self.inner.lock().is_dead = true; 1527 + 1528 + //self.work_condvar.clear(); 1529 + self.unwind_transaction_stack(); 1530 + 1531 + // Cancel all pending work items. 
1532 + while let Ok(Some(work)) = self.get_work_local(false) { 1533 + work.into_arc().cancel(); 1534 + } 1535 + } 1536 + } 1537 + 1538 + #[pin_data] 1539 + struct ThreadError { 1540 + error_code: AtomicU32, 1541 + #[pin] 1542 + links_track: AtomicTracker, 1543 + } 1544 + 1545 + impl ThreadError { 1546 + fn try_new() -> Result<DArc<Self>> { 1547 + DTRWrap::arc_pin_init(pin_init!(Self { 1548 + error_code: AtomicU32::new(BR_OK), 1549 + links_track <- AtomicTracker::new(), 1550 + })) 1551 + .map(ListArc::into_arc) 1552 + } 1553 + 1554 + fn set_error_code(&self, code: u32) { 1555 + self.error_code.store(code, Ordering::Relaxed); 1556 + } 1557 + 1558 + fn is_unused(&self) -> bool { 1559 + self.error_code.load(Ordering::Relaxed) == BR_OK 1560 + } 1561 + } 1562 + 1563 + impl DeliverToRead for ThreadError { 1564 + fn do_work( 1565 + self: DArc<Self>, 1566 + _thread: &Thread, 1567 + writer: &mut BinderReturnWriter<'_>, 1568 + ) -> Result<bool> { 1569 + let code = self.error_code.load(Ordering::Relaxed); 1570 + self.error_code.store(BR_OK, Ordering::Relaxed); 1571 + writer.write_code(code)?; 1572 + Ok(true) 1573 + } 1574 + 1575 + fn cancel(self: DArc<Self>) {} 1576 + 1577 + fn should_sync_wakeup(&self) -> bool { 1578 + false 1579 + } 1580 + 1581 + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { 1582 + seq_print!( 1583 + m, 1584 + "{}transaction error: {}\n", 1585 + prefix, 1586 + self.error_code.load(Ordering::Relaxed) 1587 + ); 1588 + Ok(()) 1589 + } 1590 + } 1591 + 1592 + kernel::list::impl_list_arc_safe! { 1593 + impl ListArcSafe<0> for ThreadError { 1594 + tracked_by links_track: AtomicTracker; 1595 + } 1596 + }
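To make the scatter-gather bookkeeping in thread.rs easier to follow, here is a small standalone sketch (not driver code) of what UnusedBufferSpace::claim_next() does with the extra space requested via buffers_size. It assumes ptr_align() rounds sizes up to an 8-byte boundary, which the sketch models with a local helper; the real driver returns EINVAL where this returns None:

    struct UnusedBufferSpace {
        offset: usize, // start of the remaining space
        limit: usize,  // end of the remaining space
    }

    // Stand-in for the driver's ptr_align(): round up to 8 bytes, failing on overflow.
    fn ptr_align(size: usize) -> Option<usize> {
        size.checked_add(7).map(|s| s & !7)
    }

    impl UnusedBufferSpace {
        // Claim the next `size` bytes and return the offset of the claimed chunk.
        fn claim_next(&mut self, size: usize) -> Option<usize> {
            let size = ptr_align(size)?;
            let new_offset = self.offset.checked_add(size)?;
            if new_offset <= self.limit {
                let offset = self.offset;
                self.offset = new_offset;
                Some(offset)
            } else {
                None
            }
        }
    }

    fn main() {
        // Two BINDER_TYPE_PTR buffers of 20 and 40 bytes carved out of 64 bytes of
        // extra space: the first lands at offset 0, the second at the aligned
        // offset 24, and a third claim fails because only 16 bytes remain.
        let mut space = UnusedBufferSpace { offset: 0, limit: 64 };
        assert_eq!(space.claim_next(20), Some(0));
        assert_eq!(space.claim_next(40), Some(24));
        assert_eq!(space.claim_next(17), None);
    }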
+16
drivers/android/binder/trace.rs
···
  1 + // SPDX-License-Identifier: GPL-2.0
  2 +
  3 + // Copyright (C) 2025 Google LLC.
  4 +
  5 + use kernel::ffi::{c_uint, c_ulong};
  6 + use kernel::tracepoint::declare_trace;
  7 +
  8 + declare_trace! {
  9 +     unsafe fn rust_binder_ioctl(cmd: c_uint, arg: c_ulong);
 10 + }
 11 +
 12 + #[inline]
 13 + pub(crate) fn trace_ioctl(cmd: u32, arg: usize) {
 14 +     // SAFETY: Always safe to call.
 15 +     unsafe { rust_binder_ioctl(cmd, arg as c_ulong) }
 16 + }
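The wrapper exists so that callers can invoke the tracepoint unconditionally on the ioctl fast path. As a rough userspace analogy (using a hypothetical `tracing` cargo feature in place of the kernel's tracepoint machinery, and arbitrary example values), the shape is:

    // When the feature is enabled, the hook does something visible.
    #[cfg(feature = "tracing")]
    fn trace_ioctl(cmd: u32, arg: usize) {
        println!("rust_binder_ioctl: cmd=0x{cmd:x} arg=0x{arg:x}");
    }

    // When it is disabled, the call compiles down to nothing.
    #[cfg(not(feature = "tracing"))]
    #[inline]
    fn trace_ioctl(_cmd: u32, _arg: usize) {}

    fn main() {
        // In the driver the real cmd/arg come straight from the ioctl entry point.
        trace_ioctl(0x6201, 0x1000);
    }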
+456
drivers/android/binder/transaction.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + // Copyright (C) 2025 Google LLC. 4 + 5 + use core::sync::atomic::{AtomicBool, Ordering}; 6 + use kernel::{ 7 + prelude::*, 8 + seq_file::SeqFile, 9 + seq_print, 10 + sync::{Arc, SpinLock}, 11 + task::Kuid, 12 + time::{Instant, Monotonic}, 13 + types::ScopeGuard, 14 + }; 15 + 16 + use crate::{ 17 + allocation::{Allocation, TranslatedFds}, 18 + defs::*, 19 + error::{BinderError, BinderResult}, 20 + node::{Node, NodeRef}, 21 + process::{Process, ProcessInner}, 22 + ptr_align, 23 + thread::{PushWorkRes, Thread}, 24 + BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead, 25 + }; 26 + 27 + #[pin_data(PinnedDrop)] 28 + pub(crate) struct Transaction { 29 + pub(crate) debug_id: usize, 30 + target_node: Option<DArc<Node>>, 31 + pub(crate) from_parent: Option<DArc<Transaction>>, 32 + pub(crate) from: Arc<Thread>, 33 + pub(crate) to: Arc<Process>, 34 + #[pin] 35 + allocation: SpinLock<Option<Allocation>>, 36 + is_outstanding: AtomicBool, 37 + code: u32, 38 + pub(crate) flags: u32, 39 + data_size: usize, 40 + offsets_size: usize, 41 + data_address: usize, 42 + sender_euid: Kuid, 43 + txn_security_ctx_off: Option<usize>, 44 + pub(crate) oneway_spam_detected: bool, 45 + start_time: Instant<Monotonic>, 46 + } 47 + 48 + kernel::list::impl_list_arc_safe! { 49 + impl ListArcSafe<0> for Transaction { untracked; } 50 + } 51 + 52 + impl Transaction { 53 + pub(crate) fn new( 54 + node_ref: NodeRef, 55 + from_parent: Option<DArc<Transaction>>, 56 + from: &Arc<Thread>, 57 + tr: &BinderTransactionDataSg, 58 + ) -> BinderResult<DLArc<Self>> { 59 + let debug_id = super::next_debug_id(); 60 + let trd = &tr.transaction_data; 61 + let allow_fds = node_ref.node.flags & FLAT_BINDER_FLAG_ACCEPTS_FDS != 0; 62 + let txn_security_ctx = node_ref.node.flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX != 0; 63 + let mut txn_security_ctx_off = if txn_security_ctx { Some(0) } else { None }; 64 + let to = node_ref.node.owner.clone(); 65 + let mut alloc = match from.copy_transaction_data( 66 + to.clone(), 67 + tr, 68 + debug_id, 69 + allow_fds, 70 + txn_security_ctx_off.as_mut(), 71 + ) { 72 + Ok(alloc) => alloc, 73 + Err(err) => { 74 + if !err.is_dead() { 75 + pr_warn!("Failure in copy_transaction_data: {:?}", err); 76 + } 77 + return Err(err); 78 + } 79 + }; 80 + let oneway_spam_detected = alloc.oneway_spam_detected; 81 + if trd.flags & TF_ONE_WAY != 0 { 82 + if from_parent.is_some() { 83 + pr_warn!("Oneway transaction should not be in a transaction stack."); 84 + return Err(EINVAL.into()); 85 + } 86 + alloc.set_info_oneway_node(node_ref.node.clone()); 87 + } 88 + if trd.flags & TF_CLEAR_BUF != 0 { 89 + alloc.set_info_clear_on_drop(); 90 + } 91 + let target_node = node_ref.node.clone(); 92 + alloc.set_info_target_node(node_ref); 93 + let data_address = alloc.ptr; 94 + 95 + Ok(DTRWrap::arc_pin_init(pin_init!(Transaction { 96 + debug_id, 97 + target_node: Some(target_node), 98 + from_parent, 99 + sender_euid: from.process.task.euid(), 100 + from: from.clone(), 101 + to, 102 + code: trd.code, 103 + flags: trd.flags, 104 + data_size: trd.data_size as _, 105 + offsets_size: trd.offsets_size as _, 106 + data_address, 107 + allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"), 108 + is_outstanding: AtomicBool::new(false), 109 + txn_security_ctx_off, 110 + oneway_spam_detected, 111 + start_time: Instant::now(), 112 + }))?) 
113 + } 114 + 115 + pub(crate) fn new_reply( 116 + from: &Arc<Thread>, 117 + to: Arc<Process>, 118 + tr: &BinderTransactionDataSg, 119 + allow_fds: bool, 120 + ) -> BinderResult<DLArc<Self>> { 121 + let debug_id = super::next_debug_id(); 122 + let trd = &tr.transaction_data; 123 + let mut alloc = match from.copy_transaction_data(to.clone(), tr, debug_id, allow_fds, None) 124 + { 125 + Ok(alloc) => alloc, 126 + Err(err) => { 127 + pr_warn!("Failure in copy_transaction_data: {:?}", err); 128 + return Err(err); 129 + } 130 + }; 131 + let oneway_spam_detected = alloc.oneway_spam_detected; 132 + if trd.flags & TF_CLEAR_BUF != 0 { 133 + alloc.set_info_clear_on_drop(); 134 + } 135 + Ok(DTRWrap::arc_pin_init(pin_init!(Transaction { 136 + debug_id, 137 + target_node: None, 138 + from_parent: None, 139 + sender_euid: from.process.task.euid(), 140 + from: from.clone(), 141 + to, 142 + code: trd.code, 143 + flags: trd.flags, 144 + data_size: trd.data_size as _, 145 + offsets_size: trd.offsets_size as _, 146 + data_address: alloc.ptr, 147 + allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"), 148 + is_outstanding: AtomicBool::new(false), 149 + txn_security_ctx_off: None, 150 + oneway_spam_detected, 151 + start_time: Instant::now(), 152 + }))?) 153 + } 154 + 155 + #[inline(never)] 156 + pub(crate) fn debug_print_inner(&self, m: &SeqFile, prefix: &str) { 157 + seq_print!( 158 + m, 159 + "{}{}: from {}:{} to {} code {:x} flags {:x} elapsed {}ms", 160 + prefix, 161 + self.debug_id, 162 + self.from.process.task.pid(), 163 + self.from.id, 164 + self.to.task.pid(), 165 + self.code, 166 + self.flags, 167 + self.start_time.elapsed().as_millis(), 168 + ); 169 + if let Some(target_node) = &self.target_node { 170 + seq_print!(m, " node {}", target_node.debug_id); 171 + } 172 + seq_print!(m, " size {}:{}\n", self.data_size, self.offsets_size); 173 + } 174 + 175 + /// Determines if the transaction is stacked on top of the given transaction. 176 + pub(crate) fn is_stacked_on(&self, onext: &Option<DArc<Self>>) -> bool { 177 + match (&self.from_parent, onext) { 178 + (None, None) => true, 179 + (Some(from_parent), Some(next)) => Arc::ptr_eq(from_parent, next), 180 + _ => false, 181 + } 182 + } 183 + 184 + /// Returns a pointer to the next transaction on the transaction stack, if there is one. 185 + pub(crate) fn clone_next(&self) -> Option<DArc<Self>> { 186 + Some(self.from_parent.as_ref()?.clone()) 187 + } 188 + 189 + /// Searches in the transaction stack for a thread that belongs to the target process. This is 190 + /// useful when finding a target for a new transaction: if the node belongs to a process that 191 + /// is already part of the transaction stack, we reuse the thread. 192 + fn find_target_thread(&self) -> Option<Arc<Thread>> { 193 + let mut it = &self.from_parent; 194 + while let Some(transaction) = it { 195 + if Arc::ptr_eq(&transaction.from.process, &self.to) { 196 + return Some(transaction.from.clone()); 197 + } 198 + it = &transaction.from_parent; 199 + } 200 + None 201 + } 202 + 203 + /// Searches in the transaction stack for a transaction originating at the given thread. 
204 + pub(crate) fn find_from(&self, thread: &Thread) -> Option<&DArc<Transaction>> { 205 + let mut it = &self.from_parent; 206 + while let Some(transaction) = it { 207 + if core::ptr::eq(thread, transaction.from.as_ref()) { 208 + return Some(transaction); 209 + } 210 + 211 + it = &transaction.from_parent; 212 + } 213 + None 214 + } 215 + 216 + pub(crate) fn set_outstanding(&self, to_process: &mut ProcessInner) { 217 + // No race because this method is only called once. 218 + if !self.is_outstanding.load(Ordering::Relaxed) { 219 + self.is_outstanding.store(true, Ordering::Relaxed); 220 + to_process.add_outstanding_txn(); 221 + } 222 + } 223 + 224 + /// Decrement `outstanding_txns` in `to` if it hasn't already been decremented. 225 + fn drop_outstanding_txn(&self) { 226 + // No race because this is called at most twice, and one of the calls are in the 227 + // destructor, which is guaranteed to not race with any other operations on the 228 + // transaction. It also cannot race with `set_outstanding`, since submission happens 229 + // before delivery. 230 + if self.is_outstanding.load(Ordering::Relaxed) { 231 + self.is_outstanding.store(false, Ordering::Relaxed); 232 + self.to.drop_outstanding_txn(); 233 + } 234 + } 235 + 236 + /// Submits the transaction to a work queue. Uses a thread if there is one in the transaction 237 + /// stack, otherwise uses the destination process. 238 + /// 239 + /// Not used for replies. 240 + pub(crate) fn submit(self: DLArc<Self>) -> BinderResult { 241 + // Defined before `process_inner` so that the destructor runs after releasing the lock. 242 + let mut _t_outdated; 243 + 244 + let oneway = self.flags & TF_ONE_WAY != 0; 245 + let process = self.to.clone(); 246 + let mut process_inner = process.inner.lock(); 247 + 248 + self.set_outstanding(&mut process_inner); 249 + 250 + if oneway { 251 + if let Some(target_node) = self.target_node.clone() { 252 + if process_inner.is_frozen { 253 + process_inner.async_recv = true; 254 + if self.flags & TF_UPDATE_TXN != 0 { 255 + if let Some(t_outdated) = 256 + target_node.take_outdated_transaction(&self, &mut process_inner) 257 + { 258 + // Save the transaction to be dropped after locks are released. 259 + _t_outdated = t_outdated; 260 + } 261 + } 262 + } 263 + match target_node.submit_oneway(self, &mut process_inner) { 264 + Ok(()) => {} 265 + Err((err, work)) => { 266 + drop(process_inner); 267 + // Drop work after releasing process lock. 268 + drop(work); 269 + return Err(err); 270 + } 271 + } 272 + 273 + if process_inner.is_frozen { 274 + return Err(BinderError::new_frozen_oneway()); 275 + } else { 276 + return Ok(()); 277 + } 278 + } else { 279 + pr_err!("Failed to submit oneway transaction to node."); 280 + } 281 + } 282 + 283 + if process_inner.is_frozen { 284 + process_inner.sync_recv = true; 285 + return Err(BinderError::new_frozen()); 286 + } 287 + 288 + let res = if let Some(thread) = self.find_target_thread() { 289 + match thread.push_work(self) { 290 + PushWorkRes::Ok => Ok(()), 291 + PushWorkRes::FailedDead(me) => Err((BinderError::new_dead(), me)), 292 + } 293 + } else { 294 + process_inner.push_work(self) 295 + }; 296 + drop(process_inner); 297 + 298 + match res { 299 + Ok(()) => Ok(()), 300 + Err((err, work)) => { 301 + // Drop work after releasing process lock. 302 + drop(work); 303 + Err(err) 304 + } 305 + } 306 + } 307 + 308 + /// Check whether one oneway transaction can supersede another. 
309 + pub(crate) fn can_replace(&self, old: &Transaction) -> bool { 310 + if self.from.process.task.pid() != old.from.process.task.pid() { 311 + return false; 312 + } 313 + 314 + if self.flags & old.flags & (TF_ONE_WAY | TF_UPDATE_TXN) != (TF_ONE_WAY | TF_UPDATE_TXN) { 315 + return false; 316 + } 317 + 318 + let target_node_match = match (self.target_node.as_ref(), old.target_node.as_ref()) { 319 + (None, None) => true, 320 + (Some(tn1), Some(tn2)) => Arc::ptr_eq(tn1, tn2), 321 + _ => false, 322 + }; 323 + 324 + self.code == old.code && self.flags == old.flags && target_node_match 325 + } 326 + 327 + fn prepare_file_list(&self) -> Result<TranslatedFds> { 328 + let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?; 329 + 330 + match alloc.translate_fds() { 331 + Ok(translated) => { 332 + *self.allocation.lock() = Some(alloc); 333 + Ok(translated) 334 + } 335 + Err(err) => { 336 + // Free the allocation eagerly. 337 + drop(alloc); 338 + Err(err) 339 + } 340 + } 341 + } 342 + } 343 + 344 + impl DeliverToRead for Transaction { 345 + fn do_work( 346 + self: DArc<Self>, 347 + thread: &Thread, 348 + writer: &mut BinderReturnWriter<'_>, 349 + ) -> Result<bool> { 350 + let send_failed_reply = ScopeGuard::new(|| { 351 + if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 { 352 + let reply = Err(BR_FAILED_REPLY); 353 + self.from.deliver_reply(reply, &self); 354 + } 355 + self.drop_outstanding_txn(); 356 + }); 357 + 358 + let files = if let Ok(list) = self.prepare_file_list() { 359 + list 360 + } else { 361 + // On failure to process the list, we send a reply back to the sender and ignore the 362 + // transaction on the recipient. 363 + return Ok(true); 364 + }; 365 + 366 + let mut tr_sec = BinderTransactionDataSecctx::default(); 367 + let tr = tr_sec.tr_data(); 368 + if let Some(target_node) = &self.target_node { 369 + let (ptr, cookie) = target_node.get_id(); 370 + tr.target.ptr = ptr as _; 371 + tr.cookie = cookie as _; 372 + }; 373 + tr.code = self.code; 374 + tr.flags = self.flags; 375 + tr.data_size = self.data_size as _; 376 + tr.data.ptr.buffer = self.data_address as _; 377 + tr.offsets_size = self.offsets_size as _; 378 + if tr.offsets_size > 0 { 379 + tr.data.ptr.offsets = (self.data_address + ptr_align(self.data_size).unwrap()) as _; 380 + } 381 + tr.sender_euid = self.sender_euid.into_uid_in_current_ns(); 382 + tr.sender_pid = 0; 383 + if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 { 384 + // Not a reply and not one-way. 385 + tr.sender_pid = self.from.process.pid_in_current_ns(); 386 + } 387 + let code = if self.target_node.is_none() { 388 + BR_REPLY 389 + } else if self.txn_security_ctx_off.is_some() { 390 + BR_TRANSACTION_SEC_CTX 391 + } else { 392 + BR_TRANSACTION 393 + }; 394 + 395 + // Write the transaction code and data to the user buffer. 396 + writer.write_code(code)?; 397 + if let Some(off) = self.txn_security_ctx_off { 398 + tr_sec.secctx = (self.data_address + off) as u64; 399 + writer.write_payload(&tr_sec)?; 400 + } else { 401 + writer.write_payload(&*tr)?; 402 + } 403 + 404 + let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?; 405 + 406 + // Dismiss the completion of transaction with a failure. No failure paths are allowed from 407 + // here on out. 408 + send_failed_reply.dismiss(); 409 + 410 + // Commit files, and set FDs in FDA to be closed on buffer free. 411 + let close_on_free = files.commit(); 412 + alloc.set_info_close_on_free(close_on_free); 413 + 414 + // It is now the user's responsibility to clear the allocation. 
+        alloc.keep_alive();
+
+        self.drop_outstanding_txn();
+
+        // When this is not a reply and not a oneway transaction, update `current_transaction`. If
+        // it's a reply, `current_transaction` has already been updated appropriately.
+        if self.target_node.is_some() && tr_sec.transaction_data.flags & TF_ONE_WAY == 0 {
+            thread.set_current_transaction(self);
+        }
+
+        Ok(false)
+    }
+
+    fn cancel(self: DArc<Self>) {
+        let allocation = self.allocation.lock().take();
+        drop(allocation);
+
+        // If this is not a reply or oneway transaction, then send a dead reply.
+        if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
+            let reply = Err(BR_DEAD_REPLY);
+            self.from.deliver_reply(reply, &self);
+        }
+
+        self.drop_outstanding_txn();
+    }
+
+    fn should_sync_wakeup(&self) -> bool {
+        self.flags & TF_ONE_WAY == 0
+    }
+
+    fn debug_print(&self, m: &SeqFile, _prefix: &str, tprefix: &str) -> Result<()> {
+        self.debug_print_inner(m, tprefix);
+        Ok(())
+    }
+}
+
+#[pinned_drop]
+impl PinnedDrop for Transaction {
+    fn drop(self: Pin<&mut Self>) {
+        self.drop_outstanding_txn();
+    }
+}
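One detail in `can_replace` above that is easy to misread: a new oneway transaction may only
supersede an old one when both transactions carry both `TF_ONE_WAY` and `TF_UPDATE_TXN`. A
minimal standalone sketch of just that predicate, using illustrative bit values rather than
the real uapi constants:

    // Illustrative flag values only; the real constants come from the binder uapi header.
    const TF_ONE_WAY: u32 = 0x01;
    const TF_UPDATE_TXN: u32 = 0x40;

    /// Returns true only if both flag words have both bits set.
    fn both_updatable_oneway(new_flags: u32, old_flags: u32) -> bool {
        new_flags & old_flags & (TF_ONE_WAY | TF_UPDATE_TXN) == (TF_ONE_WAY | TF_UPDATE_TXN)
    }

    fn main() {
        assert!(both_updatable_oneway(
            TF_ONE_WAY | TF_UPDATE_TXN,
            TF_ONE_WAY | TF_UPDATE_TXN,
        ));
        // The old transaction lacking TF_UPDATE_TXN means it cannot be replaced.
        assert!(!both_updatable_oneway(TF_ONE_WAY | TF_UPDATE_TXN, TF_ONE_WAY));
    }

Note that bitwise `&` binds tighter than `==` in Rust, so the expression needs no extra
parentheses around the masking.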
+1 -1
include/uapi/linux/android/binder.h
···
 	BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE),
 };
 
-enum {
+enum flat_binder_object_flags {
 	FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff,
 	FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100,
 
+8
rust/bindings/bindings_helper.h
···
 #include <linux/dma-mapping.h>
 #include <linux/errname.h>
 #include <linux/ethtool.h>
+#include <linux/fdtable.h>
 #include <linux/file.h>
 #include <linux/firmware.h>
 #include <linux/fs.h>
···
 #include <linux/sched.h>
 #include <linux/security.h>
 #include <linux/slab.h>
+#include <linux/task_work.h>
 #include <linux/tracepoint.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
···
 
 const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC;
 const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1;
+
+#if IS_ENABLED(CONFIG_ANDROID_BINDER_IPC_RUST)
+#include "../../drivers/android/binder/rust_binder.h"
+#include "../../drivers/android/binder/rust_binder_events.h"
+#include "../../drivers/android/binder/page_range_helper.h"
+#endif
+26
rust/helpers/binder.c
···
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#include <linux/list_lru.h>
+#include <linux/task_work.h>
+
+unsigned long rust_helper_list_lru_count(struct list_lru *lru)
+{
+	return list_lru_count(lru);
+}
+
+unsigned long rust_helper_list_lru_walk(struct list_lru *lru,
+					list_lru_walk_cb isolate, void *cb_arg,
+					unsigned long nr_to_walk)
+{
+	return list_lru_walk(lru, isolate, cb_arg, nr_to_walk);
+}
+
+void rust_helper_init_task_work(struct callback_head *twork,
+				task_work_func_t func)
+{
+	init_task_work(twork, func);
+}
+1
rust/helpers/helpers.c
···
  */
 
 #include "auxiliary.c"
+#include "binder.c"
 #include "blk.c"
 #include "bug.c"
 #include "build_assert.c"
+8
rust/helpers/page.c
···
 
 #include <linux/gfp.h>
 #include <linux/highmem.h>
+#include <linux/mm.h>
 
 struct page *rust_helper_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
···
 {
 	kunmap_local(addr);
 }
+
+#ifndef NODE_NOT_IN_PAGE_FLAGS
+int rust_helper_page_to_nid(const struct page *page)
+{
+	return page_to_nid(page);
+}
+#endif
+24
rust/helpers/security.c
···
 {
 	security_release_secctx(cp);
 }
+
+int rust_helper_security_binder_set_context_mgr(const struct cred *mgr)
+{
+	return security_binder_set_context_mgr(mgr);
+}
+
+int rust_helper_security_binder_transaction(const struct cred *from,
+					    const struct cred *to)
+{
+	return security_binder_transaction(from, to);
+}
+
+int rust_helper_security_binder_transfer_binder(const struct cred *from,
+						const struct cred *to)
+{
+	return security_binder_transfer_binder(from, to);
+}
+
+int rust_helper_security_binder_transfer_file(const struct cred *from,
+					      const struct cred *to,
+					      const struct file *file)
+{
+	return security_binder_transfer_file(from, to, file);
+}
 #endif
+6
rust/kernel/cred.rs
···
         unsafe { &*ptr.cast() }
     }
 
+    /// Returns a raw pointer to the inner credential.
+    #[inline]
+    pub fn as_ptr(&self) -> *const bindings::cred {
+        self.0.get()
+    }
+
     /// Get the id for this security context.
     #[inline]
     pub fn get_secid(&self) -> u32 {
+6
rust/kernel/page.rs
···
         self.page.as_ptr()
     }
 
+    /// Returns the id of the NUMA node containing this page.
+    pub fn nid(&self) -> i32 {
+        // SAFETY: Always safe to call with a valid page.
+        unsafe { bindings::page_to_nid(self.as_ptr()) }
+    }
+
     /// Runs a piece of code with this page mapped to an address.
     ///
     /// The page is unmapped when this call returns.
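The new `nid` accessor exposes the page's NUMA node id to Rust code. As a hedged sketch of the
kind of per-node bookkeeping this enables (the helper below is hypothetical and not part of this
patch):

    use kernel::page::Page;

    /// Hypothetical helper: pick a per-node slot for bookkeeping keyed by the page's NUMA node.
    fn node_slot(page: &Page, nr_slots: usize) -> usize {
        // `nid()` reports which NUMA node the page belongs to.
        (page.nid() as usize) % nr_slots
    }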
+37
rust/kernel/security.rs
···
 
 use crate::{
     bindings,
+    cred::Credential,
     error::{to_result, Result},
+    fs::File,
 };
+
+/// Calls the security modules to determine if the given task can become the manager of a binder
+/// context.
+#[inline]
+pub fn binder_set_context_mgr(mgr: &Credential) -> Result {
+    // SAFETY: `mgr.0` is valid because the shared reference guarantees a nonzero refcount.
+    to_result(unsafe { bindings::security_binder_set_context_mgr(mgr.as_ptr()) })
+}
+
+/// Calls the security modules to determine if binder transactions are allowed from task `from` to
+/// task `to`.
+#[inline]
+pub fn binder_transaction(from: &Credential, to: &Credential) -> Result {
+    // SAFETY: `from` and `to` are valid because the shared references guarantee nonzero refcounts.
+    to_result(unsafe { bindings::security_binder_transaction(from.as_ptr(), to.as_ptr()) })
+}
+
+/// Calls the security modules to determine if task `from` is allowed to send binder objects
+/// (owned by itself or other processes) to task `to` through a binder transaction.
+#[inline]
+pub fn binder_transfer_binder(from: &Credential, to: &Credential) -> Result {
+    // SAFETY: `from` and `to` are valid because the shared references guarantee nonzero refcounts.
+    to_result(unsafe { bindings::security_binder_transfer_binder(from.as_ptr(), to.as_ptr()) })
+}
+
+/// Calls the security modules to determine if task `from` is allowed to send the given file to
+/// task `to` (which would get its own file descriptor) through a binder transaction.
+#[inline]
+pub fn binder_transfer_file(from: &Credential, to: &Credential, file: &File) -> Result {
+    // SAFETY: `from`, `to` and `file` are valid because the shared references guarantee nonzero
+    // refcounts.
+    to_result(unsafe {
+        bindings::security_binder_transfer_file(from.as_ptr(), to.as_ptr(), file.as_ptr())
+    })
+}
 
 /// A security context string.
 ///
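For orientation, a minimal sketch, not the driver's actual code, of how these wrappers might be
consulted before dispatching a transaction; the helper name and signature below are made up for
illustration, but the `security::` calls match the functions added above:

    use kernel::{cred::Credential, fs::File, prelude::*, security};

    /// Hypothetical helper: run the LSM checks that gate a transaction and any file it carries.
    fn lsm_check_transaction(from: &Credential, to: &Credential, file: Option<&File>) -> Result {
        // Is the sender allowed to transact with the receiver at all?
        security::binder_transaction(from, to)?;
        // If a file descriptor is being transferred, the LSM may veto that as well.
        if let Some(file) = file {
            security::binder_transfer_file(from, to, file)?;
        }
        Ok(())
    }

The remaining wrappers, `binder_set_context_mgr` and `binder_transfer_binder`, would be invoked
at the corresponding points (registering the context manager and transferring binder objects),
as their doc comments describe.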
+1
rust/uapi/uapi_helper.h
···
 #include <uapi/asm-generic/ioctl.h>
 #include <uapi/drm/drm.h>
 #include <uapi/drm/nova_drm.h>
+#include <uapi/linux/android/binder.h>
 #include <uapi/linux/mdio.h>
 #include <uapi/linux/mii.h>
 #include <uapi/linux/ethtool.h>