Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

binder: tell userspace to dump current backtrace when oneway spamming is detected

When the async binder buffer is exhausted, some normal oneway
transactions will also be discarded, which may cause system or
application failures. By that time, the binder debug information we dump
may not be relevant to the root cause, and the issue is difficult to
debug without the backtrace of the thread sending the spam.

This change sends BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
spamming is detected, requesting that it dump the current backtrace.
Oneway spamming is reported only once when the threshold is exceeded
(the target process dips below 80% of its oneway space, and the current
process is responsible for either more than 50 transactions or more than
50% of the oneway space). Detection restarts once the async buffer has
returned to a healthy state.

Acked-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Hang Lu <hangl@codeaurora.org>
Link: https://lore.kernel.org/r/1617961246-4502-3-git-send-email-hangl@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Hang Lu and committed by
Greg Kroah-Hartman
a7dc1e6f 00516915

+56 -8
+24 -3
drivers/android/binder.c
··· 3020 3020 goto err_bad_object_type; 3021 3021 } 3022 3022 } 3023 - tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; 3023 + if (t->buffer->oneway_spam_suspect) 3024 + tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; 3025 + else 3026 + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; 3024 3027 t->work.type = BINDER_WORK_TRANSACTION; 3025 3028 3026 3029 if (reply) { ··· 3896 3893 3897 3894 binder_stat_br(proc, thread, cmd); 3898 3895 } break; 3899 - case BINDER_WORK_TRANSACTION_COMPLETE: { 3896 + case BINDER_WORK_TRANSACTION_COMPLETE: 3897 + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: { 3898 + if (proc->oneway_spam_detection_enabled && 3899 + w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT) 3900 + cmd = BR_ONEWAY_SPAM_SUSPECT; 3901 + else 3902 + cmd = BR_TRANSACTION_COMPLETE; 3900 3903 binder_inner_proc_unlock(proc); 3901 - cmd = BR_TRANSACTION_COMPLETE; 3902 3904 kfree(w); 3903 3905 binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); 3904 3906 if (put_user(cmd, (uint32_t __user *)ptr)) ··· 4905 4897 } 4906 4898 break; 4907 4899 } 4900 + case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: { 4901 + uint32_t enable; 4902 + 4903 + if (copy_from_user(&enable, ubuf, sizeof(enable))) { 4904 + ret = -EINVAL; 4905 + goto err; 4906 + } 4907 + binder_inner_proc_lock(proc); 4908 + proc->oneway_spam_detection_enabled = (bool)enable; 4909 + binder_inner_proc_unlock(proc); 4910 + break; 4911 + } 4908 4912 default: 4909 4913 ret = -EINVAL; 4910 4914 goto err; ··· 5581 5561 "BR_CLEAR_DEATH_NOTIFICATION_DONE", 5582 5562 "BR_FAILED_REPLY", 5583 5563 "BR_FROZEN_REPLY", 5564 + "BR_ONEWAY_SPAM_SUSPECT", 5584 5565 }; 5585 5566 5586 5567 static const char * const binder_command_strings[] = {
+12 -3
drivers/android/binder_alloc.c
··· 338 338 return vma; 339 339 } 340 340 341 - static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid) 341 + static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid) 342 342 { 343 343 /* 344 344 * Find the amount and size of buffers allocated by the current caller; ··· 366 366 367 367 /* 368 368 * Warn if this pid has more than 50 transactions, or more than 50% of 369 - * async space (which is 25% of total buffer size). 369 + * async space (which is 25% of total buffer size). Oneway spam is only 370 + * detected when the threshold is exceeded. 370 371 */ 371 372 if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) { 372 373 binder_alloc_debug(BINDER_DEBUG_USER_ERROR, 373 374 "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n", 374 375 alloc->pid, pid, num_buffers, total_alloc_size); 376 + if (!alloc->oneway_spam_detected) { 377 + alloc->oneway_spam_detected = true; 378 + return true; 379 + } 375 380 } 381 + return false; 376 382 } 377 383 378 384 static struct binder_buffer *binder_alloc_new_buf_locked( ··· 531 525 buffer->async_transaction = is_async; 532 526 buffer->extra_buffers_size = extra_buffers_size; 533 527 buffer->pid = pid; 528 + buffer->oneway_spam_suspect = false; 534 529 if (is_async) { 535 530 alloc->free_async_space -= size + sizeof(struct binder_buffer); 536 531 binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, ··· 543 536 * of async space left (which is less than 10% of total 544 537 * buffer size). 545 538 */ 546 - debug_low_async_space_locked(alloc, pid); 539 + buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid); 540 + } else { 541 + alloc->oneway_spam_detected = false; 547 542 } 548 543 } 549 544 return buffer;
+7 -1
drivers/android/binder_alloc.h
··· 26 26 * @clear_on_free: %true if buffer must be zeroed after use 27 27 * @allow_user_free: %true if user is allowed to free buffer 28 28 * @async_transaction: %true if buffer is in use for an async txn 29 + * @oneway_spam_suspect: %true if total async allocate size just exceed 30 + * spamming detect threshold 29 31 * @debug_id: unique ID for debugging 30 32 * @transaction: pointer to associated struct binder_transaction 31 33 * @target_node: struct binder_node associated with this buffer ··· 47 45 unsigned clear_on_free:1; 48 46 unsigned allow_user_free:1; 49 47 unsigned async_transaction:1; 50 - unsigned debug_id:28; 48 + unsigned oneway_spam_suspect:1; 49 + unsigned debug_id:27; 51 50 52 51 struct binder_transaction *transaction; 53 52 ··· 90 87 * @buffer_size: size of address space specified via mmap 91 88 * @pid: pid for associated binder_proc (invariant after init) 92 89 * @pages_high: high watermark of offset in @pages 90 + * @oneway_spam_detected: %true if oneway spam detection fired, clear that 91 + * flag once the async buffer has returned to a healthy state 93 92 * 94 93 * Bookkeeping structure for per-proc address space management for binder 95 94 * buffers. It is normally initialized during binder_init() and binder_mmap() ··· 112 107 uint32_t buffer_free; 113 108 int pid; 114 109 size_t pages_high; 110 + bool oneway_spam_detected; 115 111 }; 116 112 117 113 #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
+5 -1
drivers/android/binder_internal.h
··· 155 155 }; 156 156 157 157 struct binder_stats { 158 - atomic_t br[_IOC_NR(BR_FROZEN_REPLY) + 1]; 158 + atomic_t br[_IOC_NR(BR_ONEWAY_SPAM_SUSPECT) + 1]; 159 159 atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1]; 160 160 atomic_t obj_created[BINDER_STAT_COUNT]; 161 161 atomic_t obj_deleted[BINDER_STAT_COUNT]; ··· 174 174 enum binder_work_type { 175 175 BINDER_WORK_TRANSACTION = 1, 176 176 BINDER_WORK_TRANSACTION_COMPLETE, 177 + BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT, 177 178 BINDER_WORK_RETURN_ERROR, 178 179 BINDER_WORK_NODE, 179 180 BINDER_WORK_DEAD_BINDER, ··· 410 409 * @outer_lock: no nesting under innor or node lock 411 410 * Lock order: 1) outer, 2) node, 3) inner 412 411 * @binderfs_entry: process-specific binderfs log file 412 + * @oneway_spam_detection_enabled: process enabled oneway spam detection 413 + * or not 413 414 * 414 415 * Bookkeeping structure for binder processes 415 416 */ ··· 447 444 spinlock_t inner_lock; 448 445 spinlock_t outer_lock; 449 446 struct dentry *binderfs_entry; 447 + bool oneway_spam_detection_enabled; 450 448 }; 451 449 452 450 /**
+8
include/uapi/linux/android/binder.h
··· 241 241 #define BINDER_SET_CONTEXT_MGR_EXT _IOW('b', 13, struct flat_binder_object) 242 242 #define BINDER_FREEZE _IOW('b', 14, struct binder_freeze_info) 243 243 #define BINDER_GET_FROZEN_INFO _IOWR('b', 15, struct binder_frozen_status_info) 244 + #define BINDER_ENABLE_ONEWAY_SPAM_DETECTION _IOW('b', 16, __u32) 244 245 245 246 /* 246 247 * NOTE: Two special error codes you should check for when calling ··· 428 427 /* 429 428 * The target of the last transaction (either a bcTRANSACTION or 430 429 * a bcATTEMPT_ACQUIRE) is frozen. No parameters. 430 + */ 431 + 432 + BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19), 433 + /* 434 + * Current process sent too many oneway calls to target, and the last 435 + * asynchronous transaction makes the allocated async buffer size exceed 436 + * detection threshold. No parameters. 431 437 */ 432 438 }; 433 439