Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lib/list_debug.c: add object information in case of invalid object

Currently, when linked list corruption is detected, only the culprit address
and its wrong value are printed, but sometimes that is not enough to find the
actual point of failure.

If the allocation and free paths of the corrupted node are printed as well,
it will be a lot easier to find and fix such issues.

Add this information when a data mismatch is found in the linked list
debug data:

[ 14.243055] slab kmalloc-32 start ffff0000cda19320 data offset 32 pointer offset 8 size 32 allocated at add_to_list+0x28/0xb0
[ 14.245259] __kmalloc_cache_noprof+0x1c4/0x358
[ 14.245572] add_to_list+0x28/0xb0
...
[ 14.248632] do_el0_svc_compat+0x1c/0x34
[ 14.249018] el0_svc_compat+0x2c/0x80
[ 14.249244] Free path:
[ 14.249410] kfree+0x24c/0x2f0
[ 14.249724] do_force_corruption+0xbc/0x100
...
[ 14.252266] el0_svc_common.constprop.0+0x40/0xe0
[ 14.252540] do_el0_svc_compat+0x1c/0x34
[ 14.252763] el0_svc_compat+0x2c/0x80
[ 14.253071] ------------[ cut here ]------------
[ 14.253303] list_del corruption. next->prev should be ffff0000cda192a8, but was 6b6b6b6b6b6b6b6b. (next=ffff0000cda19348)
[ 14.254255] WARNING: CPU: 3 PID: 84 at lib/list_debug.c:65 __list_del_entry_valid_or_report+0x158/0x164

Move the prototype of mem_dump_obj() to bug.h, as mm.h cannot be included in
bug.h.

Link: https://lkml.kernel.org/r/20241230101043.53773-1-maninder1.s@samsung.com
Signed-off-by: Maninder Singh <maninder1.s@samsung.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: Rohit Thapliyal <r.thapliyal@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Maninder Singh and committed by
Andrew Morton
30cee1e4 553e7752

+22 -20
+1 -1
fs/open.c
··· 1504 1504 { 1505 1505 int retval = 0; 1506 1506 1507 - if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, 1507 + if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, filp, 1508 1508 "VFS: Close: file count is 0 (f_op=%ps)", 1509 1509 filp->f_op)) { 1510 1510 return 0;
+1 -1
fs/super.c
··· 647 647 */ 648 648 fscrypt_destroy_keyring(sb); 649 649 650 - if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes), 650 + if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes), NULL, 651 651 "VFS: Busy inodes after unmount of %s (%s)", 652 652 sb->s_id, sb->s_type->name)) { 653 653 /*
+9 -1
include/linux/bug.h
··· 73 73 74 74 #endif /* CONFIG_GENERIC_BUG */ 75 75 76 + #ifdef CONFIG_PRINTK 77 + void mem_dump_obj(void *object); 78 + #else 79 + static inline void mem_dump_obj(void *object) {} 80 + #endif 81 + 76 82 /* 77 83 * Since detected data corruption should stop operation on the affected 78 84 * structures. Return value must be checked and sanely acted on by caller. 79 85 */ 80 86 static inline __must_check bool check_data_corruption(bool v) { return v; } 81 - #define CHECK_DATA_CORRUPTION(condition, fmt, ...) \ 87 + #define CHECK_DATA_CORRUPTION(condition, addr, fmt, ...) \ 82 88 check_data_corruption(({ \ 83 89 bool corruption = unlikely(condition); \ 84 90 if (corruption) { \ 91 + if (addr) \ 92 + mem_dump_obj(addr); \ 85 93 if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ 86 94 pr_err(fmt, ##__VA_ARGS__); \ 87 95 BUG(); \
-6
include/linux/mm.h
··· 4084 4084 4085 4085 extern int sysctl_nr_trim_pages; 4086 4086 4087 - #ifdef CONFIG_PRINTK 4088 - void mem_dump_obj(void *object); 4089 - #else 4090 - static inline void mem_dump_obj(void *object) {} 4091 - #endif 4092 - 4093 4087 #ifdef CONFIG_ANON_VMA_NAME 4094 4088 int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, 4095 4089 unsigned long len_in,
+11 -11
lib/list_debug.c
··· 22 22 bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, 23 23 struct list_head *next) 24 24 { 25 - if (CHECK_DATA_CORRUPTION(prev == NULL, 25 + if (CHECK_DATA_CORRUPTION(prev == NULL, NULL, 26 26 "list_add corruption. prev is NULL.\n") || 27 - CHECK_DATA_CORRUPTION(next == NULL, 27 + CHECK_DATA_CORRUPTION(next == NULL, NULL, 28 28 "list_add corruption. next is NULL.\n") || 29 - CHECK_DATA_CORRUPTION(next->prev != prev, 29 + CHECK_DATA_CORRUPTION(next->prev != prev, next, 30 30 "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n", 31 31 prev, next->prev, next) || 32 - CHECK_DATA_CORRUPTION(prev->next != next, 32 + CHECK_DATA_CORRUPTION(prev->next != next, prev, 33 33 "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n", 34 34 next, prev->next, prev) || 35 - CHECK_DATA_CORRUPTION(new == prev || new == next, 35 + CHECK_DATA_CORRUPTION(new == prev || new == next, NULL, 36 36 "list_add double add: new=%px, prev=%px, next=%px.\n", 37 37 new, prev, next)) 38 38 return false; ··· 49 49 prev = entry->prev; 50 50 next = entry->next; 51 51 52 - if (CHECK_DATA_CORRUPTION(next == NULL, 52 + if (CHECK_DATA_CORRUPTION(next == NULL, NULL, 53 53 "list_del corruption, %px->next is NULL\n", entry) || 54 - CHECK_DATA_CORRUPTION(prev == NULL, 54 + CHECK_DATA_CORRUPTION(prev == NULL, NULL, 55 55 "list_del corruption, %px->prev is NULL\n", entry) || 56 - CHECK_DATA_CORRUPTION(next == LIST_POISON1, 56 + CHECK_DATA_CORRUPTION(next == LIST_POISON1, next, 57 57 "list_del corruption, %px->next is LIST_POISON1 (%px)\n", 58 58 entry, LIST_POISON1) || 59 - CHECK_DATA_CORRUPTION(prev == LIST_POISON2, 59 + CHECK_DATA_CORRUPTION(prev == LIST_POISON2, prev, 60 60 "list_del corruption, %px->prev is LIST_POISON2 (%px)\n", 61 61 entry, LIST_POISON2) || 62 - CHECK_DATA_CORRUPTION(prev->next != entry, 62 + CHECK_DATA_CORRUPTION(prev->next != entry, prev, 63 63 "list_del corruption. 
prev->next should be %px, but was %px. (prev=%px)\n", 64 64 entry, prev->next, prev) || 65 - CHECK_DATA_CORRUPTION(next->prev != entry, 65 + CHECK_DATA_CORRUPTION(next->prev != entry, next, 66 66 "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n", 67 67 entry, next->prev, next)) 68 68 return false;