Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/alternative: Fix race in try_get_desc()

I encountered some occasional crashes of poke_int3_handler() when
kprobes are set, while accessing desc->vec.

The text poke mechanism claims to have an RCU-like behavior, but it
does not appear that there is any quiescent state to ensure that
nobody holds reference to desc. As a result, the following race
appears to be possible, which can lead to memory corruption.

CPU0 CPU1
---- ----
text_poke_bp_batch()
-> smp_store_release(&bp_desc, &desc)

[ notice that desc is on
the stack ]

poke_int3_handler()

[ int3 might be kprobe's
so sync events do not
help ]

-> try_get_desc(descp=&bp_desc)
desc = __READ_ONCE(bp_desc)

if (!desc) [false, success]
WRITE_ONCE(bp_desc, NULL);
atomic_dec_and_test(&desc.refs)

[ success, desc space on the stack
is being reused and might have
non-zero value. ]
arch_atomic_inc_not_zero(&desc->refs)

[ might succeed since desc points to
stack memory that was freed and might
be reused. ]

Fix this issue with a small backportable patch. Instead of trying to
make RCU-like behavior for bp_desc, just eliminate the unnecessary
level of indirection of bp_desc, and hold the whole descriptor as a
global. Anyhow, there is only a single descriptor at any given
moment.

Fixes: 1f676247f36a4 ("x86/alternatives: Implement a better poke_int3_handler() completion scheme")
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@kernel.org
Link: https://lkml.kernel.org/r/20220920224743.3089-1-namit@vmware.com

authored by

Nadav Amit and committed by
Peter Zijlstra
efd608fa e400ad8b

+23 -22
+23 -22
arch/x86/kernel/alternative.c
··· 1319 atomic_t refs; 1320 }; 1321 1322 - static struct bp_patching_desc *bp_desc; 1323 1324 static __always_inline 1325 - struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) 1326 { 1327 - /* rcu_dereference */ 1328 - struct bp_patching_desc *desc = __READ_ONCE(*descp); 1329 1330 - if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) 1331 return NULL; 1332 1333 return desc; 1334 } 1335 1336 - static __always_inline void put_desc(struct bp_patching_desc *desc) 1337 { 1338 smp_mb__before_atomic(); 1339 arch_atomic_dec(&desc->refs); 1340 } ··· 1368 1369 /* 1370 * Having observed our INT3 instruction, we now must observe 1371 - * bp_desc: 1372 * 1373 - * bp_desc = desc INT3 1374 * WMB RMB 1375 - * write INT3 if (desc) 1376 */ 1377 smp_rmb(); 1378 1379 - desc = try_get_desc(&bp_desc); 1380 if (!desc) 1381 return 0; 1382 ··· 1430 ret = 1; 1431 1432 out_put: 1433 - put_desc(desc); 1434 return ret; 1435 } 1436 ··· 1461 */ 1462 static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) 1463 { 1464 - struct bp_patching_desc desc = { 1465 - .vec = tp, 1466 - .nr_entries = nr_entries, 1467 - .refs = ATOMIC_INIT(1), 1468 - }; 1469 unsigned char int3 = INT3_INSN_OPCODE; 1470 unsigned int i; 1471 int do_sync; 1472 1473 lockdep_assert_held(&text_mutex); 1474 1475 - smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */ 1476 1477 /* 1478 * Corresponding read barrier in int3 notifier for making sure the ··· 1562 text_poke_sync(); 1563 1564 /* 1565 - * Remove and synchronize_rcu(), except we have a very primitive 1566 - * refcount based completion. 1567 */ 1568 - WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */ 1569 - if (!atomic_dec_and_test(&desc.refs)) 1570 - atomic_cond_read_acquire(&desc.refs, !VAL); 1571 } 1572 1573 static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
··· 1319 atomic_t refs; 1320 }; 1321 1322 + static struct bp_patching_desc bp_desc; 1323 1324 static __always_inline 1325 + struct bp_patching_desc *try_get_desc(void) 1326 { 1327 + struct bp_patching_desc *desc = &bp_desc; 1328 1329 + if (!arch_atomic_inc_not_zero(&desc->refs)) 1330 return NULL; 1331 1332 return desc; 1333 } 1334 1335 + static __always_inline void put_desc(void) 1336 { 1337 + struct bp_patching_desc *desc = &bp_desc; 1338 + 1339 smp_mb__before_atomic(); 1340 arch_atomic_dec(&desc->refs); 1341 } ··· 1367 1368 /* 1369 * Having observed our INT3 instruction, we now must observe 1370 + * bp_desc with non-zero refcount: 1371 * 1372 + * bp_desc.refs = 1 INT3 1373 * WMB RMB 1374 + * write INT3 if (bp_desc.refs != 0) 1375 */ 1376 smp_rmb(); 1377 1378 + desc = try_get_desc(); 1379 if (!desc) 1380 return 0; 1381 ··· 1429 ret = 1; 1430 1431 out_put: 1432 + put_desc(); 1433 return ret; 1434 } 1435 ··· 1460 */ 1461 static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) 1462 { 1463 unsigned char int3 = INT3_INSN_OPCODE; 1464 unsigned int i; 1465 int do_sync; 1466 1467 lockdep_assert_held(&text_mutex); 1468 1469 + bp_desc.vec = tp; 1470 + bp_desc.nr_entries = nr_entries; 1471 + 1472 + /* 1473 + * Corresponds to the implicit memory barrier in try_get_desc() to 1474 + * ensure reading a non-zero refcount provides up to date bp_desc data. 1475 + */ 1476 + atomic_set_release(&bp_desc.refs, 1); 1477 1478 /* 1479 * Corresponding read barrier in int3 notifier for making sure the ··· 1559 text_poke_sync(); 1560 1561 /* 1562 + * Remove and wait for refs to be zero. 1563 */ 1564 + if (!atomic_dec_and_test(&bp_desc.refs)) 1565 + atomic_cond_read_acquire(&bp_desc.refs, !VAL); 1566 } 1567 1568 static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,