x86/alternative: Fix race in try_get_desc()

I encountered some occasional crashes of poke_int3_handler() when
kprobes are set, while accessing desc->vec.

The text poke mechanism claims to have an RCU-like behavior, but it
does not appear that there is any quiescent state to ensure that
nobody holds a reference to desc. As a result, the following race
appears to be possible, which can lead to memory corruption.

CPU0 CPU1
---- ----
text_poke_bp_batch()
-> smp_store_release(&bp_desc, &desc)

[ notice that desc is on
the stack ]

poke_int3_handler()

[ int3 might be kprobe's
so sync events do not
help ]

-> try_get_desc(descp=&bp_desc)
desc = __READ_ONCE(bp_desc)

if (!desc) [false, success]
WRITE_ONCE(bp_desc, NULL);
atomic_dec_and_test(&desc.refs)

[ success, desc space on the stack
is being reused and might have
non-zero value. ]
arch_atomic_inc_not_zero(&desc->refs)

[ might succeed since desc points to
stack memory that was freed and might
be reused. ]

Fix this issue with a small backportable patch. Instead of trying to
make RCU-like behavior for bp_desc, just eliminate the unnecessary
level of indirection of bp_desc, and hold the whole descriptor as a
global. Anyhow, there is only a single descriptor at any given
moment.

Fixes: 1f676247f36a4 ("x86/alternatives: Implement a better poke_int3_handler() completion scheme")
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@kernel.org
Link: https://lkml.kernel.org/r/20220920224743.3089-1-namit@vmware.com

authored by Nadav Amit and committed by Peter Zijlstra efd608fa e400ad8b

Changed files
+23 -22
arch
x86
kernel
+23 -22
arch/x86/kernel/alternative.c
··· 1319 1319 atomic_t refs; 1320 1320 }; 1321 1321 1322 - static struct bp_patching_desc *bp_desc; 1322 + static struct bp_patching_desc bp_desc; 1323 1323 1324 1324 static __always_inline 1325 - struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) 1325 + struct bp_patching_desc *try_get_desc(void) 1326 1326 { 1327 - /* rcu_dereference */ 1328 - struct bp_patching_desc *desc = __READ_ONCE(*descp); 1327 + struct bp_patching_desc *desc = &bp_desc; 1329 1328 1330 - if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) 1329 + if (!arch_atomic_inc_not_zero(&desc->refs)) 1331 1330 return NULL; 1332 1331 1333 1332 return desc; 1334 1333 } 1335 1334 1336 - static __always_inline void put_desc(struct bp_patching_desc *desc) 1335 + static __always_inline void put_desc(void) 1337 1336 { 1337 + struct bp_patching_desc *desc = &bp_desc; 1338 + 1338 1339 smp_mb__before_atomic(); 1339 1340 arch_atomic_dec(&desc->refs); 1340 1341 } ··· 1368 1367 1369 1368 /* 1370 1369 * Having observed our INT3 instruction, we now must observe 1371 - * bp_desc: 1370 + * bp_desc with non-zero refcount: 1372 1371 * 1373 - * bp_desc = desc INT3 1372 + * bp_desc.refs = 1 INT3 1374 1373 * WMB RMB 1375 - * write INT3 if (desc) 1374 + * write INT3 if (bp_desc.refs != 0) 1376 1375 */ 1377 1376 smp_rmb(); 1378 1377 1379 - desc = try_get_desc(&bp_desc); 1378 + desc = try_get_desc(); 1380 1379 if (!desc) 1381 1380 return 0; 1382 1381 ··· 1430 1429 ret = 1; 1431 1430 1432 1431 out_put: 1433 - put_desc(desc); 1432 + put_desc(); 1434 1433 return ret; 1435 1434 } 1436 1435 ··· 1461 1460 */ 1462 1461 static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) 1463 1462 { 1464 - struct bp_patching_desc desc = { 1465 - .vec = tp, 1466 - .nr_entries = nr_entries, 1467 - .refs = ATOMIC_INIT(1), 1468 - }; 1469 1463 unsigned char int3 = INT3_INSN_OPCODE; 1470 1464 unsigned int i; 1471 1465 int do_sync; 1472 1466 1473 1467 lockdep_assert_held(&text_mutex); 1474 1468 1475 - 
smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */ 1469 + bp_desc.vec = tp; 1470 + bp_desc.nr_entries = nr_entries; 1471 + 1472 + /* 1473 + * Corresponds to the implicit memory barrier in try_get_desc() to 1474 + * ensure reading a non-zero refcount provides up to date bp_desc data. 1475 + */ 1476 + atomic_set_release(&bp_desc.refs, 1); 1476 1477 1477 1478 /* 1478 1479 * Corresponding read barrier in int3 notifier for making sure the ··· 1562 1559 text_poke_sync(); 1563 1560 1564 1561 /* 1565 - * Remove and synchronize_rcu(), except we have a very primitive 1566 - * refcount based completion. 1562 + * Remove and wait for refs to be zero. 1567 1563 */ 1568 - WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */ 1569 - if (!atomic_dec_and_test(&desc.refs)) 1570 - atomic_cond_read_acquire(&desc.refs, !VAL); 1564 + if (!atomic_dec_and_test(&bp_desc.refs)) 1565 + atomic_cond_read_acquire(&bp_desc.refs, !VAL); 1571 1566 } 1572 1567 1573 1568 static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,