Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/mm: Refactor cond_ibpb() to support other use cases

cond_ibpb() has the necessary bits required to track the previous mm in
switch_mm_irqs_off(). This can be reused for other use cases like L1D
flushing on context switch.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Balbir Singh <sblbir@amazon.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210108121056.21940-3-sblbir@amazon.com

Authored by Balbir Singh and committed by Thomas Gleixner.
371b09c6 c52787b5

+30 -25
+1 -1
arch/x86/include/asm/tlbflush.h
··· 83 83 /* Last user mm for optimizing IBPB */ 84 84 union { 85 85 struct mm_struct *last_user_mm; 86 - unsigned long last_user_mm_ibpb; 86 + unsigned long last_user_mm_spec; 87 87 }; 88 88 89 89 u16 loaded_mm_asid;
+29 -24
arch/x86/mm/tlb.c
··· 43 43 */ 44 44 45 45 /* 46 - * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is 47 - * stored in cpu_tlb_state.last_user_mm_ibpb. 46 + * Bits to mangle the TIF_SPEC_IB state into the mm pointer which is 47 + * stored in cpu_tlb_state.last_user_mm_spec. 48 48 */ 49 49 #define LAST_USER_MM_IBPB 0x1UL 50 + #define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB) 51 + 52 + /* Bits to set when tlbstate and flush is (re)initialized */ 53 + #define LAST_USER_MM_INIT LAST_USER_MM_IBPB 50 54 51 55 /* 52 56 * The x86 feature is called PCID (Process Context IDentifier). It is similar ··· 321 317 local_irq_restore(flags); 322 318 } 323 319 324 - static unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) 320 + static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next) 325 321 { 326 322 unsigned long next_tif = task_thread_info(next)->flags; 327 - unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; 323 + unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK; 328 324 329 - return (unsigned long)next->mm | ibpb; 325 + return (unsigned long)next->mm | spec_bits; 330 326 } 331 327 332 - static void cond_ibpb(struct task_struct *next) 328 + static void cond_mitigation(struct task_struct *next) 333 329 { 330 + unsigned long prev_mm, next_mm; 331 + 334 332 if (!next || !next->mm) 335 333 return; 336 334 335 + next_mm = mm_mangle_tif_spec_bits(next); 336 + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); 337 + 337 338 /* 339 + * Avoid user/user BTB poisoning by flushing the branch predictor 340 + * when switching between processes. This stops one process from 341 + * doing Spectre-v2 attacks on another. 342 + * 338 343 * Both, the conditional and the always IBPB mode use the mm 339 344 * pointer to avoid the IBPB when switching between tasks of the 340 345 * same process. Using the mm pointer instead of mm->context.ctx_id ··· 353 340 * exposed data is not really interesting. 
354 341 */ 355 342 if (static_branch_likely(&switch_mm_cond_ibpb)) { 356 - unsigned long prev_mm, next_mm; 357 - 358 343 /* 359 344 * This is a bit more complex than the always mode because 360 345 * it has to handle two cases: ··· 382 371 * Optimize this with reasonably small overhead for the 383 372 * above cases. Mangle the TIF_SPEC_IB bit into the mm 384 373 * pointer of the incoming task which is stored in 385 - * cpu_tlbstate.last_user_mm_ibpb for comparison. 386 - */ 387 - next_mm = mm_mangle_tif_spec_ib(next); 388 - prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); 389 - 390 - /* 374 + * cpu_tlbstate.last_user_mm_spec for comparison. 375 + * 391 376 * Issue IBPB only if the mm's are different and one or 392 377 * both have the IBPB bit set. 393 378 */ 394 379 if (next_mm != prev_mm && 395 380 (next_mm | prev_mm) & LAST_USER_MM_IBPB) 396 381 indirect_branch_prediction_barrier(); 397 - 398 - this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); 399 382 } 400 383 401 384 if (static_branch_unlikely(&switch_mm_always_ibpb)) { ··· 398 393 * different context than the user space task which ran 399 394 * last on this CPU. 400 395 */ 401 - if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { 396 + if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != 397 + (unsigned long)next->mm) 402 398 indirect_branch_prediction_barrier(); 403 - this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); 404 - } 405 399 } 400 + 401 + this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm); 406 402 } 407 403 408 404 #ifdef CONFIG_PERF_EVENTS ··· 537 531 need_flush = true; 538 532 } else { 539 533 /* 540 - * Avoid user/user BTB poisoning by flushing the branch 541 - * predictor when switching between processes. This stops 542 - * one process from doing Spectre-v2 attacks on another. 534 + * Apply process to process speculation vulnerability 535 + * mitigations if applicable. 
543 536 */ 544 - cond_ibpb(tsk); 537 + cond_mitigation(tsk); 545 538 546 539 /* 547 540 * Stop remote flushes for the previous mm. ··· 648 643 write_cr3(build_cr3(mm->pgd, 0)); 649 644 650 645 /* Reinitialize tlbstate. */ 651 - this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); 646 + this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT); 652 647 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); 653 648 this_cpu_write(cpu_tlbstate.next_asid, 1); 654 649 this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);