Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kcsan: Add core support for a subset of weak memory modeling

Add support for modeling a subset of weak memory, which will enable
detection of a subset of data races due to missing memory barriers.

KCSAN's approach to detecting missing memory barriers is based on
modeling access reordering, and is enabled if `CONFIG_KCSAN_WEAK_MEMORY=y`,
which depends on `CONFIG_KCSAN_STRICT=y`. The feature can be enabled or
disabled at boot and runtime via the `kcsan.weak_memory` boot parameter.

Each memory access for which a watchpoint is set up is also selected
for simulated reordering within the scope of its function (at most 1
in-flight access).

We are limited to modeling the effects of "buffering" (delaying the
access), since the runtime cannot "prefetch" accesses (therefore no
acquire modeling). Once an access has been selected for reordering, it
is checked along every other access until the end of the function scope.
If an appropriate memory barrier is encountered, the access will no
longer be considered for reordering.

When the result of a memory operation should be ordered by a barrier,
KCSAN can then detect data races where the conflict only occurs as a
result of a missing barrier due to reordering accesses.

Suggested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

authored by

Marco Elver and committed by
Paul E. McKenney
69562e49 9756f64c

+235 -19
+9 -1
include/linux/kcsan-checks.h
··· 99 99 100 100 /* Scoped access information. */ 101 101 struct kcsan_scoped_access { 102 - struct list_head list; 102 + union { 103 + struct list_head list; /* scoped_accesses list */ 104 + /* 105 + * Not an entry in scoped_accesses list; stack depth from where 106 + * the access was initialized. 107 + */ 108 + int stack_depth; 109 + }; 110 + 103 111 /* Access information. */ 104 112 const volatile void *ptr; 105 113 size_t size;
+9 -1
include/linux/kcsan.h
··· 49 49 */ 50 50 unsigned long access_mask; 51 51 52 - /* List of scoped accesses. */ 52 + /* List of scoped accesses; likely to be empty. */ 53 53 struct list_head scoped_accesses; 54 + 55 + #ifdef CONFIG_KCSAN_WEAK_MEMORY 56 + /* 57 + * Scoped access for modeling access reordering to detect missing memory 58 + * barriers; only keep 1 to keep fast-path complexity manageable. 59 + */ 60 + struct kcsan_scoped_access reorder_access; 61 + #endif 54 62 }; 55 63 56 64 /**
+3
include/linux/sched.h
··· 1339 1339 #ifdef CONFIG_TRACE_IRQFLAGS 1340 1340 struct irqtrace_events kcsan_save_irqtrace; 1341 1341 #endif 1342 + #ifdef CONFIG_KCSAN_WEAK_MEMORY 1343 + int kcsan_stack_depth; 1344 + #endif 1342 1345 #endif 1343 1346 1344 1347 #if IS_ENABLED(CONFIG_KUNIT)
+187 -15
kernel/kcsan/core.c
··· 40 40 module_param_named(skip_watch, kcsan_skip_watch, long, 0644); 41 41 module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444); 42 42 43 + #ifdef CONFIG_KCSAN_WEAK_MEMORY 44 + static bool kcsan_weak_memory = true; 45 + module_param_named(weak_memory, kcsan_weak_memory, bool, 0644); 46 + #else 47 + #define kcsan_weak_memory false 48 + #endif 49 + 43 50 bool kcsan_enabled; 44 51 45 52 /* Per-CPU kcsan_ctx for interrupts */ ··· 358 351 #endif 359 352 } 360 353 354 + static __always_inline int get_kcsan_stack_depth(void) 355 + { 356 + #ifdef CONFIG_KCSAN_WEAK_MEMORY 357 + return current->kcsan_stack_depth; 358 + #else 359 + BUILD_BUG(); 360 + return 0; 361 + #endif 362 + } 363 + 364 + static __always_inline void add_kcsan_stack_depth(int val) 365 + { 366 + #ifdef CONFIG_KCSAN_WEAK_MEMORY 367 + current->kcsan_stack_depth += val; 368 + #else 369 + BUILD_BUG(); 370 + #endif 371 + } 372 + 373 + static __always_inline struct kcsan_scoped_access *get_reorder_access(struct kcsan_ctx *ctx) 374 + { 375 + #ifdef CONFIG_KCSAN_WEAK_MEMORY 376 + return ctx->disable_scoped ? NULL : &ctx->reorder_access; 377 + #else 378 + return NULL; 379 + #endif 380 + } 381 + 382 + static __always_inline bool 383 + find_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, 384 + int type, unsigned long ip) 385 + { 386 + struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx); 387 + 388 + if (!reorder_access) 389 + return false; 390 + 391 + /* 392 + * Note: If accesses are repeated while reorder_access is identical, 393 + * never matches the new access, because !(type & KCSAN_ACCESS_SCOPED). 
394 + */ 395 + return reorder_access->ptr == ptr && reorder_access->size == size && 396 + reorder_access->type == type && reorder_access->ip == ip; 397 + } 398 + 399 + static inline void 400 + set_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, 401 + int type, unsigned long ip) 402 + { 403 + struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx); 404 + 405 + if (!reorder_access || !kcsan_weak_memory) 406 + return; 407 + 408 + reorder_access->ptr = ptr; 409 + reorder_access->size = size; 410 + reorder_access->type = type | KCSAN_ACCESS_SCOPED; 411 + reorder_access->ip = ip; 412 + reorder_access->stack_depth = get_kcsan_stack_depth(); 413 + } 414 + 361 415 /* 362 416 * Pull everything together: check_access() below contains the performance 363 417 * critical operations; the fast-path (including check_access) functions should ··· 457 389 * The access_mask check relies on value-change comparison. To avoid 458 390 * reporting a race where e.g. the writer set up the watchpoint, but the 459 391 * reader has access_mask!=0, we have to ignore the found watchpoint. 392 + * 393 + * reorder_access is never created from an access with access_mask set. 
460 394 */ 461 - if (ctx->access_mask) 395 + if (ctx->access_mask && !find_reorder_access(ctx, ptr, size, type, ip)) 462 396 return; 463 397 464 398 /* ··· 510 440 const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0; 511 441 atomic_long_t *watchpoint; 512 442 u64 old, new, diff; 513 - unsigned long access_mask; 514 443 enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; 444 + bool interrupt_watcher = kcsan_interrupt_watcher; 515 445 unsigned long ua_flags = user_access_save(); 516 446 struct kcsan_ctx *ctx = get_ctx(); 447 + unsigned long access_mask = ctx->access_mask; 517 448 unsigned long irq_flags = 0; 449 + bool is_reorder_access; 518 450 519 451 /* 520 452 * Always reset kcsan_skip counter in slow-path to avoid underflow; see ··· 540 468 } 541 469 542 470 /* 471 + * The local CPU cannot observe reordering of its own accesses, and 472 + * therefore we need to take care of 2 cases to avoid false positives: 473 + * 474 + * 1. Races of the reordered access with interrupts. To avoid, if 475 + * the current access is reorder_access, disable interrupts. 476 + * 2. Avoid races of scoped accesses from nested interrupts (below). 477 + */ 478 + is_reorder_access = find_reorder_access(ctx, ptr, size, type, ip); 479 + if (is_reorder_access) 480 + interrupt_watcher = false; 481 + /* 543 482 * Avoid races of scoped accesses from nested interrupts (or scheduler). 544 483 * Assume setting up a watchpoint for a non-scoped (normal) access that 545 484 * also conflicts with a current scoped access. In a nested interrupt, ··· 565 482 * information is lost if dirtied by KCSAN. 566 483 */ 567 484 kcsan_save_irqtrace(current); 568 - if (!kcsan_interrupt_watcher) 485 + if (!interrupt_watcher) 569 486 local_irq_save(irq_flags); 570 487 571 488 watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); ··· 586 503 * Read the current value, to later check and infer a race if the data 587 504 * was modified via a non-instrumented access, e.g. from a device. 
588 505 */ 589 - old = read_instrumented_memory(ptr, size); 506 + old = is_reorder_access ? 0 : read_instrumented_memory(ptr, size); 590 507 591 508 /* 592 509 * Delay this thread, to increase probability of observing a racy ··· 598 515 * Re-read value, and check if it is as expected; if not, we infer a 599 516 * racy access. 600 517 */ 601 - access_mask = ctx->access_mask; 602 - new = read_instrumented_memory(ptr, size); 518 + if (!is_reorder_access) { 519 + new = read_instrumented_memory(ptr, size); 520 + } else { 521 + /* 522 + * Reordered accesses cannot be used for value change detection, 523 + * because the memory location may no longer be accessible and 524 + * could result in a fault. 525 + */ 526 + new = 0; 527 + access_mask = 0; 528 + } 603 529 604 530 diff = old ^ new; 605 531 if (access_mask) ··· 677 585 */ 678 586 remove_watchpoint(watchpoint); 679 587 atomic_long_dec(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]); 588 + 680 589 out_unlock: 681 - if (!kcsan_interrupt_watcher) 590 + if (!interrupt_watcher) 682 591 local_irq_restore(irq_flags); 683 592 kcsan_restore_irqtrace(current); 684 593 ctx->disable_scoped--; 594 + 595 + /* 596 + * Reordered accesses cannot be used for value change detection, 597 + * therefore never consider for reordering if access_mask is set. 598 + * ASSERT_EXCLUSIVE are not real accesses, ignore them as well. 
599 + */ 600 + if (!access_mask && !is_assert) 601 + set_reorder_access(ctx, ptr, size, type, ip); 685 602 out: 686 603 user_access_restore(ua_flags); 687 604 } ··· 698 597 static __always_inline void 699 598 check_access(const volatile void *ptr, size_t size, int type, unsigned long ip) 700 599 { 701 - const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; 702 600 atomic_long_t *watchpoint; 703 601 long encoded_watchpoint; 704 602 ··· 708 608 if (unlikely(size == 0)) 709 609 return; 710 610 611 + again: 711 612 /* 712 613 * Avoid user_access_save in fast-path: find_watchpoint is safe without 713 614 * user_access_save, as the address that ptr points to is only used to 714 615 * check if a watchpoint exists; ptr is never dereferenced. 715 616 */ 716 - watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write, 617 + watchpoint = find_watchpoint((unsigned long)ptr, size, 618 + !(type & KCSAN_ACCESS_WRITE), 717 619 &encoded_watchpoint); 718 620 /* 719 621 * It is safe to check kcsan_is_enabled() after find_watchpoint in the ··· 729 627 else { 730 628 struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */ 731 629 732 - if (unlikely(should_watch(ctx, ptr, size, type))) 630 + if (unlikely(should_watch(ctx, ptr, size, type))) { 733 631 kcsan_setup_watchpoint(ptr, size, type, ip); 734 - else if (unlikely(ctx->scoped_accesses.prev)) 632 + return; 633 + } 634 + 635 + if (!(type & KCSAN_ACCESS_SCOPED)) { 636 + struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx); 637 + 638 + if (reorder_access) { 639 + /* 640 + * reorder_access check: simulates reordering of 641 + * the access after subsequent operations. 642 + */ 643 + ptr = reorder_access->ptr; 644 + type = reorder_access->type; 645 + ip = reorder_access->ip; 646 + /* 647 + * Upon a nested interrupt, this context's 648 + * reorder_access can be modified (shared ctx). 
649 + * We know that upon return, reorder_access is 650 + * always invalidated by setting size to 0 via 651 + * __tsan_func_exit(). Therefore we must read 652 + * and check size after the other fields. 653 + */ 654 + barrier(); 655 + size = READ_ONCE(reorder_access->size); 656 + if (size) 657 + goto again; 658 + } 659 + } 660 + 661 + /* 662 + * Always checked last, right before returning from runtime; 663 + * if reorder_access is valid, checked after it was checked. 664 + */ 665 + if (unlikely(ctx->scoped_accesses.prev)) 735 666 kcsan_check_scoped_accesses(); 736 667 } 737 668 } ··· 1051 916 DEFINE_TSAN_VOLATILE_READ_WRITE(16); 1052 917 1053 918 /* 1054 - * The below are not required by KCSAN, but can still be emitted by the 1055 - * compiler. 919 + * Function entry and exit are used to determine the validty of reorder_access. 920 + * Reordering of the access ends at the end of the function scope where the 921 + * access happened. This is done for two reasons: 922 + * 923 + * 1. Artificially limits the scope where missing barriers are detected. 924 + * This minimizes false positives due to uninstrumented functions that 925 + * contain the required barriers but were missed. 926 + * 927 + * 2. Simplifies generating the stack trace of the access. 
1056 928 */ 1057 929 void __tsan_func_entry(void *call_pc); 1058 - void __tsan_func_entry(void *call_pc) 930 + noinline void __tsan_func_entry(void *call_pc) 1059 931 { 932 + if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY)) 933 + return; 934 + 935 + add_kcsan_stack_depth(1); 1060 936 } 1061 937 EXPORT_SYMBOL(__tsan_func_entry); 938 + 1062 939 void __tsan_func_exit(void); 1063 - void __tsan_func_exit(void) 940 + noinline void __tsan_func_exit(void) 1064 941 { 942 + struct kcsan_scoped_access *reorder_access; 943 + 944 + if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY)) 945 + return; 946 + 947 + reorder_access = get_reorder_access(get_ctx()); 948 + if (!reorder_access) 949 + goto out; 950 + 951 + if (get_kcsan_stack_depth() <= reorder_access->stack_depth) { 952 + /* 953 + * Access check to catch cases where write without a barrier 954 + * (supposed release) was last access in function: because 955 + * instrumentation is inserted before the real access, a data 956 + * race due to the write giving up a c-s would only be caught if 957 + * we do the conflicting access after. 958 + */ 959 + check_access(reorder_access->ptr, reorder_access->size, 960 + reorder_access->type, reorder_access->ip); 961 + reorder_access->size = 0; 962 + reorder_access->stack_depth = INT_MIN; 963 + } 964 + out: 965 + add_kcsan_stack_depth(-1); 1065 966 } 1066 967 EXPORT_SYMBOL(__tsan_func_exit); 968 + 1067 969 void __tsan_init(void); 1068 970 void __tsan_init(void) 1069 971 {
+20
lib/Kconfig.kcsan
··· 191 191 closely aligns with the rules defined by the Linux-kernel memory 192 192 consistency model (LKMM). 193 193 194 + config KCSAN_WEAK_MEMORY 195 + bool "Enable weak memory modeling to detect missing memory barriers" 196 + default y 197 + depends on KCSAN_STRICT 198 + # We can either let objtool nop __tsan_func_{entry,exit}() and builtin 199 + # atomics instrumentation in .noinstr.text, or use a compiler that can 200 + # implement __no_kcsan to really remove all instrumentation. 201 + depends on STACK_VALIDATION || CC_IS_GCC 202 + help 203 + Enable support for modeling a subset of weak memory, which allows 204 + detecting a subset of data races due to missing memory barriers. 205 + 206 + Depends on KCSAN_STRICT, because the options strengthening certain 207 + plain accesses by default (depending on !KCSAN_STRICT) reduce the 208 + ability to detect any data races involving reordered accesses, in 209 + particular reordered writes. 210 + 211 + Weak memory modeling relies on additional instrumentation and may 212 + affect performance. 213 + 194 214 config KCSAN_REPORT_VALUE_CHANGE_ONLY 195 215 bool "Only report races where watcher observed a data value change" 196 216 default y
+7 -2
scripts/Makefile.kcsan
··· 9 9 10 10 # Keep most options here optional, to allow enabling more compilers if absence 11 11 # of some options does not break KCSAN nor causes false positive reports. 12 - export CFLAGS_KCSAN := -fsanitize=thread \ 13 - $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) \ 12 + kcsan-cflags := -fsanitize=thread -fno-optimize-sibling-calls \ 14 13 $(call cc-option,$(call cc-param,tsan-compound-read-before-write=1),$(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1))) \ 15 14 $(call cc-param,tsan-distinguish-volatile=1) 15 + 16 + ifndef CONFIG_KCSAN_WEAK_MEMORY 17 + kcsan-cflags += $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0)) 18 + endif 19 + 20 + export CFLAGS_KCSAN := $(kcsan-cflags)