Merge branches 'doc.2020.02.27a', 'fixes.2020.03.21a', 'kfree_rcu.2020.02.20a', 'locktorture.2020.02.20a', 'ovld.2020.02.20a', 'rcu-tasks.2020.02.20a', 'srcu.2020.02.20a' and 'torture.2020.02.20a' into HEAD

+19

Documentation/admin-guide/kernel-parameters.txt

··· 3980 3980 Set threshold of queued RCU callbacks below which 3981 3981 batch limiting is re-enabled. 3982 3982 3983 + rcutree.qovld= [KNL] 3984 + Set threshold of queued RCU callbacks beyond which 3985 + RCU's force-quiescent-state scan will aggressively 3986 + enlist help from cond_resched() and sched IPIs to 3987 + help CPUs more quickly reach quiescent states. 3988 + Set to less than zero to make this be set based 3989 + on rcutree.qhimark at boot time and to zero to 3990 + disable more aggressive help enlistment. 3991 + 3983 3992 rcutree.rcu_idle_gp_delay= [KNL] 3984 3993 Set wakeup interval for idle CPUs that have 3985 3994 RCU callbacks (RCU_FAST_NO_HZ=y). ··· 4203 4194 4204 4195 rcupdate.rcu_cpu_stall_suppress= [KNL] 4205 4196 Suppress RCU CPU stall warning messages. 4197 + 4198 + rcupdate.rcu_cpu_stall_suppress_at_boot= [KNL] 4199 + Suppress RCU CPU stall warning messages and 4200 + rcutorture writer stall warnings that occur 4201 + during early boot, that is, during the time 4202 + before the init task is spawned. 4206 4203 4207 4204 rcupdate.rcu_cpu_stall_timeout= [KNL] 4208 4205 Set timeout for RCU CPU stall warning messages. ··· 4881 4866 Specify if the kernel should ignore (off) 4882 4867 topology updates sent by the hypervisor to this 4883 4868 LPAR. 4869 + 4870 + torture.disable_onoff_at_boot= [KNL] 4871 + Prevent the CPU-hotplug component of torturing 4872 + until after init has spawned. 4884 4873 4885 4874 tp720= [HW,PS2] 4886 4875

+1 -1

fs/nfs/dir.c

··· 2383 2383 rcu_read_lock(); 2384 2384 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) 2385 2385 goto out; 2386 - lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); 2386 + lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru)); 2387 2387 cache = list_entry(lh, struct nfs_access_entry, lru); 2388 2388 if (lh == &nfsi->access_cache_entry_lru || 2389 2389 cred_fscmp(cred, cache->cred) != 0)

+2 -2

include/linux/rculist.h

··· 60 60 #define __list_check_rcu(dummy, cond, extra...) \ 61 61 ({ \ 62 62 check_arg_count_one(extra); \ 63 - RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \ 63 + RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \ 64 64 "RCU-list traversed in non-reader section!"); \ 65 - }) 65 + }) 66 66 #else 67 67 #define __list_check_rcu(dummy, cond, extra...) \ 68 68 ({ check_arg_count_one(extra); })

+1

include/linux/rcutiny.h

··· 83 83 static inline void rcu_scheduler_starting(void) { } 84 84 #endif /* #else #ifndef CONFIG_SRCU */ 85 85 static inline void rcu_end_inkernel_boot(void) { } 86 + static inline bool rcu_inkernel_boot_has_ended(void) { return true; } 86 87 static inline bool rcu_is_watching(void) { return true; } 87 88 static inline void rcu_momentary_dyntick_idle(void) { } 88 89 static inline void kfree_rcu_scheduler_running(void) { }

+1

include/linux/rcutree.h

··· 54 54 void rcu_scheduler_starting(void); 55 55 extern int rcu_scheduler_active __read_mostly; 56 56 void rcu_end_inkernel_boot(void); 57 + bool rcu_inkernel_boot_has_ended(void); 57 58 bool rcu_is_watching(void); 58 59 #ifndef CONFIG_PREEMPTION 59 60 void rcu_all_qs(void);

+1 -1

include/linux/timer.h

··· 164 164 */ 165 165 static inline int timer_pending(const struct timer_list * timer) 166 166 { 167 - return timer->entry.pprev != NULL; 167 + return !hlist_unhashed_lockless(&timer->entry); 168 168 } 169 169 170 170 extern void add_timer_on(struct timer_list *timer, int cpu);

+29

include/trace/events/rcu.h

··· 624 624 ); 625 625 626 626 /* 627 + * Tracepoint for the invocation of a single RCU callback of the special 628 + * kfree_bulk() form. The first argument is the RCU flavor, the second 629 + * argument is a number of elements in array to free, the third is an 630 + * address of the array holding nr_records entries. 631 + */ 632 + TRACE_EVENT_RCU(rcu_invoke_kfree_bulk_callback, 633 + 634 + TP_PROTO(const char *rcuname, unsigned long nr_records, void **p), 635 + 636 + TP_ARGS(rcuname, nr_records, p), 637 + 638 + TP_STRUCT__entry( 639 + __field(const char *, rcuname) 640 + __field(unsigned long, nr_records) 641 + __field(void **, p) 642 + ), 643 + 644 + TP_fast_assign( 645 + __entry->rcuname = rcuname; 646 + __entry->nr_records = nr_records; 647 + __entry->p = p; 648 + ), 649 + 650 + TP_printk("%s bulk=0x%p nr_records=%lu", 651 + __entry->rcuname, __entry->p, __entry->nr_records) 652 + ); 653 + 654 + /* 627 655 * Tracepoint for exiting rcu_do_batch after RCU callbacks have been 628 656 * invoked. The first argument is the name of the RCU flavor, 629 657 * the second argument is number of callbacks actually invoked, ··· 740 712 * "Begin": rcu_barrier() started. 741 713 * "EarlyExit": rcu_barrier() piggybacked, thus early exit. 742 714 * "Inc1": rcu_barrier() piggyback check counter incremented. 715 + * "OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks. 743 716 * "OnlineQ": rcu_barrier() found online CPU with callbacks. 744 717 * "OnlineNQ": rcu_barrier() found online CPU, no callbacks. 745 718 * "IRQ": An rcu_barrier_callback() callback posted on remote CPU.

+8 -7

kernel/locking/locktorture.c

··· 618 618 static int lock_torture_writer(void *arg) 619 619 { 620 620 struct lock_stress_stats *lwsp = arg; 621 - static DEFINE_TORTURE_RANDOM(rand); 621 + DEFINE_TORTURE_RANDOM(rand); 622 622 623 623 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); 624 624 set_user_nice(current, MAX_NICE); ··· 655 655 static int lock_torture_reader(void *arg) 656 656 { 657 657 struct lock_stress_stats *lrsp = arg; 658 - static DEFINE_TORTURE_RANDOM(rand); 658 + DEFINE_TORTURE_RANDOM(rand); 659 659 660 660 VERBOSE_TOROUT_STRING("lock_torture_reader task started"); 661 661 set_user_nice(current, MAX_NICE); ··· 696 696 if (statp[i].n_lock_fail) 697 697 fail = true; 698 698 sum += statp[i].n_lock_acquired; 699 - if (max < statp[i].n_lock_fail) 700 - max = statp[i].n_lock_fail; 701 - if (min > statp[i].n_lock_fail) 702 - min = statp[i].n_lock_fail; 699 + if (max < statp[i].n_lock_acquired) 700 + max = statp[i].n_lock_acquired; 701 + if (min > statp[i].n_lock_acquired) 702 + min = statp[i].n_lock_acquired; 703 703 } 704 704 page += sprintf(page, 705 705 "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", 706 706 write ? "Writes" : "Reads ", 707 - sum, max, min, max / 2 > min ? "???" : "", 707 + sum, max, min, 708 + !onoff_interval && max / 2 > min ? "???" : "", 708 709 fail, fail ? "!!!" : ""); 709 710 if (fail) 710 711 atomic_inc(&cxt.n_lock_torture_errors);

+1 -1

kernel/locking/rtmutex.c

··· 57 57 if (rt_mutex_has_waiters(lock)) 58 58 val |= RT_MUTEX_HAS_WAITERS; 59 59 60 - lock->owner = (struct task_struct *)val; 60 + WRITE_ONCE(lock->owner, (struct task_struct *)val); 61 61 } 62 62 63 63 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)

+4

kernel/rcu/Makefile

··· 3 3 # and is generally not a function of system call inputs. 4 4 KCOV_INSTRUMENT := n 5 5 6 + ifeq ($(CONFIG_KCSAN),y) 7 + KBUILD_CFLAGS += -g -fno-omit-frame-pointer 8 + endif 9 + 6 10 obj-y += update.o sync.o 7 11 obj-$(CONFIG_TREE_SRCU) += srcutree.o 8 12 obj-$(CONFIG_TINY_SRCU) += srcutiny.o

+21 -2

kernel/rcu/rcu.h

··· 198 198 } 199 199 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 200 200 201 + extern int rcu_cpu_stall_suppress_at_boot; 202 + 203 + static inline bool rcu_stall_is_suppressed_at_boot(void) 204 + { 205 + return rcu_cpu_stall_suppress_at_boot && !rcu_inkernel_boot_has_ended(); 206 + } 207 + 201 208 #ifdef CONFIG_RCU_STALL_COMMON 202 209 203 210 extern int rcu_cpu_stall_ftrace_dump; 204 211 extern int rcu_cpu_stall_suppress; 205 212 extern int rcu_cpu_stall_timeout; 206 213 int rcu_jiffies_till_stall_check(void); 214 + 215 + static inline bool rcu_stall_is_suppressed(void) 216 + { 217 + return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress; 218 + } 207 219 208 220 #define rcu_ftrace_dump_stall_suppress() \ 209 221 do { \ ··· 230 218 } while (0) 231 219 232 220 #else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */ 221 + 222 + static inline bool rcu_stall_is_suppressed(void) 223 + { 224 + return rcu_stall_is_suppressed_at_boot(); 225 + } 233 226 #define rcu_ftrace_dump_stall_suppress() 234 227 #define rcu_ftrace_dump_stall_unsuppress() 235 228 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ ··· 342 325 * Iterate over all possible CPUs in a leaf RCU node. 343 326 */ 344 327 #define for_each_leaf_node_possible_cpu(rnp, cpu) \ 345 - for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \ 328 + for (WARN_ON_ONCE(!rcu_is_leaf_node(rnp)), \ 329 + (cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \ 346 330 (cpu) <= rnp->grphi; \ 347 331 (cpu) = cpumask_next((cpu), cpu_possible_mask)) 348 332 ··· 353 335 #define rcu_find_next_bit(rnp, cpu, mask) \ 354 336 ((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu))) 355 337 #define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \ 356 - for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \ 338 + for (WARN_ON_ONCE(!rcu_is_leaf_node(rnp)), \ 339 + (cpu) = rcu_find_next_bit((rnp), 0, (mask)); \ 357 340 (cpu) <= rnp->grphi; \ 358 341 (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask))) 359 342

+1 -3

kernel/rcu/rcu_segcblist.c

··· 182 182 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) 183 183 { 184 184 return rcu_segcblist_is_enabled(rsclp) && 185 - &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; 185 + &rsclp->head != READ_ONCE(rsclp->tails[RCU_DONE_TAIL]); 186 186 } 187 187 188 188 /* ··· 381 381 return; /* Nothing to do. */ 382 382 WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head); 383 383 WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail); 384 - rclp->head = NULL; 385 - rclp->tail = &rclp->head; 386 384 } 387 385 388 386 /*

+12 -2

kernel/rcu/rcuperf.c

··· 12 12 #include <linux/types.h> 13 13 #include <linux/kernel.h> 14 14 #include <linux/init.h> 15 + #include <linux/mm.h> 15 16 #include <linux/module.h> 16 17 #include <linux/kthread.h> 17 18 #include <linux/err.h> ··· 612 611 long me = (long)arg; 613 612 struct kfree_obj *alloc_ptr; 614 613 u64 start_time, end_time; 614 + long long mem_begin, mem_during = 0; 615 615 616 616 VERBOSE_PERFOUT_STRING("kfree_perf_thread task started"); 617 617 set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); ··· 628 626 } 629 627 630 628 do { 629 + if (!mem_during) { 630 + mem_during = mem_begin = si_mem_available(); 631 + } else if (loop % (kfree_loops / 4) == 0) { 632 + mem_during = (mem_during + si_mem_available()) / 2; 633 + } 634 + 631 635 for (i = 0; i < kfree_alloc_num; i++) { 632 636 alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL); 633 637 if (!alloc_ptr) ··· 653 645 else 654 646 b_rcu_gp_test_finished = cur_ops->get_gp_seq(); 655 647 656 - pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld\n", 648 + pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n", 657 649 (unsigned long long)(end_time - start_time), kfree_loops, 658 - rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started)); 650 + rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started), 651 + (mem_begin - mem_during) >> (20 - PAGE_SHIFT)); 652 + 659 653 if (shutdown) { 660 654 smp_mb(); /* Assign before wake. */ 661 655 wake_up(&shutdown_wq);

+50 -17

kernel/rcu/rcutorture.c

··· 339 339 * period, and we want a long delay occasionally to trigger 340 340 * force_quiescent_state. */ 341 341 342 - if (!rcu_fwd_cb_nodelay && 342 + if (!READ_ONCE(rcu_fwd_cb_nodelay) && 343 343 !(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) { 344 344 started = cur_ops->get_gp_seq(); 345 345 ts = rcu_trace_clock_local(); ··· 375 375 { 376 376 int i; 377 377 378 - i = rp->rtort_pipe_count; 378 + i = READ_ONCE(rp->rtort_pipe_count); 379 379 if (i > RCU_TORTURE_PIPE_LEN) 380 380 i = RCU_TORTURE_PIPE_LEN; 381 381 atomic_inc(&rcu_torture_wcount[i]); 382 - if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { 382 + WRITE_ONCE(rp->rtort_pipe_count, i + 1); 383 + if (rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { 383 384 rp->rtort_mbtest = 0; 384 385 return true; 385 386 } ··· 1016 1015 if (i > RCU_TORTURE_PIPE_LEN) 1017 1016 i = RCU_TORTURE_PIPE_LEN; 1018 1017 atomic_inc(&rcu_torture_wcount[i]); 1019 - old_rp->rtort_pipe_count++; 1018 + WRITE_ONCE(old_rp->rtort_pipe_count, 1019 + old_rp->rtort_pipe_count + 1); 1020 1020 switch (synctype[torture_random(&rand) % nsynctypes]) { 1021 1021 case RTWS_DEF_FREE: 1022 1022 rcu_torture_writer_state = RTWS_DEF_FREE; ··· 1069 1067 if (stutter_wait("rcu_torture_writer") && 1070 1068 !READ_ONCE(rcu_fwd_cb_nodelay) && 1071 1069 !cur_ops->slow_gps && 1072 - !torture_must_stop()) 1070 + !torture_must_stop() && 1071 + rcu_inkernel_boot_has_ended()) 1073 1072 for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) 1074 1073 if (list_empty(&rcu_tortures[i].rtort_free) && 1075 1074 rcu_access_pointer(rcu_torture_current) != ··· 1293 1290 atomic_inc(&n_rcu_torture_mberror); 1294 1291 rtrsp = rcutorture_loop_extend(&readstate, trsp, rtrsp); 1295 1292 preempt_disable(); 1296 - pipe_count = p->rtort_pipe_count; 1293 + pipe_count = READ_ONCE(p->rtort_pipe_count); 1297 1294 if (pipe_count > RCU_TORTURE_PIPE_LEN) { 1298 1295 /* Should not happen, but... */ 1299 1296 pipe_count = RCU_TORTURE_PIPE_LEN; ··· 1407 1404 int i; 1408 1405 long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; 1409 1406 long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; 1407 + struct rcu_torture *rtcp; 1410 1408 static unsigned long rtcv_snap = ULONG_MAX; 1411 1409 static bool splatted; 1412 1410 struct task_struct *wtp; 1413 1411 1414 1412 for_each_possible_cpu(cpu) { 1415 1413 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { 1416 - pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; 1417 - batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; 1414 + pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]); 1415 + batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]); 1418 1416 } 1419 1417 } 1420 1418 for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) { ··· 1424 1420 } 1425 1421 1426 1422 pr_alert("%s%s ", torture_type, TORTURE_FLAG); 1423 + rtcp = rcu_access_pointer(rcu_torture_current); 1427 1424 pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", 1428 - rcu_torture_current, 1429 - rcu_torture_current ? "ver" : "VER", 1425 + rtcp, 1426 + rtcp && !rcu_stall_is_suppressed_at_boot() ? "ver" : "VER", 1430 1427 rcu_torture_current_version, 1431 1428 list_empty(&rcu_torture_freelist), 1432 1429 atomic_read(&n_rcu_torture_alloc), ··· 1483 1478 if (cur_ops->stats) 1484 1479 cur_ops->stats(); 1485 1480 if (rtcv_snap == rcu_torture_current_version && 1486 - rcu_torture_current != NULL) { 1481 + rcu_access_pointer(rcu_torture_current) && 1482 + !rcu_stall_is_suppressed()) { 1487 1483 int __maybe_unused flags = 0; 1488 1484 unsigned long __maybe_unused gp_seq = 0; 1489 1485 ··· 1999 1993 schedule_timeout_interruptible(fwd_progress_holdoff * HZ); 2000 1994 WRITE_ONCE(rcu_fwd_emergency_stop, false); 2001 1995 register_oom_notifier(&rcutorture_oom_nb); 2002 - rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries); 2003 - rcu_torture_fwd_prog_cr(rfp); 1996 + if (!IS_ENABLED(CONFIG_TINY_RCU) || 1997 + rcu_inkernel_boot_has_ended()) 1998 + rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries); 1999 + if (rcu_inkernel_boot_has_ended()) 2000 + rcu_torture_fwd_prog_cr(rfp); 2004 2001 unregister_oom_notifier(&rcutorture_oom_nb); 2005 2002 2006 2003 /* Avoid slow periods, better to test when busy. */ ··· 2053 2044 atomic_inc(&barrier_cbs_invoked); 2054 2045 } 2055 2046 2047 + /* IPI handler to get callback posted on desired CPU, if online. */ 2048 + static void rcu_torture_barrier1cb(void *rcu_void) 2049 + { 2050 + struct rcu_head *rhp = rcu_void; 2051 + 2052 + cur_ops->call(rhp, rcu_torture_barrier_cbf); 2053 + } 2054 + 2056 2055 /* kthread function to register callbacks used to test RCU barriers. */ 2057 2056 static int rcu_torture_barrier_cbs(void *arg) 2058 2057 { ··· 2084 2067 * The above smp_load_acquire() ensures barrier_phase load 2085 2068 * is ordered before the following ->call(). 2086 2069 */ 2087 - local_irq_disable(); /* Just to test no-irq call_rcu(). */ 2088 - cur_ops->call(&rcu, rcu_torture_barrier_cbf); 2089 - local_irq_enable(); 2070 + if (smp_call_function_single(myid, rcu_torture_barrier1cb, 2071 + &rcu, 1)) { 2072 + // IPI failed, so use direct call from current CPU. 2073 + cur_ops->call(&rcu, rcu_torture_barrier_cbf); 2074 + } 2090 2075 if (atomic_dec_and_test(&barrier_cbs_count)) 2091 2076 wake_up(&barrier_wq); 2092 2077 } while (!torture_must_stop()); ··· 2124 2105 pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n", 2125 2106 atomic_read(&barrier_cbs_invoked), 2126 2107 n_barrier_cbs); 2127 - WARN_ON_ONCE(1); 2108 + WARN_ON(1); 2109 + // Wait manually for the remaining callbacks 2110 + i = 0; 2111 + do { 2112 + if (WARN_ON(i++ > HZ)) 2113 + i = INT_MIN; 2114 + schedule_timeout_interruptible(1); 2115 + cur_ops->cb_barrier(); 2116 + } while (atomic_read(&barrier_cbs_invoked) != 2117 + n_barrier_cbs && 2118 + !torture_must_stop()); 2119 + smp_mb(); // Can't trust ordering if broken. 2120 + if (!torture_must_stop()) 2121 + pr_err("Recovered: barrier_cbs_invoked = %d\n", 2122 + atomic_read(&barrier_cbs_invoked)); 2128 2123 } else { 2129 2124 n_barrier_successes++; 2130 2125 }

+10 -8

kernel/rcu/srcutree.c

··· 5 5 * Copyright (C) IBM Corporation, 2006 6 6 * Copyright (C) Fujitsu, 2012 7 7 * 8 - * Author: Paul McKenney <paulmck@linux.ibm.com> 8 + * Authors: Paul McKenney <paulmck@linux.ibm.com> 9 9 * Lai Jiangshan <laijs@cn.fujitsu.com> 10 10 * 11 11 * For detailed explanation of Read-Copy Update mechanism see - ··· 450 450 spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ 451 451 smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ 452 452 rcu_seq_start(&ssp->srcu_gp_seq); 453 - state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); 453 + state = rcu_seq_state(ssp->srcu_gp_seq); 454 454 WARN_ON_ONCE(state != SRCU_STATE_SCAN1); 455 455 } 456 456 ··· 534 534 rcu_seq_end(&ssp->srcu_gp_seq); 535 535 gpseq = rcu_seq_current(&ssp->srcu_gp_seq); 536 536 if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) 537 - ssp->srcu_gp_seq_needed_exp = gpseq; 537 + WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq); 538 538 spin_unlock_irq_rcu_node(ssp); 539 539 mutex_unlock(&ssp->srcu_gp_mutex); 540 540 /* A new grace period can start at this point. But only one. */ ··· 550 550 snp->srcu_have_cbs[idx] = gpseq; 551 551 rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); 552 552 if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq)) 553 - snp->srcu_gp_seq_needed_exp = gpseq; 553 + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq); 554 554 mask = snp->srcu_data_have_cbs[idx]; 555 555 snp->srcu_data_have_cbs[idx] = 0; 556 556 spin_unlock_irq_rcu_node(snp); ··· 614 614 } 615 615 spin_lock_irqsave_rcu_node(ssp, flags); 616 616 if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) 617 - ssp->srcu_gp_seq_needed_exp = s; 617 + WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); 618 618 spin_unlock_irqrestore_rcu_node(ssp, flags); 619 619 } 620 620 ··· 660 660 if (snp == sdp->mynode) 661 661 snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 662 662 if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) 663 - snp->srcu_gp_seq_needed_exp = s; 663 + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 664 664 spin_unlock_irqrestore_rcu_node(snp, flags); 665 665 } 666 666 ··· 674 674 smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/ 675 675 } 676 676 if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) 677 - ssp->srcu_gp_seq_needed_exp = s; 677 + WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); 678 678 679 679 /* If grace period not already done and none in progress, start it. */ 680 680 if (!rcu_seq_done(&ssp->srcu_gp_seq, s) && ··· 1079 1079 */ 1080 1080 unsigned long srcu_batches_completed(struct srcu_struct *ssp) 1081 1081 { 1082 - return ssp->srcu_idx; 1082 + return READ_ONCE(ssp->srcu_idx); 1083 1083 } 1084 1084 EXPORT_SYMBOL_GPL(srcu_batches_completed); 1085 1085 ··· 1130 1130 return; /* readers present, retry later. */ 1131 1131 } 1132 1132 srcu_flip(ssp); 1133 + spin_lock_irq_rcu_node(ssp); 1133 1134 rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); 1135 + spin_unlock_irq_rcu_node(ssp); 1134 1136 } 1135 1137 1136 1138 if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {

+339 -113

kernel/rcu/tree.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0+ 2 2 /* 3 - * Read-Copy Update mechanism for mutual exclusion 3 + * Read-Copy Update mechanism for mutual exclusion (tree-based version) 4 4 * 5 5 * Copyright IBM Corporation, 2008 6 6 * 7 7 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 8 8 * Manfred Spraul <manfred@colorfullife.com> 9 - * Paul E. McKenney <paulmck@linux.ibm.com> Hierarchical version 9 + * Paul E. McKenney <paulmck@linux.ibm.com> 10 10 * 11 11 * Based on the original work by Paul McKenney <paulmck@linux.ibm.com> 12 12 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. ··· 150 150 static void invoke_rcu_core(void); 151 151 static void rcu_report_exp_rdp(struct rcu_data *rdp); 152 152 static void sync_sched_exp_online_cleanup(int cpu); 153 + static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp); 153 154 154 155 /* rcuc/rcub kthread realtime priority */ 155 156 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; ··· 343 342 { 344 343 int old; 345 344 int new; 345 + int new_old; 346 346 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); 347 347 348 + new_old = atomic_read(&rdp->dynticks); 348 349 do { 349 - old = atomic_read(&rdp->dynticks); 350 + old = new_old; 350 351 if (old & RCU_DYNTICK_CTRL_CTR) 351 352 return false; 352 353 new = old | RCU_DYNTICK_CTRL_MASK; 353 - } while (atomic_cmpxchg(&rdp->dynticks, old, new) != old); 354 + new_old = atomic_cmpxchg(&rdp->dynticks, old, new); 355 + } while (new_old != old); 354 356 return true; 355 357 } 356 358 ··· 414 410 static long qhimark = DEFAULT_RCU_QHIMARK; 415 411 #define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */ 416 412 static long qlowmark = DEFAULT_RCU_QLOMARK; 413 + #define DEFAULT_RCU_QOVLD_MULT 2 414 + #define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK) 415 + static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */ 416 + static long qovld_calc = -1; /* No pre-initialization lock acquisitions! */ 417 417 418 418 module_param(blimit, long, 0444); 419 419 module_param(qhimark, long, 0444); 420 420 module_param(qlowmark, long, 0444); 421 + module_param(qovld, long, 0444); 421 422 422 423 static ulong jiffies_till_first_fqs = ULONG_MAX; 423 424 static ulong jiffies_till_next_fqs = ULONG_MAX; ··· 827 818 incby = 1; 828 819 } else if (tick_nohz_full_cpu(rdp->cpu) && 829 820 rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE && 830 - READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) { 821 + READ_ONCE(rdp->rcu_urgent_qs) && 822 + !READ_ONCE(rdp->rcu_forced_tick)) { 831 823 raw_spin_lock_rcu_node(rdp->mynode); 832 824 // Recheck under lock. 833 825 if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) { 834 - rdp->rcu_forced_tick = true; 826 + WRITE_ONCE(rdp->rcu_forced_tick, true); 835 827 tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU); 836 828 } 837 829 raw_spin_unlock_rcu_node(rdp->mynode); ··· 909 899 WRITE_ONCE(rdp->rcu_need_heavy_qs, false); 910 900 if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) { 911 901 tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU); 912 - rdp->rcu_forced_tick = false; 902 + WRITE_ONCE(rdp->rcu_forced_tick, false); 913 903 } 914 904 } 915 905 ··· 1082 1072 rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu); 1083 1073 if (!READ_ONCE(*rnhqp) && 1084 1074 (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || 1085 - time_after(jiffies, rcu_state.jiffies_resched))) { 1075 + time_after(jiffies, rcu_state.jiffies_resched) || 1076 + rcu_state.cbovld)) { 1086 1077 WRITE_ONCE(*rnhqp, true); 1087 1078 /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ 1088 1079 smp_store_release(ruqp, true); ··· 1100 1089 * So hit them over the head with the resched_cpu() hammer! 1101 1090 */ 1102 1091 if (tick_nohz_full_cpu(rdp->cpu) && 1103 - time_after(jiffies, 1104 - READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) { 1092 + (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || 1093 + rcu_state.cbovld)) { 1105 1094 WRITE_ONCE(*ruqp, true); 1106 1095 resched_cpu(rdp->cpu); 1107 1096 WRITE_ONCE(rdp->last_fqs_resched, jiffies); ··· 1137 1126 static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, 1138 1127 unsigned long gp_seq_req, const char *s) 1139 1128 { 1140 - trace_rcu_future_grace_period(rcu_state.name, rnp->gp_seq, gp_seq_req, 1141 - rnp->level, rnp->grplo, rnp->grphi, s); 1129 + trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), 1130 + gp_seq_req, rnp->level, 1131 + rnp->grplo, rnp->grphi, s); 1142 1132 } 1143 1133 1144 1134 /* ··· 1186 1174 TPS("Prestarted")); 1187 1175 goto unlock_out; 1188 1176 } 1189 - rnp->gp_seq_needed = gp_seq_req; 1177 + WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req); 1190 1178 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) { 1191 1179 /* 1192 1180 * We just marked the leaf or internal node, and a ··· 1211 1199 } 1212 1200 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot")); 1213 1201 WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT); 1214 - rcu_state.gp_req_activity = jiffies; 1215 - if (!rcu_state.gp_kthread) { 1202 + WRITE_ONCE(rcu_state.gp_req_activity, jiffies); 1203 + if (!READ_ONCE(rcu_state.gp_kthread)) { 1216 1204 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread")); 1217 1205 goto unlock_out; 1218 1206 } 1219 - trace_rcu_grace_period(rcu_state.name, READ_ONCE(rcu_state.gp_seq), TPS("newreq")); 1207 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq")); 1220 1208 ret = true; /* Caller must wake GP kthread. */ 1221 1209 unlock_out: 1222 1210 /* Push furthest requested GP to leaf node and rcu_data structure. */ 1223 1211 if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) { 1224 - rnp_start->gp_seq_needed = rnp->gp_seq_needed; 1225 - rdp->gp_seq_needed = rnp->gp_seq_needed; 1212 + WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed); 1213 + WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); 1226 1214 } 1227 1215 if (rnp != rnp_start) 1228 1216 raw_spin_unlock_rcu_node(rnp); ··· 1247 1235 } 1248 1236 1249 1237 /* 1250 - * Awaken the grace-period kthread. Don't do a self-awaken (unless in 1251 - * an interrupt or softirq handler), and don't bother awakening when there 1252 - * is nothing for the grace-period kthread to do (as in several CPUs raced 1253 - * to awaken, and we lost), and finally don't try to awaken a kthread that 1254 - * has not yet been created. If all those checks are passed, track some 1255 - * debug information and awaken. 1238 + * Awaken the grace-period kthread. Don't do a self-awaken (unless in an 1239 + * interrupt or softirq handler, in which case we just might immediately 1240 + * sleep upon return, resulting in a grace-period hang), and don't bother 1241 + * awakening when there is nothing for the grace-period kthread to do 1242 + * (as in several CPUs raced to awaken, we lost), and finally don't try 1243 + * to awaken a kthread that has not yet been created. If all those checks 1244 + * are passed, track some debug information and awaken. 1256 1245 * 1257 1246 * So why do the self-wakeup when in an interrupt or softirq handler 1258 1247 * in the grace-period kthread's context? Because the kthread might have ··· 1263 1250 */ 1264 1251 static void rcu_gp_kthread_wake(void) 1265 1252 { 1266 - if ((current == rcu_state.gp_kthread && 1267 - !in_irq() && !in_serving_softirq()) || 1268 - !READ_ONCE(rcu_state.gp_flags) || 1269 - !rcu_state.gp_kthread) 1253 + struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); 1254 + 1255 + if ((current == t && !in_irq() && !in_serving_softirq()) || 1256 + !READ_ONCE(rcu_state.gp_flags) || !t) 1270 1257 return; 1271 1258 WRITE_ONCE(rcu_state.gp_wake_time, jiffies); 1272 1259 WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); ··· 1334 1321 1335 1322 rcu_lockdep_assert_cblist_protected(rdp); 1336 1323 c = rcu_seq_snap(&rcu_state.gp_seq); 1337 - if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { 1324 + if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { 1338 1325 /* Old request still live, so mark recent callbacks. */ 1339 1326 (void)rcu_segcblist_accelerate(&rdp->cblist, c); 1340 1327 return; ··· 1399 1386 static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) 1400 1387 { 1401 1388 bool ret = false; 1402 - bool need_gp; 1389 + bool need_qs; 1403 1390 const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && 1404 1391 rcu_segcblist_is_offloaded(&rdp->cblist); 1405 1392 ··· 1413 1400 unlikely(READ_ONCE(rdp->gpwrap))) { 1414 1401 if (!offloaded) 1415 1402 ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */ 1403 + rdp->core_needs_qs = false; 1416 1404 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend")); 1417 1405 } else { 1418 1406 if (!offloaded) 1419 1407 ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */ 1408 + if (rdp->core_needs_qs) 1409 + rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); 1420 1410 } 1421 1411 1422 1412 /* Now handle the beginnings of any new-to-this-CPU grace periods. */ ··· 1431 1415 * go looking for one. 1432 1416 */ 1433 1417 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart")); 1434 - need_gp = !!(rnp->qsmask & rdp->grpmask); 1435 - rdp->cpu_no_qs.b.norm = need_gp; 1436 - rdp->core_needs_qs = need_gp; 1418 + need_qs = !!(rnp->qsmask & rdp->grpmask); 1419 + rdp->cpu_no_qs.b.norm = need_qs; 1420 + rdp->core_needs_qs = need_qs; 1437 1421 zero_cpu_stall_ticks(rdp); 1438 1422 } 1439 1423 rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ 1440 1424 if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) 1441 - rdp->gp_seq_needed = rnp->gp_seq_needed; 1425 + WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); 1442 1426 WRITE_ONCE(rdp->gpwrap, false); 1443 1427 rcu_gpnum_ovf(rnp, rdp); 1444 1428 return ret; ··· 1667 1651 WRITE_ONCE(rcu_state.jiffies_kick_kthreads, 1668 1652 jiffies + (j ? 3 * j : 2)); 1669 1653 } 1670 - trace_rcu_grace_period(rcu_state.name, 1671 - READ_ONCE(rcu_state.gp_seq), 1654 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, 1672 1655 TPS("fqswait")); 1673 1656 rcu_state.gp_state = RCU_GP_WAIT_FQS; 1674 1657 ret = swait_event_idle_timeout_exclusive( ··· 1681 1666 /* If time for quiescent-state forcing, do it. */ 1682 1667 if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) || 1683 1668 (gf & RCU_GP_FLAG_FQS)) { 1684 - trace_rcu_grace_period(rcu_state.name, 1685 - READ_ONCE(rcu_state.gp_seq), 1669 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, 1686 1670 TPS("fqsstart")); 1687 1671 rcu_gp_fqs(first_gp_fqs); 1688 1672 first_gp_fqs = false; 1689 - trace_rcu_grace_period(rcu_state.name, 1690 - READ_ONCE(rcu_state.gp_seq), 1673 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, 1691 1674 TPS("fqsend")); 1692 1675 cond_resched_tasks_rcu_qs(); 1693 1676 WRITE_ONCE(rcu_state.gp_activity, jiffies); ··· 1696 1683 cond_resched_tasks_rcu_qs(); 1697 1684 WRITE_ONCE(rcu_state.gp_activity, jiffies); 1698 1685 WARN_ON(signal_pending(current)); 1699 - trace_rcu_grace_period(rcu_state.name, 1700 - READ_ONCE(rcu_state.gp_seq), 1686 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, 1701 1687 TPS("fqswaitsig")); 1702 1688 ret = 1; /* Keep old FQS timing. */ 1703 1689 j = jiffies; ··· 1713 1701 */ 1714 1702 static void rcu_gp_cleanup(void) 1715 1703 { 1716 - unsigned long gp_duration; 1704 + int cpu; 1717 1705 bool needgp = false; 1706 + unsigned long gp_duration; 1718 1707 unsigned long new_gp_seq; 1719 1708 bool offloaded; 1720 1709 struct rcu_data *rdp; ··· 1761 1748 needgp = __note_gp_changes(rnp, rdp) || needgp; 1762 1749 /* smp_mb() provided by prior unlock-lock pair. */ 1763 1750 needgp = rcu_future_gp_cleanup(rnp) || needgp; 1751 + // Reset overload indication for CPUs no longer overloaded 1752 + if (rcu_is_leaf_node(rnp)) 1753 + for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) { 1754 + rdp = per_cpu_ptr(&rcu_data, cpu); 1755 + check_cb_ovld_locked(rdp, rnp); 1756 + } 1764 1757 sq = rcu_nocb_gp_get(rnp); 1765 1758 raw_spin_unlock_irq_rcu_node(rnp); 1766 1759 rcu_nocb_gp_cleanup(sq); ··· 1793 1774 rcu_segcblist_is_offloaded(&rdp->cblist); 1794 1775 if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) { 1795 1776 WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT); 1796 - rcu_state.gp_req_activity = jiffies; 1777 + WRITE_ONCE(rcu_state.gp_req_activity, jiffies); 1797 1778 trace_rcu_grace_period(rcu_state.name, 1798 - READ_ONCE(rcu_state.gp_seq), 1779 + rcu_state.gp_seq, 1799 1780 TPS("newreq")); 1800 1781 } else { 1801 1782 WRITE_ONCE(rcu_state.gp_flags, ··· 1814 1795 1815 1796 /* Handle grace-period start. */ 1816 1797 for (;;) { 1817 - trace_rcu_grace_period(rcu_state.name, 1818 - READ_ONCE(rcu_state.gp_seq), 1798 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, 1819 1799 TPS("reqwait")); 1820 1800 rcu_state.gp_state = RCU_GP_WAIT_GPS; 1821 1801 swait_event_idle_exclusive(rcu_state.gp_wq, ··· 1827 1809 cond_resched_tasks_rcu_qs(); 1828 1810 WRITE_ONCE(rcu_state.gp_activity, jiffies); 1829 1811 WARN_ON(signal_pending(current)); 1830 - trace_rcu_grace_period(rcu_state.name, 1831 - READ_ONCE(rcu_state.gp_seq), 1812 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, 1832 1813 TPS("reqwaitsig")); 1833 1814 } 1834 1815 ··· 1898 1881 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ 1899 1882 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) && 1900 1883 rcu_preempt_blocked_readers_cgp(rnp)); 1901 - rnp->qsmask &= ~mask; 1884 + WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask); 1902 1885 trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq, 1903 1886 mask, rnp->qsmask, rnp->level, 1904 1887 rnp->grplo, rnp->grphi, ··· 1921 1904 rnp_c = rnp; 1922 1905 rnp = rnp->parent; 1923 1906 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1924 - oldmask = rnp_c->qsmask; 1907 + oldmask = READ_ONCE(rnp_c->qsmask); 1925 1908 } 1926 1909 1927 1910 /* ··· 2004 1987 return; 2005 1988 } 2006 1989 mask = rdp->grpmask; 1990 + if (rdp->cpu == smp_processor_id()) 1991 + rdp->core_needs_qs = false; 2007 1992 if ((rnp->qsmask & mask) == 0) { 2008 1993 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2009 1994 } else { ··· 2071 2052 return 0; 2072 2053 2073 2054 blkd = !!(rnp->qsmask & rdp->grpmask); 2074 - trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, 2055 + trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), 2075 2056 blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); 2076 2057 return 0; 2077 2058 } ··· 2313 2294 struct rcu_data *rdp; 2314 2295 struct rcu_node *rnp; 2315 2296 2297 + rcu_state.cbovld = rcu_state.cbovldnext; 2298 + rcu_state.cbovldnext = false; 2316 2299 rcu_for_each_leaf_node(rnp) { 2317 2300 cond_resched_tasks_rcu_qs(); 2318 2301 mask = 0; 2319 2302 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2303 + rcu_state.cbovldnext |= !!rnp->cbovldmask; 2320 2304 if (rnp->qsmask == 0) { 2321 2305 if (!IS_ENABLED(CONFIG_PREEMPT_RCU) || 2322 2306 rcu_preempt_blocked_readers_cgp(rnp)) { ··· 2601 2579 } 2602 2580 2603 2581 /* 2604 - * Helper function for call_rcu() and friends. The cpu argument will 2605 - * normally be -1, indicating "currently running CPU". It may specify 2606 - * a CPU only if that CPU is a no-CBs CPU. Currently, only rcu_barrier() 2607 - * is expected to specify a CPU. 2582 + * Check and if necessary update the leaf rcu_node structure's 2583 + * ->cbovldmask bit corresponding to the current CPU based on that CPU's 2584 + * number of queued RCU callbacks. The caller must hold the leaf rcu_node 2585 + * structure's ->lock. 2608 2586 */ 2587 + static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp) 2588 + { 2589 + raw_lockdep_assert_held_rcu_node(rnp); 2590 + if (qovld_calc <= 0) 2591 + return; // Early boot and wildcard value set. 2592 + if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) 2593 + WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask); 2594 + else 2595 + WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask); 2596 + } 2597 + 2598 + /* 2599 + * Check and if necessary update the leaf rcu_node structure's 2600 + * ->cbovldmask bit corresponding to the current CPU based on that CPU's 2601 + * number of queued RCU callbacks. No locks need be held, but the 2602 + * caller must have disabled interrupts. 2603 + * 2604 + * Note that this function ignores the possibility that there are a lot 2605 + * of callbacks all of which have already seen the end of their respective 2606 + * grace periods. This omission is due to the need for no-CBs CPUs to 2607 + * be holding ->nocb_lock to do this check, which is too heavy for a 2608 + * common-case operation. 2609 + */ 2610 + static void check_cb_ovld(struct rcu_data *rdp) 2611 + { 2612 + struct rcu_node *const rnp = rdp->mynode; 2613 + 2614 + if (qovld_calc <= 0 || 2615 + ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) == 2616 + !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask))) 2617 + return; // Early boot wildcard value or already set correctly. 2618 + raw_spin_lock_rcu_node(rnp); 2619 + check_cb_ovld_locked(rdp, rnp); 2620 + raw_spin_unlock_rcu_node(rnp); 2621 + } 2622 + 2623 + /* Helper function for call_rcu() and friends. */ 2609 2624 static void 2610 2625 __call_rcu(struct rcu_head *head, rcu_callback_t func) 2611 2626 { ··· 2680 2621 rcu_segcblist_init(&rdp->cblist); 2681 2622 } 2682 2623 2624 + check_cb_ovld(rdp); 2683 2625 if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) 2684 2626 return; // Enqueued onto ->nocb_bypass, so just leave. 2685 - /* If we get here, rcu_nocb_try_bypass() acquired ->nocb_lock. */ 2627 + // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. 2686 2628 rcu_segcblist_enqueue(&rdp->cblist, head); 2687 2629 if (__is_kfree_rcu_offset((unsigned long)func)) 2688 2630 trace_rcu_kfree_callback(rcu_state.name, head, ··· 2749 2689 #define KFREE_DRAIN_JIFFIES (HZ / 50) 2750 2690 #define KFREE_N_BATCHES 2 2751 2691 2692 + /* 2693 + * This macro defines how many entries the "records" array 2694 + * will contain. It is based on the fact that the size of 2695 + * kfree_rcu_bulk_data structure becomes exactly one page. 2696 + */ 2697 + #define KFREE_BULK_MAX_ENTR ((PAGE_SIZE / sizeof(void *)) - 3) 2698 + 2699 + /** 2700 + * struct kfree_rcu_bulk_data - single block to store kfree_rcu() pointers 2701 + * @nr_records: Number of active pointers in the array 2702 + * @records: Array of the kfree_rcu() pointers 2703 + * @next: Next bulk object in the block chain 2704 + * @head_free_debug: For debug, when CONFIG_DEBUG_OBJECTS_RCU_HEAD is set 2705 + */ 2706 + struct kfree_rcu_bulk_data { 2707 + unsigned long nr_records; 2708 + void *records[KFREE_BULK_MAX_ENTR]; 2709 + struct kfree_rcu_bulk_data *next; 2710 + struct rcu_head *head_free_debug; 2711 + }; 2712 + 2752 2713 /** 2753 2714 * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests 2754 2715 * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period 2755 2716 * @head_free: List of kfree_rcu() objects waiting for a grace period 2717 + * @bhead_free: Bulk-List of kfree_rcu() objects waiting for a grace period 2756 2718 * @krcp: Pointer to @kfree_rcu_cpu structure 2757 2719 */ 2758 2720 2759 2721 struct kfree_rcu_cpu_work { 2760 2722 struct rcu_work rcu_work; 2761 2723 struct rcu_head *head_free; 2724 + struct kfree_rcu_bulk_data *bhead_free; 2762 2725 struct kfree_rcu_cpu *krcp; 2763 2726 }; 2764 2727 2765 2728 /** 2766 2729 * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period 2767 2730 * @head: List of kfree_rcu() objects not yet waiting for a grace period 2731 + * @bhead: Bulk-List of kfree_rcu() objects not yet waiting for a grace period 2732 + * @bcached: Keeps at most one object for later reuse when build chain blocks 2768 2733 * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period 2769 2734 * @lock: Synchronize access to this structure 2770 2735 * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES ··· 2803 2718 */ 2804 2719 struct kfree_rcu_cpu { 2805 2720 struct rcu_head *head; 2721 + struct kfree_rcu_bulk_data *bhead; 2722 + struct kfree_rcu_bulk_data *bcached; 2806 2723 struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES]; 2807 2724 spinlock_t lock; 2808 2725 struct delayed_work monitor_work; ··· 2814 2727 2815 2728 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc); 2816 2729 2730 + static __always_inline void 2731 + debug_rcu_head_unqueue_bulk(struct rcu_head *head) 2732 + { 2733 + #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 2734 + for (; head; head = head->next) 2735 + debug_rcu_head_unqueue(head); 2736 + #endif 2737 + } 2738 + 2817 2739 /* 2818 2740 * This function is invoked in workqueue context after a grace period. 2819 - * It frees all the objects queued on ->head_free. 2741 + * It frees all the objects queued on ->bhead_free or ->head_free. 2820 2742 */ 2821 2743 static void kfree_rcu_work(struct work_struct *work) 2822 2744 { 2823 2745 unsigned long flags; 2824 2746 struct rcu_head *head, *next; 2747 + struct kfree_rcu_bulk_data *bhead, *bnext; 2825 2748 struct kfree_rcu_cpu *krcp; 2826 2749 struct kfree_rcu_cpu_work *krwp; 2827 2750 ··· 2841 2744 spin_lock_irqsave(&krcp->lock, flags); 2842 2745 head = krwp->head_free; 2843 2746 krwp->head_free = NULL; 2747 + bhead = krwp->bhead_free; 2748 + krwp->bhead_free = NULL; 2844 2749 spin_unlock_irqrestore(&krcp->lock, flags); 2845 2750 2846 - // List "head" is now private, so traverse locklessly. 2751 + /* "bhead" is now private, so traverse locklessly. */ 2752 + for (; bhead; bhead = bnext) { 2753 + bnext = bhead->next; 2754 + 2755 + debug_rcu_head_unqueue_bulk(bhead->head_free_debug); 2756 + 2757 + rcu_lock_acquire(&rcu_callback_map); 2758 + trace_rcu_invoke_kfree_bulk_callback(rcu_state.name, 2759 + bhead->nr_records, bhead->records); 2760 + 2761 + kfree_bulk(bhead->nr_records, bhead->records); 2762 + rcu_lock_release(&rcu_callback_map); 2763 + 2764 + if (cmpxchg(&krcp->bcached, NULL, bhead)) 2765 + free_page((unsigned long) bhead); 2766 + 2767 + cond_resched_tasks_rcu_qs(); 2768 + } 2769 + 2770 + /* 2771 + * Emergency case only. It can happen under low memory 2772 + * condition when an allocation gets failed, so the "bulk" 2773 + * path can not be temporary maintained. 2774 + */ 2847 2775 for (; head; head = next) { 2848 2776 unsigned long offset = (unsigned long)head->func; 2849 2777 2850 2778 next = head->next; 2851 - // Potentially optimize with kfree_bulk in future. 2852 2779 debug_rcu_head_unqueue(head); 2853 2780 rcu_lock_acquire(&rcu_callback_map); 2854 2781 trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset); 2855 2782 2856 - if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) { 2857 - /* Could be optimized with kfree_bulk() in future. */ 2783 + if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) 2858 2784 kfree((void *)head - offset); 2859 - } 2860 2785 2861 2786 rcu_lock_release(&rcu_callback_map); 2862 2787 cond_resched_tasks_rcu_qs(); ··· 2893 2774 */ 2894 2775 static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp) 2895 2776 { 2777 + struct kfree_rcu_cpu_work *krwp; 2778 + bool queued = false; 2896 2779 int i; 2897 - struct kfree_rcu_cpu_work *krwp = NULL; 2898 2780 2899 2781 lockdep_assert_held(&krcp->lock); 2900 - for (i = 0; i < KFREE_N_BATCHES; i++) 2901 - if (!krcp->krw_arr[i].head_free) { 2902 - krwp = &(krcp->krw_arr[i]); 2903 - break; 2782 + 2783 + for (i = 0; i < KFREE_N_BATCHES; i++) { 2784 + krwp = &(krcp->krw_arr[i]); 2785 + 2786 + /* 2787 + * Try to detach bhead or head and attach it over any 2788 + * available corresponding free channel. It can be that 2789 + * a previous RCU batch is in progress, it means that 2790 + * immediately to queue another one is not possible so 2791 + * return false to tell caller to retry. 2792 + */ 2793 + if ((krcp->bhead && !krwp->bhead_free) || 2794 + (krcp->head && !krwp->head_free)) { 2795 + /* Channel 1. */ 2796 + if (!krwp->bhead_free) { 2797 + krwp->bhead_free = krcp->bhead; 2798 + krcp->bhead = NULL; 2799 + } 2800 + 2801 + /* Channel 2. */ 2802 + if (!krwp->head_free) { 2803 + krwp->head_free = krcp->head; 2804 + krcp->head = NULL; 2805 + } 2806 + 2807 + /* 2808 + * One work is per one batch, so there are two "free channels", 2809 + * "bhead_free" and "head_free" the batch can handle. It can be 2810 + * that the work is in the pending state when two channels have 2811 + * been detached following each other, one by one. 2812 + */ 2813 + queue_rcu_work(system_wq, &krwp->rcu_work); 2814 + queued = true; 2904 2815 } 2816 + } 2905 2817 2906 - // If a previous RCU batch is in progress, we cannot immediately 2907 - // queue another one, so return false to tell caller to retry. 2908 - if (!krwp) 2909 - return false; 2910 - 2911 - krwp->head_free = krcp->head; 2912 - krcp->head = NULL; 2913 - INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work); 2914 - queue_rcu_work(system_wq, &krwp->rcu_work); 2915 - return true; 2818 + return queued; 2916 2819 } 2917 2820 2918 2821 static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp, ··· 2971 2830 spin_unlock_irqrestore(&krcp->lock, flags); 2972 2831 } 2973 2832 2833 + static inline bool 2834 + kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, 2835 + struct rcu_head *head, rcu_callback_t func) 2836 + { 2837 + struct kfree_rcu_bulk_data *bnode; 2838 + 2839 + if (unlikely(!krcp->initialized)) 2840 + return false; 2841 + 2842 + lockdep_assert_held(&krcp->lock); 2843 + 2844 + /* Check if a new block is required. */ 2845 + if (!krcp->bhead || 2846 + krcp->bhead->nr_records == KFREE_BULK_MAX_ENTR) { 2847 + bnode = xchg(&krcp->bcached, NULL); 2848 + if (!bnode) { 2849 + WARN_ON_ONCE(sizeof(struct kfree_rcu_bulk_data) > PAGE_SIZE); 2850 + 2851 + bnode = (struct kfree_rcu_bulk_data *) 2852 + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); 2853 + } 2854 + 2855 + /* Switch to emergency path. */ 2856 + if (unlikely(!bnode)) 2857 + return false; 2858 + 2859 + /* Initialize the new block. */ 2860 + bnode->nr_records = 0; 2861 + bnode->next = krcp->bhead; 2862 + bnode->head_free_debug = NULL; 2863 + 2864 + /* Attach it to the head. */ 2865 + krcp->bhead = bnode; 2866 + } 2867 + 2868 + #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 2869 + head->func = func; 2870 + head->next = krcp->bhead->head_free_debug; 2871 + krcp->bhead->head_free_debug = head; 2872 + #endif 2873 + 2874 + /* Finally insert. */ 2875 + krcp->bhead->records[krcp->bhead->nr_records++] = 2876 + (void *) head - (unsigned long) func; 2877 + 2878 + return true; 2879 + } 2880 + 2974 2881 /* 2975 - * Queue a request for lazy invocation of kfree() after a grace period. 2882 + * Queue a request for lazy invocation of kfree_bulk()/kfree() after a grace 2883 + * period. Please note there are two paths are maintained, one is the main one 2884 + * that uses kfree_bulk() interface and second one is emergency one, that is 2885 + * used only when the main path can not be maintained temporary, due to memory 2886 + * pressure. 2976 2887 * 2977 2888 * Each kfree_call_rcu() request is added to a batch. The batch will be drained 2978 - * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch 2979 - * will be kfree'd in workqueue context. This allows us to: 2980 - * 2981 - * 1. Batch requests together to reduce the number of grace periods during 2982 - * heavy kfree_rcu() load. 2983 - * 2984 - * 2. It makes it possible to use kfree_bulk() on a large number of 2985 - * kfree_rcu() requests thus reducing cache misses and the per-object 2986 - * overhead of kfree(). 2889 + * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch will 2890 + * be free'd in workqueue context. This allows us to: batch requests together to 2891 + * reduce the number of grace periods during heavy kfree_rcu() load. 2987 2892 */ 2988 2893 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) 2989 2894 { ··· 3048 2861 __func__, head); 3049 2862 goto unlock_return; 3050 2863 } 3051 - head->func = func; 3052 - head->next = krcp->head; 3053 - krcp->head = head; 2864 + 2865 + /* 2866 + * Under high memory pressure GFP_NOWAIT can fail, 2867 + * in that case the emergency path is maintained. 2868 + */ 2869 + if (unlikely(!kfree_call_rcu_add_ptr_to_bulk(krcp, head, func))) { 2870 + head->func = func; 2871 + head->next = krcp->head; 2872 + krcp->head = head; 2873 + } 3054 2874 3055 2875 // Set timer to drain after KFREE_DRAIN_JIFFIES. 3056 2876 if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && ··· 3269 3075 /* 3270 3076 * RCU callback function for rcu_barrier(). If we are last, wake 3271 3077 * up the task executing rcu_barrier(). 3078 + * 3079 + * Note that the value of rcu_state.barrier_sequence must be captured 3080 + * before the atomic_dec_and_test(). Otherwise, if this CPU is not last, 3081 + * other CPUs might count the value down to zero before this CPU gets 3082 + * around to invoking rcu_barrier_trace(), which might result in bogus 3083 + * data from the next instance of rcu_barrier(). 3272 3084 */ 3273 3085 static void rcu_barrier_callback(struct rcu_head *rhp) 3274 3086 { 3087 + unsigned long __maybe_unused s = rcu_state.barrier_sequence; 3088 + 3275 3089 if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) { 3276 - rcu_barrier_trace(TPS("LastCB"), -1, 3277 - rcu_state.barrier_sequence); 3090 + rcu_barrier_trace(TPS("LastCB"), -1, s); 3278 3091 complete(&rcu_state.barrier_completion); 3279 3092 } else { 3280 - rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence); 3093 + rcu_barrier_trace(TPS("CB"), -1, s); 3281 3094 } 3282 3095 } 3283 3096 3284 3097 /* 3285 3098 * Called with preemption disabled, and from cross-cpu IRQ context. 3286 3099 */ 3287 - static void rcu_barrier_func(void *unused) 3100 + static void rcu_barrier_func(void *cpu_in) 3288 3101 { 3289 - struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); 3102 + uintptr_t cpu = (uintptr_t)cpu_in; 3103 + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); 3290 3104 3291 3105 rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); 3292 3106 rdp->barrier_head.func = rcu_barrier_callback; ··· 3321 3119 */ 3322 3120 void rcu_barrier(void) 3323 3121 { 3324 - int cpu; 3122 + uintptr_t cpu; 3325 3123 struct rcu_data *rdp; 3326 3124 unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence); 3327 3125 ··· 3344 3142 rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); 3345 3143 3346 3144 /* 3347 - * Initialize the count to one rather than to zero in order to 3348 - * avoid a too-soon return to zero in case of a short grace period 3349 - * (or preemption of this task). Exclude CPU-hotplug operations 3350 - * to ensure that no offline CPU has callbacks queued. 3145 + * Initialize the count to two rather than to zero in order 3146 + * to avoid a too-soon return to zero in case of an immediate 3147 + * invocation of the just-enqueued callback (or preemption of 3148 + * this task). Exclude CPU-hotplug operations to ensure that no 3149 + * offline non-offloaded CPU has callbacks queued. 3351 3150 */ 3352 3151 init_completion(&rcu_state.barrier_completion); 3353 - atomic_set(&rcu_state.barrier_cpu_count, 1); 3152 + atomic_set(&rcu_state.barrier_cpu_count, 2); 3354 3153 get_online_cpus(); 3355 3154 3356 3155 /* ··· 3361 3158 */ 3362 3159 for_each_possible_cpu(cpu) { 3363 3160 rdp = per_cpu_ptr(&rcu_data, cpu); 3364 - if (!cpu_online(cpu) && 3161 + if (cpu_is_offline(cpu) && 3365 3162 !rcu_segcblist_is_offloaded(&rdp->cblist)) 3366 3163 continue; 3367 - if (rcu_segcblist_n_cbs(&rdp->cblist)) { 3164 + if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) { 3368 3165 rcu_barrier_trace(TPS("OnlineQ"), cpu, 3369 3166 rcu_state.barrier_sequence); 3370 - smp_call_function_single(cpu, rcu_barrier_func, NULL, 1); 3167 + smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1); 3168 + } else if (rcu_segcblist_n_cbs(&rdp->cblist) && 3169 + cpu_is_offline(cpu)) { 3170 + rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, 3171 + rcu_state.barrier_sequence); 3172 + local_irq_disable(); 3173 + rcu_barrier_func((void *)cpu); 3174 + local_irq_enable(); 3175 + } else if (cpu_is_offline(cpu)) { 3176 + rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu, 3177 + rcu_state.barrier_sequence); 3371 3178 } else { 3372 3179 rcu_barrier_trace(TPS("OnlineNQ"), cpu, 3373 3180 rcu_state.barrier_sequence); ··· 3389 3176 * Now that we have an rcu_barrier_callback() callback on each 3390 3177 * CPU, and thus each counted, remove the initial count. 3391 3178 */ 3392 - if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) 3179 + if (atomic_sub_and_test(2, &rcu_state.barrier_cpu_count)) 3393 3180 complete(&rcu_state.barrier_completion); 3394 3181 3395 3182 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ ··· 3488 3275 rnp = rdp->mynode; 3489 3276 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 3490 3277 rdp->beenonline = true; /* We have now been online. */ 3491 - rdp->gp_seq = rnp->gp_seq; 3492 - rdp->gp_seq_needed = rnp->gp_seq; 3278 + rdp->gp_seq = READ_ONCE(rnp->gp_seq); 3279 + rdp->gp_seq_needed = rdp->gp_seq; 3493 3280 rdp->cpu_no_qs.b.norm = true; 3494 3281 rdp->core_needs_qs = false; 3495 3282 rdp->rcu_iw_pending = false; 3496 - rdp->rcu_iw_gp_seq = rnp->gp_seq - 1; 3283 + rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; 3497 3284 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); 3498 3285 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3499 3286 rcu_prepare_kthreads(cpu); ··· 3591 3378 rnp = rdp->mynode; 3592 3379 mask = rdp->grpmask; 3593 3380 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3594 - rnp->qsmaskinitnext |= mask; 3381 + WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); 3595 3382 oldmask = rnp->expmaskinitnext; 3596 3383 rnp->expmaskinitnext |= mask; 3597 3384 oldmask ^= rnp->expmaskinitnext; ··· 3644 3431 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); 3645 3432 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3646 3433 } 3647 - rnp->qsmaskinitnext &= ~mask; 3434 + WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); 3648 3435 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3649 3436 raw_spin_unlock(&rcu_state.ofl_lock); 3650 3437 ··· 3758 3545 } 3759 3546 rnp = rcu_get_root(); 3760 3547 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3761 - rcu_state.gp_kthread = t; 3548 + WRITE_ONCE(rcu_state.gp_activity, jiffies); 3549 + WRITE_ONCE(rcu_state.gp_req_activity, jiffies); 3550 + // Reset .gp_activity and .gp_req_activity before setting .gp_kthread. 3551 + smp_store_release(&rcu_state.gp_kthread, t); /* ^^^ */ 3762 3552 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3763 3553 wake_up_process(t); 3764 3554 rcu_spawn_nocb_kthreads(); ··· 3985 3769 struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); 3986 3770 3987 3771 spin_lock_init(&krcp->lock); 3988 - for (i = 0; i < KFREE_N_BATCHES; i++) 3772 + for (i = 0; i < KFREE_N_BATCHES; i++) { 3773 + INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work); 3989 3774 krcp->krw_arr[i].krcp = krcp; 3775 + } 3776 + 3990 3777 INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor); 3991 3778 krcp->initialized = true; 3992 3779 } ··· 4028 3809 rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); 4029 3810 WARN_ON(!rcu_par_gp_wq); 4030 3811 srcu_init(); 3812 + 3813 + /* Fill in default value for rcutree.qovld boot parameter. */ 3814 + /* -After- the rcu_node ->lock fields are initialized! */ 3815 + if (qovld < 0) 3816 + qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark; 3817 + else 3818 + qovld_calc = qovld; 4031 3819 } 4032 3820 4033 3821 #include "tree_stall.h"

+4

kernel/rcu/tree.h

··· 68 68 /* Online CPUs for next expedited GP. */ 69 69 /* Any CPU that has ever been online will */ 70 70 /* have its bit set. */ 71 + unsigned long cbovldmask; 72 + /* CPUs experiencing callback overload. */ 71 73 unsigned long ffmask; /* Fully functional CPUs. */ 72 74 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 73 75 /* Only one bit will be set in this mask. */ ··· 323 321 atomic_t expedited_need_qs; /* # CPUs left to check in. */ 324 322 struct swait_queue_head expedited_wq; /* Wait for check-ins. */ 325 323 int ncpus_snap; /* # CPUs seen last time. */ 324 + u8 cbovld; /* Callback overload now? */ 325 + u8 cbovldnext; /* ^ ^ next time? */ 326 326 327 327 unsigned long jiffies_force_qs; /* Time at which to invoke */ 328 328 /* force_quiescent_state(). */

+9 -4

kernel/rcu/tree_exp.h

··· 314 314 sync_exp_work_done(s)); 315 315 return true; 316 316 } 317 - rnp->exp_seq_rq = s; /* Followers can wait on us. */ 317 + WRITE_ONCE(rnp->exp_seq_rq, s); /* Followers can wait on us. */ 318 318 spin_unlock(&rnp->exp_lock); 319 319 trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level, 320 320 rnp->grplo, rnp->grphi, TPS("nxtlvl")); ··· 485 485 static void synchronize_rcu_expedited_wait(void) 486 486 { 487 487 int cpu; 488 + unsigned long j; 488 489 unsigned long jiffies_stall; 489 490 unsigned long jiffies_start; 490 491 unsigned long mask; ··· 497 496 trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait")); 498 497 jiffies_stall = rcu_jiffies_till_stall_check(); 499 498 jiffies_start = jiffies; 500 - if (IS_ENABLED(CONFIG_NO_HZ_FULL)) { 499 + if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) { 501 500 if (synchronize_rcu_expedited_wait_once(1)) 502 501 return; 503 502 rcu_for_each_leaf_node(rnp) { ··· 509 508 tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP); 510 509 } 511 510 } 511 + j = READ_ONCE(jiffies_till_first_fqs); 512 + if (synchronize_rcu_expedited_wait_once(j + HZ)) 513 + return; 514 + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)); 512 515 } 513 516 514 517 for (;;) { 515 518 if (synchronize_rcu_expedited_wait_once(jiffies_stall)) 516 519 return; 517 - if (rcu_cpu_stall_suppress) 520 + if (rcu_stall_is_suppressed()) 518 521 continue; 519 522 panic_on_rcu_stall(); 520 523 pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", ··· 594 589 spin_lock(&rnp->exp_lock); 595 590 /* Recheck, avoid hang in case someone just arrived. */ 596 591 if (ULONG_CMP_LT(rnp->exp_seq_rq, s)) 597 - rnp->exp_seq_rq = s; 592 + WRITE_ONCE(rnp->exp_seq_rq, s); 598 593 spin_unlock(&rnp->exp_lock); 599 594 } 600 595 smp_mb(); /* All above changes before wakeup. */

+16 -9

kernel/rcu/tree_plugin.h

··· 56 56 pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark); 57 57 if (qlowmark != DEFAULT_RCU_QLOMARK) 58 58 pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark); 59 + if (qovld != DEFAULT_RCU_QOVLD) 60 + pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld); 59 61 if (jiffies_till_first_fqs != ULONG_MAX) 60 62 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs); 61 63 if (jiffies_till_next_fqs != ULONG_MAX) ··· 755 753 raw_lockdep_assert_held_rcu_node(rnp); 756 754 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", 757 755 __func__, rnp->grplo, rnp->grphi, rnp->level, 758 - (long)rnp->gp_seq, (long)rnp->completedqs); 756 + (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs); 759 757 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) 760 758 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", 761 759 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext); ··· 1034 1032 1035 1033 trace_rcu_utilization(TPS("Start boost kthread@init")); 1036 1034 for (;;) { 1037 - rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1035 + WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING); 1038 1036 trace_rcu_utilization(TPS("End boost kthread@rcu_wait")); 1039 1037 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1040 1038 trace_rcu_utilization(TPS("Start boost kthread@rcu_wait")); 1041 - rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1039 + WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING); 1042 1040 more2boost = rcu_boost(rnp); 1043 1041 if (more2boost) 1044 1042 spincnt++; 1045 1043 else 1046 1044 spincnt = 0; 1047 1045 if (spincnt > 10) { 1048 - rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; 1046 + WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING); 1049 1047 trace_rcu_utilization(TPS("End boost kthread@rcu_yield")); 1050 1048 schedule_timeout_interruptible(2); 1051 1049 trace_rcu_utilization(TPS("Start boost kthread@rcu_yield")); ··· 1079 1077 (rnp->gp_tasks != NULL && 1080 1078 rnp->boost_tasks == NULL && 1081 1079 rnp->qsmask == 0 && 1082 - ULONG_CMP_GE(jiffies, rnp->boost_time))) { 1080 + (ULONG_CMP_GE(jiffies, rnp->boost_time) || rcu_state.cbovld))) { 1083 1081 if (rnp->exp_tasks == NULL) 1084 1082 rnp->boost_tasks = rnp->gp_tasks; 1085 1083 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1086 1084 rcu_wake_cond(rnp->boost_kthread_task, 1087 - rnp->boost_kthread_status); 1085 + READ_ONCE(rnp->boost_kthread_status)); 1088 1086 } else { 1089 1087 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1090 1088 } ··· 1488 1486 * flag the contention. 1489 1487 */ 1490 1488 static void rcu_nocb_bypass_lock(struct rcu_data *rdp) 1489 + __acquires(&rdp->nocb_bypass_lock) 1491 1490 { 1492 1491 lockdep_assert_irqs_disabled(); 1493 1492 if (raw_spin_trylock(&rdp->nocb_bypass_lock)) ··· 1532 1529 * Release the specified rcu_data structure's ->nocb_bypass_lock. 1533 1530 */ 1534 1531 static void rcu_nocb_bypass_unlock(struct rcu_data *rdp) 1532 + __releases(&rdp->nocb_bypass_lock) 1535 1533 { 1536 1534 lockdep_assert_irqs_disabled(); 1537 1535 raw_spin_unlock(&rdp->nocb_bypass_lock); ··· 1581 1577 static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) 1582 1578 { 1583 1579 lockdep_assert_irqs_disabled(); 1584 - if (rcu_segcblist_is_offloaded(&rdp->cblist) && 1585 - cpu_online(rdp->cpu)) 1580 + if (rcu_segcblist_is_offloaded(&rdp->cblist)) 1586 1581 lockdep_assert_held(&rdp->nocb_lock); 1587 1582 } 1588 1583 ··· 1933 1930 struct rcu_data *rdp; 1934 1931 struct rcu_node *rnp; 1935 1932 unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning. 1933 + bool wasempty = false; 1936 1934 1937 1935 /* 1938 1936 * Each pass through the following loop checks for CBs and for the ··· 1973 1969 rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) { 1974 1970 raw_spin_lock_rcu_node(rnp); /* irqs disabled. */ 1975 1971 needwake_gp = rcu_advance_cbs(rnp, rdp); 1972 + wasempty = rcu_segcblist_restempty(&rdp->cblist, 1973 + RCU_NEXT_READY_TAIL); 1976 1974 raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */ 1977 1975 } 1978 1976 // Need to wait on some grace period? 1979 - WARN_ON_ONCE(!rcu_segcblist_restempty(&rdp->cblist, 1977 + WARN_ON_ONCE(wasempty && 1978 + !rcu_segcblist_restempty(&rdp->cblist, 1980 1979 RCU_NEXT_READY_TAIL)); 1981 1980 if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) { 1982 1981 if (!needwait_gp ||

+23 -18

kernel/rcu/tree_stall.h

··· 102 102 unsigned long j = jiffies; 103 103 unsigned long j1; 104 104 105 - rcu_state.gp_start = j; 105 + WRITE_ONCE(rcu_state.gp_start, j); 106 106 j1 = rcu_jiffies_till_stall_check(); 107 107 /* Record ->gp_start before ->jiffies_stall. */ 108 108 smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ ··· 383 383 384 384 /* Kick and suppress, if so configured. */ 385 385 rcu_stall_kick_kthreads(); 386 - if (rcu_cpu_stall_suppress) 386 + if (rcu_stall_is_suppressed()) 387 387 return; 388 388 389 389 /* ··· 452 452 453 453 /* Kick and suppress, if so configured. */ 454 454 rcu_stall_kick_kthreads(); 455 - if (rcu_cpu_stall_suppress) 455 + if (rcu_stall_is_suppressed()) 456 456 return; 457 457 458 458 /* ··· 504 504 unsigned long js; 505 505 struct rcu_node *rnp; 506 506 507 - if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || 507 + if ((rcu_stall_is_suppressed() && !rcu_kick_kthreads) || 508 508 !rcu_gp_in_progress()) 509 509 return; 510 510 rcu_stall_kick_kthreads(); ··· 578 578 unsigned long jw; 579 579 struct rcu_data *rdp; 580 580 struct rcu_node *rnp; 581 + struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); 581 582 582 583 j = jiffies; 583 584 ja = j - READ_ONCE(rcu_state.gp_activity); ··· 586 585 jw = j - READ_ONCE(rcu_state.gp_wake_time); 587 586 pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", 588 587 rcu_state.name, gp_state_getname(rcu_state.gp_state), 589 - rcu_state.gp_state, 590 - rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL, 588 + rcu_state.gp_state, t ? t->state : 0x1ffffL, 591 589 ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), 592 590 (long)READ_ONCE(rcu_state.gp_seq), 593 591 (long)READ_ONCE(rcu_get_root()->gp_seq_needed), 594 592 READ_ONCE(rcu_state.gp_flags)); 595 593 rcu_for_each_node_breadth_first(rnp) { 596 - if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) 594 + if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), 595 + READ_ONCE(rnp->gp_seq_needed))) 597 596 continue; 598 597 pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", 599 - rnp->grplo, rnp->grphi, (long)rnp->gp_seq, 600 - (long)rnp->gp_seq_needed); 598 + rnp->grplo, rnp->grphi, (long)READ_ONCE(rnp->gp_seq), 599 + (long)READ_ONCE(rnp->gp_seq_needed)); 601 600 if (!rcu_is_leaf_node(rnp)) 602 601 continue; 603 602 for_each_leaf_node_possible_cpu(rnp, cpu) { 604 603 rdp = per_cpu_ptr(&rcu_data, cpu); 605 - if (rdp->gpwrap || 606 - ULONG_CMP_GE(rcu_state.gp_seq, 607 - rdp->gp_seq_needed)) 604 + if (READ_ONCE(rdp->gpwrap) || 605 + ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), 606 + READ_ONCE(rdp->gp_seq_needed))) 608 607 continue; 609 608 pr_info("\tcpu %d ->gp_seq_needed %ld\n", 610 - cpu, (long)rdp->gp_seq_needed); 609 + cpu, (long)READ_ONCE(rdp->gp_seq_needed)); 611 610 } 612 611 } 613 612 for_each_possible_cpu(cpu) { ··· 632 631 static atomic_t warned = ATOMIC_INIT(0); 633 632 634 633 if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || 635 - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) 634 + ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), 635 + READ_ONCE(rnp_root->gp_seq_needed)) || 636 + !smp_load_acquire(&rcu_state.gp_kthread)) // Get stable kthread. 636 637 return; 637 638 j = jiffies; /* Expensive access, and in common case don't get here. */ 638 639 if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || ··· 645 642 raw_spin_lock_irqsave_rcu_node(rnp, flags); 646 643 j = jiffies; 647 644 if (rcu_gp_in_progress() || 648 - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || 645 + ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), 646 + READ_ONCE(rnp_root->gp_seq_needed)) || 649 647 time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || 650 648 time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || 651 649 atomic_read(&warned)) { ··· 659 655 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ 660 656 j = jiffies; 661 657 if (rcu_gp_in_progress() || 662 - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || 663 - time_before(j, rcu_state.gp_req_activity + gpssdelay) || 664 - time_before(j, rcu_state.gp_activity + gpssdelay) || 658 + ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), 659 + READ_ONCE(rnp_root->gp_seq_needed)) || 660 + time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || 661 + time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || 665 662 atomic_xchg(&warned, 1)) { 666 663 if (rnp_root != rnp) 667 664 /* irqs remain disabled. */

+23 -5

kernel/rcu/update.c

··· 183 183 } 184 184 EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); 185 185 186 + static bool rcu_boot_ended __read_mostly; 187 + 186 188 /* 187 189 * Inform RCU of the end of the in-kernel boot sequence. 188 190 */ ··· 193 191 rcu_unexpedite_gp(); 194 192 if (rcu_normal_after_boot) 195 193 WRITE_ONCE(rcu_normal, 1); 194 + rcu_boot_ended = 1; 196 195 } 196 + 197 + /* 198 + * Let rcutorture know when it is OK to turn it up to eleven. 199 + */ 200 + bool rcu_inkernel_boot_has_ended(void) 201 + { 202 + return rcu_boot_ended; 203 + } 204 + EXPORT_SYMBOL_GPL(rcu_inkernel_boot_has_ended); 197 205 198 206 #endif /* #ifndef CONFIG_TINY_RCU */ 199 207 ··· 476 464 #ifdef CONFIG_RCU_STALL_COMMON 477 465 int rcu_cpu_stall_ftrace_dump __read_mostly; 478 466 module_param(rcu_cpu_stall_ftrace_dump, int, 0644); 479 - int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ 467 + int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings. 480 468 EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); 481 469 module_param(rcu_cpu_stall_suppress, int, 0644); 482 470 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; 483 471 module_param(rcu_cpu_stall_timeout, int, 0644); 484 472 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 473 + 474 + // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall 475 + // warnings. Also used by rcutorture even if stall warnings are excluded. 476 + int rcu_cpu_stall_suppress_at_boot __read_mostly; // !0 = suppress boot stalls. 477 + EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_at_boot); 478 + module_param(rcu_cpu_stall_suppress_at_boot, int, 0444); 485 479 486 480 #ifdef CONFIG_TASKS_RCU 487 481 ··· 546 528 rhp->func = func; 547 529 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); 548 530 needwake = !rcu_tasks_cbs_head; 549 - *rcu_tasks_cbs_tail = rhp; 531 + WRITE_ONCE(*rcu_tasks_cbs_tail, rhp); 550 532 rcu_tasks_cbs_tail = &rhp->next; 551 533 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); 552 534 /* We can't create the thread unless interrupts are enabled. */ ··· 676 658 /* If there were none, wait a bit and start over. */ 677 659 if (!list) { 678 660 wait_event_interruptible(rcu_tasks_cbs_wq, 679 - rcu_tasks_cbs_head); 661 + READ_ONCE(rcu_tasks_cbs_head)); 680 662 if (!rcu_tasks_cbs_head) { 681 663 WARN_ON(signal_pending(current)); 682 664 schedule_timeout_interruptible(HZ/10); ··· 819 801 core_initcall(rcu_spawn_tasks_kthread); 820 802 821 803 /* Do the srcu_read_lock() for the above synchronize_srcu(). */ 822 - void exit_tasks_rcu_start(void) 804 + void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu) 823 805 { 824 806 preempt_disable(); 825 807 current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu); ··· 827 809 } 828 810 829 811 /* Do the srcu_read_unlock() for the above synchronize_srcu(). */ 830 - void exit_tasks_rcu_finish(void) 812 + void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu) 831 813 { 832 814 preempt_disable(); 833 815 __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);

+4 -3

kernel/time/timer.c

··· 944 944 945 945 #define MOD_TIMER_PENDING_ONLY 0x01 946 946 #define MOD_TIMER_REDUCE 0x02 947 + #define MOD_TIMER_NOTPENDING 0x04 947 948 948 949 static inline int 949 950 __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) ··· 961 960 * the timer is re-modified to have the same timeout or ends up in the 962 961 * same array bucket then just return: 963 962 */ 964 - if (timer_pending(timer)) { 963 + if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) { 965 964 /* 966 965 * The downside of this optimization is that it can result in 967 966 * larger granularity than you would get from adding a new ··· 1134 1133 void add_timer(struct timer_list *timer) 1135 1134 { 1136 1135 BUG_ON(timer_pending(timer)); 1137 - mod_timer(timer, timer->expires); 1136 + __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); 1138 1137 } 1139 1138 EXPORT_SYMBOL(add_timer); 1140 1139 ··· 1892 1891 1893 1892 timer.task = current; 1894 1893 timer_setup_on_stack(&timer.timer, process_timeout, 0); 1895 - __mod_timer(&timer.timer, expire, 0); 1894 + __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); 1896 1895 schedule(); 1897 1896 del_singleshot_timer_sync(&timer.timer); 1898 1897

+25 -4

kernel/torture.c

··· 42 42 MODULE_LICENSE("GPL"); 43 43 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); 44 44 45 + static bool disable_onoff_at_boot; 46 + module_param(disable_onoff_at_boot, bool, 0444); 47 + 45 48 static char *torture_type; 46 49 static int verbose; 47 50 ··· 87 84 { 88 85 unsigned long delta; 89 86 int ret; 87 + char *s; 90 88 unsigned long starttime; 91 89 92 90 if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) ··· 103 99 (*n_offl_attempts)++; 104 100 ret = cpu_down(cpu); 105 101 if (ret) { 102 + s = ""; 103 + if (!rcu_inkernel_boot_has_ended() && ret == -EBUSY) { 104 + // PCI probe frequently disables hotplug during boot. 105 + (*n_offl_attempts)--; 106 + s = " (-EBUSY forgiven during boot)"; 107 + } 106 108 if (verbose) 107 109 pr_alert("%s" TORTURE_FLAG 108 - "torture_onoff task: offline %d failed: errno %d\n", 109 - torture_type, cpu, ret); 110 + "torture_onoff task: offline %d failed%s: errno %d\n", 111 + torture_type, cpu, s, ret); 110 112 } else { 111 113 if (verbose > 1) 112 114 pr_alert("%s" TORTURE_FLAG ··· 147 137 { 148 138 unsigned long delta; 149 139 int ret; 140 + char *s; 150 141 unsigned long starttime; 151 142 152 143 if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) ··· 161 150 (*n_onl_attempts)++; 162 151 ret = cpu_up(cpu); 163 152 if (ret) { 153 + s = ""; 154 + if (!rcu_inkernel_boot_has_ended() && ret == -EBUSY) { 155 + // PCI probe frequently disables hotplug during boot. 156 + (*n_onl_attempts)--; 157 + s = " (-EBUSY forgiven during boot)"; 158 + } 164 159 if (verbose) 165 160 pr_alert("%s" TORTURE_FLAG 166 - "torture_onoff task: online %d failed: errno %d\n", 167 - torture_type, cpu, ret); 161 + "torture_onoff task: online %d failed%s: errno %d\n", 162 + torture_type, cpu, s, ret); 168 163 } else { 169 164 if (verbose > 1) 170 165 pr_alert("%s" TORTURE_FLAG ··· 232 215 VERBOSE_TOROUT_STRING("torture_onoff end holdoff"); 233 216 } 234 217 while (!torture_must_stop()) { 218 + if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) { 219 + schedule_timeout_interruptible(HZ / 10); 220 + continue; 221 + } 235 222 cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); 236 223 if (!torture_offline(cpu, 237 224 &n_offline_attempts, &n_offline_successes,

+1 -1

tools/testing/selftests/rcutorture/bin/functions.sh

··· 12 12 # Returns 1 if the specified boot-parameter string tells rcutorture to 13 13 # test CPU-hotplug operations. 14 14 bootparam_hotplug_cpu () { 15 - echo "$1" | grep -q "rcutorture\.onoff_" 15 + echo "$1" | grep -q "torture\.onoff_" 16 16 } 17 17 18 18 # checkarg --argname argtype $# arg mustmatch cannotmatch

+2

tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh

··· 20 20 rundir="${1}" 21 21 if test -z "$rundir" -o ! -d "$rundir" 22 22 then 23 + echo Directory "$rundir" not found. 23 24 echo Usage: $0 directory 25 + exit 1 24 26 fi 25 27 editor=${EDITOR-vi} 26 28

+16 -1

tools/testing/selftests/rcutorture/bin/kvm-recheck.sh

··· 13 13 # 14 14 # Authors: Paul E. McKenney <paulmck@linux.ibm.com> 15 15 16 + T=/tmp/kvm-recheck.sh.$$ 17 + trap 'rm -f $T' 0 2 18 + 16 19 PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH 17 20 . functions.sh 18 21 for rd in "$@" ··· 71 68 fi 72 69 done 73 70 done 74 - EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1 71 + EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1 72 + ret=$? 73 + builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`" 74 + if test "$builderrors" -gt 0 75 + then 76 + echo $builderrors runs with build errors. 77 + fi 78 + runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`" 79 + if test "$runerrors" -gt 0 80 + then 81 + echo $runerrors runs with runtime errors. 82 + fi 83 + exit $ret

+1 -1

tools/testing/selftests/rcutorture/bin/kvm.sh

··· 39 39 resdir="" 40 40 configs="" 41 41 cpus=0 42 - ds=`date +%Y.%m.%d-%H:%M:%S` 42 + ds=`date +%Y.%m.%d-%H.%M.%S` 43 43 jitter="-1" 44 44 45 45 usage () {

+2

tools/testing/selftests/rcutorture/configs/rcu/CFcommon

··· 3 3 CONFIG_HYPERVISOR_GUEST=y 4 4 CONFIG_PARAVIRT=y 5 5 CONFIG_KVM_GUEST=y 6 + CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n 7 + CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n

+18

tools/testing/selftests/rcutorture/configs/rcu/TREE10

··· 1 + CONFIG_SMP=y 2 + CONFIG_NR_CPUS=100 3 + CONFIG_PREEMPT_NONE=y 4 + CONFIG_PREEMPT_VOLUNTARY=n 5 + CONFIG_PREEMPT=n 6 + #CHECK#CONFIG_TREE_RCU=y 7 + CONFIG_HZ_PERIODIC=n 8 + CONFIG_NO_HZ_IDLE=y 9 + CONFIG_NO_HZ_FULL=n 10 + CONFIG_RCU_FAST_NO_HZ=n 11 + CONFIG_RCU_TRACE=n 12 + CONFIG_RCU_NOCB_CPU=n 13 + CONFIG_DEBUG_LOCK_ALLOC=n 14 + CONFIG_PROVE_LOCKING=n 15 + #CHECK#CONFIG_PROVE_RCU=n 16 + CONFIG_DEBUG_OBJECTS=n 17 + CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 18 + CONFIG_RCU_EXPERT=n