sched: reintroduce SMP tunings again

Yanmin Zhang reported an aim7 regression and bisected it down to:

| commit 38ad464d410dadceda1563f36bdb0be7fe4c8938
| Author: Ingo Molnar <mingo@elte.hu>
| Date: Mon Oct 15 17:00:02 2007 +0200
|
| sched: uniform tunings
|
| use the same defaults on both UP and SMP.

Fix this by reintroducing similar SMP tunings again. This resolves
the regression.

(also update the comments to match the ilog2(nr_cpus) tuning effect)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

+37 -9
+28
kernel/sched.c
··· 4992 */ 4993 cpumask_t nohz_cpu_mask = CPU_MASK_NONE; 4994 4995 #ifdef CONFIG_SMP 4996 /* 4997 * This is how migration works: ··· 6714 /* Move init over to a non-isolated CPU */ 6715 if (set_cpus_allowed(current, non_isolated_cpus) < 0) 6716 BUG(); 6717 } 6718 #else 6719 void __init sched_init_smp(void) 6720 { 6721 } 6722 #endif /* CONFIG_SMP */ 6723
··· 4992 */ 4993 cpumask_t nohz_cpu_mask = CPU_MASK_NONE; 4994 4995 + /* 4996 + * Increase the granularity value when there are more CPUs, 4997 + * because with more CPUs the 'effective latency' as visible 4998 + * to users decreases. But the relationship is not linear, 4999 + * so pick a second-best guess by going with the log2 of the 5000 + * number of CPUs. 5001 + * 5002 + * This idea comes from the SD scheduler of Con Kolivas: 5003 + */ 5004 + static inline void sched_init_granularity(void) 5005 + { 5006 + unsigned int factor = 1 + ilog2(num_online_cpus()); 5007 + const unsigned long limit = 200000000; 5008 + 5009 + sysctl_sched_min_granularity *= factor; 5010 + if (sysctl_sched_min_granularity > limit) 5011 + sysctl_sched_min_granularity = limit; 5012 + 5013 + sysctl_sched_latency *= factor; 5014 + if (sysctl_sched_latency > limit) 5015 + sysctl_sched_latency = limit; 5016 + 5017 + sysctl_sched_wakeup_granularity *= factor; 5018 + sysctl_sched_batch_wakeup_granularity *= factor; 5019 + } 5020 + 5021 #ifdef CONFIG_SMP 5022 /* 5023 * This is how migration works: ··· 6688 /* Move init over to a non-isolated CPU */ 6689 if (set_cpus_allowed(current, non_isolated_cpus) < 0) 6690 BUG(); 6691 + sched_init_granularity(); 6692 } 6693 #else 6694 void __init sched_init_smp(void) 6695 { 6696 + sched_init_granularity(); 6697 } 6698 #endif /* CONFIG_SMP */ 6699
+9 -9
kernel/sched_fair.c
··· 22 23 /* 24 * Targeted preemption latency for CPU-bound tasks: 25 - * (default: 20ms, units: nanoseconds) 26 * 27 * NOTE: this latency value is not the same as the concept of 28 * 'timeslice length' - timeslices in CFS are of variable length ··· 32 * (to see the precise effective timeslice length of your workload, 33 * run vmstat and monitor the context-switches (cs) field) 34 */ 35 - const_debug unsigned int sysctl_sched_latency = 20000000ULL; 36 37 /* 38 * Minimal preemption granularity for CPU-bound tasks: 39 - * (default: 1 msec, units: nanoseconds) 40 */ 41 - const_debug unsigned int sysctl_sched_min_granularity = 1000000ULL; 42 43 /* 44 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity 45 */ 46 - const_debug unsigned int sched_nr_latency = 20; 47 48 /* 49 * After fork, child runs first. (default) If set to 0 then ··· 61 62 /* 63 * SCHED_BATCH wake-up granularity. 64 - * (default: 10 msec, units: nanoseconds) 65 * 66 * This option delays the preemption effects of decoupled workloads 67 * and reduces their over-scheduling. Synchronous workloads will still 68 * have immediate wakeup/sleep latencies. 69 */ 70 - const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; 71 72 /* 73 * SCHED_OTHER wake-up granularity. 74 - * (default: 10 msec, units: nanoseconds) 75 * 76 * This option delays the preemption effects of decoupled workloads 77 * and reduces their over-scheduling. Synchronous workloads will still 78 * have immediate wakeup/sleep latencies. 79 */ 80 - const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL; 81 82 const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 83
··· 22 23 /* 24 * Targeted preemption latency for CPU-bound tasks: 25 + * (default: 20ms * ilog(ncpus), units: nanoseconds) 26 * 27 * NOTE: this latency value is not the same as the concept of 28 * 'timeslice length' - timeslices in CFS are of variable length ··· 32 * (to see the precise effective timeslice length of your workload, 33 * run vmstat and monitor the context-switches (cs) field) 34 */ 35 + unsigned int sysctl_sched_latency = 20000000ULL; 36 37 /* 38 * Minimal preemption granularity for CPU-bound tasks: 39 + * (default: 1 msec * ilog(ncpus), units: nanoseconds) 40 */ 41 + unsigned int sysctl_sched_min_granularity = 1000000ULL; 42 43 /* 44 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity 45 */ 46 + unsigned int sched_nr_latency = 20; 47 48 /* 49 * After fork, child runs first. (default) If set to 0 then ··· 61 62 /* 63 * SCHED_BATCH wake-up granularity. 64 + * (default: 10 msec * ilog(ncpus), units: nanoseconds) 65 * 66 * This option delays the preemption effects of decoupled workloads 67 * and reduces their over-scheduling. Synchronous workloads will still 68 * have immediate wakeup/sleep latencies. 69 */ 70 + unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; 71 72 /* 73 * SCHED_OTHER wake-up granularity. 74 + * (default: 10 msec * ilog(ncpus), units: nanoseconds) 75 * 76 * This option delays the preemption effects of decoupled workloads 77 * and reduces their over-scheduling. Synchronous workloads will still 78 * have immediate wakeup/sleep latencies. 79 */ 80 + unsigned int sysctl_sched_wakeup_granularity = 10000000UL; 81 82 const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 83