Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue changes from Tejun Heo:
"Surprisingly, Lai and I didn't break too many things implementing
custom pools and stuff last time around and there aren't any follow-up
changes necessary at this point.

The only change in this pull request is Viresh's patches to make some
per-cpu workqueues behave as unbound workqueues dependent on a boot
param whose default can be configured via a config option. This leads
to higher processing overhead / lower bandwidth as more work items are
bounced across CPUs; however, it can lead to noticeable power savings in
certain configurations - ~10% w/ idlish constant workload on a
big.LITTLE configuration according to Viresh.

This is because per-cpu workqueues interfere with how the scheduler
perceives whether or not each CPU is idle by forcing pinned tasks on
them, which makes the scheduler's power-aware scheduling decisions
less effective.

Its effectiveness is likely less pronounced on homogeneous
configurations and this type of optimization can probably be made
automatic; however, the changes are pretty minimal and the affected
workqueues are clearly marked, so it's an easy gain for some
configurations for the time being with pretty unintrusive changes."

* 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
fbcon: queue work on power efficient wq
block: queue work on power efficient wq
PHYLIB: queue work on system_power_efficient_wq
workqueue: Add system wide power_efficient workqueues
workqueues: Introduce new flag WQ_POWER_EFFICIENT for power oriented workqueues

+113 -12
+15
Documentation/kernel-parameters.txt
··· 3341 3341 that this also can be controlled per-workqueue for 3342 3342 workqueues visible under /sys/bus/workqueue/. 3343 3343 3344 + workqueue.power_efficient 3345 + Per-cpu workqueues are generally preferred because 3346 + they show better performance thanks to cache 3347 + locality; unfortunately, per-cpu workqueues tend to 3348 + be more power hungry than unbound workqueues. 3349 + 3350 + Enabling this makes the per-cpu workqueues which 3351 + were observed to contribute significantly to power 3352 + consumption unbound, leading to measurably lower 3353 + power usage at the cost of small performance 3354 + overhead. 3355 + 3356 + The default value of this parameter is determined by 3357 + the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT. 3358 + 3344 3359 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of 3345 3360 default x2apic cluster mode on platforms 3346 3361 supporting x2apic.
+2 -1
block/blk-core.c
··· 3180 3180 3181 3181 /* used for unplugging and affects IO latency/throughput - HIGHPRI */ 3182 3182 kblockd_workqueue = alloc_workqueue("kblockd", 3183 - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); 3183 + WQ_MEM_RECLAIM | WQ_HIGHPRI | 3184 + WQ_POWER_EFFICIENT, 0); 3184 3185 if (!kblockd_workqueue) 3185 3186 panic("Failed to create kblockd\n"); 3186 3187
+2 -1
block/blk-ioc.c
··· 144 144 if (atomic_long_dec_and_test(&ioc->refcount)) { 145 145 spin_lock_irqsave(&ioc->lock, flags); 146 146 if (!hlist_empty(&ioc->icq_list)) 147 - schedule_work(&ioc->release_work); 147 + queue_work(system_power_efficient_wq, 148 + &ioc->release_work); 148 149 else 149 150 free_ioc = true; 150 151 spin_unlock_irqrestore(&ioc->lock, flags);
+8 -4
block/genhd.c
··· 1489 1489 intv = disk_events_poll_jiffies(disk); 1490 1490 set_timer_slack(&ev->dwork.timer, intv / 4); 1491 1491 if (check_now) 1492 - queue_delayed_work(system_freezable_wq, &ev->dwork, 0); 1492 + queue_delayed_work(system_freezable_power_efficient_wq, 1493 + &ev->dwork, 0); 1493 1494 else if (intv) 1494 - queue_delayed_work(system_freezable_wq, &ev->dwork, intv); 1495 + queue_delayed_work(system_freezable_power_efficient_wq, 1496 + &ev->dwork, intv); 1495 1497 out_unlock: 1496 1498 spin_unlock_irqrestore(&ev->lock, flags); 1497 1499 } ··· 1536 1534 spin_lock_irq(&ev->lock); 1537 1535 ev->clearing |= mask; 1538 1536 if (!ev->block) 1539 - mod_delayed_work(system_freezable_wq, &ev->dwork, 0); 1537 + mod_delayed_work(system_freezable_power_efficient_wq, 1538 + &ev->dwork, 0); 1540 1539 spin_unlock_irq(&ev->lock); 1541 1540 } 1542 1541 ··· 1630 1627 1631 1628 intv = disk_events_poll_jiffies(disk); 1632 1629 if (!ev->block && intv) 1633 - queue_delayed_work(system_freezable_wq, &ev->dwork, intv); 1630 + queue_delayed_work(system_freezable_power_efficient_wq, 1631 + &ev->dwork, intv); 1634 1632 1635 1633 spin_unlock_irq(&ev->lock); 1636 1634
+5 -4
drivers/net/phy/phy.c
··· 439 439 { 440 440 phydev->adjust_state = handler; 441 441 442 - schedule_delayed_work(&phydev->state_queue, HZ); 442 + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ); 443 443 } 444 444 445 445 /** ··· 500 500 disable_irq_nosync(irq); 501 501 atomic_inc(&phydev->irq_disable); 502 502 503 - schedule_work(&phydev->phy_queue); 503 + queue_work(system_power_efficient_wq, &phydev->phy_queue); 504 504 505 505 return IRQ_HANDLED; 506 506 } ··· 655 655 656 656 /* reschedule state queue work to run as soon as possible */ 657 657 cancel_delayed_work_sync(&phydev->state_queue); 658 - schedule_delayed_work(&phydev->state_queue, 0); 658 + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); 659 659 660 660 return; 661 661 ··· 918 918 if (err < 0) 919 919 phy_error(phydev); 920 920 921 - schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ); 921 + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 922 + PHY_STATE_TIME * HZ); 922 923 } 923 924 924 925 static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
+1 -1
drivers/video/console/fbcon.c
··· 404 404 struct fb_info *info = (struct fb_info *) dev_addr; 405 405 struct fbcon_ops *ops = info->fbcon_par; 406 406 407 - schedule_work(&info->queue); 407 + queue_work(system_power_efficient_wq, &info->queue); 408 408 mod_timer(&ops->cursor_timer, jiffies + HZ/5); 409 409 } 410 410
+35
include/linux/workqueue.h
··· 303 303 WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ 304 304 WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ 305 305 306 + /* 307 + * Per-cpu workqueues are generally preferred because they tend to 308 + * show better performance thanks to cache locality. Per-cpu 309 + * workqueues exclude the scheduler from choosing the CPU to 310 + * execute the worker threads, which has an unfortunate side effect 311 + * of increasing power consumption. 312 + * 313 + * The scheduler considers a CPU idle if it doesn't have any task 314 + * to execute and tries to keep idle cores idle to conserve power; 315 + * however, for example, a per-cpu work item scheduled from an 316 + * interrupt handler on an idle CPU will force the scheduler to 317 + * excute the work item on that CPU breaking the idleness, which in 318 + * turn may lead to more scheduling choices which are sub-optimal 319 + * in terms of power consumption. 320 + * 321 + * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default 322 + * but become unbound if workqueue.power_efficient kernel param is 323 + * specified. Per-cpu workqueues which are identified to 324 + * contribute significantly to power-consumption are identified and 325 + * marked with this flag and enabling the power_efficient mode 326 + * leads to noticeable power saving at the cost of small 327 + * performance disadvantage. 328 + * 329 + * http://thread.gmane.org/gmane.linux.kernel/1480396 330 + */ 331 + WQ_POWER_EFFICIENT = 1 << 7, 332 + 306 333 __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ 307 334 __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ 308 335 ··· 360 333 * 361 334 * system_freezable_wq is equivalent to system_wq except that it's 362 335 * freezable. 
336 + * 337 + * *_power_efficient_wq are inclined towards saving power and converted 338 + * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, 339 + * they are same as their non-power-efficient counterparts - e.g. 340 + * system_power_efficient_wq is identical to system_wq if 341 + * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. 363 342 */ 364 343 extern struct workqueue_struct *system_wq; 365 344 extern struct workqueue_struct *system_long_wq; 366 345 extern struct workqueue_struct *system_unbound_wq; 367 346 extern struct workqueue_struct *system_freezable_wq; 347 + extern struct workqueue_struct *system_power_efficient_wq; 348 + extern struct workqueue_struct *system_freezable_power_efficient_wq; 368 349 369 350 static inline struct workqueue_struct * __deprecated __system_nrt_wq(void) 370 351 {
+20
kernel/power/Kconfig
··· 262 262 bool 263 263 depends on PM 264 264 265 + config WQ_POWER_EFFICIENT_DEFAULT 266 + bool "Enable workqueue power-efficient mode by default" 267 + depends on PM 268 + default n 269 + help 270 + Per-cpu workqueues are generally preferred because they show 271 + better performance thanks to cache locality; unfortunately, 272 + per-cpu workqueues tend to be more power hungry than unbound 273 + workqueues. 274 + 275 + Enabling workqueue.power_efficient kernel parameter makes the 276 + per-cpu workqueues which were observed to contribute 277 + significantly to power consumption unbound, leading to measurably 278 + lower power usage at the cost of small performance overhead. 279 + 280 + This config option determines whether workqueue.power_efficient 281 + is enabled by default. 282 + 283 + If in doubt, say N. 284 + 265 285 config PM_GENERIC_DOMAINS_SLEEP 266 286 def_bool y 267 287 depends on PM_SLEEP && PM_GENERIC_DOMAINS
+25 -1
kernel/workqueue.c
··· 272 272 static bool wq_disable_numa; 273 273 module_param_named(disable_numa, wq_disable_numa, bool, 0444); 274 274 275 + /* see the comment above the definition of WQ_POWER_EFFICIENT */ 276 + #ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT 277 + static bool wq_power_efficient = true; 278 + #else 279 + static bool wq_power_efficient; 280 + #endif 281 + 282 + module_param_named(power_efficient, wq_power_efficient, bool, 0444); 283 + 275 284 static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ 276 285 277 286 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ ··· 314 305 EXPORT_SYMBOL_GPL(system_unbound_wq); 315 306 struct workqueue_struct *system_freezable_wq __read_mostly; 316 307 EXPORT_SYMBOL_GPL(system_freezable_wq); 308 + struct workqueue_struct *system_power_efficient_wq __read_mostly; 309 + EXPORT_SYMBOL_GPL(system_power_efficient_wq); 310 + struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; 311 + EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); 317 312 318 313 static int worker_thread(void *__worker); 319 314 static void copy_workqueue_attrs(struct workqueue_attrs *to, ··· 4099 4086 struct workqueue_struct *wq; 4100 4087 struct pool_workqueue *pwq; 4101 4088 4089 + /* see the comment above the definition of WQ_POWER_EFFICIENT */ 4090 + if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) 4091 + flags |= WQ_UNBOUND; 4092 + 4102 4093 /* allocate wq and format name */ 4103 4094 if (flags & WQ_UNBOUND) 4104 4095 tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]); ··· 5002 4985 WQ_UNBOUND_MAX_ACTIVE); 5003 4986 system_freezable_wq = alloc_workqueue("events_freezable", 5004 4987 WQ_FREEZABLE, 0); 4988 + system_power_efficient_wq = alloc_workqueue("events_power_efficient", 4989 + WQ_POWER_EFFICIENT, 0); 4990 + system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient", 4991 + WQ_FREEZABLE | WQ_POWER_EFFICIENT, 4992 + 0); 5005 4993 BUG_ON(!system_wq || 
!system_highpri_wq || !system_long_wq || 5006 - !system_unbound_wq || !system_freezable_wq); 4994 + !system_unbound_wq || !system_freezable_wq || 4995 + !system_power_efficient_wq || 4996 + !system_freezable_power_efficient_wq); 5007 4997 return 0; 5008 4998 } 5009 4999 early_initcall(init_workqueues);