Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: replace use of system_wq with system_percpu_wq

Currently, if a user enqueues a work item using schedule_delayed_work(), the
wq used is "system_wq" (a per-cpu wq), while queue_delayed_work() uses
WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
schedule_work(), which uses system_wq, and queue_work(), which again makes
use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the API.

system_unbound_wq should be the default workqueue so as not to enforce
locality constraints for random work whenever it's not required.

Adding system_dfl_wq to encourage its use when unbound work should be used.

The old system_unbound_wq will be kept for a few release cycles.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://patch.msgid.link/20250918142427.309519-3-marco.crivellari@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Marco Crivellari and committed by
Jakub Kicinski
5fd8bb98 9870d350

+24 -24
+1 -1
drivers/net/ethernet/sfc/efx_channels.c
··· 1281 1281 time = jiffies - channel->rfs_last_expiry; 1282 1282 /* Would our quota be >= 20? */ 1283 1283 if (channel->rfs_filter_count * time >= 600 * HZ) 1284 - mod_delayed_work(system_wq, &channel->filter_work, 0); 1284 + mod_delayed_work(system_percpu_wq, &channel->filter_work, 0); 1285 1285 #endif 1286 1286 1287 1287 /* There is no race here; although napi_disable() will
+1 -1
drivers/net/ethernet/sfc/siena/efx_channels.c
··· 1300 1300 time = jiffies - channel->rfs_last_expiry; 1301 1301 /* Would our quota be >= 20? */ 1302 1302 if (channel->rfs_filter_count * time >= 600 * HZ) 1303 - mod_delayed_work(system_wq, &channel->filter_work, 0); 1303 + mod_delayed_work(system_percpu_wq, &channel->filter_work, 0); 1304 1304 #endif 1305 1305 1306 1306 /* There is no race here; although napi_disable() will
+6 -6
drivers/net/phy/sfp.c
··· 911 911 912 912 if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) && 913 913 !sfp->need_poll) 914 - mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); 914 + mod_delayed_work(system_percpu_wq, &sfp->poll, poll_jiffies); 915 915 mutex_unlock(&sfp->st_mutex); 916 916 } 917 917 ··· 1682 1682 err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag)); 1683 1683 if (err < 0) { 1684 1684 if (sfp->hwmon_tries--) { 1685 - mod_delayed_work(system_wq, &sfp->hwmon_probe, 1685 + mod_delayed_work(system_percpu_wq, &sfp->hwmon_probe, 1686 1686 T_PROBE_RETRY_SLOW); 1687 1687 } else { 1688 1688 dev_warn(sfp->dev, "hwmon probe failed: %pe\n", ··· 1709 1709 static int sfp_hwmon_insert(struct sfp *sfp) 1710 1710 { 1711 1711 if (sfp->have_a2 && sfp->id.ext.diagmon & SFP_DIAGMON_DDM) { 1712 - mod_delayed_work(system_wq, &sfp->hwmon_probe, 1); 1712 + mod_delayed_work(system_percpu_wq, &sfp->hwmon_probe, 1); 1713 1713 sfp->hwmon_tries = R_PROBE_RETRY_SLOW; 1714 1714 } 1715 1715 ··· 2563 2563 /* Force a poll to re-read the hardware signal state after 2564 2564 * sfp_sm_mod_probe() changed state_hw_mask. 2565 2565 */ 2566 - mod_delayed_work(system_wq, &sfp->poll, 1); 2566 + mod_delayed_work(system_percpu_wq, &sfp->poll, 1); 2567 2567 2568 2568 err = sfp_hwmon_insert(sfp); 2569 2569 if (err) ··· 3008 3008 // it's unimportant if we race while reading this. 
3009 3009 if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) || 3010 3010 sfp->need_poll) 3011 - mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); 3011 + mod_delayed_work(system_percpu_wq, &sfp->poll, poll_jiffies); 3012 3012 } 3013 3013 3014 3014 static struct sfp *sfp_alloc(struct device *dev) ··· 3178 3178 } 3179 3179 3180 3180 if (sfp->need_poll) 3181 - mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); 3181 + mod_delayed_work(system_percpu_wq, &sfp->poll, poll_jiffies); 3182 3182 3183 3183 /* We could have an issue in cases no Tx disable pin is available or 3184 3184 * wired as modules using a laser as their light source will continue to
+3 -3
net/bridge/br_cfm.c
··· 134 134 * of the configured CC 'expected_interval' 135 135 * in order to detect CCM defect after 3.25 interval. 136 136 */ 137 - queue_delayed_work(system_wq, &peer_mep->ccm_rx_dwork, 137 + queue_delayed_work(system_percpu_wq, &peer_mep->ccm_rx_dwork, 138 138 usecs_to_jiffies(interval_us / 4)); 139 139 } 140 140 ··· 285 285 ccm_frame_tx(skb); 286 286 287 287 interval_us = interval_to_us(mep->cc_config.exp_interval); 288 - queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 288 + queue_delayed_work(system_percpu_wq, &mep->ccm_tx_dwork, 289 289 usecs_to_jiffies(interval_us)); 290 290 } 291 291 ··· 809 809 * to send first frame immediately 810 810 */ 811 811 mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); 812 - queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 0); 812 + queue_delayed_work(system_percpu_wq, &mep->ccm_tx_dwork, 0); 813 813 814 814 save: 815 815 mep->cc_ccm_tx_info = *tx_info;
+4 -4
net/bridge/br_mrp.c
··· 341 341 out: 342 342 rcu_read_unlock(); 343 343 344 - queue_delayed_work(system_wq, &mrp->test_work, 344 + queue_delayed_work(system_percpu_wq, &mrp->test_work, 345 345 usecs_to_jiffies(mrp->test_interval)); 346 346 } 347 347 ··· 418 418 out: 419 419 rcu_read_unlock(); 420 420 421 - queue_delayed_work(system_wq, &mrp->in_test_work, 421 + queue_delayed_work(system_percpu_wq, &mrp->in_test_work, 422 422 usecs_to_jiffies(mrp->in_test_interval)); 423 423 } 424 424 ··· 725 725 mrp->test_max_miss = test->max_miss; 726 726 mrp->test_monitor = test->monitor; 727 727 mrp->test_count_miss = 0; 728 - queue_delayed_work(system_wq, &mrp->test_work, 728 + queue_delayed_work(system_percpu_wq, &mrp->test_work, 729 729 usecs_to_jiffies(test->interval)); 730 730 731 731 return 0; ··· 865 865 mrp->in_test_end = jiffies + usecs_to_jiffies(in_test->period); 866 866 mrp->in_test_max_miss = in_test->max_miss; 867 867 mrp->in_test_count_miss = 0; 868 - queue_delayed_work(system_wq, &mrp->in_test_work, 868 + queue_delayed_work(system_percpu_wq, &mrp->in_test_work, 869 869 usecs_to_jiffies(in_test->interval)); 870 870 871 871 return 0;
+1 -1
net/ceph/mon_client.c
··· 314 314 delay = CEPH_MONC_PING_INTERVAL; 315 315 316 316 dout("__schedule_delayed after %lu\n", delay); 317 - mod_delayed_work(system_wq, &monc->delayed_work, 317 + mod_delayed_work(system_percpu_wq, &monc->delayed_work, 318 318 round_jiffies_relative(delay)); 319 319 } 320 320
+1 -1
net/core/skmsg.c
··· 876 876 sk_psock_stop(psock); 877 877 878 878 INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); 879 - queue_rcu_work(system_wq, &psock->rwork); 879 + queue_rcu_work(system_percpu_wq, &psock->rwork); 880 880 } 881 881 EXPORT_SYMBOL_GPL(sk_psock_drop); 882 882
+1 -1
net/devlink/core.c
··· 320 320 void devlink_put(struct devlink *devlink) 321 321 { 322 322 if (refcount_dec_and_test(&devlink->refcount)) 323 - queue_rcu_work(system_wq, &devlink->rwork); 323 + queue_rcu_work(system_percpu_wq, &devlink->rwork); 324 324 } 325 325 326 326 struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
+1 -1
net/ipv4/inet_fragment.c
··· 183 183 rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); 184 184 185 185 if (llist_add(&fqdir->free_list, &fqdir_free_list)) 186 - queue_delayed_work(system_wq, &fqdir_free_work, HZ); 186 + queue_delayed_work(system_percpu_wq, &fqdir_free_work, HZ); 187 187 } 188 188 189 189 int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
+1 -1
net/netfilter/nf_conntrack_ecache.c
··· 301 301 net->ct.ecache_dwork_pending = true; 302 302 } else if (state == NFCT_ECACHE_DESTROY_SENT) { 303 303 if (!hlist_nulls_empty(&cnet->ecache.dying_list)) 304 - mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); 304 + mod_delayed_work(system_percpu_wq, &cnet->ecache.dwork, 0); 305 305 else 306 306 net->ct.ecache_dwork_pending = false; 307 307 }
+1 -1
net/openvswitch/dp_notify.c
··· 75 75 76 76 /* schedule vport destroy, dev_put and genl notification */ 77 77 ovs_net = net_generic(dev_net(dev), ovs_net_id); 78 - queue_work(system_wq, &ovs_net->dp_notify_work); 78 + queue_work(system_percpu_wq, &ovs_net->dp_notify_work); 79 79 } 80 80 81 81 return NOTIFY_DONE;
+1 -1
net/rfkill/input.c
··· 159 159 rfkill_op_pending = true; 160 160 if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { 161 161 /* bypass the limiter for EPO */ 162 - mod_delayed_work(system_wq, &rfkill_op_work, 0); 162 + mod_delayed_work(system_percpu_wq, &rfkill_op_work, 0); 163 163 rfkill_last_scheduled = jiffies; 164 164 } else 165 165 rfkill_schedule_ratelimited();
+1 -1
net/smc/smc_core.c
··· 85 85 * otherwise there is a risk of out-of-sync link groups. 86 86 */ 87 87 if (!lgr->freeing) { 88 - mod_delayed_work(system_wq, &lgr->free_work, 88 + mod_delayed_work(system_percpu_wq, &lgr->free_work, 89 89 (!lgr->is_smcd && lgr->role == SMC_CLNT) ? 90 90 SMC_LGR_FREE_DELAY_CLNT : 91 91 SMC_LGR_FREE_DELAY_SERV);
+1 -1
net/vmw_vsock/af_vsock.c
··· 1649 1649 * reschedule it, then ungrab the socket refcount to 1650 1650 * keep it balanced. 1651 1651 */ 1652 - if (mod_delayed_work(system_wq, &vsk->connect_work, 1652 + if (mod_delayed_work(system_percpu_wq, &vsk->connect_work, 1653 1653 timeout)) 1654 1654 sock_put(sk); 1655 1655