
Merge tag 'net-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Jakub Kicinski:
"Including fixes from Netfilter.

Current release - new code bugs:

- net: fix backlog_unlock_irq_restore() vs CONFIG_PREEMPT_RT

- eth: mlx5e: XSK, Fix unintended ICOSQ change

- phy_port: correctly recompute the port's linkmodes

- vsock: prevent child netns mode switch from local to global

- couple of kconfig fixes for new symbols

Previous releases - regressions:

- nfc: nci: fix false-positive parameter validation for packet data

- net: do not delay zero-copy skbs in skb_attempt_defer_free()

Previous releases - always broken:

- mctp: ensure our nlmsg responses to user space are zero-initialised

- ipv6: ioam: fix heap buffer overflow in __ioam6_fill_trace_data()

- fixes for ICMP rate limiting

Misc:

- intel: fix PCI device ID conflict between i40e and ipw2200"

* tag 'net-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (85 commits)
net: nfc: nci: Fix parameter validation for packet data
net/mlx5e: Use unsigned for mlx5e_get_max_num_channels
net/mlx5e: Fix deadlocks between devlink and netdev instance locks
net/mlx5e: MACsec, add ASO poll loop in macsec_aso_set_arm_event
net/mlx5: Fix misidentification of write combining CQE during poll loop
net/mlx5e: Fix misidentification of ASO CQE during poll loop
net/mlx5: Fix multiport device check over light SFs
bonding: alb: fix UAF in rlb_arp_recv during bond up/down
bnge: fix reserving resources from FW
eth: fbnic: Advertise supported XDP features.
rds: tcp: fix uninit-value in __inet_bind
net/rds: Fix NULL pointer dereference in rds_tcp_accept_one
octeontx2-af: Fix default entries mcam entry action
net/mlx5e: XSK, Fix unintended ICOSQ change
ipv6: icmp: icmpv6_xrlim_allow() optimization if net.ipv6.icmp.ratelimit is zero
ipv4: icmp: icmpv4_xrlim_allow() optimization if net.ipv4.icmp_ratelimit is zero
ipv6: icmp: remove obsolete code in icmpv6_xrlim_allow()
inet: move icmp_global_{credit,stamp} to a separate cache line
icmp: prevent possible overflow in icmp_global_allow()
selftests/net: packetdrill: add ipv4-mapped-ipv6 tests
...

+1140 -694
+50 -2
Documentation/admin-guide/sysctl/net.rst
··· 40 40 bridge Bridging rose X.25 PLP layer 41 41 core General parameter tipc TIPC 42 42 ethernet Ethernet protocol unix Unix domain sockets 43 - ipv4 IP version 4 x25 X.25 protocol 44 - ipv6 IP version 6 43 + ipv4 IP version 4 vsock VSOCK sockets 44 + ipv6 IP version 6 x25 X.25 protocol 45 45 ========= =================== = ========== =================== 46 46 47 47 1. /proc/sys/net/core - Network core options ··· 551 551 If named_timeout is nonzero, failed topology updates will be placed on a defer 552 552 queue until another event arrives that clears the error, or until the timeout 553 553 expires. Value is in milliseconds. 554 + 555 + 6. /proc/sys/net/vsock - VSOCK sockets 556 + -------------------------------------- 557 + 558 + VSOCK sockets (AF_VSOCK) provide communication between virtual machines and 559 + their hosts. The behavior of VSOCK sockets in a network namespace is determined 560 + by the namespace's mode (``global`` or ``local``), which controls how CIDs 561 + (Context IDs) are allocated and how sockets interact across namespaces. 562 + 563 + ns_mode 564 + ------- 565 + 566 + Read-only. Reports the current namespace's mode, set at namespace creation 567 + and immutable thereafter. 568 + 569 + Values: 570 + 571 + - ``global`` - the namespace shares system-wide CID allocation and 572 + its sockets can reach any VM or socket in any global namespace. 573 + Sockets in this namespace cannot reach sockets in local 574 + namespaces. 575 + - ``local`` - the namespace has private CID allocation and its 576 + sockets can only connect to VMs or sockets within the same 577 + namespace. 578 + 579 + The init_net mode is always ``global``. 580 + 581 + child_ns_mode 582 + ------------- 583 + 584 + Controls what mode newly created child namespaces will inherit. At namespace 585 + creation, ``ns_mode`` is inherited from the parent's ``child_ns_mode``. The 586 + initial value matches the namespace's own ``ns_mode``. 
587 + 588 + Values: 589 + 590 + - ``global`` - child namespaces will share system-wide CID allocation 591 + and their sockets will be able to reach any VM or socket in any 592 + global namespace. 593 + - ``local`` - child namespaces will have private CID allocation and 594 + their sockets will only be able to connect within their own 595 + namespace. 596 + 597 + Changing ``child_ns_mode`` only affects namespaces created after the change; 598 + it does not modify the current namespace or any existing children. 599 + 600 + A namespace with ``ns_mode`` set to ``local`` cannot change 601 + ``child_ns_mode`` to ``global`` (returns ``-EPERM``).
+4 -3
Documentation/networking/ip-sysctl.rst
··· 3234 3234 =========== 3235 3235 3236 3236 ratelimit - INTEGER 3237 - Limit the maximal rates for sending ICMPv6 messages. 3237 + Limit the maximal rates for sending ICMPv6 messages to a particular 3238 + peer. 3238 3239 3239 3240 0 to disable any limiting, 3240 - otherwise the minimal space between responses in milliseconds. 3241 + otherwise the space between responses in milliseconds. 3241 3242 3242 - Default: 1000 3243 + Default: 100 3243 3244 3244 3245 ratemask - list of comma separated ranges 3245 3246 For ICMPv6 message types matching the ranges in the ratemask, limit
+4
drivers/atm/fore200e.c
··· 373 373 fallthrough; 374 374 case FORE200E_STATE_IRQ: 375 375 free_irq(fore200e->irq, fore200e->atm_dev); 376 + #ifdef FORE200E_USE_TASKLET 377 + tasklet_kill(&fore200e->tx_tasklet); 378 + tasklet_kill(&fore200e->rx_tasklet); 379 + #endif 376 380 377 381 fallthrough; 378 382 case FORE200E_STATE_ALLOC_BUF:
+2
drivers/dpll/zl3073x/ref.h
··· 91 91 92 92 ref->freq_base = base; 93 93 ref->freq_mult = mult; 94 + ref->freq_ratio_m = 1; 95 + ref->freq_ratio_n = 1; 94 96 95 97 return 0; 96 98 }
+1
drivers/net/Kconfig
··· 333 333 334 334 config NETCONSOLE 335 335 tristate "Network console logging support" 336 + depends on PRINTK 336 337 help 337 338 If you want to log kernel messages over the network, enable this. 338 339 See <file:Documentation/networking/netconsole.rst> for details.
+15 -1
drivers/net/arcnet/com20020-pci.c
··· 115 115 .attrs = com20020_state_attrs, 116 116 }; 117 117 118 + static struct com20020_pci_card_info card_info_2p5mbit; 119 + 118 120 static void com20020pci_remove(struct pci_dev *pdev); 119 121 120 122 static int com20020pci_probe(struct pci_dev *pdev, ··· 142 140 143 141 ci = (struct com20020_pci_card_info *)id->driver_data; 144 142 if (!ci) 145 - return -EINVAL; 143 + ci = &card_info_2p5mbit; 146 144 147 145 priv->ci = ci; 148 146 mm = &ci->misc_map; ··· 347 345 }, 348 346 }, 349 347 .flags = ARC_IS_5MBIT, 348 + }; 349 + 350 + static struct com20020_pci_card_info card_info_2p5mbit = { 351 + .name = "ARC-PCI", 352 + .devcount = 1, 353 + .chan_map_tbl = { 354 + { 355 + .bar = 2, 356 + .offset = 0x00, 357 + .size = 0x08, 358 + }, 359 + }, 350 360 }; 351 361 352 362 static struct com20020_pci_card_info card_info_sohard = {
+5 -1
drivers/net/bonding/bond_main.c
··· 4343 4343 4344 4344 bond_work_cancel_all(bond); 4345 4345 bond->send_peer_notif = 0; 4346 + WRITE_ONCE(bond->recv_probe, NULL); 4347 + 4348 + /* Wait for any in-flight RX handlers */ 4349 + synchronize_net(); 4350 + 4346 4351 if (bond_is_lb(bond)) 4347 4352 bond_alb_deinitialize(bond); 4348 - bond->recv_probe = NULL; 4349 4353 4350 4354 if (BOND_MODE(bond) == BOND_MODE_8023AD && 4351 4355 bond->params.broadcast_neighbor)
-1
drivers/net/dsa/mxl862xx/Kconfig
··· 2 2 config NET_DSA_MXL862 3 3 tristate "MaxLinear MxL862xx" 4 4 depends on NET_DSA 5 - select MAXLINEAR_GPHY 6 5 select NET_DSA_TAG_MXL_862XX 7 6 help 8 7 This enables support for the MaxLinear MxL862xx switch family.
+1 -1
drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c
··· 442 442 struct hwrm_func_cfg_input *req; 443 443 u32 enables = 0; 444 444 445 - if (bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCFG)) 445 + if (bnge_hwrm_req_init(bd, req, HWRM_FUNC_CFG)) 446 446 return NULL; 447 447 448 448 req->fid = cpu_to_le16(0xffff);
-1
drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
··· 16 16 #include <linux/etherdevice.h> 17 17 #include <linux/if.h> 18 18 #include <net/ip.h> 19 - #include <net/netdev_queues.h> 20 19 #include <linux/skbuff.h> 21 20 #include <net/page_pool/helpers.h> 22 21
+1 -1
drivers/net/ethernet/ec_bhf.c
··· 423 423 424 424 error_rx_free: 425 425 dma_free_coherent(dev, priv->rx_buf.alloc_len, priv->rx_buf.alloc, 426 - priv->rx_buf.alloc_len); 426 + priv->rx_buf.alloc_phys); 427 427 out: 428 428 return err; 429 429 }
+7 -1
drivers/net/ethernet/intel/i40e/i40e_main.c
··· 75 75 {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, 76 76 {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0}, 77 77 {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0}, 78 - {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0}, 78 + /* 79 + * This ID conflicts with ipw2200, but the devices can be differentiated 80 + * because i40e devices use PCI_CLASS_NETWORK_ETHERNET and ipw2200 81 + * devices use PCI_CLASS_NETWORK_OTHER. 82 + */ 83 + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_B), 84 + PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0}, 79 85 {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, 80 86 {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, 81 87 {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
+22 -19
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
··· 1042 1042 rvu_write64(rvu, blkaddr, 1043 1043 NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action); 1044 1044 1045 - /* update the VF flow rule action with the VF default entry action */ 1046 - if (mcam_index < 0) 1047 - npc_update_vf_flow_entry(rvu, mcam, blkaddr, pcifunc, 1048 - *(u64 *)&action); 1049 - 1050 1045 /* update the action change in default rule */ 1051 1046 pfvf = rvu_get_pfvf(rvu, pcifunc); 1052 1047 if (pfvf->def_ucast_rule) 1053 1048 pfvf->def_ucast_rule->rx_action = action; 1054 1049 1055 - index = npc_get_nixlf_mcam_index(mcam, pcifunc, 1056 - nixlf, NIXLF_PROMISC_ENTRY); 1050 + if (mcam_index < 0) { 1051 + /* update the VF flow rule action with the VF default 1052 + * entry action 1053 + */ 1054 + npc_update_vf_flow_entry(rvu, mcam, blkaddr, pcifunc, 1055 + *(u64 *)&action); 1057 1056 1058 - /* If PF's promiscuous entry is enabled, 1059 - * Set RSS action for that entry as well 1060 - */ 1061 - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, 1062 - alg_idx); 1057 + index = npc_get_nixlf_mcam_index(mcam, pcifunc, 1058 + nixlf, NIXLF_PROMISC_ENTRY); 1063 1059 1064 - index = npc_get_nixlf_mcam_index(mcam, pcifunc, 1065 - nixlf, NIXLF_ALLMULTI_ENTRY); 1066 - /* If PF's allmulti entry is enabled, 1067 - * Set RSS action for that entry as well 1068 - */ 1069 - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, 1070 - alg_idx); 1060 + /* If PF's promiscuous entry is enabled, 1061 + * Set RSS action for that entry as well 1062 + */ 1063 + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, 1064 + blkaddr, alg_idx); 1065 + 1066 + index = npc_get_nixlf_mcam_index(mcam, pcifunc, 1067 + nixlf, NIXLF_ALLMULTI_ENTRY); 1068 + /* If PF's allmulti entry is enabled, 1069 + * Set RSS action for that entry as well 1070 + */ 1071 + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, 1072 + blkaddr, alg_idx); 1073 + } 1071 1074 } 1072 1075 1073 1076 void npc_enadis_default_mce_entry(struct rvu *rvu, u16 
pcifunc,
+3 -1
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 180 180 } 181 181 182 182 /* Use this function to get max num channels (rxqs/txqs) only to create netdev */ 183 - static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) 183 + static inline unsigned int 184 + mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) 184 185 { 185 186 return is_kdump_kernel() ? 186 187 MLX5E_MIN_NUM_CHANNELS : ··· 1104 1103 int mlx5e_close_locked(struct net_device *netdev); 1105 1104 1106 1105 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c); 1106 + void mlx5e_trigger_napi_async_icosq(struct mlx5e_channel *c); 1107 1107 void mlx5e_trigger_napi_sched(struct napi_struct *napi); 1108 1108 1109 1109 int mlx5e_open_channels(struct mlx5e_priv *priv,
-14
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
··· 457 457 { 458 458 struct mlx5e_ptpsq *ptpsq = 459 459 container_of(work, struct mlx5e_ptpsq, report_unhealthy_work); 460 - struct mlx5e_txqsq *sq = &ptpsq->txqsq; 461 - 462 - /* Recovering the PTP SQ means re-enabling NAPI, which requires the 463 - * netdev instance lock. However, SQ closing has to wait for this work 464 - * task to finish while also holding the same lock. So either get the 465 - * lock or find that the SQ is no longer enabled and thus this work is 466 - * not relevant anymore. 467 - */ 468 - while (!netdev_trylock(sq->netdev)) { 469 - if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) 470 - return; 471 - msleep(20); 472 - } 473 460 474 461 mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq); 475 - netdev_unlock(sq->netdev); 476 462 } 477 463 478 464 static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
+13
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2019 Mellanox Technologies. 3 3 4 + #include <net/netdev_lock.h> 5 + 4 6 #include "health.h" 5 7 #include "params.h" 6 8 #include "txrx.h" ··· 179 177 rq = ctx; 180 178 priv = rq->priv; 181 179 180 + /* Acquire netdev instance lock to synchronize with channel close and 181 + * reopen flows. Either successfully obtain the lock, or detect that 182 + * channels are closing for another reason, making this work no longer 183 + * necessary. 184 + */ 185 + while (!netdev_trylock(rq->netdev)) { 186 + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) 187 + return 0; 188 + msleep(20); 189 + } 182 190 mutex_lock(&priv->state_lock); 183 191 184 192 eq = rq->cq.mcq.eq; ··· 198 186 clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); 199 187 200 188 mutex_unlock(&priv->state_lock); 189 + netdev_unlock(rq->netdev); 201 190 202 191 return err; 203 192 }
+48 -4
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* Copyright (c) 2019 Mellanox Technologies. */ 3 3 4 + #include <net/netdev_lock.h> 5 + 4 6 #include "health.h" 5 7 #include "en/ptp.h" 6 8 #include "en/devlink.h" ··· 81 79 if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 82 80 return 0; 83 81 82 + /* Recovering queues means re-enabling NAPI, which requires the netdev 83 + * instance lock. However, SQ closing flows have to wait for work tasks 84 + * to finish while also holding the netdev instance lock. So either get 85 + * the lock or find that the SQ is no longer enabled and thus this work 86 + * is not relevant anymore. 87 + */ 88 + while (!netdev_trylock(dev)) { 89 + if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) 90 + return 0; 91 + msleep(20); 92 + } 93 + 84 94 err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 85 95 if (err) { 86 96 netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", ··· 128 114 else 129 115 mlx5e_trigger_napi_sched(sq->cq.napi); 130 116 117 + netdev_unlock(dev); 131 118 return 0; 132 119 out: 133 120 clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 121 + netdev_unlock(dev); 134 122 return err; 135 123 } 136 124 ··· 153 137 sq = to_ctx->sq; 154 138 eq = sq->cq.mcq.eq; 155 139 priv = sq->priv; 140 + 141 + /* Recovering the TX queues implies re-enabling NAPI, which requires 142 + * the netdev instance lock. 143 + * However, channel closing flows have to wait for this work to finish 144 + * while holding the same lock. So either get the lock or find that 145 + * channels are being closed for other reason and this work is not 146 + * relevant anymore. 
147 + */ 148 + while (!netdev_trylock(sq->netdev)) { 149 + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) 150 + return 0; 151 + msleep(20); 152 + } 153 + 156 154 err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 157 155 if (!err) { 158 156 to_ctx->status = 0; /* this sq recovered */ 159 - return err; 157 + goto out; 160 158 } 161 159 162 160 mutex_lock(&priv->state_lock); ··· 178 148 mutex_unlock(&priv->state_lock); 179 149 if (!err) { 180 150 to_ctx->status = 1; /* all channels recovered */ 181 - return err; 151 + goto out; 182 152 } 183 153 184 154 to_ctx->status = err; ··· 186 156 netdev_err(priv->netdev, 187 157 "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 188 158 err); 189 - 159 + out: 160 + netdev_unlock(sq->netdev); 190 161 return err; 191 162 } 192 163 ··· 204 173 return 0; 205 174 206 175 priv = ptpsq->txqsq.priv; 176 + netdev = priv->netdev; 177 + 178 + /* Recovering the PTP SQ means re-enabling NAPI, which requires the 179 + * netdev instance lock. However, SQ closing has to wait for this work 180 + * task to finish while also holding the same lock. So either get the 181 + * lock or find that the SQ is no longer enabled and thus this work is 182 + * not relevant anymore. 183 + */ 184 + while (!netdev_trylock(netdev)) { 185 + if (!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)) 186 + return 0; 187 + msleep(20); 188 + } 207 189 208 190 mutex_lock(&priv->state_lock); 209 191 chs = &priv->channels; 210 - netdev = priv->netdev; 211 192 212 193 carrier_ok = netif_carrier_ok(netdev); 213 194 netif_carrier_off(netdev); ··· 236 193 netif_carrier_on(netdev); 237 194 238 195 mutex_unlock(&priv->state_lock); 196 + netdev_unlock(netdev); 239 197 240 198 return err; 241 199 }
+3 -7
drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 2 // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 3 4 + #include <linux/iopoll.h> 4 5 #include <linux/math64.h> 5 6 #include "lib/aso.h" 6 7 #include "en/tc/post_act.h" ··· 116 115 struct mlx5e_flow_meters *flow_meters; 117 116 u8 cir_man, cir_exp, cbs_man, cbs_exp; 118 117 struct mlx5_aso_wqe *aso_wqe; 119 - unsigned long expires; 120 118 struct mlx5_aso *aso; 121 119 u64 rate, burst; 122 120 u8 ds_cnt; ··· 187 187 mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl); 188 188 189 189 /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */ 190 - expires = jiffies + msecs_to_jiffies(10); 191 - do { 192 - err = mlx5_aso_poll_cq(aso, true); 193 - if (err) 194 - usleep_range(2, 10); 195 - } while (err && time_is_after_jiffies(expires)); 190 + read_poll_timeout(mlx5_aso_poll_cq, err, !err, 10, 10 * USEC_PER_MSEC, 191 + false, aso, true); 196 192 mutex_unlock(&flow_meters->aso_lock); 197 193 198 194 return err;
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
··· 127 127 goto err_remove_pool; 128 128 129 129 mlx5e_activate_xsk(c); 130 - mlx5e_trigger_napi_icosq(c); 130 + mlx5e_trigger_napi_async_icosq(c); 131 131 132 132 /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide 133 133 * any Fill Ring entries at the setup stage. ··· 179 179 c = priv->channels.c[ix]; 180 180 181 181 mlx5e_activate_rq(&c->rq); 182 - mlx5e_trigger_napi_icosq(c); 182 + mlx5e_trigger_napi_async_icosq(c); 183 183 mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT); 184 184 185 185 mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
··· 34 34 &c->async_icosq->state)) 35 35 return 0; 36 36 37 - mlx5e_trigger_napi_icosq(c); 37 + mlx5e_trigger_napi_async_icosq(c); 38 38 } 39 39 40 40 return 0;
+5 -8
drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
··· 5 5 #include <linux/mlx5/mlx5_ifc.h> 6 6 #include <linux/xarray.h> 7 7 #include <linux/if_vlan.h> 8 + #include <linux/iopoll.h> 8 9 9 10 #include "en.h" 10 11 #include "lib/aso.h" ··· 1386 1385 MLX5_ACCESS_ASO_OPC_MOD_MACSEC); 1387 1386 macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in); 1388 1387 mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); 1389 - err = mlx5_aso_poll_cq(maso, false); 1388 + read_poll_timeout(mlx5_aso_poll_cq, err, !err, 10, 10 * USEC_PER_MSEC, 1389 + false, maso, false); 1390 1390 mutex_unlock(&aso->aso_lock); 1391 1391 1392 1392 return err; ··· 1399 1397 struct mlx5e_macsec_aso *aso; 1400 1398 struct mlx5_aso_wqe *aso_wqe; 1401 1399 struct mlx5_aso *maso; 1402 - unsigned long expires; 1403 1400 int err; 1404 1401 1405 1402 aso = &macsec->aso; ··· 1412 1411 macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); 1413 1412 1414 1413 mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); 1415 - expires = jiffies + msecs_to_jiffies(10); 1416 - do { 1417 - err = mlx5_aso_poll_cq(maso, false); 1418 - if (err) 1419 - usleep_range(2, 10); 1420 - } while (err && time_is_after_jiffies(expires)); 1414 + read_poll_timeout(mlx5_aso_poll_cq, err, !err, 10, 10 * USEC_PER_MSEC, 1415 + false, maso, false); 1421 1416 1422 1417 if (err) 1423 1418 goto err_out;
+17 -47
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 631 631 struct mlx5e_rq, 632 632 rx_timeout_work); 633 633 634 - /* Acquire netdev instance lock to synchronize with channel close and 635 - * reopen flows. Either successfully obtain the lock, or detect that 636 - * channels are closing for another reason, making this work no longer 637 - * necessary. 638 - */ 639 - while (!netdev_trylock(rq->netdev)) { 640 - if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) 641 - return; 642 - msleep(20); 643 - } 644 - 645 634 mlx5e_reporter_rx_timeout(rq); 646 - netdev_unlock(rq->netdev); 647 635 } 648 636 649 637 static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) ··· 1940 1952 struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, 1941 1953 recover_work); 1942 1954 1943 - /* Recovering queues means re-enabling NAPI, which requires the netdev 1944 - * instance lock. However, SQ closing flows have to wait for work tasks 1945 - * to finish while also holding the netdev instance lock. So either get 1946 - * the lock or find that the SQ is no longer enabled and thus this work 1947 - * is not relevant anymore. 
1948 - */ 1949 - while (!netdev_trylock(sq->netdev)) { 1950 - if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) 1951 - return; 1952 - msleep(20); 1953 - } 1954 - 1955 1955 mlx5e_reporter_tx_err_cqe(sq); 1956 - netdev_unlock(sq->netdev); 1957 1956 } 1958 1957 1959 1958 static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) ··· 2719 2744 2720 2745 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) 2721 2746 { 2747 + struct mlx5e_icosq *sq = &c->icosq; 2722 2748 bool locked; 2723 2749 2724 - if (!test_and_set_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state)) 2725 - synchronize_net(); 2750 + set_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &sq->state); 2751 + synchronize_net(); 2726 2752 2727 - locked = mlx5e_icosq_sync_lock(&c->icosq); 2728 - mlx5e_trigger_irq(&c->icosq); 2729 - mlx5e_icosq_sync_unlock(&c->icosq, locked); 2753 + locked = mlx5e_icosq_sync_lock(sq); 2754 + mlx5e_trigger_irq(sq); 2755 + mlx5e_icosq_sync_unlock(sq, locked); 2730 2756 2731 - clear_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state); 2757 + clear_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &sq->state); 2758 + } 2759 + 2760 + void mlx5e_trigger_napi_async_icosq(struct mlx5e_channel *c) 2761 + { 2762 + struct mlx5e_icosq *sq = c->async_icosq; 2763 + 2764 + spin_lock_bh(&sq->lock); 2765 + mlx5e_trigger_irq(sq); 2766 + spin_unlock_bh(&sq->lock); 2732 2767 } 2733 2768 2734 2769 void mlx5e_trigger_napi_sched(struct napi_struct *napi) ··· 2821 2836 netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); 2822 2837 netif_napi_set_irq_locked(&c->napi, irq); 2823 2838 2824 - async_icosq_needed = !!xsk_pool || priv->ktls_rx_was_enabled; 2839 + async_icosq_needed = !!params->xdp_prog || priv->ktls_rx_was_enabled; 2825 2840 err = mlx5e_open_queues(c, params, cparam, async_icosq_needed); 2826 2841 if (unlikely(err)) 2827 2842 goto err_napi_del; ··· 5090 5105 struct net_device *netdev = priv->netdev; 5091 5106 int i; 5092 5107 5093 - /* Recovering the TX queues implies re-enabling NAPI, which 
requires 5094 - * the netdev instance lock. 5095 - * However, channel closing flows have to wait for this work to finish 5096 - * while holding the same lock. So either get the lock or find that 5097 - * channels are being closed for other reason and this work is not 5098 - * relevant anymore. 5099 - */ 5100 - while (!netdev_trylock(netdev)) { 5101 - if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) 5102 - return; 5103 - msleep(20); 5104 - } 5105 - 5106 5108 for (i = 0; i < netdev->real_num_tx_queues; i++) { 5107 5109 struct netdev_queue *dev_queue = 5108 5110 netdev_get_tx_queue(netdev, i); ··· 5102 5130 /* break if tried to reopened channels */ 5103 5131 break; 5104 5132 } 5105 - 5106 - netdev_unlock(netdev); 5107 5133 } 5108 5134 5109 5135 static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
+5 -9
drivers/net/ethernet/mellanox/mlx5/core/wc.c
··· 2 2 // Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 3 4 4 #include <linux/io.h> 5 + #include <linux/iopoll.h> 5 6 #include <linux/mlx5/transobj.h> 6 7 #include "lib/clock.h" 7 8 #include "mlx5_core.h" ··· 16 15 #define TEST_WC_NUM_WQES 255 17 16 #define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES)) 18 17 #define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ 19 - #define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) 18 + #define TEST_WC_POLLING_MAX_TIME_USEC (100 * USEC_PER_MSEC) 20 19 21 20 struct mlx5_wc_cq { 22 21 /* data path - accessed per cqe */ ··· 360 359 static void mlx5_core_test_wc(struct mlx5_core_dev *mdev) 361 360 { 362 361 unsigned int offset = 0; 363 - unsigned long expires; 364 362 struct mlx5_wc_sq *sq; 365 363 int i, err; 366 364 ··· 389 389 390 390 mlx5_wc_post_nop(sq, &offset, true); 391 391 392 - expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; 393 - do { 394 - err = mlx5_wc_poll_cq(sq); 395 - if (err) 396 - usleep_range(2, 10); 397 - } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED && 398 - time_is_after_jiffies(expires)); 392 + poll_timeout_us(mlx5_wc_poll_cq(sq), 393 + mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED, 10, 394 + TEST_WC_POLLING_MAX_TIME_USEC, false); 399 395 400 396 mlx5_wc_destroy_sq(sq); 401 397
+3
drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
··· 1145 1145 return -EINVAL; 1146 1146 } 1147 1147 1148 + dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT, 1149 + FBNIC_RCD_HDR_AL_DMA_HINT_L4); 1150 + 1148 1151 /* Write action table values */ 1149 1152 act_tcam->dest = dest; 1150 1153 act_tcam->rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, hash_idx);
-3
drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c
··· 51 51 log->data_start = data; 52 52 log->data_end = data + FBNIC_FW_LOG_SIZE; 53 53 54 - fbnic_fw_log_enable(fbd, true); 55 - 56 54 return 0; 57 55 } 58 56 ··· 61 63 if (!fbnic_fw_log_ready(fbd)) 62 64 return; 63 65 64 - fbnic_fw_log_disable(fbd); 65 66 INIT_LIST_HEAD(&log->entries); 66 67 log->size = 0; 67 68 vfree(log->data_start);
+20
drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
··· 262 262 return 0; 263 263 } 264 264 265 + static int fbnic_change_mtu(struct net_device *dev, int new_mtu) 266 + { 267 + struct fbnic_net *fbn = netdev_priv(dev); 268 + 269 + if (fbnic_check_split_frames(fbn->xdp_prog, new_mtu, fbn->hds_thresh)) { 270 + dev_err(&dev->dev, 271 + "MTU %d is larger than HDS threshold %d in XDP mode\n", 272 + new_mtu, fbn->hds_thresh); 273 + 274 + return -EINVAL; 275 + } 276 + 277 + WRITE_ONCE(dev->mtu, new_mtu); 278 + 279 + return 0; 280 + } 281 + 265 282 void fbnic_clear_rx_mode(struct fbnic_dev *fbd) 266 283 { 267 284 struct net_device *netdev = fbd->netdev; ··· 550 533 .ndo_start_xmit = fbnic_xmit_frame, 551 534 .ndo_features_check = fbnic_features_check, 552 535 .ndo_set_mac_address = fbnic_set_mac, 536 + .ndo_change_mtu = fbnic_change_mtu, 553 537 .ndo_set_rx_mode = fbnic_set_rx_mode, 554 538 .ndo_get_stats64 = fbnic_get_stats64, 555 539 .ndo_bpf = fbnic_bpf, ··· 804 786 netdev->vlan_features |= netdev->features; 805 787 netdev->hw_enc_features |= netdev->features; 806 788 netdev->features |= NETIF_F_NTUPLE; 789 + 790 + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG; 807 791 808 792 netdev->min_mtu = IPV6_MIN_MTU; 809 793 netdev->max_mtu = FBNIC_MAX_JUMBO_FRAME_SIZE - ETH_HLEN;
+12 -7
drivers/net/ethernet/meta/fbnic/fbnic_pci.c
··· 311 311 goto free_irqs; 312 312 } 313 313 314 + err = fbnic_fw_log_init(fbd); 315 + if (err) 316 + dev_warn(fbd->dev, 317 + "Unable to initialize firmware log buffer: %d\n", 318 + err); 319 + 314 320 err = fbnic_fw_request_mbx(fbd); 315 321 if (err) { 316 322 dev_err(&pdev->dev, 317 323 "Firmware mailbox initialization failure\n"); 318 - goto free_irqs; 324 + goto free_fw_log; 319 325 } 320 326 321 327 /* Send the request to enable the FW logging to host. Note if this ··· 329 323 * possible the FW is just too old to support the logging and needs 330 324 * to be updated. 331 325 */ 332 - err = fbnic_fw_log_init(fbd); 333 - if (err) 334 - dev_warn(fbd->dev, 335 - "Unable to initialize firmware log buffer: %d\n", 336 - err); 326 + fbnic_fw_log_enable(fbd, true); 337 327 338 328 fbnic_devlink_register(fbd); 339 329 fbnic_devlink_otp_check(fbd, "error detected during probe"); ··· 376 374 * firmware updates for fixes. 377 375 */ 378 376 return 0; 377 + free_fw_log: 378 + fbnic_fw_log_free(fbd); 379 379 free_irqs: 380 380 fbnic_free_irqs(fbd); 381 381 err_destroy_health: ··· 412 408 fbnic_hwmon_unregister(fbd); 413 409 fbnic_dbg_fbd_exit(fbd); 414 410 fbnic_devlink_unregister(fbd); 415 - fbnic_fw_log_free(fbd); 411 + fbnic_fw_log_disable(fbd); 416 412 fbnic_fw_free_mbx(fbd); 413 + fbnic_fw_log_free(fbd); 417 414 fbnic_free_irqs(fbd); 418 415 419 416 fbnic_devlink_health_destroy(fbd);
+2 -3
drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
··· 338 338 else if (tstamp_mask & (1u << flow_type)) 339 339 dest |= FBNIC_RPC_ACT_TBL0_TS_ENA; 340 340 341 - if (act1_value[flow_type] & FBNIC_RPC_TCAM_ACT1_L4_VALID) 342 - dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT, 343 - FBNIC_RCD_HDR_AL_DMA_HINT_L4); 341 + dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT, 342 + FBNIC_RCD_HDR_AL_DMA_HINT_L4); 344 343 345 344 rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, flow_type); 346 345
+14 -11
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
··· 2591 2591 } 2592 2592 2593 2593 static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv, 2594 - struct fbnic_ring *rcq, bool tx_pause) 2594 + struct fbnic_ring *rcq, bool tx_pause, 2595 + bool hdr_split) 2595 2596 { 2596 2597 struct fbnic_net *fbn = netdev_priv(nv->napi.dev); 2597 2598 u32 drop_mode, rcq_ctl; ··· 2605 2604 /* Specify packet layout */ 2606 2605 rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) | 2607 2606 FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) | 2608 - FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM); 2607 + FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM) | 2608 + FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT, hdr_split); 2609 2609 2610 2610 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl); 2611 2611 } 2612 2612 2613 - void fbnic_config_drop_mode(struct fbnic_net *fbn, bool tx_pause) 2613 + void fbnic_config_drop_mode(struct fbnic_net *fbn, bool txp) 2614 2614 { 2615 + bool hds; 2615 2616 int i, t; 2617 + 2618 + hds = fbn->hds_thresh < FBNIC_HDR_BYTES_MIN; 2616 2619 2617 2620 for (i = 0; i < fbn->num_napi; i++) { 2618 2621 struct fbnic_napi_vector *nv = fbn->napi[i]; ··· 2624 2619 for (t = 0; t < nv->rxt_count; t++) { 2625 2620 struct fbnic_q_triad *qt = &nv->qt[nv->txt_count + t]; 2626 2621 2627 - fbnic_config_drop_mode_rcq(nv, &qt->cmpl, tx_pause); 2622 + fbnic_config_drop_mode_rcq(nv, &qt->cmpl, txp, hds); 2628 2623 } 2629 2624 } 2630 2625 } ··· 2675 2670 { 2676 2671 struct fbnic_net *fbn = netdev_priv(nv->napi.dev); 2677 2672 u32 log_size = fls(rcq->size_mask); 2678 - u32 hds_thresh = fbn->hds_thresh; 2679 2673 u32 rcq_ctl = 0; 2680 - 2681 - fbnic_config_drop_mode_rcq(nv, rcq, fbn->tx_pause); 2674 + bool hdr_split; 2675 + u32 hds_thresh; 2682 2676 2683 2677 /* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should 2684 2678 * be split at L4. It would also result in the frames being split at 2685 2679 * L2/L3 depending on the frame size. 
2686 2680 */ 2687 - if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) { 2688 - rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT; 2689 - hds_thresh = FBNIC_HDR_BYTES_MIN; 2690 - } 2681 + hdr_split = fbn->hds_thresh < FBNIC_HDR_BYTES_MIN; 2682 + fbnic_config_drop_mode_rcq(nv, rcq, fbn->tx_pause, hdr_split); 2691 2683 2684 + hds_thresh = max(fbn->hds_thresh, FBNIC_HDR_BYTES_MIN); 2692 2685 rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | 2693 2686 FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) | 2694 2687 FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
+1 -1
drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
··· 66 66 (4096 - FBNIC_RX_HROOM - FBNIC_RX_TROOM - FBNIC_RX_PAD) 67 67 #define FBNIC_HDS_THRESH_DEFAULT \ 68 68 (1536 - FBNIC_RX_PAD) 69 - #define FBNIC_HDR_BYTES_MIN 128 69 + #define FBNIC_HDR_BYTES_MIN 256 70 70 71 71 struct fbnic_pkt_buff { 72 72 struct xdp_buff buff;
+1 -1
drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
··· 576 576 static struct ptp_clock_info sparx5_ptp_clock_info = { 577 577 .owner = THIS_MODULE, 578 578 .name = "sparx5 ptp", 579 - .max_adj = 200000, 579 + .max_adj = 10000000, 580 580 .gettime64 = sparx5_ptp_gettime64, 581 581 .settime64 = sparx5_ptp_settime64, 582 582 .adjtime = sparx5_ptp_adjtime,
+1 -1
drivers/net/ethernet/microchip/sparx5/sparx5_qos.h
··· 35 35 #define SPX5_SE_BURST_UNIT 4096 36 36 37 37 /* Dwrr */ 38 - #define SPX5_DWRR_COST_MAX 63 38 + #define SPX5_DWRR_COST_MAX 31 39 39 40 40 struct sparx5_shaper { 41 41 u32 mode;
+63 -26
drivers/net/ethernet/mscc/ocelot_net.c
··· 551 551 return 0; 552 552 } 553 553 554 - static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) 554 + static bool ocelot_xmit_timestamp(struct ocelot *ocelot, int port, 555 + struct sk_buff *skb, u32 *rew_op) 556 + { 557 + if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 558 + struct sk_buff *clone = NULL; 559 + 560 + if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) { 561 + kfree_skb(skb); 562 + return false; 563 + } 564 + 565 + if (clone) 566 + OCELOT_SKB_CB(skb)->clone = clone; 567 + 568 + *rew_op = ocelot_ptp_rew_op(skb); 569 + } 570 + 571 + return true; 572 + } 573 + 574 + static netdev_tx_t ocelot_port_xmit_fdma(struct sk_buff *skb, 575 + struct net_device *dev) 555 576 { 556 577 struct ocelot_port_private *priv = netdev_priv(dev); 557 578 struct ocelot_port *ocelot_port = &priv->port; ··· 580 559 int port = priv->port.index; 581 560 u32 rew_op = 0; 582 561 583 - if (!static_branch_unlikely(&ocelot_fdma_enabled) && 584 - !ocelot_can_inject(ocelot, 0)) 585 - return NETDEV_TX_BUSY; 562 + if (!ocelot_xmit_timestamp(ocelot, port, skb, &rew_op)) 563 + return NETDEV_TX_OK; 586 564 587 - /* Check if timestamping is needed */ 588 - if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 589 - struct sk_buff *clone = NULL; 590 - 591 - if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) { 592 - kfree_skb(skb); 593 - return NETDEV_TX_OK; 594 - } 595 - 596 - if (clone) 597 - OCELOT_SKB_CB(skb)->clone = clone; 598 - 599 - rew_op = ocelot_ptp_rew_op(skb); 600 - } 601 - 602 - if (static_branch_unlikely(&ocelot_fdma_enabled)) { 603 - ocelot_fdma_inject_frame(ocelot, port, rew_op, skb, dev); 604 - } else { 605 - ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); 606 - 607 - consume_skb(skb); 608 - } 565 + ocelot_fdma_inject_frame(ocelot, port, rew_op, skb, dev); 609 566 610 567 return NETDEV_TX_OK; 568 + } 569 + 570 + static netdev_tx_t ocelot_port_xmit_inj(struct sk_buff *skb, 571 + struct 
net_device *dev) 572 + { 573 + struct ocelot_port_private *priv = netdev_priv(dev); 574 + struct ocelot_port *ocelot_port = &priv->port; 575 + struct ocelot *ocelot = ocelot_port->ocelot; 576 + int port = priv->port.index; 577 + u32 rew_op = 0; 578 + 579 + ocelot_lock_inj_grp(ocelot, 0); 580 + 581 + if (!ocelot_can_inject(ocelot, 0)) { 582 + ocelot_unlock_inj_grp(ocelot, 0); 583 + return NETDEV_TX_BUSY; 584 + } 585 + 586 + if (!ocelot_xmit_timestamp(ocelot, port, skb, &rew_op)) { 587 + ocelot_unlock_inj_grp(ocelot, 0); 588 + return NETDEV_TX_OK; 589 + } 590 + 591 + ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); 592 + 593 + ocelot_unlock_inj_grp(ocelot, 0); 594 + 595 + consume_skb(skb); 596 + 597 + return NETDEV_TX_OK; 598 + } 599 + 600 + static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) 601 + { 602 + if (static_branch_unlikely(&ocelot_fdma_enabled)) 603 + return ocelot_port_xmit_fdma(skb, dev); 604 + 605 + return ocelot_port_xmit_inj(skb, dev); 611 606 } 612 607 613 608 enum ocelot_action_type {
+6 -6
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
··· 688 688 689 689 /* probe for IPv6 TSO support */ 690 690 mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; 691 - cmd.data0 = 0, 692 - cmd.data1 = 0, 693 - cmd.data2 = 0, 691 + cmd.data0 = 0; 692 + cmd.data1 = 0; 693 + cmd.data2 = 0; 694 694 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, 695 695 &cmd, 0); 696 696 if (status == 0) { ··· 821 821 int status, ctl; 822 822 823 823 ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL; 824 - cmd.data0 = 0, 825 - cmd.data1 = 0, 826 - cmd.data2 = 0, 824 + cmd.data0 = 0; 825 + cmd.data1 = 0; 826 + cmd.data2 = 0; 827 827 status = myri10ge_send_cmd(mgp, ctl, &cmd, 0); 828 828 829 829 if (status) {
+17 -3
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
··· 5040 5040 if (!priv->sph_active) 5041 5041 return 0; 5042 5042 5043 - /* Not last descriptor */ 5044 - if (status & rx_not_ls) 5043 + /* For GMAC4, when split header is enabled, in some rare cases, the 5044 + * hardware does not fill buf2 of the first descriptor with payload. 5045 + * Thus we cannot assume buf2 is always fully filled if it is not 5046 + * the last descriptor. Otherwise, the length of buf2 of the second 5047 + * descriptor will be calculated wrong and cause an oops. 5048 + * 5049 + * If this is the last descriptor, 'plen' is the length of the 5050 + * received packet that was transferred to system memory. 5051 + * Otherwise, it is the accumulated number of bytes that have been 5052 + * transferred for the current packet. 5053 + * 5054 + * Thus 'plen - len' always gives the correct length of buf2. 5055 + */ 5056 + 5057 + /* Not GMAC4 and not last descriptor */ 5058 + if (priv->plat->core_type != DWMAC_CORE_GMAC4 && (status & rx_not_ls)) 5045 5059 return priv->dma_conf.dma_buf_sz; 5046 5060 5061 + /* GMAC4 or last descriptor */ 5047 5062 plen = stmmac_get_rx_frame_len(priv, p, coe); 5048 5063 5049 - /* Last descriptor */ 5050 5064 return plen - len; 5051 5065 } 5052 5066
+5
drivers/net/macvlan.c
··· 1572 1572 if (create) 1573 1573 macvlan_port_destroy(port->dev); 1574 1574 } 1575 + /* @dev might have been made visible before an error was detected. 1576 + * Make sure to observe an RCU grace period before our caller 1577 + * (rtnl_newlink()) frees it. 1578 + */ 1579 + synchronize_net(); 1575 1580 return err; 1576 1581 } 1577 1582 EXPORT_SYMBOL_GPL(macvlan_common_newlink);
+35 -22
drivers/net/ovpn/io.c
··· 355 355 struct ovpn_priv *ovpn = netdev_priv(dev); 356 356 struct sk_buff *segments, *curr, *next; 357 357 struct sk_buff_head skb_list; 358 + unsigned int tx_bytes = 0; 358 359 struct ovpn_peer *peer; 359 360 __be16 proto; 360 361 int ret; ··· 366 365 /* verify IP header size in network packet */ 367 366 proto = ovpn_ip_check_protocol(skb); 368 367 if (unlikely(!proto || skb->protocol != proto)) 369 - goto drop; 368 + goto drop_no_peer; 369 + 370 + /* retrieve peer serving the destination IP of this packet */ 371 + peer = ovpn_peer_get_by_dst(ovpn, skb); 372 + if (unlikely(!peer)) { 373 + switch (skb->protocol) { 374 + case htons(ETH_P_IP): 375 + net_dbg_ratelimited("%s: no peer to send data to dst=%pI4\n", 376 + netdev_name(ovpn->dev), 377 + &ip_hdr(skb)->daddr); 378 + break; 379 + case htons(ETH_P_IPV6): 380 + net_dbg_ratelimited("%s: no peer to send data to dst=%pI6c\n", 381 + netdev_name(ovpn->dev), 382 + &ipv6_hdr(skb)->daddr); 383 + break; 384 + } 385 + goto drop_no_peer; 386 + } 387 + /* dst was needed for peer selection - it can now be dropped */ 388 + skb_dst_drop(skb); 370 389 371 390 if (skb_is_gso(skb)) { 372 391 segments = skb_gso_segment(skb, 0); ··· 415 394 continue; 416 395 } 417 396 397 + /* only count what we actually send */ 398 + tx_bytes += curr->len; 418 399 __skb_queue_tail(&skb_list, curr); 400 + } 401 + 402 + /* no segments survived: don't jump to 'drop' because we already 403 + * incremented the counter for each failure in the loop 404 + */ 405 + if (unlikely(skb_queue_empty(&skb_list))) { 406 + ovpn_peer_put(peer); 407 + return NETDEV_TX_OK; 419 408 } 420 409 skb_list.prev->next = NULL; 421 410 422 - /* retrieve peer serving the destination IP of this packet */ 423 - peer = ovpn_peer_get_by_dst(ovpn, skb); 424 - if (unlikely(!peer)) { 425 - switch (skb->protocol) { 426 - case htons(ETH_P_IP): 427 - net_dbg_ratelimited("%s: no peer to send data to dst=%pI4\n", 428 - netdev_name(ovpn->dev), 429 - &ip_hdr(skb)->daddr); 430 - break; 431 
- case htons(ETH_P_IPV6): 432 - net_dbg_ratelimited("%s: no peer to send data to dst=%pI6c\n", 433 - netdev_name(ovpn->dev), 434 - &ipv6_hdr(skb)->daddr); 435 - break; 436 - } 437 - goto drop; 438 - } 439 - /* dst was needed for peer selection - it can now be dropped */ 440 - skb_dst_drop(skb); 441 - 442 - ovpn_peer_stats_increment_tx(&peer->vpn_stats, skb->len); 411 + ovpn_peer_stats_increment_tx(&peer->vpn_stats, tx_bytes); 443 412 ovpn_send(ovpn, skb_list.next, peer); 444 413 445 414 return NETDEV_TX_OK; 446 415 447 416 drop: 417 + ovpn_peer_put(peer); 418 + drop_no_peer: 448 419 dev_dstats_tx_dropped(ovpn->dev); 449 420 skb_tx_error(skb); 450 421 kfree_skb_list(skb);
+22 -19
drivers/net/ovpn/socket.c
··· 200 200 ovpn_sock->sk = sk; 201 201 kref_init(&ovpn_sock->refcount); 202 202 203 - /* the newly created ovpn_socket is holding reference to sk, 204 - * therefore we increase its refcounter. 205 - * 206 - * This ovpn_socket instance is referenced by all peers 207 - * using the same socket. 208 - * 209 - * ovpn_socket_release() will take care of dropping the reference. 210 - */ 211 - sock_hold(sk); 212 - 213 - ret = ovpn_socket_attach(ovpn_sock, sock, peer); 214 - if (ret < 0) { 215 - sock_put(sk); 216 - kfree(ovpn_sock); 217 - ovpn_sock = ERR_PTR(ret); 218 - goto sock_release; 219 - } 220 - 221 203 /* TCP sockets are per-peer, therefore they are linked to their unique 222 204 * peer 223 205 */ ··· 216 234 GFP_KERNEL); 217 235 } 218 236 219 - rcu_assign_sk_user_data(sk, ovpn_sock); 237 + /* the newly created ovpn_socket is holding reference to sk, 238 + * therefore we increase its refcounter. 239 + * 240 + * This ovpn_socket instance is referenced by all peers 241 + * using the same socket. 242 + * 243 + * ovpn_socket_release() will take care of dropping the reference. 244 + */ 245 + sock_hold(sk); 246 + 247 + ret = ovpn_socket_attach(ovpn_sock, sock, peer); 248 + if (ret < 0) { 249 + if (sk->sk_protocol == IPPROTO_TCP) 250 + ovpn_peer_put(peer); 251 + else if (sk->sk_protocol == IPPROTO_UDP) 252 + netdev_put(peer->ovpn->dev, &ovpn_sock->dev_tracker); 253 + 254 + sock_put(sk); 255 + kfree(ovpn_sock); 256 + ovpn_sock = ERR_PTR(ret); 257 + } 258 + 220 259 sock_release: 221 260 release_sock(sk); 222 261 return ovpn_sock;
+20 -3
drivers/net/ovpn/tcp.c
··· 199 199 sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready; 200 200 sk->sk_write_space = peer->tcp.sk_cb.sk_write_space; 201 201 sk->sk_prot = peer->tcp.sk_cb.prot; 202 - sk->sk_socket->ops = peer->tcp.sk_cb.ops; 202 + 203 + /* tcp_close() may race this function and could set 204 + * sk->sk_socket to NULL. It does so by invoking 205 + * sock_orphan(), which holds sk_callback_lock before 206 + * doing the assignment. 207 + * 208 + * For this reason we acquire the same lock to avoid 209 + * sk_socket to disappear under our feet 210 + */ 211 + write_lock_bh(&sk->sk_callback_lock); 212 + if (sk->sk_socket) 213 + sk->sk_socket->ops = peer->tcp.sk_cb.ops; 214 + write_unlock_bh(&sk->sk_callback_lock); 203 215 204 216 rcu_assign_sk_user_data(sk, NULL); 205 217 } ··· 499 487 /* make sure no pre-existing encapsulation handler exists */ 500 488 if (ovpn_sock->sk->sk_user_data) 501 489 return -EBUSY; 490 + rcu_assign_sk_user_data(ovpn_sock->sk, ovpn_sock); 502 491 503 492 /* only a fully connected socket is expected. Connection should be 504 493 * handled in userspace ··· 508 495 net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n", 509 496 netdev_name(peer->ovpn->dev), 510 497 ovpn_sock->sk->sk_state); 511 - return -EINVAL; 498 + ret = -EINVAL; 499 + goto err; 512 500 } 513 501 514 502 ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb); 515 503 if (ret < 0) { 516 504 DEBUG_NET_WARN_ON_ONCE(1); 517 - return ret; 505 + goto err; 518 506 } 519 507 520 508 INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work); ··· 550 536 strp_check_rcv(&peer->tcp.strp); 551 537 552 538 return 0; 539 + err: 540 + rcu_assign_sk_user_data(ovpn_sock->sk, NULL); 541 + return ret; 553 542 } 554 543 555 544 static void ovpn_tcp_close(struct sock *sk, long timeout)
+1
drivers/net/ovpn/udp.c
··· 386 386 struct ovpn_priv *ovpn) 387 387 { 388 388 struct udp_tunnel_sock_cfg cfg = { 389 + .sk_user_data = ovpn_sock, 389 390 .encap_type = UDP_ENCAP_OVPNINUDP, 390 391 .encap_rcv = ovpn_udp_encap_recv, 391 392 .encap_destroy = ovpn_udp_encap_destroy,
+3
drivers/net/phy/phy_device.c
··· 3499 3499 3500 3500 port->parent_type = PHY_PORT_PHY; 3501 3501 port->phy = phydev; 3502 + 3503 + linkmode_copy(port->supported, phydev->supported); 3504 + 3502 3505 err = phy_add_port(phydev, port); 3503 3506 if (err) { 3504 3507 phy_port_destroy(port);
+19 -14
drivers/net/phy/phy_port.c
··· 53 53 enum ethtool_link_medium medium; 54 54 struct phy_port *port; 55 55 const char *med_str; 56 - u32 pairs = 0, mediums = 0; 56 + u32 pairs = 0; 57 57 int ret; 58 58 59 59 ret = fwnode_property_read_string(fwnode, "media", &med_str); ··· 85 85 return ERR_PTR(-EINVAL); 86 86 } 87 87 88 - mediums |= BIT(medium); 89 - 90 - if (!mediums) 91 - return ERR_PTR(-EINVAL); 92 - 93 88 port = phy_port_alloc(); 94 89 if (!port) 95 90 return ERR_PTR(-ENOMEM); 96 91 97 92 port->pairs = pairs; 98 - port->mediums = mediums; 93 + port->mediums = BIT(medium); 99 94 100 95 return port; 101 96 } ··· 108 113 */ 109 114 void phy_port_update_supported(struct phy_port *port) 110 115 { 111 - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0 }; 116 + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {0}; 112 117 unsigned long mode; 113 118 int i; 114 - 115 - for_each_set_bit(i, &port->mediums, __ETHTOOL_LINK_MEDIUM_LAST) { 116 - linkmode_zero(supported); 117 - phy_caps_medium_get_supported(supported, i, port->pairs); 118 - linkmode_or(port->supported, port->supported, supported); 119 - } 120 119 121 120 /* If there's no pairs specified, we grab the default number of 122 121 * pairs as the max of the default pairs for each linkmode ··· 120 131 __ETHTOOL_LINK_MODE_MASK_NBITS) 121 132 port->pairs = max_t(int, port->pairs, 122 133 ethtool_linkmode_n_pairs(mode)); 134 + 135 + for_each_set_bit(i, &port->mediums, __ETHTOOL_LINK_MEDIUM_LAST) { 136 + __ETHTOOL_DECLARE_LINK_MODE_MASK(med_supported) = {0}; 137 + 138 + phy_caps_medium_get_supported(med_supported, i, port->pairs); 139 + linkmode_or(supported, supported, med_supported); 140 + } 141 + 142 + /* If port->supported is already populated, filter it out with the 143 + * medium/pair support. Otherwise, let's just use this medium-based 144 + * support as the port's supported list. 
145 + */ 146 + if (linkmode_empty(port->supported)) 147 + linkmode_copy(port->supported, supported); 148 + else 149 + linkmode_and(port->supported, supported, port->supported); 123 150 124 151 /* Serdes ports supported through SFP may not have any medium set, 125 152 * as they will output PHY_INTERFACE_MODE_XXX modes. In that case, derive
+31 -6
drivers/net/usb/catc.c
··· 59 59 #define RX_PKT_SZ 1600 /* Max size of receive packet for F5U011 */ 60 60 61 61 /* 62 + * USB endpoints. 63 + */ 64 + 65 + enum catc_usb_ep { 66 + CATC_USB_EP_CONTROL = 0, 67 + CATC_USB_EP_BULK = 1, 68 + CATC_USB_EP_INT_IN = 2, 69 + }; 70 + 71 + /* 62 72 * Control requests. 63 73 */ 64 74 ··· 775 765 u8 broadcast[ETH_ALEN]; 776 766 u8 *macbuf; 777 767 int pktsz, ret = -ENOMEM; 768 + static const u8 bulk_ep_addr[] = { 769 + CATC_USB_EP_BULK | USB_DIR_OUT, 770 + CATC_USB_EP_BULK | USB_DIR_IN, 771 + 0}; 772 + static const u8 int_ep_addr[] = { 773 + CATC_USB_EP_INT_IN | USB_DIR_IN, 774 + 0}; 778 775 779 776 macbuf = kmalloc(ETH_ALEN, GFP_KERNEL); 780 777 if (!macbuf) ··· 791 774 intf->altsetting->desc.bInterfaceNumber, 1)) { 792 775 dev_err(dev, "Can't set altsetting 1.\n"); 793 776 ret = -EIO; 777 + goto fail_mem; 778 + } 779 + 780 + /* Verify that all required endpoints are present */ 781 + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || 782 + !usb_check_int_endpoints(intf, int_ep_addr)) { 783 + dev_err(dev, "Missing or invalid endpoints\n"); 784 + ret = -ENODEV; 794 785 goto fail_mem; 795 786 } 796 787 ··· 846 821 usb_fill_control_urb(catc->ctrl_urb, usbdev, usb_sndctrlpipe(usbdev, 0), 847 822 NULL, NULL, 0, catc_ctrl_done, catc); 848 823 849 - usb_fill_bulk_urb(catc->tx_urb, usbdev, usb_sndbulkpipe(usbdev, 1), 850 - NULL, 0, catc_tx_done, catc); 824 + usb_fill_bulk_urb(catc->tx_urb, usbdev, usb_sndbulkpipe(usbdev, CATC_USB_EP_BULK), 825 + NULL, 0, catc_tx_done, catc); 851 826 852 - usb_fill_bulk_urb(catc->rx_urb, usbdev, usb_rcvbulkpipe(usbdev, 1), 853 - catc->rx_buf, pktsz, catc_rx_done, catc); 827 + usb_fill_bulk_urb(catc->rx_urb, usbdev, usb_rcvbulkpipe(usbdev, CATC_USB_EP_BULK), 828 + catc->rx_buf, pktsz, catc_rx_done, catc); 854 829 855 - usb_fill_int_urb(catc->irq_urb, usbdev, usb_rcvintpipe(usbdev, 2), 856 - catc->irq_buf, 2, catc_irq_done, catc, 1); 830 + usb_fill_int_urb(catc->irq_urb, usbdev, usb_rcvintpipe(usbdev, CATC_USB_EP_INT_IN), 831 + 
catc->irq_buf, 2, catc_irq_done, catc, 1); 857 832 858 833 if (!catc->is_f5u011) { 859 834 u32 *buf;
+7 -1
drivers/net/wireless/intel/ipw2x00/ipw2200.c
··· 11387 11387 {PCI_VENDOR_ID_INTEL, 0x1043, 0x8086, 0x2754, 0, 0, 0}, 11388 11388 {PCI_VENDOR_ID_INTEL, 0x1043, 0x8086, 0x2761, 0, 0, 0}, 11389 11389 {PCI_VENDOR_ID_INTEL, 0x1043, 0x8086, 0x2762, 0, 0, 0}, 11390 - {PCI_VDEVICE(INTEL, 0x104f), 0}, 11390 + /* 11391 + * This ID conflicts with i40e, but the devices can be differentiated 11392 + * because i40e devices use PCI_CLASS_NETWORK_ETHERNET and ipw2200 11393 + * devices use PCI_CLASS_NETWORK_OTHER. 11394 + */ 11395 + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x104f), 11396 + PCI_CLASS_NETWORK_OTHER << 8, 0xffff00, 0}, 11391 11397 {PCI_VDEVICE(INTEL, 0x4220), 0}, /* BG */ 11392 11398 {PCI_VDEVICE(INTEL, 0x4221), 0}, /* BG */ 11393 11399 {PCI_VDEVICE(INTEL, 0x4223), 0}, /* ABG */
+3 -2
drivers/net/xen-netback/xenbus.c
··· 735 735 */ 736 736 requested_num_queues = xenbus_read_unsigned(dev->otherend, 737 737 "multi-queue-num-queues", 1); 738 - if (requested_num_queues > xenvif_max_queues) { 738 + if (requested_num_queues > xenvif_max_queues || 739 + requested_num_queues == 0) { 739 740 /* buggy or malicious guest */ 740 741 xenbus_dev_fatal(dev, -EINVAL, 741 - "guest requested %u queues, exceeding the maximum of %u.", 742 + "guest requested %u queues, but valid range is 1 - %u.", 742 743 requested_num_queues, xenvif_max_queues); 743 744 return; 744 745 }
+2 -2
include/linux/mlx5/driver.h
··· 1282 1282 static inline int mlx5_core_is_mp_slave(struct mlx5_core_dev *dev) 1283 1283 { 1284 1284 return MLX5_CAP_GEN(dev, affiliate_nic_vport_criteria) && 1285 - MLX5_CAP_GEN(dev, num_vhca_ports) <= 1; 1285 + MLX5_CAP_GEN_MAX(dev, num_vhca_ports) <= 1; 1286 1286 } 1287 1287 1288 1288 static inline int mlx5_core_is_mp_master(struct mlx5_core_dev *dev) 1289 1289 { 1290 - return MLX5_CAP_GEN(dev, num_vhca_ports) > 1; 1290 + return MLX5_CAP_GEN_MAX(dev, num_vhca_ports) > 1; 1291 1291 } 1292 1292 1293 1293 static inline int mlx5_core_mp_enabled(struct mlx5_core_dev *dev)
+1 -1
include/linux/netfilter/nf_conntrack_amanda.h
··· 7 7 #include <linux/skbuff.h> 8 8 #include <net/netfilter/nf_conntrack_expect.h> 9 9 10 - extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, 10 + extern unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, 11 11 enum ip_conntrack_info ctinfo, 12 12 unsigned int protoff, 13 13 unsigned int matchoff,
+1 -1
include/linux/netfilter/nf_conntrack_ftp.h
··· 26 26 27 27 /* For NAT to hook in when we find a packet which describes what other 28 28 * connection we should expect. */ 29 - extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, 29 + extern unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, 30 30 enum ip_conntrack_info ctinfo, 31 31 enum nf_ct_ftp_type type, 32 32 unsigned int protoff,
+1 -1
include/linux/netfilter/nf_conntrack_irc.h
··· 8 8 9 9 #define IRC_PORT 6667 10 10 11 - extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, 11 + extern unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, 12 12 enum ip_conntrack_info ctinfo, 13 13 unsigned int protoff, 14 14 unsigned int matchoff,
+1 -1
include/linux/netfilter/nf_conntrack_snmp.h
··· 5 5 #include <linux/netfilter.h> 6 6 #include <linux/skbuff.h> 7 7 8 - extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, 8 + extern int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, 9 9 unsigned int protoff, 10 10 struct nf_conn *ct, 11 11 enum ip_conntrack_info ctinfo);
+1 -1
include/linux/netfilter/nf_conntrack_tftp.h
··· 19 19 #define TFTP_OPCODE_ACK 4 20 20 #define TFTP_OPCODE_ERROR 5 21 21 22 - extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, 22 + extern unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, 23 23 enum ip_conntrack_info ctinfo, 24 24 struct nf_conntrack_expect *exp); 25 25
+2 -1
include/net/addrconf.h
··· 8 8 9 9 #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ 10 10 11 - #define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ 11 + /* TEMP_VALID_LIFETIME default value as specified in RFC 8981 3.8 */ 12 + #define TEMP_VALID_LIFETIME (2*86400) /* 2 days */ 12 13 #define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ 13 14 #define REGEN_MIN_ADVANCE (2) /* 2 seconds */ 14 15 #define REGEN_MAX_RETRY (3)
+2
include/net/ioam6.h
··· 60 60 struct ioam6_trace_hdr *trace, 61 61 bool is_input); 62 62 63 + u8 ioam6_trace_compute_nodelen(u32 trace_type); 64 + 63 65 int ioam6_init(void); 64 66 void ioam6_exit(void); 65 67
+7 -4
include/net/ipv6.h
··· 1213 1213 1214 1214 static inline int ip6_sock_set_v6only(struct sock *sk) 1215 1215 { 1216 - if (inet_sk(sk)->inet_num) 1217 - return -EINVAL; 1216 + int ret = 0; 1217 + 1218 1218 lock_sock(sk); 1219 - sk->sk_ipv6only = true; 1219 + if (inet_sk(sk)->inet_num) 1220 + ret = -EINVAL; 1221 + else 1222 + sk->sk_ipv6only = true; 1220 1223 release_sock(sk); 1221 - return 0; 1224 + return ret; 1222 1225 } 1223 1226 1224 1227 static inline void ip6_sock_set_recverr(struct sock *sk)
+7 -2
include/net/netns/ipv4.h
··· 88 88 int sysctl_tcp_rcvbuf_low_rtt; 89 89 __cacheline_group_end(netns_ipv4_read_rx); 90 90 91 + /* ICMP rate limiter hot cache line. */ 92 + __cacheline_group_begin_aligned(icmp); 93 + atomic_t icmp_global_credit; 94 + u32 icmp_global_stamp; 95 + __cacheline_group_end_aligned(icmp); 96 + 91 97 struct inet_timewait_death_row tcp_death_row; 92 98 struct udp_table *udp_table; 93 99 ··· 147 141 int sysctl_icmp_ratemask; 148 142 int sysctl_icmp_msgs_per_sec; 149 143 int sysctl_icmp_msgs_burst; 150 - atomic_t icmp_global_credit; 151 - u32 icmp_global_stamp; 144 + 152 145 u32 ip_rt_min_pmtu; 153 146 int ip_rt_mtu_expires; 154 147 int ip_rt_min_advmss;
+4
include/uapi/linux/netfilter_bridge.h
··· 5 5 /* bridge-specific defines for netfilter. 6 6 */ 7 7 8 + #ifndef __KERNEL__ 9 + #include <netinet/if_ether.h> /* for __UAPI_DEF_ETHHDR if defined */ 10 + #endif 11 + 8 12 #include <linux/in.h> 9 13 #include <linux/netfilter.h> 10 14 #include <linux/if_ether.h>
+18 -27
net/bridge/br_multicast.c
··· 244 244 245 245 lockdep_assert_held_once(&port->br->multicast_lock); 246 246 247 - if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) 248 - return NULL; 249 - 250 247 /* Take RCU to access the vlan. */ 251 248 rcu_read_lock(); 252 249 253 250 vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid); 254 - if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) 251 + if (vlan) 255 252 pmctx = &vlan->port_mcast_ctx; 256 253 257 254 rcu_read_unlock(); ··· 698 701 u32 max = READ_ONCE(pmctx->mdb_max_entries); 699 702 u32 n = READ_ONCE(pmctx->mdb_n_entries); 700 703 701 - if (max && n >= max) { 704 + /* enforce the max limit when it's a port pmctx or a port-vlan pmctx 705 + * with snooping enabled 706 + */ 707 + if (!br_multicast_port_ctx_vlan_disabled(pmctx) && max && n >= max) { 702 708 NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u", 703 709 what, n, max); 704 710 return -E2BIG; ··· 736 736 return err; 737 737 } 738 738 739 - /* Only count on the VLAN context if VID is given, and if snooping on 740 - * that VLAN is enabled. 
741 - */ 739 + /* Only count on the VLAN context if VID is given */ 742 740 if (!group->vid) 743 741 return 0; 744 742 ··· 2009 2011 timer_setup(&pmctx->ip6_own_query.timer, 2010 2012 br_ip6_multicast_port_query_expired, 0); 2011 2013 #endif 2014 + /* initialize mdb_n_entries if a new port vlan is being created */ 2015 + if (vlan) { 2016 + struct net_bridge_port_group *pg; 2017 + u32 n = 0; 2018 + 2019 + spin_lock_bh(&port->br->multicast_lock); 2020 + hlist_for_each_entry(pg, &port->mglist, mglist) 2021 + if (pg->key.addr.vid == vlan->vid) 2022 + n++; 2023 + WRITE_ONCE(pmctx->mdb_n_entries, n); 2024 + spin_unlock_bh(&port->br->multicast_lock); 2025 + } 2012 2026 } 2013 2027 2014 2028 void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) ··· 2103 2093 if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) { 2104 2094 br_ip4_multicast_add_router(brmctx, pmctx); 2105 2095 br_ip6_multicast_add_router(brmctx, pmctx); 2106 - } 2107 - 2108 - if (br_multicast_port_ctx_is_vlan(pmctx)) { 2109 - struct net_bridge_port_group *pg; 2110 - u32 n = 0; 2111 - 2112 - /* The mcast_n_groups counter might be wrong. First, 2113 - * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries 2114 - * are flushed, thus mcast_n_groups after the toggle does not 2115 - * reflect the true values. And second, permanent entries added 2116 - * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected 2117 - * either. Thus we have to refresh the counter. 2118 - */ 2119 - 2120 - hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) { 2121 - if (pg->key.addr.vid == pmctx->vlan->vid) 2122 - n++; 2123 - } 2124 - WRITE_ONCE(pmctx->mdb_n_entries, n); 2125 2096 } 2126 2097 } 2127 2098
+13 -6
net/core/dev.c
··· 231 231 static inline void backlog_lock_irq_save(struct softnet_data *sd, 232 232 unsigned long *flags) 233 233 { 234 - if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) 234 + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 235 235 spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags); 236 - else 236 + } else { 237 237 local_irq_save(*flags); 238 + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) 239 + spin_lock(&sd->input_pkt_queue.lock); 240 + } 238 241 } 239 242 240 243 static inline void backlog_lock_irq_disable(struct softnet_data *sd) ··· 251 248 static inline void backlog_unlock_irq_restore(struct softnet_data *sd, 252 249 unsigned long flags) 253 250 { 254 - if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) 255 - spin_unlock(&sd->input_pkt_queue.lock); 256 - local_irq_restore(flags); 251 + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 252 + spin_unlock_irqrestore(&sd->input_pkt_queue.lock, flags); 253 + } else { 254 + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) 255 + spin_unlock(&sd->input_pkt_queue.lock); 256 + local_irq_restore(flags); 257 + } 257 258 } 258 259 259 260 static inline void backlog_unlock_irq_enable(struct softnet_data *sd) ··· 744 737 { 745 738 int k = stack->num_paths++; 746 739 747 - if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX)) 740 + if (k >= NET_DEVICE_PATH_STACK_MAX) 748 741 return NULL; 749 742 750 743 return &stack->path[k];
+6 -1
net/core/skbuff.c
··· 7266 7266 { 7267 7267 struct skb_defer_node *sdn; 7268 7268 unsigned long defer_count; 7269 - int cpu = skb->alloc_cpu; 7270 7269 unsigned int defer_max; 7271 7270 bool kick; 7271 + int cpu; 7272 7272 7273 + /* zero copy notifications should not be delayed. */ 7274 + if (skb_zcopy(skb)) 7275 + goto nodefer; 7276 + 7277 + cpu = skb->alloc_cpu; 7273 7278 if (cpu == raw_smp_processor_id() || 7274 7279 WARN_ON_ONCE(cpu >= nr_cpu_ids) || 7275 7280 !cpu_online(cpu)) {
+12 -5
net/ipv4/icmp.c
··· 250 250 if (delta < HZ / 50) 251 251 return false; 252 252 253 - incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ; 253 + incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec); 254 + incr = div_u64((u64)incr * delta, HZ); 254 255 if (!incr) 255 256 return false; 256 257 ··· 316 315 struct dst_entry *dst = &rt->dst; 317 316 struct inet_peer *peer; 318 317 struct net_device *dev; 318 + int peer_timeout; 319 319 bool rc = true; 320 320 321 321 if (!apply_ratelimit) 322 322 return true; 323 323 324 + peer_timeout = READ_ONCE(net->ipv4.sysctl_icmp_ratelimit); 325 + if (!peer_timeout) 326 + goto out; 327 + 324 328 /* No rate limit on loopback */ 325 329 rcu_read_lock(); 326 330 dev = dst_dev_rcu(dst); 327 331 if (dev && (dev->flags & IFF_LOOPBACK)) 328 - goto out; 332 + goto out_unlock; 329 333 330 334 peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 331 335 l3mdev_master_ifindex_rcu(dev)); 332 - rc = inet_peer_xrlim_allow(peer, 333 - READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); 334 - out: 336 + rc = inet_peer_xrlim_allow(peer, peer_timeout); 337 + 338 + out_unlock: 335 339 rcu_read_unlock(); 340 + out: 336 341 if (!rc) 337 342 __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); 338 343 else
+20 -13
net/ipv4/ping.c
··· 148 148 pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); 149 149 spin_lock(&ping_table.lock); 150 150 if (sk_del_node_init_rcu(sk)) { 151 - isk->inet_num = 0; 151 + WRITE_ONCE(isk->inet_num, 0); 152 152 isk->inet_sport = 0; 153 153 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 154 154 } ··· 181 181 } 182 182 183 183 sk_for_each_rcu(sk, hslot) { 184 + int bound_dev_if; 185 + 184 186 if (!net_eq(sock_net(sk), net)) 185 187 continue; 186 188 isk = inet_sk(sk); 187 189 188 190 pr_debug("iterate\n"); 189 - if (isk->inet_num != ident) 191 + if (READ_ONCE(isk->inet_num) != ident) 190 192 continue; 191 193 194 + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); 192 195 if (skb->protocol == htons(ETH_P_IP) && 193 196 sk->sk_family == AF_INET) { 194 - pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, 195 - (int) isk->inet_num, &isk->inet_rcv_saddr, 196 - sk->sk_bound_dev_if); 197 + __be32 rcv_saddr = READ_ONCE(isk->inet_rcv_saddr); 197 198 198 - if (isk->inet_rcv_saddr && 199 - isk->inet_rcv_saddr != ip_hdr(skb)->daddr) 199 + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, 200 + ident, &rcv_saddr, 201 + bound_dev_if); 202 + 203 + if (rcv_saddr && rcv_saddr != ip_hdr(skb)->daddr) 200 204 continue; 201 205 #if IS_ENABLED(CONFIG_IPV6) 202 206 } else if (skb->protocol == htons(ETH_P_IPV6) && 203 207 sk->sk_family == AF_INET6) { 204 208 205 209 pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, 206 - (int) isk->inet_num, 210 + ident, 207 211 &sk->sk_v6_rcv_saddr, 208 - sk->sk_bound_dev_if); 212 + bound_dev_if); 209 213 210 214 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && 211 215 !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, ··· 220 216 continue; 221 217 } 222 218 223 - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && 224 - sk->sk_bound_dev_if != sdif) 219 + if (bound_dev_if && bound_dev_if != dif && 220 + bound_dev_if != sdif) 225 221 continue; 226 222 227 223 goto exit; ··· 396 392 if (saddr->sa_family == AF_INET) { 397 393 struct 
inet_sock *isk = inet_sk(sk); 398 394 struct sockaddr_in *addr = (struct sockaddr_in *) saddr; 399 - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; 395 + 396 + isk->inet_saddr = addr->sin_addr.s_addr; 397 + WRITE_ONCE(isk->inet_rcv_saddr, addr->sin_addr.s_addr); 400 398 #if IS_ENABLED(CONFIG_IPV6) 401 399 } else if (saddr->sa_family == AF_INET6) { 402 400 struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; ··· 856 850 struct sk_buff *skb; 857 851 int copied, err; 858 852 859 - pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); 853 + pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, 854 + READ_ONCE(isk->inet_num)); 860 855 861 856 err = -EOPNOTSUPP; 862 857 if (flags & MSG_OOB)
+1 -1
net/ipv6/af_inet6.c
··· 952 952 int err = 0; 953 953 954 954 net->ipv6.sysctl.bindv6only = 0; 955 - net->ipv6.sysctl.icmpv6_time = 1*HZ; 955 + net->ipv6.sysctl.icmpv6_time = HZ / 10; 956 956 net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; 957 957 net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; 958 958 net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+5
net/ipv6/exthdrs.c
··· 931 931 if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) 932 932 goto drop; 933 933 934 + /* Inconsistent Pre-allocated Trace header */ 935 + if (trace->nodelen != 936 + ioam6_trace_compute_nodelen(be32_to_cpu(trace->type_be32))) 937 + goto drop; 938 + 934 939 /* Ignore if the IOAM namespace is unknown */ 935 940 ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id); 936 941 if (!ns)
+7 -8
net/ipv6/icmp.c
··· 217 217 } else if (dev && (dev->flags & IFF_LOOPBACK)) { 218 218 res = true; 219 219 } else { 220 - struct rt6_info *rt = dst_rt6_info(dst); 221 - int tmo = net->ipv6.sysctl.icmpv6_time; 220 + int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time); 222 221 struct inet_peer *peer; 223 222 224 - /* Give more bandwidth to wider prefixes. */ 225 - if (rt->rt6i_dst.plen < 128) 226 - tmo >>= ((128 - rt->rt6i_dst.plen)>>5); 227 - 228 - peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); 229 - res = inet_peer_xrlim_allow(peer, tmo); 223 + if (!tmo) { 224 + res = true; 225 + } else { 226 + peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); 227 + res = inet_peer_xrlim_allow(peer, tmo); 228 + } 230 229 } 231 230 rcu_read_unlock(); 232 231 if (!res)
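The icmp.c change short-circuits `icmpv6_xrlim_allow()` when the sysctl interval is zero, skipping the inet_peer lookup entirely. A toy token-bucket sketch of that control flow; `struct peer_bucket` and its fields are illustrative, not the kernel's `inet_peer` layout:

```c
#include <stdint.h>
#include <stdbool.h>

/* Hypothetical token-bucket state standing in for inet_peer's
 * rate-limit fields; names are illustrative only. */
struct peer_bucket {
    int64_t tokens_ms;      /* accumulated credit, in milliseconds */
    int64_t last_now_ms;
};

/* Mirrors the shape of the icmpv6_xrlim_allow() fix: when the
 * configured interval is zero, rate limiting is disabled, so we
 * answer "allowed" *before* paying for the expensive peer lookup. */
bool xrlim_allow(struct peer_bucket *peer, int64_t now_ms, int tmo_ms)
{
    if (tmo_ms == 0)
        return true;                    /* fast path added by the patch */

    peer->tokens_ms += now_ms - peer->last_now_ms;
    peer->last_now_ms = now_ms;
    if (peer->tokens_ms > 4 * tmo_ms)   /* cap the burst, roughly as
                                         * inet_peer_xrlim_allow() does */
        peer->tokens_ms = 4 * tmo_ms;
    if (peer->tokens_ms >= tmo_ms) {
        peer->tokens_ms -= tmo_ms;
        return true;
    }
    return false;
}
```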
+14
net/ipv6/ioam6.c
··· 690 690 return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); 691 691 } 692 692 693 + #define IOAM6_MASK_SHORT_FIELDS 0xff1ffc00 694 + #define IOAM6_MASK_WIDE_FIELDS 0x00e00000 695 + 696 + u8 ioam6_trace_compute_nodelen(u32 trace_type) 697 + { 698 + u8 nodelen = hweight32(trace_type & IOAM6_MASK_SHORT_FIELDS) 699 + * (sizeof(__be32) / 4); 700 + 701 + nodelen += hweight32(trace_type & IOAM6_MASK_WIDE_FIELDS) 702 + * (sizeof(__be64) / 4); 703 + 704 + return nodelen; 705 + } 706 + 693 707 static void __ioam6_fill_trace_data(struct sk_buff *skb, 694 708 struct ioam6_namespace *ns, 695 709 struct ioam6_trace_hdr *trace,
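The new `ioam6_trace_compute_nodelen()` derives the per-node data length from the trace-type bitmask via `hweight32()`: each set bit in the "short" mask contributes one 4-octet unit and each set bit in the "wide" mask contributes two. A userspace restatement using `__builtin_popcount` in place of `hweight32`:

```c
#include <stdint.h>

/* Masks copied from the patch: bits of the IOAM trace-type word that
 * select 4-octet ("short") and 8-octet ("wide") per-node fields. */
#define IOAM6_MASK_SHORT_FIELDS 0xff1ffc00u
#define IOAM6_MASK_WIDE_FIELDS  0x00e00000u

/* nodelen is expressed in 4-octet units, so each short field counts
 * as 1 and each wide field as 2; popcount stands in for hweight32. */
uint8_t trace_compute_nodelen(uint32_t trace_type)
{
    uint8_t nodelen = __builtin_popcount(trace_type & IOAM6_MASK_SHORT_FIELDS)
                    * (sizeof(uint32_t) / 4);

    nodelen += __builtin_popcount(trace_type & IOAM6_MASK_WIDE_FIELDS)
             * (sizeof(uint64_t) / 4);

    return nodelen;
}
```

The exthdrs.c hunk above then drops packets whose advertised `nodelen` disagrees with this recomputation, which is what closes the heap overflow in `__ioam6_fill_trace_data()`.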
+1 -9
net/ipv6/ioam6_iptunnel.c
··· 22 22 #include <net/ip6_route.h> 23 23 #include <net/addrconf.h> 24 24 25 - #define IOAM6_MASK_SHORT_FIELDS 0xff100000 26 - #define IOAM6_MASK_WIDE_FIELDS 0xe00000 27 - 28 25 struct ioam6_lwt_encap { 29 26 struct ipv6_hopopt_hdr eh; 30 27 u8 pad[2]; /* 2-octet padding for 4n-alignment */ ··· 90 93 trace->type.bit21 | trace->type.bit23) 91 94 return false; 92 95 93 - trace->nodelen = 0; 94 96 fields = be32_to_cpu(trace->type_be32); 95 - 96 - trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) 97 - * (sizeof(__be32) / 4); 98 - trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) 99 - * (sizeof(__be64) / 4); 97 + trace->nodelen = ioam6_trace_compute_nodelen(fields); 100 98 101 99 return true; 102 100 }
+1 -1
net/ipv6/ip6_fib.c
··· 1139 1139 fib6_add_gc_list(iter); 1140 1140 } 1141 1141 if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT)) && 1142 - !iter->fib6_nh->fib_nh_gw_family) { 1142 + (iter->nh || !iter->fib6_nh->fib_nh_gw_family)) { 1143 1143 iter->fib6_flags &= ~RTF_ADDRCONF; 1144 1144 iter->fib6_flags &= ~RTF_PREFIX_RT; 1145 1145 }
+1
net/mctp/device.c
··· 70 70 return -EMSGSIZE; 71 71 72 72 hdr = nlmsg_data(nlh); 73 + memset(hdr, 0, sizeof(*hdr)); 73 74 hdr->ifa_family = AF_MCTP; 74 75 hdr->ifa_prefixlen = 0; 75 76 hdr->ifa_flags = 0;
+1
net/mctp/neigh.c
··· 218 218 return -EMSGSIZE; 219 219 220 220 hdr = nlmsg_data(nlh); 221 + memset(hdr, 0, sizeof(*hdr)); 221 222 hdr->ndm_family = AF_MCTP; 222 223 hdr->ndm_ifindex = dev->ifindex; 223 224 hdr->ndm_state = 0; // TODO other state bits?
+1
net/mctp/route.c
··· 1643 1643 return -EMSGSIZE; 1644 1644 1645 1645 hdr = nlmsg_data(nlh); 1646 + memset(hdr, 0, sizeof(*hdr)); 1646 1647 hdr->rtm_family = AF_MCTP; 1647 1648 1648 1649 /* we use the _len fields as a number of EIDs, rather than
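All three MCTP hunks apply the same fix: `memset()` the netlink header struct before filling individual fields, so padding bytes and any field the code forgets to set never leak kernel stack contents to user space. A self-contained sketch with a hypothetical `fake_ifaddrmsg` (not the real uapi struct):

```c
#include <string.h>
#include <stdint.h>

/* Simplified stand-in for struct ifaddrmsg; the compiler is free to
 * leave padding bytes (and any field we forget) holding stack garbage. */
struct fake_ifaddrmsg {
    uint8_t  ifa_family;
    uint8_t  ifa_prefixlen;
    uint8_t  ifa_flags;
    uint8_t  ifa_scope;
    uint32_t ifa_index;
};

/* Mirrors the MCTP fix: zero the whole header first, then set the
 * fields we care about, so every byte later copied to user space is
 * initialised even if the struct grows new fields. */
void fill_hdr(struct fake_ifaddrmsg *hdr, uint8_t family, uint32_t ifindex)
{
    memset(hdr, 0, sizeof(*hdr));
    hdr->ifa_family = family;
    hdr->ifa_index = ifindex;
}
```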
+6 -12
net/netfilter/ipvs/ip_vs_proto_sctp.c
··· 10 10 #include <net/ip_vs.h> 11 11 12 12 static int 13 - sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); 13 + sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 14 + unsigned int sctphoff); 14 15 15 16 static int 16 17 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 109 108 int ret; 110 109 111 110 /* Some checks before mangling */ 112 - if (!sctp_csum_check(cp->af, skb, pp)) 111 + if (!sctp_csum_check(cp->af, skb, pp, sctphoff)) 113 112 return 0; 114 113 115 114 /* Call application helper if needed */ ··· 157 156 int ret; 158 157 159 158 /* Some checks before mangling */ 160 - if (!sctp_csum_check(cp->af, skb, pp)) 159 + if (!sctp_csum_check(cp->af, skb, pp, sctphoff)) 161 160 return 0; 162 161 163 162 /* Call application helper if needed */ ··· 186 185 } 187 186 188 187 static int 189 - sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 188 + sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 189 + unsigned int sctphoff) 190 190 { 191 - unsigned int sctphoff; 192 191 struct sctphdr *sh; 193 192 __le32 cmp, val; 194 - 195 - #ifdef CONFIG_IP_VS_IPV6 196 - if (af == AF_INET6) 197 - sctphoff = sizeof(struct ipv6hdr); 198 - else 199 - #endif 200 - sctphoff = ip_hdrlen(skb); 201 193 202 194 sh = (struct sctphdr *)(skb->data + sctphoff); 203 195 cmp = sh->checksum;
+7 -14
net/netfilter/ipvs/ip_vs_proto_tcp.c
··· 28 28 #include <net/ip_vs.h> 29 29 30 30 static int 31 - tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); 31 + tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 32 + unsigned int tcphoff); 32 33 33 34 static int 34 35 tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 166 165 int ret; 167 166 168 167 /* Some checks before mangling */ 169 - if (!tcp_csum_check(cp->af, skb, pp)) 168 + if (!tcp_csum_check(cp->af, skb, pp, tcphoff)) 170 169 return 0; 171 170 172 171 /* Call application helper if needed */ ··· 244 243 int ret; 245 244 246 245 /* Some checks before mangling */ 247 - if (!tcp_csum_check(cp->af, skb, pp)) 246 + if (!tcp_csum_check(cp->af, skb, pp, tcphoff)) 248 247 return 0; 249 248 250 249 /* ··· 301 300 302 301 303 302 static int 304 - tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 303 + tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 304 + unsigned int tcphoff) 305 305 { 306 - unsigned int tcphoff; 307 - 308 - #ifdef CONFIG_IP_VS_IPV6 309 - if (af == AF_INET6) 310 - tcphoff = sizeof(struct ipv6hdr); 311 - else 312 - #endif 313 - tcphoff = ip_hdrlen(skb); 314 - 315 306 switch (skb->ip_summed) { 316 307 case CHECKSUM_NONE: 317 308 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); ··· 314 321 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 315 322 &ipv6_hdr(skb)->daddr, 316 323 skb->len - tcphoff, 317 - ipv6_hdr(skb)->nexthdr, 324 + IPPROTO_TCP, 318 325 skb->csum)) { 319 326 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 320 327 "Failed checksum for");
+7 -13
net/netfilter/ipvs/ip_vs_proto_udp.c
··· 24 24 #include <net/ip6_checksum.h> 25 25 26 26 static int 27 - udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); 27 + udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 28 + unsigned int udphoff); 28 29 29 30 static int 30 31 udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 155 154 int ret; 156 155 157 156 /* Some checks before mangling */ 158 - if (!udp_csum_check(cp->af, skb, pp)) 157 + if (!udp_csum_check(cp->af, skb, pp, udphoff)) 159 158 return 0; 160 159 161 160 /* ··· 238 237 int ret; 239 238 240 239 /* Some checks before mangling */ 241 - if (!udp_csum_check(cp->af, skb, pp)) 240 + if (!udp_csum_check(cp->af, skb, pp, udphoff)) 242 241 return 0; 243 242 244 243 /* ··· 297 296 298 297 299 298 static int 300 - udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 299 + udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 300 + unsigned int udphoff) 301 301 { 302 302 struct udphdr _udph, *uh; 303 - unsigned int udphoff; 304 - 305 - #ifdef CONFIG_IP_VS_IPV6 306 - if (af == AF_INET6) 307 - udphoff = sizeof(struct ipv6hdr); 308 - else 309 - #endif 310 - udphoff = ip_hdrlen(skb); 311 303 312 304 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); 313 305 if (uh == NULL) ··· 318 324 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 319 325 &ipv6_hdr(skb)->daddr, 320 326 skb->len - udphoff, 321 - ipv6_hdr(skb)->nexthdr, 327 + IPPROTO_UDP, 322 328 skb->csum)) { 323 329 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 324 330 "Failed checksum for");
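Besides threading the precomputed transport offset into the `*_csum_check()` helpers, the TCP and UDP hunks replace `ipv6_hdr(skb)->nexthdr` with the fixed `IPPROTO_TCP`/`IPPROTO_UDP` in `csum_ipv6_magic()`: when IPv6 extension headers are present, `nexthdr` names the first extension header, not the transport protocol, so the pseudo-header checksum would be computed over the wrong protocol number. A deliberately toy pseudo-header checksum (addresses omitted, not the kernel's `csum_ipv6_magic`) showing why the constant matters:

```c
#include <stdint.h>

#define IPPROTO_HOPOPTS 0    /* an extension header id, NOT a transport proto */
#define IPPROTO_UDP     17

/* One's-complement addition with end-around carry, as used by the
 * Internet checksum. */
static uint32_t csum_add(uint32_t sum, uint16_t v)
{
    sum += v;
    return (sum & 0xffff) + (sum >> 16);
}

/* Toy pseudo-header checksum over {payload length, protocol}.  The
 * point of the ip_vs fix is the 'proto' argument: it must be the real
 * transport protocol number, not whatever ipv6_hdr->nexthdr says. */
uint16_t pseudo_csum(uint32_t payload_len, uint8_t proto)
{
    uint32_t sum = 0;

    sum = csum_add(sum, payload_len >> 16);
    sum = csum_add(sum, payload_len & 0xffff);
    sum = csum_add(sum, proto);
    return (uint16_t)(~sum & 0xffff);
}
```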
+36 -10
net/netfilter/ipvs/ip_vs_xmit.c
··· 294 294 return true; 295 295 } 296 296 297 + /* rt has device that is down */ 298 + static bool rt_dev_is_down(const struct net_device *dev) 299 + { 300 + return dev && !netif_running(dev); 301 + } 302 + 297 303 /* Get route to destination or remote server */ 298 304 static int 299 305 __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ··· 315 309 316 310 if (dest) { 317 311 dest_dst = __ip_vs_dst_check(dest); 318 - if (likely(dest_dst)) 312 + if (likely(dest_dst)) { 319 313 rt = dst_rtable(dest_dst->dst_cache); 320 - else { 314 + if (ret_saddr) 315 + *ret_saddr = dest_dst->dst_saddr.ip; 316 + } else { 321 317 dest_dst = ip_vs_dest_dst_alloc(); 322 318 spin_lock_bh(&dest->dst_lock); 323 319 if (!dest_dst) { ··· 335 327 ip_vs_dest_dst_free(dest_dst); 336 328 goto err_unreach; 337 329 } 338 - __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 330 + /* It is forbidden to attach dest->dest_dst if 331 + * device is going down. 332 + */ 333 + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) 334 + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 335 + else 336 + noref = 0; 339 337 spin_unlock_bh(&dest->dst_lock); 340 338 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", 341 339 &dest->addr.ip, &dest_dst->dst_saddr.ip, 342 340 rcuref_read(&rt->dst.__rcuref)); 341 + if (ret_saddr) 342 + *ret_saddr = dest_dst->dst_saddr.ip; 343 + if (!noref) 344 + ip_vs_dest_dst_free(dest_dst); 343 345 } 344 - if (ret_saddr) 345 - *ret_saddr = dest_dst->dst_saddr.ip; 346 346 } else { 347 347 noref = 0; 348 348 ··· 487 471 488 472 if (dest) { 489 473 dest_dst = __ip_vs_dst_check(dest); 490 - if (likely(dest_dst)) 474 + if (likely(dest_dst)) { 491 475 rt = dst_rt6_info(dest_dst->dst_cache); 492 - else { 476 + if (ret_saddr) 477 + *ret_saddr = dest_dst->dst_saddr.in6; 478 + } else { 493 479 u32 cookie; 494 480 495 481 dest_dst = ip_vs_dest_dst_alloc(); ··· 512 494 } 513 495 rt = dst_rt6_info(dst); 514 496 cookie = rt6_get_cookie(rt); 515 - __ip_vs_dst_set(dest, dest_dst, 
&rt->dst, cookie); 497 + /* It is forbidden to attach dest->dest_dst if 498 + * device is going down. 499 + */ 500 + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) 501 + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); 502 + else 503 + noref = 0; 516 504 spin_unlock_bh(&dest->dst_lock); 517 505 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 518 506 &dest->addr.in6, &dest_dst->dst_saddr.in6, 519 507 rcuref_read(&rt->dst.__rcuref)); 508 + if (ret_saddr) 509 + *ret_saddr = dest_dst->dst_saddr.in6; 510 + if (!noref) 511 + ip_vs_dest_dst_free(dest_dst); 520 512 } 521 - if (ret_saddr) 522 - *ret_saddr = dest_dst->dst_saddr.in6; 523 513 } else { 524 514 noref = 0; 525 515 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
+7 -7
net/netfilter/nf_conntrack_amanda.c
··· 37 37 module_param(ts_algo, charp, 0400); 38 38 MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); 39 39 40 - unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, 41 - enum ip_conntrack_info ctinfo, 42 - unsigned int protoff, 43 - unsigned int matchoff, 44 - unsigned int matchlen, 45 - struct nf_conntrack_expect *exp) 46 - __read_mostly; 40 + unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, 41 + enum ip_conntrack_info ctinfo, 42 + unsigned int protoff, 43 + unsigned int matchoff, 44 + unsigned int matchlen, 45 + struct nf_conntrack_expect *exp) 46 + __read_mostly; 47 47 EXPORT_SYMBOL_GPL(nf_nat_amanda_hook); 48 48 49 49 enum amanda_strings {
+7 -7
net/netfilter/nf_conntrack_ftp.c
··· 43 43 static bool loose; 44 44 module_param(loose, bool, 0600); 45 45 46 - unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, 47 - enum ip_conntrack_info ctinfo, 48 - enum nf_ct_ftp_type type, 49 - unsigned int protoff, 50 - unsigned int matchoff, 51 - unsigned int matchlen, 52 - struct nf_conntrack_expect *exp); 46 + unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, 47 + enum ip_conntrack_info ctinfo, 48 + enum nf_ct_ftp_type type, 49 + unsigned int protoff, 50 + unsigned int matchoff, 51 + unsigned int matchlen, 52 + struct nf_conntrack_expect *exp); 53 53 EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); 54 54 55 55 static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
+5 -5
net/netfilter/nf_conntrack_h323_main.c
··· 1187 1187 { 1188 1188 struct net *net = nf_ct_net(ct); 1189 1189 struct nf_conntrack_expect *exp; 1190 - struct nf_conntrack_tuple tuple; 1190 + struct nf_conntrack_tuple tuple = { 1191 + .src.l3num = nf_ct_l3num(ct), 1192 + .dst.protonum = IPPROTO_TCP, 1193 + .dst.u.tcp.port = port, 1194 + }; 1191 1195 1192 - memset(&tuple.src.u3, 0, sizeof(tuple.src.u3)); 1193 - tuple.src.u.tcp.port = 0; 1194 1196 memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3)); 1195 - tuple.dst.u.tcp.port = port; 1196 - tuple.dst.protonum = IPPROTO_TCP; 1197 1197 1198 1198 exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); 1199 1199 if (exp && exp->master == ct)
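The h323 hunk swaps piecemeal `memset()`-plus-assignments for a designated initializer, which also fixes the previously missing `src.l3num`. In C, any member not named in a designated initializer list is zero-initialised, so nothing is left stale. A sketch with a hypothetical `fake_tuple` (much smaller than the real `nf_conntrack_tuple`):

```c
#include <stdint.h>

struct fake_tuple {
    uint32_t src_addr;
    uint16_t src_port;
    uint32_t dst_addr;
    uint16_t dst_port;
    uint8_t  protonum;
    uint8_t  l3num;
};

/* Designated initializer: members we name get our values, every
 * member we don't name (src_addr, src_port, dst_addr here) is
 * guaranteed zero -- no separate memset() needed. */
struct fake_tuple make_tuple(uint16_t port, uint8_t l3num)
{
    struct fake_tuple t = {
        .l3num    = l3num,
        .protonum = 6,       /* IPPROTO_TCP */
        .dst_port = port,
    };
    return t;
}
```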
+7 -6
net/netfilter/nf_conntrack_irc.c
··· 30 30 static char *irc_buffer; 31 31 static DEFINE_SPINLOCK(irc_buffer_lock); 32 32 33 - unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, 34 - enum ip_conntrack_info ctinfo, 35 - unsigned int protoff, 36 - unsigned int matchoff, 37 - unsigned int matchlen, 38 - struct nf_conntrack_expect *exp) __read_mostly; 33 + unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, 34 + enum ip_conntrack_info ctinfo, 35 + unsigned int protoff, 36 + unsigned int matchoff, 37 + unsigned int matchlen, 38 + struct nf_conntrack_expect *exp) 39 + __read_mostly; 39 40 EXPORT_SYMBOL_GPL(nf_nat_irc_hook); 40 41 41 42 #define HELPER_NAME "irc"
+4 -4
net/netfilter/nf_conntrack_snmp.c
··· 25 25 module_param(timeout, uint, 0400); 26 26 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 27 27 28 - int (*nf_nat_snmp_hook)(struct sk_buff *skb, 29 - unsigned int protoff, 30 - struct nf_conn *ct, 31 - enum ip_conntrack_info ctinfo); 28 + int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, 29 + unsigned int protoff, 30 + struct nf_conn *ct, 31 + enum ip_conntrack_info ctinfo); 32 32 EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); 33 33 34 34 static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+4 -3
net/netfilter/nf_conntrack_tftp.c
··· 32 32 module_param_array(ports, ushort, &ports_c, 0400); 33 33 MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); 34 34 35 - unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, 36 - enum ip_conntrack_info ctinfo, 37 - struct nf_conntrack_expect *exp) __read_mostly; 35 + unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, 36 + enum ip_conntrack_info ctinfo, 37 + struct nf_conntrack_expect *exp) 38 + __read_mostly; 38 39 EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); 39 40 40 41 static int tftp_help(struct sk_buff *skb,
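The conntrack-helper hunks (amanda, ftp, irc, snmp, tftp) all add `__rcu` to the NAT hook function-pointer declarations so sparse can verify they are only accessed through `rcu_dereference()`/`rcu_assign_pointer()`. A userspace analogue of that publish/consume pattern using C11 atomics in place of the RCU primitives; the hook name and signature are illustrative:

```c
#include <stdatomic.h>
#include <stddef.h>

/* Userspace analogue of the __rcu-annotated NAT hook pointers: the
 * helper module publishes a function pointer with release semantics
 * (like rcu_assign_pointer) and the packet path loads it with acquire
 * semantics (like rcu_dereference), tolerating NULL. */
typedef unsigned int (*nat_hook_fn)(unsigned int pkt);

static _Atomic(nat_hook_fn) nat_hook;   /* starts NULL: no NAT module */

void register_hook(nat_hook_fn fn)
{
    atomic_store_explicit(&nat_hook, fn, memory_order_release);
}

unsigned int run_hook(unsigned int pkt)
{
    nat_hook_fn fn = atomic_load_explicit(&nat_hook, memory_order_acquire);

    return fn ? fn(pkt) : pkt;   /* fall through when no hook is set */
}

/* Demo hook: pretend-mangles the packet by incrementing it. */
static unsigned int demo_hook(unsigned int pkt) { return pkt + 1; }
```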
+43 -206
net/netfilter/nf_tables_api.c
··· 2823 2823 2824 2824 err_register_hook: 2825 2825 nft_chain_del(chain); 2826 + synchronize_rcu(); 2826 2827 err_chain_add: 2827 2828 nft_trans_destroy(trans); 2828 2829 err_trans: ··· 3902 3901 return skb->len; 3903 3902 } 3904 3903 3905 - static int nf_tables_dumpreset_rules(struct sk_buff *skb, 3906 - struct netlink_callback *cb) 3907 - { 3908 - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); 3909 - int ret; 3910 - 3911 - /* Mutex is held is to prevent that two concurrent dump-and-reset calls 3912 - * do not underrun counters and quotas. The commit_mutex is used for 3913 - * the lack a better lock, this is not transaction path. 3914 - */ 3915 - mutex_lock(&nft_net->commit_mutex); 3916 - ret = nf_tables_dump_rules(skb, cb); 3917 - mutex_unlock(&nft_net->commit_mutex); 3918 - 3919 - return ret; 3920 - } 3921 - 3922 3904 static int nf_tables_dump_rules_start(struct netlink_callback *cb) 3923 3905 { 3924 3906 struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; ··· 3921 3937 return -ENOMEM; 3922 3938 } 3923 3939 } 3940 + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) 3941 + ctx->reset = true; 3942 + 3924 3943 return 0; 3925 - } 3926 - 3927 - static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb) 3928 - { 3929 - struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; 3930 - 3931 - ctx->reset = true; 3932 - 3933 - return nf_tables_dump_rules_start(cb); 3934 3944 } 3935 3945 3936 3946 static int nf_tables_dump_rules_done(struct netlink_callback *cb) ··· 3990 4012 u32 portid = NETLINK_CB(skb).portid; 3991 4013 struct net *net = info->net; 3992 4014 struct sk_buff *skb2; 4015 + bool reset = false; 4016 + char *buf; 3993 4017 3994 4018 if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 3995 4019 struct netlink_dump_control c = { ··· 4005 4025 return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 4006 4026 } 4007 4027 4008 - skb2 = nf_tables_getrule_single(portid, info, nla, false); 4028 + if 
(NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) 4029 + reset = true; 4030 + 4031 + skb2 = nf_tables_getrule_single(portid, info, nla, reset); 4009 4032 if (IS_ERR(skb2)) 4010 4033 return PTR_ERR(skb2); 4011 4034 4012 - return nfnetlink_unicast(skb2, net, portid); 4013 - } 4014 - 4015 - static int nf_tables_getrule_reset(struct sk_buff *skb, 4016 - const struct nfnl_info *info, 4017 - const struct nlattr * const nla[]) 4018 - { 4019 - struct nftables_pernet *nft_net = nft_pernet(info->net); 4020 - u32 portid = NETLINK_CB(skb).portid; 4021 - struct net *net = info->net; 4022 - struct sk_buff *skb2; 4023 - char *buf; 4024 - 4025 - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 4026 - struct netlink_dump_control c = { 4027 - .start= nf_tables_dumpreset_rules_start, 4028 - .dump = nf_tables_dumpreset_rules, 4029 - .done = nf_tables_dump_rules_done, 4030 - .module = THIS_MODULE, 4031 - .data = (void *)nla, 4032 - }; 4033 - 4034 - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 4035 - } 4036 - 4037 - if (!try_module_get(THIS_MODULE)) 4038 - return -EINVAL; 4039 - rcu_read_unlock(); 4040 - mutex_lock(&nft_net->commit_mutex); 4041 - skb2 = nf_tables_getrule_single(portid, info, nla, true); 4042 - mutex_unlock(&nft_net->commit_mutex); 4043 - rcu_read_lock(); 4044 - module_put(THIS_MODULE); 4045 - 4046 - if (IS_ERR(skb2)) 4047 - return PTR_ERR(skb2); 4035 + if (!reset) 4036 + return nfnetlink_unicast(skb2, net, portid); 4048 4037 4049 4038 buf = kasprintf(GFP_ATOMIC, "%.*s:%u", 4050 4039 nla_len(nla[NFTA_RULE_TABLE]), ··· 6273 6324 nla_nest_end(skb, nest); 6274 6325 nlmsg_end(skb, nlh); 6275 6326 6327 + if (dump_ctx->reset && args.iter.count > args.iter.skip) 6328 + audit_log_nft_set_reset(table, cb->seq, 6329 + args.iter.count - args.iter.skip); 6330 + 6276 6331 rcu_read_unlock(); 6277 6332 6278 6333 if (args.iter.err && args.iter.err != -EMSGSIZE) ··· 6290 6337 nla_put_failure: 6291 6338 rcu_read_unlock(); 6292 6339 return -ENOSPC; 6293 - } 6294 
- 6295 - static int nf_tables_dumpreset_set(struct sk_buff *skb, 6296 - struct netlink_callback *cb) 6297 - { 6298 - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); 6299 - struct nft_set_dump_ctx *dump_ctx = cb->data; 6300 - int ret, skip = cb->args[0]; 6301 - 6302 - mutex_lock(&nft_net->commit_mutex); 6303 - 6304 - ret = nf_tables_dump_set(skb, cb); 6305 - 6306 - if (cb->args[0] > skip) 6307 - audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, 6308 - cb->args[0] - skip); 6309 - 6310 - mutex_unlock(&nft_net->commit_mutex); 6311 - 6312 - return ret; 6313 6340 } 6314 6341 6315 6342 static int nf_tables_dump_set_start(struct netlink_callback *cb) ··· 6535 6602 { 6536 6603 struct netlink_ext_ack *extack = info->extack; 6537 6604 struct nft_set_dump_ctx dump_ctx; 6605 + int rem, err = 0, nelems = 0; 6606 + struct net *net = info->net; 6538 6607 struct nlattr *attr; 6539 - int rem, err = 0; 6608 + bool reset = false; 6609 + 6610 + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) 6611 + reset = true; 6540 6612 6541 6613 if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 6542 6614 struct netlink_dump_control c = { ··· 6551 6613 .module = THIS_MODULE, 6552 6614 }; 6553 6615 6554 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); 6616 + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, reset); 6555 6617 if (err) 6556 6618 return err; 6557 6619 ··· 6562 6624 if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) 6563 6625 return -EINVAL; 6564 6626 6565 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); 6627 + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, reset); 6566 6628 if (err) 6567 6629 return err; 6568 6630 6569 6631 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 6570 - err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false); 6571 - if (err < 0) { 6572 - NL_SET_BAD_ATTR(extack, attr); 6573 - break; 6574 - } 6575 - } 6576 - 6577 - return err; 6578 - } 6579 - 6580 - static int 
nf_tables_getsetelem_reset(struct sk_buff *skb, 6581 - const struct nfnl_info *info, 6582 - const struct nlattr * const nla[]) 6583 - { 6584 - struct nftables_pernet *nft_net = nft_pernet(info->net); 6585 - struct netlink_ext_ack *extack = info->extack; 6586 - struct nft_set_dump_ctx dump_ctx; 6587 - int rem, err = 0, nelems = 0; 6588 - struct nlattr *attr; 6589 - 6590 - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 6591 - struct netlink_dump_control c = { 6592 - .start = nf_tables_dump_set_start, 6593 - .dump = nf_tables_dumpreset_set, 6594 - .done = nf_tables_dump_set_done, 6595 - .module = THIS_MODULE, 6596 - }; 6597 - 6598 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); 6599 - if (err) 6600 - return err; 6601 - 6602 - c.data = &dump_ctx; 6603 - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 6604 - } 6605 - 6606 - if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) 6607 - return -EINVAL; 6608 - 6609 - if (!try_module_get(THIS_MODULE)) 6610 - return -EINVAL; 6611 - rcu_read_unlock(); 6612 - mutex_lock(&nft_net->commit_mutex); 6613 - rcu_read_lock(); 6614 - 6615 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); 6616 - if (err) 6617 - goto out_unlock; 6618 - 6619 - nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 6620 - err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true); 6632 + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, reset); 6621 6633 if (err < 0) { 6622 6634 NL_SET_BAD_ATTR(extack, attr); 6623 6635 break; 6624 6636 } 6625 6637 nelems++; 6626 6638 } 6627 - audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems); 6628 - 6629 - out_unlock: 6630 - rcu_read_unlock(); 6631 - mutex_unlock(&nft_net->commit_mutex); 6632 - rcu_read_lock(); 6633 - module_put(THIS_MODULE); 6639 + if (reset) 6640 + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(net), 6641 + nelems); 6634 6642 6635 6643 return err; 6636 6644 } ··· 8448 8564 return skb->len; 8449 8565 } 8450 8566 8451 
- static int nf_tables_dumpreset_obj(struct sk_buff *skb, 8452 - struct netlink_callback *cb) 8453 - { 8454 - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); 8455 - int ret; 8456 - 8457 - mutex_lock(&nft_net->commit_mutex); 8458 - ret = nf_tables_dump_obj(skb, cb); 8459 - mutex_unlock(&nft_net->commit_mutex); 8460 - 8461 - return ret; 8462 - } 8463 - 8464 8567 static int nf_tables_dump_obj_start(struct netlink_callback *cb) 8465 8568 { 8466 8569 struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; ··· 8464 8593 if (nla[NFTA_OBJ_TYPE]) 8465 8594 ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); 8466 8595 8596 + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) 8597 + ctx->reset = true; 8598 + 8467 8599 return 0; 8468 - } 8469 - 8470 - static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) 8471 - { 8472 - struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; 8473 - 8474 - ctx->reset = true; 8475 - 8476 - return nf_tables_dump_obj_start(cb); 8477 8600 } 8478 8601 8479 8602 static int nf_tables_dump_obj_done(struct netlink_callback *cb) ··· 8530 8665 const struct nlattr * const nla[]) 8531 8666 { 8532 8667 u32 portid = NETLINK_CB(skb).portid; 8668 + struct net *net = info->net; 8533 8669 struct sk_buff *skb2; 8670 + bool reset = false; 8671 + char *buf; 8534 8672 8535 8673 if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 8536 8674 struct netlink_dump_control c = { ··· 8547 8679 return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 8548 8680 } 8549 8681 8550 - skb2 = nf_tables_getobj_single(portid, info, nla, false); 8682 + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) 8683 + reset = true; 8684 + 8685 + skb2 = nf_tables_getobj_single(portid, info, nla, reset); 8551 8686 if (IS_ERR(skb2)) 8552 8687 return PTR_ERR(skb2); 8553 8688 8554 - return nfnetlink_unicast(skb2, info->net, portid); 8555 - } 8556 - 8557 - static int nf_tables_getobj_reset(struct sk_buff *skb, 8558 - const struct nfnl_info *info, 8559 - 
const struct nlattr * const nla[]) 8560 - { 8561 - struct nftables_pernet *nft_net = nft_pernet(info->net); 8562 - u32 portid = NETLINK_CB(skb).portid; 8563 - struct net *net = info->net; 8564 - struct sk_buff *skb2; 8565 - char *buf; 8566 - 8567 - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 8568 - struct netlink_dump_control c = { 8569 - .start = nf_tables_dumpreset_obj_start, 8570 - .dump = nf_tables_dumpreset_obj, 8571 - .done = nf_tables_dump_obj_done, 8572 - .module = THIS_MODULE, 8573 - .data = (void *)nla, 8574 - }; 8575 - 8576 - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 8577 - } 8578 - 8579 - if (!try_module_get(THIS_MODULE)) 8580 - return -EINVAL; 8581 - rcu_read_unlock(); 8582 - mutex_lock(&nft_net->commit_mutex); 8583 - skb2 = nf_tables_getobj_single(portid, info, nla, true); 8584 - mutex_unlock(&nft_net->commit_mutex); 8585 - rcu_read_lock(); 8586 - module_put(THIS_MODULE); 8587 - 8588 - if (IS_ERR(skb2)) 8589 - return PTR_ERR(skb2); 8689 + if (!reset) 8690 + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); 8590 8691 8591 8692 buf = kasprintf(GFP_ATOMIC, "%.*s:%u", 8592 8693 nla_len(nla[NFTA_OBJ_TABLE]), ··· 9874 10037 .policy = nft_rule_policy, 9875 10038 }, 9876 10039 [NFT_MSG_GETRULE_RESET] = { 9877 - .call = nf_tables_getrule_reset, 10040 + .call = nf_tables_getrule, 9878 10041 .type = NFNL_CB_RCU, 9879 10042 .attr_count = NFTA_RULE_MAX, 9880 10043 .policy = nft_rule_policy, ··· 9928 10091 .policy = nft_set_elem_list_policy, 9929 10092 }, 9930 10093 [NFT_MSG_GETSETELEM_RESET] = { 9931 - .call = nf_tables_getsetelem_reset, 10094 + .call = nf_tables_getsetelem, 9932 10095 .type = NFNL_CB_RCU, 9933 10096 .attr_count = NFTA_SET_ELEM_LIST_MAX, 9934 10097 .policy = nft_set_elem_list_policy, ··· 9974 10137 .policy = nft_obj_policy, 9975 10138 }, 9976 10139 [NFT_MSG_GETOBJ_RESET] = { 9977 - .call = nf_tables_getobj_reset, 10140 + .call = nf_tables_getobj, 9978 10141 .type = NFNL_CB_RCU, 9979 10142 .attr_count = 
NFTA_OBJ_MAX, 9980 10143 .policy = nft_obj_policy,
+16 -4
net/netfilter/nft_counter.c
··· 32 32 33 33 static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync); 34 34 35 + /* control plane only: sync fetch+reset */ 36 + static DEFINE_SPINLOCK(nft_counter_lock); 37 + 35 38 static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, 36 39 struct nft_regs *regs, 37 40 const struct nft_pktinfo *pkt) ··· 151 148 } 152 149 } 153 150 151 + static void nft_counter_fetch_and_reset(struct nft_counter_percpu_priv *priv, 152 + struct nft_counter_tot *total) 153 + { 154 + spin_lock(&nft_counter_lock); 155 + nft_counter_fetch(priv, total); 156 + nft_counter_reset(priv, total); 157 + spin_unlock(&nft_counter_lock); 158 + } 159 + 154 160 static int nft_counter_do_dump(struct sk_buff *skb, 155 161 struct nft_counter_percpu_priv *priv, 156 162 bool reset) 157 163 { 158 164 struct nft_counter_tot total; 159 165 160 - nft_counter_fetch(priv, &total); 166 + if (unlikely(reset)) 167 + nft_counter_fetch_and_reset(priv, &total); 168 + else 169 + nft_counter_fetch(priv, &total); 161 170 162 171 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), 163 172 NFTA_COUNTER_PAD) || 164 173 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), 165 174 NFTA_COUNTER_PAD)) 166 175 goto nla_put_failure; 167 - 168 - if (reset) 169 - nft_counter_reset(priv, &total); 170 176 171 177 return 0; 172 178
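The nft_counter hunk serialises dump-and-reset behind a dedicated spinlock so two concurrent resets cannot both snapshot the same total and then zero it, under-reporting traffic. A userspace sketch with a pthread mutex in place of the spinlock and a two-slot array standing in for the per-CPU counters:

```c
#include <pthread.h>
#include <stdint.h>

/* Two per-"CPU" slots standing in for nft_counter's percpu counters. */
struct toy_counter {
    uint64_t pkts[2];
    pthread_mutex_t lock;    /* control-plane only, like nft_counter_lock */
};

static uint64_t fetch(const struct toy_counter *c)
{
    return c->pkts[0] + c->pkts[1];
}

/* Mirrors nft_counter_fetch_and_reset(): snapshot and zero under one
 * lock, so concurrent dump-and-reset requests each account a disjoint
 * portion of the traffic instead of racing between fetch and reset. */
uint64_t fetch_and_reset(struct toy_counter *c)
{
    uint64_t total;

    pthread_mutex_lock(&c->lock);
    total = fetch(c);
    c->pkts[0] = 0;
    c->pkts[1] = 0;
    pthread_mutex_unlock(&c->lock);
    return total;
}
```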
+7 -6
net/netfilter/nft_quota.c
··· 140 140 u64 consumed, consumed_cap, quota; 141 141 u32 flags = priv->flags; 142 142 143 - /* Since we inconditionally increment consumed quota for each packet 143 + /* Since we unconditionally increment consumed quota for each packet 144 144 * that we see, don't go over the quota boundary in what we send to 145 145 * userspace. 146 146 */ 147 - consumed = atomic64_read(priv->consumed); 147 + if (reset) { 148 + consumed = atomic64_xchg(priv->consumed, 0); 149 + clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); 150 + } else { 151 + consumed = atomic64_read(priv->consumed); 152 + } 148 153 quota = atomic64_read(&priv->quota); 149 154 if (consumed >= quota) { 150 155 consumed_cap = quota; ··· 165 160 nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) 166 161 goto nla_put_failure; 167 162 168 - if (reset) { 169 - atomic64_sub(consumed, priv->consumed); 170 - clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); 171 - } 172 163 return 0; 173 164 174 165 nla_put_failure:
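The nft_quota hunk moves the reset from a later `atomic64_sub()` to an `atomic64_xchg(priv->consumed, 0)` at read time: the read and the zeroing become one indivisible step, so bytes accounted between a separate read and subtract cannot be mis-reported. A userspace sketch with C11 `atomic_exchange` standing in for `atomic64_xchg`:

```c
#include <stdatomic.h>
#include <stdint.h>

/* Quota consumption counter, bumped from the packet path. */
static _Atomic uint64_t consumed;

void consume(uint64_t bytes)
{
    atomic_fetch_add(&consumed, bytes);
}

/* Mirrors the nft_quota fix: exchange reads the current value and
 * zeroes the counter in a single atomic operation, so a concurrent
 * consume() lands either entirely before or entirely after the reset. */
uint64_t read_and_reset(void)
{
    return atomic_exchange(&consumed, 0);
}
```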
+141 -18
net/nfc/nci/ntf.c
··· 58 58 struct nci_conn_info *conn_info; 59 59 int i; 60 60 61 - if (skb->len < sizeof(struct nci_core_conn_credit_ntf)) 61 + if (skb->len < offsetofend(struct nci_core_conn_credit_ntf, num_entries)) 62 62 return -EINVAL; 63 63 64 64 ntf = (struct nci_core_conn_credit_ntf *)skb->data; ··· 67 67 68 68 if (ntf->num_entries > NCI_MAX_NUM_CONN) 69 69 ntf->num_entries = NCI_MAX_NUM_CONN; 70 + 71 + if (skb->len < offsetofend(struct nci_core_conn_credit_ntf, num_entries) + 72 + ntf->num_entries * sizeof(struct conn_credit_entry)) 73 + return -EINVAL; 70 74 71 75 /* update the credits */ 72 76 for (i = 0; i < ntf->num_entries; i++) { ··· 142 138 static const __u8 * 143 139 nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, 144 140 struct rf_tech_specific_params_nfca_poll *nfca_poll, 145 - const __u8 *data) 141 + const __u8 *data, ssize_t data_len) 146 142 { 143 + /* Check if we have enough data for sens_res (2 bytes) */ 144 + if (data_len < 2) 145 + return ERR_PTR(-EINVAL); 146 + 147 147 nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data)); 148 148 data += 2; 149 + data_len -= 2; 150 + 151 + /* Check if we have enough data for nfcid1_len (1 byte) */ 152 + if (data_len < 1) 153 + return ERR_PTR(-EINVAL); 149 154 150 155 nfca_poll->nfcid1_len = min_t(__u8, *data++, NFC_NFCID1_MAXSIZE); 156 + data_len--; 151 157 152 158 pr_debug("sens_res 0x%x, nfcid1_len %d\n", 153 159 nfca_poll->sens_res, nfca_poll->nfcid1_len); 154 160 161 + /* Check if we have enough data for nfcid1 */ 162 + if (data_len < nfca_poll->nfcid1_len) 163 + return ERR_PTR(-EINVAL); 164 + 155 165 memcpy(nfca_poll->nfcid1, data, nfca_poll->nfcid1_len); 156 166 data += nfca_poll->nfcid1_len; 167 + data_len -= nfca_poll->nfcid1_len; 168 + 169 + /* Check if we have enough data for sel_res_len (1 byte) */ 170 + if (data_len < 1) 171 + return ERR_PTR(-EINVAL); 157 172 158 173 nfca_poll->sel_res_len = *data++; 174 + data_len--; 159 175 160 - if (nfca_poll->sel_res_len != 0) 176 + if 
	if (nfca_poll->sel_res_len != 0) {
+		/* Check if we have enough data for sel_res (1 byte) */
+		if (data_len < 1)
+			return ERR_PTR(-EINVAL);
+
		nfca_poll->sel_res = *data++;
+	}

	pr_debug("sel_res_len %d, sel_res 0x%x\n",
		 nfca_poll->sel_res_len,
···
static const __u8 *
nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
			struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
-			const __u8 *data)
+			const __u8 *data, ssize_t data_len)
{
+	/* Check if we have enough data for sensb_res_len (1 byte) */
+	if (data_len < 1)
+		return ERR_PTR(-EINVAL);
+
	nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE);
+	data_len--;

	pr_debug("sensb_res_len %d\n", nfcb_poll->sensb_res_len);
+
+	/* Check if we have enough data for sensb_res */
+	if (data_len < nfcb_poll->sensb_res_len)
+		return ERR_PTR(-EINVAL);

	memcpy(nfcb_poll->sensb_res, data, nfcb_poll->sensb_res_len);
	data += nfcb_poll->sensb_res_len;
···
static const __u8 *
nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
			struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
-			const __u8 *data)
+			const __u8 *data, ssize_t data_len)
{
+	/* Check if we have enough data for bit_rate (1 byte) */
+	if (data_len < 1)
+		return ERR_PTR(-EINVAL);
+
	nfcf_poll->bit_rate = *data++;
+	data_len--;
+
+	/* Check if we have enough data for sensf_res_len (1 byte) */
+	if (data_len < 1)
+		return ERR_PTR(-EINVAL);
+
	nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE);
+	data_len--;

	pr_debug("bit_rate %d, sensf_res_len %d\n",
		 nfcf_poll->bit_rate, nfcf_poll->sensf_res_len);
+
+	/* Check if we have enough data for sensf_res */
+	if (data_len < nfcf_poll->sensf_res_len)
+		return ERR_PTR(-EINVAL);

	memcpy(nfcf_poll->sensf_res, data, nfcf_poll->sensf_res_len);
	data += nfcf_poll->sensf_res_len;
···
static const __u8 *
nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
			struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
-			const __u8 *data)
+			const __u8 *data, ssize_t data_len)
{
+	/* Skip 1 byte (reserved) */
+	if (data_len < 1)
+		return ERR_PTR(-EINVAL);
+
	++data;
+	data_len--;
+
+	/* Check if we have enough data for dsfid (1 byte) */
+	if (data_len < 1)
+		return ERR_PTR(-EINVAL);
+
	nfcv_poll->dsfid = *data++;
+	data_len--;
+
+	/* Check if we have enough data for uid (8 bytes) */
+	if (data_len < NFC_ISO15693_UID_MAXSIZE)
+		return ERR_PTR(-EINVAL);
+
	memcpy(nfcv_poll->uid, data, NFC_ISO15693_UID_MAXSIZE);
	data += NFC_ISO15693_UID_MAXSIZE;
+
	return data;
}

static const __u8 *
nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
			struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
-			const __u8 *data)
+			const __u8 *data, ssize_t data_len)
{
+	/* Check if we have enough data for local_nfcid2_len (1 byte) */
+	if (data_len < 1)
+		return ERR_PTR(-EINVAL);
+
	nfcf_listen->local_nfcid2_len = min_t(__u8, *data++,
					      NFC_NFCID2_MAXSIZE);
+	data_len--;
+
+	/* Check if we have enough data for local_nfcid2 */
+	if (data_len < nfcf_listen->local_nfcid2_len)
+		return ERR_PTR(-EINVAL);
+
	memcpy(nfcf_listen->local_nfcid2, data, nfcf_listen->local_nfcid2_len);
	data += nfcf_listen->local_nfcid2_len;
···
	const __u8 *data;
	bool add_target = true;

-	if (skb->len < sizeof(struct nci_rf_discover_ntf))
+	if (skb->len < offsetofend(struct nci_rf_discover_ntf, rf_tech_specific_params_len))
		return -EINVAL;

	data = skb->data;
···
	pr_debug("rf_tech_specific_params_len %d\n",
		 ntf.rf_tech_specific_params_len);

+	if (skb->len < (data - skb->data) +
+	    ntf.rf_tech_specific_params_len + sizeof(ntf.ntf_type))
+		return -EINVAL;
+
	if (ntf.rf_tech_specific_params_len > 0) {
		switch (ntf.rf_tech_and_mode) {
		case NCI_NFC_A_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfca_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfca_poll), data);
+				&(ntf.rf_tech_specific_params.nfca_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
			break;

		case NCI_NFC_B_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfcb_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfcb_poll), data);
+				&(ntf.rf_tech_specific_params.nfcb_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
			break;

		case NCI_NFC_F_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfcf_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfcf_poll), data);
+				&(ntf.rf_tech_specific_params.nfcf_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
			break;

		case NCI_NFC_V_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfcv_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfcv_poll), data);
+				&(ntf.rf_tech_specific_params.nfcv_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
			break;

		default:
···
	const __u8 *data;
	int err = NCI_STATUS_OK;

-	if (skb->len < sizeof(struct nci_rf_intf_activated_ntf))
+	if (skb->len < offsetofend(struct nci_rf_intf_activated_ntf, rf_tech_specific_params_len))
		return -EINVAL;

	data = skb->data;
···
	if (ntf.rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT)
		goto listen;

+	if (skb->len < (data - skb->data) + ntf.rf_tech_specific_params_len)
+		return -EINVAL;
+
	if (ntf.rf_tech_specific_params_len > 0) {
		switch (ntf.activation_rf_tech_and_mode) {
		case NCI_NFC_A_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfca_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfca_poll), data);
+				&(ntf.rf_tech_specific_params.nfca_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return -EINVAL;
			break;

		case NCI_NFC_B_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfcb_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfcb_poll), data);
+				&(ntf.rf_tech_specific_params.nfcb_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return -EINVAL;
			break;

		case NCI_NFC_F_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfcf_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfcf_poll), data);
+				&(ntf.rf_tech_specific_params.nfcf_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return -EINVAL;
			break;

		case NCI_NFC_V_PASSIVE_POLL_MODE:
			data = nci_extract_rf_params_nfcv_passive_poll(ndev,
-				&(ntf.rf_tech_specific_params.nfcv_poll), data);
+				&(ntf.rf_tech_specific_params.nfcv_poll), data,
+				ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return -EINVAL;
			break;

		case NCI_NFC_A_PASSIVE_LISTEN_MODE:
···
		case NCI_NFC_F_PASSIVE_LISTEN_MODE:
			data = nci_extract_rf_params_nfcf_passive_listen(ndev,
				&(ntf.rf_tech_specific_params.nfcf_listen),
-				data);
+				data, ntf.rf_tech_specific_params_len);
+			if (IS_ERR(data))
+				return -EINVAL;
			break;

		default:
···
			goto exit;
		}
	}
+
+	if (skb->len < (data - skb->data) +
+	    sizeof(ntf.data_exch_rf_tech_and_mode) +
+	    sizeof(ntf.data_exch_tx_bit_rate) +
+	    sizeof(ntf.data_exch_rx_bit_rate) +
+	    sizeof(ntf.activation_params_len))
+		return -EINVAL;

	ntf.data_exch_rf_tech_and_mode = *data++;
	ntf.data_exch_tx_bit_rate = *data++;
···
	pr_debug("data_exch_tx_bit_rate 0x%x\n", ntf.data_exch_tx_bit_rate);
	pr_debug("data_exch_rx_bit_rate 0x%x\n", ntf.data_exch_rx_bit_rate);
	pr_debug("activation_params_len %d\n", ntf.activation_params_len);
+
+	if (skb->len < (data - skb->data) + ntf.activation_params_len)
+		return -EINVAL;

	if (ntf.activation_params_len > 0) {
		switch (ntf.rf_interface) {
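The nci_extract_rf_params_* hunks above all apply one pattern: pass the remaining length alongside the data pointer, and check it before every fixed-size read and every variable-length copy. A hypothetical userspace sketch of that pattern, with NULL standing in for ERR_PTR(-EINVAL) and RES_MAXSIZE standing in for the NFC_*_MAXSIZE constants (none of these names are from the patch):

```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>

#define RES_MAXSIZE 8	/* stand-in for e.g. NFC_SENSB_RES_MAXSIZE */

struct poll_params {
	uint8_t res_len;
	uint8_t res[RES_MAXSIZE];
};

/* Returns a pointer past the consumed bytes, or NULL if the buffer is
 * too short (the kernel code returns ERR_PTR(-EINVAL) instead). */
static const uint8_t *extract_poll_params(struct poll_params *p,
					  const uint8_t *data,
					  ssize_t data_len)
{
	/* 1 byte for the length field itself */
	if (data_len < 1)
		return NULL;
	p->res_len = *data++;
	if (p->res_len > RES_MAXSIZE)
		p->res_len = RES_MAXSIZE;	/* the min_t() clamp */
	data_len--;

	/* the variable-length payload must also fit */
	if (data_len < p->res_len)
		return NULL;
	memcpy(p->res, data, p->res_len);
	return data + p->res_len;
}
```

The key detail, mirrored from the patch, is that `data_len` is decremented in lockstep with `data`, so a length byte that claims more payload than the packet actually carries is caught before the memcpy().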
+1
net/psp/Kconfig
···
	bool "PSP Security Protocol support"
	depends on INET
	select SKB_DECRYPTED
+	select SKB_EXTENSIONS
	select SOCK_VALIDATE_XMIT
	help
	  Enable kernel support for the PSP Security Protocol (PSP).
+4 -2
net/rds/send.c
···
		else
			queue_delayed_work(cpath->cp_wq, &cpath->cp_send_w, 1);
		rcu_read_unlock();
+
+		if (ret)
+			goto out;
	}
-	if (ret)
-		goto out;
+
	rds_message_put(rm);

	for (ind = 0; ind < vct.indx; ind++)
+1 -1
net/rds/tcp.c
···
	int ret = 0;

	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
-		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
+		tc = kmem_cache_zalloc(rds_tcp_conn_slab, gfp);
		if (!tc) {
			ret = -ENOMEM;
			goto fail;
+17 -3
net/rds/tcp_listen.c
···
	struct rds_tcp_connection *rs_tcp = NULL;
	int conn_state;
	struct rds_conn_path *cp;
+	struct sock *sk;
	struct in6_addr *my_addr, *peer_addr;
#if !IS_ENABLED(CONFIG_IPV6)
	struct in6_addr saddr, daddr;
···
		rds_conn_path_drop(cp, 0);
		goto rst_nsk;
	}
+	/* Save a local pointer to sk and hold a reference before setting
+	 * callbacks. Once callbacks are set, a concurrent
+	 * rds_tcp_conn_path_shutdown() may call sock_release(), which
+	 * sets new_sock->sk to NULL and drops a reference on sk.
+	 * The local pointer lets us safely access sk_state below even
+	 * if new_sock->sk has been nulled, and sock_hold() keeps sk
+	 * itself valid until we are done.
+	 */
+	sk = new_sock->sk;
+	sock_hold(sk);
+
	if (rs_tcp->t_sock) {
		/* Duelling SYN has been handled in rds_tcp_accept_one() */
		rds_tcp_reset_callbacks(new_sock, cp);
···
	 * knowing that "rds_tcp_conn_path_shutdown" will
	 * dequeue pending messages.
	 */
-	if (new_sock->sk->sk_state == TCP_CLOSE_WAIT ||
-	    new_sock->sk->sk_state == TCP_LAST_ACK ||
-	    new_sock->sk->sk_state == TCP_CLOSE)
+	if (READ_ONCE(sk->sk_state) == TCP_CLOSE_WAIT ||
+	    READ_ONCE(sk->sk_state) == TCP_LAST_ACK ||
+	    READ_ONCE(sk->sk_state) == TCP_CLOSE)
		rds_conn_path_drop(cp, 0);
	else
		queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0);
+
+	sock_put(sk);

	new_sock = NULL;
	ret = 0;
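The tcp_listen.c change is the classic hold-then-use rule: take a local pointer and a reference before publishing the socket to a path that may concurrently release it. A hypothetical single-threaded userspace model of that pattern, with obj_hold()/obj_put() standing in for sock_hold()/sock_put() and none of the names taken from the patch:

```c
#include <stdlib.h>

/* Minimal refcounted object; `frees` instruments destruction. */
struct obj {
	int refcnt;
	int state;
};

static int frees;

static struct obj *obj_new(int state)
{
	struct obj *o = malloc(sizeof(*o));

	o->refcnt = 1;
	o->state = state;
	return o;
}

static void obj_hold(struct obj *o) { o->refcnt++; }

static void obj_put(struct obj *o)
{
	if (--o->refcnt == 0) {
		frees++;
		free(o);
	}
}

/* Models the container (new_sock) whose ->sk a concurrent
 * shutdown path can NULL out and release. */
struct container { struct obj *sk; };

static void concurrent_release(struct container *c)
{
	struct obj *o = c->sk;

	c->sk = NULL;	/* sock_release() nulls new_sock->sk ... */
	obj_put(o);	/* ... and drops its reference */
}
```

With a local `sk = c->sk; obj_hold(sk);` taken first, the caller can still read `sk->state` after `concurrent_release()` has run, and the final `obj_put(sk)` frees the object exactly once, which is the use-after-free the RDS fix closes.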
+5 -1
net/sched/act_skbedit.c
···
	struct tcf_skbedit *d;
	u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
	u16 *queue_mapping = NULL, *ptype = NULL;
-	u16 mapping_mod = 1;
+	u32 mapping_mod = 1;
	bool exists = false;
	int ret = 0, err;
	u32 index;
···
		}

		mapping_mod = *queue_mapping_max - *queue_mapping + 1;
+		if (mapping_mod > U16_MAX) {
+			NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid.");
+			return -EINVAL;
+		}
		flags |= SKBEDIT_F_TXQ_SKBHASH;
	}
	if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
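The act_skbedit.c change matters because `*queue_mapping_max - *queue_mapping + 1` can reach 0x10000, which wraps to 0 when stored in a u16; widening `mapping_mod` to u32 preserves the value so the new `> U16_MAX` range check can reject it. A small illustrative sketch of that truncation (not kernel code):

```c
#include <stdint.h>

#define EX_U16_MAX 0xFFFFu	/* stand-in for the kernel's U16_MAX */

/* Old behaviour: the u16 variable silently truncates the range size. */
static uint16_t mapping_mod_narrow(uint16_t min, uint16_t max)
{
	return (uint16_t)(max - min + 1);	/* int result cast to u16 */
}

/* Fixed behaviour: compute in u32, so an out-of-range size survives
 * long enough to be rejected. Returns 0 for an invalid range. */
static uint32_t mapping_mod_wide(uint16_t min, uint16_t max)
{
	uint32_t mod = (uint32_t)max - min + 1;

	if (mod > EX_U16_MAX)
		return 0;	/* the kernel returns -EINVAL here */
	return mod;
}
```

The full queue range 0..0xFFFF gives a size of 0x10000: the narrow version wraps it to 0 (a hazardous modulus downstream), while the wide version keeps the value and rejects it.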
+15 -5
net/vmw_vsock/af_vsock.c
···
 * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's
 *   mode, which is set at namespace creation and immutable thereafter.
 * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future
-*   child namespaces will inherit when created. The default is "global".
+*   child namespaces will inherit when created. The initial value matches
+*   the namespace's own ns_mode.
 *
 * Changing child_ns_mode only affects newly created namespaces, not the
-* current namespace or existing children. At namespace creation, ns_mode
-* is inherited from the parent's child_ns_mode.
+* current namespace or existing children. A "local" namespace cannot set
+* child_ns_mode to "global". At namespace creation, ns_mode is inherited
+* from the parent's child_ns_mode.
 *
 * The init_net mode is "global" and cannot be modified.
 *
···
	if (ret)
		return ret;

-	if (write)
+	if (write) {
+		/* Prevent a "local" namespace from escalating to "global",
+		 * which would give nested namespaces access to global CIDs.
+		 */
+		if (vsock_net_mode(net) == VSOCK_NET_MODE_LOCAL &&
+		    new_mode == VSOCK_NET_MODE_GLOBAL)
+			return -EPERM;
+
		vsock_net_set_child_mode(net, new_mode);
+	}

	return 0;
}
···
	else
		net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns);

-	net->vsock.child_ns_mode = VSOCK_NET_MODE_GLOBAL;
+	net->vsock.child_ns_mode = net->vsock.mode;
}

static __net_init int vsock_sysctl_init_net(struct net *net)
+18 -1
tools/testing/selftests/drivers/net/hw/devmem.py
···
    ksft_eq(socat.stdout.strip(), "hello\nworld")


+def check_rx_hds(cfg) -> None:
+    """Test HDS splitting across payload sizes."""
+    require_devmem(cfg)
+
+    for size in [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]:
+        port = rand_port()
+        listen_cmd = f"{cfg.bin_local} -L -l -f {cfg.ifname} -s {cfg.addr} -p {port}"
+
+        with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+            wait_port_listen(port)
+            cmd(f"dd if=/dev/zero bs={size} count=1 2>/dev/null | " +
+                f"socat -b {size} -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},nodelay",
+                host=cfg.remote, shell=True)
+
+        ksft_eq(ncdevmem.ret, 0, f"HDS failed for payload size {size}")
+
+
def main() -> None:
    with NetDrvEpEnv(__file__) as cfg:
        cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

-        ksft_run([check_rx, check_tx, check_tx_chunks],
+        ksft_run([check_rx, check_tx, check_tx_chunks, check_rx_hds],
                 args=(cfg, ))
        ksft_exit()
+10 -1
tools/testing/selftests/drivers/net/hw/ncdevmem.c
···
static unsigned int dmabuf_id;
static uint32_t tx_dmabuf_id;
static int waittime_ms = 500;
+static bool fail_on_linear;

/* System state loaded by current_config_load() */
#define MAX_FLOWS 8
···
			       "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
			       dmabuf_cmsg->frag_size);

+			if (fail_on_linear) {
+				pr_err("received SCM_DEVMEM_LINEAR but --fail-on-linear (-L) set");
+				goto err_close_client;
+			}
+
			continue;
		}
···
	int is_server = 0, opt;
	int ret, err = 1;

-	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
+	while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:")) != -1) {
		switch (opt) {
+		case 'L':
+			fail_on_linear = true;
+			break;
		case 'l':
			is_server = 1;
			break;
+10 -7
tools/testing/selftests/drivers/net/hw/toeplitz.py
···
# "define" for the ID of the Toeplitz hash function
ETH_RSS_HASH_TOP = 1
+# Must match RPS_MAX_CPUS in toeplitz.c
+RPS_MAX_CPUS = 16


def _check_rps_and_rfs_not_configured(cfg):
···
    return cpus


-def _get_unused_cpus(cfg, count=2):
+def _get_unused_rps_cpus(cfg, count=2):
    """
-    Get CPUs that are not used by Rx queues.
-    Returns a list of at least 'count' CPU numbers.
+    Get CPUs that are not used by Rx queues for RPS.
+    Returns a list of at least 'count' CPU numbers within
+    the RPS_MAX_CPUS supported range.
    """

    # Get CPUs used by Rx queues
    rx_cpus = set(_get_irq_cpus(cfg))

-    # Get total number of CPUs
-    num_cpus = os.cpu_count()
+    # Get total number of CPUs, capped by RPS_MAX_CPUS
+    num_cpus = min(os.cpu_count(), RPS_MAX_CPUS)

    # Find unused CPUs
    unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]

    if len(unused_cpus) < count:
-        raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+        raise KsftSkipEx(f"Need at least {count} CPUs in range 0..{num_cpus - 1} not used by Rx queues, found {len(unused_cpus)}")

    return unused_cpus[:count]
···
        ksft_pr(f"RSS using CPUs: {irq_cpus}")
    elif grp == "rps":
        # Get CPUs not used by Rx queues and configure them for RPS
-        rps_cpus = _get_unused_cpus(cfg, count=2)
+        rps_cpus = _get_unused_rps_cpus(cfg, count=2)
        rps_mask = _configure_rps(cfg, rps_cpus)
        defer(_configure_rps, cfg, [])
        rx_cmd += ["-r", rps_mask]
+2 -2
tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
···
		SOCAT_MODE="UDP6-LISTEN"
	fi

-	# Just wait for 2 seconds
-	timeout 2 ip netns exec "${NAMESPACE}" \
+	# Just wait for 3 seconds
+	timeout 3 ip netns exec "${NAMESPACE}" \
		socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
}
+2 -2
tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
···

	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
		flower skip_sw \
-		action police rate 0.5kbit burst 1m conform-exceed drop/ok
+		action police rate 0.5kbit burst 2k conform-exceed drop/ok
	check_fail $? "Incorrect success to add police action with too low rate"

	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
···

	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
		flower skip_sw \
-		action police rate 1.5kbit burst 1m conform-exceed drop/ok
+		action police rate 1.5kbit burst 2k conform-exceed drop/ok
	check_err $? "Failed to add police action with low rate"

	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+88 -2
tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
···
	test_8021d
	test_8021q
	test_8021qvs
+	test_mdb_count_warning
"

NUM_NETIFS=4
···
{
	local br_flags=$1; shift

-	log_info "802.1q $br_flags${br_flags:+ }tests"
-
	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
		mcast_snooping 1 $br_flags \
		mcast_igmp_version 3 mcast_mld_version 2
···

switch_create_8021qvs()
{
+	log_info "802.1q mcast_vlan_snooping 1 tests"
	switch_create_8021q "mcast_vlan_snooping 1"
	bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
	bridge vlan global set dev br0 vid 10 mcast_mld_version 2
···
	test_toggle_vlan_snooping_permanent
}

+mdb_count_check_warn()
+{
+	local msg=$1; shift
+
+	dmesg | grep -q "WARNING:.*br_multicast_port_ngroups_dec.*"
+	check_fail $? "$msg"
+}
+
+test_mdb_count_mcast_vlan_snooping_flush()
+{
+	RET=0
+
+	# check if we already have a warning
+	mdb_count_check_warn "Check MDB entries count warning before test"
+
+	bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+	ip link set dev br0 down
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	bridge mdb flush dev br0
+
+	mdb_count_check_warn "Check MDB entries count warning after test"
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+	ip link set dev br0 up
+
+	log_test "MDB count warning: mcast_vlan_snooping and MDB flush"
+}
+
+test_mdb_count_mcast_snooping_flush()
+{
+	RET=0
+
+	# check if we already have a warning
+	mdb_count_check_warn "Check MDB entries count warning before test"
+
+	bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+	ip link set dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	bridge mdb flush dev br0
+
+	mdb_count_check_warn "Check MDB entries count warning after test"
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+	ip link set dev br0 type bridge mcast_snooping 1
+
+	log_test "MDB count warning: mcast_snooping and MDB flush"
+}
+
+test_mdb_count_vlan_state_flush()
+{
+	RET=0
+
+	# check if we already have a warning
+	mdb_count_check_warn "Check MDB entries count warning before test"
+
+	bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+	ip link set dev br0 down
+	bridge vlan set vid 10 dev "$swp1" state blocking
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	ip link set dev br0 up
+	bridge mdb flush dev br0
+
+	mdb_count_check_warn "Check MDB entries count warning after test"
+
+	bridge vlan set vid 10 dev "$swp1" state forwarding
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+
+	log_test "MDB count warning: disabled vlan state and MDB flush"
+}
+
# test groups

test_8021d()
···
{
	# Tests for vlan_filtering 1 mcast_vlan_snooping 0.

+	log_info "802.1q tests"
	switch_create_8021q
	setup_wait
···
	test_8021qvs_maxgroups_cfg6
	test_8021qvs_maxgroups_ctl6
	test_8021qvs_toggle_vlan_snooping
+
+	switch_destroy
+}
+
+test_mdb_count_warning()
+{
+	# Tests for mdb_n_entries warning
+
+	log_info "MDB count warning tests"
+	switch_create_8021q
+	setup_wait
+
+	test_mdb_count_mcast_vlan_snooping_flush
+	test_mdb_count_mcast_snooping_flush
+	test_mdb_count_vlan_state_flush

	switch_destroy
}
+8
tools/testing/selftests/net/forwarding/pedit_dsfield.sh
··· 98 98 h1_create 99 99 h2_create 100 100 switch_create 101 + 102 + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then 103 + sysctl_set net.bridge.bridge-nf-call-iptables 0 104 + fi 101 105 } 102 106 103 107 cleanup() 104 108 { 105 109 pre_cleanup 110 + 111 + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then 112 + sysctl_restore net.bridge.bridge-nf-call-iptables 113 + fi 106 114 107 115 switch_destroy 108 116 h2_destroy
+8
tools/testing/selftests/net/forwarding/pedit_ip.sh
··· 91 91 h1_create 92 92 h2_create 93 93 switch_create 94 + 95 + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then 96 + sysctl_set net.bridge.bridge-nf-call-iptables 0 97 + fi 94 98 } 95 99 96 100 cleanup() 97 101 { 98 102 pre_cleanup 103 + 104 + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then 105 + sysctl_restore net.bridge.bridge-nf-call-iptables 106 + fi 99 107 100 108 switch_destroy 101 109 h2_destroy
+1 -1
tools/testing/selftests/net/forwarding/tc_actions.sh
··· 223 223 ip_proto icmp \ 224 224 action drop 225 225 226 - ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & 226 + ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 > $mirred_e2i_tf2 & 227 227 local rpid=$! 228 228 ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 229 229 wait -n $rpid
+16 -10
tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
···
	local inner_tos=$1; shift
	local outer_tos=$1; shift

+	local ipv4hdr=$(:
+		)"45:"$(		: IP version + IHL
+		)"$inner_tos:"$(	: IP TOS
+		)"00:54:"$(		: IP total length
+		)"99:83:"$(		: IP identification
+		)"40:00:"$(		: IP flags + frag off
+		)"40:"$(		: IP TTL
+		)"01:"$(		: IP proto
+		)"CHECKSUM:"$(		: IP header csum
+		)"c0:00:02:03:"$(	: IP saddr: 192.0.2.3
+		)"c0:00:02:01"$(	: IP daddr: 192.0.2.1
+		)
+	local checksum=$(payload_template_calc_checksum "$ipv4hdr")
+	ipv4hdr=$(payload_template_expand_checksum "$ipv4hdr" $checksum)
+
	$MZ $dev -c $count -d 100msec -q \
		-b $next_hop_mac -B $dest_ip \
		-t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
···
		)"$dest_mac:"$(		: ETH daddr
		)"$(mac_get w2):"$(	: ETH saddr
		)"08:00:"$(		: ETH type
-		)"45:"$(		: IP version + IHL
-		)"$inner_tos:"$(	: IP TOS
-		)"00:54:"$(		: IP total length
-		)"99:83:"$(		: IP identification
-		)"40:00:"$(		: IP flags + frag off
-		)"40:"$(		: IP TTL
-		)"01:"$(		: IP proto
-		)"00:00:"$(		: IP header csum
-		)"c0:00:02:03:"$(	: IP saddr: 192.0.2.3
-		)"c0:00:02:01:"$(	: IP daddr: 192.0.2.1
+		)"$ipv4hdr:"$(		: IPv4 header
		)"08:"$(		: ICMP type
		)"00:"$(		: ICMP code
		)"8b:f2:"$(		: ICMP csum
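The vxlan_bridge_1d.sh fix stops hardcoding `00:00` as the inner IP header checksum and instead fills the CHECKSUM placeholder via the selftest's payload_template_calc_checksum helper. As an illustration (in C, not the selftest's shell implementation), the RFC 1071 one's-complement sum that helper has to produce over a 20-byte IPv4 header can be sketched as:

```c
#include <stddef.h>
#include <stdint.h>

/* One's-complement checksum over an IPv4 header. The checksum field
 * (bytes 10-11) must be zero when computing; len is assumed even. */
static uint16_t ipv4_hdr_csum(const uint8_t *hdr, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)hdr[i] << 8 | hdr[i + 1];
	while (sum >> 16)			/* fold carries */
		sum = (sum & 0xFFFF) + (sum >> 16);
	return (uint16_t)~sum;
}
```

A receiver verifies a header by re-summing it with the checksum field in place: a correct header folds to 0xFFFF, so the complement is 0, which is the property the fixed template now satisfies.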
+1 -1
tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
···
		)"6"$(			: IP version
		)"$inner_tos"$(		: Traffic class
		)"0:00:00:"$(		: Flow label
-		)"00:08:"$(		: Payload length
+		)"00:03:"$(		: Payload length
		)"3a:"$(		: Next header
		)"04:"$(		: Hop limit
		)"$saddr:"$(		: IP saddr
+1 -1
tools/testing/selftests/net/lib.sh
···
	local flag=$1; shift

	local state=$(ip -j link show "$name" |
-		jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+		jq --arg flag "$flag" 'any(.[].flags[]; . == $flag)')
	[[ $state == true ]]
+10 -1
tools/testing/selftests/net/packetdrill/ksft_runner.sh
···
	 -D TFO_COOKIE_ZERO=b7c12350a90dc8f5
	 -D CMSG_LEVEL_IP=SOL_IP
	 -D CMSG_TYPE_RECVERR=IP_RECVERR"
+	[ipv4-mapped-ipv6]="--ip_version=ipv4-mapped-ipv6
+	 --local_ip=192.168.0.1
+	 --gateway_ip=192.168.0.1
+	 --netmask_ip=255.255.0.0
+	 --remote_ip=192.0.2.1
+	 -D TFO_COOKIE=3021b9d889017eeb
+	 -D TFO_COOKIE_ZERO=b7c12350a90dc8f5
+	 -D CMSG_LEVEL_IP=SOL_IPV6
+	 -D CMSG_TYPE_RECVERR=IPV6_RECVERR"
	[ipv6]="--ip_version=ipv6
	 --mtu=1520
	 --local_ip=fd3d:0a0b:17d6::1
···

	ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2)
	if [[ -z $ip_versions ]]; then
-		ip_versions="ipv4 ipv6"
+		ip_versions="ipv4 ipv6 ipv4-mapped-ipv6"
	elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
		ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions"
		exit "$KSFT_FAIL"