Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'net-6.2-final' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Jakub Kicinski:
"Fixes from the main networking tree only, probably because all
sub-trees have backed off and haven't submitted their changes.

None of the fixes here are particularly scary, and there are no outstanding
regressions. In an ideal world the "current release" sections would be
empty at this stage, but that never happens.

Current release - regressions:

- fix unwanted sign extension in netdev_stats_to_stats64()

Current release - new code bugs:

- initialize net->notrefcnt_tracker earlier

- devlink: fix netdev notifier chain corruption

- nfp: make sure mbox accesses in IPsec code are atomic

- ice: fix check for weight and priority of a scheduling node

Previous releases - regressions:

- ice: xsk: fix cleaning of XDP_TX frame, prevent inf loop

- igb: fix I2C bit banging config with external thermal sensor

Previous releases - always broken:

- sched: tcindex: update imperfect hash filters respecting rcu

- mpls: fix stale pointer if allocation fails during device rename

- dccp/tcp: avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions

- remove WARN_ON_ONCE(sk->sk_forward_alloc) from
sk_stream_kill_queues()

- af_key: fix heap information leak

- ipv6: fix socket connection with DSCP (correct interpretation of
the tclass field vs fib rule matching)

- tipc: fix kernel warning when sending SYN message

- vmxnet3: read RSS information from the correct descriptor (eop)"

* tag 'net-6.2-final' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (35 commits)
devlink: Fix netdev notifier chain corruption
igb: conditionalize I2C bit banging on external thermal sensor support
net: mpls: fix stale pointer if allocation fails during device rename
net/sched: tcindex: search key must be 16 bits
tipc: fix kernel warning when sending SYN message
igb: Fix PPS input and output using 3rd and 4th SDP
net: use a bounce buffer for copying skb->mark
ixgbe: add double of VLAN header when computing the max MTU
i40e: add double of VLAN header when computing the max MTU
ixgbe: allow to increase MTU to 3K with XDP enabled
net: stmmac: Restrict warning on disabling DMA store and fwd mode
net/sched: act_ctinfo: use percpu stats
net: stmmac: fix order of dwmac5 FlexPPS parametrization sequence
ice: fix lost multicast packets in promisc mode
ice: Fix check for weight and priority of a scheduling node
bnxt_en: Fix mqprio and XDP ring checking logic
net: Fix unwanted sign extension in netdev_stats_to_stats64()
net/usb: kalmia: Don't pass act_len in usb_bulk_msg error path
net: openvswitch: fix possible memory leak in ovs_meter_cmd_set()
af_key: Fix heap information leak
...

+512 -198
+3 -3
drivers/net/ethernet/broadcom/bgmac-bcma.c
··· 240 240 bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; 241 241 bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1; 242 242 bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY; 243 - if (ci->pkg == BCMA_PKG_ID_BCM47188 || 244 - ci->pkg == BCMA_PKG_ID_BCM47186) { 243 + if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) || 244 + (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) { 245 245 bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII; 246 246 bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED; 247 247 } 248 - if (ci->pkg == BCMA_PKG_ID_BCM5358) 248 + if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358) 249 249 bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII; 250 250 break; 251 251 case BCMA_CHIP_ID_BCM53573:
+6 -2
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 9274 9274 netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc); 9275 9275 return rc; 9276 9276 } 9277 - if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) { 9277 + if (tcs && (bp->tx_nr_rings_per_tc * tcs != 9278 + bp->tx_nr_rings - bp->tx_nr_rings_xdp)) { 9278 9279 netdev_err(bp->dev, "tx ring reservation failure\n"); 9279 9280 netdev_reset_tc(bp->dev); 9280 - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; 9281 + if (bp->tx_nr_rings_xdp) 9282 + bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp; 9283 + else 9284 + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; 9281 9285 return -ENOMEM; 9282 9286 } 9283 9287 return 0;
+3 -1
drivers/net/ethernet/intel/i40e/i40e_main.c
··· 2921 2921 struct i40e_pf *pf = vsi->back; 2922 2922 2923 2923 if (i40e_enabled_xdp_vsi(vsi)) { 2924 - int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 2924 + int frame_size = new_mtu + I40E_PACKET_HDR_PAD; 2925 2925 2926 2926 if (frame_size > i40e_max_xdp_frame_size(vsi)) 2927 2927 return -EINVAL; ··· 13167 13167 } 13168 13168 13169 13169 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 13170 + if (!br_spec) 13171 + return -EINVAL; 13170 13172 13171 13173 nla_for_each_nested(attr, br_spec, rem) { 13172 13174 __u16 mode;
+2 -2
drivers/net/ethernet/intel/ice/ice_devlink.c
··· 899 899 { 900 900 int status; 901 901 902 - if (node->tx_priority >= 8) { 902 + if (priority >= 8) { 903 903 NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); 904 904 return -EINVAL; 905 905 } ··· 929 929 { 930 930 int status; 931 931 932 - if (node->tx_weight > 200 || node->tx_weight < 1) { 932 + if (weight > 200 || weight < 1) { 933 933 NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); 934 934 return -EINVAL; 935 935 }
+26
drivers/net/ethernet/intel/ice/ice_main.c
··· 275 275 if (status && status != -EEXIST) 276 276 return status; 277 277 278 + netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n", 279 + vsi->vsi_num, promisc_m); 278 280 return 0; 279 281 } 280 282 ··· 302 300 promisc_m, 0); 303 301 } 304 302 303 + netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n", 304 + vsi->vsi_num, promisc_m); 305 305 return status; 306 306 } 307 307 ··· 418 414 } 419 415 err = 0; 420 416 vlan_ops->dis_rx_filtering(vsi); 417 + 418 + /* promiscuous mode implies allmulticast so 419 + * that VSIs that are in promiscuous mode are 420 + * subscribed to multicast packets coming to 421 + * the port 422 + */ 423 + err = ice_set_promisc(vsi, 424 + ICE_MCAST_PROMISC_BITS); 425 + if (err) 426 + goto out_promisc; 421 427 } 422 428 } else { 423 429 /* Clear Rx filter to remove traffic from wire */ ··· 443 429 if (vsi->netdev->features & 444 430 NETIF_F_HW_VLAN_CTAG_FILTER) 445 431 vlan_ops->ena_rx_filtering(vsi); 432 + } 433 + 434 + /* disable allmulti here, but only if allmulti is not 435 + * still enabled for the netdev 436 + */ 437 + if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) { 438 + err = ice_clear_promisc(vsi, 439 + ICE_MCAST_PROMISC_BITS); 440 + if (err) { 441 + netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n", 442 + err, vsi->vsi_num); 443 + } 446 444 } 447 445 } 448 446 }
+9 -6
drivers/net/ethernet/intel/ice/ice_xsk.c
··· 800 800 struct ice_tx_desc *tx_desc; 801 801 u16 cnt = xdp_ring->count; 802 802 struct ice_tx_buf *tx_buf; 803 + u16 completed_frames = 0; 803 804 u16 xsk_frames = 0; 804 805 u16 last_rs; 805 806 int i; ··· 810 809 if ((tx_desc->cmd_type_offset_bsz & 811 810 cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { 812 811 if (last_rs >= ntc) 813 - xsk_frames = last_rs - ntc + 1; 812 + completed_frames = last_rs - ntc + 1; 814 813 else 815 - xsk_frames = last_rs + cnt - ntc + 1; 814 + completed_frames = last_rs + cnt - ntc + 1; 816 815 } 817 816 818 - if (!xsk_frames) 817 + if (!completed_frames) 819 818 return; 820 819 821 - if (likely(!xdp_ring->xdp_tx_active)) 820 + if (likely(!xdp_ring->xdp_tx_active)) { 821 + xsk_frames = completed_frames; 822 822 goto skip; 823 + } 823 824 824 825 ntc = xdp_ring->next_to_clean; 825 - for (i = 0; i < xsk_frames; i++) { 826 + for (i = 0; i < completed_frames; i++) { 826 827 tx_buf = &xdp_ring->tx_buf[ntc]; 827 828 828 829 if (tx_buf->raw_buf) { ··· 840 837 } 841 838 skip: 842 839 tx_desc->cmd_type_offset_bsz = 0; 843 - xdp_ring->next_to_clean += xsk_frames; 840 + xdp_ring->next_to_clean += completed_frames; 844 841 if (xdp_ring->next_to_clean >= cnt) 845 842 xdp_ring->next_to_clean -= cnt; 846 843 if (xsk_frames)
+38 -16
drivers/net/ethernet/intel/igb/igb_main.c
··· 2256 2256 } 2257 2257 } 2258 2258 2259 + #ifdef CONFIG_IGB_HWMON 2260 + /** 2261 + * igb_set_i2c_bb - Init I2C interface 2262 + * @hw: pointer to hardware structure 2263 + **/ 2264 + static void igb_set_i2c_bb(struct e1000_hw *hw) 2265 + { 2266 + u32 ctrl_ext; 2267 + s32 i2cctl; 2268 + 2269 + ctrl_ext = rd32(E1000_CTRL_EXT); 2270 + ctrl_ext |= E1000_CTRL_I2C_ENA; 2271 + wr32(E1000_CTRL_EXT, ctrl_ext); 2272 + wrfl(); 2273 + 2274 + i2cctl = rd32(E1000_I2CPARAMS); 2275 + i2cctl |= E1000_I2CBB_EN 2276 + | E1000_I2C_CLK_OE_N 2277 + | E1000_I2C_DATA_OE_N; 2278 + wr32(E1000_I2CPARAMS, i2cctl); 2279 + wrfl(); 2280 + } 2281 + #endif 2282 + 2259 2283 void igb_reset(struct igb_adapter *adapter) 2260 2284 { 2261 2285 struct pci_dev *pdev = adapter->pdev; ··· 2424 2400 * interface. 2425 2401 */ 2426 2402 if (adapter->ets) 2427 - mac->ops.init_thermal_sensor_thresh(hw); 2403 + igb_set_i2c_bb(hw); 2404 + mac->ops.init_thermal_sensor_thresh(hw); 2428 2405 } 2429 2406 } 2430 2407 #endif ··· 3142 3117 **/ 3143 3118 static s32 igb_init_i2c(struct igb_adapter *adapter) 3144 3119 { 3145 - struct e1000_hw *hw = &adapter->hw; 3146 3120 s32 status = 0; 3147 - s32 i2cctl; 3148 3121 3149 3122 /* I2C interface supported on i350 devices */ 3150 3123 if (adapter->hw.mac.type != e1000_i350) 3151 3124 return 0; 3152 - 3153 - i2cctl = rd32(E1000_I2CPARAMS); 3154 - i2cctl |= E1000_I2CBB_EN 3155 - | E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N 3156 - | E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; 3157 - wr32(E1000_I2CPARAMS, i2cctl); 3158 - wrfl(); 3159 3125 3160 3126 /* Initialize the i2c bus which is controlled by the registers. 3161 3127 * This bus will use the i2c_algo_bit structure that implements ··· 3537 3521 adapter->ets = true; 3538 3522 else 3539 3523 adapter->ets = false; 3524 + /* Only enable I2C bit banging if an external thermal 3525 + * sensor is supported. 
3526 + */ 3527 + if (adapter->ets) 3528 + igb_set_i2c_bb(hw); 3529 + hw->mac.ops.init_thermal_sensor_thresh(hw); 3540 3530 if (igb_sysfs_init(adapter)) 3541 3531 dev_err(&pdev->dev, 3542 3532 "failed to allocate sysfs resources\n"); ··· 6816 6794 struct timespec64 ts; 6817 6795 u32 tsauxc; 6818 6796 6819 - if (pin < 0 || pin >= IGB_N_PEROUT) 6797 + if (pin < 0 || pin >= IGB_N_SDP) 6820 6798 return; 6821 6799 6822 6800 spin_lock(&adapter->tmreg_lock); ··· 6824 6802 if (hw->mac.type == e1000_82580 || 6825 6803 hw->mac.type == e1000_i354 || 6826 6804 hw->mac.type == e1000_i350) { 6827 - s64 ns = timespec64_to_ns(&adapter->perout[pin].period); 6805 + s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period); 6828 6806 u32 systiml, systimh, level_mask, level, rem; 6829 6807 u64 systim, now; 6830 6808 ··· 6872 6850 ts.tv_nsec = (u32)systim; 6873 6851 ts.tv_sec = ((u32)(systim >> 32)) & 0xFF; 6874 6852 } else { 6875 - ts = timespec64_add(adapter->perout[pin].start, 6876 - adapter->perout[pin].period); 6853 + ts = timespec64_add(adapter->perout[tsintr_tt].start, 6854 + adapter->perout[tsintr_tt].period); 6877 6855 } 6878 6856 6879 6857 /* u32 conversion of tv_sec is safe until y2106 */ ··· 6882 6860 tsauxc = rd32(E1000_TSAUXC); 6883 6861 tsauxc |= TSAUXC_EN_TT0; 6884 6862 wr32(E1000_TSAUXC, tsauxc); 6885 - adapter->perout[pin].start = ts; 6863 + adapter->perout[tsintr_tt].start = ts; 6886 6864 6887 6865 spin_unlock(&adapter->tmreg_lock); 6888 6866 } ··· 6896 6874 struct ptp_clock_event event; 6897 6875 struct timespec64 ts; 6898 6876 6899 - if (pin < 0 || pin >= IGB_N_EXTTS) 6877 + if (pin < 0 || pin >= IGB_N_SDP) 6900 6878 return; 6901 6879 6902 6880 if (hw->mac.type == e1000_82580 ||
+2
drivers/net/ethernet/intel/ixgbe/ixgbe.h
··· 73 73 #define IXGBE_RXBUFFER_4K 4096 74 74 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ 75 75 76 + #define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) 77 + 76 78 /* Attempt to maximize the headroom available for incoming frames. We 77 79 * use a 2K buffer for receives and need 1536/1534 to store the data for 78 80 * the frame. This leaves us with 512 bytes of room. From that we need
+17 -11
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
··· 6778 6778 } 6779 6779 6780 6780 /** 6781 + * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP 6782 + * @adapter: device handle, pointer to adapter 6783 + */ 6784 + static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) 6785 + { 6786 + if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) 6787 + return IXGBE_RXBUFFER_2K; 6788 + else 6789 + return IXGBE_RXBUFFER_3K; 6790 + } 6791 + 6792 + /** 6781 6793 * ixgbe_change_mtu - Change the Maximum Transfer Unit 6782 6794 * @netdev: network interface device structure 6783 6795 * @new_mtu: new value for maximum frame size ··· 6800 6788 { 6801 6789 struct ixgbe_adapter *adapter = netdev_priv(netdev); 6802 6790 6803 - if (adapter->xdp_prog) { 6804 - int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 6805 - VLAN_HLEN; 6806 - int i; 6791 + if (ixgbe_enabled_xdp_adapter(adapter)) { 6792 + int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD; 6807 6793 6808 - for (i = 0; i < adapter->num_rx_queues; i++) { 6809 - struct ixgbe_ring *ring = adapter->rx_ring[i]; 6810 - 6811 - if (new_frame_size > ixgbe_rx_bufsz(ring)) { 6812 - e_warn(probe, "Requested MTU size is not supported with XDP\n"); 6813 - return -EINVAL; 6814 - } 6794 + if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { 6795 + e_warn(probe, "Requested MTU size is not supported with XDP\n"); 6796 + return -EINVAL; 6815 6797 } 6816 6798 } 6817 6799
+28 -17
drivers/net/ethernet/netronome/nfp/crypto/ipsec.c
··· 129 129 }; 130 130 }; 131 131 132 - static int nfp_ipsec_cfg_cmd_issue(struct nfp_net *nn, int type, int saidx, 133 - struct nfp_ipsec_cfg_mssg *msg) 132 + static int nfp_net_ipsec_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry) 134 133 { 134 + unsigned int offset = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; 135 + struct nfp_ipsec_cfg_mssg *msg = (struct nfp_ipsec_cfg_mssg *)entry->msg; 135 136 int i, msg_size, ret; 136 137 137 - msg->cmd = type; 138 - msg->sa_idx = saidx; 139 - msg->rsp = 0; 140 - msg_size = ARRAY_SIZE(msg->raw); 141 - 142 - for (i = 0; i < msg_size; i++) 143 - nn_writel(nn, NFP_NET_CFG_MBOX_VAL + 4 * i, msg->raw[i]); 144 - 145 - ret = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_IPSEC); 146 - if (ret < 0) 138 + ret = nfp_net_mbox_lock(nn, sizeof(*msg)); 139 + if (ret) 147 140 return ret; 141 + 142 + msg_size = ARRAY_SIZE(msg->raw); 143 + for (i = 0; i < msg_size; i++) 144 + nn_writel(nn, offset + 4 * i, msg->raw[i]); 145 + 146 + ret = nfp_net_mbox_reconfig(nn, entry->cmd); 147 + if (ret < 0) { 148 + nn_ctrl_bar_unlock(nn); 149 + return ret; 150 + } 148 151 149 152 /* For now we always read the whole message response back */ 150 153 for (i = 0; i < msg_size; i++) 151 - msg->raw[i] = nn_readl(nn, NFP_NET_CFG_MBOX_VAL + 4 * i); 154 + msg->raw[i] = nn_readl(nn, offset + 4 * i); 155 + 156 + nn_ctrl_bar_unlock(nn); 152 157 153 158 switch (msg->rsp) { 154 159 case NFP_IPSEC_CFG_MSSG_OK: ··· 482 477 } 483 478 484 479 /* Allocate saidx and commit the SA */ 485 - err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_ADD_SA, saidx, &msg); 480 + msg.cmd = NFP_IPSEC_CFG_MSSG_ADD_SA; 481 + msg.sa_idx = saidx; 482 + err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, 483 + sizeof(msg), nfp_net_ipsec_cfg); 486 484 if (err) { 487 485 xa_erase(&nn->xa_ipsec, saidx); 488 486 nn_err(nn, "Failed to issue IPsec command err ret=%d\n", err); ··· 499 491 500 492 static void nfp_net_xfrm_del_state(struct xfrm_state *x) 501 
493 { 494 + struct nfp_ipsec_cfg_mssg msg = { 495 + .cmd = NFP_IPSEC_CFG_MSSG_INV_SA, 496 + .sa_idx = x->xso.offload_handle - 1, 497 + }; 502 498 struct net_device *netdev = x->xso.dev; 503 - struct nfp_ipsec_cfg_mssg msg; 504 499 struct nfp_net *nn; 505 500 int err; 506 501 507 502 nn = netdev_priv(netdev); 508 - err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_INV_SA, 509 - x->xso.offload_handle - 1, &msg); 503 + err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, 504 + sizeof(msg), nfp_net_ipsec_cfg); 510 505 if (err) 511 506 nn_warn(nn, "Failed to invalidate SA in hardware\n"); 512 507
+19 -6
drivers/net/ethernet/netronome/nfp/nfp_net.h
··· 617 617 * @vnic_no_name: For non-port PF vNIC make ndo_get_phys_port_name return 618 618 * -EOPNOTSUPP to keep backwards compatibility (set by app) 619 619 * @port: Pointer to nfp_port structure if vNIC is a port 620 - * @mc_lock: Protect mc_addrs list 621 - * @mc_addrs: List of mc addrs to add/del to HW 622 - * @mc_work: Work to update mc addrs 620 + * @mbox_amsg: Asynchronously processed message via mailbox 621 + * @mbox_amsg.lock: Protect message list 622 + * @mbox_amsg.list: List of message to process 623 + * @mbox_amsg.work: Work to process message asynchronously 623 624 * @app_priv: APP private data for this vNIC 624 625 */ 625 626 struct nfp_net { ··· 722 721 723 722 struct nfp_port *port; 724 723 725 - spinlock_t mc_lock; 726 - struct list_head mc_addrs; 727 - struct work_struct mc_work; 724 + struct { 725 + spinlock_t lock; 726 + struct list_head list; 727 + struct work_struct work; 728 + } mbox_amsg; 728 729 729 730 void *app_priv; 730 731 }; 732 + 733 + struct nfp_mbox_amsg_entry { 734 + struct list_head list; 735 + int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry); 736 + u32 cmd; 737 + char msg[]; 738 + }; 739 + 740 + int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len, 741 + int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *)); 731 742 732 743 /* Functions to read/write from/to a BAR 733 744 * Performs any endian conversion necessary.
+56 -54
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
··· 1334 1334 return err; 1335 1335 } 1336 1336 1337 - struct nfp_mc_addr_entry { 1338 - u8 addr[ETH_ALEN]; 1339 - u32 cmd; 1340 - struct list_head list; 1341 - }; 1342 - 1343 - static int nfp_net_mc_cfg(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) 1337 + int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len, 1338 + int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *)) 1344 1339 { 1340 + struct nfp_mbox_amsg_entry *entry; 1341 + 1342 + entry = kmalloc(sizeof(*entry) + len, GFP_ATOMIC); 1343 + if (!entry) 1344 + return -ENOMEM; 1345 + 1346 + memcpy(entry->msg, data, len); 1347 + entry->cmd = cmd; 1348 + entry->cfg = cb; 1349 + 1350 + spin_lock_bh(&nn->mbox_amsg.lock); 1351 + list_add_tail(&entry->list, &nn->mbox_amsg.list); 1352 + spin_unlock_bh(&nn->mbox_amsg.lock); 1353 + 1354 + schedule_work(&nn->mbox_amsg.work); 1355 + 1356 + return 0; 1357 + } 1358 + 1359 + static void nfp_net_mbox_amsg_work(struct work_struct *work) 1360 + { 1361 + struct nfp_net *nn = container_of(work, struct nfp_net, mbox_amsg.work); 1362 + struct nfp_mbox_amsg_entry *entry, *tmp; 1363 + struct list_head tmp_list; 1364 + 1365 + INIT_LIST_HEAD(&tmp_list); 1366 + 1367 + spin_lock_bh(&nn->mbox_amsg.lock); 1368 + list_splice_init(&nn->mbox_amsg.list, &tmp_list); 1369 + spin_unlock_bh(&nn->mbox_amsg.lock); 1370 + 1371 + list_for_each_entry_safe(entry, tmp, &tmp_list, list) { 1372 + int err = entry->cfg(nn, entry); 1373 + 1374 + if (err) 1375 + nn_err(nn, "Config cmd %d to HW failed %d.\n", entry->cmd, err); 1376 + 1377 + list_del(&entry->list); 1378 + kfree(entry); 1379 + } 1380 + } 1381 + 1382 + static int nfp_net_mc_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry) 1383 + { 1384 + unsigned char *addr = entry->msg; 1345 1385 int ret; 1346 1386 1347 1387 ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ); ··· 1393 1353 nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO, 1394 1354 get_unaligned_be16(addr + 4)); 
1395 1355 1396 - return nfp_net_mbox_reconfig_and_unlock(nn, cmd); 1397 - } 1398 - 1399 - static int nfp_net_mc_prep(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) 1400 - { 1401 - struct nfp_mc_addr_entry *entry; 1402 - 1403 - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 1404 - if (!entry) 1405 - return -ENOMEM; 1406 - 1407 - ether_addr_copy(entry->addr, addr); 1408 - entry->cmd = cmd; 1409 - spin_lock_bh(&nn->mc_lock); 1410 - list_add_tail(&entry->list, &nn->mc_addrs); 1411 - spin_unlock_bh(&nn->mc_lock); 1412 - 1413 - schedule_work(&nn->mc_work); 1414 - 1415 - return 0; 1356 + return nfp_net_mbox_reconfig_and_unlock(nn, entry->cmd); 1416 1357 } 1417 1358 1418 1359 static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) ··· 1406 1385 return -EINVAL; 1407 1386 } 1408 1387 1409 - return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD); 1388 + return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD, addr, 1389 + NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg); 1410 1390 } 1411 1391 1412 1392 static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr) 1413 1393 { 1414 1394 struct nfp_net *nn = netdev_priv(netdev); 1415 1395 1416 - return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL); 1417 - } 1418 - 1419 - static void nfp_net_mc_addr_config(struct work_struct *work) 1420 - { 1421 - struct nfp_net *nn = container_of(work, struct nfp_net, mc_work); 1422 - struct nfp_mc_addr_entry *entry, *tmp; 1423 - struct list_head tmp_list; 1424 - 1425 - INIT_LIST_HEAD(&tmp_list); 1426 - 1427 - spin_lock_bh(&nn->mc_lock); 1428 - list_splice_init(&nn->mc_addrs, &tmp_list); 1429 - spin_unlock_bh(&nn->mc_lock); 1430 - 1431 - list_for_each_entry_safe(entry, tmp, &tmp_list, list) { 1432 - if (nfp_net_mc_cfg(nn, entry->addr, entry->cmd)) 1433 - nn_err(nn, "Config mc address to HW failed.\n"); 1434 - 1435 - list_del(&entry->list); 1436 - kfree(entry); 1437 - } 1396 + return 
nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL, addr, 1397 + NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg); 1438 1398 } 1439 1399 1440 1400 static void nfp_net_set_rx_mode(struct net_device *netdev) ··· 2683 2681 if (!nn->dp.netdev) 2684 2682 return 0; 2685 2683 2686 - spin_lock_init(&nn->mc_lock); 2687 - INIT_LIST_HEAD(&nn->mc_addrs); 2688 - INIT_WORK(&nn->mc_work, nfp_net_mc_addr_config); 2684 + spin_lock_init(&nn->mbox_amsg.lock); 2685 + INIT_LIST_HEAD(&nn->mbox_amsg.list); 2686 + INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work); 2689 2687 2690 2688 return register_netdev(nn->dp.netdev); 2691 2689 ··· 2706 2704 unregister_netdev(nn->dp.netdev); 2707 2705 nfp_net_ipsec_clean(nn); 2708 2706 nfp_ccm_mbox_clean(nn); 2709 - flush_work(&nn->mc_work); 2707 + flush_work(&nn->mbox_amsg.work); 2710 2708 nfp_net_reconfig_wait_posted(nn); 2711 2709 }
-1
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
··· 403 403 */ 404 404 #define NFP_NET_CFG_MBOX_BASE 0x1800 405 405 #define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8 406 - #define NFP_NET_CFG_MBOX_VAL 0x1808 407 406 #define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0 408 407 #define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4 409 408 #define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8
+2 -1
drivers/net/ethernet/stmicro/stmmac/dwmac5.c
··· 541 541 return 0; 542 542 } 543 543 544 - val |= PPSCMDx(index, 0x2); 545 544 val |= TRGTMODSELx(index, 0x2); 546 545 val |= PPSEN0; 546 + writel(val, ioaddr + MAC_PPS_CONTROL); 547 547 548 548 writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index)); 549 549 ··· 568 568 writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index)); 569 569 570 570 /* Finally, activate it */ 571 + val |= PPSCMDx(index, 0x2); 571 572 writel(val, ioaddr + MAC_PPS_CONTROL); 572 573 return 0; 573 574 }
+1 -1
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
··· 559 559 dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); 560 560 561 561 plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode"); 562 - if (plat->force_thresh_dma_mode) { 562 + if (plat->force_thresh_dma_mode && plat->force_sf_dma_mode) { 563 563 plat->force_sf_dma_mode = 0; 564 564 dev_warn(&pdev->dev, 565 565 "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n");
+11 -1
drivers/net/ethernet/ti/am65-cpsw-nuss.c
··· 501 501 k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn); 502 502 } 503 503 504 + reinit_completion(&common->tdown_complete); 504 505 k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true); 506 + 507 + if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) { 508 + i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000)); 509 + if (!i) 510 + dev_err(common->dev, "rx teardown timeout\n"); 511 + } 512 + 505 513 napi_disable(&common->napi_rx); 506 514 507 515 for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) ··· 729 721 730 722 if (cppi5_desc_is_tdcm(desc_dma)) { 731 723 dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx); 724 + if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) 725 + complete(&common->tdown_complete); 732 726 return 0; 733 727 } 734 728 ··· 2682 2672 }; 2683 2673 2684 2674 static const struct am65_cpsw_pdata am64x_cpswxg_pdata = { 2685 - .quirks = 0, 2675 + .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ, 2686 2676 .ale_dev_id = "am64-cpswxg", 2687 2677 .fdqring_mode = K3_RINGACC_RING_MODE_RING, 2688 2678 };
+1
drivers/net/ethernet/ti/am65-cpsw-nuss.h
··· 90 90 }; 91 91 92 92 #define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0) 93 + #define AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ BIT(1) 93 94 94 95 struct am65_cpsw_pdata { 95 96 u32 quirks;
+4 -4
drivers/net/usb/kalmia.c
··· 65 65 init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT); 66 66 if (status != 0) { 67 67 netdev_err(dev->net, 68 - "Error sending init packet. Status %i, length %i\n", 69 - status, act_len); 68 + "Error sending init packet. Status %i\n", 69 + status); 70 70 return status; 71 71 } 72 72 else if (act_len != init_msg_len) { ··· 83 83 84 84 if (status != 0) 85 85 netdev_err(dev->net, 86 - "Error receiving init result. Status %i, length %i\n", 87 - status, act_len); 86 + "Error receiving init result. Status %i\n", 87 + status); 88 88 else if (act_len != expected_len) 89 89 netdev_err(dev->net, "Unexpected init result length: %i\n", 90 90 act_len);
+25 -25
drivers/net/vmxnet3/vmxnet3_drv.c
··· 1546 1546 rxd->len = rbi->len; 1547 1547 } 1548 1548 1549 - #ifdef VMXNET3_RSS 1550 - if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && 1551 - (adapter->netdev->features & NETIF_F_RXHASH)) { 1552 - enum pkt_hash_types hash_type; 1553 - 1554 - switch (rcd->rssType) { 1555 - case VMXNET3_RCD_RSS_TYPE_IPV4: 1556 - case VMXNET3_RCD_RSS_TYPE_IPV6: 1557 - hash_type = PKT_HASH_TYPE_L3; 1558 - break; 1559 - case VMXNET3_RCD_RSS_TYPE_TCPIPV4: 1560 - case VMXNET3_RCD_RSS_TYPE_TCPIPV6: 1561 - case VMXNET3_RCD_RSS_TYPE_UDPIPV4: 1562 - case VMXNET3_RCD_RSS_TYPE_UDPIPV6: 1563 - hash_type = PKT_HASH_TYPE_L4; 1564 - break; 1565 - default: 1566 - hash_type = PKT_HASH_TYPE_L3; 1567 - break; 1568 - } 1569 - skb_set_hash(ctx->skb, 1570 - le32_to_cpu(rcd->rssHash), 1571 - hash_type); 1572 - } 1573 - #endif 1574 1549 skb_record_rx_queue(ctx->skb, rq->qid); 1575 1550 skb_put(ctx->skb, rcd->len); 1576 1551 ··· 1628 1653 u32 mtu = adapter->netdev->mtu; 1629 1654 skb->len += skb->data_len; 1630 1655 1656 + #ifdef VMXNET3_RSS 1657 + if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && 1658 + (adapter->netdev->features & NETIF_F_RXHASH)) { 1659 + enum pkt_hash_types hash_type; 1660 + 1661 + switch (rcd->rssType) { 1662 + case VMXNET3_RCD_RSS_TYPE_IPV4: 1663 + case VMXNET3_RCD_RSS_TYPE_IPV6: 1664 + hash_type = PKT_HASH_TYPE_L3; 1665 + break; 1666 + case VMXNET3_RCD_RSS_TYPE_TCPIPV4: 1667 + case VMXNET3_RCD_RSS_TYPE_TCPIPV6: 1668 + case VMXNET3_RCD_RSS_TYPE_UDPIPV4: 1669 + case VMXNET3_RCD_RSS_TYPE_UDPIPV6: 1670 + hash_type = PKT_HASH_TYPE_L4; 1671 + break; 1672 + default: 1673 + hash_type = PKT_HASH_TYPE_L3; 1674 + break; 1675 + } 1676 + skb_set_hash(skb, 1677 + le32_to_cpu(rcd->rssHash), 1678 + hash_type); 1679 + } 1680 + #endif 1631 1681 vmxnet3_rx_csum(adapter, skb, 1632 1682 (union Vmxnet3_GenericDesc *)rcd); 1633 1683 skb->protocol = eth_type_trans(skb, adapter->netdev);
-2
include/linux/netdevice.h
··· 2839 2839 int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); 2840 2840 int unregister_netdevice_notifier_net(struct net *net, 2841 2841 struct notifier_block *nb); 2842 - void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, 2843 - struct notifier_block *nb); 2844 2842 int register_netdevice_notifier_dev_net(struct net_device *dev, 2845 2843 struct notifier_block *nb, 2846 2844 struct netdev_net_notifier *nn);
+13
include/net/sock.h
··· 2434 2434 return false; 2435 2435 } 2436 2436 2437 + static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) 2438 + { 2439 + skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 2440 + if (skb) { 2441 + if (sk_rmem_schedule(sk, skb, skb->truesize)) { 2442 + skb_set_owner_r(skb, sk); 2443 + return skb; 2444 + } 2445 + __kfree_skb(skb); 2446 + } 2447 + return NULL; 2448 + } 2449 + 2437 2450 static inline void skb_prepare_for_gro(struct sk_buff *skb) 2438 2451 { 2439 2452 if (skb->destructor != sock_wfree) {
+1
net/caif/caif_socket.c
··· 1015 1015 return; 1016 1016 } 1017 1017 sk_stream_kill_queues(&cf_sk->sk); 1018 + WARN_ON_ONCE(sk->sk_forward_alloc); 1018 1019 caif_free_client(&cf_sk->layer); 1019 1020 } 1020 1021
+1 -9
net/core/dev.c
··· 1869 1869 __register_netdevice_notifier_net(dst_net, nb, true); 1870 1870 } 1871 1871 1872 - void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, 1873 - struct notifier_block *nb) 1874 - { 1875 - rtnl_lock(); 1876 - __move_netdevice_notifier_net(src_net, dst_net, nb); 1877 - rtnl_unlock(); 1878 - } 1879 - 1880 1872 int register_netdevice_notifier_dev_net(struct net_device *dev, 1881 1873 struct notifier_block *nb, 1882 1874 struct netdev_net_notifier *nn) ··· 10367 10375 10368 10376 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); 10369 10377 for (i = 0; i < n; i++) 10370 - dst[i] = atomic_long_read(&src[i]); 10378 + dst[i] = (unsigned long)atomic_long_read(&src[i]); 10371 10379 /* zero out counters that only exist in rtnl_link_stats64 */ 10372 10380 memset((char *)stats64 + n * sizeof(u64), 0, 10373 10381 sizeof(*stats64) - n * sizeof(u64));
+1 -4
net/core/devlink.c
··· 4742 4742 if (err) 4743 4743 return err; 4744 4744 4745 - if (dest_net && !net_eq(dest_net, curr_net)) { 4746 - move_netdevice_notifier_net(curr_net, dest_net, 4747 - &devlink->netdevice_nb); 4745 + if (dest_net && !net_eq(dest_net, curr_net)) 4748 4746 write_pnet(&devlink->_net, dest_net); 4749 - } 4750 4747 4751 4748 err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); 4752 4749 devlink_reload_failed_set(devlink, !!err);
+9 -1
net/core/net_namespace.c
··· 304 304 } 305 305 EXPORT_SYMBOL_GPL(get_net_ns_by_id); 306 306 307 + /* init code that must occur even if setup_net() is not called. */ 308 + static __net_init void preinit_net(struct net *net) 309 + { 310 + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); 311 + } 312 + 307 313 /* 308 314 * setup_net runs the initializers for the network namespace object. 309 315 */ ··· 322 316 323 317 refcount_set(&net->ns.count, 1); 324 318 ref_tracker_dir_init(&net->refcnt_tracker, 128); 325 - ref_tracker_dir_init(&net->notrefcnt_tracker, 128); 326 319 327 320 refcount_set(&net->passive, 1); 328 321 get_random_bytes(&net->hash_mix, sizeof(u32)); ··· 477 472 rv = -ENOMEM; 478 473 goto dec_ucounts; 479 474 } 475 + 476 + preinit_net(net); 480 477 refcount_set(&net->passive, 1); 481 478 net->ucounts = ucounts; 482 479 get_user_ns(user_ns); ··· 1125 1118 init_net.key_domain = &init_net_key_domain; 1126 1119 #endif 1127 1120 down_write(&pernet_ops_rwsem); 1121 + preinit_net(&init_net); 1128 1122 if (setup_net(&init_net, &init_user_ns)) 1129 1123 panic("Could not setup the initial network namespace"); 1130 1124
-1
net/core/stream.c
··· 209 209 sk_mem_reclaim_final(sk); 210 210 211 211 WARN_ON_ONCE(sk->sk_wmem_queued); 212 - WARN_ON_ONCE(sk->sk_forward_alloc); 213 212 214 213 /* It is _impossible_ for the backlog to contain anything 215 214 * when we get here. All user references to this socket
+2 -5
net/dccp/ipv6.c
··· 551 551 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); 552 552 /* Clone pktoptions received with SYN, if we own the req */ 553 553 if (*own_req && ireq->pktopts) { 554 - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); 554 + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 555 555 consume_skb(ireq->pktopts); 556 556 ireq->pktopts = NULL; 557 - if (newnp->pktoptions) 558 - skb_set_owner_r(newnp->pktoptions, newsk); 559 557 } 560 558 561 559 return newsk; ··· 613 615 --ANK (980728) 614 616 */ 615 617 if (np->rxopt.all) 616 - opt_skb = skb_clone(skb, GFP_ATOMIC); 618 + opt_skb = skb_clone_and_charge_r(skb, sk); 617 619 618 620 if (sk->sk_state == DCCP_OPEN) { /* Fast path */ 619 621 if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) ··· 677 679 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 678 680 if (ipv6_opt_accepted(sk, opt_skb, 679 681 &DCCP_SKB_CB(opt_skb)->header.h6)) { 680 - skb_set_owner_r(opt_skb, sk); 681 682 memmove(IP6CB(opt_skb), 682 683 &DCCP_SKB_CB(opt_skb)->header.h6, 683 684 sizeof(struct inet6_skb_parm));
+1 -1
net/ipv6/datagram.c
··· 51 51 fl6->flowi6_mark = sk->sk_mark; 52 52 fl6->fl6_dport = inet->inet_dport; 53 53 fl6->fl6_sport = inet->inet_sport; 54 - fl6->flowlabel = np->flow_label; 54 + fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); 55 55 fl6->flowi6_uid = sk->sk_uid; 56 56 57 57 if (!oif)
+4 -7
net/ipv6/tcp_ipv6.c
··· 272 272 fl6.flowi6_proto = IPPROTO_TCP; 273 273 fl6.daddr = sk->sk_v6_daddr; 274 274 fl6.saddr = saddr ? *saddr : np->saddr; 275 + fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); 275 276 fl6.flowi6_oif = sk->sk_bound_dev_if; 276 277 fl6.flowi6_mark = sk->sk_mark; 277 278 fl6.fl6_dport = usin->sin6_port; ··· 1388 1387 1389 1388 /* Clone pktoptions received with SYN, if we own the req */ 1390 1389 if (ireq->pktopts) { 1391 - newnp->pktoptions = skb_clone(ireq->pktopts, 1392 - sk_gfp_mask(sk, GFP_ATOMIC)); 1390 + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1393 1391 consume_skb(ireq->pktopts); 1394 1392 ireq->pktopts = NULL; 1395 - if (newnp->pktoptions) { 1393 + if (newnp->pktoptions) 1396 1394 tcp_v6_restore_cb(newnp->pktoptions); 1397 - skb_set_owner_r(newnp->pktoptions, newsk); 1398 - } 1399 1395 } 1400 1396 } else { 1401 1397 if (!req_unhash && found_dup_sk) { ··· 1464 1466 --ANK (980728) 1465 1467 */ 1466 1468 if (np->rxopt.all) 1467 - opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1469 + opt_skb = skb_clone_and_charge_r(skb, sk); 1468 1470 1469 1471 reason = SKB_DROP_REASON_NOT_SPECIFIED; 1470 1472 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ ··· 1550 1552 if (np->repflow) 1551 1553 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1552 1554 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1553 - skb_set_owner_r(opt_skb, sk); 1554 1555 tcp_v6_restore_cb(opt_skb); 1555 1556 opt_skb = xchg(&np->pktoptions, opt_skb); 1556 1557 } else {
+1 -1
net/key/af_key.c
··· 1261 1261 const struct sadb_x_nat_t_type* n_type; 1262 1262 struct xfrm_encap_tmpl *natt; 1263 1263 1264 - x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); 1264 + x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); 1265 1265 if (!x->encap) { 1266 1266 err = -ENOMEM; 1267 1267 goto out;
+4
net/mpls/af_mpls.c
··· 1428 1428 free: 1429 1429 kfree(table); 1430 1430 out: 1431 + mdev->sysctl = NULL; 1431 1432 return -ENOBUFS; 1432 1433 } 1433 1434 ··· 1437 1436 { 1438 1437 struct net *net = dev_net(dev); 1439 1438 struct ctl_table *table; 1439 + 1440 + if (!mdev->sysctl) 1441 + return; 1440 1442 1441 1443 table = mdev->sysctl->ctl_table_arg; 1442 1444 unregister_net_sysctl_table(mdev->sysctl);
+3 -1
net/openvswitch/meter.c
··· 449 449 450 450 err = attach_meter(meter_tbl, meter); 451 451 if (err) 452 - goto exit_unlock; 452 + goto exit_free_old_meter; 453 453 454 454 ovs_unlock(); 455 455 ··· 472 472 genlmsg_end(reply, ovs_reply_header); 473 473 return genlmsg_reply(reply, info); 474 474 475 + exit_free_old_meter: 476 + ovs_meter_free(old_meter); 475 477 exit_unlock: 476 478 ovs_unlock(); 477 479 nlmsg_free(reply);
+3 -3
net/sched/act_ctinfo.c
··· 93 93 cp = rcu_dereference_bh(ca->params); 94 94 95 95 tcf_lastuse_update(&ca->tcf_tm); 96 - bstats_update(&ca->tcf_bstats, skb); 96 + tcf_action_update_bstats(&ca->common, skb); 97 97 action = READ_ONCE(ca->tcf_action); 98 98 99 99 wlen = skb_network_offset(skb); ··· 212 212 index = actparm->index; 213 213 err = tcf_idr_check_alloc(tn, &index, a, bind); 214 214 if (!err) { 215 - ret = tcf_idr_create(tn, index, est, a, 216 - &act_ctinfo_ops, bind, false, flags); 215 + ret = tcf_idr_create_from_flags(tn, index, est, a, 216 + &act_ctinfo_ops, bind, flags); 217 217 if (ret) { 218 218 tcf_idr_cleanup(tn, index); 219 219 return ret;
+30 -4
net/sched/cls_tcindex.c
··· 12 12 #include <linux/errno.h> 13 13 #include <linux/slab.h> 14 14 #include <linux/refcount.h> 15 + #include <linux/rcupdate.h> 15 16 #include <net/act_api.h> 16 17 #include <net/netlink.h> 17 18 #include <net/pkt_cls.h> ··· 340 339 struct tcf_result cr = {}; 341 340 int err, balloc = 0; 342 341 struct tcf_exts e; 342 + bool update_h = false; 343 343 344 344 err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); 345 345 if (err < 0) ··· 458 456 } 459 457 } 460 458 461 - if (cp->perfect) 459 + if (cp->perfect) { 462 460 r = cp->perfect + handle; 463 - else 464 - r = tcindex_lookup(cp, handle) ? : &new_filter_result; 461 + } else { 462 + /* imperfect area is updated in-place using rcu */ 463 + update_h = !!tcindex_lookup(cp, handle); 464 + r = &new_filter_result; 465 + } 465 466 466 467 if (r == &new_filter_result) { 467 468 f = kzalloc(sizeof(*f), GFP_KERNEL); ··· 490 485 491 486 rcu_assign_pointer(tp->root, cp); 492 487 493 - if (r == &new_filter_result) { 488 + if (update_h) { 489 + struct tcindex_filter __rcu **fp; 490 + struct tcindex_filter *cf; 491 + 492 + f->result.res = r->res; 493 + tcf_exts_change(&f->result.exts, &r->exts); 494 + 495 + /* imperfect area bucket */ 496 + fp = cp->h + (handle % cp->hash); 497 + 498 + /* lookup the filter, guaranteed to exist */ 499 + for (cf = rcu_dereference_bh_rtnl(*fp); cf; 500 + fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) 501 + if (cf->key == (u16)handle) 502 + break; 503 + 504 + f->next = cf->next; 505 + 506 + cf = rcu_replace_pointer(*fp, f, 1); 507 + tcf_exts_get_net(&cf->result.exts); 508 + tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); 509 + } else if (r == &new_filter_result) { 494 510 struct tcindex_filter *nfp; 495 511 struct tcindex_filter __rcu **fp; 496 512
+1 -3
net/sctp/diag.c
··· 343 343 struct sctp_comm_param *commp = p; 344 344 struct sock *sk = ep->base.sk; 345 345 const struct inet_diag_req_v2 *r = commp->r; 346 - struct sctp_association *assoc = 347 - list_entry(ep->asocs.next, struct sctp_association, asocs); 348 346 349 347 /* find the ep only once through the transports by this condition */ 350 - if (tsp->asoc != assoc) 348 + if (!list_is_first(&tsp->asoc->asocs, &ep->asocs)) 351 349 return 0; 352 350 353 351 if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+6 -3
net/socket.c
··· 971 971 static void sock_recv_mark(struct msghdr *msg, struct sock *sk, 972 972 struct sk_buff *skb) 973 973 { 974 - if (sock_flag(sk, SOCK_RCVMARK) && skb) 975 - put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), 976 - &skb->mark); 974 + if (sock_flag(sk, SOCK_RCVMARK) && skb) { 975 + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ 976 + __u32 mark = skb->mark; 977 + 978 + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark); 979 + } 977 980 } 978 981 979 982 void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+2
net/tipc/socket.c
··· 2614 2614 /* Send a 'SYN-' to destination */ 2615 2615 m.msg_name = dest; 2616 2616 m.msg_namelen = destlen; 2617 + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); 2617 2618 2618 2619 /* If connect is in non-blocking case, set MSG_DONTWAIT to 2619 2620 * indicate send_msg() is never blocked. ··· 2777 2776 __skb_queue_head(&new_sk->sk_receive_queue, buf); 2778 2777 skb_set_owner_r(buf, new_sk); 2779 2778 } 2779 + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); 2780 2780 __tipc_sendstream(new_sock, &m, 0); 2781 2781 release_sock(new_sk); 2782 2782 exit:
+127 -1
tools/testing/selftests/net/fib_rule_tests.sh
··· 10 10 11 11 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} 12 12 IP="ip -netns testns" 13 + IP_PEER="ip -netns peerns" 13 14 14 15 RTABLE=100 16 + RTABLE_PEER=101 15 17 GW_IP4=192.51.100.2 16 18 SRC_IP=192.51.100.3 17 19 GW_IP6=2001:db8:1::2 ··· 22 20 DEV_ADDR=192.51.100.1 23 21 DEV_ADDR6=2001:db8:1::1 24 22 DEV=dummy0 25 - TESTS="fib_rule6 fib_rule4" 23 + TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" 24 + 25 + SELFTEST_PATH="" 26 26 27 27 log_test() 28 28 { ··· 56 52 echo "######################################################################" 57 53 } 58 54 55 + check_nettest() 56 + { 57 + if which nettest > /dev/null 2>&1; then 58 + return 0 59 + fi 60 + 61 + # Add the selftest directory to PATH if not already done 62 + if [ "${SELFTEST_PATH}" = "" ]; then 63 + SELFTEST_PATH="$(dirname $0)" 64 + PATH="${PATH}:${SELFTEST_PATH}" 65 + 66 + # Now retry with the new path 67 + if which nettest > /dev/null 2>&1; then 68 + return 0 69 + fi 70 + 71 + if [ "${ret}" -eq 0 ]; then 72 + ret="${ksft_skip}" 73 + fi 74 + echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" 75 + fi 76 + 77 + return 1 78 + } 79 + 59 80 setup() 60 81 { 61 82 set -e ··· 99 70 { 100 71 $IP link del dev dummy0 &> /dev/null 101 72 ip netns del testns 73 + } 74 + 75 + setup_peer() 76 + { 77 + set -e 78 + 79 + ip netns add peerns 80 + $IP_PEER link set dev lo up 81 + 82 + ip link add name veth0 netns testns type veth \ 83 + peer name veth1 netns peerns 84 + $IP link set dev veth0 up 85 + $IP_PEER link set dev veth1 up 86 + 87 + $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0 88 + $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1 89 + 90 + $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad 91 + $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad 92 + 93 + $IP_PEER address add 198.51.100.11/32 dev lo 94 + $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11 95 + 96 + $IP_PEER address add 2001:db8::1:11/128 dev lo 97 + 
$IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11 98 + 99 + set +e 100 + } 101 + 102 + cleanup_peer() 103 + { 104 + $IP link del dev veth0 105 + ip netns del peerns 102 106 } 103 107 104 108 fib_check_iproute_support() ··· 252 190 fi 253 191 } 254 192 193 + # Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly 194 + # taken into account when connecting the socket and when sending packets. 195 + fib_rule6_connect_test() 196 + { 197 + local dsfield 198 + 199 + if ! check_nettest; then 200 + echo "SKIP: Could not run test without nettest tool" 201 + return 202 + fi 203 + 204 + setup_peer 205 + $IP -6 rule add dsfield 0x04 table $RTABLE_PEER 206 + 207 + # Combine the base DS Field value (0x04) with all possible ECN values 208 + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). 209 + # The ECN bits shouldn't influence the result of the test. 210 + for dsfield in 0x04 0x05 0x06 0x07; do 211 + nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ 212 + -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 213 + log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" 214 + 215 + nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ 216 + -l 2001:db8::1:11 -r 2001:db8::1:11 217 + log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" 218 + done 219 + 220 + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER 221 + cleanup_peer 222 + } 223 + 255 224 fib_rule4_del() 256 225 { 257 226 $IP rule del $1 ··· 389 296 fi 390 297 } 391 298 299 + # Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken 300 + # into account when connecting the socket and when sending packets. 301 + fib_rule4_connect_test() 302 + { 303 + local dsfield 304 + 305 + if ! 
check_nettest; then 306 + echo "SKIP: Could not run test without nettest tool" 307 + return 308 + fi 309 + 310 + setup_peer 311 + $IP -4 rule add dsfield 0x04 table $RTABLE_PEER 312 + 313 + # Combine the base DS Field value (0x04) with all possible ECN values 314 + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). 315 + # The ECN bits shouldn't influence the result of the test. 316 + for dsfield in 0x04 0x05 0x06 0x07; do 317 + nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ 318 + -l 198.51.100.11 -r 198.51.100.11 319 + log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" 320 + 321 + nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ 322 + -l 198.51.100.11 -r 198.51.100.11 323 + log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" 324 + done 325 + 326 + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER 327 + cleanup_peer 328 + } 329 + 392 330 run_fibrule_tests() 393 331 { 394 332 log_section "IPv4 fib rule" ··· 469 345 case $t in 470 346 fib_rule6_test|fib_rule6) fib_rule6_test;; 471 347 fib_rule4_test|fib_rule4) fib_rule4_test;; 348 + fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; 349 + fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; 472 350 473 351 help) echo "Test names: $TESTS"; exit 0;; 474 352
+50 -1
tools/testing/selftests/net/nettest.c
··· 87 87 int use_setsockopt; 88 88 int use_freebind; 89 89 int use_cmsg; 90 + uint8_t dsfield; 90 91 const char *dev; 91 92 const char *server_dev; 92 93 int ifindex; ··· 579 578 } 580 579 581 580 return rc; 581 + } 582 + 583 + static int set_dsfield(int sd, int version, int dsfield) 584 + { 585 + if (!dsfield) 586 + return 0; 587 + 588 + switch (version) { 589 + case AF_INET: 590 + if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield, 591 + sizeof(dsfield)) < 0) { 592 + log_err_errno("setsockopt(IP_TOS)"); 593 + return -1; 594 + } 595 + break; 596 + 597 + case AF_INET6: 598 + if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield, 599 + sizeof(dsfield)) < 0) { 600 + log_err_errno("setsockopt(IPV6_TCLASS)"); 601 + return -1; 602 + } 603 + break; 604 + 605 + default: 606 + log_error("Invalid address family\n"); 607 + return -1; 608 + } 609 + 610 + return 0; 582 611 } 583 612 584 613 static int str_to_uint(const char *str, int min, int max, unsigned int *value) ··· 1348 1317 (char *)&one, sizeof(one)) < 0) 1349 1318 log_err_errno("Setting SO_BROADCAST error"); 1350 1319 1320 + if (set_dsfield(sd, AF_INET, args->dsfield) != 0) 1321 + goto out_err; 1322 + 1351 1323 if (args->dev && bind_to_device(sd, args->dev) != 0) 1352 1324 goto out_err; 1353 1325 else if (args->use_setsockopt && ··· 1477 1443 goto err; 1478 1444 1479 1445 if (set_reuseport(sd) != 0) 1446 + goto err; 1447 + 1448 + if (set_dsfield(sd, args->version, args->dsfield) != 0) 1480 1449 goto err; 1481 1450 1482 1451 if (args->dev && bind_to_device(sd, args->dev) != 0) ··· 1695 1658 if (set_reuseport(sd) != 0) 1696 1659 goto err; 1697 1660 1661 + if (set_dsfield(sd, args->version, args->dsfield) != 0) 1662 + goto err; 1663 + 1698 1664 if (args->dev && bind_to_device(sd, args->dev) != 0) 1699 1665 goto err; 1700 1666 else if (args->use_setsockopt && ··· 1902 1862 return client_status; 1903 1863 } 1904 1864 1905 - #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" 1865 + #define GETOPT_STR 
"sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" 1906 1866 #define OPT_FORCE_BIND_KEY_IFINDEX 1001 1907 1867 #define OPT_NO_BIND_KEY_IFINDEX 1002 1908 1868 ··· 1933 1893 " -D|R datagram (D) / raw (R) socket (default stream)\n" 1934 1894 " -l addr local address to bind to in server mode\n" 1935 1895 " -c addr local address to bind to in client mode\n" 1896 + " -Q dsfield DS Field value of the socket (the IP_TOS or\n" 1897 + " IPV6_TCLASS socket option)\n" 1936 1898 " -x configure XFRM policy on socket\n" 1937 1899 "\n" 1938 1900 " -d dev bind socket to given device name\n" ··· 2012 1970 case 'c': 2013 1971 args.has_local_ip = 1; 2014 1972 args.client_local_addr_str = optarg; 1973 + break; 1974 + case 'Q': 1975 + if (str_to_uint(optarg, 0, 255, &tmp) != 0) { 1976 + fprintf(stderr, "Invalid DS Field\n"); 1977 + return 1; 1978 + } 1979 + args.dsfield = tmp; 2015 1980 break; 2016 1981 case 'p': 2017 1982 if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {