Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from David Miller:

1) Fix type of bind option flag in af_xdp, from Baruch Siach.

2) Fix use after free in bpf_xdp_link_release(), from Xuan Zhao.

3) PM refcnt imbalance in r8152, from Takashi Iwai.

4) Sign extension bug in liquidio, from Colin Ian King.

5) Missing range check in s390 bpf jit, from Colin Ian King.

6) Uninit value in caif_seqpkt_sendmsg(), from Ziyong Xuan.

7) Fix skb page recycling race, from Ilias Apalodimas.

8) Fix memory leak in tcindex_partial_destroy_work, from Pavel Skripkin.

9) netrom timer sk refcnt issues, from Nguyen Dinh Phi.

10) Fix data races around tcp's tfo_active_disable_stamp, from Eric
Dumazet.

11) act_skbmod should only operate on ethernet packets, from Peilin Ye.

12) Fix slab out-of-bounds in fib6_nh_flush_exceptions(), from Paolo
Abeni.

13) Fix sparx5 dependencies, from Yajun Deng.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (74 commits)
dpaa2-switch: seed the buffer pool after allocating the swp
net: sched: cls_api: Fix the the wrong parameter
net: sparx5: fix unmet dependencies warning
net: dsa: tag_ksz: dont let the hardware process the layer 4 checksum
net: dsa: ensure linearized SKBs in case of tail taggers
ravb: Remove extra TAB
ravb: Fix a typo in comment
net: dsa: sja1105: make VID 4095 a bridge VLAN too
tcp: disable TFO blackhole logic by default
sctp: do not update transport pathmtu if SPP_PMTUD_ENABLE is not set
net: ixp46x: fix ptp build failure
ibmvnic: Remove the proper scrq flush
selftests: net: add ESP-in-UDP PMTU test
udp: check encap socket in __udp_lib_err
sctp: update active_key for asoc when old key is being replaced
r8169: Avoid duplicate sysfs entry creation error
ixgbe: Fix packet corruption due to missing DMA sync
Revert "qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()"
ipv6: fix another slab-out-of-bounds in fib6_nh_flush_exceptions
fsl/fman: Add fibre support
...

+1218 -270
-56
Documentation/devicetree/bindings/net/imx-dwmac.txt
··· 1 - IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP. 2 - 3 - This file documents platform glue layer for IMX. 4 - Please see stmmac.txt for the other unchanged properties. 5 - 6 - The device node has following properties. 7 - 8 - Required properties: 9 - - compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer 10 - and "snps,dwmac-5.10a" to select IP version. 11 - - clocks: Must contain a phandle for each entry in clock-names. 12 - - clock-names: Should be "stmmaceth" for the host clock. 13 - Should be "pclk" for the MAC apb clock. 14 - Should be "ptp_ref" for the MAC timer clock. 15 - Should be "tx" for the MAC RGMII TX clock: 16 - Should be "mem" for EQOS MEM clock. 17 - - "mem" clock is required for imx8dxl platform. 18 - - "mem" clock is not required for imx8mp platform. 19 - - interrupt-names: Should contain a list of interrupt names corresponding to 20 - the interrupts in the interrupts property, if available. 21 - Should be "macirq" for the main MAC IRQ 22 - Should be "eth_wake_irq" for the IT which wake up system 23 - - intf_mode: Should be phandle/offset pair. The phandle to the syscon node which 24 - encompases the GPR register, and the offset of the GPR register. 25 - - required for imx8mp platform. 26 - - is optional for imx8dxl platform. 27 - 28 - Optional properties: 29 - - intf_mode: is optional for imx8dxl platform. 30 - - snps,rmii_refclk_ext: to select RMII reference clock from external. 
31 - 32 - Example: 33 - eqos: ethernet@30bf0000 { 34 - compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; 35 - reg = <0x30bf0000 0x10000>; 36 - interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, 37 - <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; 38 - interrupt-names = "eth_wake_irq", "macirq"; 39 - clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, 40 - <&clk IMX8MP_CLK_QOS_ENET_ROOT>, 41 - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, 42 - <&clk IMX8MP_CLK_ENET_QOS>; 43 - clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; 44 - assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, 45 - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, 46 - <&clk IMX8MP_CLK_ENET_QOS>; 47 - assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, 48 - <&clk IMX8MP_SYS_PLL2_100M>, 49 - <&clk IMX8MP_SYS_PLL2_125M>; 50 - assigned-clock-rates = <0>, <100000000>, <125000000>; 51 - nvmem-cells = <&eth_mac0>; 52 - nvmem-cell-names = "mac-address"; 53 - nvmem_macaddr_swap; 54 - intf_mode = <&gpr 0x4>; 55 - status = "disabled"; 56 - };
+93
Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/net/nxp,dwmac-imx.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: NXP i.MX8 DWMAC glue layer Device Tree Bindings 8 + 9 + maintainers: 10 + - Joakim Zhang <qiangqing.zhang@nxp.com> 11 + 12 + # We need a select here so we don't match all nodes with 'snps,dwmac' 13 + select: 14 + properties: 15 + compatible: 16 + contains: 17 + enum: 18 + - nxp,imx8mp-dwmac-eqos 19 + - nxp,imx8dxl-dwmac-eqos 20 + required: 21 + - compatible 22 + 23 + allOf: 24 + - $ref: "snps,dwmac.yaml#" 25 + 26 + properties: 27 + compatible: 28 + oneOf: 29 + - items: 30 + - enum: 31 + - nxp,imx8mp-dwmac-eqos 32 + - nxp,imx8dxl-dwmac-eqos 33 + - const: snps,dwmac-5.10a 34 + 35 + clocks: 36 + minItems: 3 37 + maxItems: 5 38 + items: 39 + - description: MAC host clock 40 + - description: MAC apb clock 41 + - description: MAC timer clock 42 + - description: MAC RGMII TX clock 43 + - description: EQOS MEM clock 44 + 45 + clock-names: 46 + minItems: 3 47 + maxItems: 5 48 + contains: 49 + enum: 50 + - stmmaceth 51 + - pclk 52 + - ptp_ref 53 + - tx 54 + - mem 55 + 56 + intf_mode: 57 + $ref: /schemas/types.yaml#/definitions/phandle-array 58 + description: 59 + Should be phandle/offset pair. The phandle to the syscon node which 60 + encompases the GPR register, and the offset of the GPR register. 61 + 62 + snps,rmii_refclk_ext: 63 + $ref: /schemas/types.yaml#/definitions/flag 64 + description: 65 + To select RMII reference clock from external. 
66 + 67 + required: 68 + - compatible 69 + - clocks 70 + - clock-names 71 + 72 + unevaluatedProperties: false 73 + 74 + examples: 75 + - | 76 + #include <dt-bindings/interrupt-controller/arm-gic.h> 77 + #include <dt-bindings/interrupt-controller/irq.h> 78 + #include <dt-bindings/clock/imx8mp-clock.h> 79 + 80 + eqos: ethernet@30bf0000 { 81 + compatible = "nxp,imx8mp-dwmac-eqos","snps,dwmac-5.10a"; 82 + reg = <0x30bf0000 0x10000>; 83 + interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, 84 + <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; 85 + interrupt-names = "macirq", "eth_wake_irq"; 86 + clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, 87 + <&clk IMX8MP_CLK_QOS_ENET_ROOT>, 88 + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, 89 + <&clk IMX8MP_CLK_ENET_QOS>; 90 + clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; 91 + phy-mode = "rgmii"; 92 + status = "disabled"; 93 + };
+3
Documentation/devicetree/bindings/net/snps,dwmac.yaml
··· 28 28 - snps,dwmac-4.00 29 29 - snps,dwmac-4.10a 30 30 - snps,dwmac-4.20a 31 + - snps,dwmac-5.10a 31 32 - snps,dwxgmac 32 33 - snps,dwxgmac-2.10 33 34 ··· 83 82 - snps,dwmac-4.00 84 83 - snps,dwmac-4.10a 85 84 - snps,dwmac-4.20a 85 + - snps,dwmac-5.10a 86 86 - snps,dwxgmac 87 87 - snps,dwxgmac-2.10 88 88 ··· 377 375 - snps,dwmac-4.00 378 376 - snps,dwmac-4.10a 379 377 - snps,dwmac-4.20a 378 + - snps,dwmac-5.10a 380 379 - snps,dwxgmac 381 380 - snps,dwxgmac-2.10 382 381 - st,spear600-gmac
+3 -3
Documentation/networking/af_xdp.rst
··· 243 243 These are the various configuration flags that can be used to control 244 244 and monitor the behavior of AF_XDP sockets. 245 245 246 - XDP_COPY and XDP_ZERO_COPY bind flags 247 - ------------------------------------- 246 + XDP_COPY and XDP_ZEROCOPY bind flags 247 + ------------------------------------ 248 248 249 249 When you bind to a socket, the kernel will first try to use zero-copy 250 250 copy. If zero-copy is not supported, it will fall back on using copy ··· 252 252 like to force a certain mode, you can use the following flags. If you 253 253 pass the XDP_COPY flag to the bind call, the kernel will force the 254 254 socket into copy mode. If it cannot use copy mode, the bind call will 255 - fail with an error. Conversely, the XDP_ZERO_COPY flag will force the 255 + fail with an error. Conversely, the XDP_ZEROCOPY flag will force the 256 256 socket into zero-copy mode or fail. 257 257 258 258 XDP_SHARED_UMEM bind flag
+1 -1
Documentation/networking/ip-sysctl.rst
··· 826 826 initial value when the blackhole issue goes away. 827 827 0 to disable the blackhole detection. 828 828 829 - By default, it is set to 1hr. 829 + By default, it is set to 0 (feature is disabled). 830 830 831 831 tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs 832 832 The list consists of a primary key and an optional backup key. The
+1
MAINTAINERS
··· 11758 11758 MEDIATEK SWITCH DRIVER 11759 11759 M: Sean Wang <sean.wang@mediatek.com> 11760 11760 M: Landen Chao <Landen.Chao@mediatek.com> 11761 + M: DENG Qingfang <dqfext@gmail.com> 11761 11762 L: netdev@vger.kernel.org 11762 11763 S: Maintained 11763 11764 F: drivers/net/dsa/mt7530.*
+3 -3
arch/arm64/boot/dts/freescale/imx8mp.dtsi
··· 821 821 eqos: ethernet@30bf0000 { 822 822 compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; 823 823 reg = <0x30bf0000 0x10000>; 824 - interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, 825 - <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; 826 - interrupt-names = "eth_wake_irq", "macirq"; 824 + interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, 825 + <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; 826 + interrupt-names = "macirq", "eth_wake_irq"; 827 827 clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, 828 828 <&clk IMX8MP_CLK_QOS_ENET_ROOT>, 829 829 <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
+1 -1
arch/s390/net/bpf_jit_comp.c
··· 112 112 { 113 113 u32 r1 = reg2hex[b1]; 114 114 115 - if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) 115 + if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) 116 116 jit->seen_reg[r1] = 1; 117 117 } 118 118
+2
drivers/net/bonding/bond_main.c
··· 3450 3450 return bond_event_changename(event_bond); 3451 3451 case NETDEV_UNREGISTER: 3452 3452 bond_remove_proc_entry(event_bond); 3453 + #ifdef CONFIG_XFRM_OFFLOAD 3453 3454 xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); 3455 + #endif /* CONFIG_XFRM_OFFLOAD */ 3454 3456 break; 3455 3457 case NETDEV_REGISTER: 3456 3458 bond_create_proc_entry(event_bond);
+2
drivers/net/dsa/mt7530.c
··· 366 366 int i; 367 367 368 368 reg[1] |= vid & CVID_MASK; 369 + if (vid > 1) 370 + reg[1] |= ATA2_IVL; 369 371 reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; 370 372 reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; 371 373 /* STATIC_ENT indicate that entry is static wouldn't
+1
drivers/net/dsa/mt7530.h
··· 79 79 #define STATIC_EMP 0 80 80 #define STATIC_ENT 3 81 81 #define MT7530_ATA2 0x78 82 + #define ATA2_IVL BIT(15) 82 83 83 84 /* Register for address table write data */ 84 85 #define MT7530_ATWD 0x7c
+1 -1
drivers/net/dsa/mv88e6xxx/Kconfig
··· 12 12 config NET_DSA_MV88E6XXX_PTP 13 13 bool "PTP support for Marvell 88E6xxx" 14 14 default n 15 - depends on PTP_1588_CLOCK 15 + depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK 16 16 help 17 17 Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch 18 18 chips that support it.
+6
drivers/net/dsa/sja1105/sja1105_main.c
··· 397 397 if (dsa_is_cpu_port(ds, port)) 398 398 v->pvid = true; 399 399 list_add(&v->list, &priv->dsa_8021q_vlans); 400 + 401 + v = kmemdup(v, sizeof(*v), GFP_KERNEL); 402 + if (!v) 403 + return -ENOMEM; 404 + 405 + list_add(&v->list, &priv->bridge_vlans); 400 406 } 401 407 402 408 ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
+62 -23
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 1671 1671 1672 1672 if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && 1673 1673 (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { 1674 - u16 vlan_proto = tpa_info->metadata >> 1675 - RX_CMP_FLAGS2_METADATA_TPID_SFT; 1674 + __be16 vlan_proto = htons(tpa_info->metadata >> 1675 + RX_CMP_FLAGS2_METADATA_TPID_SFT); 1676 1676 u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; 1677 1677 1678 - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); 1678 + if (eth_type_vlan(vlan_proto)) { 1679 + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); 1680 + } else { 1681 + dev_kfree_skb(skb); 1682 + return NULL; 1683 + } 1679 1684 } 1680 1685 1681 1686 skb_checksum_none_assert(skb); ··· 1902 1897 (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { 1903 1898 u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); 1904 1899 u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; 1905 - u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; 1900 + __be16 vlan_proto = htons(meta_data >> 1901 + RX_CMP_FLAGS2_METADATA_TPID_SFT); 1906 1902 1907 - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); 1903 + if (eth_type_vlan(vlan_proto)) { 1904 + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); 1905 + } else { 1906 + dev_kfree_skb(skb); 1907 + goto next_rx; 1908 + } 1908 1909 } 1909 1910 1910 1911 skb_checksum_none_assert(skb); ··· 7574 7563 bp->flags &= ~BNXT_FLAG_WOL_CAP; 7575 7564 if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED) 7576 7565 bp->flags |= BNXT_FLAG_WOL_CAP; 7577 - if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) 7566 + if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) { 7578 7567 __bnxt_hwrm_ptp_qcfg(bp); 7568 + } else { 7569 + kfree(bp->ptp_cfg); 7570 + bp->ptp_cfg = NULL; 7571 + } 7579 7572 } else { 7580 7573 #ifdef CONFIG_BNXT_SRIOV 7581 7574 struct bnxt_vf_info *vf = &bp->vf; ··· 10138 10123 } 10139 10124 } 10140 10125 10141 - bnxt_ptp_start(bp); 10142 10126 rc = bnxt_init_nic(bp, irq_re_init); 10143 10127 if (rc) { 10144 
10128 netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); ··· 10211 10197 { 10212 10198 int rc = 0; 10213 10199 10200 + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { 10201 + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting half open\n"); 10202 + rc = -ENODEV; 10203 + goto half_open_err; 10204 + } 10205 + 10214 10206 rc = bnxt_alloc_mem(bp, false); 10215 10207 if (rc) { 10216 10208 netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); ··· 10276 10256 rc = bnxt_hwrm_if_change(bp, true); 10277 10257 if (rc) 10278 10258 return rc; 10259 + 10260 + if (bnxt_ptp_init(bp)) { 10261 + netdev_warn(dev, "PTP initialization failed.\n"); 10262 + kfree(bp->ptp_cfg); 10263 + bp->ptp_cfg = NULL; 10264 + } 10279 10265 rc = __bnxt_open_nic(bp, true, true); 10280 10266 if (rc) { 10281 10267 bnxt_hwrm_if_change(bp, false); 10268 + bnxt_ptp_clear(bp); 10282 10269 } else { 10283 10270 if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { 10284 10271 if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { ··· 10376 10349 { 10377 10350 struct bnxt *bp = netdev_priv(dev); 10378 10351 10352 + bnxt_ptp_clear(bp); 10379 10353 bnxt_hwmon_close(bp); 10380 10354 bnxt_close_nic(bp, true, true); 10381 10355 bnxt_hwrm_shutdown_link(bp); ··· 11363 11335 bnxt_clear_int_mode(bp); 11364 11336 pci_disable_device(bp->pdev); 11365 11337 } 11338 + bnxt_ptp_clear(bp); 11366 11339 __bnxt_close_nic(bp, true, false); 11367 11340 bnxt_vf_reps_free(bp); 11368 11341 bnxt_clear_int_mode(bp); ··· 11988 11959 (bp->fw_reset_max_dsecs * HZ / 10)); 11989 11960 } 11990 11961 11962 + static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) 11963 + { 11964 + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); 11965 + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { 11966 + bnxt_ulp_start(bp, rc); 11967 + bnxt_dl_health_status_update(bp, false); 11968 + } 11969 + bp->fw_reset_state = 0; 11970 + dev_close(bp->dev); 11971 + } 11972 + 11991 11973 static void bnxt_fw_reset_task(struct 
work_struct *work) 11992 11974 { 11993 11975 struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); 11994 - int rc; 11976 + int rc = 0; 11995 11977 11996 11978 if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { 11997 11979 netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); ··· 12032 11992 } 12033 11993 bp->fw_reset_timestamp = jiffies; 12034 11994 rtnl_lock(); 11995 + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { 11996 + bnxt_fw_reset_abort(bp, rc); 11997 + rtnl_unlock(); 11998 + return; 11999 + } 12035 12000 bnxt_fw_reset_close(bp); 12036 12001 if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { 12037 12002 bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN; ··· 12084 12039 if (val == 0xffff) { 12085 12040 if (bnxt_fw_reset_timeout(bp)) { 12086 12041 netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); 12042 + rc = -ETIMEDOUT; 12087 12043 goto fw_reset_abort; 12088 12044 } 12089 12045 bnxt_queue_fw_reset_work(bp, HZ / 1000); ··· 12094 12048 clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); 12095 12049 if (pci_enable_device(bp->pdev)) { 12096 12050 netdev_err(bp->dev, "Cannot re-enable PCI device\n"); 12051 + rc = -ENODEV; 12097 12052 goto fw_reset_abort; 12098 12053 } 12099 12054 pci_set_master(bp->pdev); ··· 12121 12074 } 12122 12075 rc = bnxt_open(bp->dev); 12123 12076 if (rc) { 12124 - netdev_err(bp->dev, "bnxt_open_nic() failed\n"); 12125 - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); 12126 - dev_close(bp->dev); 12077 + netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); 12078 + bnxt_fw_reset_abort(bp, rc); 12079 + rtnl_unlock(); 12080 + return; 12127 12081 } 12128 12082 12129 12083 bp->fw_reset_state = 0; ··· 12151 12103 netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); 12152 12104 } 12153 12105 fw_reset_abort: 12154 - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); 12155 - if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) 12156 - bnxt_dl_health_status_update(bp, 
false); 12157 - bp->fw_reset_state = 0; 12158 12106 rtnl_lock(); 12159 - dev_close(bp->dev); 12107 + bnxt_fw_reset_abort(bp, rc); 12160 12108 rtnl_unlock(); 12161 12109 } 12162 12110 ··· 12706 12662 if (BNXT_PF(bp)) 12707 12663 devlink_port_type_clear(&bp->dl_port); 12708 12664 12709 - bnxt_ptp_clear(bp); 12710 12665 pci_disable_pcie_error_reporting(pdev); 12711 12666 unregister_netdev(dev); 12712 12667 clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); ··· 13289 13246 rc); 13290 13247 } 13291 13248 13292 - if (bnxt_ptp_init(bp)) { 13293 - netdev_warn(dev, "PTP initialization failed.\n"); 13294 - kfree(bp->ptp_cfg); 13295 - bp->ptp_cfg = NULL; 13296 - } 13297 13249 bnxt_inv_fw_health_reg(bp); 13298 13250 bnxt_dl_register(bp); 13299 13251 ··· 13474 13436 if (netif_running(netdev)) 13475 13437 bnxt_close(netdev); 13476 13438 13477 - pci_disable_device(pdev); 13439 + if (pci_is_enabled(pdev)) 13440 + pci_disable_device(pdev); 13478 13441 bnxt_free_ctx_mem(bp); 13479 13442 kfree(bp->ctx); 13480 13443 bp->ctx = NULL;
+9 -1
drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
··· 433 433 static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) 434 434 { 435 435 int total_ets_bw = 0; 436 + bool zero = false; 436 437 u8 max_tc = 0; 437 438 int i; 438 439 ··· 454 453 break; 455 454 case IEEE_8021QAZ_TSA_ETS: 456 455 total_ets_bw += ets->tc_tx_bw[i]; 456 + zero = zero || !ets->tc_tx_bw[i]; 457 457 break; 458 458 default: 459 459 return -ENOTSUPP; 460 460 } 461 461 } 462 - if (total_ets_bw > 100) 462 + if (total_ets_bw > 100) { 463 + netdev_warn(bp->dev, "rejecting ETS config exceeding available bandwidth\n"); 463 464 return -EINVAL; 465 + } 466 + if (zero && total_ets_bw == 100) { 467 + netdev_warn(bp->dev, "rejecting ETS config starving a TC\n"); 468 + return -EINVAL; 469 + } 464 470 465 471 if (max_tc >= bp->max_tc) 466 472 *tc = bp->max_tc;
+7 -17
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
··· 385 385 return 0; 386 386 } 387 387 388 - void bnxt_ptp_start(struct bnxt *bp) 389 - { 390 - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; 391 - 392 - if (!ptp) 393 - return; 394 - 395 - if (bp->flags & BNXT_FLAG_CHIP_P5) { 396 - spin_lock_bh(&ptp->ptp_lock); 397 - ptp->current_time = bnxt_refclk_read(bp, NULL); 398 - WRITE_ONCE(ptp->old_time, ptp->current_time); 399 - spin_unlock_bh(&ptp->ptp_lock); 400 - ptp_schedule_worker(ptp->ptp_clock, 0); 401 - } 402 - } 403 - 404 388 static const struct ptp_clock_info bnxt_ptp_caps = { 405 389 .owner = THIS_MODULE, 406 390 .name = "bnxt clock", ··· 434 450 bnxt_unmap_ptp_regs(bp); 435 451 return err; 436 452 } 437 - 453 + if (bp->flags & BNXT_FLAG_CHIP_P5) { 454 + spin_lock_bh(&ptp->ptp_lock); 455 + ptp->current_time = bnxt_refclk_read(bp, NULL); 456 + WRITE_ONCE(ptp->old_time, ptp->current_time); 457 + spin_unlock_bh(&ptp->ptp_lock); 458 + ptp_schedule_worker(ptp->ptp_clock, 0); 459 + } 438 460 return 0; 439 461 } 440 462
-1
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
··· 75 75 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); 76 76 int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); 77 77 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); 78 - void bnxt_ptp_start(struct bnxt *bp); 79 78 int bnxt_ptp_init(struct bnxt *bp); 80 79 void bnxt_ptp_clear(struct bnxt *bp); 81 80 #endif
+5 -4
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
··· 479 479 if (!edev) 480 480 return ERR_PTR(-ENOMEM); 481 481 edev->en_ops = &bnxt_en_ops_tbl; 482 - if (bp->flags & BNXT_FLAG_ROCEV1_CAP) 483 - edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; 484 - if (bp->flags & BNXT_FLAG_ROCEV2_CAP) 485 - edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; 486 482 edev->net = dev; 487 483 edev->pdev = bp->pdev; 488 484 edev->l2_db_size = bp->db_size; 489 485 edev->l2_db_size_nc = bp->db_size; 490 486 bp->edev = edev; 491 487 } 488 + edev->flags &= ~BNXT_EN_FLAG_ROCE_CAP; 489 + if (bp->flags & BNXT_FLAG_ROCEV1_CAP) 490 + edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; 491 + if (bp->flags & BNXT_FLAG_ROCEV2_CAP) 492 + edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; 492 493 return bp->edev; 493 494 } 494 495 EXPORT_SYMBOL(bnxt_ulp_probe);
+1 -1
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
··· 420 420 * bits 32:47 indicate the PVF num. 421 421 */ 422 422 for (q_no = 0; q_no < ern; q_no++) { 423 - reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; 423 + reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; 424 424 425 425 /* for VF assigned queues. */ 426 426 if (q_no < oct->sriov_info.pf_srn) {
+8 -8
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
··· 2770 2770 if (err) 2771 2771 return err; 2772 2772 2773 - err = dpaa2_switch_seed_bp(ethsw); 2774 - if (err) 2775 - goto err_free_dpbp; 2776 - 2777 2773 err = dpaa2_switch_alloc_rings(ethsw); 2778 2774 if (err) 2779 - goto err_drain_dpbp; 2775 + goto err_free_dpbp; 2780 2776 2781 2777 err = dpaa2_switch_setup_dpio(ethsw); 2782 2778 if (err) 2783 2779 goto err_destroy_rings; 2784 2780 2781 + err = dpaa2_switch_seed_bp(ethsw); 2782 + if (err) 2783 + goto err_deregister_dpio; 2784 + 2785 2785 err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); 2786 2786 if (err) { 2787 2787 dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err); 2788 - goto err_deregister_dpio; 2788 + goto err_drain_dpbp; 2789 2789 } 2790 2790 2791 2791 return 0; 2792 2792 2793 + err_drain_dpbp: 2794 + dpaa2_switch_drain_bp(ethsw); 2793 2795 err_deregister_dpio: 2794 2796 dpaa2_switch_free_dpio(ethsw); 2795 2797 err_destroy_rings: 2796 2798 dpaa2_switch_destroy_rings(ethsw); 2797 - err_drain_dpbp: 2798 - dpaa2_switch_drain_bp(ethsw); 2799 2799 err_free_dpbp: 2800 2800 dpaa2_switch_free_dpbp(ethsw); 2801 2801
+1
drivers/net/ethernet/freescale/fman/mac.c
··· 524 524 | SUPPORTED_Autoneg \ 525 525 | SUPPORTED_Pause \ 526 526 | SUPPORTED_Asym_Pause \ 527 + | SUPPORTED_FIBRE \ 527 528 | SUPPORTED_MII) 528 529 529 530 static DEFINE_MUTEX(eth_lock);
+3 -3
drivers/net/ethernet/hisilicon/hip04_eth.c
··· 131 131 /* buf unit size is cache_line_size, which is 64, so the shift is 6 */ 132 132 #define PPE_BUF_SIZE_SHIFT 6 133 133 #define PPE_TX_BUF_HOLD BIT(31) 134 - #define CACHE_LINE_MASK 0x3F 134 + #define SOC_CACHE_LINE_MASK 0x3F 135 135 #else 136 136 #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 137 137 #define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 ··· 531 531 #if defined(CONFIG_HI13X1_GMAC) 532 532 desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV 533 533 | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); 534 - desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); 535 - desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); 534 + desc->data_offset = (__force u32)cpu_to_be32(phys & SOC_CACHE_LINE_MASK); 535 + desc->send_addr = (__force u32)cpu_to_be32(phys & ~SOC_CACHE_LINE_MASK); 536 536 #else 537 537 desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); 538 538 desc->send_addr = (__force u32)cpu_to_be32(phys);
+5 -2
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
··· 98 98 u32 origin_mbx_msg; 99 99 bool received_resp; 100 100 int resp_status; 101 + u16 match_id; 101 102 u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; 102 103 }; 103 104 ··· 144 143 u8 mbx_need_resp; 145 144 u8 rsv1[1]; 146 145 u8 msg_len; 147 - u8 rsv2[3]; 146 + u8 rsv2; 147 + u16 match_id; 148 148 struct hclge_vf_to_pf_msg msg; 149 149 }; 150 150 ··· 155 153 u8 dest_vfid; 156 154 u8 rsv[3]; 157 155 u8 msg_len; 158 - u8 rsv1[3]; 156 + u8 rsv1; 157 + u16 match_id; 159 158 struct hclge_pf_to_vf_msg msg; 160 159 }; 161 160
+6 -2
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
··· 9552 9552 if (ret) 9553 9553 return ret; 9554 9554 9555 - if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) 9555 + if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) { 9556 9556 ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id, 9557 9557 !enable); 9558 - else if (!vport->vport_id) 9558 + } else if (!vport->vport_id) { 9559 + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) 9560 + enable = false; 9561 + 9559 9562 ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, 9560 9563 HCLGE_FILTER_FE_INGRESS, 9561 9564 enable, 0); 9565 + } 9562 9566 9563 9567 return ret; 9564 9568 }
+1
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
··· 47 47 48 48 resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; 49 49 resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; 50 + resp_pf_to_vf->match_id = vf_to_pf_req->match_id; 50 51 51 52 resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; 52 53 resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code;
+10
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
··· 2641 2641 2642 2642 static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) 2643 2643 { 2644 + struct hnae3_handle *nic = &hdev->nic; 2645 + int ret; 2646 + 2647 + ret = hclgevf_en_hw_strip_rxvtag(nic, true); 2648 + if (ret) { 2649 + dev_err(&hdev->pdev->dev, 2650 + "failed to enable rx vlan offload, ret = %d\n", ret); 2651 + return ret; 2652 + } 2653 + 2644 2654 return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, 2645 2655 false); 2646 2656 }
+19
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
··· 13 13 return resp_code ? -resp_code : 0; 14 14 } 15 15 16 + #define HCLGEVF_MBX_MATCH_ID_START 1 16 17 static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) 17 18 { 18 19 /* this function should be called with mbx_resp.mbx_mutex held ··· 22 21 hdev->mbx_resp.received_resp = false; 23 22 hdev->mbx_resp.origin_mbx_msg = 0; 24 23 hdev->mbx_resp.resp_status = 0; 24 + hdev->mbx_resp.match_id++; 25 + /* Update match_id and ensure the value of match_id is not zero */ 26 + if (hdev->mbx_resp.match_id == 0) 27 + hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START; 25 28 memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE); 26 29 } 27 30 ··· 120 115 if (need_resp) { 121 116 mutex_lock(&hdev->mbx_resp.mbx_mutex); 122 117 hclgevf_reset_mbx_resp_status(hdev); 118 + req->match_id = hdev->mbx_resp.match_id; 123 119 status = hclgevf_cmd_send(&hdev->hw, &desc, 1); 124 120 if (status) { 125 121 dev_err(&hdev->pdev->dev, ··· 216 210 for (i = 0; i < HCLGE_MBX_MAX_RESP_DATA_SIZE; i++) { 217 211 resp->additional_info[i] = *temp; 218 212 temp++; 213 + } 214 + 215 + /* If match_id is not zero, it means PF support 216 + * match_id. If the match_id is right, VF get the 217 + * right response, otherwise ignore the response. 218 + * Driver will clear hdev->mbx_resp when send 219 + * next message which need response. 220 + */ 221 + if (req->match_id) { 222 + if (req->match_id == resp->match_id) 223 + resp->received_resp = true; 224 + } else { 225 + resp->received_resp = true; 219 226 } 220 227 break; 221 228 case HCLGE_MBX_LINK_STAT_CHANGE:
+1 -1
drivers/net/ethernet/ibm/ibmvnic.c
··· 1731 1731 tx_send_failed++; 1732 1732 tx_dropped++; 1733 1733 ret = NETDEV_TX_OK; 1734 - ibmvnic_tx_scrq_flush(adapter, tx_scrq); 1735 1734 goto out; 1736 1735 } 1737 1736 ··· 1752 1753 dev_kfree_skb_any(skb); 1753 1754 tx_send_failed++; 1754 1755 tx_dropped++; 1756 + ibmvnic_tx_scrq_flush(adapter, tx_scrq); 1755 1757 ret = NETDEV_TX_OK; 1756 1758 goto out; 1757 1759 }
+2 -1
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
··· 1825 1825 struct sk_buff *skb) 1826 1826 { 1827 1827 if (ring_uses_build_skb(rx_ring)) { 1828 - unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; 1828 + unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; 1829 + unsigned long offset = (unsigned long)(skb->data) & mask; 1829 1830 1830 1831 dma_sync_single_range_for_cpu(rx_ring->dev, 1831 1832 IXGBE_CB(skb)->dma,
+1 -1
drivers/net/ethernet/marvell/octeontx2/af/Makefile
··· 10 10 rvu_mbox-y := mbox.o rvu_trace.o 11 11 rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ 12 12 rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ 13 - rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o 13 + rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o
+9 -1
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
··· 1314 1314 return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc); 1315 1315 } 1316 1316 1317 - static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) 1317 + int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) 1318 1318 { 1319 1319 struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); 1320 1320 int blkaddr = BLKADDR_NIX0, vf; ··· 2859 2859 if (!vfs) 2860 2860 return 0; 2861 2861 2862 + /* LBK channel number 63 is used for switching packets between 2863 + * CGX mapped VFs. Hence limit LBK pairs till 62 only. 2864 + */ 2865 + if (vfs > 62) 2866 + vfs = 62; 2867 + 2862 2868 /* Save VFs number for reference in VF interrupts handlers. 2863 2869 * Since interrupts might start arriving during SRIOV enablement 2864 2870 * ordinary API cannot be used to get number of enabled VFs. ··· 3006 3000 3007 3001 /* Initialize debugfs */ 3008 3002 rvu_dbg_init(rvu); 3003 + 3004 + mutex_init(&rvu->rswitch.switch_lock); 3009 3005 3010 3006 return 0; 3011 3007 err_dl:
+21
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
··· 415 415 size_t kpus; 416 416 }; 417 417 418 + #define RVU_SWITCH_LBK_CHAN 63 419 + 420 + struct rvu_switch { 421 + struct mutex switch_lock; /* Serialize flow installation */ 422 + u32 used_entries; 423 + u16 *entry2pcifunc; 424 + u16 mode; 425 + u16 start_entry; 426 + }; 427 + 418 428 struct rvu { 419 429 void __iomem *afreg_base; 420 430 void __iomem *pfreg_base; ··· 455 445 456 446 /* CGX */ 457 447 #define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */ 448 + u16 cgx_mapped_vfs; /* maximum CGX mapped VFs */ 458 449 u8 cgx_mapped_pfs; 459 450 u8 cgx_cnt_max; /* CGX port count max */ 460 451 u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */ ··· 488 477 struct rvu_debugfs rvu_dbg; 489 478 #endif 490 479 struct rvu_devlink *rvu_dl; 480 + 481 + /* RVU switch implementation over NPC with DMAC rules */ 482 + struct rvu_switch rswitch; 491 483 }; 492 484 493 485 static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) ··· 705 691 struct nix_cn10k_aq_enq_req *aq_req, 706 692 struct nix_cn10k_aq_enq_rsp *aq_rsp, 707 693 u16 pcifunc, u8 ctype, u32 qidx); 694 + int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); 708 695 709 696 /* NPC APIs */ 710 697 int rvu_npc_init(struct rvu *rvu); ··· 783 768 static inline void rvu_dbg_init(struct rvu *rvu) {} 784 769 static inline void rvu_dbg_exit(struct rvu *rvu) {} 785 770 #endif 771 + 772 + /* RVU Switch */ 773 + void rvu_switch_enable(struct rvu *rvu); 774 + void rvu_switch_disable(struct rvu *rvu); 775 + void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc); 776 + 786 777 #endif /* RVU_H */
+3
drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
··· 126 126 unsigned long lmac_bmap; 127 127 int size, free_pkind; 128 128 int cgx, lmac, iter; 129 + int numvfs, hwvfs; 129 130 130 131 if (!cgx_cnt_max) 131 132 return 0; ··· 167 166 pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; 168 167 rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); 169 168 rvu->cgx_mapped_pfs++; 169 + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvfs); 170 + rvu->cgx_mapped_vfs += numvfs; 170 171 pf++; 171 172 } 172 173 }
+1 -4
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
··· 2113 2113 int entry_acnt, entry_ecnt; 2114 2114 int cntr_acnt, cntr_ecnt; 2115 2115 2116 - /* Skip PF0 */ 2117 - if (!pcifunc) 2118 - return; 2119 2116 rvu_npc_get_mcam_entry_alloc_info(rvu, pcifunc, blkaddr, 2120 2117 &entry_acnt, &entry_ecnt); 2121 2118 rvu_npc_get_mcam_counter_alloc_info(rvu, pcifunc, blkaddr, ··· 2295 2298 static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, 2296 2299 struct rvu_npc_mcam_rule *rule) 2297 2300 { 2298 - if (rule->intf == NIX_INTF_TX) { 2301 + if (is_npc_intf_tx(rule->intf)) { 2299 2302 switch (rule->tx_action.op) { 2300 2303 case NIX_TX_ACTIONOP_DROP: 2301 2304 seq_puts(s, "\taction: Drop\n");
+41 -7
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
··· 1364 1364 rvu_nix_health_reporters_destroy(rvu_dl); 1365 1365 } 1366 1366 1367 + static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) 1368 + { 1369 + struct rvu_devlink *rvu_dl = devlink_priv(devlink); 1370 + struct rvu *rvu = rvu_dl->rvu; 1371 + struct rvu_switch *rswitch; 1372 + 1373 + rswitch = &rvu->rswitch; 1374 + *mode = rswitch->mode; 1375 + 1376 + return 0; 1377 + } 1378 + 1379 + static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, 1380 + struct netlink_ext_ack *extack) 1381 + { 1382 + struct rvu_devlink *rvu_dl = devlink_priv(devlink); 1383 + struct rvu *rvu = rvu_dl->rvu; 1384 + struct rvu_switch *rswitch; 1385 + 1386 + rswitch = &rvu->rswitch; 1387 + switch (mode) { 1388 + case DEVLINK_ESWITCH_MODE_LEGACY: 1389 + case DEVLINK_ESWITCH_MODE_SWITCHDEV: 1390 + if (rswitch->mode == mode) 1391 + return 0; 1392 + rswitch->mode = mode; 1393 + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) 1394 + rvu_switch_enable(rvu); 1395 + else 1396 + rvu_switch_disable(rvu); 1397 + break; 1398 + default: 1399 + return -EINVAL; 1400 + } 1401 + 1402 + return 0; 1403 + } 1404 + 1367 1405 static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, 1368 1406 struct netlink_ext_ack *extack) 1369 1407 { ··· 1410 1372 1411 1373 static const struct devlink_ops rvu_devlink_ops = { 1412 1374 .info_get = rvu_devlink_info_get, 1375 + .eswitch_mode_get = rvu_devlink_eswitch_mode_get, 1376 + .eswitch_mode_set = rvu_devlink_eswitch_mode_set, 1413 1377 }; 1414 1378 1415 1379 int rvu_register_dl(struct rvu *rvu) ··· 1420 1380 struct devlink *dl; 1421 1381 int err; 1422 1382 1423 - rvu_dl = kzalloc(sizeof(*rvu_dl), GFP_KERNEL); 1424 - if (!rvu_dl) 1425 - return -ENOMEM; 1426 - 1427 1383 dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); 1428 1384 if (!dl) { 1429 1385 dev_warn(rvu->dev, "devlink_alloc failed\n"); 1430 - kfree(rvu_dl); 1431 1386 return -ENOMEM; 1432 1387 } 1433 1388 ··· 1430 1395 if (err) { 
1431 1396 dev_err(rvu->dev, "devlink register failed with error %d\n", err); 1432 1397 devlink_free(dl); 1433 - kfree(rvu_dl); 1434 1398 return err; 1435 1399 } 1436 1400 1401 + rvu_dl = devlink_priv(dl); 1437 1402 rvu_dl->dl = dl; 1438 1403 rvu_dl->rvu = rvu; 1439 1404 rvu->rvu_dl = rvu_dl; ··· 1452 1417 rvu_health_reporters_destroy(rvu); 1453 1418 devlink_unregister(dl); 1454 1419 devlink_free(dl); 1455 - kfree(rvu_dl); 1456 1420 }
+36
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
··· 1952 1952 pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); 1953 1953 } 1954 1954 1955 + static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, 1956 + u16 pcifunc, struct nix_txsch *txsch) 1957 + { 1958 + struct rvu_hwinfo *hw = rvu->hw; 1959 + int lbk_link_start, lbk_links; 1960 + u8 pf = rvu_get_pf(pcifunc); 1961 + int schq; 1962 + 1963 + if (!is_pf_cgxmapped(rvu, pf)) 1964 + return; 1965 + 1966 + lbk_link_start = hw->cgx_links; 1967 + 1968 + for (schq = 0; schq < txsch->schq.max; schq++) { 1969 + if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) 1970 + continue; 1971 + /* Enable all LBK links with channel 63 by default so that 1972 + * packets can be sent to LBK with a NPC TX MCAM rule 1973 + */ 1974 + lbk_links = hw->lbk_links; 1975 + while (lbk_links--) 1976 + rvu_write64(rvu, blkaddr, 1977 + NIX_AF_TL3_TL2X_LINKX_CFG(schq, 1978 + lbk_link_start + 1979 + lbk_links), 1980 + BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); 1981 + } 1982 + } 1983 + 1955 1984 int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, 1956 1985 struct nix_txschq_config *req, 1957 1986 struct msg_rsp *rsp) ··· 2068 2039 } 2069 2040 rvu_write64(rvu, blkaddr, reg, regval); 2070 2041 } 2042 + 2043 + rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, 2044 + &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); 2071 2045 2072 2046 return 0; 2073 2047 } ··· 3212 3180 if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf) 3213 3181 ether_addr_copy(pfvf->default_mac, req->mac_addr); 3214 3182 3183 + rvu_switch_update_rules(rvu, pcifunc); 3184 + 3215 3185 return 0; 3216 3186 } 3217 3187 ··· 3882 3848 3883 3849 pfvf = rvu_get_pfvf(rvu, pcifunc); 3884 3850 set_bit(NIXLF_INITIALIZED, &pfvf->flags); 3851 + 3852 + rvu_switch_update_rules(rvu, pcifunc); 3885 3853 3886 3854 return rvu_cgx_start_stop_io(rvu, pcifunc, true); 3887 3855 }
+36 -11
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
··· 442 442 owner = mcam->entry2pfvf_map[index]; 443 443 target_func = (entry->action >> 4) & 0xffff; 444 444 /* do nothing when target is LBK/PF or owner is not PF */ 445 - if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) || 445 + if (is_pffunc_af(owner) || is_afvf(target_func) || 446 + (owner & RVU_PFVF_FUNC_MASK) || 446 447 !(target_func & RVU_PFVF_FUNC_MASK)) 447 448 return; 448 449 ··· 469 468 { 470 469 int bank = npc_get_bank(mcam, index); 471 470 int kw = 0, actbank, actindex; 471 + u8 tx_intf_mask = ~intf & 0x3; 472 + u8 tx_intf = intf; 472 473 u64 cam0, cam1; 473 474 474 475 actbank = bank; /* Save bank id, to set action later on */ ··· 491 488 */ 492 489 for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) { 493 490 /* Interface should be set in all banks */ 491 + if (is_npc_intf_tx(intf)) { 492 + /* Last bit must be set and rest don't care 493 + * for TX interfaces 494 + */ 495 + tx_intf_mask = 0x1; 496 + tx_intf = intf & tx_intf_mask; 497 + tx_intf_mask = ~tx_intf & tx_intf_mask; 498 + } 499 + 494 500 rvu_write64(rvu, blkaddr, 495 501 NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1), 496 - intf); 502 + tx_intf); 497 503 rvu_write64(rvu, blkaddr, 498 504 NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0), 499 - ~intf & 0x3); 505 + tx_intf_mask); 500 506 501 507 /* Set the match key */ 502 508 npc_get_keyword(entry, kw, &cam0, &cam1); ··· 662 650 eth_broadcast_addr((u8 *)&req.mask.dmac); 663 651 req.features = BIT_ULL(NPC_DMAC); 664 652 req.channel = chan; 653 + req.chan_mask = 0xFFFU; 665 654 req.intf = pfvf->nix_rx_intf; 666 655 req.op = action.op; 667 656 req.hdr.pcifunc = 0; /* AF is requester */ ··· 812 799 eth_broadcast_addr((u8 *)&req.mask.dmac); 813 800 req.features = BIT_ULL(NPC_DMAC); 814 801 req.channel = chan; 802 + req.chan_mask = 0xFFFU; 815 803 req.intf = pfvf->nix_rx_intf; 816 804 req.entry = index; 817 805 req.hdr.pcifunc = 0; /* AF is requester */ ··· 1759 1745 int nixlf_count = rvu_get_nixlf_count(rvu); 1760 1746 struct 
npc_mcam *mcam = &rvu->hw->mcam; 1761 1747 int rsvd, err; 1748 + u16 index; 1749 + int cntr; 1762 1750 u64 cfg; 1763 1751 1764 1752 /* Actual number of MCAM entries vary by entry size */ ··· 1860 1844 sizeof(u16), GFP_KERNEL); 1861 1845 if (!mcam->entry2target_pffunc) 1862 1846 goto free_mem; 1847 + 1848 + for (index = 0; index < mcam->bmap_entries; index++) { 1849 + mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; 1850 + mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; 1851 + } 1852 + 1853 + for (cntr = 0; cntr < mcam->counters.max; cntr++) 1854 + mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP; 1863 1855 1864 1856 mutex_init(&mcam->lock); 1865 1857 ··· 2586 2562 } 2587 2563 2588 2564 /* Alloc request from PFFUNC with no NIXLF attached should be denied */ 2589 - if (!is_nixlf_attached(rvu, pcifunc)) 2565 + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) 2590 2566 return NPC_MCAM_ALLOC_DENIED; 2591 2567 2592 2568 return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp); ··· 2606 2582 return NPC_MCAM_INVALID_REQ; 2607 2583 2608 2584 /* Free request from PFFUNC with no NIXLF attached, ignore */ 2609 - if (!is_nixlf_attached(rvu, pcifunc)) 2585 + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) 2610 2586 return NPC_MCAM_INVALID_REQ; 2611 2587 2612 2588 mutex_lock(&mcam->lock); ··· 2618 2594 if (rc) 2619 2595 goto exit; 2620 2596 2621 - mcam->entry2pfvf_map[req->entry] = 0; 2597 + mcam->entry2pfvf_map[req->entry] = NPC_MCAM_INVALID_MAP; 2622 2598 mcam->entry2target_pffunc[req->entry] = 0x0; 2623 2599 npc_mcam_clear_bit(mcam, req->entry); 2624 2600 npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false); ··· 2703 2679 else 2704 2680 nix_intf = pfvf->nix_rx_intf; 2705 2681 2706 - if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { 2682 + if (!is_pffunc_af(pcifunc) && 2683 + npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { 2707 2684 rc = NPC_MCAM_INVALID_REQ; 2708 2685 goto exit; 2709 2686 } 2710 2687 2711 
- if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, 2712 - pcifunc)) { 2688 + if (!is_pffunc_af(pcifunc) && 2689 + npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) { 2713 2690 rc = NPC_MCAM_INVALID_REQ; 2714 2691 goto exit; 2715 2692 } ··· 2861 2836 return NPC_MCAM_INVALID_REQ; 2862 2837 2863 2838 /* If the request is from a PFFUNC with no NIXLF attached, ignore */ 2864 - if (!is_nixlf_attached(rvu, pcifunc)) 2839 + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) 2865 2840 return NPC_MCAM_INVALID_REQ; 2866 2841 2867 2842 /* Since list of allocated counter IDs needs to be sent to requester, ··· 3106 3081 if (rc) { 3107 3082 /* Free allocated MCAM entry */ 3108 3083 mutex_lock(&mcam->lock); 3109 - mcam->entry2pfvf_map[entry] = 0; 3084 + mcam->entry2pfvf_map[entry] = NPC_MCAM_INVALID_MAP; 3110 3085 npc_mcam_clear_bit(mcam, entry); 3111 3086 mutex_unlock(&mcam->lock); 3112 3087 return rc;
+21 -8
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
··· 910 910 911 911 static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, 912 912 struct mcam_entry *entry, 913 - struct npc_install_flow_req *req, u16 target) 913 + struct npc_install_flow_req *req, 914 + u16 target, bool pf_set_vfs_mac) 914 915 { 916 + struct rvu_switch *rswitch = &rvu->rswitch; 915 917 struct nix_rx_action action; 916 - u64 chan_mask; 917 918 918 - chan_mask = req->chan_mask ? req->chan_mask : ~0ULL; 919 - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0, 920 - NIX_INTF_RX); 919 + if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac) 920 + req->chan_mask = 0x0; /* Do not care channel */ 921 + 922 + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, 923 + 0, NIX_INTF_RX); 921 924 922 925 *(u64 *)&action = 0x00; 923 926 action.pf_func = target; ··· 952 949 struct npc_install_flow_req *req, u16 target) 953 950 { 954 951 struct nix_tx_action action; 952 + u64 mask = ~0ULL; 953 + 954 + /* If AF is installing then do not care about 955 + * PF_FUNC in Send Descriptor 956 + */ 957 + if (is_pffunc_af(req->hdr.pcifunc)) 958 + mask = 0; 955 959 956 960 npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target), 957 - 0, ~0ULL, 0, NIX_INTF_TX); 961 + 0, mask, 0, NIX_INTF_TX); 958 962 959 963 *(u64 *)&action = 0x00; 960 964 action.op = req->op; ··· 1012 1002 req->intf); 1013 1003 1014 1004 if (is_npc_intf_rx(req->intf)) 1015 - npc_update_rx_entry(rvu, pfvf, entry, req, target); 1005 + npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac); 1016 1006 else 1017 1007 npc_update_tx_entry(rvu, pfvf, entry, req, target); 1018 1008 ··· 1174 1164 if (err) 1175 1165 return err; 1176 1166 1177 - if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) 1167 + /* Skip channel validation if AF is installing */ 1168 + if (!is_pffunc_af(req->hdr.pcifunc) && 1169 + npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) 1178 1170 return -EINVAL; 1179 1171 1180 
1172 pfvf = rvu_get_pfvf(rvu, target); ··· 1192 1180 eth_broadcast_addr((u8 *)&req->mask.dmac); 1193 1181 } 1194 1182 1183 + /* Proceed if NIXLF is attached or not for TX rules */ 1195 1184 err = nix_get_nixlf(rvu, target, &nixlf, NULL); 1196 1185 if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) 1197 1186 return -EINVAL;
+258
drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Marvell OcteonTx2 RVU Admin Function driver 3 + * 4 + * Copyright (C) 2021 Marvell. 5 + */ 6 + 7 + #include <linux/bitfield.h> 8 + #include "rvu.h" 9 + 10 + static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, 11 + u16 chan_mask) 12 + { 13 + struct npc_install_flow_req req = { 0 }; 14 + struct npc_install_flow_rsp rsp = { 0 }; 15 + struct rvu_pfvf *pfvf; 16 + 17 + pfvf = rvu_get_pfvf(rvu, pcifunc); 18 + /* If the pcifunc is not initialized then nothing to do. 19 + * This same function will be called again via rvu_switch_update_rules 20 + * after pcifunc is initialized. 21 + */ 22 + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) 23 + return 0; 24 + 25 + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); 26 + eth_broadcast_addr((u8 *)&req.mask.dmac); 27 + req.hdr.pcifunc = 0; /* AF is requester */ 28 + req.vf = pcifunc; 29 + req.features = BIT_ULL(NPC_DMAC); 30 + req.channel = pfvf->rx_chan_base; 31 + req.chan_mask = chan_mask; 32 + req.intf = pfvf->nix_rx_intf; 33 + req.op = NIX_RX_ACTION_DEFAULT; 34 + req.default_rule = 1; 35 + 36 + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); 37 + } 38 + 39 + static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) 40 + { 41 + struct npc_install_flow_req req = { 0 }; 42 + struct npc_install_flow_rsp rsp = { 0 }; 43 + struct rvu_pfvf *pfvf; 44 + u8 lbkid; 45 + 46 + pfvf = rvu_get_pfvf(rvu, pcifunc); 47 + /* If the pcifunc is not initialized then nothing to do. 48 + * This same function will be called again via rvu_switch_update_rules 49 + * after pcifunc is initialized. 50 + */ 51 + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) 52 + return 0; 53 + 54 + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 
0 : 1; 55 + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); 56 + eth_broadcast_addr((u8 *)&req.mask.dmac); 57 + req.hdr.pcifunc = 0; /* AF is requester */ 58 + req.vf = pcifunc; 59 + req.entry = entry; 60 + req.features = BIT_ULL(NPC_DMAC); 61 + req.intf = pfvf->nix_tx_intf; 62 + req.op = NIX_TX_ACTIONOP_UCAST_CHAN; 63 + req.index = (lbkid << 8) | RVU_SWITCH_LBK_CHAN; 64 + req.set_cntr = 1; 65 + 66 + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); 67 + } 68 + 69 + static int rvu_switch_install_rules(struct rvu *rvu) 70 + { 71 + struct rvu_switch *rswitch = &rvu->rswitch; 72 + u16 start = rswitch->start_entry; 73 + struct rvu_hwinfo *hw = rvu->hw; 74 + int pf, vf, numvfs, hwvf; 75 + u16 pcifunc, entry = 0; 76 + int err; 77 + 78 + for (pf = 1; pf < hw->total_pfs; pf++) { 79 + if (!is_pf_cgxmapped(rvu, pf)) 80 + continue; 81 + 82 + pcifunc = pf << 10; 83 + /* rvu_get_nix_blkaddr sets up the corresponding NIX block 84 + * address and NIX RX and TX interfaces for a pcifunc. 85 + * Generally it is called during attach call of a pcifunc but it 86 + * is called here since we are pre-installing rules before 87 + * nixlfs are attached 88 + */ 89 + rvu_get_nix_blkaddr(rvu, pcifunc); 90 + 91 + /* MCAM RX rule for a PF/VF already exists as default unicast 92 + * rules installed by AF. Hence change the channel in those 93 + * rules to ignore channel so that packets with the required 94 + * DMAC received from LBK(by other PF/VFs in system) or from 95 + * external world (from wire) are accepted. 
96 + */ 97 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); 98 + if (err) { 99 + dev_err(rvu->dev, "RX rule for PF%d failed(%d)\n", 100 + pf, err); 101 + return err; 102 + } 103 + 104 + err = rvu_switch_install_tx_rule(rvu, pcifunc, start + entry); 105 + if (err) { 106 + dev_err(rvu->dev, "TX rule for PF%d failed(%d)\n", 107 + pf, err); 108 + return err; 109 + } 110 + 111 + rswitch->entry2pcifunc[entry++] = pcifunc; 112 + 113 + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); 114 + for (vf = 0; vf < numvfs; vf++, hwvf++) { 115 + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); 116 + rvu_get_nix_blkaddr(rvu, pcifunc); 117 + 118 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); 119 + if (err) { 120 + dev_err(rvu->dev, 121 + "RX rule for PF%dVF%d failed(%d)\n", 122 + pf, vf, err); 123 + return err; 124 + } 125 + 126 + err = rvu_switch_install_tx_rule(rvu, pcifunc, 127 + start + entry); 128 + if (err) { 129 + dev_err(rvu->dev, 130 + "TX rule for PF%dVF%d failed(%d)\n", 131 + pf, vf, err); 132 + return err; 133 + } 134 + 135 + rswitch->entry2pcifunc[entry++] = pcifunc; 136 + } 137 + } 138 + 139 + return 0; 140 + } 141 + 142 + void rvu_switch_enable(struct rvu *rvu) 143 + { 144 + struct npc_mcam_alloc_entry_req alloc_req = { 0 }; 145 + struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 }; 146 + struct npc_delete_flow_req uninstall_req = { 0 }; 147 + struct npc_mcam_free_entry_req free_req = { 0 }; 148 + struct rvu_switch *rswitch = &rvu->rswitch; 149 + struct msg_rsp rsp; 150 + int ret; 151 + 152 + alloc_req.contig = true; 153 + alloc_req.count = rvu->cgx_mapped_pfs + rvu->cgx_mapped_vfs; 154 + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req, 155 + &alloc_rsp); 156 + if (ret) { 157 + dev_err(rvu->dev, 158 + "Unable to allocate MCAM entries\n"); 159 + goto exit; 160 + } 161 + 162 + if (alloc_rsp.count != alloc_req.count) { 163 + dev_err(rvu->dev, 164 + "Unable to allocate %d MCAM entries, got %d\n", 165 + alloc_req.count, alloc_rsp.count); 166 + goto free_entries; 
167 + } 168 + 169 + rswitch->entry2pcifunc = kcalloc(alloc_req.count, sizeof(u16), 170 + GFP_KERNEL); 171 + if (!rswitch->entry2pcifunc) 172 + goto free_entries; 173 + 174 + rswitch->used_entries = alloc_rsp.count; 175 + rswitch->start_entry = alloc_rsp.entry; 176 + 177 + ret = rvu_switch_install_rules(rvu); 178 + if (ret) 179 + goto uninstall_rules; 180 + 181 + return; 182 + 183 + uninstall_rules: 184 + uninstall_req.start = rswitch->start_entry; 185 + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; 186 + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); 187 + kfree(rswitch->entry2pcifunc); 188 + free_entries: 189 + free_req.all = 1; 190 + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); 191 + exit: 192 + return; 193 + } 194 + 195 + void rvu_switch_disable(struct rvu *rvu) 196 + { 197 + struct npc_delete_flow_req uninstall_req = { 0 }; 198 + struct npc_mcam_free_entry_req free_req = { 0 }; 199 + struct rvu_switch *rswitch = &rvu->rswitch; 200 + struct rvu_hwinfo *hw = rvu->hw; 201 + int pf, vf, numvfs, hwvf; 202 + struct msg_rsp rsp; 203 + u16 pcifunc; 204 + int err; 205 + 206 + if (!rswitch->used_entries) 207 + return; 208 + 209 + for (pf = 1; pf < hw->total_pfs; pf++) { 210 + if (!is_pf_cgxmapped(rvu, pf)) 211 + continue; 212 + 213 + pcifunc = pf << 10; 214 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); 215 + if (err) 216 + dev_err(rvu->dev, 217 + "Reverting RX rule for PF%d failed(%d)\n", 218 + pf, err); 219 + 220 + for (vf = 0; vf < numvfs; vf++, hwvf++) { 221 + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); 222 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); 223 + if (err) 224 + dev_err(rvu->dev, 225 + "Reverting RX rule for PF%dVF%d failed(%d)\n", 226 + pf, vf, err); 227 + } 228 + } 229 + 230 + uninstall_req.start = rswitch->start_entry; 231 + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; 232 + free_req.all = 1; 233 + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); 
234 + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); 235 + rswitch->used_entries = 0; 236 + kfree(rswitch->entry2pcifunc); 237 + } 238 + 239 + void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc) 240 + { 241 + struct rvu_switch *rswitch = &rvu->rswitch; 242 + u32 max = rswitch->used_entries; 243 + u16 entry; 244 + 245 + if (!rswitch->used_entries) 246 + return; 247 + 248 + for (entry = 0; entry < max; entry++) { 249 + if (rswitch->entry2pcifunc[entry] == pcifunc) 250 + break; 251 + } 252 + 253 + if (entry >= max) 254 + return; 255 + 256 + rvu_switch_install_tx_rule(rvu, pcifunc, rswitch->start_entry + entry); 257 + rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); 258 + }
+1
drivers/net/ethernet/microchip/sparx5/Kconfig
··· 3 3 depends on NET_SWITCHDEV 4 4 depends on HAS_IOMEM 5 5 depends on OF 6 + depends on ARCH_SPARX5 || COMPILE_TEST 6 7 select PHYLINK 7 8 select PHY_SPARX5_SERDES 8 9 select RESET_CONTROLLER
+2 -1
drivers/net/ethernet/realtek/r8169_main.c
··· 5084 5084 new_bus->priv = tp; 5085 5085 new_bus->parent = &pdev->dev; 5086 5086 new_bus->irq[0] = PHY_MAC_INTERRUPT; 5087 - snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev)); 5087 + snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", 5088 + pci_domain_nr(pdev->bus), pci_dev_id(pdev)); 5088 5089 5089 5090 new_bus->read = r8169_mdio_read_reg; 5090 5091 new_bus->write = r8169_mdio_write_reg;
+1 -1
drivers/net/ethernet/renesas/ravb.h
··· 864 864 865 865 /* The Ethernet AVB descriptor definitions. */ 866 866 struct ravb_desc { 867 - __le16 ds; /* Descriptor size */ 867 + __le16 ds; /* Descriptor size */ 868 868 u8 cc; /* Content control MSBs (reserved) */ 869 869 u8 die_dt; /* Descriptor interrupt enable and type */ 870 870 __le32 dptr; /* Descriptor pointer */
+1 -1
drivers/net/ethernet/renesas/ravb_main.c
··· 920 920 if (ravb_rx(ndev, &quota, q)) 921 921 goto out; 922 922 923 - /* Processing RX Descriptor Ring */ 923 + /* Processing TX Descriptor Ring */ 924 924 spin_lock_irqsave(&priv->lock, flags); 925 925 /* Clear TX interrupt */ 926 926 ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
+2
drivers/net/ethernet/xscale/ptp_ixp46x.c
··· 14 14 #include <linux/kernel.h> 15 15 #include <linux/ptp_clock_kernel.h> 16 16 #include <linux/soc/ixp4xx/cpu.h> 17 + #include <linux/module.h> 18 + #include <mach/ixp4xx-regs.h> 17 19 18 20 #include "ixp46x_ts.h" 19 21
+23 -10
drivers/net/usb/hso.c
··· 2495 2495 hso_net_init); 2496 2496 if (!net) { 2497 2497 dev_err(&interface->dev, "Unable to create ethernet device\n"); 2498 - goto exit; 2498 + goto err_hso_dev; 2499 2499 } 2500 2500 2501 2501 hso_net = netdev_priv(net); ··· 2508 2508 USB_DIR_IN); 2509 2509 if (!hso_net->in_endp) { 2510 2510 dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); 2511 - goto exit; 2511 + goto err_net; 2512 2512 } 2513 2513 hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, 2514 2514 USB_DIR_OUT); 2515 2515 if (!hso_net->out_endp) { 2516 2516 dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); 2517 - goto exit; 2517 + goto err_net; 2518 2518 } 2519 2519 SET_NETDEV_DEV(net, &interface->dev); 2520 2520 SET_NETDEV_DEVTYPE(net, &hso_type); ··· 2523 2523 for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { 2524 2524 hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); 2525 2525 if (!hso_net->mux_bulk_rx_urb_pool[i]) 2526 - goto exit; 2526 + goto err_mux_bulk_rx; 2527 2527 hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, 2528 2528 GFP_KERNEL); 2529 2529 if (!hso_net->mux_bulk_rx_buf_pool[i]) 2530 - goto exit; 2530 + goto err_mux_bulk_rx; 2531 2531 } 2532 2532 hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); 2533 2533 if (!hso_net->mux_bulk_tx_urb) 2534 - goto exit; 2534 + goto err_mux_bulk_rx; 2535 2535 hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); 2536 2536 if (!hso_net->mux_bulk_tx_buf) 2537 - goto exit; 2537 + goto err_free_tx_urb; 2538 2538 2539 2539 add_net_device(hso_dev); 2540 2540 ··· 2542 2542 result = register_netdev(net); 2543 2543 if (result) { 2544 2544 dev_err(&interface->dev, "Failed to register device\n"); 2545 - goto exit; 2545 + goto err_free_tx_buf; 2546 2546 } 2547 2547 2548 2548 hso_log_port(hso_dev); ··· 2550 2550 hso_create_rfkill(hso_dev, interface); 2551 2551 2552 2552 return hso_dev; 2553 - exit: 2554 - hso_free_net_device(hso_dev, true); 2553 + 2554 + err_free_tx_buf: 2555 + 
remove_net_device(hso_dev); 2556 + kfree(hso_net->mux_bulk_tx_buf); 2557 + err_free_tx_urb: 2558 + usb_free_urb(hso_net->mux_bulk_tx_urb); 2559 + err_mux_bulk_rx: 2560 + for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { 2561 + usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]); 2562 + kfree(hso_net->mux_bulk_rx_buf_pool[i]); 2563 + } 2564 + err_net: 2565 + free_netdev(net); 2566 + err_hso_dev: 2567 + kfree(hso_dev); 2555 2568 return NULL; 2556 2569 } 2557 2570
+21 -11
drivers/net/usb/r8152.c
··· 1552 1552 rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, 1553 1553 u32 advertising); 1554 1554 1555 - static int rtl8152_set_mac_address(struct net_device *netdev, void *p) 1555 + static int __rtl8152_set_mac_address(struct net_device *netdev, void *p, 1556 + bool in_resume) 1556 1557 { 1557 1558 struct r8152 *tp = netdev_priv(netdev); 1558 1559 struct sockaddr *addr = p; ··· 1562 1561 if (!is_valid_ether_addr(addr->sa_data)) 1563 1562 goto out1; 1564 1563 1565 - ret = usb_autopm_get_interface(tp->intf); 1566 - if (ret < 0) 1567 - goto out1; 1564 + if (!in_resume) { 1565 + ret = usb_autopm_get_interface(tp->intf); 1566 + if (ret < 0) 1567 + goto out1; 1568 + } 1568 1569 1569 1570 mutex_lock(&tp->control); 1570 1571 ··· 1578 1575 1579 1576 mutex_unlock(&tp->control); 1580 1577 1581 - usb_autopm_put_interface(tp->intf); 1578 + if (!in_resume) 1579 + usb_autopm_put_interface(tp->intf); 1582 1580 out1: 1583 1581 return ret; 1582 + } 1583 + 1584 + static int rtl8152_set_mac_address(struct net_device *netdev, void *p) 1585 + { 1586 + return __rtl8152_set_mac_address(netdev, p, false); 1584 1587 } 1585 1588 1586 1589 /* Devices containing proper chips can support a persistent ··· 1707 1698 return ret; 1708 1699 } 1709 1700 1710 - static int set_ethernet_addr(struct r8152 *tp) 1701 + static int set_ethernet_addr(struct r8152 *tp, bool in_resume) 1711 1702 { 1712 1703 struct net_device *dev = tp->netdev; 1713 1704 struct sockaddr sa; ··· 1720 1711 if (tp->version == RTL_VER_01) 1721 1712 ether_addr_copy(dev->dev_addr, sa.sa_data); 1722 1713 else 1723 - ret = rtl8152_set_mac_address(dev, &sa); 1714 + ret = __rtl8152_set_mac_address(dev, &sa, in_resume); 1724 1715 1725 1716 return ret; 1726 1717 } ··· 6772 6763 tp->rtl_ops.down(tp); 6773 6764 6774 6765 mutex_unlock(&tp->control); 6775 - 6776 - usb_autopm_put_interface(tp->intf); 6777 6766 } 6767 + 6768 + if (!res) 6769 + usb_autopm_put_interface(tp->intf); 6778 6770 6779 6771 free_all_mem(tp); 6780 
6772 ··· 8453 8443 clear_bit(SELECTIVE_SUSPEND, &tp->flags); 8454 8444 tp->rtl_ops.init(tp); 8455 8445 queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); 8456 - set_ethernet_addr(tp); 8446 + set_ethernet_addr(tp, true); 8457 8447 return rtl8152_resume(intf); 8458 8448 } 8459 8449 ··· 9654 9644 tp->rtl_fw.retry = true; 9655 9645 #endif 9656 9646 queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); 9657 - set_ethernet_addr(tp); 9647 + set_ethernet_addr(tp, false); 9658 9648 9659 9649 usb_set_intfdata(intf, tp); 9660 9650
-1
include/net/tcp.h
··· 1709 1709 struct rcu_head rcu; 1710 1710 }; 1711 1711 1712 - extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; 1713 1712 void tcp_fastopen_active_disable(struct sock *sk); 1714 1713 bool tcp_fastopen_active_should_disable(struct sock *sk); 1715 1714 void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
+1 -1
include/trace/events/net.h
··· 136 136 __assign_str(name, skb->dev->name); 137 137 ), 138 138 139 - TP_printk("dev=%s skbaddr=%p len=%u", 139 + TP_printk("dev=%s skbaddr=%px len=%u", 140 140 __get_str(name), __entry->skbaddr, __entry->len) 141 141 ) 142 142
+27 -1
include/trace/events/qdisc.h
··· 41 41 __entry->txq_state = txq->state; 42 42 ), 43 43 44 - TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", 44 + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px", 45 45 __entry->ifindex, __entry->handle, __entry->parent, 46 46 __entry->txq_state, __entry->packets, __entry->skbaddr ) 47 + ); 48 + 49 + TRACE_EVENT(qdisc_enqueue, 50 + 51 + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), 52 + 53 + TP_ARGS(qdisc, txq, skb), 54 + 55 + TP_STRUCT__entry( 56 + __field(struct Qdisc *, qdisc) 57 + __field(void *, skbaddr) 58 + __field(int, ifindex) 59 + __field(u32, handle) 60 + __field(u32, parent) 61 + ), 62 + 63 + TP_fast_assign( 64 + __entry->qdisc = qdisc; 65 + __entry->skbaddr = skb; 66 + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; 67 + __entry->handle = qdisc->handle; 68 + __entry->parent = qdisc->parent; 69 + ), 70 + 71 + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", 72 + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) 47 73 ); 48 74 49 75 TRACE_EVENT(qdisc_reset,
+2
kernel/bpf/verifier.c
··· 3677 3677 if (tail_call_reachable) 3678 3678 for (j = 0; j < frame; j++) 3679 3679 subprog[ret_prog[j]].tail_call_reachable = true; 3680 + if (subprog[0].tail_call_reachable) 3681 + env->prog->aux->tail_call_reachable = true; 3680 3682 3681 3683 /* end of for() loop means the last insn of the 'subprog' 3682 3684 * was reached. Doesn't matter whether it was JA or EXIT
+3
net/bpf/test_run.c
··· 701 701 void *data; 702 702 int ret; 703 703 704 + if (prog->expected_attach_type == BPF_XDP_DEVMAP || 705 + prog->expected_attach_type == BPF_XDP_CPUMAP) 706 + return -EINVAL; 704 707 if (kattr->test.ctx_in || kattr->test.ctx_out) 705 708 return -EINVAL; 706 709
+1 -1
net/bridge/br_fdb.c
··· 780 780 struct net_device *dst_dev; 781 781 782 782 dst_dev = dst ? dst->dev : br->dev; 783 - if (dst_dev != br_dev && dst_dev != dev) 783 + if (dst_dev && dst_dev != dev) 784 784 continue; 785 785 786 786 err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
+2 -1
net/caif/caif_socket.c
··· 539 539 goto err; 540 540 541 541 ret = -EINVAL; 542 - if (unlikely(msg->msg_iter.iov->iov_base == NULL)) 542 + if (unlikely(msg->msg_iter.nr_segs == 0) || 543 + unlikely(msg->msg_iter.iov->iov_base == NULL)) 543 544 goto err; 544 545 noblock = msg->msg_flags & MSG_DONTWAIT; 545 546
+26 -8
net/core/dev.c
··· 131 131 #include <trace/events/napi.h> 132 132 #include <trace/events/net.h> 133 133 #include <trace/events/skb.h> 134 + #include <trace/events/qdisc.h> 134 135 #include <linux/inetdevice.h> 135 136 #include <linux/cpu_rmap.h> 136 137 #include <linux/static_key.h> ··· 3845 3844 } 3846 3845 } 3847 3846 3847 + static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, 3848 + struct sk_buff **to_free, 3849 + struct netdev_queue *txq) 3850 + { 3851 + int rc; 3852 + 3853 + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; 3854 + if (rc == NET_XMIT_SUCCESS) 3855 + trace_qdisc_enqueue(q, txq, skb); 3856 + return rc; 3857 + } 3858 + 3848 3859 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, 3849 3860 struct net_device *dev, 3850 3861 struct netdev_queue *txq) ··· 3875 3862 * of q->seqlock to protect from racing with requeuing. 3876 3863 */ 3877 3864 if (unlikely(!nolock_qdisc_is_empty(q))) { 3878 - rc = q->enqueue(skb, q, &to_free) & 3879 - NET_XMIT_MASK; 3865 + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); 3880 3866 __qdisc_run(q); 3881 3867 qdisc_run_end(q); 3882 3868 ··· 3891 3879 return NET_XMIT_SUCCESS; 3892 3880 } 3893 3881 3894 - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; 3882 + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); 3895 3883 qdisc_run(q); 3896 3884 3897 3885 no_lock_out: ··· 3935 3923 qdisc_run_end(q); 3936 3924 rc = NET_XMIT_SUCCESS; 3937 3925 } else { 3938 - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; 3926 + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); 3939 3927 if (qdisc_run_begin(q)) { 3940 3928 if (unlikely(contended)) { 3941 3929 spin_unlock(&q->busylock); ··· 9712 9700 struct net_device *dev; 9713 9701 int err, fd; 9714 9702 9703 + rtnl_lock(); 9715 9704 dev = dev_get_by_index(net, attr->link_create.target_ifindex); 9716 - if (!dev) 9705 + if (!dev) { 9706 + rtnl_unlock(); 9717 9707 return -EINVAL; 9708 + } 9718 9709 9719 9710 link = kzalloc(sizeof(*link), GFP_USER); 9720 9711 if (!link) { 9721 
9712 err = -ENOMEM; 9722 - goto out_put_dev; 9713 + goto unlock; 9723 9714 } 9724 9715 9725 9716 bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); ··· 9732 9717 err = bpf_link_prime(&link->link, &link_primer); 9733 9718 if (err) { 9734 9719 kfree(link); 9735 - goto out_put_dev; 9720 + goto unlock; 9736 9721 } 9737 9722 9738 - rtnl_lock(); 9739 9723 err = dev_xdp_attach_link(dev, NULL, link); 9740 9724 rtnl_unlock(); 9741 9725 9742 9726 if (err) { 9727 + link->dev = NULL; 9743 9728 bpf_link_cleanup(&link_primer); 9744 9729 goto out_put_dev; 9745 9730 } ··· 9748 9733 /* link itself doesn't hold dev's refcnt to not complicate shutdown */ 9749 9734 dev_put(dev); 9750 9735 return fd; 9736 + 9737 + unlock: 9738 + rtnl_unlock(); 9751 9739 9752 9740 out_put_dev: 9753 9741 dev_put(dev);
+16 -2
net/core/skbuff.c
··· 663 663 if (skb->cloned && 664 664 atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 665 665 &shinfo->dataref)) 666 - return; 666 + goto exit; 667 667 668 668 skb_zcopy_clear(skb, true); 669 669 ··· 674 674 kfree_skb_list(shinfo->frag_list); 675 675 676 676 skb_free_head(skb); 677 + exit: 678 + /* When we clone an SKB we copy the reycling bit. The pp_recycle 679 + * bit is only set on the head though, so in order to avoid races 680 + * while trying to recycle fragments on __skb_frag_unref() we need 681 + * to make one SKB responsible for triggering the recycle path. 682 + * So disable the recycling bit if an SKB is cloned and we have 683 + * additional references to to the fragmented part of the SKB. 684 + * Eventually the last SKB will have the recycling bit set and it's 685 + * dataref set to 0, which will trigger the recycling 686 + */ 687 + skb->pp_recycle = 0; 677 688 } 678 689 679 690 /* ··· 3022 3011 3023 3012 if (!from->head_frag || 3024 3013 skb_headlen(from) < L1_CACHE_BYTES || 3025 - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) 3014 + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { 3026 3015 hlen = skb_headlen(from); 3016 + if (!hlen) 3017 + hlen = from->len; 3018 + } 3027 3019 3028 3020 if (skb_has_frag_list(from)) 3029 3021 hlen = from->len;
+11 -5
net/core/skmsg.c
··· 508 508 if (skb_linearize(skb)) 509 509 return -EAGAIN; 510 510 num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); 511 - if (unlikely(num_sge < 0)) { 512 - kfree(msg); 511 + if (unlikely(num_sge < 0)) 513 512 return num_sge; 514 - } 515 513 516 514 copied = skb->len; 517 515 msg->sg.start = 0; ··· 528 530 { 529 531 struct sock *sk = psock->sk; 530 532 struct sk_msg *msg; 533 + int err; 531 534 532 535 /* If we are receiving on the same sock skb->sk is already assigned, 533 536 * skip memory accounting and owner transition seeing it already set ··· 547 548 * into user buffers. 548 549 */ 549 550 skb_set_owner_r(skb, sk); 550 - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 551 + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 552 + if (err < 0) 553 + kfree(msg); 554 + return err; 551 555 } 552 556 553 557 /* Puts an skb on the ingress queue of the socket already assigned to the ··· 561 559 { 562 560 struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); 563 561 struct sock *sk = psock->sk; 562 + int err; 564 563 565 564 if (unlikely(!msg)) 566 565 return -EAGAIN; 567 566 sk_msg_init(msg); 568 567 skb_set_owner_r(skb, sk); 569 - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 568 + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 569 + if (err < 0) 570 + kfree(msg); 571 + return err; 570 572 } 571 573 572 574 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+12 -15
net/decnet/af_decnet.c
··· 816 816 static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) 817 817 { 818 818 struct dn_scp *scp = DN_SK(sk); 819 - DEFINE_WAIT(wait); 819 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 820 820 int err; 821 821 822 822 if (scp->state != DN_CR) ··· 826 826 scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); 827 827 dn_send_conn_conf(sk, allocation); 828 828 829 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 829 + add_wait_queue(sk_sleep(sk), &wait); 830 830 for(;;) { 831 831 release_sock(sk); 832 832 if (scp->state == DN_CC) 833 - *timeo = schedule_timeout(*timeo); 833 + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); 834 834 lock_sock(sk); 835 835 err = 0; 836 836 if (scp->state == DN_RUN) ··· 844 844 err = -EAGAIN; 845 845 if (!*timeo) 846 846 break; 847 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 848 847 } 849 - finish_wait(sk_sleep(sk), &wait); 848 + remove_wait_queue(sk_sleep(sk), &wait); 850 849 if (err == 0) { 851 850 sk->sk_socket->state = SS_CONNECTED; 852 851 } else if (scp->state != DN_CC) { ··· 857 858 static int dn_wait_run(struct sock *sk, long *timeo) 858 859 { 859 860 struct dn_scp *scp = DN_SK(sk); 860 - DEFINE_WAIT(wait); 861 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 861 862 int err = 0; 862 863 863 864 if (scp->state == DN_RUN) ··· 866 867 if (!*timeo) 867 868 return -EALREADY; 868 869 869 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 870 + add_wait_queue(sk_sleep(sk), &wait); 870 871 for(;;) { 871 872 release_sock(sk); 872 873 if (scp->state == DN_CI || scp->state == DN_CC) 873 - *timeo = schedule_timeout(*timeo); 874 + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); 874 875 lock_sock(sk); 875 876 err = 0; 876 877 if (scp->state == DN_RUN) ··· 884 885 err = -ETIMEDOUT; 885 886 if (!*timeo) 886 887 break; 887 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 888 888 } 889 - finish_wait(sk_sleep(sk), &wait); 889 + 
remove_wait_queue(sk_sleep(sk), &wait); 890 890 out: 891 891 if (err == 0) { 892 892 sk->sk_socket->state = SS_CONNECTED; ··· 1030 1032 1031 1033 static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) 1032 1034 { 1033 - DEFINE_WAIT(wait); 1035 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 1034 1036 struct sk_buff *skb = NULL; 1035 1037 int err = 0; 1036 1038 1037 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1039 + add_wait_queue(sk_sleep(sk), &wait); 1038 1040 for(;;) { 1039 1041 release_sock(sk); 1040 1042 skb = skb_dequeue(&sk->sk_receive_queue); 1041 1043 if (skb == NULL) { 1042 - *timeo = schedule_timeout(*timeo); 1044 + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); 1043 1045 skb = skb_dequeue(&sk->sk_receive_queue); 1044 1046 } 1045 1047 lock_sock(sk); ··· 1054 1056 err = -EAGAIN; 1055 1057 if (!*timeo) 1056 1058 break; 1057 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1058 1059 } 1059 - finish_wait(sk_sleep(sk), &wait); 1060 + remove_wait_queue(sk_sleep(sk), &wait); 1060 1061 1061 1062 return skb == NULL ? ERR_PTR(err) : skb; 1062 1063 }
+9 -5
net/dsa/slave.c
··· 1808 1808 struct dsa_slave_priv *p = netdev_priv(slave); 1809 1809 const struct dsa_port *cpu_dp = dp->cpu_dp; 1810 1810 struct net_device *master = cpu_dp->master; 1811 + const struct dsa_switch *ds = dp->ds; 1811 1812 1812 1813 slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; 1813 1814 slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; ··· 1820 1819 slave->needed_tailroom += master->needed_tailroom; 1821 1820 1822 1821 p->xmit = cpu_dp->tag_ops->xmit; 1822 + 1823 + slave->features = master->vlan_features | NETIF_F_HW_TC; 1824 + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) 1825 + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 1826 + slave->hw_features |= NETIF_F_HW_TC; 1827 + slave->features |= NETIF_F_LLTX; 1828 + if (slave->needed_tailroom) 1829 + slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); 1823 1830 } 1824 1831 1825 1832 static struct lock_class_key dsa_slave_netdev_xmit_lock_key; ··· 1890 1881 if (slave_dev == NULL) 1891 1882 return -ENOMEM; 1892 1883 1893 - slave_dev->features = master->vlan_features | NETIF_F_HW_TC; 1894 - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) 1895 - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 1896 - slave_dev->hw_features |= NETIF_F_HW_TC; 1897 - slave_dev->features |= NETIF_F_LLTX; 1898 1884 slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; 1899 1885 if (!is_zero_ether_addr(port->mac)) 1900 1886 ether_addr_copy(slave_dev->dev_addr, port->mac);
+9
net/dsa/tag_ksz.c
··· 53 53 u8 *tag; 54 54 u8 *addr; 55 55 56 + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) 57 + return NULL; 58 + 56 59 /* Tag encoding */ 57 60 tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); 58 61 addr = skb_mac_header(skb); ··· 117 114 u8 *addr; 118 115 u16 val; 119 116 117 + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) 118 + return NULL; 119 + 120 120 /* Tag encoding */ 121 121 tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); 122 122 addr = skb_mac_header(skb); ··· 169 163 struct dsa_port *dp = dsa_slave_to_port(dev); 170 164 u8 *addr; 171 165 u8 *tag; 166 + 167 + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) 168 + return NULL; 172 169 173 170 /* Tag encoding */ 174 171 tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
+1 -1
net/ipv4/tcp_bpf.c
··· 503 503 tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot); 504 504 return 0; 505 505 } 506 - core_initcall(tcp_bpf_v4_build_proto); 506 + late_initcall(tcp_bpf_v4_build_proto); 507 507 508 508 static int tcp_bpf_assert_proto_ops(struct proto *ops) 509 509 {
+24 -4
net/ipv4/tcp_fastopen.c
··· 507 507 { 508 508 struct net *net = sock_net(sk); 509 509 510 + if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) 511 + return; 512 + 513 + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ 514 + WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); 515 + 516 + /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). 517 + * We want net->ipv4.tfo_active_disable_stamp to be updated first. 518 + */ 519 + smp_mb__before_atomic(); 510 520 atomic_inc(&net->ipv4.tfo_active_disable_times); 511 - net->ipv4.tfo_active_disable_stamp = jiffies; 521 + 512 522 NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); 513 523 } 514 524 ··· 529 519 bool tcp_fastopen_active_should_disable(struct sock *sk) 530 520 { 531 521 unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; 532 - int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); 533 522 unsigned long timeout; 523 + int tfo_da_times; 534 524 int multiplier; 535 525 526 + if (!tfo_bh_timeout) 527 + return false; 528 + 529 + tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); 536 530 if (!tfo_da_times) 537 531 return false; 538 532 533 + /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ 534 + smp_rmb(); 535 + 539 536 /* Limit timeout to max: 2^6 * initial timeout */ 540 537 multiplier = 1 << min(tfo_da_times - 1, 6); 541 - timeout = multiplier * tfo_bh_timeout * HZ; 542 - if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) 538 + 539 + /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ 540 + timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + 541 + multiplier * tfo_bh_timeout * HZ; 542 + if (time_before(jiffies, timeout)) 543 543 return true; 544 544 545 545 /* Mark check bit so we can check for successful active TFO
+1 -1
net/ipv4/tcp_ipv4.c
··· 2965 2965 net->ipv4.sysctl_tcp_comp_sack_nr = 44; 2966 2966 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; 2967 2967 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); 2968 - net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; 2968 + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; 2969 2969 atomic_set(&net->ipv4.tfo_active_disable_times, 0); 2970 2970 2971 2971 /* Reno is always built in */
+19 -6
net/ipv4/udp.c
··· 645 645 const struct iphdr *iph, 646 646 struct udphdr *uh, 647 647 struct udp_table *udptable, 648 + struct sock *sk, 648 649 struct sk_buff *skb, u32 info) 649 650 { 651 + int (*lookup)(struct sock *sk, struct sk_buff *skb); 650 652 int network_offset, transport_offset; 651 - struct sock *sk; 653 + struct udp_sock *up; 652 654 653 655 network_offset = skb_network_offset(skb); 654 656 transport_offset = skb_transport_offset(skb); ··· 661 659 /* Transport header needs to point to the UDP header */ 662 660 skb_set_transport_header(skb, iph->ihl << 2); 663 661 662 + if (sk) { 663 + up = udp_sk(sk); 664 + 665 + lookup = READ_ONCE(up->encap_err_lookup); 666 + if (lookup && lookup(sk, skb)) 667 + sk = NULL; 668 + 669 + goto out; 670 + } 671 + 664 672 sk = __udp4_lib_lookup(net, iph->daddr, uh->source, 665 673 iph->saddr, uh->dest, skb->dev->ifindex, 0, 666 674 udptable, NULL); 667 675 if (sk) { 668 - int (*lookup)(struct sock *sk, struct sk_buff *skb); 669 - struct udp_sock *up = udp_sk(sk); 676 + up = udp_sk(sk); 670 677 671 678 lookup = READ_ONCE(up->encap_err_lookup); 672 679 if (!lookup || lookup(sk, skb)) 673 680 sk = NULL; 674 681 } 675 682 683 + out: 676 684 if (!sk) 677 685 sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); 678 686 ··· 719 707 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, 720 708 iph->saddr, uh->source, skb->dev->ifindex, 721 709 inet_sdif(skb), udptable, NULL); 710 + 722 711 if (!sk || udp_sk(sk)->encap_type) { 723 712 /* No socket for error: try tunnels before discarding */ 724 - sk = ERR_PTR(-ENOENT); 725 713 if (static_branch_unlikely(&udp_encap_needed_key)) { 726 - sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, 714 + sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, 727 715 info); 728 716 if (!sk) 729 717 return 0; 730 - } 718 + } else 719 + sk = ERR_PTR(-ENOENT); 731 720 732 721 if (IS_ERR(sk)) { 733 722 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+1 -1
net/ipv4/udp_bpf.c
··· 134 134 udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); 135 135 return 0; 136 136 } 137 - core_initcall(udp_bpf_v4_build_proto); 137 + late_initcall(udp_bpf_v4_build_proto); 138 138 139 139 int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) 140 140 {
+1 -1
net/ipv6/ip6_output.c
··· 74 74 75 75 if (likely(nskb)) { 76 76 if (skb->sk) 77 - skb_set_owner_w(skb, skb->sk); 77 + skb_set_owner_w(nskb, skb->sk); 78 78 consume_skb(skb); 79 79 } else { 80 80 kfree_skb(skb);
+1 -1
net/ipv6/route.c
··· 3769 3769 err = PTR_ERR(rt->fib6_metrics); 3770 3770 /* Do not leave garbage there. */ 3771 3771 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; 3772 - goto out; 3772 + goto out_free; 3773 3773 } 3774 3774 3775 3775 if (cfg->fc_flags & RTF_ADDRCONF)
+19 -6
net/ipv6/udp.c
··· 502 502 const struct ipv6hdr *hdr, int offset, 503 503 struct udphdr *uh, 504 504 struct udp_table *udptable, 505 + struct sock *sk, 505 506 struct sk_buff *skb, 506 507 struct inet6_skb_parm *opt, 507 508 u8 type, u8 code, __be32 info) 508 509 { 510 + int (*lookup)(struct sock *sk, struct sk_buff *skb); 509 511 int network_offset, transport_offset; 510 - struct sock *sk; 512 + struct udp_sock *up; 511 513 512 514 network_offset = skb_network_offset(skb); 513 515 transport_offset = skb_transport_offset(skb); ··· 520 518 /* Transport header needs to point to the UDP header */ 521 519 skb_set_transport_header(skb, offset); 522 520 521 + if (sk) { 522 + up = udp_sk(sk); 523 + 524 + lookup = READ_ONCE(up->encap_err_lookup); 525 + if (lookup && lookup(sk, skb)) 526 + sk = NULL; 527 + 528 + goto out; 529 + } 530 + 523 531 sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, 524 532 &hdr->saddr, uh->dest, 525 533 inet6_iif(skb), 0, udptable, skb); 526 534 if (sk) { 527 - int (*lookup)(struct sock *sk, struct sk_buff *skb); 528 - struct udp_sock *up = udp_sk(sk); 535 + up = udp_sk(sk); 529 536 530 537 lookup = READ_ONCE(up->encap_err_lookup); 531 538 if (!lookup || lookup(sk, skb)) 532 539 sk = NULL; 533 540 } 534 541 542 + out: 535 543 if (!sk) { 536 544 sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, 537 545 offset, info)); ··· 570 558 571 559 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, 572 560 inet6_iif(skb), inet6_sdif(skb), udptable, NULL); 561 + 573 562 if (!sk || udp_sk(sk)->encap_type) { 574 563 /* No socket for error: try tunnels before discarding */ 575 - sk = ERR_PTR(-ENOENT); 576 564 if (static_branch_unlikely(&udpv6_encap_needed_key)) { 577 565 sk = __udp6_lib_err_encap(net, hdr, offset, uh, 578 - udptable, skb, 566 + udptable, sk, skb, 579 567 opt, type, code, info); 580 568 if (!sk) 581 569 return 0; 582 - } 570 + } else 571 + sk = ERR_PTR(-ENOENT); 583 572 584 573 if (IS_ERR(sk)) { 585 574 __ICMP6_INC_STATS(net, 
__in6_dev_get(skb->dev),
+11 -9
net/netrom/nr_timer.c
··· 121 121 is accepted() it isn't 'dead' so doesn't get removed. */ 122 122 if (sock_flag(sk, SOCK_DESTROY) || 123 123 (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { 124 - sock_hold(sk); 125 124 bh_unlock_sock(sk); 126 125 nr_destroy_socket(sk); 127 - sock_put(sk); 128 - return; 126 + goto out; 129 127 } 130 128 break; 131 129 ··· 144 146 145 147 nr_start_heartbeat(sk); 146 148 bh_unlock_sock(sk); 149 + out: 150 + sock_put(sk); 147 151 } 148 152 149 153 static void nr_t2timer_expiry(struct timer_list *t) ··· 159 159 nr_enquiry_response(sk); 160 160 } 161 161 bh_unlock_sock(sk); 162 + sock_put(sk); 162 163 } 163 164 164 165 static void nr_t4timer_expiry(struct timer_list *t) ··· 170 169 bh_lock_sock(sk); 171 170 nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; 172 171 bh_unlock_sock(sk); 172 + sock_put(sk); 173 173 } 174 174 175 175 static void nr_idletimer_expiry(struct timer_list *t) ··· 199 197 sock_set_flag(sk, SOCK_DEAD); 200 198 } 201 199 bh_unlock_sock(sk); 200 + sock_put(sk); 202 201 } 203 202 204 203 static void nr_t1timer_expiry(struct timer_list *t) ··· 212 209 case NR_STATE_1: 213 210 if (nr->n2count == nr->n2) { 214 211 nr_disconnect(sk, ETIMEDOUT); 215 - bh_unlock_sock(sk); 216 - return; 212 + goto out; 217 213 } else { 218 214 nr->n2count++; 219 215 nr_write_internal(sk, NR_CONNREQ); ··· 222 220 case NR_STATE_2: 223 221 if (nr->n2count == nr->n2) { 224 222 nr_disconnect(sk, ETIMEDOUT); 225 - bh_unlock_sock(sk); 226 - return; 223 + goto out; 227 224 } else { 228 225 nr->n2count++; 229 226 nr_write_internal(sk, NR_DISCREQ); ··· 232 231 case NR_STATE_3: 233 232 if (nr->n2count == nr->n2) { 234 233 nr_disconnect(sk, ETIMEDOUT); 235 - bh_unlock_sock(sk); 236 - return; 234 + goto out; 237 235 } else { 238 236 nr->n2count++; 239 237 nr_requeue_frames(sk); ··· 241 241 } 242 242 243 243 nr_start_t1timer(sk); 244 + out: 244 245 bh_unlock_sock(sk); 246 + sock_put(sk); 245 247 }
+8 -4
net/sched/act_skbmod.c
··· 6 6 */ 7 7 8 8 #include <linux/module.h> 9 + #include <linux/if_arp.h> 9 10 #include <linux/init.h> 10 11 #include <linux/kernel.h> 11 12 #include <linux/skbuff.h> ··· 34 33 tcf_lastuse_update(&d->tcf_tm); 35 34 bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); 36 35 36 + action = READ_ONCE(d->tcf_action); 37 + if (unlikely(action == TC_ACT_SHOT)) 38 + goto drop; 39 + 40 + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) 41 + return action; 42 + 37 43 /* XXX: if you are going to edit more fields beyond ethernet header 38 44 * (example when you add IP header replacement or vlan swap) 39 45 * then MAX_EDIT_LEN needs to change appropriately 40 46 */ 41 47 err = skb_ensure_writable(skb, MAX_EDIT_LEN); 42 48 if (unlikely(err)) /* best policy is to drop on the floor */ 43 - goto drop; 44 - 45 - action = READ_ONCE(d->tcf_action); 46 - if (unlikely(action == TC_ACT_SHOT)) 47 49 goto drop; 48 50 49 51 p = rcu_dereference_bh(d->skbmod_p);
+1 -1
net/sched/cls_api.c
··· 2904 2904 break; 2905 2905 case RTM_GETCHAIN: 2906 2906 err = tc_chain_notify(chain, skb, n->nlmsg_seq, 2907 - n->nlmsg_seq, n->nlmsg_type, true); 2907 + n->nlmsg_flags, n->nlmsg_type, true); 2908 2908 if (err < 0) 2909 2909 NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); 2910 2910 break;
+4 -1
net/sched/cls_tcindex.c
··· 278 278 TCA_TCINDEX_POLICE); 279 279 } 280 280 281 + static void tcindex_free_perfect_hash(struct tcindex_data *cp); 282 + 281 283 static void tcindex_partial_destroy_work(struct work_struct *work) 282 284 { 283 285 struct tcindex_data *p = container_of(to_rcu_work(work), ··· 287 285 rwork); 288 286 289 287 rtnl_lock(); 290 - kfree(p->perfect); 288 + if (p->perfect) 289 + tcindex_free_perfect_hash(p); 291 290 kfree(p); 292 291 rtnl_unlock(); 293 292 }
+2
net/sctp/auth.c
··· 860 860 if (replace) { 861 861 list_del_init(&shkey->key_list); 862 862 sctp_auth_shkey_release(shkey); 863 + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) 864 + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); 863 865 } 864 866 list_add(&cur_key->key_list, sh_keys); 865 867
+2 -2
net/sctp/output.c
··· 104 104 if (asoc->param_flags & SPP_PMTUD_ENABLE) 105 105 sctp_assoc_sync_pmtu(asoc); 106 106 } else if (!sctp_transport_pl_enabled(tp) && 107 - !sctp_transport_pmtu_check(tp)) { 108 - if (asoc->param_flags & SPP_PMTUD_ENABLE) 107 + asoc->param_flags & SPP_PMTUD_ENABLE) { 108 + if (!sctp_transport_pmtu_check(tp)) 109 109 sctp_assoc_sync_pmtu(asoc); 110 110 } 111 111
+4
net/sctp/socket.c
··· 4577 4577 } 4578 4578 4579 4579 if (optlen > 0) { 4580 + /* Trim it to the biggest size sctp sockopt may need if necessary */ 4581 + optlen = min_t(unsigned int, optlen, 4582 + PAGE_ALIGN(USHRT_MAX + 4583 + sizeof(__u16) * sizeof(struct sctp_reset_streams))); 4580 4584 kopt = memdup_sockptr(optval, optlen); 4581 4585 if (IS_ERR(kopt)) 4582 4586 return PTR_ERR(kopt);
+5
tools/bpf/bpftool/common.c
··· 222 222 int err = 0; 223 223 224 224 file = malloc(strlen(name) + 1); 225 + if (!file) { 226 + p_err("mem alloc failed"); 227 + return -1; 228 + } 229 + 225 230 strcpy(file, name); 226 231 dir = dirname(file); 227 232
+54 -1
tools/testing/selftests/net/nettest.c
··· 11 11 #include <sys/socket.h> 12 12 #include <sys/wait.h> 13 13 #include <linux/tcp.h> 14 + #include <linux/udp.h> 14 15 #include <arpa/inet.h> 15 16 #include <net/if.h> 16 17 #include <netinet/in.h> 18 + #include <netinet/ip.h> 17 19 #include <netdb.h> 18 20 #include <fcntl.h> 19 21 #include <libgen.h> ··· 28 26 #include <unistd.h> 29 27 #include <time.h> 30 28 #include <errno.h> 29 + 30 + #include <linux/xfrm.h> 31 + #include <linux/ipsec.h> 32 + #include <linux/pfkeyv2.h> 31 33 32 34 #ifndef IPV6_UNICAST_IF 33 35 #define IPV6_UNICAST_IF 76 ··· 120 114 struct in_addr in; 121 115 struct in6_addr in6; 122 116 } expected_raddr; 117 + 118 + /* ESP in UDP encap test */ 119 + int use_xfrm; 123 120 }; 124 121 125 122 static int server_mode; ··· 1355 1346 return 0; 1356 1347 } 1357 1348 1349 + static int config_xfrm_policy(int sd, struct sock_args *args) 1350 + { 1351 + struct xfrm_userpolicy_info policy = {}; 1352 + int type = UDP_ENCAP_ESPINUDP; 1353 + int xfrm_af = IP_XFRM_POLICY; 1354 + int level = SOL_IP; 1355 + 1356 + if (args->type != SOCK_DGRAM) { 1357 + log_error("Invalid socket type. 
Only DGRAM could be used for XFRM\n"); 1358 + return 1; 1359 + } 1360 + 1361 + policy.action = XFRM_POLICY_ALLOW; 1362 + policy.sel.family = args->version; 1363 + if (args->version == AF_INET6) { 1364 + xfrm_af = IPV6_XFRM_POLICY; 1365 + level = SOL_IPV6; 1366 + } 1367 + 1368 + policy.dir = XFRM_POLICY_OUT; 1369 + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) 1370 + return 1; 1371 + 1372 + policy.dir = XFRM_POLICY_IN; 1373 + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) 1374 + return 1; 1375 + 1376 + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { 1377 + log_err_errno("Failed to set xfrm encap"); 1378 + return 1; 1379 + } 1380 + 1381 + return 0; 1382 + } 1383 + 1358 1384 static int lsock_init(struct sock_args *args) 1359 1385 { 1360 1386 long flags; ··· 1432 1388 1433 1389 if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) 1434 1390 log_err_errno("Failed to set close-on-exec flag"); 1391 + 1392 + if (args->use_xfrm && config_xfrm_policy(sd, args)) { 1393 + log_err_errno("Failed to set xfrm policy"); 1394 + goto err; 1395 + } 1435 1396 1436 1397 out: 1437 1398 return sd; ··· 1821 1772 return client_status; 1822 1773 } 1823 1774 1824 - #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" 1775 + #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" 1825 1776 1826 1777 static void print_usage(char *prog) 1827 1778 { ··· 1844 1795 " -D|R datagram (D) / raw (R) socket (default stream)\n" 1845 1796 " -l addr local address to bind to in server mode\n" 1846 1797 " -c addr local address to bind to in client mode\n" 1798 + " -x configure XFRM policy on socket\n" 1847 1799 "\n" 1848 1800 " -d dev bind socket to given device name\n" 1849 1801 " -I dev bind socket to given device name - server mode\n" ··· 2015 1965 break; 2016 1966 case 'q': 2017 1967 quiet = 1; 1968 + break; 1969 + case 'x': 1970 + args.use_xfrm = 1; 2018 1971 break; 2019 1972 default: 2020 1973 print_usage(argv[0]);
+206 -6
tools/testing/selftests/net/pmtu.sh
··· 118 118 # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU 119 119 # changes alone won't affect PMTU 120 120 # 121 + # - pmtu_vti4_udp_exception 122 + # Same as pmtu_vti4_exception, but using ESP-in-UDP 123 + # 124 + # - pmtu_vti4_udp_routed_exception 125 + # Set up vti tunnel on top of veth connected through routing namespace and 126 + # add xfrm states and policies with ESP-in-UDP encapsulation. Check that 127 + # route exception is not created if link layer MTU is not exceeded, then 128 + # lower MTU on second part of routed environment and check that exception 129 + # is created with the expected PMTU. 130 + # 121 131 # - pmtu_vti6_exception 122 132 # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two 123 133 # namespaces with matching endpoints. Check that route exception is 124 134 # created by exceeding link layer MTU with ping to other endpoint. Then 125 135 # decrease and increase MTU of tunnel, checking that route exception PMTU 126 136 # changes accordingly 137 + # 138 + # - pmtu_vti6_udp_exception 139 + # Same as pmtu_vti6_exception, but using ESP-in-UDP 140 + # 141 + # - pmtu_vti6_udp_routed_exception 142 + # Same as pmtu_vti6_udp_routed_exception but with routing between vti 143 + # endpoints 127 144 # 128 145 # - pmtu_vti4_default_mtu 129 146 # Set up vti4 tunnel on top of veth, in two namespaces with matching ··· 241 224 pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 242 225 pmtu_vti6_exception vti6: PMTU exceptions 0 243 226 pmtu_vti4_exception vti4: PMTU exceptions 0 227 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 228 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 229 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 230 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 244 231 pmtu_vti4_default_mtu vti4: default MTU assignment 0 245 232 pmtu_vti6_default_mtu vti6: default MTU assignment 0 246 233 
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 ··· 267 246 ns_c="ip netns exec ${NS_C}" 268 247 ns_r1="ip netns exec ${NS_R1}" 269 248 ns_r2="ip netns exec ${NS_R2}" 270 - 271 249 # Addressing and routing for tests with routers: four network segments, with 272 250 # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an 273 251 # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). ··· 299 279 A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 300 280 B default ${prefix6}:${b_r1}::2 301 281 " 302 - 303 282 USE_NH="no" 304 283 # ns family nh id destination gateway 305 284 nexthops=" ··· 345 326 346 327 err_buf= 347 328 tcpdump_pids= 329 + nettest_pids= 348 330 349 331 err() { 350 332 err_buf="${err_buf}${1} ··· 568 548 setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} 569 549 } 570 550 551 + setup_vti4routed() { 552 + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} 553 + } 554 + 555 + setup_vti6routed() { 556 + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} 557 + } 558 + 571 559 setup_vxlan_or_geneve() { 572 560 type="${1}" 573 561 a_addr="${2}" ··· 647 619 proto=${1} 648 620 veth_a_addr="${2}" 649 621 veth_b_addr="${3}" 622 + encap=${4} 650 623 651 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 652 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 624 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 625 + 
run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} 653 626 run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel 654 627 run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel 655 628 656 - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 657 - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 629 + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} 630 + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} 658 631 run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel 659 632 run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel 633 + } 634 + 635 + setup_nettest_xfrm() { 636 + which nettest >/dev/null 637 + if [ $? -ne 0 ]; then 638 + echo "'nettest' command not found; skipping tests" 639 + return 1 640 + fi 641 + 642 + [ ${1} -eq 6 ] && proto="-6" || proto="" 643 + port=${2} 644 + 645 + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & 646 + nettest_pids="${nettest_pids} $!" 
647 + 648 + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & 649 + nettest_pids="${nettest_pids} $!" 660 650 } 661 651 662 652 setup_xfrm4() { ··· 683 637 684 638 setup_xfrm6() { 685 639 setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} 640 + } 641 + 642 + setup_xfrm4udp() { 643 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" 644 + setup_nettest_xfrm 4 4500 645 + } 646 + 647 + setup_xfrm6udp() { 648 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" 649 + setup_nettest_xfrm 6 4500 650 + } 651 + 652 + setup_xfrm4udprouted() { 653 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" 654 + setup_nettest_xfrm 4 4500 655 + } 656 + 657 + setup_xfrm6udprouted() { 658 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" 659 + setup_nettest_xfrm 6 4500 686 660 } 687 661 688 662 setup_routing_old() { ··· 888 822 kill ${pid} 889 823 done 890 824 tcpdump_pids= 825 + 826 + for pid in ${nettest_pids}; do 827 + kill ${pid} 828 + done 829 + nettest_pids= 891 830 892 831 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do 893 832 ip netns del ${n} 2> /dev/null ··· 1501 1430 check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 1502 1431 1503 1432 return ${fail} 1433 + } 1434 + 1435 + test_pmtu_vti4_udp_exception() { 1436 + setup namespaces veth vti4 xfrm4udp || return $ksft_skip 1437 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ 1438 + "${ns_a}" vti4_a "${ns_b}" vti4_b 1439 + 1440 + veth_mtu=1500 1441 + vti_mtu=$((veth_mtu - 20)) 1442 + 1443 + # UDP SPI SN IV ICV pad length next header 1444 + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) 1445 + ping_payload=$((esp_payload_rfc4106 - 28)) 1446 + 1447 + mtu "${ns_a}" veth_a ${veth_mtu} 1448 + mtu "${ns_b}" veth_b ${veth_mtu} 1449 + mtu "${ns_a}" vti4_a ${vti_mtu} 1450 + mtu "${ns_b}" vti4_b ${vti_mtu} 1451 + 1452 + # Send DF packet without exceeding 
link layer MTU, check that no 1453 + # exception is created 1454 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} 1455 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1456 + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 1457 + 1458 + # Now exceed link layer MTU by one byte, check that exception is created 1459 + # with the right PMTU value 1460 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} 1461 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1462 + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" 1463 + } 1464 + 1465 + test_pmtu_vti6_udp_exception() { 1466 + setup namespaces veth vti6 xfrm6udp || return $ksft_skip 1467 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ 1468 + "${ns_a}" vti6_a "${ns_b}" vti6_b 1469 + fail=0 1470 + 1471 + # Create route exception by exceeding link layer MTU 1472 + mtu "${ns_a}" veth_a 4000 1473 + mtu "${ns_b}" veth_b 4000 1474 + mtu "${ns_a}" vti6_a 5000 1475 + mtu "${ns_b}" vti6_b 5000 1476 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} 1477 + 1478 + # Check that exception was created 1479 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1480 + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 1481 + 1482 + # Decrease tunnel MTU, check for PMTU decrease in route exception 1483 + mtu "${ns_a}" vti6_a 3000 1484 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1485 + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 1486 + 1487 + # Increase tunnel MTU, check for PMTU increase in route exception 1488 + mtu "${ns_a}" vti6_a 9000 1489 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1490 + check_pmtu_value "9000" "${pmtu}" 
"increasing tunnel MTU" || fail=1 1491 + 1492 + return ${fail} 1493 + } 1494 + 1495 + test_pmtu_vti4_udp_routed_exception() { 1496 + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip 1497 + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ 1498 + "${ns_a}" vti4_a "${ns_b}" vti4_b 1499 + 1500 + veth_mtu=1500 1501 + vti_mtu=$((veth_mtu - 20)) 1502 + 1503 + # UDP SPI SN IV ICV pad length next header 1504 + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) 1505 + ping_payload=$((esp_payload_rfc4106 - 28)) 1506 + 1507 + mtu "${ns_a}" veth_A-R1 ${veth_mtu} 1508 + mtu "${ns_r1}" veth_R1-A ${veth_mtu} 1509 + mtu "${ns_b}" veth_B-R1 ${veth_mtu} 1510 + mtu "${ns_r1}" veth_R1-B ${veth_mtu} 1511 + 1512 + mtu "${ns_a}" vti4_a ${vti_mtu} 1513 + mtu "${ns_b}" vti4_b ${vti_mtu} 1514 + 1515 + # Send DF packet without exceeding link layer MTU, check that no 1516 + # exception is created 1517 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} 1518 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1519 + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 1520 + 1521 + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created 1522 + # with the right PMTU value 1523 + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) 1524 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} 1525 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1526 + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" 1527 + } 1528 + 1529 + test_pmtu_vti6_udp_routed_exception() { 1530 + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip 1531 + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ 1532 + "${ns_a}" vti6_a "${ns_b}" vti6_b 1533 + 1534 + veth_mtu=1500 1535 + vti_mtu=$((veth_mtu - 40)) 1536 + 1537 + # 
UDP SPI SN IV ICV pad length next header 1538 + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) 1539 + ping_payload=$((esp_payload_rfc4106 - 48)) 1540 + 1541 + mtu "${ns_a}" veth_A-R1 ${veth_mtu} 1542 + mtu "${ns_r1}" veth_R1-A ${veth_mtu} 1543 + mtu "${ns_b}" veth_B-R1 ${veth_mtu} 1544 + mtu "${ns_r1}" veth_R1-B ${veth_mtu} 1545 + 1546 + # mtu "${ns_a}" vti6_a ${vti_mtu} 1547 + # mtu "${ns_b}" vti6_b ${vti_mtu} 1548 + 1549 + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} 1550 + 1551 + # Check that exception was not created 1552 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1553 + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 1554 + 1555 + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created 1556 + # with the right PMTU value 1557 + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) 1558 + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} 1559 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1560 + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" 1561 + 1504 1562 } 1505 1563 1506 1564 test_pmtu_vti4_default_mtu() {