Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from David Miller:

1) Fix type of bind option flag in af_xdp, from Baruch Siach.

2) Fix use after free in bpf_xdp_link_release(), from Xuan Zhao.

3) PM refcnt imbalance in r8152, from Takashi Iwai.

4) Sign extension bug in liquidio, from Colin Ian King.

5) Missing range check in s390 bpf jit, from Colin Ian King.

6) Uninit value in caif_seqpkt_sendmsg(), from Ziyong Xuan.

7) Fix skb page recycling race, from Ilias Apalodimas.

8) Fix memory leak in tcindex_partial_destroy_work, from Pavel Skripkin.

9) netrom timer sk refcnt issues, from Nguyen Dinh Phi.

10) Fix data races around tcp's tfo_active_disable_stamp, from Eric
Dumazet.

11) act_skbmod should only operate on ethernet packets, from Peilin Ye.

12) Fix slab out-of-bounds in fib6_nh_flush_exceptions(), from Paolo
Abeni.

13) Fix sparx5 dependencies, from Yajun Deng.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (74 commits)
dpaa2-switch: seed the buffer pool after allocating the swp
net: sched: cls_api: Fix the the wrong parameter
net: sparx5: fix unmet dependencies warning
net: dsa: tag_ksz: dont let the hardware process the layer 4 checksum
net: dsa: ensure linearized SKBs in case of tail taggers
ravb: Remove extra TAB
ravb: Fix a typo in comment
net: dsa: sja1105: make VID 4095 a bridge VLAN too
tcp: disable TFO blackhole logic by default
sctp: do not update transport pathmtu if SPP_PMTUD_ENABLE is not set
net: ixp46x: fix ptp build failure
ibmvnic: Remove the proper scrq flush
selftests: net: add ESP-in-UDP PMTU test
udp: check encap socket in __udp_lib_err
sctp: update active_key for asoc when old key is being replaced
r8169: Avoid duplicate sysfs entry creation error
ixgbe: Fix packet corruption due to missing DMA sync
Revert "qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()"
ipv6: fix another slab-out-of-bounds in fib6_nh_flush_exceptions
fsl/fman: Add fibre support
...

+1218 -270
-56
Documentation/devicetree/bindings/net/imx-dwmac.txt
··· 1 - IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP. 2 - 3 - This file documents platform glue layer for IMX. 4 - Please see stmmac.txt for the other unchanged properties. 5 - 6 - The device node has following properties. 7 - 8 - Required properties: 9 - - compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer 10 - and "snps,dwmac-5.10a" to select IP version. 11 - - clocks: Must contain a phandle for each entry in clock-names. 12 - - clock-names: Should be "stmmaceth" for the host clock. 13 - Should be "pclk" for the MAC apb clock. 14 - Should be "ptp_ref" for the MAC timer clock. 15 - Should be "tx" for the MAC RGMII TX clock: 16 - Should be "mem" for EQOS MEM clock. 17 - - "mem" clock is required for imx8dxl platform. 18 - - "mem" clock is not required for imx8mp platform. 19 - - interrupt-names: Should contain a list of interrupt names corresponding to 20 - the interrupts in the interrupts property, if available. 21 - Should be "macirq" for the main MAC IRQ 22 - Should be "eth_wake_irq" for the IT which wake up system 23 - - intf_mode: Should be phandle/offset pair. The phandle to the syscon node which 24 - encompases the GPR register, and the offset of the GPR register. 25 - - required for imx8mp platform. 26 - - is optional for imx8dxl platform. 27 - 28 - Optional properties: 29 - - intf_mode: is optional for imx8dxl platform. 30 - - snps,rmii_refclk_ext: to select RMII reference clock from external. 
31 - 32 - Example: 33 - eqos: ethernet@30bf0000 { 34 - compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; 35 - reg = <0x30bf0000 0x10000>; 36 - interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, 37 - <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; 38 - interrupt-names = "eth_wake_irq", "macirq"; 39 - clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, 40 - <&clk IMX8MP_CLK_QOS_ENET_ROOT>, 41 - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, 42 - <&clk IMX8MP_CLK_ENET_QOS>; 43 - clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; 44 - assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, 45 - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, 46 - <&clk IMX8MP_CLK_ENET_QOS>; 47 - assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, 48 - <&clk IMX8MP_SYS_PLL2_100M>, 49 - <&clk IMX8MP_SYS_PLL2_125M>; 50 - assigned-clock-rates = <0>, <100000000>, <125000000>; 51 - nvmem-cells = <&eth_mac0>; 52 - nvmem-cell-names = "mac-address"; 53 - nvmem_macaddr_swap; 54 - intf_mode = <&gpr 0x4>; 55 - status = "disabled"; 56 - };
+93
Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/net/nxp,dwmac-imx.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: NXP i.MX8 DWMAC glue layer Device Tree Bindings 8 + 9 + maintainers: 10 + - Joakim Zhang <qiangqing.zhang@nxp.com> 11 + 12 + # We need a select here so we don't match all nodes with 'snps,dwmac' 13 + select: 14 + properties: 15 + compatible: 16 + contains: 17 + enum: 18 + - nxp,imx8mp-dwmac-eqos 19 + - nxp,imx8dxl-dwmac-eqos 20 + required: 21 + - compatible 22 + 23 + allOf: 24 + - $ref: "snps,dwmac.yaml#" 25 + 26 + properties: 27 + compatible: 28 + oneOf: 29 + - items: 30 + - enum: 31 + - nxp,imx8mp-dwmac-eqos 32 + - nxp,imx8dxl-dwmac-eqos 33 + - const: snps,dwmac-5.10a 34 + 35 + clocks: 36 + minItems: 3 37 + maxItems: 5 38 + items: 39 + - description: MAC host clock 40 + - description: MAC apb clock 41 + - description: MAC timer clock 42 + - description: MAC RGMII TX clock 43 + - description: EQOS MEM clock 44 + 45 + clock-names: 46 + minItems: 3 47 + maxItems: 5 48 + contains: 49 + enum: 50 + - stmmaceth 51 + - pclk 52 + - ptp_ref 53 + - tx 54 + - mem 55 + 56 + intf_mode: 57 + $ref: /schemas/types.yaml#/definitions/phandle-array 58 + description: 59 + Should be phandle/offset pair. The phandle to the syscon node which 60 + encompases the GPR register, and the offset of the GPR register. 61 + 62 + snps,rmii_refclk_ext: 63 + $ref: /schemas/types.yaml#/definitions/flag 64 + description: 65 + To select RMII reference clock from external. 
66 + 67 + required: 68 + - compatible 69 + - clocks 70 + - clock-names 71 + 72 + unevaluatedProperties: false 73 + 74 + examples: 75 + - | 76 + #include <dt-bindings/interrupt-controller/arm-gic.h> 77 + #include <dt-bindings/interrupt-controller/irq.h> 78 + #include <dt-bindings/clock/imx8mp-clock.h> 79 + 80 + eqos: ethernet@30bf0000 { 81 + compatible = "nxp,imx8mp-dwmac-eqos","snps,dwmac-5.10a"; 82 + reg = <0x30bf0000 0x10000>; 83 + interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, 84 + <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; 85 + interrupt-names = "macirq", "eth_wake_irq"; 86 + clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, 87 + <&clk IMX8MP_CLK_QOS_ENET_ROOT>, 88 + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, 89 + <&clk IMX8MP_CLK_ENET_QOS>; 90 + clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; 91 + phy-mode = "rgmii"; 92 + status = "disabled"; 93 + };
+3
Documentation/devicetree/bindings/net/snps,dwmac.yaml
··· 28 28 - snps,dwmac-4.00 29 29 - snps,dwmac-4.10a 30 30 - snps,dwmac-4.20a 31 + - snps,dwmac-5.10a 31 32 - snps,dwxgmac 32 33 - snps,dwxgmac-2.10 33 34 ··· 83 82 - snps,dwmac-4.00 84 83 - snps,dwmac-4.10a 85 84 - snps,dwmac-4.20a 85 + - snps,dwmac-5.10a 86 86 - snps,dwxgmac 87 87 - snps,dwxgmac-2.10 88 88 ··· 377 375 - snps,dwmac-4.00 378 376 - snps,dwmac-4.10a 379 377 - snps,dwmac-4.20a 378 + - snps,dwmac-5.10a 380 379 - snps,dwxgmac 381 380 - snps,dwxgmac-2.10 382 381 - st,spear600-gmac
+3 -3
Documentation/networking/af_xdp.rst
··· 243 243 These are the various configuration flags that can be used to control 244 244 and monitor the behavior of AF_XDP sockets. 245 245 246 - XDP_COPY and XDP_ZERO_COPY bind flags 247 - ------------------------------------- 246 + XDP_COPY and XDP_ZEROCOPY bind flags 247 + ------------------------------------ 248 248 249 249 When you bind to a socket, the kernel will first try to use zero-copy 250 250 copy. If zero-copy is not supported, it will fall back on using copy ··· 252 252 like to force a certain mode, you can use the following flags. If you 253 253 pass the XDP_COPY flag to the bind call, the kernel will force the 254 254 socket into copy mode. If it cannot use copy mode, the bind call will 255 - fail with an error. Conversely, the XDP_ZERO_COPY flag will force the 255 + fail with an error. Conversely, the XDP_ZEROCOPY flag will force the 256 256 socket into zero-copy mode or fail. 257 257 258 258 XDP_SHARED_UMEM bind flag
+1 -1
Documentation/networking/ip-sysctl.rst
··· 826 826 initial value when the blackhole issue goes away. 827 827 0 to disable the blackhole detection. 828 828 829 - By default, it is set to 1hr. 829 + By default, it is set to 0 (feature is disabled). 830 830 831 831 tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs 832 832 The list consists of a primary key and an optional backup key. The
+1
MAINTAINERS
··· 11758 11758 MEDIATEK SWITCH DRIVER 11759 11759 M: Sean Wang <sean.wang@mediatek.com> 11760 11760 M: Landen Chao <Landen.Chao@mediatek.com> 11761 + M: DENG Qingfang <dqfext@gmail.com> 11761 11762 L: netdev@vger.kernel.org 11762 11763 S: Maintained 11763 11764 F: drivers/net/dsa/mt7530.*
+3 -3
arch/arm64/boot/dts/freescale/imx8mp.dtsi
··· 821 821 eqos: ethernet@30bf0000 { 822 822 compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; 823 823 reg = <0x30bf0000 0x10000>; 824 - interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, 825 - <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; 826 - interrupt-names = "eth_wake_irq", "macirq"; 824 + interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, 825 + <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; 826 + interrupt-names = "macirq", "eth_wake_irq"; 827 827 clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, 828 828 <&clk IMX8MP_CLK_QOS_ENET_ROOT>, 829 829 <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
+1 -1
arch/s390/net/bpf_jit_comp.c
··· 112 112 { 113 113 u32 r1 = reg2hex[b1]; 114 114 115 - if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) 115 + if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) 116 116 jit->seen_reg[r1] = 1; 117 117 } 118 118
+2
drivers/net/bonding/bond_main.c
··· 3450 3450 return bond_event_changename(event_bond); 3451 3451 case NETDEV_UNREGISTER: 3452 3452 bond_remove_proc_entry(event_bond); 3453 + #ifdef CONFIG_XFRM_OFFLOAD 3453 3454 xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); 3455 + #endif /* CONFIG_XFRM_OFFLOAD */ 3454 3456 break; 3455 3457 case NETDEV_REGISTER: 3456 3458 bond_create_proc_entry(event_bond);
+2
drivers/net/dsa/mt7530.c
··· 366 366 int i; 367 367 368 368 reg[1] |= vid & CVID_MASK; 369 + if (vid > 1) 370 + reg[1] |= ATA2_IVL; 369 371 reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; 370 372 reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; 371 373 /* STATIC_ENT indicate that entry is static wouldn't
+1
drivers/net/dsa/mt7530.h
··· 79 79 #define STATIC_EMP 0 80 80 #define STATIC_ENT 3 81 81 #define MT7530_ATA2 0x78 82 + #define ATA2_IVL BIT(15) 82 83 83 84 /* Register for address table write data */ 84 85 #define MT7530_ATWD 0x7c
+1 -1
drivers/net/dsa/mv88e6xxx/Kconfig
··· 12 12 config NET_DSA_MV88E6XXX_PTP 13 13 bool "PTP support for Marvell 88E6xxx" 14 14 default n 15 - depends on PTP_1588_CLOCK 15 + depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK 16 16 help 17 17 Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch 18 18 chips that support it.
+6
drivers/net/dsa/sja1105/sja1105_main.c
··· 397 397 if (dsa_is_cpu_port(ds, port)) 398 398 v->pvid = true; 399 399 list_add(&v->list, &priv->dsa_8021q_vlans); 400 + 401 + v = kmemdup(v, sizeof(*v), GFP_KERNEL); 402 + if (!v) 403 + return -ENOMEM; 404 + 405 + list_add(&v->list, &priv->bridge_vlans); 400 406 } 401 407 402 408 ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
+62 -23
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 1671 1671 1672 1672 if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && 1673 1673 (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { 1674 - u16 vlan_proto = tpa_info->metadata >> 1675 - RX_CMP_FLAGS2_METADATA_TPID_SFT; 1674 + __be16 vlan_proto = htons(tpa_info->metadata >> 1675 + RX_CMP_FLAGS2_METADATA_TPID_SFT); 1676 1676 u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; 1677 1677 1678 - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); 1678 + if (eth_type_vlan(vlan_proto)) { 1679 + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); 1680 + } else { 1681 + dev_kfree_skb(skb); 1682 + return NULL; 1683 + } 1679 1684 } 1680 1685 1681 1686 skb_checksum_none_assert(skb); ··· 1902 1897 (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { 1903 1898 u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); 1904 1899 u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; 1905 - u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; 1900 + __be16 vlan_proto = htons(meta_data >> 1901 + RX_CMP_FLAGS2_METADATA_TPID_SFT); 1906 1902 1907 - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); 1903 + if (eth_type_vlan(vlan_proto)) { 1904 + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); 1905 + } else { 1906 + dev_kfree_skb(skb); 1907 + goto next_rx; 1908 + } 1908 1909 } 1909 1910 1910 1911 skb_checksum_none_assert(skb); ··· 7574 7563 bp->flags &= ~BNXT_FLAG_WOL_CAP; 7575 7564 if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED) 7576 7565 bp->flags |= BNXT_FLAG_WOL_CAP; 7577 - if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) 7566 + if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) { 7578 7567 __bnxt_hwrm_ptp_qcfg(bp); 7568 + } else { 7569 + kfree(bp->ptp_cfg); 7570 + bp->ptp_cfg = NULL; 7571 + } 7579 7572 } else { 7580 7573 #ifdef CONFIG_BNXT_SRIOV 7581 7574 struct bnxt_vf_info *vf = &bp->vf; ··· 10138 10123 } 10139 10124 } 10140 10125 10141 - bnxt_ptp_start(bp); 10142 10126 rc = bnxt_init_nic(bp, irq_re_init); 10143 10127 if (rc) { 10144 
10128 netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); ··· 10211 10197 { 10212 10198 int rc = 0; 10213 10199 10200 + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { 10201 + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting half open\n"); 10202 + rc = -ENODEV; 10203 + goto half_open_err; 10204 + } 10205 + 10214 10206 rc = bnxt_alloc_mem(bp, false); 10215 10207 if (rc) { 10216 10208 netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); ··· 10276 10256 rc = bnxt_hwrm_if_change(bp, true); 10277 10257 if (rc) 10278 10258 return rc; 10259 + 10260 + if (bnxt_ptp_init(bp)) { 10261 + netdev_warn(dev, "PTP initialization failed.\n"); 10262 + kfree(bp->ptp_cfg); 10263 + bp->ptp_cfg = NULL; 10264 + } 10279 10265 rc = __bnxt_open_nic(bp, true, true); 10280 10266 if (rc) { 10281 10267 bnxt_hwrm_if_change(bp, false); 10268 + bnxt_ptp_clear(bp); 10282 10269 } else { 10283 10270 if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { 10284 10271 if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { ··· 10376 10349 { 10377 10350 struct bnxt *bp = netdev_priv(dev); 10378 10351 10352 + bnxt_ptp_clear(bp); 10379 10353 bnxt_hwmon_close(bp); 10380 10354 bnxt_close_nic(bp, true, true); 10381 10355 bnxt_hwrm_shutdown_link(bp); ··· 11363 11335 bnxt_clear_int_mode(bp); 11364 11336 pci_disable_device(bp->pdev); 11365 11337 } 11338 + bnxt_ptp_clear(bp); 11366 11339 __bnxt_close_nic(bp, true, false); 11367 11340 bnxt_vf_reps_free(bp); 11368 11341 bnxt_clear_int_mode(bp); ··· 11988 11959 (bp->fw_reset_max_dsecs * HZ / 10)); 11989 11960 } 11990 11961 11962 + static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) 11963 + { 11964 + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); 11965 + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { 11966 + bnxt_ulp_start(bp, rc); 11967 + bnxt_dl_health_status_update(bp, false); 11968 + } 11969 + bp->fw_reset_state = 0; 11970 + dev_close(bp->dev); 11971 + } 11972 + 11991 11973 static void bnxt_fw_reset_task(struct 
work_struct *work) 11992 11974 { 11993 11975 struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); 11994 - int rc; 11976 + int rc = 0; 11995 11977 11996 11978 if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { 11997 11979 netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); ··· 12032 11992 } 12033 11993 bp->fw_reset_timestamp = jiffies; 12034 11994 rtnl_lock(); 11995 + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { 11996 + bnxt_fw_reset_abort(bp, rc); 11997 + rtnl_unlock(); 11998 + return; 11999 + } 12035 12000 bnxt_fw_reset_close(bp); 12036 12001 if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { 12037 12002 bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN; ··· 12084 12039 if (val == 0xffff) { 12085 12040 if (bnxt_fw_reset_timeout(bp)) { 12086 12041 netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); 12042 + rc = -ETIMEDOUT; 12087 12043 goto fw_reset_abort; 12088 12044 } 12089 12045 bnxt_queue_fw_reset_work(bp, HZ / 1000); ··· 12094 12048 clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); 12095 12049 if (pci_enable_device(bp->pdev)) { 12096 12050 netdev_err(bp->dev, "Cannot re-enable PCI device\n"); 12051 + rc = -ENODEV; 12097 12052 goto fw_reset_abort; 12098 12053 } 12099 12054 pci_set_master(bp->pdev); ··· 12121 12074 } 12122 12075 rc = bnxt_open(bp->dev); 12123 12076 if (rc) { 12124 - netdev_err(bp->dev, "bnxt_open_nic() failed\n"); 12125 - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); 12126 - dev_close(bp->dev); 12077 + netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); 12078 + bnxt_fw_reset_abort(bp, rc); 12079 + rtnl_unlock(); 12080 + return; 12127 12081 } 12128 12082 12129 12083 bp->fw_reset_state = 0; ··· 12151 12103 netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); 12152 12104 } 12153 12105 fw_reset_abort: 12154 - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); 12155 - if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) 12156 - bnxt_dl_health_status_update(bp, 
false); 12157 - bp->fw_reset_state = 0; 12158 12106 rtnl_lock(); 12159 - dev_close(bp->dev); 12107 + bnxt_fw_reset_abort(bp, rc); 12160 12108 rtnl_unlock(); 12161 12109 } 12162 12110 ··· 12706 12662 if (BNXT_PF(bp)) 12707 12663 devlink_port_type_clear(&bp->dl_port); 12708 12664 12709 - bnxt_ptp_clear(bp); 12710 12665 pci_disable_pcie_error_reporting(pdev); 12711 12666 unregister_netdev(dev); 12712 12667 clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); ··· 13289 13246 rc); 13290 13247 } 13291 13248 13292 - if (bnxt_ptp_init(bp)) { 13293 - netdev_warn(dev, "PTP initialization failed.\n"); 13294 - kfree(bp->ptp_cfg); 13295 - bp->ptp_cfg = NULL; 13296 - } 13297 13249 bnxt_inv_fw_health_reg(bp); 13298 13250 bnxt_dl_register(bp); 13299 13251 ··· 13474 13436 if (netif_running(netdev)) 13475 13437 bnxt_close(netdev); 13476 13438 13477 - pci_disable_device(pdev); 13439 + if (pci_is_enabled(pdev)) 13440 + pci_disable_device(pdev); 13478 13441 bnxt_free_ctx_mem(bp); 13479 13442 kfree(bp->ctx); 13480 13443 bp->ctx = NULL;
+9 -1
drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
··· 433 433 static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) 434 434 { 435 435 int total_ets_bw = 0; 436 + bool zero = false; 436 437 u8 max_tc = 0; 437 438 int i; 438 439 ··· 454 453 break; 455 454 case IEEE_8021QAZ_TSA_ETS: 456 455 total_ets_bw += ets->tc_tx_bw[i]; 456 + zero = zero || !ets->tc_tx_bw[i]; 457 457 break; 458 458 default: 459 459 return -ENOTSUPP; 460 460 } 461 461 } 462 - if (total_ets_bw > 100) 462 + if (total_ets_bw > 100) { 463 + netdev_warn(bp->dev, "rejecting ETS config exceeding available bandwidth\n"); 463 464 return -EINVAL; 465 + } 466 + if (zero && total_ets_bw == 100) { 467 + netdev_warn(bp->dev, "rejecting ETS config starving a TC\n"); 468 + return -EINVAL; 469 + } 464 470 465 471 if (max_tc >= bp->max_tc) 466 472 *tc = bp->max_tc;
+7 -17
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
··· 385 385 return 0; 386 386 } 387 387 388 - void bnxt_ptp_start(struct bnxt *bp) 389 - { 390 - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; 391 - 392 - if (!ptp) 393 - return; 394 - 395 - if (bp->flags & BNXT_FLAG_CHIP_P5) { 396 - spin_lock_bh(&ptp->ptp_lock); 397 - ptp->current_time = bnxt_refclk_read(bp, NULL); 398 - WRITE_ONCE(ptp->old_time, ptp->current_time); 399 - spin_unlock_bh(&ptp->ptp_lock); 400 - ptp_schedule_worker(ptp->ptp_clock, 0); 401 - } 402 - } 403 - 404 388 static const struct ptp_clock_info bnxt_ptp_caps = { 405 389 .owner = THIS_MODULE, 406 390 .name = "bnxt clock", ··· 434 450 bnxt_unmap_ptp_regs(bp); 435 451 return err; 436 452 } 437 - 453 + if (bp->flags & BNXT_FLAG_CHIP_P5) { 454 + spin_lock_bh(&ptp->ptp_lock); 455 + ptp->current_time = bnxt_refclk_read(bp, NULL); 456 + WRITE_ONCE(ptp->old_time, ptp->current_time); 457 + spin_unlock_bh(&ptp->ptp_lock); 458 + ptp_schedule_worker(ptp->ptp_clock, 0); 459 + } 438 460 return 0; 439 461 } 440 462
-1
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
··· 75 75 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); 76 76 int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); 77 77 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); 78 - void bnxt_ptp_start(struct bnxt *bp); 79 78 int bnxt_ptp_init(struct bnxt *bp); 80 79 void bnxt_ptp_clear(struct bnxt *bp); 81 80 #endif
+5 -4
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
··· 479 479 if (!edev) 480 480 return ERR_PTR(-ENOMEM); 481 481 edev->en_ops = &bnxt_en_ops_tbl; 482 - if (bp->flags & BNXT_FLAG_ROCEV1_CAP) 483 - edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; 484 - if (bp->flags & BNXT_FLAG_ROCEV2_CAP) 485 - edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; 486 482 edev->net = dev; 487 483 edev->pdev = bp->pdev; 488 484 edev->l2_db_size = bp->db_size; 489 485 edev->l2_db_size_nc = bp->db_size; 490 486 bp->edev = edev; 491 487 } 488 + edev->flags &= ~BNXT_EN_FLAG_ROCE_CAP; 489 + if (bp->flags & BNXT_FLAG_ROCEV1_CAP) 490 + edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; 491 + if (bp->flags & BNXT_FLAG_ROCEV2_CAP) 492 + edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; 492 493 return bp->edev; 493 494 } 494 495 EXPORT_SYMBOL(bnxt_ulp_probe);
+1 -1
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
··· 420 420 * bits 32:47 indicate the PVF num. 421 421 */ 422 422 for (q_no = 0; q_no < ern; q_no++) { 423 - reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; 423 + reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; 424 424 425 425 /* for VF assigned queues. */ 426 426 if (q_no < oct->sriov_info.pf_srn) {
+8 -8
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
··· 2770 2770 if (err) 2771 2771 return err; 2772 2772 2773 - err = dpaa2_switch_seed_bp(ethsw); 2774 - if (err) 2775 - goto err_free_dpbp; 2776 - 2777 2773 err = dpaa2_switch_alloc_rings(ethsw); 2778 2774 if (err) 2779 - goto err_drain_dpbp; 2775 + goto err_free_dpbp; 2780 2776 2781 2777 err = dpaa2_switch_setup_dpio(ethsw); 2782 2778 if (err) 2783 2779 goto err_destroy_rings; 2784 2780 2781 + err = dpaa2_switch_seed_bp(ethsw); 2782 + if (err) 2783 + goto err_deregister_dpio; 2784 + 2785 2785 err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); 2786 2786 if (err) { 2787 2787 dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err); 2788 - goto err_deregister_dpio; 2788 + goto err_drain_dpbp; 2789 2789 } 2790 2790 2791 2791 return 0; 2792 2792 2793 + err_drain_dpbp: 2794 + dpaa2_switch_drain_bp(ethsw); 2793 2795 err_deregister_dpio: 2794 2796 dpaa2_switch_free_dpio(ethsw); 2795 2797 err_destroy_rings: 2796 2798 dpaa2_switch_destroy_rings(ethsw); 2797 - err_drain_dpbp: 2798 - dpaa2_switch_drain_bp(ethsw); 2799 2799 err_free_dpbp: 2800 2800 dpaa2_switch_free_dpbp(ethsw); 2801 2801
+1
drivers/net/ethernet/freescale/fman/mac.c
··· 524 524 | SUPPORTED_Autoneg \ 525 525 | SUPPORTED_Pause \ 526 526 | SUPPORTED_Asym_Pause \ 527 + | SUPPORTED_FIBRE \ 527 528 | SUPPORTED_MII) 528 529 529 530 static DEFINE_MUTEX(eth_lock);
+3 -3
drivers/net/ethernet/hisilicon/hip04_eth.c
··· 131 131 /* buf unit size is cache_line_size, which is 64, so the shift is 6 */ 132 132 #define PPE_BUF_SIZE_SHIFT 6 133 133 #define PPE_TX_BUF_HOLD BIT(31) 134 - #define CACHE_LINE_MASK 0x3F 134 + #define SOC_CACHE_LINE_MASK 0x3F 135 135 #else 136 136 #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 137 137 #define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 ··· 531 531 #if defined(CONFIG_HI13X1_GMAC) 532 532 desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV 533 533 | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); 534 - desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); 535 - desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); 534 + desc->data_offset = (__force u32)cpu_to_be32(phys & SOC_CACHE_LINE_MASK); 535 + desc->send_addr = (__force u32)cpu_to_be32(phys & ~SOC_CACHE_LINE_MASK); 536 536 #else 537 537 desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); 538 538 desc->send_addr = (__force u32)cpu_to_be32(phys);
+5 -2
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
··· 98 98 u32 origin_mbx_msg; 99 99 bool received_resp; 100 100 int resp_status; 101 + u16 match_id; 101 102 u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; 102 103 }; 103 104 ··· 144 143 u8 mbx_need_resp; 145 144 u8 rsv1[1]; 146 145 u8 msg_len; 147 - u8 rsv2[3]; 146 + u8 rsv2; 147 + u16 match_id; 148 148 struct hclge_vf_to_pf_msg msg; 149 149 }; 150 150 ··· 155 153 u8 dest_vfid; 156 154 u8 rsv[3]; 157 155 u8 msg_len; 158 - u8 rsv1[3]; 156 + u8 rsv1; 157 + u16 match_id; 159 158 struct hclge_pf_to_vf_msg msg; 160 159 }; 161 160
+6 -2
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
··· 9552 9552 if (ret) 9553 9553 return ret; 9554 9554 9555 - if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) 9555 + if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) { 9556 9556 ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id, 9557 9557 !enable); 9558 - else if (!vport->vport_id) 9558 + } else if (!vport->vport_id) { 9559 + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) 9560 + enable = false; 9561 + 9559 9562 ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, 9560 9563 HCLGE_FILTER_FE_INGRESS, 9561 9564 enable, 0); 9565 + } 9562 9566 9563 9567 return ret; 9564 9568 }
+1
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
··· 47 47 48 48 resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; 49 49 resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; 50 + resp_pf_to_vf->match_id = vf_to_pf_req->match_id; 50 51 51 52 resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; 52 53 resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code;
+10
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
··· 2641 2641 2642 2642 static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) 2643 2643 { 2644 + struct hnae3_handle *nic = &hdev->nic; 2645 + int ret; 2646 + 2647 + ret = hclgevf_en_hw_strip_rxvtag(nic, true); 2648 + if (ret) { 2649 + dev_err(&hdev->pdev->dev, 2650 + "failed to enable rx vlan offload, ret = %d\n", ret); 2651 + return ret; 2652 + } 2653 + 2644 2654 return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, 2645 2655 false); 2646 2656 }
+19
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
··· 13 13 return resp_code ? -resp_code : 0; 14 14 } 15 15 16 + #define HCLGEVF_MBX_MATCH_ID_START 1 16 17 static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) 17 18 { 18 19 /* this function should be called with mbx_resp.mbx_mutex held ··· 22 21 hdev->mbx_resp.received_resp = false; 23 22 hdev->mbx_resp.origin_mbx_msg = 0; 24 23 hdev->mbx_resp.resp_status = 0; 24 + hdev->mbx_resp.match_id++; 25 + /* Update match_id and ensure the value of match_id is not zero */ 26 + if (hdev->mbx_resp.match_id == 0) 27 + hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START; 25 28 memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE); 26 29 } 27 30 ··· 120 115 if (need_resp) { 121 116 mutex_lock(&hdev->mbx_resp.mbx_mutex); 122 117 hclgevf_reset_mbx_resp_status(hdev); 118 + req->match_id = hdev->mbx_resp.match_id; 123 119 status = hclgevf_cmd_send(&hdev->hw, &desc, 1); 124 120 if (status) { 125 121 dev_err(&hdev->pdev->dev, ··· 216 210 for (i = 0; i < HCLGE_MBX_MAX_RESP_DATA_SIZE; i++) { 217 211 resp->additional_info[i] = *temp; 218 212 temp++; 213 + } 214 + 215 + /* If match_id is not zero, it means PF support 216 + * match_id. If the match_id is right, VF get the 217 + * right response, otherwise ignore the response. 218 + * Driver will clear hdev->mbx_resp when send 219 + * next message which need response. 220 + */ 221 + if (req->match_id) { 222 + if (req->match_id == resp->match_id) 223 + resp->received_resp = true; 224 + } else { 225 + resp->received_resp = true; 219 226 } 220 227 break; 221 228 case HCLGE_MBX_LINK_STAT_CHANGE:
+1 -1
drivers/net/ethernet/ibm/ibmvnic.c
··· 1731 1731 tx_send_failed++; 1732 1732 tx_dropped++; 1733 1733 ret = NETDEV_TX_OK; 1734 - ibmvnic_tx_scrq_flush(adapter, tx_scrq); 1735 1734 goto out; 1736 1735 } 1737 1736 ··· 1752 1753 dev_kfree_skb_any(skb); 1753 1754 tx_send_failed++; 1754 1755 tx_dropped++; 1756 + ibmvnic_tx_scrq_flush(adapter, tx_scrq); 1755 1757 ret = NETDEV_TX_OK; 1756 1758 goto out; 1757 1759 }
+2 -1
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
··· 1825 1825 struct sk_buff *skb) 1826 1826 { 1827 1827 if (ring_uses_build_skb(rx_ring)) { 1828 - unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; 1828 + unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; 1829 + unsigned long offset = (unsigned long)(skb->data) & mask; 1829 1830 1830 1831 dma_sync_single_range_for_cpu(rx_ring->dev, 1831 1832 IXGBE_CB(skb)->dma,
+1 -1
drivers/net/ethernet/marvell/octeontx2/af/Makefile
··· 10 10 rvu_mbox-y := mbox.o rvu_trace.o 11 11 rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ 12 12 rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ 13 - rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o 13 + rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o
+9 -1
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
··· 1314 1314 return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc); 1315 1315 } 1316 1316 1317 - static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) 1317 + int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) 1318 1318 { 1319 1319 struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); 1320 1320 int blkaddr = BLKADDR_NIX0, vf; ··· 2859 2859 if (!vfs) 2860 2860 return 0; 2861 2861 2862 + /* LBK channel number 63 is used for switching packets between 2863 + * CGX mapped VFs. Hence limit LBK pairs till 62 only. 2864 + */ 2865 + if (vfs > 62) 2866 + vfs = 62; 2867 + 2862 2868 /* Save VFs number for reference in VF interrupts handlers. 2863 2869 * Since interrupts might start arriving during SRIOV enablement 2864 2870 * ordinary API cannot be used to get number of enabled VFs. ··· 3006 3000 3007 3001 /* Initialize debugfs */ 3008 3002 rvu_dbg_init(rvu); 3003 + 3004 + mutex_init(&rvu->rswitch.switch_lock); 3009 3005 3010 3006 return 0; 3011 3007 err_dl:
+21
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
··· 415 415 size_t kpus; 416 416 }; 417 417 418 + #define RVU_SWITCH_LBK_CHAN 63 419 + 420 + struct rvu_switch { 421 + struct mutex switch_lock; /* Serialize flow installation */ 422 + u32 used_entries; 423 + u16 *entry2pcifunc; 424 + u16 mode; 425 + u16 start_entry; 426 + }; 427 + 418 428 struct rvu { 419 429 void __iomem *afreg_base; 420 430 void __iomem *pfreg_base; ··· 455 445 456 446 /* CGX */ 457 447 #define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */ 448 + u16 cgx_mapped_vfs; /* maximum CGX mapped VFs */ 458 449 u8 cgx_mapped_pfs; 459 450 u8 cgx_cnt_max; /* CGX port count max */ 460 451 u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */ ··· 488 477 struct rvu_debugfs rvu_dbg; 489 478 #endif 490 479 struct rvu_devlink *rvu_dl; 480 + 481 + /* RVU switch implementation over NPC with DMAC rules */ 482 + struct rvu_switch rswitch; 491 483 }; 492 484 493 485 static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) ··· 705 691 struct nix_cn10k_aq_enq_req *aq_req, 706 692 struct nix_cn10k_aq_enq_rsp *aq_rsp, 707 693 u16 pcifunc, u8 ctype, u32 qidx); 694 + int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); 708 695 709 696 /* NPC APIs */ 710 697 int rvu_npc_init(struct rvu *rvu); ··· 783 768 static inline void rvu_dbg_init(struct rvu *rvu) {} 784 769 static inline void rvu_dbg_exit(struct rvu *rvu) {} 785 770 #endif 771 + 772 + /* RVU Switch */ 773 + void rvu_switch_enable(struct rvu *rvu); 774 + void rvu_switch_disable(struct rvu *rvu); 775 + void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc); 776 + 786 777 #endif /* RVU_H */
+3
drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
··· 126 126 unsigned long lmac_bmap; 127 127 int size, free_pkind; 128 128 int cgx, lmac, iter; 129 + int numvfs, hwvfs; 129 130 130 131 if (!cgx_cnt_max) 131 132 return 0; ··· 167 166 pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; 168 167 rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); 169 168 rvu->cgx_mapped_pfs++; 169 + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvfs); 170 + rvu->cgx_mapped_vfs += numvfs; 170 171 pf++; 171 172 } 172 173 }
+1 -4
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
··· 2113 2113 int entry_acnt, entry_ecnt; 2114 2114 int cntr_acnt, cntr_ecnt; 2115 2115 2116 - /* Skip PF0 */ 2117 - if (!pcifunc) 2118 - return; 2119 2116 rvu_npc_get_mcam_entry_alloc_info(rvu, pcifunc, blkaddr, 2120 2117 &entry_acnt, &entry_ecnt); 2121 2118 rvu_npc_get_mcam_counter_alloc_info(rvu, pcifunc, blkaddr, ··· 2295 2298 static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, 2296 2299 struct rvu_npc_mcam_rule *rule) 2297 2300 { 2298 - if (rule->intf == NIX_INTF_TX) { 2301 + if (is_npc_intf_tx(rule->intf)) { 2299 2302 switch (rule->tx_action.op) { 2300 2303 case NIX_TX_ACTIONOP_DROP: 2301 2304 seq_puts(s, "\taction: Drop\n");
+41 -7
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
··· 1364 1364 rvu_nix_health_reporters_destroy(rvu_dl); 1365 1365 } 1366 1366 1367 + static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) 1368 + { 1369 + struct rvu_devlink *rvu_dl = devlink_priv(devlink); 1370 + struct rvu *rvu = rvu_dl->rvu; 1371 + struct rvu_switch *rswitch; 1372 + 1373 + rswitch = &rvu->rswitch; 1374 + *mode = rswitch->mode; 1375 + 1376 + return 0; 1377 + } 1378 + 1379 + static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, 1380 + struct netlink_ext_ack *extack) 1381 + { 1382 + struct rvu_devlink *rvu_dl = devlink_priv(devlink); 1383 + struct rvu *rvu = rvu_dl->rvu; 1384 + struct rvu_switch *rswitch; 1385 + 1386 + rswitch = &rvu->rswitch; 1387 + switch (mode) { 1388 + case DEVLINK_ESWITCH_MODE_LEGACY: 1389 + case DEVLINK_ESWITCH_MODE_SWITCHDEV: 1390 + if (rswitch->mode == mode) 1391 + return 0; 1392 + rswitch->mode = mode; 1393 + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) 1394 + rvu_switch_enable(rvu); 1395 + else 1396 + rvu_switch_disable(rvu); 1397 + break; 1398 + default: 1399 + return -EINVAL; 1400 + } 1401 + 1402 + return 0; 1403 + } 1404 + 1367 1405 static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, 1368 1406 struct netlink_ext_ack *extack) 1369 1407 { ··· 1410 1372 1411 1373 static const struct devlink_ops rvu_devlink_ops = { 1412 1374 .info_get = rvu_devlink_info_get, 1375 + .eswitch_mode_get = rvu_devlink_eswitch_mode_get, 1376 + .eswitch_mode_set = rvu_devlink_eswitch_mode_set, 1413 1377 }; 1414 1378 1415 1379 int rvu_register_dl(struct rvu *rvu) ··· 1420 1380 struct devlink *dl; 1421 1381 int err; 1422 1382 1423 - rvu_dl = kzalloc(sizeof(*rvu_dl), GFP_KERNEL); 1424 - if (!rvu_dl) 1425 - return -ENOMEM; 1426 - 1427 1383 dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); 1428 1384 if (!dl) { 1429 1385 dev_warn(rvu->dev, "devlink_alloc failed\n"); 1430 - kfree(rvu_dl); 1431 1386 return -ENOMEM; 1432 1387 } 1433 1388 ··· 1430 1395 if (err) { 
1431 1396 dev_err(rvu->dev, "devlink register failed with error %d\n", err); 1432 1397 devlink_free(dl); 1433 - kfree(rvu_dl); 1434 1398 return err; 1435 1399 } 1436 1400 1401 + rvu_dl = devlink_priv(dl); 1437 1402 rvu_dl->dl = dl; 1438 1403 rvu_dl->rvu = rvu; 1439 1404 rvu->rvu_dl = rvu_dl; ··· 1452 1417 rvu_health_reporters_destroy(rvu); 1453 1418 devlink_unregister(dl); 1454 1419 devlink_free(dl); 1455 - kfree(rvu_dl); 1456 1420 }
+36
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
··· 1952 1952 pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); 1953 1953 } 1954 1954 1955 + static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, 1956 + u16 pcifunc, struct nix_txsch *txsch) 1957 + { 1958 + struct rvu_hwinfo *hw = rvu->hw; 1959 + int lbk_link_start, lbk_links; 1960 + u8 pf = rvu_get_pf(pcifunc); 1961 + int schq; 1962 + 1963 + if (!is_pf_cgxmapped(rvu, pf)) 1964 + return; 1965 + 1966 + lbk_link_start = hw->cgx_links; 1967 + 1968 + for (schq = 0; schq < txsch->schq.max; schq++) { 1969 + if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) 1970 + continue; 1971 + /* Enable all LBK links with channel 63 by default so that 1972 + * packets can be sent to LBK with a NPC TX MCAM rule 1973 + */ 1974 + lbk_links = hw->lbk_links; 1975 + while (lbk_links--) 1976 + rvu_write64(rvu, blkaddr, 1977 + NIX_AF_TL3_TL2X_LINKX_CFG(schq, 1978 + lbk_link_start + 1979 + lbk_links), 1980 + BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); 1981 + } 1982 + } 1983 + 1955 1984 int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, 1956 1985 struct nix_txschq_config *req, 1957 1986 struct msg_rsp *rsp) ··· 2068 2039 } 2069 2040 rvu_write64(rvu, blkaddr, reg, regval); 2070 2041 } 2042 + 2043 + rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, 2044 + &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); 2071 2045 2072 2046 return 0; 2073 2047 } ··· 3212 3180 if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf) 3213 3181 ether_addr_copy(pfvf->default_mac, req->mac_addr); 3214 3182 3183 + rvu_switch_update_rules(rvu, pcifunc); 3184 + 3215 3185 return 0; 3216 3186 } 3217 3187 ··· 3882 3848 3883 3849 pfvf = rvu_get_pfvf(rvu, pcifunc); 3884 3850 set_bit(NIXLF_INITIALIZED, &pfvf->flags); 3851 + 3852 + rvu_switch_update_rules(rvu, pcifunc); 3885 3853 3886 3854 return rvu_cgx_start_stop_io(rvu, pcifunc, true); 3887 3855 }
+36 -11
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
··· 442 442 owner = mcam->entry2pfvf_map[index]; 443 443 target_func = (entry->action >> 4) & 0xffff; 444 444 /* do nothing when target is LBK/PF or owner is not PF */ 445 - if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) || 445 + if (is_pffunc_af(owner) || is_afvf(target_func) || 446 + (owner & RVU_PFVF_FUNC_MASK) || 446 447 !(target_func & RVU_PFVF_FUNC_MASK)) 447 448 return; 448 449 ··· 469 468 { 470 469 int bank = npc_get_bank(mcam, index); 471 470 int kw = 0, actbank, actindex; 471 + u8 tx_intf_mask = ~intf & 0x3; 472 + u8 tx_intf = intf; 472 473 u64 cam0, cam1; 473 474 474 475 actbank = bank; /* Save bank id, to set action later on */ ··· 491 488 */ 492 489 for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) { 493 490 /* Interface should be set in all banks */ 491 + if (is_npc_intf_tx(intf)) { 492 + /* Last bit must be set and rest don't care 493 + * for TX interfaces 494 + */ 495 + tx_intf_mask = 0x1; 496 + tx_intf = intf & tx_intf_mask; 497 + tx_intf_mask = ~tx_intf & tx_intf_mask; 498 + } 499 + 494 500 rvu_write64(rvu, blkaddr, 495 501 NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1), 496 - intf); 502 + tx_intf); 497 503 rvu_write64(rvu, blkaddr, 498 504 NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0), 499 - ~intf & 0x3); 505 + tx_intf_mask); 500 506 501 507 /* Set the match key */ 502 508 npc_get_keyword(entry, kw, &cam0, &cam1); ··· 662 650 eth_broadcast_addr((u8 *)&req.mask.dmac); 663 651 req.features = BIT_ULL(NPC_DMAC); 664 652 req.channel = chan; 653 + req.chan_mask = 0xFFFU; 665 654 req.intf = pfvf->nix_rx_intf; 666 655 req.op = action.op; 667 656 req.hdr.pcifunc = 0; /* AF is requester */ ··· 812 799 eth_broadcast_addr((u8 *)&req.mask.dmac); 813 800 req.features = BIT_ULL(NPC_DMAC); 814 801 req.channel = chan; 802 + req.chan_mask = 0xFFFU; 815 803 req.intf = pfvf->nix_rx_intf; 816 804 req.entry = index; 817 805 req.hdr.pcifunc = 0; /* AF is requester */ ··· 1759 1745 int nixlf_count = rvu_get_nixlf_count(rvu); 1760 1746 struct 
npc_mcam *mcam = &rvu->hw->mcam; 1761 1747 int rsvd, err; 1748 + u16 index; 1749 + int cntr; 1762 1750 u64 cfg; 1763 1751 1764 1752 /* Actual number of MCAM entries vary by entry size */ ··· 1860 1844 sizeof(u16), GFP_KERNEL); 1861 1845 if (!mcam->entry2target_pffunc) 1862 1846 goto free_mem; 1847 + 1848 + for (index = 0; index < mcam->bmap_entries; index++) { 1849 + mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; 1850 + mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; 1851 + } 1852 + 1853 + for (cntr = 0; cntr < mcam->counters.max; cntr++) 1854 + mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP; 1863 1855 1864 1856 mutex_init(&mcam->lock); 1865 1857 ··· 2586 2562 } 2587 2563 2588 2564 /* Alloc request from PFFUNC with no NIXLF attached should be denied */ 2589 - if (!is_nixlf_attached(rvu, pcifunc)) 2565 + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) 2590 2566 return NPC_MCAM_ALLOC_DENIED; 2591 2567 2592 2568 return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp); ··· 2606 2582 return NPC_MCAM_INVALID_REQ; 2607 2583 2608 2584 /* Free request from PFFUNC with no NIXLF attached, ignore */ 2609 - if (!is_nixlf_attached(rvu, pcifunc)) 2585 + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) 2610 2586 return NPC_MCAM_INVALID_REQ; 2611 2587 2612 2588 mutex_lock(&mcam->lock); ··· 2618 2594 if (rc) 2619 2595 goto exit; 2620 2596 2621 - mcam->entry2pfvf_map[req->entry] = 0; 2597 + mcam->entry2pfvf_map[req->entry] = NPC_MCAM_INVALID_MAP; 2622 2598 mcam->entry2target_pffunc[req->entry] = 0x0; 2623 2599 npc_mcam_clear_bit(mcam, req->entry); 2624 2600 npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false); ··· 2703 2679 else 2704 2680 nix_intf = pfvf->nix_rx_intf; 2705 2681 2706 - if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { 2682 + if (!is_pffunc_af(pcifunc) && 2683 + npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { 2707 2684 rc = NPC_MCAM_INVALID_REQ; 2708 2685 goto exit; 2709 2686 } 2710 2687 2711 
- if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, 2712 - pcifunc)) { 2688 + if (!is_pffunc_af(pcifunc) && 2689 + npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) { 2713 2690 rc = NPC_MCAM_INVALID_REQ; 2714 2691 goto exit; 2715 2692 } ··· 2861 2836 return NPC_MCAM_INVALID_REQ; 2862 2837 2863 2838 /* If the request is from a PFFUNC with no NIXLF attached, ignore */ 2864 - if (!is_nixlf_attached(rvu, pcifunc)) 2839 + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) 2865 2840 return NPC_MCAM_INVALID_REQ; 2866 2841 2867 2842 /* Since list of allocated counter IDs needs to be sent to requester, ··· 3106 3081 if (rc) { 3107 3082 /* Free allocated MCAM entry */ 3108 3083 mutex_lock(&mcam->lock); 3109 - mcam->entry2pfvf_map[entry] = 0; 3084 + mcam->entry2pfvf_map[entry] = NPC_MCAM_INVALID_MAP; 3110 3085 npc_mcam_clear_bit(mcam, entry); 3111 3086 mutex_unlock(&mcam->lock); 3112 3087 return rc;
+21 -8
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
··· 910 910 911 911 static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, 912 912 struct mcam_entry *entry, 913 - struct npc_install_flow_req *req, u16 target) 913 + struct npc_install_flow_req *req, 914 + u16 target, bool pf_set_vfs_mac) 914 915 { 916 + struct rvu_switch *rswitch = &rvu->rswitch; 915 917 struct nix_rx_action action; 916 - u64 chan_mask; 917 918 918 - chan_mask = req->chan_mask ? req->chan_mask : ~0ULL; 919 - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0, 920 - NIX_INTF_RX); 919 + if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac) 920 + req->chan_mask = 0x0; /* Do not care channel */ 921 + 922 + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, 923 + 0, NIX_INTF_RX); 921 924 922 925 *(u64 *)&action = 0x00; 923 926 action.pf_func = target; ··· 952 949 struct npc_install_flow_req *req, u16 target) 953 950 { 954 951 struct nix_tx_action action; 952 + u64 mask = ~0ULL; 953 + 954 + /* If AF is installing then do not care about 955 + * PF_FUNC in Send Descriptor 956 + */ 957 + if (is_pffunc_af(req->hdr.pcifunc)) 958 + mask = 0; 955 959 956 960 npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target), 957 - 0, ~0ULL, 0, NIX_INTF_TX); 961 + 0, mask, 0, NIX_INTF_TX); 958 962 959 963 *(u64 *)&action = 0x00; 960 964 action.op = req->op; ··· 1012 1002 req->intf); 1013 1003 1014 1004 if (is_npc_intf_rx(req->intf)) 1015 - npc_update_rx_entry(rvu, pfvf, entry, req, target); 1005 + npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac); 1016 1006 else 1017 1007 npc_update_tx_entry(rvu, pfvf, entry, req, target); 1018 1008 ··· 1174 1164 if (err) 1175 1165 return err; 1176 1166 1177 - if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) 1167 + /* Skip channel validation if AF is installing */ 1168 + if (!is_pffunc_af(req->hdr.pcifunc) && 1169 + npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) 1178 1170 return -EINVAL; 1179 1171 1180 
1172 pfvf = rvu_get_pfvf(rvu, target); ··· 1192 1180 eth_broadcast_addr((u8 *)&req->mask.dmac); 1193 1181 } 1194 1182 1183 + /* Proceed if NIXLF is attached or not for TX rules */ 1195 1184 err = nix_get_nixlf(rvu, target, &nixlf, NULL); 1196 1185 if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) 1197 1186 return -EINVAL;
+258
drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Marvell OcteonTx2 RVU Admin Function driver 3 + * 4 + * Copyright (C) 2021 Marvell. 5 + */ 6 + 7 + #include <linux/bitfield.h> 8 + #include "rvu.h" 9 + 10 + static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, 11 + u16 chan_mask) 12 + { 13 + struct npc_install_flow_req req = { 0 }; 14 + struct npc_install_flow_rsp rsp = { 0 }; 15 + struct rvu_pfvf *pfvf; 16 + 17 + pfvf = rvu_get_pfvf(rvu, pcifunc); 18 + /* If the pcifunc is not initialized then nothing to do. 19 + * This same function will be called again via rvu_switch_update_rules 20 + * after pcifunc is initialized. 21 + */ 22 + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) 23 + return 0; 24 + 25 + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); 26 + eth_broadcast_addr((u8 *)&req.mask.dmac); 27 + req.hdr.pcifunc = 0; /* AF is requester */ 28 + req.vf = pcifunc; 29 + req.features = BIT_ULL(NPC_DMAC); 30 + req.channel = pfvf->rx_chan_base; 31 + req.chan_mask = chan_mask; 32 + req.intf = pfvf->nix_rx_intf; 33 + req.op = NIX_RX_ACTION_DEFAULT; 34 + req.default_rule = 1; 35 + 36 + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); 37 + } 38 + 39 + static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) 40 + { 41 + struct npc_install_flow_req req = { 0 }; 42 + struct npc_install_flow_rsp rsp = { 0 }; 43 + struct rvu_pfvf *pfvf; 44 + u8 lbkid; 45 + 46 + pfvf = rvu_get_pfvf(rvu, pcifunc); 47 + /* If the pcifunc is not initialized then nothing to do. 48 + * This same function will be called again via rvu_switch_update_rules 49 + * after pcifunc is initialized. 50 + */ 51 + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) 52 + return 0; 53 + 54 + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 
0 : 1; 55 + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); 56 + eth_broadcast_addr((u8 *)&req.mask.dmac); 57 + req.hdr.pcifunc = 0; /* AF is requester */ 58 + req.vf = pcifunc; 59 + req.entry = entry; 60 + req.features = BIT_ULL(NPC_DMAC); 61 + req.intf = pfvf->nix_tx_intf; 62 + req.op = NIX_TX_ACTIONOP_UCAST_CHAN; 63 + req.index = (lbkid << 8) | RVU_SWITCH_LBK_CHAN; 64 + req.set_cntr = 1; 65 + 66 + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); 67 + } 68 + 69 + static int rvu_switch_install_rules(struct rvu *rvu) 70 + { 71 + struct rvu_switch *rswitch = &rvu->rswitch; 72 + u16 start = rswitch->start_entry; 73 + struct rvu_hwinfo *hw = rvu->hw; 74 + int pf, vf, numvfs, hwvf; 75 + u16 pcifunc, entry = 0; 76 + int err; 77 + 78 + for (pf = 1; pf < hw->total_pfs; pf++) { 79 + if (!is_pf_cgxmapped(rvu, pf)) 80 + continue; 81 + 82 + pcifunc = pf << 10; 83 + /* rvu_get_nix_blkaddr sets up the corresponding NIX block 84 + * address and NIX RX and TX interfaces for a pcifunc. 85 + * Generally it is called during attach call of a pcifunc but it 86 + * is called here since we are pre-installing rules before 87 + * nixlfs are attached 88 + */ 89 + rvu_get_nix_blkaddr(rvu, pcifunc); 90 + 91 + /* MCAM RX rule for a PF/VF already exists as default unicast 92 + * rules installed by AF. Hence change the channel in those 93 + * rules to ignore channel so that packets with the required 94 + * DMAC received from LBK(by other PF/VFs in system) or from 95 + * external world (from wire) are accepted. 
96 + */ 97 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); 98 + if (err) { 99 + dev_err(rvu->dev, "RX rule for PF%d failed(%d)\n", 100 + pf, err); 101 + return err; 102 + } 103 + 104 + err = rvu_switch_install_tx_rule(rvu, pcifunc, start + entry); 105 + if (err) { 106 + dev_err(rvu->dev, "TX rule for PF%d failed(%d)\n", 107 + pf, err); 108 + return err; 109 + } 110 + 111 + rswitch->entry2pcifunc[entry++] = pcifunc; 112 + 113 + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); 114 + for (vf = 0; vf < numvfs; vf++, hwvf++) { 115 + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); 116 + rvu_get_nix_blkaddr(rvu, pcifunc); 117 + 118 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); 119 + if (err) { 120 + dev_err(rvu->dev, 121 + "RX rule for PF%dVF%d failed(%d)\n", 122 + pf, vf, err); 123 + return err; 124 + } 125 + 126 + err = rvu_switch_install_tx_rule(rvu, pcifunc, 127 + start + entry); 128 + if (err) { 129 + dev_err(rvu->dev, 130 + "TX rule for PF%dVF%d failed(%d)\n", 131 + pf, vf, err); 132 + return err; 133 + } 134 + 135 + rswitch->entry2pcifunc[entry++] = pcifunc; 136 + } 137 + } 138 + 139 + return 0; 140 + } 141 + 142 + void rvu_switch_enable(struct rvu *rvu) 143 + { 144 + struct npc_mcam_alloc_entry_req alloc_req = { 0 }; 145 + struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 }; 146 + struct npc_delete_flow_req uninstall_req = { 0 }; 147 + struct npc_mcam_free_entry_req free_req = { 0 }; 148 + struct rvu_switch *rswitch = &rvu->rswitch; 149 + struct msg_rsp rsp; 150 + int ret; 151 + 152 + alloc_req.contig = true; 153 + alloc_req.count = rvu->cgx_mapped_pfs + rvu->cgx_mapped_vfs; 154 + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req, 155 + &alloc_rsp); 156 + if (ret) { 157 + dev_err(rvu->dev, 158 + "Unable to allocate MCAM entries\n"); 159 + goto exit; 160 + } 161 + 162 + if (alloc_rsp.count != alloc_req.count) { 163 + dev_err(rvu->dev, 164 + "Unable to allocate %d MCAM entries, got %d\n", 165 + alloc_req.count, alloc_rsp.count); 166 + goto free_entries; 
167 + } 168 + 169 + rswitch->entry2pcifunc = kcalloc(alloc_req.count, sizeof(u16), 170 + GFP_KERNEL); 171 + if (!rswitch->entry2pcifunc) 172 + goto free_entries; 173 + 174 + rswitch->used_entries = alloc_rsp.count; 175 + rswitch->start_entry = alloc_rsp.entry; 176 + 177 + ret = rvu_switch_install_rules(rvu); 178 + if (ret) 179 + goto uninstall_rules; 180 + 181 + return; 182 + 183 + uninstall_rules: 184 + uninstall_req.start = rswitch->start_entry; 185 + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; 186 + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); 187 + kfree(rswitch->entry2pcifunc); 188 + free_entries: 189 + free_req.all = 1; 190 + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); 191 + exit: 192 + return; 193 + } 194 + 195 + void rvu_switch_disable(struct rvu *rvu) 196 + { 197 + struct npc_delete_flow_req uninstall_req = { 0 }; 198 + struct npc_mcam_free_entry_req free_req = { 0 }; 199 + struct rvu_switch *rswitch = &rvu->rswitch; 200 + struct rvu_hwinfo *hw = rvu->hw; 201 + int pf, vf, numvfs, hwvf; 202 + struct msg_rsp rsp; 203 + u16 pcifunc; 204 + int err; 205 + 206 + if (!rswitch->used_entries) 207 + return; 208 + 209 + for (pf = 1; pf < hw->total_pfs; pf++) { 210 + if (!is_pf_cgxmapped(rvu, pf)) 211 + continue; 212 + 213 + pcifunc = pf << 10; 214 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); 215 + if (err) 216 + dev_err(rvu->dev, 217 + "Reverting RX rule for PF%d failed(%d)\n", 218 + pf, err); 219 + 220 + for (vf = 0; vf < numvfs; vf++, hwvf++) { 221 + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); 222 + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); 223 + if (err) 224 + dev_err(rvu->dev, 225 + "Reverting RX rule for PF%dVF%d failed(%d)\n", 226 + pf, vf, err); 227 + } 228 + } 229 + 230 + uninstall_req.start = rswitch->start_entry; 231 + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; 232 + free_req.all = 1; 233 + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); 
234 + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); 235 + rswitch->used_entries = 0; 236 + kfree(rswitch->entry2pcifunc); 237 + } 238 + 239 + void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc) 240 + { 241 + struct rvu_switch *rswitch = &rvu->rswitch; 242 + u32 max = rswitch->used_entries; 243 + u16 entry; 244 + 245 + if (!rswitch->used_entries) 246 + return; 247 + 248 + for (entry = 0; entry < max; entry++) { 249 + if (rswitch->entry2pcifunc[entry] == pcifunc) 250 + break; 251 + } 252 + 253 + if (entry >= max) 254 + return; 255 + 256 + rvu_switch_install_tx_rule(rvu, pcifunc, rswitch->start_entry + entry); 257 + rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); 258 + }
+1
drivers/net/ethernet/microchip/sparx5/Kconfig
··· 3 3 depends on NET_SWITCHDEV 4 4 depends on HAS_IOMEM 5 5 depends on OF 6 + depends on ARCH_SPARX5 || COMPILE_TEST 6 7 select PHYLINK 7 8 select PHY_SPARX5_SERDES 8 9 select RESET_CONTROLLER
+2 -1
drivers/net/ethernet/realtek/r8169_main.c
··· 5084 5084 new_bus->priv = tp; 5085 5085 new_bus->parent = &pdev->dev; 5086 5086 new_bus->irq[0] = PHY_MAC_INTERRUPT; 5087 - snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev)); 5087 + snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", 5088 + pci_domain_nr(pdev->bus), pci_dev_id(pdev)); 5088 5089 5089 5090 new_bus->read = r8169_mdio_read_reg; 5090 5091 new_bus->write = r8169_mdio_write_reg;
+1 -1
drivers/net/ethernet/renesas/ravb.h
··· 864 864 865 865 /* The Ethernet AVB descriptor definitions. */ 866 866 struct ravb_desc { 867 - __le16 ds; /* Descriptor size */ 867 + __le16 ds; /* Descriptor size */ 868 868 u8 cc; /* Content control MSBs (reserved) */ 869 869 u8 die_dt; /* Descriptor interrupt enable and type */ 870 870 __le32 dptr; /* Descriptor pointer */
+1 -1
drivers/net/ethernet/renesas/ravb_main.c
··· 920 920 if (ravb_rx(ndev, &quota, q)) 921 921 goto out; 922 922 923 - /* Processing RX Descriptor Ring */ 923 + /* Processing TX Descriptor Ring */ 924 924 spin_lock_irqsave(&priv->lock, flags); 925 925 /* Clear TX interrupt */ 926 926 ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
+2
drivers/net/ethernet/xscale/ptp_ixp46x.c
··· 14 14 #include <linux/kernel.h> 15 15 #include <linux/ptp_clock_kernel.h> 16 16 #include <linux/soc/ixp4xx/cpu.h> 17 + #include <linux/module.h> 18 + #include <mach/ixp4xx-regs.h> 17 19 18 20 #include "ixp46x_ts.h" 19 21
+23 -10
drivers/net/usb/hso.c
··· 2495 2495 hso_net_init); 2496 2496 if (!net) { 2497 2497 dev_err(&interface->dev, "Unable to create ethernet device\n"); 2498 - goto exit; 2498 + goto err_hso_dev; 2499 2499 } 2500 2500 2501 2501 hso_net = netdev_priv(net); ··· 2508 2508 USB_DIR_IN); 2509 2509 if (!hso_net->in_endp) { 2510 2510 dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); 2511 - goto exit; 2511 + goto err_net; 2512 2512 } 2513 2513 hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, 2514 2514 USB_DIR_OUT); 2515 2515 if (!hso_net->out_endp) { 2516 2516 dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); 2517 - goto exit; 2517 + goto err_net; 2518 2518 } 2519 2519 SET_NETDEV_DEV(net, &interface->dev); 2520 2520 SET_NETDEV_DEVTYPE(net, &hso_type); ··· 2523 2523 for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { 2524 2524 hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); 2525 2525 if (!hso_net->mux_bulk_rx_urb_pool[i]) 2526 - goto exit; 2526 + goto err_mux_bulk_rx; 2527 2527 hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, 2528 2528 GFP_KERNEL); 2529 2529 if (!hso_net->mux_bulk_rx_buf_pool[i]) 2530 - goto exit; 2530 + goto err_mux_bulk_rx; 2531 2531 } 2532 2532 hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); 2533 2533 if (!hso_net->mux_bulk_tx_urb) 2534 - goto exit; 2534 + goto err_mux_bulk_rx; 2535 2535 hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); 2536 2536 if (!hso_net->mux_bulk_tx_buf) 2537 - goto exit; 2537 + goto err_free_tx_urb; 2538 2538 2539 2539 add_net_device(hso_dev); 2540 2540 ··· 2542 2542 result = register_netdev(net); 2543 2543 if (result) { 2544 2544 dev_err(&interface->dev, "Failed to register device\n"); 2545 - goto exit; 2545 + goto err_free_tx_buf; 2546 2546 } 2547 2547 2548 2548 hso_log_port(hso_dev); ··· 2550 2550 hso_create_rfkill(hso_dev, interface); 2551 2551 2552 2552 return hso_dev; 2553 - exit: 2554 - hso_free_net_device(hso_dev, true); 2553 + 2554 + err_free_tx_buf: 2555 + 
remove_net_device(hso_dev); 2556 + kfree(hso_net->mux_bulk_tx_buf); 2557 + err_free_tx_urb: 2558 + usb_free_urb(hso_net->mux_bulk_tx_urb); 2559 + err_mux_bulk_rx: 2560 + for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { 2561 + usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]); 2562 + kfree(hso_net->mux_bulk_rx_buf_pool[i]); 2563 + } 2564 + err_net: 2565 + free_netdev(net); 2566 + err_hso_dev: 2567 + kfree(hso_dev); 2555 2568 return NULL; 2556 2569 } 2557 2570
+21 -11
drivers/net/usb/r8152.c
··· 1552 1552 rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, 1553 1553 u32 advertising); 1554 1554 1555 - static int rtl8152_set_mac_address(struct net_device *netdev, void *p) 1555 + static int __rtl8152_set_mac_address(struct net_device *netdev, void *p, 1556 + bool in_resume) 1556 1557 { 1557 1558 struct r8152 *tp = netdev_priv(netdev); 1558 1559 struct sockaddr *addr = p; ··· 1562 1561 if (!is_valid_ether_addr(addr->sa_data)) 1563 1562 goto out1; 1564 1563 1565 - ret = usb_autopm_get_interface(tp->intf); 1566 - if (ret < 0) 1567 - goto out1; 1564 + if (!in_resume) { 1565 + ret = usb_autopm_get_interface(tp->intf); 1566 + if (ret < 0) 1567 + goto out1; 1568 + } 1568 1569 1569 1570 mutex_lock(&tp->control); 1570 1571 ··· 1578 1575 1579 1576 mutex_unlock(&tp->control); 1580 1577 1581 - usb_autopm_put_interface(tp->intf); 1578 + if (!in_resume) 1579 + usb_autopm_put_interface(tp->intf); 1582 1580 out1: 1583 1581 return ret; 1582 + } 1583 + 1584 + static int rtl8152_set_mac_address(struct net_device *netdev, void *p) 1585 + { 1586 + return __rtl8152_set_mac_address(netdev, p, false); 1584 1587 } 1585 1588 1586 1589 /* Devices containing proper chips can support a persistent ··· 1707 1698 return ret; 1708 1699 } 1709 1700 1710 - static int set_ethernet_addr(struct r8152 *tp) 1701 + static int set_ethernet_addr(struct r8152 *tp, bool in_resume) 1711 1702 { 1712 1703 struct net_device *dev = tp->netdev; 1713 1704 struct sockaddr sa; ··· 1720 1711 if (tp->version == RTL_VER_01) 1721 1712 ether_addr_copy(dev->dev_addr, sa.sa_data); 1722 1713 else 1723 - ret = rtl8152_set_mac_address(dev, &sa); 1714 + ret = __rtl8152_set_mac_address(dev, &sa, in_resume); 1724 1715 1725 1716 return ret; 1726 1717 } ··· 6772 6763 tp->rtl_ops.down(tp); 6773 6764 6774 6765 mutex_unlock(&tp->control); 6775 - 6776 - usb_autopm_put_interface(tp->intf); 6777 6766 } 6767 + 6768 + if (!res) 6769 + usb_autopm_put_interface(tp->intf); 6778 6770 6779 6771 free_all_mem(tp); 6780 
6772 ··· 8453 8443 clear_bit(SELECTIVE_SUSPEND, &tp->flags); 8454 8444 tp->rtl_ops.init(tp); 8455 8445 queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); 8456 - set_ethernet_addr(tp); 8446 + set_ethernet_addr(tp, true); 8457 8447 return rtl8152_resume(intf); 8458 8448 } 8459 8449 ··· 9654 9644 tp->rtl_fw.retry = true; 9655 9645 #endif 9656 9646 queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); 9657 - set_ethernet_addr(tp); 9647 + set_ethernet_addr(tp, false); 9658 9648 9659 9649 usb_set_intfdata(intf, tp); 9660 9650
-1
include/net/tcp.h
··· 1709 1709 struct rcu_head rcu; 1710 1710 }; 1711 1711 1712 - extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; 1713 1712 void tcp_fastopen_active_disable(struct sock *sk); 1714 1713 bool tcp_fastopen_active_should_disable(struct sock *sk); 1715 1714 void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
+1 -1
include/trace/events/net.h
··· 136 136 __assign_str(name, skb->dev->name); 137 137 ), 138 138 139 - TP_printk("dev=%s skbaddr=%p len=%u", 139 + TP_printk("dev=%s skbaddr=%px len=%u", 140 140 __get_str(name), __entry->skbaddr, __entry->len) 141 141 ) 142 142
+27 -1
include/trace/events/qdisc.h
··· 41 41 __entry->txq_state = txq->state; 42 42 ), 43 43 44 - TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", 44 + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px", 45 45 __entry->ifindex, __entry->handle, __entry->parent, 46 46 __entry->txq_state, __entry->packets, __entry->skbaddr ) 47 + ); 48 + 49 + TRACE_EVENT(qdisc_enqueue, 50 + 51 + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), 52 + 53 + TP_ARGS(qdisc, txq, skb), 54 + 55 + TP_STRUCT__entry( 56 + __field(struct Qdisc *, qdisc) 57 + __field(void *, skbaddr) 58 + __field(int, ifindex) 59 + __field(u32, handle) 60 + __field(u32, parent) 61 + ), 62 + 63 + TP_fast_assign( 64 + __entry->qdisc = qdisc; 65 + __entry->skbaddr = skb; 66 + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; 67 + __entry->handle = qdisc->handle; 68 + __entry->parent = qdisc->parent; 69 + ), 70 + 71 + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", 72 + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) 47 73 ); 48 74 49 75 TRACE_EVENT(qdisc_reset,
+2
kernel/bpf/verifier.c
··· 3677 3677 if (tail_call_reachable) 3678 3678 for (j = 0; j < frame; j++) 3679 3679 subprog[ret_prog[j]].tail_call_reachable = true; 3680 + if (subprog[0].tail_call_reachable) 3681 + env->prog->aux->tail_call_reachable = true; 3680 3682 3681 3683 /* end of for() loop means the last insn of the 'subprog' 3682 3684 * was reached. Doesn't matter whether it was JA or EXIT
+3
net/bpf/test_run.c
··· 701 701 void *data; 702 702 int ret; 703 703 704 + if (prog->expected_attach_type == BPF_XDP_DEVMAP || 705 + prog->expected_attach_type == BPF_XDP_CPUMAP) 706 + return -EINVAL; 704 707 if (kattr->test.ctx_in || kattr->test.ctx_out) 705 708 return -EINVAL; 706 709
+1 -1
net/bridge/br_fdb.c
··· 780 780 struct net_device *dst_dev; 781 781 782 782 dst_dev = dst ? dst->dev : br->dev; 783 - if (dst_dev != br_dev && dst_dev != dev) 783 + if (dst_dev && dst_dev != dev) 784 784 continue; 785 785 786 786 err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
+2 -1
net/caif/caif_socket.c
··· 539 539 goto err; 540 540 541 541 ret = -EINVAL; 542 - if (unlikely(msg->msg_iter.iov->iov_base == NULL)) 542 + if (unlikely(msg->msg_iter.nr_segs == 0) || 543 + unlikely(msg->msg_iter.iov->iov_base == NULL)) 543 544 goto err; 544 545 noblock = msg->msg_flags & MSG_DONTWAIT; 545 546
+26 -8
net/core/dev.c
··· 131 131 #include <trace/events/napi.h> 132 132 #include <trace/events/net.h> 133 133 #include <trace/events/skb.h> 134 + #include <trace/events/qdisc.h> 134 135 #include <linux/inetdevice.h> 135 136 #include <linux/cpu_rmap.h> 136 137 #include <linux/static_key.h> ··· 3845 3844 } 3846 3845 } 3847 3846 3847 + static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, 3848 + struct sk_buff **to_free, 3849 + struct netdev_queue *txq) 3850 + { 3851 + int rc; 3852 + 3853 + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; 3854 + if (rc == NET_XMIT_SUCCESS) 3855 + trace_qdisc_enqueue(q, txq, skb); 3856 + return rc; 3857 + } 3858 + 3848 3859 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, 3849 3860 struct net_device *dev, 3850 3861 struct netdev_queue *txq) ··· 3875 3862 * of q->seqlock to protect from racing with requeuing. 3876 3863 */ 3877 3864 if (unlikely(!nolock_qdisc_is_empty(q))) { 3878 - rc = q->enqueue(skb, q, &to_free) & 3879 - NET_XMIT_MASK; 3865 + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); 3880 3866 __qdisc_run(q); 3881 3867 qdisc_run_end(q); 3882 3868 ··· 3891 3879 return NET_XMIT_SUCCESS; 3892 3880 } 3893 3881 3894 - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; 3882 + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); 3895 3883 qdisc_run(q); 3896 3884 3897 3885 no_lock_out: ··· 3935 3923 qdisc_run_end(q); 3936 3924 rc = NET_XMIT_SUCCESS; 3937 3925 } else { 3938 - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; 3926 + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); 3939 3927 if (qdisc_run_begin(q)) { 3940 3928 if (unlikely(contended)) { 3941 3929 spin_unlock(&q->busylock); ··· 9712 9700 struct net_device *dev; 9713 9701 int err, fd; 9714 9702 9703 + rtnl_lock(); 9715 9704 dev = dev_get_by_index(net, attr->link_create.target_ifindex); 9716 - if (!dev) 9705 + if (!dev) { 9706 + rtnl_unlock(); 9717 9707 return -EINVAL; 9708 + } 9718 9709 9719 9710 link = kzalloc(sizeof(*link), GFP_USER); 9720 9711 if (!link) { 9721 
9712 err = -ENOMEM; 9722 - goto out_put_dev; 9713 + goto unlock; 9723 9714 } 9724 9715 9725 9716 bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); ··· 9732 9717 err = bpf_link_prime(&link->link, &link_primer); 9733 9718 if (err) { 9734 9719 kfree(link); 9735 - goto out_put_dev; 9720 + goto unlock; 9736 9721 } 9737 9722 9738 - rtnl_lock(); 9739 9723 err = dev_xdp_attach_link(dev, NULL, link); 9740 9724 rtnl_unlock(); 9741 9725 9742 9726 if (err) { 9727 + link->dev = NULL; 9743 9728 bpf_link_cleanup(&link_primer); 9744 9729 goto out_put_dev; 9745 9730 } ··· 9748 9733 /* link itself doesn't hold dev's refcnt to not complicate shutdown */ 9749 9734 dev_put(dev); 9750 9735 return fd; 9736 + 9737 + unlock: 9738 + rtnl_unlock(); 9751 9739 9752 9740 out_put_dev: 9753 9741 dev_put(dev);
+16 -2
net/core/skbuff.c
··· 663 663 if (skb->cloned && 664 664 atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 665 665 &shinfo->dataref)) 666 - return; 666 + goto exit; 667 667 668 668 skb_zcopy_clear(skb, true); 669 669 ··· 674 674 kfree_skb_list(shinfo->frag_list); 675 675 676 676 skb_free_head(skb); 677 + exit: 678 + /* When we clone an SKB we copy the reycling bit. The pp_recycle 679 + * bit is only set on the head though, so in order to avoid races 680 + * while trying to recycle fragments on __skb_frag_unref() we need 681 + * to make one SKB responsible for triggering the recycle path. 682 + * So disable the recycling bit if an SKB is cloned and we have 683 + * additional references to to the fragmented part of the SKB. 684 + * Eventually the last SKB will have the recycling bit set and it's 685 + * dataref set to 0, which will trigger the recycling 686 + */ 687 + skb->pp_recycle = 0; 677 688 } 678 689 679 690 /* ··· 3022 3011 3023 3012 if (!from->head_frag || 3024 3013 skb_headlen(from) < L1_CACHE_BYTES || 3025 - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) 3014 + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { 3026 3015 hlen = skb_headlen(from); 3016 + if (!hlen) 3017 + hlen = from->len; 3018 + } 3027 3019 3028 3020 if (skb_has_frag_list(from)) 3029 3021 hlen = from->len;
+11 -5
net/core/skmsg.c
··· 508 508 if (skb_linearize(skb)) 509 509 return -EAGAIN; 510 510 num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); 511 - if (unlikely(num_sge < 0)) { 512 - kfree(msg); 511 + if (unlikely(num_sge < 0)) 513 512 return num_sge; 514 - } 515 513 516 514 copied = skb->len; 517 515 msg->sg.start = 0; ··· 528 530 { 529 531 struct sock *sk = psock->sk; 530 532 struct sk_msg *msg; 533 + int err; 531 534 532 535 /* If we are receiving on the same sock skb->sk is already assigned, 533 536 * skip memory accounting and owner transition seeing it already set ··· 547 548 * into user buffers. 548 549 */ 549 550 skb_set_owner_r(skb, sk); 550 - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 551 + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 552 + if (err < 0) 553 + kfree(msg); 554 + return err; 551 555 } 552 556 553 557 /* Puts an skb on the ingress queue of the socket already assigned to the ··· 561 559 { 562 560 struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); 563 561 struct sock *sk = psock->sk; 562 + int err; 564 563 565 564 if (unlikely(!msg)) 566 565 return -EAGAIN; 567 566 sk_msg_init(msg); 568 567 skb_set_owner_r(skb, sk); 569 - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 568 + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); 569 + if (err < 0) 570 + kfree(msg); 571 + return err; 570 572 } 571 573 572 574 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+12 -15
net/decnet/af_decnet.c
··· 816 816 static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) 817 817 { 818 818 struct dn_scp *scp = DN_SK(sk); 819 - DEFINE_WAIT(wait); 819 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 820 820 int err; 821 821 822 822 if (scp->state != DN_CR) ··· 826 826 scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); 827 827 dn_send_conn_conf(sk, allocation); 828 828 829 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 829 + add_wait_queue(sk_sleep(sk), &wait); 830 830 for(;;) { 831 831 release_sock(sk); 832 832 if (scp->state == DN_CC) 833 - *timeo = schedule_timeout(*timeo); 833 + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); 834 834 lock_sock(sk); 835 835 err = 0; 836 836 if (scp->state == DN_RUN) ··· 844 844 err = -EAGAIN; 845 845 if (!*timeo) 846 846 break; 847 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 848 847 } 849 - finish_wait(sk_sleep(sk), &wait); 848 + remove_wait_queue(sk_sleep(sk), &wait); 850 849 if (err == 0) { 851 850 sk->sk_socket->state = SS_CONNECTED; 852 851 } else if (scp->state != DN_CC) { ··· 857 858 static int dn_wait_run(struct sock *sk, long *timeo) 858 859 { 859 860 struct dn_scp *scp = DN_SK(sk); 860 - DEFINE_WAIT(wait); 861 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 861 862 int err = 0; 862 863 863 864 if (scp->state == DN_RUN) ··· 866 867 if (!*timeo) 867 868 return -EALREADY; 868 869 869 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 870 + add_wait_queue(sk_sleep(sk), &wait); 870 871 for(;;) { 871 872 release_sock(sk); 872 873 if (scp->state == DN_CI || scp->state == DN_CC) 873 - *timeo = schedule_timeout(*timeo); 874 + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); 874 875 lock_sock(sk); 875 876 err = 0; 876 877 if (scp->state == DN_RUN) ··· 884 885 err = -ETIMEDOUT; 885 886 if (!*timeo) 886 887 break; 887 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 888 888 } 889 - finish_wait(sk_sleep(sk), &wait); 889 + 
remove_wait_queue(sk_sleep(sk), &wait); 890 890 out: 891 891 if (err == 0) { 892 892 sk->sk_socket->state = SS_CONNECTED; ··· 1030 1032 1031 1033 static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) 1032 1034 { 1033 - DEFINE_WAIT(wait); 1035 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 1034 1036 struct sk_buff *skb = NULL; 1035 1037 int err = 0; 1036 1038 1037 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1039 + add_wait_queue(sk_sleep(sk), &wait); 1038 1040 for(;;) { 1039 1041 release_sock(sk); 1040 1042 skb = skb_dequeue(&sk->sk_receive_queue); 1041 1043 if (skb == NULL) { 1042 - *timeo = schedule_timeout(*timeo); 1044 + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); 1043 1045 skb = skb_dequeue(&sk->sk_receive_queue); 1044 1046 } 1045 1047 lock_sock(sk); ··· 1054 1056 err = -EAGAIN; 1055 1057 if (!*timeo) 1056 1058 break; 1057 - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1058 1059 } 1059 - finish_wait(sk_sleep(sk), &wait); 1060 + remove_wait_queue(sk_sleep(sk), &wait); 1060 1061 1061 1062 return skb == NULL ? ERR_PTR(err) : skb; 1062 1063 }
+9 -5
net/dsa/slave.c
··· 1808 1808 struct dsa_slave_priv *p = netdev_priv(slave); 1809 1809 const struct dsa_port *cpu_dp = dp->cpu_dp; 1810 1810 struct net_device *master = cpu_dp->master; 1811 + const struct dsa_switch *ds = dp->ds; 1811 1812 1812 1813 slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; 1813 1814 slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; ··· 1820 1819 slave->needed_tailroom += master->needed_tailroom; 1821 1820 1822 1821 p->xmit = cpu_dp->tag_ops->xmit; 1822 + 1823 + slave->features = master->vlan_features | NETIF_F_HW_TC; 1824 + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) 1825 + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 1826 + slave->hw_features |= NETIF_F_HW_TC; 1827 + slave->features |= NETIF_F_LLTX; 1828 + if (slave->needed_tailroom) 1829 + slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); 1823 1830 } 1824 1831 1825 1832 static struct lock_class_key dsa_slave_netdev_xmit_lock_key; ··· 1890 1881 if (slave_dev == NULL) 1891 1882 return -ENOMEM; 1892 1883 1893 - slave_dev->features = master->vlan_features | NETIF_F_HW_TC; 1894 - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) 1895 - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 1896 - slave_dev->hw_features |= NETIF_F_HW_TC; 1897 - slave_dev->features |= NETIF_F_LLTX; 1898 1884 slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; 1899 1885 if (!is_zero_ether_addr(port->mac)) 1900 1886 ether_addr_copy(slave_dev->dev_addr, port->mac);
+9
net/dsa/tag_ksz.c
··· 53 53 u8 *tag; 54 54 u8 *addr; 55 55 56 + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) 57 + return NULL; 58 + 56 59 /* Tag encoding */ 57 60 tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); 58 61 addr = skb_mac_header(skb); ··· 117 114 u8 *addr; 118 115 u16 val; 119 116 117 + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) 118 + return NULL; 119 + 120 120 /* Tag encoding */ 121 121 tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); 122 122 addr = skb_mac_header(skb); ··· 169 163 struct dsa_port *dp = dsa_slave_to_port(dev); 170 164 u8 *addr; 171 165 u8 *tag; 166 + 167 + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) 168 + return NULL; 172 169 173 170 /* Tag encoding */ 174 171 tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
+1 -1
net/ipv4/tcp_bpf.c
··· 503 503 tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot); 504 504 return 0; 505 505 } 506 - core_initcall(tcp_bpf_v4_build_proto); 506 + late_initcall(tcp_bpf_v4_build_proto); 507 507 508 508 static int tcp_bpf_assert_proto_ops(struct proto *ops) 509 509 {
+24 -4
net/ipv4/tcp_fastopen.c
··· 507 507 { 508 508 struct net *net = sock_net(sk); 509 509 510 + if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) 511 + return; 512 + 513 + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ 514 + WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); 515 + 516 + /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). 517 + * We want net->ipv4.tfo_active_disable_stamp to be updated first. 518 + */ 519 + smp_mb__before_atomic(); 510 520 atomic_inc(&net->ipv4.tfo_active_disable_times); 511 - net->ipv4.tfo_active_disable_stamp = jiffies; 521 + 512 522 NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); 513 523 } 514 524 ··· 529 519 bool tcp_fastopen_active_should_disable(struct sock *sk) 530 520 { 531 521 unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; 532 - int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); 533 522 unsigned long timeout; 523 + int tfo_da_times; 534 524 int multiplier; 535 525 526 + if (!tfo_bh_timeout) 527 + return false; 528 + 529 + tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); 536 530 if (!tfo_da_times) 537 531 return false; 538 532 533 + /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ 534 + smp_rmb(); 535 + 539 536 /* Limit timeout to max: 2^6 * initial timeout */ 540 537 multiplier = 1 << min(tfo_da_times - 1, 6); 541 - timeout = multiplier * tfo_bh_timeout * HZ; 542 - if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) 538 + 539 + /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ 540 + timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + 541 + multiplier * tfo_bh_timeout * HZ; 542 + if (time_before(jiffies, timeout)) 543 543 return true; 544 544 545 545 /* Mark check bit so we can check for successful active TFO
+1 -1
net/ipv4/tcp_ipv4.c
··· 2965 2965 net->ipv4.sysctl_tcp_comp_sack_nr = 44; 2966 2966 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; 2967 2967 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); 2968 - net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; 2968 + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; 2969 2969 atomic_set(&net->ipv4.tfo_active_disable_times, 0); 2970 2970 2971 2971 /* Reno is always built in */
+19 -6
net/ipv4/udp.c
··· 645 645 const struct iphdr *iph, 646 646 struct udphdr *uh, 647 647 struct udp_table *udptable, 648 + struct sock *sk, 648 649 struct sk_buff *skb, u32 info) 649 650 { 651 + int (*lookup)(struct sock *sk, struct sk_buff *skb); 650 652 int network_offset, transport_offset; 651 - struct sock *sk; 653 + struct udp_sock *up; 652 654 653 655 network_offset = skb_network_offset(skb); 654 656 transport_offset = skb_transport_offset(skb); ··· 661 659 /* Transport header needs to point to the UDP header */ 662 660 skb_set_transport_header(skb, iph->ihl << 2); 663 661 662 + if (sk) { 663 + up = udp_sk(sk); 664 + 665 + lookup = READ_ONCE(up->encap_err_lookup); 666 + if (lookup && lookup(sk, skb)) 667 + sk = NULL; 668 + 669 + goto out; 670 + } 671 + 664 672 sk = __udp4_lib_lookup(net, iph->daddr, uh->source, 665 673 iph->saddr, uh->dest, skb->dev->ifindex, 0, 666 674 udptable, NULL); 667 675 if (sk) { 668 - int (*lookup)(struct sock *sk, struct sk_buff *skb); 669 - struct udp_sock *up = udp_sk(sk); 676 + up = udp_sk(sk); 670 677 671 678 lookup = READ_ONCE(up->encap_err_lookup); 672 679 if (!lookup || lookup(sk, skb)) 673 680 sk = NULL; 674 681 } 675 682 683 + out: 676 684 if (!sk) 677 685 sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); 678 686 ··· 719 707 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, 720 708 iph->saddr, uh->source, skb->dev->ifindex, 721 709 inet_sdif(skb), udptable, NULL); 710 + 722 711 if (!sk || udp_sk(sk)->encap_type) { 723 712 /* No socket for error: try tunnels before discarding */ 724 - sk = ERR_PTR(-ENOENT); 725 713 if (static_branch_unlikely(&udp_encap_needed_key)) { 726 - sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, 714 + sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, 727 715 info); 728 716 if (!sk) 729 717 return 0; 730 - } 718 + } else 719 + sk = ERR_PTR(-ENOENT); 731 720 732 721 if (IS_ERR(sk)) { 733 722 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+1 -1
net/ipv4/udp_bpf.c
··· 134 134 udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); 135 135 return 0; 136 136 } 137 - core_initcall(udp_bpf_v4_build_proto); 137 + late_initcall(udp_bpf_v4_build_proto); 138 138 139 139 int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) 140 140 {
+1 -1
net/ipv6/ip6_output.c
··· 74 74 75 75 if (likely(nskb)) { 76 76 if (skb->sk) 77 - skb_set_owner_w(skb, skb->sk); 77 + skb_set_owner_w(nskb, skb->sk); 78 78 consume_skb(skb); 79 79 } else { 80 80 kfree_skb(skb);
+1 -1
net/ipv6/route.c
··· 3769 3769 err = PTR_ERR(rt->fib6_metrics); 3770 3770 /* Do not leave garbage there. */ 3771 3771 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; 3772 - goto out; 3772 + goto out_free; 3773 3773 } 3774 3774 3775 3775 if (cfg->fc_flags & RTF_ADDRCONF)
+19 -6
net/ipv6/udp.c
··· 502 502 const struct ipv6hdr *hdr, int offset, 503 503 struct udphdr *uh, 504 504 struct udp_table *udptable, 505 + struct sock *sk, 505 506 struct sk_buff *skb, 506 507 struct inet6_skb_parm *opt, 507 508 u8 type, u8 code, __be32 info) 508 509 { 510 + int (*lookup)(struct sock *sk, struct sk_buff *skb); 509 511 int network_offset, transport_offset; 510 - struct sock *sk; 512 + struct udp_sock *up; 511 513 512 514 network_offset = skb_network_offset(skb); 513 515 transport_offset = skb_transport_offset(skb); ··· 520 518 /* Transport header needs to point to the UDP header */ 521 519 skb_set_transport_header(skb, offset); 522 520 521 + if (sk) { 522 + up = udp_sk(sk); 523 + 524 + lookup = READ_ONCE(up->encap_err_lookup); 525 + if (lookup && lookup(sk, skb)) 526 + sk = NULL; 527 + 528 + goto out; 529 + } 530 + 523 531 sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, 524 532 &hdr->saddr, uh->dest, 525 533 inet6_iif(skb), 0, udptable, skb); 526 534 if (sk) { 527 - int (*lookup)(struct sock *sk, struct sk_buff *skb); 528 - struct udp_sock *up = udp_sk(sk); 535 + up = udp_sk(sk); 529 536 530 537 lookup = READ_ONCE(up->encap_err_lookup); 531 538 if (!lookup || lookup(sk, skb)) 532 539 sk = NULL; 533 540 } 534 541 542 + out: 535 543 if (!sk) { 536 544 sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, 537 545 offset, info)); ··· 570 558 571 559 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, 572 560 inet6_iif(skb), inet6_sdif(skb), udptable, NULL); 561 + 573 562 if (!sk || udp_sk(sk)->encap_type) { 574 563 /* No socket for error: try tunnels before discarding */ 575 - sk = ERR_PTR(-ENOENT); 576 564 if (static_branch_unlikely(&udpv6_encap_needed_key)) { 577 565 sk = __udp6_lib_err_encap(net, hdr, offset, uh, 578 - udptable, skb, 566 + udptable, sk, skb, 579 567 opt, type, code, info); 580 568 if (!sk) 581 569 return 0; 582 - } 570 + } else 571 + sk = ERR_PTR(-ENOENT); 583 572 584 573 if (IS_ERR(sk)) { 585 574 __ICMP6_INC_STATS(net, 
__in6_dev_get(skb->dev),
+11 -9
net/netrom/nr_timer.c
··· 121 121 is accepted() it isn't 'dead' so doesn't get removed. */ 122 122 if (sock_flag(sk, SOCK_DESTROY) || 123 123 (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { 124 - sock_hold(sk); 125 124 bh_unlock_sock(sk); 126 125 nr_destroy_socket(sk); 127 - sock_put(sk); 128 - return; 126 + goto out; 129 127 } 130 128 break; 131 129 ··· 144 146 145 147 nr_start_heartbeat(sk); 146 148 bh_unlock_sock(sk); 149 + out: 150 + sock_put(sk); 147 151 } 148 152 149 153 static void nr_t2timer_expiry(struct timer_list *t) ··· 159 159 nr_enquiry_response(sk); 160 160 } 161 161 bh_unlock_sock(sk); 162 + sock_put(sk); 162 163 } 163 164 164 165 static void nr_t4timer_expiry(struct timer_list *t) ··· 170 169 bh_lock_sock(sk); 171 170 nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; 172 171 bh_unlock_sock(sk); 172 + sock_put(sk); 173 173 } 174 174 175 175 static void nr_idletimer_expiry(struct timer_list *t) ··· 199 197 sock_set_flag(sk, SOCK_DEAD); 200 198 } 201 199 bh_unlock_sock(sk); 200 + sock_put(sk); 202 201 } 203 202 204 203 static void nr_t1timer_expiry(struct timer_list *t) ··· 212 209 case NR_STATE_1: 213 210 if (nr->n2count == nr->n2) { 214 211 nr_disconnect(sk, ETIMEDOUT); 215 - bh_unlock_sock(sk); 216 - return; 212 + goto out; 217 213 } else { 218 214 nr->n2count++; 219 215 nr_write_internal(sk, NR_CONNREQ); ··· 222 220 case NR_STATE_2: 223 221 if (nr->n2count == nr->n2) { 224 222 nr_disconnect(sk, ETIMEDOUT); 225 - bh_unlock_sock(sk); 226 - return; 223 + goto out; 227 224 } else { 228 225 nr->n2count++; 229 226 nr_write_internal(sk, NR_DISCREQ); ··· 232 231 case NR_STATE_3: 233 232 if (nr->n2count == nr->n2) { 234 233 nr_disconnect(sk, ETIMEDOUT); 235 - bh_unlock_sock(sk); 236 - return; 234 + goto out; 237 235 } else { 238 236 nr->n2count++; 239 237 nr_requeue_frames(sk); ··· 241 241 } 242 242 243 243 nr_start_t1timer(sk); 244 + out: 244 245 bh_unlock_sock(sk); 246 + sock_put(sk); 245 247 }
+8 -4
net/sched/act_skbmod.c
··· 6 6 */ 7 7 8 8 #include <linux/module.h> 9 + #include <linux/if_arp.h> 9 10 #include <linux/init.h> 10 11 #include <linux/kernel.h> 11 12 #include <linux/skbuff.h> ··· 34 33 tcf_lastuse_update(&d->tcf_tm); 35 34 bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); 36 35 36 + action = READ_ONCE(d->tcf_action); 37 + if (unlikely(action == TC_ACT_SHOT)) 38 + goto drop; 39 + 40 + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) 41 + return action; 42 + 37 43 /* XXX: if you are going to edit more fields beyond ethernet header 38 44 * (example when you add IP header replacement or vlan swap) 39 45 * then MAX_EDIT_LEN needs to change appropriately 40 46 */ 41 47 err = skb_ensure_writable(skb, MAX_EDIT_LEN); 42 48 if (unlikely(err)) /* best policy is to drop on the floor */ 43 - goto drop; 44 - 45 - action = READ_ONCE(d->tcf_action); 46 - if (unlikely(action == TC_ACT_SHOT)) 47 49 goto drop; 48 50 49 51 p = rcu_dereference_bh(d->skbmod_p);
+1 -1
net/sched/cls_api.c
··· 2904 2904 break; 2905 2905 case RTM_GETCHAIN: 2906 2906 err = tc_chain_notify(chain, skb, n->nlmsg_seq, 2907 - n->nlmsg_seq, n->nlmsg_type, true); 2907 + n->nlmsg_flags, n->nlmsg_type, true); 2908 2908 if (err < 0) 2909 2909 NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); 2910 2910 break;
+4 -1
net/sched/cls_tcindex.c
··· 278 278 TCA_TCINDEX_POLICE); 279 279 } 280 280 281 + static void tcindex_free_perfect_hash(struct tcindex_data *cp); 282 + 281 283 static void tcindex_partial_destroy_work(struct work_struct *work) 282 284 { 283 285 struct tcindex_data *p = container_of(to_rcu_work(work), ··· 287 285 rwork); 288 286 289 287 rtnl_lock(); 290 - kfree(p->perfect); 288 + if (p->perfect) 289 + tcindex_free_perfect_hash(p); 291 290 kfree(p); 292 291 rtnl_unlock(); 293 292 }
+2
net/sctp/auth.c
··· 860 860 if (replace) { 861 861 list_del_init(&shkey->key_list); 862 862 sctp_auth_shkey_release(shkey); 863 + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) 864 + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); 863 865 } 864 866 list_add(&cur_key->key_list, sh_keys); 865 867
+2 -2
net/sctp/output.c
··· 104 104 if (asoc->param_flags & SPP_PMTUD_ENABLE) 105 105 sctp_assoc_sync_pmtu(asoc); 106 106 } else if (!sctp_transport_pl_enabled(tp) && 107 - !sctp_transport_pmtu_check(tp)) { 108 - if (asoc->param_flags & SPP_PMTUD_ENABLE) 107 + asoc->param_flags & SPP_PMTUD_ENABLE) { 108 + if (!sctp_transport_pmtu_check(tp)) 109 109 sctp_assoc_sync_pmtu(asoc); 110 110 } 111 111
+4
net/sctp/socket.c
··· 4577 4577 } 4578 4578 4579 4579 if (optlen > 0) { 4580 + /* Trim it to the biggest size sctp sockopt may need if necessary */ 4581 + optlen = min_t(unsigned int, optlen, 4582 + PAGE_ALIGN(USHRT_MAX + 4583 + sizeof(__u16) * sizeof(struct sctp_reset_streams))); 4580 4584 kopt = memdup_sockptr(optval, optlen); 4581 4585 if (IS_ERR(kopt)) 4582 4586 return PTR_ERR(kopt);
+5
tools/bpf/bpftool/common.c
··· 222 222 int err = 0; 223 223 224 224 file = malloc(strlen(name) + 1); 225 + if (!file) { 226 + p_err("mem alloc failed"); 227 + return -1; 228 + } 229 + 225 230 strcpy(file, name); 226 231 dir = dirname(file); 227 232
+54 -1
tools/testing/selftests/net/nettest.c
··· 11 11 #include <sys/socket.h> 12 12 #include <sys/wait.h> 13 13 #include <linux/tcp.h> 14 + #include <linux/udp.h> 14 15 #include <arpa/inet.h> 15 16 #include <net/if.h> 16 17 #include <netinet/in.h> 18 + #include <netinet/ip.h> 17 19 #include <netdb.h> 18 20 #include <fcntl.h> 19 21 #include <libgen.h> ··· 28 26 #include <unistd.h> 29 27 #include <time.h> 30 28 #include <errno.h> 29 + 30 + #include <linux/xfrm.h> 31 + #include <linux/ipsec.h> 32 + #include <linux/pfkeyv2.h> 31 33 32 34 #ifndef IPV6_UNICAST_IF 33 35 #define IPV6_UNICAST_IF 76 ··· 120 114 struct in_addr in; 121 115 struct in6_addr in6; 122 116 } expected_raddr; 117 + 118 + /* ESP in UDP encap test */ 119 + int use_xfrm; 123 120 }; 124 121 125 122 static int server_mode; ··· 1355 1346 return 0; 1356 1347 } 1357 1348 1349 + static int config_xfrm_policy(int sd, struct sock_args *args) 1350 + { 1351 + struct xfrm_userpolicy_info policy = {}; 1352 + int type = UDP_ENCAP_ESPINUDP; 1353 + int xfrm_af = IP_XFRM_POLICY; 1354 + int level = SOL_IP; 1355 + 1356 + if (args->type != SOCK_DGRAM) { 1357 + log_error("Invalid socket type. 
Only DGRAM could be used for XFRM\n"); 1358 + return 1; 1359 + } 1360 + 1361 + policy.action = XFRM_POLICY_ALLOW; 1362 + policy.sel.family = args->version; 1363 + if (args->version == AF_INET6) { 1364 + xfrm_af = IPV6_XFRM_POLICY; 1365 + level = SOL_IPV6; 1366 + } 1367 + 1368 + policy.dir = XFRM_POLICY_OUT; 1369 + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) 1370 + return 1; 1371 + 1372 + policy.dir = XFRM_POLICY_IN; 1373 + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) 1374 + return 1; 1375 + 1376 + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { 1377 + log_err_errno("Failed to set xfrm encap"); 1378 + return 1; 1379 + } 1380 + 1381 + return 0; 1382 + } 1383 + 1358 1384 static int lsock_init(struct sock_args *args) 1359 1385 { 1360 1386 long flags; ··· 1432 1388 1433 1389 if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) 1434 1390 log_err_errno("Failed to set close-on-exec flag"); 1391 + 1392 + if (args->use_xfrm && config_xfrm_policy(sd, args)) { 1393 + log_err_errno("Failed to set xfrm policy"); 1394 + goto err; 1395 + } 1435 1396 1436 1397 out: 1437 1398 return sd; ··· 1821 1772 return client_status; 1822 1773 } 1823 1774 1824 - #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" 1775 + #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" 1825 1776 1826 1777 static void print_usage(char *prog) 1827 1778 { ··· 1844 1795 " -D|R datagram (D) / raw (R) socket (default stream)\n" 1845 1796 " -l addr local address to bind to in server mode\n" 1846 1797 " -c addr local address to bind to in client mode\n" 1798 + " -x configure XFRM policy on socket\n" 1847 1799 "\n" 1848 1800 " -d dev bind socket to given device name\n" 1849 1801 " -I dev bind socket to given device name - server mode\n" ··· 2015 1965 break; 2016 1966 case 'q': 2017 1967 quiet = 1; 1968 + break; 1969 + case 'x': 1970 + args.use_xfrm = 1; 2018 1971 break; 2019 1972 default: 2020 1973 print_usage(argv[0]);
+206 -6
tools/testing/selftests/net/pmtu.sh
··· 118 118 # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU 119 119 # changes alone won't affect PMTU 120 120 # 121 + # - pmtu_vti4_udp_exception 122 + # Same as pmtu_vti4_exception, but using ESP-in-UDP 123 + # 124 + # - pmtu_vti4_udp_routed_exception 125 + # Set up vti tunnel on top of veth connected through routing namespace and 126 + # add xfrm states and policies with ESP-in-UDP encapsulation. Check that 127 + # route exception is not created if link layer MTU is not exceeded, then 128 + # lower MTU on second part of routed environment and check that exception 129 + # is created with the expected PMTU. 130 + # 121 131 # - pmtu_vti6_exception 122 132 # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two 123 133 # namespaces with matching endpoints. Check that route exception is 124 134 # created by exceeding link layer MTU with ping to other endpoint. Then 125 135 # decrease and increase MTU of tunnel, checking that route exception PMTU 126 136 # changes accordingly 137 + # 138 + # - pmtu_vti6_udp_exception 139 + # Same as pmtu_vti6_exception, but using ESP-in-UDP 140 + # 141 + # - pmtu_vti6_udp_routed_exception 142 + # Same as pmtu_vti6_udp_routed_exception but with routing between vti 143 + # endpoints 127 144 # 128 145 # - pmtu_vti4_default_mtu 129 146 # Set up vti4 tunnel on top of veth, in two namespaces with matching ··· 241 224 pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 242 225 pmtu_vti6_exception vti6: PMTU exceptions 0 243 226 pmtu_vti4_exception vti4: PMTU exceptions 0 227 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 228 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 229 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 230 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 244 231 pmtu_vti4_default_mtu vti4: default MTU assignment 0 245 232 pmtu_vti6_default_mtu vti6: default MTU assignment 0 246 233 
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 ··· 267 246 ns_c="ip netns exec ${NS_C}" 268 247 ns_r1="ip netns exec ${NS_R1}" 269 248 ns_r2="ip netns exec ${NS_R2}" 270 - 271 249 # Addressing and routing for tests with routers: four network segments, with 272 250 # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an 273 251 # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). ··· 299 279 A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 300 280 B default ${prefix6}:${b_r1}::2 301 281 " 302 - 303 282 USE_NH="no" 304 283 # ns family nh id destination gateway 305 284 nexthops=" ··· 345 326 346 327 err_buf= 347 328 tcpdump_pids= 329 + nettest_pids= 348 330 349 331 err() { 350 332 err_buf="${err_buf}${1} ··· 568 548 setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} 569 549 } 570 550 551 + setup_vti4routed() { 552 + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} 553 + } 554 + 555 + setup_vti6routed() { 556 + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} 557 + } 558 + 571 559 setup_vxlan_or_geneve() { 572 560 type="${1}" 573 561 a_addr="${2}" ··· 647 619 proto=${1} 648 620 veth_a_addr="${2}" 649 621 veth_b_addr="${3}" 622 + encap=${4} 650 623 651 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 652 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 624 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 625 + 
run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} 653 626 run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel 654 627 run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel 655 628 656 - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 657 - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 629 + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} 630 + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} 658 631 run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel 659 632 run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel 633 + } 634 + 635 + setup_nettest_xfrm() { 636 + which nettest >/dev/null 637 + if [ $? -ne 0 ]; then 638 + echo "'nettest' command not found; skipping tests" 639 + return 1 640 + fi 641 + 642 + [ ${1} -eq 6 ] && proto="-6" || proto="" 643 + port=${2} 644 + 645 + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & 646 + nettest_pids="${nettest_pids} $!" 
647 + 648 + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & 649 + nettest_pids="${nettest_pids} $!" 660 650 } 661 651 662 652 setup_xfrm4() { ··· 683 637 684 638 setup_xfrm6() { 685 639 setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} 640 + } 641 + 642 + setup_xfrm4udp() { 643 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" 644 + setup_nettest_xfrm 4 4500 645 + } 646 + 647 + setup_xfrm6udp() { 648 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" 649 + setup_nettest_xfrm 6 4500 650 + } 651 + 652 + setup_xfrm4udprouted() { 653 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" 654 + setup_nettest_xfrm 4 4500 655 + } 656 + 657 + setup_xfrm6udprouted() { 658 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" 659 + setup_nettest_xfrm 6 4500 686 660 } 687 661 688 662 setup_routing_old() { ··· 888 822 kill ${pid} 889 823 done 890 824 tcpdump_pids= 825 + 826 + for pid in ${nettest_pids}; do 827 + kill ${pid} 828 + done 829 + nettest_pids= 891 830 892 831 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do 893 832 ip netns del ${n} 2> /dev/null ··· 1501 1430 check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 1502 1431 1503 1432 return ${fail} 1433 + } 1434 + 1435 + test_pmtu_vti4_udp_exception() { 1436 + setup namespaces veth vti4 xfrm4udp || return $ksft_skip 1437 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ 1438 + "${ns_a}" vti4_a "${ns_b}" vti4_b 1439 + 1440 + veth_mtu=1500 1441 + vti_mtu=$((veth_mtu - 20)) 1442 + 1443 + # UDP SPI SN IV ICV pad length next header 1444 + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) 1445 + ping_payload=$((esp_payload_rfc4106 - 28)) 1446 + 1447 + mtu "${ns_a}" veth_a ${veth_mtu} 1448 + mtu "${ns_b}" veth_b ${veth_mtu} 1449 + mtu "${ns_a}" vti4_a ${vti_mtu} 1450 + mtu "${ns_b}" vti4_b ${vti_mtu} 1451 + 1452 + # Send DF packet without exceeding 
link layer MTU, check that no 1453 + # exception is created 1454 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} 1455 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1456 + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 1457 + 1458 + # Now exceed link layer MTU by one byte, check that exception is created 1459 + # with the right PMTU value 1460 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} 1461 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1462 + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" 1463 + } 1464 + 1465 + test_pmtu_vti6_udp_exception() { 1466 + setup namespaces veth vti6 xfrm6udp || return $ksft_skip 1467 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ 1468 + "${ns_a}" vti6_a "${ns_b}" vti6_b 1469 + fail=0 1470 + 1471 + # Create route exception by exceeding link layer MTU 1472 + mtu "${ns_a}" veth_a 4000 1473 + mtu "${ns_b}" veth_b 4000 1474 + mtu "${ns_a}" vti6_a 5000 1475 + mtu "${ns_b}" vti6_b 5000 1476 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} 1477 + 1478 + # Check that exception was created 1479 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1480 + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 1481 + 1482 + # Decrease tunnel MTU, check for PMTU decrease in route exception 1483 + mtu "${ns_a}" vti6_a 3000 1484 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1485 + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 1486 + 1487 + # Increase tunnel MTU, check for PMTU increase in route exception 1488 + mtu "${ns_a}" vti6_a 9000 1489 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1490 + check_pmtu_value "9000" "${pmtu}" 
"increasing tunnel MTU" || fail=1 1491 + 1492 + return ${fail} 1493 + } 1494 + 1495 + test_pmtu_vti4_udp_routed_exception() { 1496 + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip 1497 + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ 1498 + "${ns_a}" vti4_a "${ns_b}" vti4_b 1499 + 1500 + veth_mtu=1500 1501 + vti_mtu=$((veth_mtu - 20)) 1502 + 1503 + # UDP SPI SN IV ICV pad length next header 1504 + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) 1505 + ping_payload=$((esp_payload_rfc4106 - 28)) 1506 + 1507 + mtu "${ns_a}" veth_A-R1 ${veth_mtu} 1508 + mtu "${ns_r1}" veth_R1-A ${veth_mtu} 1509 + mtu "${ns_b}" veth_B-R1 ${veth_mtu} 1510 + mtu "${ns_r1}" veth_R1-B ${veth_mtu} 1511 + 1512 + mtu "${ns_a}" vti4_a ${vti_mtu} 1513 + mtu "${ns_b}" vti4_b ${vti_mtu} 1514 + 1515 + # Send DF packet without exceeding link layer MTU, check that no 1516 + # exception is created 1517 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} 1518 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1519 + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 1520 + 1521 + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created 1522 + # with the right PMTU value 1523 + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) 1524 + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} 1525 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 1526 + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" 1527 + } 1528 + 1529 + test_pmtu_vti6_udp_routed_exception() { 1530 + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip 1531 + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ 1532 + "${ns_a}" vti6_a "${ns_b}" vti6_b 1533 + 1534 + veth_mtu=1500 1535 + vti_mtu=$((veth_mtu - 40)) 1536 + 1537 + # 
UDP SPI SN IV ICV pad length next header 1538 + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) 1539 + ping_payload=$((esp_payload_rfc4106 - 48)) 1540 + 1541 + mtu "${ns_a}" veth_A-R1 ${veth_mtu} 1542 + mtu "${ns_r1}" veth_R1-A ${veth_mtu} 1543 + mtu "${ns_b}" veth_B-R1 ${veth_mtu} 1544 + mtu "${ns_r1}" veth_R1-B ${veth_mtu} 1545 + 1546 + # mtu "${ns_a}" vti6_a ${vti_mtu} 1547 + # mtu "${ns_b}" vti6_b ${vti_mtu} 1548 + 1549 + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} 1550 + 1551 + # Check that exception was not created 1552 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1553 + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 1554 + 1555 + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created 1556 + # with the right PMTU value 1557 + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) 1558 + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} 1559 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 1560 + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" 1561 + 1504 1562 } 1505 1563 1506 1564 test_pmtu_vti4_default_mtu() {