Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

+726 -338
+3 -4
Documentation/networking/ip-sysctl.rst
··· 651 651 652 652 default: initial size of receive buffer used by TCP sockets. 653 653 This value overrides net.core.rmem_default used by other protocols. 654 - Default: 87380 bytes. This value results in window of 65535 with 655 - default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit 656 - less for default tcp_app_win. See below about these variables. 654 + Default: 131072 bytes. 655 + This value results in initial window of 65535. 657 656 658 657 max: maximal size of receive buffer allowed for automatically 659 658 selected receiver buffers for TCP socket. This value does not override 660 659 net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables 661 660 automatic tuning of that socket's receive buffer size, in which 662 661 case this value is ignored. 663 - Default: between 87380B and 6MB, depending on RAM size. 662 + Default: between 131072 and 6MB, depending on RAM size. 664 663 665 664 tcp_sack - BOOLEAN 666 665 Enable select acknowledgments (SACKS).
+16
Documentation/networking/netdev-FAQ.rst
··· 272 272 Posting as one thread is discouraged because it confuses patchwork 273 273 (as of patchwork 2.2.2). 274 274 275 + Can I reproduce the checks from patchwork on my local machine? 276 + -------------------------------------------------------------- 277 + 278 + Checks in patchwork are mostly simple wrappers around existing kernel 279 + scripts, the sources are available at: 280 + 281 + https://github.com/kuba-moo/nipa/tree/master/tests 282 + 283 + Running all the builds and checks locally is a pain, can I post my patches and have the patchwork bot validate them? 284 + -------------------------------------------------------------------------------------------------------------------- 285 + 286 + No, you must ensure that your patches are ready by testing them locally 287 + before posting to the mailing list. The patchwork build bot instance 288 + gets overloaded very easily and netdev@vger really doesn't need more 289 + traffic if we can help it. 290 + 275 291 Any other tips to help ensure my net/net-next patch gets OK'd? 276 292 -------------------------------------------------------------- 277 293 Attention to detail. Re-read your own work as if you were the
+1 -10
drivers/atm/idt77252.c
··· 3743 3743 struct sk_buff *skb; 3744 3744 3745 3745 printk("%s: at %p\n", __func__, idt77252_init); 3746 - 3747 - if (sizeof(skb->cb) < sizeof(struct atm_skb_data) + 3748 - sizeof(struct idt77252_skb_prv)) { 3749 - printk(KERN_ERR "%s: skb->cb is too small (%lu < %lu)\n", 3750 - __func__, (unsigned long) sizeof(skb->cb), 3751 - (unsigned long) sizeof(struct atm_skb_data) + 3752 - sizeof(struct idt77252_skb_prv)); 3753 - return -EIO; 3754 - } 3755 - 3746 + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct idt77252_skb_prv) + sizeof(struct atm_skb_data)); 3756 3747 return pci_register_driver(&idt77252_driver); 3757 3748 } 3758 3749
+1 -1
drivers/atm/idt77252.h
··· 789 789 struct scqe tbd; /* Transmit Buffer Descriptor */ 790 790 dma_addr_t paddr; /* DMA handle */ 791 791 u32 pool; /* sb_pool handle */ 792 - }; 792 + } __packed; 793 793 794 794 #define IDT77252_PRV_TBD(skb) \ 795 795 (((struct idt77252_skb_prv *)(ATM_SKB(skb)+1))->tbd)
+14
drivers/net/ethernet/amd/xgbe/xgbe-common.h
··· 1279 1279 #define MDIO_PMA_10GBR_FECCTRL 0x00ab 1280 1280 #endif 1281 1281 1282 + #ifndef MDIO_PMA_RX_CTRL1 1283 + #define MDIO_PMA_RX_CTRL1 0x8051 1284 + #endif 1285 + 1282 1286 #ifndef MDIO_PCS_DIG_CTRL 1283 1287 #define MDIO_PCS_DIG_CTRL 0x8000 1288 + #endif 1289 + 1290 + #ifndef MDIO_PCS_DIGITAL_STAT 1291 + #define MDIO_PCS_DIGITAL_STAT 0x8010 1284 1292 #endif 1285 1293 1286 1294 #ifndef MDIO_AN_XNP ··· 1366 1358 #define XGBE_KR_TRAINING_ENABLE BIT(1) 1367 1359 1368 1360 #define XGBE_PCS_CL37_BP BIT(12) 1361 + #define XGBE_PCS_PSEQ_STATE_MASK 0x1c 1362 + #define XGBE_PCS_PSEQ_STATE_POWER_GOOD 0x10 1369 1363 1370 1364 #define XGBE_AN_CL37_INT_CMPLT BIT(0) 1371 1365 #define XGBE_AN_CL37_INT_MASK 0x01 ··· 1384 1374 #define XGBE_PMA_CDR_TRACK_EN_MASK 0x01 1385 1375 #define XGBE_PMA_CDR_TRACK_EN_OFF 0x00 1386 1376 #define XGBE_PMA_CDR_TRACK_EN_ON 0x01 1377 + 1378 + #define XGBE_PMA_RX_RST_0_MASK BIT(4) 1379 + #define XGBE_PMA_RX_RST_0_RESET_ON 0x10 1380 + #define XGBE_PMA_RX_RST_0_RESET_OFF 0x00 1387 1381 1388 1382 /* Bit setting and getting macros 1389 1383 * The get macro will extract the current bit field value from within
+1
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
··· 1368 1368 return; 1369 1369 1370 1370 netif_tx_stop_all_queues(netdev); 1371 + netif_carrier_off(pdata->netdev); 1371 1372 1372 1373 xgbe_stop_timers(pdata); 1373 1374 flush_workqueue(pdata->dev_workqueue);
+1 -2
drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
··· 1345 1345 &an_restart); 1346 1346 if (an_restart) { 1347 1347 xgbe_phy_config_aneg(pdata); 1348 - return; 1348 + goto adjust_link; 1349 1349 } 1350 1350 1351 1351 if (pdata->phy.link) { ··· 1396 1396 pdata->phy_if.phy_impl.stop(pdata); 1397 1397 1398 1398 pdata->phy.link = 0; 1399 - netif_carrier_off(pdata->netdev); 1400 1399 1401 1400 xgbe_phy_adjust_link(pdata); 1402 1401 }
+38 -1
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
··· 922 922 if ((phy_id & 0xfffffff0) != 0x03625d10) 923 923 return false; 924 924 925 + /* Reset PHY - wait for self-clearing reset bit to clear */ 926 + genphy_soft_reset(phy_data->phydev); 927 + 925 928 /* Disable RGMII mode */ 926 929 phy_write(phy_data->phydev, 0x18, 0x7007); 927 930 reg = phy_read(phy_data->phydev, 0x18); ··· 1956 1953 xgbe_phy_put_comm_ownership(pdata); 1957 1954 } 1958 1955 1956 + static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) 1957 + { 1958 + int reg; 1959 + 1960 + reg = XMDIO_READ_BITS(pdata, MDIO_MMD_PCS, MDIO_PCS_DIGITAL_STAT, 1961 + XGBE_PCS_PSEQ_STATE_MASK); 1962 + if (reg == XGBE_PCS_PSEQ_STATE_POWER_GOOD) { 1963 + /* Mailbox command timed out, reset of RX block is required. 1964 + * This can be done by asseting the reset bit and wait for 1965 + * its compeletion. 1966 + */ 1967 + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1, 1968 + XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_ON); 1969 + ndelay(20); 1970 + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1, 1971 + XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_OFF); 1972 + usleep_range(40, 50); 1973 + netif_err(pdata, link, pdata->netdev, "firmware mailbox reset performed\n"); 1974 + } 1975 + } 1976 + 1959 1977 static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, 1960 1978 unsigned int cmd, unsigned int sub_cmd) 1961 1979 { ··· 1984 1960 unsigned int wait; 1985 1961 1986 1962 /* Log if a previous command did not complete */ 1987 - if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) 1963 + if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) { 1988 1964 netif_dbg(pdata, link, pdata->netdev, 1989 1965 "firmware mailbox not ready for command\n"); 1966 + xgbe_phy_rx_reset(pdata); 1967 + } 1990 1968 1991 1969 /* Construct the command */ 1992 1970 XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd); ··· 2010 1984 2011 1985 netif_dbg(pdata, link, pdata->netdev, 2012 1986 "firmware mailbox command did not complete\n"); 1987 + 1988 + /* 
Reset on error */ 1989 + xgbe_phy_rx_reset(pdata); 2013 1990 } 2014 1991 2015 1992 static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) ··· 2612 2583 reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); 2613 2584 if (reg & MDIO_STAT1_LSTATUS) 2614 2585 return 1; 2586 + 2587 + if (pdata->phy.autoneg == AUTONEG_ENABLE && 2588 + phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) { 2589 + if (!test_bit(XGBE_LINK_INIT, &pdata->dev_state)) { 2590 + netif_carrier_off(pdata->netdev); 2591 + *an_restart = 1; 2592 + } 2593 + } 2615 2594 2616 2595 /* No link, attempt a receiver reset cycle */ 2617 2596 if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) {
+3 -1
drivers/net/ethernet/aquantia/atlantic/aq_main.c
··· 71 71 goto err_exit; 72 72 73 73 err = aq_nic_start(aq_nic); 74 - if (err < 0) 74 + if (err < 0) { 75 + aq_nic_stop(aq_nic); 75 76 goto err_exit; 77 + } 76 78 77 79 err_exit: 78 80 if (err < 0)
+2 -1
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 8984 8984 txr->dev_state = BNXT_DEV_STATE_CLOSING; 8985 8985 } 8986 8986 } 8987 + /* Drop carrier first to prevent TX timeout */ 8988 + netif_carrier_off(bp->dev); 8987 8989 /* Stop all TX queues */ 8988 8990 netif_tx_disable(bp->dev); 8989 - netif_carrier_off(bp->dev); 8990 8991 } 8991 8992 8992 8993 void bnxt_tx_enable(struct bnxt *bp)
+2 -2
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
··· 471 471 if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) { 472 472 u32 ver = nvm_cfg_ver.vu32; 473 473 474 - sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF, 475 - ver & 0xF); 474 + sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xf, (ver >> 8) & 0xf, 475 + ver & 0xf); 476 476 rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, 477 477 DEVLINK_INFO_VERSION_GENERIC_FW_PSID, 478 478 buf);
+3
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
··· 46 46 #define MAX_ULD_QSETS 16 47 47 #define MAX_ULD_NPORTS 4 48 48 49 + /* ulp_mem_io + ulptx_idata + payload + padding */ 50 + #define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) 51 + 49 52 /* CPL message priority levels */ 50 53 enum { 51 54 CPL_PRIORITY_DATA = 0, /* data messages */
+8 -3
drivers/net/ethernet/chelsio/cxgb4/sge.c
··· 2846 2846 * @skb: the packet 2847 2847 * 2848 2848 * Returns true if a packet can be sent as an offload WR with immediate 2849 - * data. We currently use the same limit as for Ethernet packets. 2849 + * data. 2850 + * FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field. 2851 + * However, FW_ULPTX_WR commands have a 256 byte immediate only 2852 + * payload limit. 2850 2853 */ 2851 2854 static inline int is_ofld_imm(const struct sk_buff *skb) 2852 2855 { 2853 2856 struct work_request_hdr *req = (struct work_request_hdr *)skb->data; 2854 2857 unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi)); 2855 2858 2856 - if (opcode == FW_CRYPTO_LOOKASIDE_WR) 2859 + if (unlikely(opcode == FW_ULPTX_WR)) 2860 + return skb->len <= MAX_IMM_ULPTX_WR_LEN; 2861 + else if (opcode == FW_CRYPTO_LOOKASIDE_WR) 2857 2862 return skb->len <= SGE_MAX_WR_LEN; 2858 2863 else 2859 - return skb->len <= MAX_IMM_TX_PKT_LEN; 2864 + return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN; 2860 2865 } 2861 2866 2862 2867 /**
-3
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
··· 50 50 #define MIN_RCV_WND (24 * 1024U) 51 51 #define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000)) 52 52 53 - /* ulp_mem_io + ulptx_idata + payload + padding */ 54 - #define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) 55 - 56 53 /* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */ 57 54 #define TX_HEADER_LEN \ 58 55 (sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr))
+12 -2
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
··· 395 395 xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE; 396 396 397 397 err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog); 398 - if (unlikely(err)) 398 + if (unlikely(err)) { 399 + addr = dma_map_page(priv->net_dev->dev.parent, 400 + virt_to_page(vaddr), 0, 401 + priv->rx_buf_size, DMA_BIDIRECTIONAL); 402 + if (unlikely(dma_mapping_error(priv->net_dev->dev.parent, addr))) { 403 + free_pages((unsigned long)vaddr, 0); 404 + } else { 405 + ch->buf_count++; 406 + dpaa2_eth_xdp_release_buf(priv, ch, addr); 407 + } 399 408 ch->stats.xdp_drop++; 400 - else 409 + } else { 401 410 ch->stats.xdp_redirect++; 411 + } 402 412 break; 403 413 } 404 414
+1 -1
drivers/net/ethernet/freescale/enetc/Kconfig
··· 27 27 28 28 config FSL_ENETC_MDIO 29 29 tristate "ENETC MDIO driver" 30 - depends on PCI 30 + depends on PCI && MDIO_DEVRES && MDIO_BUS 31 31 help 32 32 This driver supports NXP ENETC Central MDIO controller as a PCIe 33 33 physical function (PF) device.
+3 -2
drivers/net/ethernet/freescale/enetc/enetc_pf.c
··· 1157 1157 struct enetc_ndev_priv *priv; 1158 1158 1159 1159 priv = netdev_priv(si->ndev); 1160 - enetc_phylink_destroy(priv); 1161 - enetc_mdiobus_destroy(pf); 1162 1160 1163 1161 if (pf->num_vfs) 1164 1162 enetc_sriov_configure(pdev, 0); 1165 1163 1166 1164 unregister_netdev(si->ndev); 1165 + 1166 + enetc_phylink_destroy(priv); 1167 + enetc_mdiobus_destroy(pf); 1167 1168 1168 1169 enetc_free_msix(priv); 1169 1170
+32 -12
drivers/net/ethernet/ibm/ibmvnic.c
··· 247 247 if (!ltb->buff) 248 248 return; 249 249 250 + /* VIOS automatically unmaps the long term buffer at remote 251 + * end for the following resets: 252 + * FAILOVER, MOBILITY, TIMEOUT. 253 + */ 250 254 if (adapter->reset_reason != VNIC_RESET_FAILOVER && 251 - adapter->reset_reason != VNIC_RESET_MOBILITY) 255 + adapter->reset_reason != VNIC_RESET_MOBILITY && 256 + adapter->reset_reason != VNIC_RESET_TIMEOUT) 252 257 send_request_unmap(adapter, ltb->map_id); 253 258 dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); 254 259 } ··· 1327 1322 1328 1323 adapter->state = VNIC_CLOSING; 1329 1324 rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); 1330 - if (rc) 1331 - return rc; 1332 1325 adapter->state = VNIC_CLOSED; 1333 - return 0; 1326 + return rc; 1334 1327 } 1335 1328 1336 1329 static int ibmvnic_close(struct net_device *netdev) ··· 1672 1669 } else { 1673 1670 skb_copy_from_linear_data(skb, dst, skb->len); 1674 1671 } 1672 + 1673 + /* post changes to long_term_buff *dst before VIOS accessing it */ 1674 + dma_wmb(); 1675 1675 1676 1676 tx_pool->consumer_index = 1677 1677 (tx_pool->consumer_index + 1) % tx_pool->num_buffers; ··· 2294 2288 unsigned long flags; 2295 2289 int ret; 2296 2290 2297 - /* If failover is pending don't schedule any other reset. 2291 + /* 2292 + * If failover is pending don't schedule any other reset. 2298 2293 * Instead let the failover complete. If there is already a 2299 2294 * a failover reset scheduled, we will detect and drop the 2300 2295 * duplicate reset when walking the ->rwi_list below. 
··· 2315 2308 goto err; 2316 2309 } 2317 2310 2318 - spin_lock_irqsave(&adapter->rwi_lock, flags); 2319 - 2320 2311 list_for_each(entry, &adapter->rwi_list) { 2321 2312 tmp = list_entry(entry, struct ibmvnic_rwi, list); 2322 2313 if (tmp->reset_reason == reason) { 2323 2314 netdev_dbg(netdev, "Skipping matching reset, reason=%d\n", 2324 2315 reason); 2325 - spin_unlock_irqrestore(&adapter->rwi_lock, flags); 2326 2316 ret = EBUSY; 2327 2317 goto err; 2328 2318 } ··· 2327 2323 2328 2324 rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); 2329 2325 if (!rwi) { 2330 - spin_unlock_irqrestore(&adapter->rwi_lock, flags); 2331 - ibmvnic_close(netdev); 2332 2326 ret = ENOMEM; 2333 2327 goto err; 2334 2328 } ··· 2339 2337 } 2340 2338 rwi->reset_reason = reason; 2341 2339 list_add_tail(&rwi->list, &adapter->rwi_list); 2342 - spin_unlock_irqrestore(&adapter->rwi_lock, flags); 2343 2340 netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); 2344 2341 schedule_work(&adapter->ibmvnic_reset); 2345 2342 2346 - return 0; 2343 + ret = 0; 2347 2344 err: 2345 + /* ibmvnic_close() below can block, so drop the lock first */ 2346 + spin_unlock_irqrestore(&adapter->rwi_lock, flags); 2347 + 2348 + if (ret == ENOMEM) 2349 + ibmvnic_close(netdev); 2350 + 2348 2351 return -ret; 2349 2352 } 2350 2353 ··· 2440 2433 offset = be16_to_cpu(next->rx_comp.off_frame_data); 2441 2434 flags = next->rx_comp.flags; 2442 2435 skb = rx_buff->skb; 2436 + /* load long_term_buff before copying to skb */ 2437 + dma_rmb(); 2443 2438 skb_copy_to_linear_data(skb, rx_buff->data + offset, 2444 2439 length); 2445 2440 ··· 5355 5346 unsigned long flags; 5356 5347 5357 5348 spin_lock_irqsave(&adapter->state_lock, flags); 5349 + 5350 + /* If ibmvnic_reset() is scheduling a reset, wait for it to 5351 + * finish. Then, set the state to REMOVING to prevent it from 5352 + * scheduling any more work and to have reset functions ignore 5353 + * any resets that have already been scheduled. 
Drop the lock 5354 + * after setting state, so __ibmvnic_reset() which is called 5355 + * from the flush_work() below, can make progress. 5356 + */ 5357 + spin_lock_irqsave(&adapter->rwi_lock, flags); 5358 5358 adapter->state = VNIC_REMOVING; 5359 + spin_unlock_irqrestore(&adapter->rwi_lock, flags); 5360 + 5359 5361 spin_unlock_irqrestore(&adapter->state_lock, flags); 5360 5362 5361 5363 flush_work(&adapter->ibmvnic_reset);
+3 -1
drivers/net/ethernet/ibm/ibmvnic.h
··· 31 31 #define IBMVNIC_BUFFS_PER_POOL 100 32 32 #define IBMVNIC_MAX_QUEUES 16 33 33 #define IBMVNIC_MAX_QUEUE_SZ 4096 34 - #define IBMVNIC_MAX_IND_DESCS 128 34 + #define IBMVNIC_MAX_IND_DESCS 16 35 35 #define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) 36 36 37 37 #define IBMVNIC_TSO_BUF_SZ 65536 ··· 1084 1084 /* Used for serializatin of state field */ 1085 1085 spinlock_t state_lock; 1086 1086 enum ibmvnic_reset_reason reset_reason; 1087 + /* when taking both state and rwi locks, take state lock first */ 1088 + spinlock_t rwi_lock; 1087 1089 struct list_head rwi_list; 1088 1090 /* Used for serialization of rwi_list */ 1089 1091 spinlock_t rwi_lock;
+9
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
··· 141 141 return -EOPNOTSUPP; 142 142 } 143 143 144 + if (mlx5_lag_is_active(dev)) { 145 + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n"); 146 + return -EOPNOTSUPP; 147 + } 148 + 144 149 switch (action) { 145 150 case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: 146 151 mlx5_unload_one(dev, false); ··· 431 426 432 427 if (new_state && !MLX5_CAP_GEN(dev, roce)) { 433 428 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); 429 + return -EOPNOTSUPP; 430 + } 431 + if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) { 432 + NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE"); 434 433 return -EOPNOTSUPP; 435 434 } 436 435
+192 -67
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
··· 12 12 #include <net/flow_offload.h> 13 13 #include <net/netfilter/nf_flow_table.h> 14 14 #include <linux/workqueue.h> 15 + #include <linux/refcount.h> 15 16 #include <linux/xarray.h> 16 17 17 18 #include "lib/fs_chains.h" ··· 53 52 struct mlx5_flow_table *ct_nat; 54 53 struct mlx5_flow_table *post_ct; 55 54 struct mutex control_lock; /* guards parallel adds/dels */ 56 - struct mutex shared_counter_lock; 57 55 struct mapping_ctx *zone_mapping; 58 56 struct mapping_ctx *labels_mapping; 59 57 enum mlx5_flow_namespace_type ns_type; 60 58 struct mlx5_fs_chains *chains; 59 + spinlock_t ht_lock; /* protects ft entries */ 61 60 }; 62 61 63 62 struct mlx5_ct_flow { ··· 126 125 bool is_shared; 127 126 }; 128 127 128 + enum { 129 + MLX5_CT_ENTRY_FLAG_VALID, 130 + }; 131 + 129 132 struct mlx5_ct_entry { 130 133 struct rhash_head node; 131 134 struct rhash_head tuple_node; ··· 140 135 struct mlx5_ct_tuple tuple; 141 136 struct mlx5_ct_tuple tuple_nat; 142 137 struct mlx5_ct_zone_rule zone_rules[2]; 138 + 139 + struct mlx5_tc_ct_priv *ct_priv; 140 + struct work_struct work; 141 + 142 + refcount_t refcnt; 143 + unsigned long flags; 143 144 }; 144 145 145 146 static const struct rhashtable_params cts_ht_params = { ··· 753 742 return err; 754 743 } 755 744 745 + static bool 746 + mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) 747 + { 748 + return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 749 + } 750 + 751 + static struct mlx5_ct_entry * 752 + mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) 753 + { 754 + struct mlx5_ct_entry *entry; 755 + 756 + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, 757 + tuples_ht_params); 758 + if (entry && mlx5_tc_ct_entry_valid(entry) && 759 + refcount_inc_not_zero(&entry->refcnt)) { 760 + return entry; 761 + } else if (!entry) { 762 + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 763 + tuple, tuples_nat_ht_params); 764 + if (entry && mlx5_tc_ct_entry_valid(entry) && 765 + 
refcount_inc_not_zero(&entry->refcnt)) 766 + return entry; 767 + } 768 + 769 + return entry ? ERR_PTR(-EINVAL) : NULL; 770 + } 771 + 772 + static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) 773 + { 774 + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 775 + 776 + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 777 + &entry->tuple_nat_node, 778 + tuples_nat_ht_params); 779 + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 780 + tuples_ht_params); 781 + } 782 + 783 + static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) 784 + { 785 + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 786 + 787 + mlx5_tc_ct_entry_del_rules(ct_priv, entry); 788 + 789 + spin_lock_bh(&ct_priv->ht_lock); 790 + mlx5_tc_ct_entry_remove_from_tuples(entry); 791 + spin_unlock_bh(&ct_priv->ht_lock); 792 + 793 + mlx5_tc_ct_counter_put(ct_priv, entry); 794 + kfree(entry); 795 + } 796 + 797 + static void 798 + mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 799 + { 800 + if (!refcount_dec_and_test(&entry->refcnt)) 801 + return; 802 + 803 + mlx5_tc_ct_entry_del(entry); 804 + } 805 + 806 + static void mlx5_tc_ct_entry_del_work(struct work_struct *work) 807 + { 808 + struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); 809 + 810 + mlx5_tc_ct_entry_del(entry); 811 + } 812 + 813 + static void 814 + __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 815 + { 816 + struct mlx5e_priv *priv; 817 + 818 + if (!refcount_dec_and_test(&entry->refcnt)) 819 + return; 820 + 821 + priv = netdev_priv(entry->ct_priv->netdev); 822 + INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); 823 + queue_work(priv->wq, &entry->work); 824 + } 825 + 756 826 static struct mlx5_ct_counter * 757 827 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) 758 828 { ··· 885 793 } 886 794 887 795 /* Use the same counter as the reverse direction */ 888 - mutex_lock(&ct_priv->shared_counter_lock); 889 - rev_entry = 
rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, 890 - tuples_ht_params); 891 - if (rev_entry) { 892 - if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { 893 - mutex_unlock(&ct_priv->shared_counter_lock); 894 - return rev_entry->counter; 895 - } 796 + spin_lock_bh(&ct_priv->ht_lock); 797 + rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); 798 + 799 + if (IS_ERR(rev_entry)) { 800 + spin_unlock_bh(&ct_priv->ht_lock); 801 + goto create_counter; 896 802 } 897 - mutex_unlock(&ct_priv->shared_counter_lock); 803 + 804 + if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { 805 + ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry); 806 + shared_counter = rev_entry->counter; 807 + spin_unlock_bh(&ct_priv->ht_lock); 808 + 809 + mlx5_tc_ct_entry_put(rev_entry); 810 + return shared_counter; 811 + } 812 + 813 + spin_unlock_bh(&ct_priv->ht_lock); 814 + 815 + create_counter: 898 816 899 817 shared_counter = mlx5_tc_ct_counter_create(ct_priv); 900 818 if (IS_ERR(shared_counter)) ··· 967 865 if (!meta_action) 968 866 return -EOPNOTSUPP; 969 867 970 - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 971 - cts_ht_params); 972 - if (entry) 973 - return 0; 868 + spin_lock_bh(&ct_priv->ht_lock); 869 + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 870 + if (entry && refcount_inc_not_zero(&entry->refcnt)) { 871 + spin_unlock_bh(&ct_priv->ht_lock); 872 + mlx5_tc_ct_entry_put(entry); 873 + return -EEXIST; 874 + } 875 + spin_unlock_bh(&ct_priv->ht_lock); 974 876 975 877 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 976 878 if (!entry) ··· 983 877 entry->tuple.zone = ft->zone; 984 878 entry->cookie = flow->cookie; 985 879 entry->restore_cookie = meta_action->ct_metadata.cookie; 880 + refcount_set(&entry->refcnt, 2); 881 + entry->ct_priv = ct_priv; 986 882 987 883 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); 988 884 if (err) ··· 995 887 if (err) 996 888 goto err_set; 997 889 998 - 
err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht, 999 - &entry->tuple_node, 1000 - tuples_ht_params); 890 + spin_lock_bh(&ct_priv->ht_lock); 891 + 892 + err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, 893 + cts_ht_params); 894 + if (err) 895 + goto err_entries; 896 + 897 + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, 898 + &entry->tuple_node, 899 + tuples_ht_params); 1001 900 if (err) 1002 901 goto err_tuple; 1003 902 1004 903 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { 1005 - err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht, 1006 - &entry->tuple_nat_node, 1007 - tuples_nat_ht_params); 904 + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, 905 + &entry->tuple_nat_node, 906 + tuples_nat_ht_params); 1008 907 if (err) 1009 908 goto err_tuple_nat; 1010 909 } 910 + spin_unlock_bh(&ct_priv->ht_lock); 1011 911 1012 912 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, 1013 913 ft->zone_restore_id); 1014 914 if (err) 1015 915 goto err_rules; 1016 916 1017 - err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, 1018 - cts_ht_params); 1019 - if (err) 1020 - goto err_insert; 917 + set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 918 + mlx5_tc_ct_entry_put(entry); /* this function reference */ 1021 919 1022 920 return 0; 1023 921 1024 - err_insert: 1025 - mlx5_tc_ct_entry_del_rules(ct_priv, entry); 1026 922 err_rules: 923 + spin_lock_bh(&ct_priv->ht_lock); 1027 924 if (mlx5_tc_ct_entry_has_nat(entry)) 1028 925 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 1029 926 &entry->tuple_nat_node, tuples_nat_ht_params); ··· 1037 924 &entry->tuple_node, 1038 925 tuples_ht_params); 1039 926 err_tuple: 927 + rhashtable_remove_fast(&ft->ct_entries_ht, 928 + &entry->node, 929 + cts_ht_params); 930 + err_entries: 931 + spin_unlock_bh(&ct_priv->ht_lock); 1040 932 err_set: 1041 933 kfree(entry); 1042 - netdev_warn(ct_priv->netdev, 1043 - "Failed to offload ct entry, err: %d\n", err); 
934 + if (err != -EEXIST) 935 + netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); 1044 936 return err; 1045 - } 1046 - 1047 - static void 1048 - mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, 1049 - struct mlx5_ct_entry *entry) 1050 - { 1051 - mlx5_tc_ct_entry_del_rules(ct_priv, entry); 1052 - mutex_lock(&ct_priv->shared_counter_lock); 1053 - if (mlx5_tc_ct_entry_has_nat(entry)) 1054 - rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 1055 - &entry->tuple_nat_node, 1056 - tuples_nat_ht_params); 1057 - rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 1058 - tuples_ht_params); 1059 - mutex_unlock(&ct_priv->shared_counter_lock); 1060 - mlx5_tc_ct_counter_put(ct_priv, entry); 1061 - 1062 937 } 1063 938 1064 939 static int 1065 940 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, 1066 941 struct flow_cls_offload *flow) 1067 942 { 943 + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1068 944 unsigned long cookie = flow->cookie; 1069 945 struct mlx5_ct_entry *entry; 1070 946 1071 - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 1072 - cts_ht_params); 1073 - if (!entry) 947 + spin_lock_bh(&ct_priv->ht_lock); 948 + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 949 + if (!entry) { 950 + spin_unlock_bh(&ct_priv->ht_lock); 1074 951 return -ENOENT; 952 + } 1075 953 1076 - mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry); 1077 - WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, 1078 - &entry->node, 1079 - cts_ht_params)); 1080 - kfree(entry); 954 + if (!mlx5_tc_ct_entry_valid(entry)) { 955 + spin_unlock_bh(&ct_priv->ht_lock); 956 + return -EINVAL; 957 + } 958 + 959 + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); 960 + mlx5_tc_ct_entry_remove_from_tuples(entry); 961 + spin_unlock_bh(&ct_priv->ht_lock); 962 + 963 + mlx5_tc_ct_entry_put(entry); 1081 964 1082 965 return 0; 1083 966 } ··· 1082 973 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, 
1083 974 struct flow_cls_offload *f) 1084 975 { 976 + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1085 977 unsigned long cookie = f->cookie; 1086 978 struct mlx5_ct_entry *entry; 1087 979 u64 lastuse, packets, bytes; 1088 980 1089 - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 1090 - cts_ht_params); 1091 - if (!entry) 981 + spin_lock_bh(&ct_priv->ht_lock); 982 + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 983 + if (!entry) { 984 + spin_unlock_bh(&ct_priv->ht_lock); 1092 985 return -ENOENT; 986 + } 987 + 988 + if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { 989 + spin_unlock_bh(&ct_priv->ht_lock); 990 + return -EINVAL; 991 + } 992 + 993 + spin_unlock_bh(&ct_priv->ht_lock); 1093 994 1094 995 mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); 1095 996 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 1096 997 FLOW_ACTION_HW_STATS_DELAYED); 1097 998 999 + mlx5_tc_ct_entry_put(entry); 1098 1000 return 0; 1099 1001 } 1100 1002 ··· 1601 1481 static void 1602 1482 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1603 1483 { 1604 - struct mlx5_tc_ct_priv *ct_priv = arg; 1605 1484 struct mlx5_ct_entry *entry = ptr; 1606 1485 1607 - mlx5_tc_ct_del_ft_entry(ct_priv, entry); 1608 - kfree(entry); 1486 + mlx5_tc_ct_entry_put(entry); 1609 1487 } 1610 1488 1611 1489 static void ··· 2080 1962 goto err_mapping_labels; 2081 1963 } 2082 1964 1965 + spin_lock_init(&ct_priv->ht_lock); 2083 1966 ct_priv->ns_type = ns_type; 2084 1967 ct_priv->chains = chains; 2085 1968 ct_priv->netdev = priv->netdev; ··· 2115 1996 2116 1997 idr_init(&ct_priv->fte_ids); 2117 1998 mutex_init(&ct_priv->control_lock); 2118 - mutex_init(&ct_priv->shared_counter_lock); 2119 1999 rhashtable_init(&ct_priv->zone_ht, &zone_params); 2120 2000 rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); 2121 2001 rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); ··· 2157 2039 
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2158 2040 rhashtable_destroy(&ct_priv->zone_ht); 2159 2041 mutex_destroy(&ct_priv->control_lock); 2160 - mutex_destroy(&ct_priv->shared_counter_lock); 2161 2042 idr_destroy(&ct_priv->fte_ids); 2162 2043 kfree(ct_priv); 2163 2044 } ··· 2178 2061 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 2179 2062 return false; 2180 2063 2181 - entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, 2182 - tuples_ht_params); 2183 - if (!entry) 2184 - entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 2185 - &tuple, tuples_nat_ht_params); 2186 - if (!entry) 2064 + spin_lock(&ct_priv->ht_lock); 2065 + 2066 + entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); 2067 + if (!entry) { 2068 + spin_unlock(&ct_priv->ht_lock); 2187 2069 return false; 2070 + } 2071 + 2072 + if (IS_ERR(entry)) { 2073 + spin_unlock(&ct_priv->ht_lock); 2074 + return false; 2075 + } 2076 + spin_unlock(&ct_priv->ht_lock); 2188 2077 2189 2078 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 2079 + __mlx5_tc_ct_entry_put(entry); 2080 + 2190 2081 return true; 2191 2082 }
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
··· 83 83 84 84 clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); 85 85 /* Let other device's napi(s) and XSK wakeups see our new state. */ 86 - synchronize_rcu(); 86 + synchronize_net(); 87 87 } 88 88 89 89 static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
··· 111 111 void mlx5e_close_xsk(struct mlx5e_channel *c) 112 112 { 113 113 clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); 114 - synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */ 114 + synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */ 115 115 116 116 mlx5e_close_rq(&c->xskrq); 117 117 mlx5e_close_cq(&c->xskrq.cq);
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
··· 173 173 #endif 174 174 175 175 #if IS_ENABLED(CONFIG_GENEVE) 176 - if (skb->encapsulation) 176 + if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) 177 177 mlx5e_tx_tunnel_accel(skb, eseg, ihs); 178 178 #endif 179 179
+31 -35
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
··· 57 57 struct mlx5e_ktls_rx_resync_ctx resync; 58 58 }; 59 59 60 + static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) 61 + { 62 + if (!refcount_dec_and_test(&priv_rx->resync.refcnt)) 63 + return false; 64 + 65 + kfree(priv_rx); 66 + return true; 67 + } 68 + 69 + static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx) 70 + { 71 + refcount_inc(&priv_rx->resync.refcnt); 72 + } 73 + 60 74 static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) 61 75 { 62 76 int err, inlen; ··· 340 326 priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); 341 327 342 328 if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { 343 - refcount_dec(&resync->refcnt); 329 + mlx5e_ktls_priv_rx_put(priv_rx); 344 330 return; 345 331 } 346 332 ··· 348 334 sq = &c->async_icosq; 349 335 350 336 if (resync_post_get_progress_params(sq, priv_rx)) 351 - refcount_dec(&resync->refcnt); 337 + mlx5e_ktls_priv_rx_put(priv_rx); 352 338 } 353 339 354 340 static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, ··· 391 377 return err; 392 378 } 393 379 394 - /* Function is called with elevated refcount, it decreases it. */ 380 + /* Function can be called with the refcount being either elevated or not. 381 + * It decreases the refcount and may free the kTLS priv context. 382 + * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was 383 + * already in flight. 
384 + */ 395 385 void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, 396 386 struct mlx5e_icosq *sq) 397 387 { ··· 428 410 tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); 429 411 priv_rx->stats->tls_resync_req_end++; 430 412 out: 431 - refcount_dec(&resync->refcnt); 413 + mlx5e_ktls_priv_rx_put(priv_rx); 432 414 dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); 433 415 kfree(buf); 434 416 } ··· 449 431 return false; 450 432 451 433 resync = &priv_rx->resync; 452 - refcount_inc(&resync->refcnt); 434 + mlx5e_ktls_priv_rx_get(priv_rx); 453 435 if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) 454 - refcount_dec(&resync->refcnt); 436 + mlx5e_ktls_priv_rx_put(priv_rx); 455 437 456 438 return true; 457 439 } ··· 643 625 return err; 644 626 } 645 627 646 - /* Elevated refcount on the resync object means there are 647 - * outstanding operations (uncompleted GET_PSV WQEs) that 648 - * will read the resync / priv_rx objects once completed. 649 - * Wait for them to avoid use-after-free. 
650 - */ 651 - static void wait_for_resync(struct net_device *netdev, 652 - struct mlx5e_ktls_rx_resync_ctx *resync) 653 - { 654 - #define MLX5E_KTLS_RX_RESYNC_TIMEOUT 20000 /* msecs */ 655 - unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5E_KTLS_RX_RESYNC_TIMEOUT); 656 - unsigned int refcnt; 657 - 658 - do { 659 - refcnt = refcount_read(&resync->refcnt); 660 - if (refcnt == 1) 661 - return; 662 - 663 - msleep(20); 664 - } while (time_before(jiffies, exp_time)); 665 - 666 - netdev_warn(netdev, 667 - "Failed waiting for kTLS RX resync refcnt to be released (%u).\n", 668 - refcnt); 669 - } 670 - 671 628 void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) 672 629 { 673 630 struct mlx5e_ktls_offload_context_rx *priv_rx; ··· 656 663 priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); 657 664 set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); 658 665 mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); 659 - synchronize_rcu(); /* Sync with NAPI */ 666 + synchronize_net(); /* Sync with NAPI */ 660 667 if (!cancel_work_sync(&priv_rx->rule.work)) 661 668 /* completion is needed, as the priv_rx in the add flow 662 669 * is maintained on the wqe info (wi), not on the socket. ··· 664 671 wait_for_completion(&priv_rx->add_ctx); 665 672 resync = &priv_rx->resync; 666 673 if (cancel_work_sync(&resync->work)) 667 - refcount_dec(&resync->refcnt); 668 - wait_for_resync(netdev, resync); 674 + mlx5e_ktls_priv_rx_put(priv_rx); 669 675 670 676 priv_rx->stats->tls_del++; 671 677 if (priv_rx->rule.rule) ··· 672 680 673 681 mlx5_core_destroy_tir(mdev, priv_rx->tirn); 674 682 mlx5_ktls_destroy_key(mdev, priv_rx->key_id); 675 - kfree(priv_rx); 683 + /* priv_rx should normally be freed here, but if there is an outstanding 684 + * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is 685 + * processed. 686 + */ 687 + mlx5e_ktls_priv_rx_put(priv_rx); 676 688 }
+26 -13
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
··· 536 536 #define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT 537 537 538 538 static void 539 - mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) 539 + mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) 540 540 { 541 541 struct mlx5_core_dev *mdev = priv->mdev; 542 542 int tc; ··· 551 551 coal->tx_coalesce_usecs, 552 552 coal->tx_max_coalesced_frames); 553 553 } 554 + } 555 + } 556 + 557 + static void 558 + mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) 559 + { 560 + struct mlx5_core_dev *mdev = priv->mdev; 561 + int i; 562 + 563 + for (i = 0; i < priv->channels.num; ++i) { 564 + struct mlx5e_channel *c = priv->channels.c[i]; 554 565 555 566 mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, 556 567 coal->rx_coalesce_usecs, ··· 608 597 tx_moder->pkts = coal->tx_max_coalesced_frames; 609 598 new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; 610 599 611 - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 612 - priv->channels.params = new_channels.params; 613 - goto out; 614 - } 615 - /* we are opened */ 616 - 617 600 reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; 618 601 reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; 619 - 620 - if (!reset_rx && !reset_tx) { 621 - mlx5e_set_priv_channels_coalesce(priv, coal); 622 - priv->channels.params = new_channels.params; 623 - goto out; 624 - } 625 602 626 603 if (reset_rx) { 627 604 u8 mode = MLX5E_GET_PFLAG(&new_channels.params, ··· 622 623 MLX5E_PFLAG_TX_CQE_BASED_MODER); 623 624 624 625 mlx5e_reset_tx_moderation(&new_channels.params, mode); 626 + } 627 + 628 + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 629 + priv->channels.params = new_channels.params; 630 + goto out; 631 + } 632 + 633 + if (!reset_rx && !reset_tx) { 634 + if (!coal->use_adaptive_rx_coalesce) 635 + 
mlx5e_set_priv_channels_rx_coalesce(priv, coal); 636 + if (!coal->use_adaptive_tx_coalesce) 637 + mlx5e_set_priv_channels_tx_coalesce(priv, coal); 638 + priv->channels.params = new_channels.params; 639 + goto out; 625 640 } 626 641 627 642 err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+16 -9
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 67 67 #include "en/ptp.h" 68 68 #include "qos.h" 69 69 #include "en/trap.h" 70 + #include "fpga/ipsec.h" 70 71 71 72 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) 72 73 { ··· 109 108 if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) 110 109 return false; 111 110 112 - if (MLX5_IPSEC_DEV(mdev)) 111 + if (mlx5_fpga_is_ipsec_device(mdev)) 113 112 return false; 114 113 115 114 if (params->xdp_prog) { ··· 948 947 void mlx5e_deactivate_rq(struct mlx5e_rq *rq) 949 948 { 950 949 clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); 951 - synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ 950 + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ 952 951 } 953 952 954 953 void mlx5e_close_rq(struct mlx5e_rq *rq) ··· 1402 1401 struct mlx5_wq_cyc *wq = &sq->wq; 1403 1402 1404 1403 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 1405 - synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ 1404 + synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ 1406 1405 1407 1406 mlx5e_tx_disable_queue(sq->txq); 1408 1407 ··· 1477 1476 void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) 1478 1477 { 1479 1478 clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); 1480 - synchronize_rcu(); /* Sync with NAPI. */ 1479 + synchronize_net(); /* Sync with NAPI. */ 1481 1480 } 1482 1481 1483 1482 void mlx5e_close_icosq(struct mlx5e_icosq *sq) ··· 1556 1555 struct mlx5e_channel *c = sq->channel; 1557 1556 1558 1557 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 1559 - synchronize_rcu(); /* Sync with NAPI. */ 1558 + synchronize_net(); /* Sync with NAPI. 
*/ 1560 1559 1561 1560 mlx5e_destroy_sq(c->mdev, sq->sqn); 1562 1561 mlx5e_free_xdpsq_descs(sq); ··· 1880 1879 1881 1880 mlx5e_build_create_cq_param(&ccp, c); 1882 1881 1883 - err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, 1882 + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, 1884 1883 &c->async_icosq.cq); 1885 1884 if (err) 1886 1885 return err; 1887 1886 1888 - err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, 1887 + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, 1889 1888 &c->icosq.cq); 1890 1889 if (err) 1891 1890 goto err_close_async_icosq_cq; ··· 2123 2122 u32 buf_size = 0; 2124 2123 int i; 2125 2124 2125 + <<<<<<< HEAD 2126 2126 if (MLX5_IPSEC_DEV(mdev)) 2127 + ======= 2128 + #ifdef CONFIG_MLX5_EN_IPSEC 2129 + if (mlx5_fpga_is_ipsec_device(mdev)) 2130 + >>>>>>> 3af409ca278d4a8d50e91f9f7c4c33b175645cf3 2127 2131 byte_count += MLX5E_METADATA_ETHER_LEN; 2128 2132 2129 2133 if (mlx5e_rx_is_linear_skb(params, xsk)) { ··· 4596 4590 return -EINVAL; 4597 4591 } 4598 4592 4599 - if (MLX5_IPSEC_DEV(priv->mdev)) { 4600 - netdev_warn(netdev, "can't set XDP with IPSec offload\n"); 4593 + if (mlx5_fpga_is_ipsec_device(priv->mdev)) { 4594 + netdev_warn(netdev, 4595 + "XDP is not available on Innova cards with IPsec support\n"); 4601 4596 return -EINVAL; 4602 4597 } 4603 4598
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 1783 1783 rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; 1784 1784 1785 1785 rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; 1786 - if (MLX5_IPSEC_DEV(mdev)) { 1787 - netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n"); 1786 + if (mlx5_fpga_is_ipsec_device(mdev)) { 1787 + netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); 1788 1788 return -EINVAL; 1789 1789 } 1790 1790 if (!rq->handle_rx_cqe) {
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
··· 4445 4445 */ 4446 4446 if (rate) { 4447 4447 rate = (rate * BITS_PER_BYTE) + 500000; 4448 - rate_mbps = max_t(u32, do_div(rate, 1000000), 1); 4448 + rate_mbps = max_t(u64, do_div(rate, 1000000), 1); 4449 4449 } 4450 4450 4451 4451 err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
··· 124 124 struct ida halloc; 125 125 }; 126 126 127 - static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) 127 + bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) 128 128 { 129 129 if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) 130 130 return false;
+2
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
··· 43 43 const struct mlx5_flow_cmds * 44 44 mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); 45 45 void mlx5_fpga_ipsec_build_fs_cmds(void); 46 + bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev); 46 47 #else 47 48 static inline 48 49 const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) ··· 56 55 } 57 56 58 57 static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}; 58 + static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; } 59 59 60 60 #endif /* CONFIG_MLX5_FPGA_IPSEC */ 61 61 #endif /* __MLX5_FPGA_IPSEC_H__ */
+14 -8
drivers/net/ethernet/mellanox/mlx5/core/health.c
··· 190 190 return true; 191 191 } 192 192 193 + static void enter_error_state(struct mlx5_core_dev *dev, bool force) 194 + { 195 + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ 196 + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 197 + mlx5_cmd_flush(dev); 198 + } 199 + 200 + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); 201 + } 202 + 193 203 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) 194 204 { 195 205 bool err_detected = false; ··· 218 208 goto unlock; 219 209 } 220 210 221 - if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ 222 - dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 223 - mlx5_cmd_flush(dev); 224 - } 225 - 226 - mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); 211 + enter_error_state(dev, force); 227 212 unlock: 228 213 mutex_unlock(&dev->intf_state_mutex); 229 214 } ··· 618 613 priv = container_of(health, struct mlx5_priv, health); 619 614 dev = container_of(priv, struct mlx5_core_dev, priv); 620 615 621 - mlx5_enter_error_state(dev, false); 616 + enter_error_state(dev, false); 622 617 if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { 623 618 if (mlx5_health_try_recover(dev)) 624 619 mlx5_core_err(dev, "health recovery failed\n"); ··· 712 707 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); 713 708 dev->priv.health.fatal_error = fatal_error; 714 709 print_health_info(dev); 710 + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 715 711 mlx5_trigger_health_work(dev); 716 - goto out; 712 + return; 717 713 } 718 714 719 715 count = ioread32be(health->health_counter);
+2 -1
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 1445 1445 dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); 1446 1446 1447 1447 pci_save_state(pdev); 1448 - devlink_reload_enable(devlink); 1448 + if (!mlx5_core_is_mp_slave(dev)) 1449 + devlink_reload_enable(devlink); 1449 1450 return 0; 1450 1451 1451 1452 err_load_one:
+25
drivers/net/ethernet/realtek/r8169_main.c
··· 2230 2230 phy_speed_down(tp->phydev, false); 2231 2231 rtl_wol_enable_rx(tp); 2232 2232 } 2233 + 2234 + switch (tp->mac_version) { 2235 + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: 2236 + case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: 2237 + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: 2238 + case RTL_GIGA_MAC_VER_37: 2239 + case RTL_GIGA_MAC_VER_39: 2240 + case RTL_GIGA_MAC_VER_43: 2241 + case RTL_GIGA_MAC_VER_44: 2242 + case RTL_GIGA_MAC_VER_45: 2243 + case RTL_GIGA_MAC_VER_46: 2244 + case RTL_GIGA_MAC_VER_47: 2245 + case RTL_GIGA_MAC_VER_48: 2246 + case RTL_GIGA_MAC_VER_50 ... RTL_GIGA_MAC_VER_63: 2247 + RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); 2248 + break; 2249 + case RTL_GIGA_MAC_VER_40: 2250 + case RTL_GIGA_MAC_VER_41: 2251 + case RTL_GIGA_MAC_VER_49: 2252 + rtl_eri_clear_bits(tp, 0x1a8, 0xfc000000); 2253 + RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); 2254 + break; 2255 + default: 2256 + break; 2257 + } 2233 2258 } 2234 2259 2235 2260 static void rtl_init_rxcfg(struct rtl8169_private *tp)
+12 -14
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
··· 1862 1862 lp->options = XAE_OPTION_DEFAULTS; 1863 1863 lp->rx_bd_num = RX_BD_NUM_DEFAULT; 1864 1864 lp->tx_bd_num = TX_BD_NUM_DEFAULT; 1865 + 1866 + lp->clk = devm_clk_get_optional(&pdev->dev, NULL); 1867 + if (IS_ERR(lp->clk)) { 1868 + ret = PTR_ERR(lp->clk); 1869 + goto free_netdev; 1870 + } 1871 + ret = clk_prepare_enable(lp->clk); 1872 + if (ret) { 1873 + dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret); 1874 + goto free_netdev; 1875 + } 1876 + 1865 1877 /* Map device registers */ 1866 1878 ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1867 1879 lp->regs = devm_ioremap_resource(&pdev->dev, ethres); ··· 2058 2046 2059 2047 lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); 2060 2048 if (lp->phy_node) { 2061 - lp->clk = devm_clk_get(&pdev->dev, NULL); 2062 - if (IS_ERR(lp->clk)) { 2063 - dev_warn(&pdev->dev, "Failed to get clock: %ld\n", 2064 - PTR_ERR(lp->clk)); 2065 - lp->clk = NULL; 2066 - } else { 2067 - ret = clk_prepare_enable(lp->clk); 2068 - if (ret) { 2069 - dev_err(&pdev->dev, "Unable to enable clock: %d\n", 2070 - ret); 2071 - goto free_netdev; 2072 - } 2073 - } 2074 - 2075 2049 ret = axienet_mdio_setup(lp); 2076 2050 if (ret) 2077 2051 dev_warn(&pdev->dev,
+2 -2
drivers/net/ipa/ipa_main.c
··· 580 580 return -EINVAL; 581 581 582 582 for (i = 0; i < data->resource_src_count; i++) 583 - ipa_resource_config_src(ipa, data->resource_src); 583 + ipa_resource_config_src(ipa, &data->resource_src[i]); 584 584 585 585 for (i = 0; i < data->resource_dst_count; i++) 586 - ipa_resource_config_dst(ipa, data->resource_dst); 586 + ipa_resource_config_dst(ipa, &data->resource_dst[i]); 587 587 588 588 return 0; 589 589 }
+18 -37
drivers/net/phy/phy_device.c
··· 300 300 301 301 phydev->suspended_by_mdio_bus = 0; 302 302 303 - ret = phy_resume(phydev); 303 + ret = phy_init_hw(phydev); 304 304 if (ret < 0) 305 305 return ret; 306 306 307 + ret = phy_resume(phydev); 308 + if (ret < 0) 309 + return ret; 307 310 no_resume: 308 311 if (phydev->attached_dev && phydev->adjust_link) 309 312 phy_start_machine(phydev); ··· 314 311 return 0; 315 312 } 316 313 317 - static int mdio_bus_phy_restore(struct device *dev) 318 - { 319 - struct phy_device *phydev = to_phy_device(dev); 320 - struct net_device *netdev = phydev->attached_dev; 321 - int ret; 322 - 323 - if (!netdev) 324 - return 0; 325 - 326 - ret = phy_init_hw(phydev); 327 - if (ret < 0) 328 - return ret; 329 - 330 - if (phydev->attached_dev && phydev->adjust_link) 331 - phy_start_machine(phydev); 332 - 333 - return 0; 334 - } 335 - 336 - static const struct dev_pm_ops mdio_bus_phy_pm_ops = { 337 - .suspend = mdio_bus_phy_suspend, 338 - .resume = mdio_bus_phy_resume, 339 - .freeze = mdio_bus_phy_suspend, 340 - .thaw = mdio_bus_phy_resume, 341 - .restore = mdio_bus_phy_restore, 342 - }; 343 - 344 - #define MDIO_BUS_PHY_PM_OPS (&mdio_bus_phy_pm_ops) 345 - 346 - #else 347 - 348 - #define MDIO_BUS_PHY_PM_OPS NULL 349 - 314 + static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend, 315 + mdio_bus_phy_resume); 350 316 #endif /* CONFIG_PM */ 351 317 352 318 /** ··· 526 554 .name = "PHY", 527 555 .groups = phy_dev_groups, 528 556 .release = phy_device_release, 529 - .pm = MDIO_BUS_PHY_PM_OPS, 557 + .pm = pm_ptr(&mdio_bus_phy_pm_ops), 530 558 }; 531 559 532 560 static int phy_request_driver_module(struct phy_device *dev, u32 phy_id) ··· 1116 1144 if (ret < 0) 1117 1145 return ret; 1118 1146 1119 - if (phydev->drv->config_init) 1147 + if (phydev->drv->config_init) { 1120 1148 ret = phydev->drv->config_init(phydev); 1149 + if (ret < 0) 1150 + return ret; 1151 + } 1121 1152 1122 - return ret; 1153 + if (phydev->drv->config_intr) { 1154 + ret = 
phydev->drv->config_intr(phydev); 1155 + if (ret < 0) 1156 + return ret; 1157 + } 1158 + 1159 + return 0; 1123 1160 } 1124 1161 EXPORT_SYMBOL(phy_init_hw); 1125 1162
+3 -1
drivers/net/wan/lmc/lmc_main.c
··· 854 854 spin_lock_init(&sc->lmc_lock); 855 855 pci_set_master(pdev); 856 856 857 - printk(KERN_INFO "%s: detected at %lx, irq %d\n", dev->name, 857 + printk(KERN_INFO "hdlc: detected at %lx, irq %d\n", 858 858 dev->base_addr, dev->irq); 859 859 860 860 err = register_hdlc_device(dev); ··· 899 899 break; 900 900 default: 901 901 printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name); 902 + unregister_hdlc_device(dev); 903 + return -EIO; 902 904 break; 903 905 } 904 906
+1 -1
drivers/net/wireless/broadcom/b43/phy_n.c
··· 5311 5311 5312 5312 for (i = 0; i < 4; i++) { 5313 5313 if (dev->phy.rev >= 3) 5314 - table[i] = coef[i]; 5314 + coef[i] = table[i]; 5315 5315 else 5316 5316 coef[i] = 0; 5317 5317 }
+1 -2
drivers/target/iscsi/cxgbit/cxgbit_target.c
··· 86 86 if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_ISO)) 87 87 length += sizeof(struct cpl_tx_data_iso); 88 88 89 - #define MAX_IMM_TX_PKT_LEN 256 90 - return length <= MAX_IMM_TX_PKT_LEN; 89 + return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; 91 90 } 92 91 93 92 /*
+1 -1
include/linux/atmdev.h
··· 207 207 struct atm_vcc *vcc; /* ATM VCC */ 208 208 unsigned long atm_options; /* ATM layer options */ 209 209 unsigned int acct_truesize; /* truesize accounted to vcc */ 210 - }; 210 + } __packed; 211 211 212 212 #define VCC_HTABLE_SIZE 32 213 213
+1
include/net/act_api.h
··· 166 166 struct nlattr *est, struct tc_action **a, 167 167 const struct tc_action_ops *ops, int bind, 168 168 u32 flags); 169 + void tcf_idr_insert_many(struct tc_action *actions[]); 169 170 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); 170 171 int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, 171 172 struct tc_action **a, int bind);
+1
include/uapi/linux/pkt_cls.h
··· 593 593 TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ 594 594 TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ 595 595 TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */ 596 + __TCA_FLOWER_KEY_CT_FLAGS_MAX, 596 597 }; 597 598 598 599 enum {
+6 -4
kernel/bpf/verifier.c
··· 11563 11563 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 11564 11564 struct bpf_insn *patchlet; 11565 11565 struct bpf_insn chk_and_div[] = { 11566 - /* Rx div 0 -> 0 */ 11566 + /* [R,W]x div 0 -> 0 */ 11567 11567 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 11568 11568 BPF_JNE | BPF_K, insn->src_reg, 11569 11569 0, 2, 0), ··· 11572 11572 *insn, 11573 11573 }; 11574 11574 struct bpf_insn chk_and_mod[] = { 11575 - /* Rx mod 0 -> Rx */ 11575 + /* [R,W]x mod 0 -> [R,W]x */ 11576 11576 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 11577 11577 BPF_JEQ | BPF_K, insn->src_reg, 11578 - 0, 1, 0), 11578 + 0, 1 + (is64 ? 0 : 1), 0), 11579 11579 *insn, 11580 + BPF_JMP_IMM(BPF_JA, 0, 0, 1), 11581 + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), 11580 11582 }; 11581 11583 11582 11584 patchlet = isdiv ? chk_and_div : chk_and_mod; 11583 11585 cnt = isdiv ? ARRAY_SIZE(chk_and_div) : 11584 - ARRAY_SIZE(chk_and_mod); 11586 + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); 11585 11587 11586 11588 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); 11587 11589 if (!new_prog)
+21 -12
net/appletalk/ddp.c
··· 1577 1577 struct sk_buff *skb; 1578 1578 struct net_device *dev; 1579 1579 struct ddpehdr *ddp; 1580 - int size; 1581 - struct atalk_route *rt; 1580 + int size, hard_header_len; 1581 + struct atalk_route *rt, *rt_lo = NULL; 1582 1582 int err; 1583 1583 1584 1584 if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) ··· 1641 1641 SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", 1642 1642 sk, size, dev->name); 1643 1643 1644 - size += dev->hard_header_len; 1644 + hard_header_len = dev->hard_header_len; 1645 + /* Leave room for loopback hardware header if necessary */ 1646 + if (usat->sat_addr.s_node == ATADDR_BCAST && 1647 + (dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) { 1648 + struct atalk_addr at_lo; 1649 + 1650 + at_lo.s_node = 0; 1651 + at_lo.s_net = 0; 1652 + 1653 + rt_lo = atrtr_find(&at_lo); 1654 + 1655 + if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len) 1656 + hard_header_len = rt_lo->dev->hard_header_len; 1657 + } 1658 + 1659 + size += hard_header_len; 1645 1660 release_sock(sk); 1646 1661 skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); 1647 1662 lock_sock(sk); ··· 1664 1649 goto out; 1665 1650 1666 1651 skb_reserve(skb, ddp_dl->header_length); 1667 - skb_reserve(skb, dev->hard_header_len); 1652 + skb_reserve(skb, hard_header_len); 1668 1653 skb->dev = dev; 1669 1654 1670 1655 SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); ··· 1715 1700 /* loop back */ 1716 1701 skb_orphan(skb); 1717 1702 if (ddp->deh_dnode == ATADDR_BCAST) { 1718 - struct atalk_addr at_lo; 1719 - 1720 - at_lo.s_node = 0; 1721 - at_lo.s_net = 0; 1722 - 1723 - rt = atrtr_find(&at_lo); 1724 - if (!rt) { 1703 + if (!rt_lo) { 1725 1704 kfree_skb(skb); 1726 1705 err = -ENETUNREACH; 1727 1706 goto out; 1728 1707 } 1729 - dev = rt->dev; 1708 + dev = rt_lo->dev; 1730 1709 skb->dev = dev; 1731 1710 } 1732 1711 ddp_dl->request(ddp_dl, skb, dev->dev_addr);
+1 -4
net/caif/chnl_net.c
··· 115 115 else 116 116 skb->ip_summed = CHECKSUM_NONE; 117 117 118 - if (in_interrupt()) 119 - netif_rx(skb); 120 - else 121 - netif_rx_ni(skb); 118 + netif_rx_any_context(skb); 122 119 123 120 /* Update statistics. */ 124 121 priv->netdev->stats.rx_packets++;
+3 -3
net/core/flow_dissector.c
··· 1056 1056 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1057 1057 } 1058 1058 1059 + __skb_flow_dissect_ipv4(skb, flow_dissector, 1060 + target_container, data, iph); 1061 + 1059 1062 if (ip_is_fragment(iph)) { 1060 1063 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 1061 1064 ··· 1074 1071 } 1075 1072 } 1076 1073 } 1077 - 1078 - __skb_flow_dissect_ipv4(skb, flow_dissector, 1079 - target_container, data, iph); 1080 1074 1081 1075 break; 1082 1076 }
+5 -5
net/mptcp/options.c
··· 508 508 { 509 509 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 510 510 struct mptcp_sock *msk = mptcp_sk(subflow->conn); 511 + u64 snd_data_fin_enable, ack_seq; 511 512 unsigned int dss_size = 0; 512 - u64 snd_data_fin_enable; 513 513 struct mptcp_ext *mpext; 514 514 unsigned int ack_size; 515 515 bool ret = false; ··· 541 541 return ret; 542 542 } 543 543 544 + ack_seq = READ_ONCE(msk->ack_seq); 544 545 if (READ_ONCE(msk->use_64bit_ack)) { 545 546 ack_size = TCPOLEN_MPTCP_DSS_ACK64; 546 - opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); 547 + opts->ext_copy.data_ack = ack_seq; 547 548 opts->ext_copy.ack64 = 1; 548 549 } else { 549 550 ack_size = TCPOLEN_MPTCP_DSS_ACK32; 550 - opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq); 551 + opts->ext_copy.data_ack32 = (uint32_t)ack_seq; 551 552 opts->ext_copy.ack64 = 0; 552 553 } 553 554 opts->ext_copy.use_ack = 1; ··· 919 918 msk->wnd_end = new_wnd_end; 920 919 921 920 /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ 922 - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) && 923 - sk_stream_memory_free(ssk)) 921 + if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) 924 922 __mptcp_check_push(sk, ssk); 925 923 926 924 if (after64(new_snd_una, old_snd_una)) {
+36 -19
net/mptcp/protocol.c
··· 363 363 364 364 /* Look for an acknowledged DATA_FIN */ 365 365 if (mptcp_pending_data_fin_ack(sk)) { 366 - mptcp_stop_timer(sk); 367 - 368 366 WRITE_ONCE(msk->snd_data_fin_enable, 0); 369 367 370 368 switch (sk->sk_state) { ··· 456 458 static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) 457 459 { 458 460 struct sock *ack_hint = READ_ONCE(msk->ack_hint); 461 + int old_space = READ_ONCE(msk->old_wspace); 459 462 struct mptcp_subflow_context *subflow; 463 + struct sock *sk = (struct sock *)msk; 464 + bool cleanup; 465 + 466 + /* this is a simple superset of what tcp_cleanup_rbuf() implements 467 + * so that we don't have to acquire the ssk socket lock most of the time 468 + * to do actually nothing 469 + */ 470 + cleanup = __mptcp_space(sk) - old_space >= max(0, old_space); 471 + if (!cleanup) 472 + return; 460 473 461 474 /* if the hinted ssk is still active, try to use it */ 462 475 if (likely(ack_hint)) { ··· 1574 1565 mptcp_set_timeout(sk, ssk); 1575 1566 tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, 1576 1567 info.size_goal); 1568 + if (!mptcp_timer_pending(sk)) 1569 + mptcp_reset_timer(sk); 1570 + 1577 1571 if (msk->snd_data_fin_enable && 1578 1572 msk->snd_nxt + 1 == msk->write_seq) 1579 1573 mptcp_schedule_work(sk); ··· 1880 1868 skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); 1881 1869 } 1882 1870 1883 - static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) 1871 + static bool __mptcp_move_skbs(struct mptcp_sock *msk) 1884 1872 { 1885 1873 struct sock *sk = (struct sock *)msk; 1886 1874 unsigned int moved = 0; ··· 1900 1888 1901 1889 slowpath = lock_sock_fast(ssk); 1902 1890 mptcp_data_lock(sk); 1891 + __mptcp_update_rmem(sk); 1903 1892 done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); 1904 1893 mptcp_data_unlock(sk); 1905 - if (moved && rcv) { 1906 - WRITE_ONCE(msk->rmem_pending, min(rcv, moved)); 1907 - tcp_cleanup_rbuf(ssk, 1); 1908 - WRITE_ONCE(msk->rmem_pending, 0); 1909 - } 1894 + 
tcp_cleanup_rbuf(ssk, moved); 1910 1895 unlock_sock_fast(ssk, slowpath); 1911 1896 } while (!done); 1912 1897 ··· 1916 1907 ret |= __mptcp_ofo_queue(msk); 1917 1908 __mptcp_splice_receive_queue(sk); 1918 1909 mptcp_data_unlock(sk); 1910 + mptcp_cleanup_rbuf(msk); 1919 1911 } 1920 1912 if (ret) 1921 1913 mptcp_check_data_fin((struct sock *)msk); ··· 1946 1936 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1947 1937 1948 1938 while (copied < len) { 1949 - int bytes_read, old_space; 1939 + int bytes_read; 1950 1940 1951 1941 bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); 1952 1942 if (unlikely(bytes_read < 0)) { ··· 1957 1947 1958 1948 copied += bytes_read; 1959 1949 1960 - if (skb_queue_empty(&msk->receive_queue) && 1961 - __mptcp_move_skbs(msk, len - copied)) 1962 - continue; 1963 - 1964 1950 /* be sure to advertise window change */ 1965 - old_space = READ_ONCE(msk->old_wspace); 1966 - if ((tcp_space(sk) - old_space) >= old_space) 1967 - mptcp_cleanup_rbuf(msk); 1951 + mptcp_cleanup_rbuf(msk); 1952 + 1953 + if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) 1954 + continue; 1968 1955 1969 1956 /* only the master socket status is relevant here. The exit 1970 1957 * conditions mirror closely tcp_recvmsg() ··· 1989 1982 /* race breaker: the shutdown could be after the 1990 1983 * previous receive queue check 1991 1984 */ 1992 - if (__mptcp_move_skbs(msk, len - copied)) 1985 + if (__mptcp_move_skbs(msk)) 1993 1986 continue; 1994 1987 break; 1995 1988 } ··· 2022 2015 /* .. race-breaker: ssk might have gotten new data 2023 2016 * after last __mptcp_move_skbs() returned false. 
2024 2017 */ 2025 - if (unlikely(__mptcp_move_skbs(msk, 0))) 2018 + if (unlikely(__mptcp_move_skbs(msk))) 2026 2019 set_bit(MPTCP_DATA_READY, &msk->flags); 2027 2020 } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) { 2028 2021 /* data to read but mptcp_wait_data() cleared DATA_READY */ ··· 2282 2275 if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) 2283 2276 goto unlock; 2284 2277 2278 + __mptcp_clean_una(sk); 2285 2279 dfrag = mptcp_rtx_head(sk); 2286 2280 if (!dfrag) 2287 2281 goto unlock; ··· 2951 2943 mptcp_push_pending(sk, 0); 2952 2944 spin_lock_bh(&sk->sk_lock.slock); 2953 2945 } 2946 + if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) 2947 + __mptcp_error_report(sk); 2954 2948 2955 2949 /* clear any wmem reservation and errors */ 2956 2950 __mptcp_update_wmem(sk); ··· 3329 3319 struct sock *sk = (struct sock *)msk; 3330 3320 3331 3321 if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) 3332 - return 0; 3322 + return EPOLLOUT | EPOLLWRNORM; 3333 3323 3334 3324 if (sk_stream_is_writeable(sk)) 3335 3325 return EPOLLOUT | EPOLLWRNORM; ··· 3362 3352 mask |= mptcp_check_readable(msk); 3363 3353 mask |= mptcp_check_writeable(msk); 3364 3354 } 3355 + if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) 3356 + mask |= EPOLLHUP; 3365 3357 if (sk->sk_shutdown & RCV_SHUTDOWN) 3366 3358 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; 3359 + 3360 + /* This barrier is coupled with smp_wmb() in tcp_reset() */ 3361 + smp_rmb(); 3362 + if (sk->sk_err) 3363 + mask |= EPOLLERR; 3367 3364 3368 3365 return mask; 3369 3366 }
+8 -12
net/mptcp/protocol.h
··· 102 102 #define MPTCP_WORK_CLOSE_SUBFLOW 5 103 103 #define MPTCP_PUSH_PENDING 6 104 104 #define MPTCP_CLEAN_UNA 7 105 + #define MPTCP_ERROR_REPORT 8 105 106 106 107 static inline bool before64(__u64 seq1, __u64 seq2) 107 108 { ··· 238 237 u64 wnd_end; 239 238 unsigned long timer_ival; 240 239 u32 token; 241 - int rmem_pending; 242 240 int rmem_released; 243 241 unsigned long flags; 244 242 bool can_ack; ··· 301 301 302 302 static inline int __mptcp_space(const struct sock *sk) 303 303 { 304 - return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending); 304 + return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); 305 305 } 306 306 307 307 static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) ··· 334 334 return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); 335 335 } 336 336 337 - static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) 338 - { 339 - struct mptcp_sock *msk = mptcp_sk(sk); 340 - 341 - if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una))) 342 - return NULL; 343 - 344 - return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); 345 - } 346 - 347 337 static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) 348 338 { 349 339 struct mptcp_sock *msk = mptcp_sk(sk); 340 + 341 + if (msk->snd_una == READ_ONCE(msk->snd_nxt)) 342 + return NULL; 350 343 351 344 return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); 352 345 } ··· 429 436 void (*tcp_data_ready)(struct sock *sk); 430 437 void (*tcp_state_change)(struct sock *sk); 431 438 void (*tcp_write_space)(struct sock *sk); 439 + void (*tcp_error_report)(struct sock *sk); 432 440 433 441 struct rcu_head rcu; 434 442 }; ··· 554 560 sk->sk_data_ready = ctx->tcp_data_ready; 555 561 sk->sk_state_change = ctx->tcp_state_change; 556 562 sk->sk_write_space = ctx->tcp_write_space; 563 + sk->sk_error_report = ctx->tcp_error_report; 557 564 558 565 inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; 
559 566 } ··· 582 587 bool mptcp_schedule_work(struct sock *sk); 583 588 void __mptcp_check_push(struct sock *sk, struct sock *ssk); 584 589 void __mptcp_data_acked(struct sock *sk); 590 + void __mptcp_error_report(struct sock *sk); 585 591 void mptcp_subflow_eof(struct sock *sk); 586 592 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); 587 593 void __mptcp_flush_join_list(struct mptcp_sock *msk);
+59 -24
net/mptcp/subflow.c
··· 100 100 return msk; 101 101 } 102 102 103 - static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener) 103 + static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener) 104 104 { 105 105 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); 106 106 ··· 108 108 subflow_req->mp_join = 0; 109 109 subflow_req->msk = NULL; 110 110 mptcp_token_init_request(req); 111 - 112 - #ifdef CONFIG_TCP_MD5SIG 113 - /* no MPTCP if MD5SIG is enabled on this socket or we may run out of 114 - * TCP option space. 115 - */ 116 - if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) 117 - return -EINVAL; 118 - #endif 119 - 120 - return 0; 121 111 } 122 112 123 113 static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk) ··· 120 130 * Returns an error code if a JOIN has failed and a TCP reset 121 131 * should be sent. 122 132 */ 123 - static int subflow_init_req(struct request_sock *req, 124 - const struct sock *sk_listener, 125 - struct sk_buff *skb) 133 + static int subflow_check_req(struct request_sock *req, 134 + const struct sock *sk_listener, 135 + struct sk_buff *skb) 126 136 { 127 137 struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); 128 138 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); 129 139 struct mptcp_options_received mp_opt; 130 - int ret; 131 140 132 141 pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); 133 142 134 - ret = __subflow_init_req(req, sk_listener); 135 - if (ret) 136 - return 0; 143 + #ifdef CONFIG_TCP_MD5SIG 144 + /* no MPTCP if MD5SIG is enabled on this socket or we may run out of 145 + * TCP option space. 
146 + */ 147 + if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) 148 + return -EINVAL; 149 + #endif 137 150 138 151 mptcp_get_options(skb, &mp_opt); 139 152 ··· 229 236 struct mptcp_options_received mp_opt; 230 237 int err; 231 238 232 - err = __subflow_init_req(req, sk_listener); 233 - if (err) 234 - return err; 235 - 239 + subflow_init_req(req, sk_listener); 236 240 mptcp_get_options(skb, &mp_opt); 237 241 238 242 if (mp_opt.mp_capable && mp_opt.mp_join) ··· 269 279 int err; 270 280 271 281 tcp_rsk(req)->is_mptcp = 1; 282 + subflow_init_req(req, sk); 272 283 273 284 dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req); 274 285 if (!dst) 275 286 return NULL; 276 287 277 - err = subflow_init_req(req, sk, skb); 288 + err = subflow_check_req(req, sk, skb); 278 289 if (err == 0) 279 290 return dst; 280 291 ··· 295 304 int err; 296 305 297 306 tcp_rsk(req)->is_mptcp = 1; 307 + subflow_init_req(req, sk); 298 308 299 309 dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req); 300 310 if (!dst) 301 311 return NULL; 302 312 303 - err = subflow_init_req(req, sk, skb); 313 + err = subflow_check_req(req, sk, skb); 304 314 if (err == 0) 305 315 return dst; 306 316 ··· 1116 1124 mptcp_write_space(sk); 1117 1125 } 1118 1126 1127 + void __mptcp_error_report(struct sock *sk) 1128 + { 1129 + struct mptcp_subflow_context *subflow; 1130 + struct mptcp_sock *msk = mptcp_sk(sk); 1131 + 1132 + mptcp_for_each_subflow(msk, subflow) { 1133 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1134 + int err = sock_error(ssk); 1135 + 1136 + if (!err) 1137 + continue; 1138 + 1139 + /* only propagate errors on fallen-back sockets or 1140 + * on MPC connect 1141 + */ 1142 + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) 1143 + continue; 1144 + 1145 + inet_sk_state_store(sk, inet_sk_state_load(ssk)); 1146 + sk->sk_err = -err; 1147 + 1148 + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ 1149 + smp_wmb(); 1150 + sk->sk_error_report(sk); 1151 + 
break; 1152 + } 1153 + } 1154 + 1155 + static void subflow_error_report(struct sock *ssk) 1156 + { 1157 + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; 1158 + 1159 + mptcp_data_lock(sk); 1160 + if (!sock_owned_by_user(sk)) 1161 + __mptcp_error_report(sk); 1162 + else 1163 + set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags); 1164 + mptcp_data_unlock(sk); 1165 + } 1166 + 1119 1167 static struct inet_connection_sock_af_ops * 1120 1168 subflow_default_af_ops(struct sock *sk) 1121 1169 { ··· 1502 1470 ctx->tcp_data_ready = sk->sk_data_ready; 1503 1471 ctx->tcp_state_change = sk->sk_state_change; 1504 1472 ctx->tcp_write_space = sk->sk_write_space; 1473 + ctx->tcp_error_report = sk->sk_error_report; 1505 1474 sk->sk_data_ready = subflow_data_ready; 1506 1475 sk->sk_write_space = subflow_write_space; 1507 1476 sk->sk_state_change = subflow_state_change; 1477 + sk->sk_error_report = subflow_error_report; 1508 1478 out: 1509 1479 return err; 1510 1480 } ··· 1560 1526 new_ctx->tcp_data_ready = old_ctx->tcp_data_ready; 1561 1527 new_ctx->tcp_state_change = old_ctx->tcp_state_change; 1562 1528 new_ctx->tcp_write_space = old_ctx->tcp_write_space; 1529 + new_ctx->tcp_error_report = old_ctx->tcp_error_report; 1563 1530 new_ctx->rel_write_seq = 1; 1564 1531 new_ctx->tcp_sock = newsk; 1565 1532
+1 -1
net/sched/act_api.c
··· 908 908 [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), 909 909 }; 910 910 911 - static void tcf_idr_insert_many(struct tc_action *actions[]) 911 + void tcf_idr_insert_many(struct tc_action *actions[]) 912 912 { 913 913 int i; 914 914
+1
net/sched/cls_api.c
··· 3060 3060 act->type = exts->type = TCA_OLD_COMPAT; 3061 3061 exts->actions[0] = act; 3062 3062 exts->nr_actions = 1; 3063 + tcf_idr_insert_many(exts->actions); 3063 3064 } else if (exts->action && tb[exts->action]) { 3064 3065 int err; 3065 3066
+37 -2
net/sched/cls_flower.c
··· 30 30 31 31 #include <uapi/linux/netfilter/nf_conntrack_common.h> 32 32 33 + #define TCA_FLOWER_KEY_CT_FLAGS_MAX \ 34 + ((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1) 35 + #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ 36 + (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) 37 + 33 38 struct fl_flow_key { 34 39 struct flow_dissector_key_meta meta; 35 40 struct flow_dissector_key_control control; ··· 695 690 [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, 696 691 [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, 697 692 [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, 698 - [TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 }, 699 - [TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 }, 693 + [TCA_FLOWER_KEY_CT_STATE] = 694 + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), 695 + [TCA_FLOWER_KEY_CT_STATE_MASK] = 696 + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), 700 697 [TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 }, 701 698 [TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 }, 702 699 [TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 }, ··· 1401 1394 return 0; 1402 1395 } 1403 1396 1397 + static int fl_validate_ct_state(u16 state, struct nlattr *tb, 1398 + struct netlink_ext_ack *extack) 1399 + { 1400 + if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { 1401 + NL_SET_ERR_MSG_ATTR(extack, tb, 1402 + "no trk, so no other flag can be set"); 1403 + return -EINVAL; 1404 + } 1405 + 1406 + if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW && 1407 + state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) { 1408 + NL_SET_ERR_MSG_ATTR(extack, tb, 1409 + "new and est are mutually exclusive"); 1410 + return -EINVAL; 1411 + } 1412 + 1413 + return 0; 1414 + } 1415 + 1404 1416 static int fl_set_key_ct(struct nlattr **tb, 1405 1417 struct flow_dissector_key_ct *key, 1406 1418 struct flow_dissector_key_ct *mask, 1407 1419 struct netlink_ext_ack *extack) 1408 1420 { 1409 1421 if (tb[TCA_FLOWER_KEY_CT_STATE]) { 1422 + int err; 1423 + 1410 1424 if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) { 1411 1425 
NL_SET_ERR_MSG(extack, "Conntrack isn't enabled"); 1412 1426 return -EOPNOTSUPP; ··· 1435 1407 fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE, 1436 1408 &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, 1437 1409 sizeof(key->ct_state)); 1410 + 1411 + err = fl_validate_ct_state(mask->ct_state, 1412 + tb[TCA_FLOWER_KEY_CT_STATE_MASK], 1413 + extack); 1414 + if (err) 1415 + return err; 1416 + 1438 1417 } 1439 1418 if (tb[TCA_FLOWER_KEY_CT_ZONE]) { 1440 1419 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
+3 -7
tools/testing/selftests/bpf/test_xdp_redirect.sh
··· 1 - #!/bin/sh 1 + #!/bin/bash 2 2 # Create 2 namespaces with two veth peers, and 3 3 # forward packets in-between using generic XDP 4 4 # ··· 57 57 ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null 58 58 ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null 59 59 60 - ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null 61 - local ret1=$? 62 - ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null 63 - local ret2=$? 64 - 65 - if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then 60 + if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null && 61 + ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then 66 62 echo "selftests: test_xdp_redirect $xdpmode [PASS]"; 67 63 else 68 64 ret=1
+37 -1
tools/testing/selftests/net/forwarding/tc_flower.sh
··· 3 3
4 4 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
5 5 match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \
6 - match_ip_tos_test match_indev_test match_mpls_label_test \
6 + match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \
7 7 match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
8 8 match_mpls_lse_test"
9 9 NUM_NETIFS=2
··· 310 310 tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
311 311
312 312 log_test "ip_tos match ($tcflags)"
313 + }
314 +
315 + match_ip_ttl_test()
316 + {
317 + RET=0
318 +
319 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
320 + $tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop
321 + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
322 + $tcflags dst_ip 192.0.2.2 action drop
323 +
324 + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
325 + -t ip "ttl=63" -q
326 +
327 + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
328 + -t ip "ttl=63,mf,frag=256" -q
329 +
330 + tc_check_packets "dev $h2 ingress" 102 1
331 + check_fail $? "Matched on the wrong filter (no check on ttl)"
332 +
333 + tc_check_packets "dev $h2 ingress" 101 2
334 + check_err $? "Did not match on correct filter (ttl=63)"
335 +
336 + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
337 + -t ip "ttl=255" -q
338 +
339 + tc_check_packets "dev $h2 ingress" 101 3
340 + check_fail $? "Matched on a wrong filter (ttl=63)"
341 +
342 + tc_check_packets "dev $h2 ingress" 102 1
343 + check_err $? "Did not match on correct filter (no check on ttl)"
344 +
345 + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
346 + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
347 +
348 + log_test "ip_ttl match ($tcflags)"
313 349 }
314 350
315 351 match_indev_test()