
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

1) Verify netlink attributes properly in nf_queue, from Eric Dumazet.

2) Need to bump memory lock rlimit for test_sockmap bpf test, from
Yonghong Song.

3) Fix VLAN handling in lan78xx driver, from Dave Stevenson.

4) Fix uninitialized read in nf_log, from Jann Horn.

5) Fix raw command length parsing in mlx5, from Alex Vesker.

6) Cleanup loopback RDS connections upon netns deletion, from Sowmini
Varadhan.

7) Fix regressions in FIB rule matching during create, from Jason A.
Donenfeld and Roopa Prabhu.

8) Fix mpls ether type detection in nfp, from Pieter Jansen van Vuuren.

9) More bpfilter build fixes/adjustments from Masahiro Yamada.

10) Fix XDP_{TX,REDIRECT} flushing in various drivers, from Jesper
Dangaard Brouer.

11) fib_tests.sh file permissions were broken, from Shuah Khan.

12) Make sure BH/preemption is disabled in data path of mac80211, from
Denis Kenzior.

13) Don't ignore nla_parse_nested() return values in nl80211, from
Johannes Berg.

14) Properly account sock objects to kmemcg, from Shakeel Butt.

15) Adjustments to setting bpf program permissions to read-only, from
Daniel Borkmann.

16) TCP Fast Open key endianness was broken, it always took on the host
endianness. Whoops. Explicitly make it little endian (a short sketch of
the idea follows this list). From Yuchung Cheng.

17) Fix prefix route setting for link local addresses in ipv6, from
David Ahern.

18) Potential Spectre v1 in zatm driver, from Gustavo A. R. Silva.

19) Various bpf sockmap fixes, from John Fastabend.

20) Use after free for GRO with ESP, from Sabrina Dubroca.

21) Passing bogus flags to crypto_alloc_shash() in ipv6 SR code, from
Eric Biggers.
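
The endianness bug in (16) is easy to demonstrate outside the kernel:
copying host-order words into a byte-wise key yields different stored
bytes on big- and little-endian machines, while converting through an
explicit little-endian step does not. A minimal userspace sketch of the
idea (htole32() from <endian.h>; an illustration, not the actual TCP
Fast Open sysctl code):

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Store a 16-byte key as four 32-bit words in a fixed byte order,
     * so the serialized form is identical regardless of host
     * endianness.
     */
    static void key_to_wire(const uint32_t key[4], uint8_t out[16])
    {
            int i;

            for (i = 0; i < 4; i++) {
                    uint32_t le = htole32(key[i]);  /* explicit order */

                    memcpy(out + 4 * i, &le, 4);
            }
    }

    int main(void)
    {
            uint32_t key[4] = { 0x11223344, 0x55667788,
                                0x99aabbcc, 0xddeeff00 };
            uint8_t wire[16];
            int i;

            key_to_wire(key, wire);
            for (i = 0; i < 16; i++)
                    printf("%02x", wire[i]);
            printf("\n");   /* same output on any host */
            return 0;
    }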

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (87 commits)
qede: Adverstise software timestamp caps when PHC is not available.
qed: Fix use of incorrect size in memcpy call.
qed: Fix setting of incorrect eswitch mode.
qed: Limit msix vectors in kdump kernel to the minimum required count.
ipvlan: call dev_change_flags when ipvlan mode is reset
ipv6: sr: fix passing wrong flags to crypto_alloc_shash()
net: fix use-after-free in GRO with ESP
tcp: prevent bogus FRTO undos with non-SACK flows
bpf: sockhash, add release routine
bpf: sockhash fix omitted bucket lock in sock_close
bpf: sockmap, fix smap_list_map_remove when psock is in many maps
bpf: sockmap, fix crash when ipv6 sock is added
net: fib_rules: bring back rule_exists to match rule during add
hv_netvsc: split sub-channel setup into async and sync
net: use dev_change_tx_queue_len() for SIOCSIFTXQLEN
atm: zatm: Fix potential Spectre v1
s390/qeth: consistently re-enable device features
s390/qeth: don't clobber buffer on async TX completion
s390/qeth: avoid using is_multicast_ether_addr_64bits on (u8 *)[6]
s390/qeth: fix race when setting MAC address
...

+1248 -623
-5
Makefile
···
 KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/cc-can-link.sh $(CC)), y)
-  CC_CAN_LINK := y
-  export CC_CAN_LINK
-endif
-
 # The expansion should be delayed until arch/$(SRCARCH)/Makefile is included.
 # Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
 # CC_VERSION_TEXT is referenced from Kconfig (so it needs export),
+1 -1
arch/arm/net/bpf_jit_32.c
···
         /* there are 2 passes here */
         bpf_jit_dump(prog->len, image_size, 2, ctx.target);
 
-        set_memory_ro((unsigned long)header, header->pages);
+        bpf_jit_binary_lock_ro(header);
         prog->bpf_func = (void *)ctx.target;
         prog->jited = 1;
         prog->jited_len = image_size;
+1
arch/s390/net/bpf_jit_comp.c
···
                 goto free_addrs;
         }
         if (bpf_jit_prog(&jit, fp)) {
+                bpf_jit_binary_free(header);
                 fp = orig_fp;
                 goto free_addrs;
         }
+1 -1
drivers/atm/iphase.c
···
         skb_queue_head_init(&iadev->rx_dma_q);
         iadev->rx_free_desc_qhead = NULL;
 
-        iadev->rx_open = kcalloc(4, iadev->num_vc, GFP_KERNEL);
+        iadev->rx_open = kcalloc(iadev->num_vc, sizeof(void *), GFP_KERNEL);
         if (!iadev->rx_open) {
                 printk(KERN_ERR DEV_LABEL "itf %d couldn't get free page\n",
                        dev->number);
+2
drivers/atm/zatm.c
···
                         return -EFAULT;
                 if (pool < 0 || pool > ZATM_LAST_POOL)
                         return -EINVAL;
+                pool = array_index_nospec(pool,
+                                          ZATM_LAST_POOL + 1);
                 if (copy_from_user(&info,
                                    &((struct zatm_pool_req __user *) arg)->info,
                                    sizeof(info))) return -EFAULT;
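
The zatm hunk above is the stock Spectre-v1 mitigation: after the
architectural bounds check, clamp the user-controlled index with
array_index_nospec() so a mispredicted branch cannot use it to index
past the end of the table under speculation. A minimal sketch of the
pattern (the pool_info[] array and POOL_COUNT bound are hypothetical,
not the zatm code):

    #include <linux/nospec.h>

    #define POOL_COUNT 8

    static int pool_lookup(const int *pool_info, int idx)
    {
            if (idx < 0 || idx >= POOL_COUNT)
                    return -EINVAL;
            /* Bound idx under speculation as well as architecturally. */
            idx = array_index_nospec(idx, POOL_COUNT);
            return pool_info[idx];
    }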
+1 -1
drivers/infiniband/hw/mlx5/main.c
···
         dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
                              MLX5_CAP_GEN(mdev, num_vhca_ports));
 
-        if (MLX5_VPORT_MANAGER(mdev) &&
+        if (MLX5_ESWITCH_MANAGER(mdev) &&
             mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
                 dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
 
+2 -12
drivers/media/rc/bpf-lirc.c
···
         bpf_prog_array_free(rcdev->raw->progs);
 }
 
-int lirc_prog_attach(const union bpf_attr *attr)
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
-        struct bpf_prog *prog;
         struct rc_dev *rcdev;
         int ret;
 
         if (attr->attach_flags)
                 return -EINVAL;
 
-        prog = bpf_prog_get_type(attr->attach_bpf_fd,
-                                 BPF_PROG_TYPE_LIRC_MODE2);
-        if (IS_ERR(prog))
-                return PTR_ERR(prog);
-
         rcdev = rc_dev_get_from_fd(attr->target_fd);
-        if (IS_ERR(rcdev)) {
-                bpf_prog_put(prog);
+        if (IS_ERR(rcdev))
                 return PTR_ERR(rcdev);
-        }
 
         ret = lirc_bpf_attach(rcdev, prog);
-        if (ret)
-                bpf_prog_put(prog);
 
         put_device(&rcdev->dev);
 
+7 -1
drivers/net/ethernet/atheros/alx/main.c
···
         struct pci_dev *pdev = to_pci_dev(dev);
         struct alx_priv *alx = pci_get_drvdata(pdev);
         struct alx_hw *hw = &alx->hw;
+        int err;
 
         alx_reset_phy(hw);
 
         if (!netif_running(alx->dev))
                 return 0;
         netif_device_attach(alx->dev);
-        return __alx_open(alx, true);
+
+        rtnl_lock();
+        err = __alx_open(alx, true);
+        rtnl_unlock();
+
+        return err;
 }
 
 static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume);
+1
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
···
         struct link_vars link_vars;
         u32 link_cnt;
         struct bnx2x_link_report_data last_reported_link;
+        bool force_link_down;
 
         struct mdio_if_info mdio;
 
+6
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
···
 {
         struct bnx2x_link_report_data cur_data;
 
+        if (bp->force_link_down) {
+                bp->link_vars.link_up = 0;
+                return;
+        }
+
         /* reread mf_cfg */
         if (IS_PF(bp) && !CHIP_IS_E1(bp))
                 bnx2x_read_mf_cfg(bp);
···
                 bp->pending_max = 0;
         }
 
+        bp->force_link_down = false;
         if (bp->port.pmf) {
                 rc = bnx2x_initial_phy_init(bp, load_mode);
                 if (rc)
+6
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
···
         bp->sp_rtnl_state = 0;
         smp_mb();
 
+        /* Immediately indicate link as down */
+        bp->link_vars.link_up = 0;
+        bp->force_link_down = true;
+        netif_carrier_off(bp->dev);
+        BNX2X_ERR("Indicating link is down due to Tx-timeout\n");
+
         bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
         /* When ret value shows failure of allocation failure,
          * the nic is rebooted again. If open still fails, a error
+1 -1
drivers/net/ethernet/broadcom/cnic.c
···
         id_tbl->max = size;
         id_tbl->next = next;
         spin_lock_init(&id_tbl->lock);
-        id_tbl->table = kcalloc(DIV_ROUND_UP(size, 32), 4, GFP_KERNEL);
+        id_tbl->table = kcalloc(BITS_TO_LONGS(size), sizeof(long), GFP_KERNEL);
         if (!id_tbl->table)
                 return -ENOMEM;
 
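
Both this cnic hunk and the iphase one above replace hand-computed
kcalloc() sizes with expressions tied to the real element type. For a
bitmap this matters beyond style: set_bit()/test_bit() dereference the
map in unsigned long units, so a buffer sized as ceil(size/32) 32-bit
words can be accessed past its end on 64-bit hosts. BITS_TO_LONGS()
sizes the backing store in the unit the accessors actually use. A
sketch under that assumption:

    #include <linux/bitops.h>
    #include <linux/slab.h>

    /* Size the bitmap in the word width that set_bit()/test_bit()
     * dereference, so the last partial word is fully allocated on
     * both 32-bit and 64-bit hosts.
     */
    static unsigned long *alloc_id_bitmap(unsigned int nbits)
    {
            return kcalloc(BITS_TO_LONGS(nbits), sizeof(unsigned long),
                           GFP_KERNEL);
    }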
+2
drivers/net/ethernet/cadence/macb_main.c
···
         int err;
         u32 reg;
 
+        bp->queues[0].bp = bp;
+
         dev->netdev_ops = &at91ether_netdev_ops;
         dev->ethtool_ops = &macb_ethtool_ops;
 
+8 -7
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
···
 /* Default alignment for start of data in an Rx FD */
 #define DPAA_FD_DATA_ALIGNMENT  16
 
+/* The DPAA requires 256 bytes reserved and mapped for the SGT */
+#define DPAA_SGT_SIZE 256
+
 /* Values for the L3R field of the FM Parse Results
  */
 /* L3 Type field: First IP Present IPv4 */
···
 
         if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
                 nr_frags = skb_shinfo(skb)->nr_frags;
-                dma_unmap_single(dev, addr, qm_fd_get_offset(fd) +
-                                 sizeof(struct qm_sg_entry) * (1 + nr_frags),
+                dma_unmap_single(dev, addr,
+                                 qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
                                  dma_dir);
 
                 /* The sgt buffer has been allocated with netdev_alloc_frag(),
···
         void *sgt_buf;
 
         /* get a page frag to store the SGTable */
-        sz = SKB_DATA_ALIGN(priv->tx_headroom +
-                            sizeof(struct qm_sg_entry) * (1 + nr_frags));
+        sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
         sgt_buf = netdev_alloc_frag(sz);
         if (unlikely(!sgt_buf)) {
                 netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
···
         skbh = (struct sk_buff **)buffer_start;
         *skbh = skb;
 
-        addr = dma_map_single(dev, buffer_start, priv->tx_headroom +
-                              sizeof(struct qm_sg_entry) * (1 + nr_frags),
-                              dma_dir);
+        addr = dma_map_single(dev, buffer_start,
+                              priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
         if (unlikely(dma_mapping_error(dev, addr))) {
                 dev_err(dev, "DMA mapping failed");
                 err = -EINVAL;
+8
drivers/net/ethernet/freescale/fman/fman_port.c
···
 #define HWP_HXS_PHE_REPORT 0x00000800
 #define HWP_HXS_PCAC_PSTAT 0x00000100
 #define HWP_HXS_PCAC_PSTOP 0x00000001
+#define HWP_HXS_TCP_OFFSET 0xA
+#define HWP_HXS_UDP_OFFSET 0xB
+#define HWP_HXS_SH_PAD_REM 0x80000000
+
 struct fman_port_hwp_regs {
         struct {
                 u32 ssa; /* Soft Sequence Attachment */
···
                 iowrite32be(0x00000000, &regs->pmda[i].ssa);
                 iowrite32be(0xffffffff, &regs->pmda[i].lcv);
         }
+
+        /* Short packet padding removal from checksum calculation */
+        iowrite32be(HWP_HXS_SH_PAD_REM, &regs->pmda[HWP_HXS_TCP_OFFSET].ssa);
+        iowrite32be(HWP_HXS_SH_PAD_REM, &regs->pmda[HWP_HXS_UDP_OFFSET].ssa);
 
         start_port_hwp(port);
 }
+1
drivers/net/ethernet/huawei/hinic/hinic_rx.c
···
 {
         struct hinic_rq *rq = rxq->rq;
 
+        irq_set_affinity_hint(rq->irq, NULL);
         free_irq(rq->irq, rxq);
         rx_del_napi(rxq);
 }
+15 -9
drivers/net/ethernet/intel/i40e/i40e_txrx.c
···
         return true;
 }
 
-#define I40E_XDP_PASS 0
-#define I40E_XDP_CONSUMED 1
-#define I40E_XDP_TX 2
+#define I40E_XDP_PASS           0
+#define I40E_XDP_CONSUMED       BIT(0)
+#define I40E_XDP_TX             BIT(1)
+#define I40E_XDP_REDIR          BIT(2)
 
 static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
                               struct i40e_ring *xdp_ring);
···
                 break;
         case XDP_REDIRECT:
                 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-                result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED;
+                result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
                 break;
         default:
                 bpf_warn_invalid_xdp_action(act);
···
         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
         struct sk_buff *skb = rx_ring->skb;
         u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
-        bool failure = false, xdp_xmit = false;
+        unsigned int xdp_xmit = 0;
+        bool failure = false;
         struct xdp_buff xdp;
 
         xdp.rxq = &rx_ring->xdp_rxq;
···
                 }
 
                 if (IS_ERR(skb)) {
-                        if (PTR_ERR(skb) == -I40E_XDP_TX) {
-                                xdp_xmit = true;
+                        unsigned int xdp_res = -PTR_ERR(skb);
+
+                        if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+                                xdp_xmit |= xdp_res;
                                 i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
                         } else {
                                 rx_buffer->pagecnt_bias++;
···
                 total_rx_packets++;
         }
 
-        if (xdp_xmit) {
+        if (xdp_xmit & I40E_XDP_REDIR)
+                xdp_do_flush_map();
+
+        if (xdp_xmit & I40E_XDP_TX) {
                 struct i40e_ring *xdp_ring =
                         rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 
                 i40e_xdp_ring_update_tail(xdp_ring);
-                xdp_do_flush_map();
         }
 
         rx_ring->skb = skb;
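
The same conversion repeats in the ixgbe and virtio_net hunks below:
the per-poll XDP verdict goes from a bool to a bit mask because XDP_TX
and XDP_REDIRECT need different end-of-poll work, and a redirect-only
poll must flush the redirect maps without kicking the local XDP TX
ring (and vice versa). The shape of the pattern, with hypothetical
helper names standing in for the per-driver ones:

    #define XDP_RES_CONSUMED        BIT(0)
    #define XDP_RES_TX              BIT(1)
    #define XDP_RES_REDIR           BIT(2)

    unsigned int xdp_xmit = 0;      /* accumulated across the poll */

    /* Per packet: OR in the verdict instead of collapsing to a bool. */
    xdp_xmit |= handle_one_frame(rx_ring, frame);   /* hypothetical */

    /* Once per NAPI poll: do only the work the verdicts require. */
    if (xdp_xmit & XDP_RES_REDIR)
            xdp_do_flush_map();
    if (xdp_xmit & XDP_RES_TX)
            update_xdp_tx_tail(xdp_ring);           /* hypothetical */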
+14 -10
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
···
         return skb;
 }
 
-#define IXGBE_XDP_PASS 0
-#define IXGBE_XDP_CONSUMED 1
-#define IXGBE_XDP_TX 2
+#define IXGBE_XDP_PASS          0
+#define IXGBE_XDP_CONSUMED      BIT(0)
+#define IXGBE_XDP_TX            BIT(1)
+#define IXGBE_XDP_REDIR         BIT(2)
 
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
                                struct xdp_frame *xdpf);
···
         case XDP_REDIRECT:
                 err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
                 if (!err)
-                        result = IXGBE_XDP_TX;
+                        result = IXGBE_XDP_REDIR;
                 else
                         result = IXGBE_XDP_CONSUMED;
                 break;
···
         unsigned int mss = 0;
 #endif /* IXGBE_FCOE */
         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
-        bool xdp_xmit = false;
+        unsigned int xdp_xmit = 0;
         struct xdp_buff xdp;
 
         xdp.rxq = &rx_ring->xdp_rxq;
···
                 }
 
                 if (IS_ERR(skb)) {
-                        if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
-                                xdp_xmit = true;
+                        unsigned int xdp_res = -PTR_ERR(skb);
+
+                        if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+                                xdp_xmit |= xdp_res;
                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
                         } else {
                                 rx_buffer->pagecnt_bias++;
···
                 total_rx_packets++;
         }
 
-        if (xdp_xmit) {
+        if (xdp_xmit & IXGBE_XDP_REDIR)
+                xdp_do_flush_map();
+
+        if (xdp_xmit & IXGBE_XDP_TX) {
                 struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 
                 /* Force memory writes to complete before letting h/w
···
                  */
                 wmb();
                 writel(ring->next_to_use, ring->tail);
-
-                xdp_do_flush_map();
         }
 
         u64_stats_update_begin(&rx_ring->syncp);
+4 -4
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
···
         unsigned long flags;
         bool poll_cmd = ent->polling;
         int alloc_ret;
+        int cmd_mode;
 
         sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
         down(sem);
···
         set_signature(ent, !cmd->checksum_disabled);
         dump_command(dev, ent, 1);
         ent->ts1 = ktime_get_ns();
+        cmd_mode = cmd->mode;
 
         if (ent->callback)
                 schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
···
         iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
         mmiowb();
         /* if not in polling don't use ent after this point */
-        if (cmd->mode == CMD_MODE_POLLING || poll_cmd) {
+        if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
                 poll_timeout(ent);
                 /* make sure we read the descriptor after ownership is SW */
                 rmb();
···
 {
         struct mlx5_core_dev *dev = filp->private_data;
         struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-        char outlen_str[8];
+        char outlen_str[8] = {0};
         int outlen;
         void *ptr;
         int err;
···
 
         if (copy_from_user(outlen_str, buf, count))
                 return -EFAULT;
-
-        outlen_str[7] = 0;
 
         err = sscanf(outlen_str, "%d", &outlen);
         if (err < 0)
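
The outlen_str change deserves a note: copy_from_user() does not
NUL-terminate, and forcing only outlen_str[7] to zero leaves the bytes
between a short write and index 7 uninitialized, so sscanf() could run
into stack garbage. Zero-initializing the array terminates the string
for any write length. The same idiom in plain C:

    #include <stdio.h>
    #include <string.h>

    /* Parse a short decimal string that is not guaranteed to be
     * NUL-terminated (as with a buffer filled by copy_from_user()).
     */
    static int parse_small_int(const char *src, size_t len)
    {
            char buf[8] = {0};      /* every byte zeroed up front */
            int val;

            if (len > sizeof(buf) - 1)
                    return -1;
            memcpy(buf, src, len);  /* the tail stays zeroed */
            return sscanf(buf, "%d", &val) == 1 ? val : -1;
    }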
+6 -6
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
···
         mlx5e_activate_channels(&priv->channels);
         netif_tx_start_all_queues(priv->netdev);
 
-        if (MLX5_VPORT_MANAGER(priv->mdev))
+        if (MLX5_ESWITCH_MANAGER(priv->mdev))
                 mlx5e_add_sqs_fwd_rules(priv);
 
         mlx5e_wait_channels_min_rx_wqes(&priv->channels);
···
 {
         mlx5e_redirect_rqts_to_drop(priv);
 
-        if (MLX5_VPORT_MANAGER(priv->mdev))
+        if (MLX5_ESWITCH_MANAGER(priv->mdev))
                 mlx5e_remove_sqs_fwd_rules(priv);
 
         /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
···
         mlx5e_set_netdev_dev_addr(netdev);
 
 #if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-        if (MLX5_VPORT_MANAGER(mdev))
+        if (MLX5_ESWITCH_MANAGER(mdev))
                 netdev->switchdev_ops = &mlx5e_switchdev_ops;
 #endif
 
···
 
         mlx5e_enable_async_events(priv);
 
-        if (MLX5_VPORT_MANAGER(priv->mdev))
+        if (MLX5_ESWITCH_MANAGER(priv->mdev))
                 mlx5e_register_vport_reps(priv);
 
         if (netdev->reg_state != NETREG_REGISTERED)
···
 
         queue_work(priv->wq, &priv->set_rx_mode_work);
 
-        if (MLX5_VPORT_MANAGER(priv->mdev))
+        if (MLX5_ESWITCH_MANAGER(priv->mdev))
                 mlx5e_unregister_vport_reps(priv);
 
         mlx5e_disable_async_events(priv);
···
                 return NULL;
 
 #ifdef CONFIG_MLX5_ESWITCH
-        if (MLX5_VPORT_MANAGER(mdev)) {
+        if (MLX5_ESWITCH_MANAGER(mdev)) {
                 rpriv = mlx5e_alloc_nic_rep_priv(mdev);
                 if (!rpriv) {
                         mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
+6 -2
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
···
         struct mlx5e_rep_priv *rpriv = priv->ppriv;
         struct mlx5_eswitch_rep *rep;
 
-        if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+        if (!MLX5_ESWITCH_MANAGER(priv->mdev))
                 return false;
 
         rep = rpriv->rep;
···
 static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
 {
         struct mlx5e_rep_priv *rpriv = priv->ppriv;
-        struct mlx5_eswitch_rep *rep = rpriv->rep;
+        struct mlx5_eswitch_rep *rep;
 
+        if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+                return false;
+
+        rep = rpriv->rep;
         if (rep && rep->vport != FDB_UPLINK_VPORT)
                 return true;
 
+5 -7
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
···
 }
 
 /* Public E-Switch API */
-#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev))
+#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
+
 
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
         int err;
         int i, enabled_events;
 
-        if (!ESW_ALLOWED(esw))
-                return 0;
-
-        if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
+        if (!ESW_ALLOWED(esw) ||
             !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
                 esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
                 return -EOPNOTSUPP;
···
         u64 node_guid;
         int err = 0;
 
-        if (!ESW_ALLOWED(esw))
+        if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
                 return -EPERM;
         if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
                 return -EINVAL;
···
 {
         struct mlx5_vport *evport;
 
-        if (!ESW_ALLOWED(esw))
+        if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
                 return -EPERM;
         if (!LEGAL_VPORT(esw, vport))
                 return -EINVAL;
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
···
         if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
                 return -EOPNOTSUPP;
 
-        if (!MLX5_CAP_GEN(dev, vport_group_manager))
-                return -EOPNOTSUPP;
+        if(!MLX5_ESWITCH_MANAGER(dev))
+                return -EPERM;
 
         if (dev->priv.eswitch->mode == SRIOV_NONE)
                 return -EOPNOTSUPP;
+2 -1
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
···
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
 #include "fs_core.h"
···
                 goto err;
         }
 
-        if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+        if (MLX5_ESWITCH_MANAGER(dev)) {
                 if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
                         err = init_fdb_root_ns(steering);
                         if (err)
+3 -2
drivers/net/ethernet/mellanox/mlx5/core/fw.c
···
 
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/module.h>
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
···
         }
 
         if (MLX5_CAP_GEN(dev, vport_group_manager) &&
-            MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+            MLX5_ESWITCH_MANAGER(dev)) {
                 err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
                 if (err)
                         return err;
         }
 
-        if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+        if (MLX5_ESWITCH_MANAGER(dev)) {
                 err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
                 if (err)
                         return err;
+5 -4
drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
···
 #include <linux/etherdevice.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/eswitch.h>
 #include "mlx5_core.h"
 #include "lib/mpfs.h"
 
···
         int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
         struct mlx5_mpfs *mpfs;
 
-        if (!MLX5_VPORT_MANAGER(dev))
+        if (!MLX5_ESWITCH_MANAGER(dev))
                 return 0;
 
         mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
···
 {
         struct mlx5_mpfs *mpfs = dev->priv.mpfs;
 
-        if (!MLX5_VPORT_MANAGER(dev))
+        if (!MLX5_ESWITCH_MANAGER(dev))
                 return;
 
         WARN_ON(!hlist_empty(mpfs->hash));
···
         u32 index;
         int err;
 
-        if (!MLX5_VPORT_MANAGER(dev))
+        if (!MLX5_ESWITCH_MANAGER(dev))
                 return 0;
 
         mutex_lock(&mpfs->lock);
···
         int err = 0;
         u32 index;
 
-        if (!MLX5_VPORT_MANAGER(dev))
+        if (!MLX5_ESWITCH_MANAGER(dev))
                 return 0;
 
         mutex_lock(&mpfs->lock);
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/port.c
···
 static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
                                    int inlen)
 {
-        u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+        u32 out[MLX5_ST_SZ_DW(qetc_reg)];
 
         if (!MLX5_CAP_GEN(mdev, ets))
                 return -EOPNOTSUPP;
···
 static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
                                      int outlen)
 {
-        u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+        u32 in[MLX5_ST_SZ_DW(qetc_reg)];
 
         if (!MLX5_CAP_GEN(mdev, ets))
                 return -EOPNOTSUPP;
+6 -1
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
···
                 return -EBUSY;
         }
 
+        if (!MLX5_ESWITCH_MANAGER(dev))
+                goto enable_vfs_hca;
+
         err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
         if (err) {
                 mlx5_core_warn(dev,
···
                 return err;
         }
 
+enable_vfs_hca:
         for (vf = 0; vf < num_vfs; vf++) {
                 err = mlx5_core_enable_hca(dev, vf + 1);
                 if (err) {
···
         }
 
 out:
-        mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+        if (MLX5_ESWITCH_MANAGER(dev))
+                mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 
         if (mlx5_wait_for_vf_pages(dev))
                 mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
-2
drivers/net/ethernet/mellanox/mlx5/core/vport.c
···
                 return -EINVAL;
         if (!MLX5_CAP_GEN(mdev, vport_group_manager))
                 return -EACCES;
-        if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
-                return -EOPNOTSUPP;
 
         in = kvzalloc(inlen, GFP_KERNEL);
         if (!in)
+6 -3
drivers/net/ethernet/netronome/nfp/bpf/main.c
···
 
         ret = nfp_net_bpf_offload(nn, prog, running, extack);
         /* Stop offload if replace not possible */
-        if (ret && prog)
-                nfp_bpf_xdp_offload(app, nn, NULL, extack);
+        if (ret)
+                return ret;
 
-        nn->dp.bpf_offload_xdp = prog && !ret;
+        nn->dp.bpf_offload_xdp = !!prog;
         return ret;
 }
 
···
         struct nfp_net *nn = netdev_priv(netdev);
 
         if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+                return -EOPNOTSUPP;
+
+        if (tcf_block_shared(f->block))
                 return -EOPNOTSUPP;
 
         switch (f->command) {
+14
drivers/net/ethernet/netronome/nfp/flower/match.c
···
                             NFP_FLOWER_MASK_MPLS_Q;
 
                 frame->mpls_lse = cpu_to_be32(t_mpls);
+        } else if (dissector_uses_key(flow->dissector,
+                                      FLOW_DISSECTOR_KEY_BASIC)) {
+                /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
+                 * bit, which indicates an mpls ether type but without any
+                 * mpls fields.
+                 */
+                struct flow_dissector_key_basic *key_basic;
+
+                key_basic = skb_flow_dissector_target(flow->dissector,
+                                                      FLOW_DISSECTOR_KEY_BASIC,
+                                                      flow->key);
+                if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
+                    key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC))
+                        frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
         }
 }
 
+11
drivers/net/ethernet/netronome/nfp/flower/offload.c
···
         case cpu_to_be16(ETH_P_ARP):
                 return -EOPNOTSUPP;
 
+        case cpu_to_be16(ETH_P_MPLS_UC):
+        case cpu_to_be16(ETH_P_MPLS_MC):
+                if (!(key_layer & NFP_FLOWER_LAYER_MAC)) {
+                        key_layer |= NFP_FLOWER_LAYER_MAC;
+                        key_size += sizeof(struct nfp_flower_mac_mpls);
+                }
+                break;
+
         /* Will be included in layer 2. */
         case cpu_to_be16(ETH_P_8021Q):
                 break;
···
         struct nfp_repr *repr = netdev_priv(netdev);
 
         if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+                return -EOPNOTSUPP;
+
+        if (tcf_block_shared(f->block))
                 return -EOPNOTSUPP;
 
         switch (f->command) {
+1 -1
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
···
         err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res),
                            nfp_resource_address(state->res),
                            fwinf, sizeof(*fwinf));
-        if (err < sizeof(*fwinf))
+        if (err < (int)sizeof(*fwinf))
                 goto err_release;
 
         if (!nffw_res_flg_init_get(fwinf))
+4 -4
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
···
         p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE];
 
         memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id,
-               ARRAY_SIZE(p_local->local_chassis_id));
+               sizeof(p_local->local_chassis_id));
         memcpy(params->lldp_local.local_port_id, p_local->local_port_id,
-               ARRAY_SIZE(p_local->local_port_id));
+               sizeof(p_local->local_port_id));
 }
 
 static void
···
         p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE];
 
         memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id,
-               ARRAY_SIZE(p_remote->peer_chassis_id));
+               sizeof(p_remote->peer_chassis_id));
         memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id,
-               ARRAY_SIZE(p_remote->peer_port_id));
+               sizeof(p_remote->peer_port_id));
 }
 
 static int
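
The qed_dcbx hunk fixes a classic mixup: memcpy() takes a byte count,
while ARRAY_SIZE() yields an element count. For the u8 id arrays here
the two happen to coincide, but with any wider element type
ARRAY_SIZE() silently truncates the copy, which is why sizeof() is the
right spelling. A small compilable illustration:

    #include <stdint.h>
    #include <string.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    int main(void)
    {
            uint32_t src[4] = { 1, 2, 3, 4 }, dst[4] = { 0 };

            /* ARRAY_SIZE(src) == 4 but memcpy() counts bytes, so it
             * would copy only one element; sizeof(src) == 16 copies
             * the whole array.
             */
            memcpy(dst, src, sizeof(src));
            return dst[3] == 4 ? 0 : 1;
    }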
+1 -1
drivers/net/ethernet/qlogic/qed/qed_dev.c
···
                 DP_INFO(p_hwfn, "Failed to update driver state\n");
 
         rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
-                                       QED_OV_ESWITCH_VEB);
+                                       QED_OV_ESWITCH_NONE);
         if (rc)
                 DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
 }
+8
drivers/net/ethernet/qlogic/qed/qed_main.c
···
         /* We want a minimum of one slowpath and one fastpath vector per hwfn */
         cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2;
 
+        if (is_kdump_kernel()) {
+                DP_INFO(cdev,
+                        "Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n",
+                        cdev->int_params.in.min_msix_cnt);
+                cdev->int_params.in.num_vectors =
+                        cdev->int_params.in.min_msix_cnt;
+        }
+
         rc = qed_set_int_mode(cdev, false);
         if (rc) {
                 DP_ERR(cdev, "qed_slowpath_setup_int ERR\n");
+17 -2
drivers/net/ethernet/qlogic/qed/qed_sriov.c
···
 static int qed_sriov_enable(struct qed_dev *cdev, int num)
 {
         struct qed_iov_vf_init_params params;
+        struct qed_hwfn *hwfn;
+        struct qed_ptt *ptt;
         int i, j, rc;
 
         if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) {
···
 
         /* Initialize HW for VF access */
         for_each_hwfn(cdev, j) {
-                struct qed_hwfn *hwfn = &cdev->hwfns[j];
-                struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
+                hwfn = &cdev->hwfns[j];
+                ptt = qed_ptt_acquire(hwfn);
 
                 /* Make sure not to use more than 16 queues per VF */
                 params.num_queues = min_t(int,
···
                 DP_ERR(cdev, "Failed to enable sriov [%d]\n", rc);
                 goto err;
         }
+
+        hwfn = QED_LEADING_HWFN(cdev);
+        ptt = qed_ptt_acquire(hwfn);
+        if (!ptt) {
+                DP_ERR(hwfn, "Failed to acquire ptt\n");
+                rc = -EBUSY;
+                goto err;
+        }
+
+        rc = qed_mcp_ov_update_eswitch(hwfn, ptt, QED_OV_ESWITCH_VEB);
+        if (rc)
+                DP_INFO(cdev, "Failed to update eswitch mode\n");
+        qed_ptt_release(hwfn, ptt);
 
         return num;
 
+8 -2
drivers/net/ethernet/qlogic/qede/qede_ptp.c
···
 {
         struct qede_ptp *ptp = edev->ptp;
 
-        if (!ptp)
-                return -EIO;
+        if (!ptp) {
+                info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+                                        SOF_TIMESTAMPING_RX_SOFTWARE |
+                                        SOF_TIMESTAMPING_SOFTWARE;
+                info->phc_index = -1;
+
+                return 0;
+        }
 
         info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
                                 SOF_TIMESTAMPING_RX_SOFTWARE |
+1
drivers/net/ethernet/sfc/farch.c
···
         if (!state)
                 return -ENOMEM;
         efx->filter_state = state;
+        init_rwsem(&state->lock);
 
         table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
         table->id = EFX_FARCH_FILTER_TABLE_RX_IP;
+12
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
···
         }
 }
 
+static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
+{
+        u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+
+        value &= ~DMA_RBSZ_MASK;
+        value |= (bfsize << DMA_RBSZ_SHIFT) & DMA_RBSZ_MASK;
+
+        writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+}
+
 const struct stmmac_dma_ops dwmac4_dma_ops = {
         .reset = dwmac4_dma_reset,
         .init = dwmac4_dma_init,
···
         .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
         .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
         .enable_tso = dwmac4_enable_tso,
+        .set_bfsize = dwmac4_set_bfsize,
 };
 
 const struct stmmac_dma_ops dwmac410_dma_ops = {
···
         .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
         .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
         .enable_tso = dwmac4_enable_tso,
+        .set_bfsize = dwmac4_set_bfsize,
 };
+2
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
···
 
 /* DMA Rx Channel X Control register defines */
 #define DMA_CONTROL_SR BIT(0)
+#define DMA_RBSZ_MASK GENMASK(14, 1)
+#define DMA_RBSZ_SHIFT 1
 
 /* Interrupt status per channel */
 #define DMA_CHAN_STATUS_REB GENMASK(21, 19)
+3
drivers/net/ethernet/stmicro/stmmac/hwif.h
···
         void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
         void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
         void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
+        void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
 };
 
 #define stmmac_reset(__priv, __args...) \
···
         stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
 #define stmmac_enable_tso(__priv, __args...) \
         stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+#define stmmac_set_dma_bfsize(__priv, __args...) \
+        stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
 
 struct mac_device_info;
 struct net_device;
+2
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
···
 
                 stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
                                    rxfifosz, qmode);
+                stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz,
+                                      chan);
         }
 
         for (chan = 0; chan < tx_channels_count; chan++) {
+1 -1
drivers/net/geneve.c
···
 out_unlock:
         rcu_read_unlock();
 out:
-        NAPI_GRO_CB(skb)->flush |= flush;
+        skb_gro_flush_final(skb, pp, flush);
 
         return pp;
 }
+1 -1
drivers/net/hyperv/hyperv_net.h
···
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
 
-void rndis_set_subchannel(struct work_struct *w);
+int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+36 -1
drivers/net/hyperv/netvsc.c
···
                                VM_PKT_DATA_INBAND, 0);
 }
 
+/* Worker to setup sub channels on initial setup
+ * Initial hotplug event occurs in softirq context
+ * and can't wait for channels.
+ */
+static void netvsc_subchan_work(struct work_struct *w)
+{
+        struct netvsc_device *nvdev =
+                container_of(w, struct netvsc_device, subchan_work);
+        struct rndis_device *rdev;
+        int i, ret;
+
+        /* Avoid deadlock with device removal already under RTNL */
+        if (!rtnl_trylock()) {
+                schedule_work(w);
+                return;
+        }
+
+        rdev = nvdev->extension;
+        if (rdev) {
+                ret = rndis_set_subchannel(rdev->ndev, nvdev);
+                if (ret == 0) {
+                        netif_device_attach(rdev->ndev);
+                } else {
+                        /* fallback to only primary channel */
+                        for (i = 1; i < nvdev->num_chn; i++)
+                                netif_napi_del(&nvdev->chan_table[i].napi);
+
+                        nvdev->max_chn = 1;
+                        nvdev->num_chn = 1;
+                }
+        }
+
+        rtnl_unlock();
+}
+
 static struct netvsc_device *alloc_net_device(void)
 {
         struct netvsc_device *net_device;
···
 
         init_completion(&net_device->channel_init_wait);
         init_waitqueue_head(&net_device->subchan_open);
-        INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
+        INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
 
         return net_device;
 }
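
netvsc_subchan_work() above uses a standard idiom for taking a lock
from a work item without deadlocking against a path that holds that
lock while waiting for the work to finish: try the lock, and if it is
contended, requeue the work rather than block. A generic sketch of the
idiom (the my_dev_work() body is hypothetical, not the netvsc code):

    #include <linux/rtnetlink.h>
    #include <linux/workqueue.h>

    static void my_dev_work(struct work_struct *w)
    {
            /* If RTNL is held, possibly by someone about to cancel
             * this very work, blocking here could deadlock. Back off
             * and let the work run again later.
             */
            if (!rtnl_trylock()) {
                    schedule_work(w);
                    return;
            }

            /* ... RTNL-protected setup goes here ... */

            rtnl_unlock();
    }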
+16 -1
drivers/net/hyperv/netvsc_drv.c
···
         if (IS_ERR(nvdev))
                 return PTR_ERR(nvdev);
 
-        /* Note: enable and attach happen when sub-channels setup */
+        if (nvdev->num_chn > 1) {
+                ret = rndis_set_subchannel(ndev, nvdev);
 
+                /* if unavailable, just proceed with one queue */
+                if (ret) {
+                        nvdev->max_chn = 1;
+                        nvdev->num_chn = 1;
+                }
+        }
+
+        /* In any case device is now ready */
+        netif_device_attach(ndev);
+
+        /* Note: enable and attach happen when sub-channels setup */
         netif_carrier_off(ndev);
 
         if (netif_running(ndev)) {
···
         }
 
         memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+
+        if (nvdev->num_chn > 1)
+                schedule_work(&nvdev->subchan_work);
 
         /* hw_features computed in rndis_netdev_set_hwcaps() */
         net->features = net->hw_features |
+12 -49
drivers/net/hyperv/rndis_filter.c
···
  * This breaks overlap of processing the host message for the
  * new primary channel with the initialization of sub-channels.
  */
-void rndis_set_subchannel(struct work_struct *w)
+int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev)
 {
-        struct netvsc_device *nvdev
-                = container_of(w, struct netvsc_device, subchan_work);
         struct nvsp_message *init_packet = &nvdev->channel_init_pkt;
-        struct net_device_context *ndev_ctx;
-        struct rndis_device *rdev;
-        struct net_device *ndev;
-        struct hv_device *hv_dev;
+        struct net_device_context *ndev_ctx = netdev_priv(ndev);
+        struct hv_device *hv_dev = ndev_ctx->device_ctx;
+        struct rndis_device *rdev = nvdev->extension;
         int i, ret;
 
-        if (!rtnl_trylock()) {
-                schedule_work(w);
-                return;
-        }
-
-        rdev = nvdev->extension;
-        if (!rdev)
-                goto unlock; /* device was removed */
-
-        ndev = rdev->ndev;
-        ndev_ctx = netdev_priv(ndev);
-        hv_dev = ndev_ctx->device_ctx;
+        ASSERT_RTNL();
 
         memset(init_packet, 0, sizeof(struct nvsp_message));
         init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
···
                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
         if (ret) {
                 netdev_err(ndev, "sub channel allocate send failed: %d\n", ret);
-                goto failed;
+                return ret;
         }
 
         wait_for_completion(&nvdev->channel_init_wait);
         if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
                 netdev_err(ndev, "sub channel request failed\n");
-                goto failed;
+                return -EIO;
         }
 
         nvdev->num_chn = 1 +
···
         for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
                 ndev_ctx->tx_table[i] = i % nvdev->num_chn;
 
-        netif_device_attach(ndev);
-        rtnl_unlock();
-        return;
-
-failed:
-        /* fallback to only primary channel */
-        for (i = 1; i < nvdev->num_chn; i++)
-                netif_napi_del(&nvdev->chan_table[i].napi);
-
-        nvdev->max_chn = 1;
-        nvdev->num_chn = 1;
-
-        netif_device_attach(ndev);
-unlock:
-        rtnl_unlock();
+        return 0;
 }
···
                 netif_napi_add(net, &net_device->chan_table[i].napi,
                                netvsc_poll, NAPI_POLL_WEIGHT);
 
-        if (net_device->num_chn > 1)
-                schedule_work(&net_device->subchan_work);
+        return net_device;
 
 out:
-        /* if unavailable, just proceed with one queue */
-        if (ret) {
-                net_device->max_chn = 1;
-                net_device->num_chn = 1;
-        }
-
-        /* No sub channels, device is ready */
-        if (net_device->num_chn == 1)
-                netif_device_attach(net);
-
-        return net_device;
+        /* setting up multiple channels failed */
+        net_device->max_chn = 1;
+        net_device->num_chn = 1;
 
 err_dev_remv:
         rndis_filter_device_remove(dev, net_device);
+28 -8
drivers/net/ipvlan/ipvlan_main.c
···
 {
         struct ipvl_dev *ipvlan;
         struct net_device *mdev = port->dev;
-        int err = 0;
+        unsigned int flags;
+        int err;
 
         ASSERT_RTNL();
         if (port->mode != nval) {
+                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+                        flags = ipvlan->dev->flags;
+                        if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
+                                err = dev_change_flags(ipvlan->dev,
+                                                       flags | IFF_NOARP);
+                        } else {
+                                err = dev_change_flags(ipvlan->dev,
+                                                       flags & ~IFF_NOARP);
+                        }
+                        if (unlikely(err))
+                                goto fail;
+                }
                 if (nval == IPVLAN_MODE_L3S) {
                         /* New mode is L3S */
                         err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
···
                                 mdev->l3mdev_ops = &ipvl_l3mdev_ops;
                                 mdev->priv_flags |= IFF_L3MDEV_MASTER;
                         } else
-                                return err;
+                                goto fail;
                 } else if (port->mode == IPVLAN_MODE_L3S) {
                         /* Old mode was L3S */
                         mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
                         ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
                         mdev->l3mdev_ops = NULL;
                 }
-                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-                        if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
-                                ipvlan->dev->flags |= IFF_NOARP;
-                        else
-                                ipvlan->dev->flags &= ~IFF_NOARP;
-                }
                 port->mode = nval;
         }
+        return 0;
+
+fail:
+        /* Undo the flags changes that have been done so far. */
+        list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
+                flags = ipvlan->dev->flags;
+                if (port->mode == IPVLAN_MODE_L3 ||
+                    port->mode == IPVLAN_MODE_L3S)
+                        dev_change_flags(ipvlan->dev, flags | IFF_NOARP);
+                else
+                        dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP);
+        }
+
         return err;
 }
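
The reworked ipvlan_set_port_mode() is a textbook partial-failure
unwind: apply a change to each list member in order, and on the first
failure walk back over only the already-modified members with
list_for_each_entry_continue_reverse(), restoring their previous
state. Skeleton of the pattern (struct item, apply_change() and
restore_change() are hypothetical):

    #include <linux/list.h>

    static int apply_all(struct list_head *head)
    {
            struct item *it;
            int err;

            list_for_each_entry(it, head, node) {
                    err = apply_change(it);
                    if (err)
                            goto fail;
            }
            return 0;

    fail:
            /* Iterate backwards from the failing entry, touching only
             * members that were already modified.
             */
            list_for_each_entry_continue_reverse(it, head, node)
                    restore_change(it);
            return err;
    }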
+1 -1
drivers/net/phy/dp83tc811.c
···
                 if (err < 0)
                         return err;
 
-                err = phy_write(phydev, MII_DP83811_INT_STAT1, 0);
+                err = phy_write(phydev, MII_DP83811_INT_STAT2, 0);
         }
 
         return err;
+34 -3
drivers/net/usb/lan78xx.c
···
 #define DEFAULT_RX_CSUM_ENABLE          (true)
 #define DEFAULT_TSO_CSUM_ENABLE         (true)
 #define DEFAULT_VLAN_FILTER_ENABLE      (true)
+#define DEFAULT_VLAN_RX_OFFLOAD         (true)
 #define TX_OVERHEAD                     (8)
 #define RXW_PADDING                     2
 
···
         if ((ll_mtu % dev->maxpacket) == 0)
                 return -EDOM;
 
-        ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN);
+        ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
 
         netdev->mtu = new_mtu;
 
···
         }
 
         if (features & NETIF_F_HW_VLAN_CTAG_RX)
+                pdata->rfe_ctl |= RFE_CTL_VLAN_STRIP_;
+        else
+                pdata->rfe_ctl &= ~RFE_CTL_VLAN_STRIP_;
+
+        if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
                 pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_;
         else
                 pdata->rfe_ctl &= ~RFE_CTL_VLAN_FILTER_;
···
         buf |= FCT_TX_CTL_EN_;
         ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf);
 
-        ret = lan78xx_set_rx_max_frame_length(dev, dev->net->mtu + ETH_HLEN);
+        ret = lan78xx_set_rx_max_frame_length(dev,
+                                              dev->net->mtu + VLAN_ETH_HLEN);
 
         ret = lan78xx_read_reg(dev, MAC_RX, &buf);
         buf |= MAC_RX_RXEN_;
···
         if (DEFAULT_TSO_CSUM_ENABLE)
                 dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG;
 
+        if (DEFAULT_VLAN_RX_OFFLOAD)
+                dev->net->features |= NETIF_F_HW_VLAN_CTAG_RX;
+
+        if (DEFAULT_VLAN_FILTER_ENABLE)
+                dev->net->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
         dev->net->hw_features = dev->net->features;
 
         ret = lan78xx_setup_irq_domain(dev);
···
                                     struct sk_buff *skb,
                                     u32 rx_cmd_a, u32 rx_cmd_b)
 {
+        /* HW Checksum offload appears to be flawed if used when not stripping
+         * VLAN headers. Drop back to S/W checksums under these conditions.
+         */
         if (!(dev->net->features & NETIF_F_RXCSUM) ||
-            unlikely(rx_cmd_a & RX_CMD_A_ICSM_)) {
+            unlikely(rx_cmd_a & RX_CMD_A_ICSM_) ||
+            ((rx_cmd_a & RX_CMD_A_FVTG_) &&
+             !(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))) {
                 skb->ip_summed = CHECKSUM_NONE;
         } else {
                 skb->csum = ntohs((u16)(rx_cmd_b >> RX_CMD_B_CSUM_SHIFT_));
                 skb->ip_summed = CHECKSUM_COMPLETE;
         }
+}
+
+static void lan78xx_rx_vlan_offload(struct lan78xx_net *dev,
+                                    struct sk_buff *skb,
+                                    u32 rx_cmd_a, u32 rx_cmd_b)
+{
+        if ((dev->net->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+            (rx_cmd_a & RX_CMD_A_FVTG_))
+                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                       (rx_cmd_b & 0xffff));
 }
 
 static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
···
                 if (skb->len == size) {
                         lan78xx_rx_csum_offload(dev, skb,
                                                 rx_cmd_a, rx_cmd_b);
+                        lan78xx_rx_vlan_offload(dev, skb,
+                                                rx_cmd_a, rx_cmd_b);
 
                         skb_trim(skb, skb->len - 4); /* remove fcs */
                         skb->truesize = size + sizeof(struct sk_buff);
···
                 skb_set_tail_pointer(skb2, size);
 
                 lan78xx_rx_csum_offload(dev, skb2, rx_cmd_a, rx_cmd_b);
+                lan78xx_rx_vlan_offload(dev, skb2, rx_cmd_a, rx_cmd_b);
 
                 skb_trim(skb2, skb2->len - 4); /* remove fcs */
                 skb2->truesize = size + sizeof(struct sk_buff);
+2 -1
drivers/net/usb/r8152.c
···
 #ifdef CONFIG_PM_SLEEP
         unregister_pm_notifier(&tp->pm_notifier);
 #endif
-        napi_disable(&tp->napi);
+        if (!test_bit(RTL8152_UNPLUG, &tp->flags))
+                napi_disable(&tp->napi);
         clear_bit(WORK_ENABLE, &tp->flags);
         usb_kill_urb(tp->intr_urb);
         cancel_delayed_work_sync(&tp->schedule);
+19 -11
drivers/net/virtio_net.c
···
 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
 #define VIRTIO_XDP_HEADROOM 256
 
+/* Separating two types of XDP xmit */
+#define VIRTIO_XDP_TX           BIT(0)
+#define VIRTIO_XDP_REDIR        BIT(1)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
···
                                      struct receive_queue *rq,
                                      void *buf, void *ctx,
                                      unsigned int len,
-                                     bool *xdp_xmit)
+                                     unsigned int *xdp_xmit)
 {
         struct sk_buff *skb;
         struct bpf_prog *xdp_prog;
···
                                 trace_xdp_exception(vi->dev, xdp_prog, act);
                                 goto err_xdp;
                         }
-                        *xdp_xmit = true;
+                        *xdp_xmit |= VIRTIO_XDP_TX;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 case XDP_REDIRECT:
                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
                         if (err)
                                 goto err_xdp;
-                        *xdp_xmit = true;
+                        *xdp_xmit |= VIRTIO_XDP_REDIR;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 default:
···
                                          void *buf,
                                          void *ctx,
                                          unsigned int len,
-                                         bool *xdp_xmit)
+                                         unsigned int *xdp_xmit)
 {
         struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
         u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
···
                                 put_page(xdp_page);
                                 goto err_xdp;
                         }
-                        *xdp_xmit = true;
+                        *xdp_xmit |= VIRTIO_XDP_TX;
                         if (unlikely(xdp_page != page))
                                 put_page(page);
                         rcu_read_unlock();
···
                                 put_page(xdp_page);
                                 goto err_xdp;
                         }
-                        *xdp_xmit = true;
+                        *xdp_xmit |= VIRTIO_XDP_REDIR;
                         if (unlikely(xdp_page != page))
                                 put_page(page);
                         rcu_read_unlock();
···
 }
 
 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
-                       void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
+                       void *buf, unsigned int len, void **ctx,
+                       unsigned int *xdp_xmit)
 {
         struct net_device *dev = vi->dev;
         struct sk_buff *skb;
···
         }
 }
 
-static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
+static int virtnet_receive(struct receive_queue *rq, int budget,
+                           unsigned int *xdp_xmit)
 {
         struct virtnet_info *vi = rq->vq->vdev->priv;
         unsigned int len, received = 0, bytes = 0;
···
         struct virtnet_info *vi = rq->vq->vdev->priv;
         struct send_queue *sq;
         unsigned int received, qp;
-        bool xdp_xmit = false;
+        unsigned int xdp_xmit = 0;
 
         virtnet_poll_cleantx(rq);
 
···
         if (received < budget)
                 virtqueue_napi_complete(napi, rq->vq, received);
 
-        if (xdp_xmit) {
+        if (xdp_xmit & VIRTIO_XDP_REDIR)
+                xdp_do_flush_map();
+
+        if (xdp_xmit & VIRTIO_XDP_TX) {
                 qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
                      smp_processor_id();
                 sq = &vi->sq[qp];
                 virtqueue_kick(sq->vq);
-                xdp_do_flush_map();
         }
 
         return received;
+1 -3
drivers/net/vxlan.c
···
         flush = 0;
 
 out:
-        skb_gro_remcsum_cleanup(skb, &grc);
-        skb->remcsum_offload = 0;
-        NAPI_GRO_CB(skb)->flush |= flush;
+        skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
         return pp;
 }
+12 -1
drivers/s390/net/qeth_core.h
···
 /*some helper functions*/
 #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "")
 
+static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
+                                          unsigned int elements)
+{
+        unsigned int i;
+
+        for (i = 0; i < elements; i++)
+                memset(&buf->element[i], 0, sizeof(struct qdio_buffer_element));
+        buf->element[14].sflags = 0;
+        buf->element[15].sflags = 0;
+}
+
 /**
  * qeth_get_elements_for_range() - find number of SBALEs to cover range.
  * @start: Start of the address range.
···
                   __u16, __u16,
                   enum qeth_prot_versions);
 int qeth_set_features(struct net_device *, netdev_features_t);
-void qeth_recover_features(struct net_device *dev);
+void qeth_enable_hw_features(struct net_device *dev);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 netdev_features_t qeth_features_check(struct sk_buff *skb,
                                       struct net_device *dev,
+28 -19
drivers/s390/net/qeth_core_main.c
···
                                   struct qeth_qdio_out_buffer *buf,
                                   enum iucv_tx_notify notification);
 static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf);
-static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
-                                     struct qeth_qdio_out_buffer *buf,
-                                     enum qeth_qdio_buffer_states newbufstate);
 static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int);
 
 struct workqueue_struct *qeth_wq;
···
         struct qaob *aob;
         struct qeth_qdio_out_buffer *buffer;
         enum iucv_tx_notify notification;
+        unsigned int i;
 
         aob = (struct qaob *) phys_to_virt(phys_aob_addr);
         QETH_CARD_TEXT(card, 5, "haob");
···
         qeth_notify_skbs(buffer->q, buffer, notification);
 
         buffer->aob = NULL;
-        qeth_clear_output_buffer(buffer->q, buffer,
-                                 QETH_QDIO_BUF_HANDLED_DELAYED);
+        /* Free dangling allocations. The attached skbs are handled by
+         * qeth_cleanup_handled_pending().
+         */
+        for (i = 0;
+             i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card);
+             i++) {
+                if (aob->sba[i] && buffer->is_header[i])
+                        kmem_cache_free(qeth_core_header_cache,
+                                        (void *) aob->sba[i]);
+        }
+        atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED);
 
-        /* from here on: do not touch buffer anymore */
         qdio_release_aob(aob);
 }
···
                         QETH_CARD_TEXT(queue->card, 5, "aob");
                         QETH_CARD_TEXT_(queue->card, 5, "%lx",
                                         virt_to_phys(buffer->aob));
+
+                        /* prepare the queue slot for re-use: */
+                        qeth_scrub_qdio_buffer(buffer->buffer,
+                                               QETH_MAX_BUFFER_ELEMENTS(card));
                         if (qeth_init_qdio_out_buf(queue, bidx)) {
                                 QETH_CARD_TEXT(card, 2, "outofbuf");
                                 qeth_schedule_recovery(card);
···
                 goto out;
         }
 
-        ccw_device_get_id(CARD_RDEV(card), &id);
+        ccw_device_get_id(CARD_DDEV(card), &id);
         request->resp_buf_len = sizeof(*response);
         request->resp_version = DIAG26C_VERSION2;
         request->op_code = DIAG26C_GET_MAC;
···
 #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \
                           NETIF_F_IPV6_CSUM)
 /**
- * qeth_recover_features() - Restore device features after recovery
- * @dev: the recovering net_device
- *
- * Caller must hold rtnl lock.
+ * qeth_enable_hw_features() - (Re-)Enable HW functions for device features
+ * @dev: a net_device
  */
-void qeth_recover_features(struct net_device *dev)
+void qeth_enable_hw_features(struct net_device *dev)
 {
-        netdev_features_t features = dev->features;
         struct qeth_card *card = dev->ml_priv;
+        netdev_features_t features;
 
+        rtnl_lock();
+        features = dev->features;
         /* force-off any feature that needs an IPA sequence.
          * netdev_update_features() will restart them.
          */
         dev->features &= ~QETH_HW_FEATURES;
         netdev_update_features(dev);
-
-        if (features == dev->features)
-                return;
-        dev_warn(&card->gdev->dev,
-                 "Device recovery failed to restore all offload features\n");
+        if (features != dev->features)
+                dev_warn(&card->gdev->dev,
+                         "Device recovery failed to restore all offload features\n");
+        rtnl_unlock();
 }
-EXPORT_SYMBOL_GPL(qeth_recover_features);
+EXPORT_SYMBOL_GPL(qeth_enable_hw_features);
 
 int qeth_set_features(struct net_device *dev, netdev_features_t features)
 {
+15 -9
drivers/s390/net/qeth_l2_main.c
···
 
 static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 {
-        enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ?
+        enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ?
                                           IPA_CMD_SETGMAC : IPA_CMD_SETVMAC;
         int rc;
 
···
 
 static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac)
 {
-        enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ?
+        enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ?
                                           IPA_CMD_DELGMAC : IPA_CMD_DELVMAC;
         int rc;
 
···
                 return -ERESTARTSYS;
         }
 
+        /* avoid racing against concurrent state change: */
+        if (!mutex_trylock(&card->conf_mutex))
+                return -EAGAIN;
+
         if (!qeth_card_hw_is_reachable(card)) {
                 ether_addr_copy(dev->dev_addr, addr->sa_data);
-                return 0;
+                goto out_unlock;
         }
 
         /* don't register the same address twice */
         if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) &&
             (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
-                return 0;
+                goto out_unlock;
 
         /* add the new address, switch over, drop the old */
         rc = qeth_l2_send_setmac(card, addr->sa_data);
         if (rc)
-                return rc;
+                goto out_unlock;
         ether_addr_copy(old_addr, dev->dev_addr);
         ether_addr_copy(dev->dev_addr, addr->sa_data);
 
         if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
                 qeth_l2_remove_mac(card, old_addr);
         card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
-        return 0;
+
+out_unlock:
+        mutex_unlock(&card->conf_mutex);
+        return rc;
 }
 
 static void qeth_promisc_to_bridge(struct qeth_card *card)
···
         netif_carrier_off(card->dev);
 
         qeth_set_allowed_threads(card, 0xffffffff, 0);
+
+        qeth_enable_hw_features(card->dev);
         if (recover_flag == CARD_STATE_RECOVER) {
                 if (recovery_mode &&
                     card->info.type != QETH_CARD_TYPE_OSN) {
···
                 }
                 /* this also sets saved unicast addresses */
                 qeth_l2_set_rx_mode(card->dev);
-                rtnl_lock();
-                qeth_recover_features(card->dev);
-                rtnl_unlock();
         }
         /* let user_space know that device is online */
         kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
+2 -1
drivers/s390/net/qeth_l3_main.c
···
                 netif_carrier_on(card->dev);
         else
                 netif_carrier_off(card->dev);
+
+        qeth_enable_hw_features(card->dev);
         if (recover_flag == CARD_STATE_RECOVER) {
                 rtnl_lock();
                 if (recovery_mode)
···
                 else
                         dev_open(card->dev);
                 qeth_l3_set_rx_mode(card->dev);
-                qeth_recover_features(card->dev);
                 rtnl_unlock();
         }
         qeth_trace_features(card);
+26
include/linux/bpf-cgroup.h
···
                                                                         \
         __ret;                                                          \
 })
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+                           enum bpf_prog_type ptype, struct bpf_prog *prog);
+int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+                           enum bpf_prog_type ptype);
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+                          union bpf_attr __user *uattr);
 #else
 
+struct bpf_prog;
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+
+static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+                                         enum bpf_prog_type ptype,
+                                         struct bpf_prog *prog)
+{
+        return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+                                         enum bpf_prog_type ptype)
+{
+        return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
+                                        union bpf_attr __user *uattr)
+{
+        return -EINVAL;
+}
 
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
+8
include/linux/bpf.h
··· 696 696 struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); 697 697 struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); 698 698 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); 699 + int sockmap_get_from_fd(const union bpf_attr *attr, int type, 700 + struct bpf_prog *prog); 699 701 #else 700 702 static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) 701 703 { ··· 715 713 u32 type) 716 714 { 717 715 return -EOPNOTSUPP; 716 + } 717 + 718 + static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type, 719 + struct bpf_prog *prog) 720 + { 721 + return -EINVAL; 718 722 } 719 723 #endif 720 724
+3 -2
include/linux/bpf_lirc.h
··· 5 5 #include <uapi/linux/bpf.h> 6 6 7 7 #ifdef CONFIG_BPF_LIRC_MODE2 8 - int lirc_prog_attach(const union bpf_attr *attr); 8 + int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); 9 9 int lirc_prog_detach(const union bpf_attr *attr); 10 10 int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); 11 11 #else 12 - static inline int lirc_prog_attach(const union bpf_attr *attr) 12 + static inline int lirc_prog_attach(const union bpf_attr *attr, 13 + struct bpf_prog *prog) 13 14 { 14 15 return -EINVAL; 15 16 }
+8 -48
include/linux/filter.h
··· 470 470 }; 471 471 472 472 struct bpf_binary_header { 473 - u16 pages; 474 - u16 locked:1; 475 - 473 + u32 pages; 476 474 /* Some arches need word alignment for their instructions */ 477 475 u8 image[] __aligned(4); 478 476 }; ··· 479 481 u16 pages; /* Number of allocated pages */ 480 482 u16 jited:1, /* Is our filter JIT'ed? */ 481 483 jit_requested:1,/* archs need to JIT the prog */ 482 - locked:1, /* Program image locked? */ 484 + undo_set_mem:1, /* Passed set_memory_ro() checkpoint */ 483 485 gpl_compatible:1, /* Is filter GPL compatible? */ 484 486 cb_access:1, /* Is control block accessed? */ 485 487 dst_needed:1, /* Do we need dst entry? */ ··· 675 677 676 678 static inline void bpf_prog_lock_ro(struct bpf_prog *fp) 677 679 { 678 - #ifdef CONFIG_ARCH_HAS_SET_MEMORY 679 - fp->locked = 1; 680 - if (set_memory_ro((unsigned long)fp, fp->pages)) 681 - fp->locked = 0; 682 - #endif 680 + fp->undo_set_mem = 1; 681 + set_memory_ro((unsigned long)fp, fp->pages); 683 682 } 684 683 685 684 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) 686 685 { 687 - #ifdef CONFIG_ARCH_HAS_SET_MEMORY 688 - if (fp->locked) { 689 - WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); 690 - /* In case set_memory_rw() fails, we want to be the first 691 - * to crash here instead of some random place later on. 692 - */ 693 - fp->locked = 0; 694 - } 695 - #endif 686 + if (fp->undo_set_mem) 687 + set_memory_rw((unsigned long)fp, fp->pages); 696 688 } 697 689 698 690 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) 699 691 { 700 - #ifdef CONFIG_ARCH_HAS_SET_MEMORY 701 - hdr->locked = 1; 702 - if (set_memory_ro((unsigned long)hdr, hdr->pages)) 703 - hdr->locked = 0; 704 - #endif 692 + set_memory_ro((unsigned long)hdr, hdr->pages); 705 693 } 706 694 707 695 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) 708 696 { 709 - #ifdef CONFIG_ARCH_HAS_SET_MEMORY 710 - if (hdr->locked) { 711 - WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); 712 - /* In case set_memory_rw() fails, we want to be the first 713 - * to crash here instead of some random place later on. 714 - */ 715 - hdr->locked = 0; 716 - } 717 - #endif 697 + set_memory_rw((unsigned long)hdr, hdr->pages); 718 698 } 719 699 720 700 static inline struct bpf_binary_header * ··· 703 727 704 728 return (void *)addr; 705 729 } 706 - 707 - #ifdef CONFIG_ARCH_HAS_SET_MEMORY 708 - static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp) 709 - { 710 - if (!fp->locked) 711 - return -ENOLCK; 712 - if (fp->jited) { 713 - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); 714 - 715 - if (!hdr->locked) 716 - return -ENOLCK; 717 - } 718 - 719 - return 0; 720 - } 721 - #endif 722 730 723 731 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); 724 732 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
+2
include/linux/mlx5/eswitch.h
··· 8 8 9 9 #include <linux/mlx5/driver.h> 10 10 11 + #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) 12 + 11 13 enum { 12 14 SRIOV_NONE, 13 15 SRIOV_LEGACY,
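The new MLX5_ESWITCH_MANAGER() macro wraps the capability bit that the mlx5_ifc.h hunk below renames to eswitch_manager. A minimal sketch of the intended guard at a call site follows; the function name is hypothetical, not from this series.

	/* Sketch only: gate an eswitch operation on the manager capability;
	 * mlx5_example_eswitch_op() is a hypothetical caller. */
	static int mlx5_example_eswitch_op(struct mlx5_core_dev *mdev)
	{
		if (!MLX5_ESWITCH_MANAGER(mdev))
			return -EOPNOTSUPP;	/* this function cannot manage the eswitch */

		/* ... proceed with eswitch configuration ... */
		return 0;
	}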
+1 -1
include/linux/mlx5/mlx5_ifc.h
··· 922 922 u8 vnic_env_queue_counters[0x1]; 923 923 u8 ets[0x1]; 924 924 u8 nic_flow_table[0x1]; 925 - u8 eswitch_flow_table[0x1]; 925 + u8 eswitch_manager[0x1]; 926 926 u8 device_memory[0x1]; 927 927 u8 mcam_reg[0x1]; 928 928 u8 pcam_reg[0x1];
+20
include/linux/netdevice.h
··· 2789 2789 if (PTR_ERR(pp) != -EINPROGRESS) 2790 2790 NAPI_GRO_CB(skb)->flush |= flush; 2791 2791 } 2792 + static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 2793 + struct sk_buff **pp, 2794 + int flush, 2795 + struct gro_remcsum *grc) 2796 + { 2797 + if (PTR_ERR(pp) != -EINPROGRESS) { 2798 + NAPI_GRO_CB(skb)->flush |= flush; 2799 + skb_gro_remcsum_cleanup(skb, grc); 2800 + skb->remcsum_offload = 0; 2801 + } 2802 + } 2792 2803 #else 2793 2804 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) 2794 2805 { 2795 2806 NAPI_GRO_CB(skb)->flush |= flush; 2807 + } 2808 + static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 2809 + struct sk_buff **pp, 2810 + int flush, 2811 + struct gro_remcsum *grc) 2812 + { 2813 + NAPI_GRO_CB(skb)->flush |= flush; 2814 + skb_gro_remcsum_cleanup(skb, grc); 2815 + skb->remcsum_offload = 0; 2796 2816 } 2797 2817 #endif 2798 2818
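skb_gro_flush_final_remcsum() folds the remote-checksum cleanup into the same -EINPROGRESS-aware epilogue as skb_gro_flush_final(); the fou, GRE offload, and UDP offload hunks below convert their out: labels to these helpers. A sketch of the resulting call pattern in a GRO receive handler, with the handler name and parsing step purely illustrative:

	/* Sketch of the shared GRO-receive epilogue these helpers enable;
	 * example_gro_receive() and the parsing step are illustrative. */
	static struct sk_buff **example_gro_receive(struct sk_buff **head,
						    struct sk_buff *skb)
	{
		struct sk_buff **pp = NULL;
		struct gro_remcsum grc;
		int flush = 1;

		skb_gro_remcsum_init(&grc);

		/* ... parse headers; an inner gro_receive may set pp, possibly
		 * to ERR_PTR(-EINPROGRESS) when a lower layer takes over, and
		 * clear flush ... */

		/* One call covers both the -EINPROGRESS and the normal case,
		 * replacing the open-coded cleanup removed in the fou hunk: */
		skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
		return pp;
	}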
+1
include/net/net_namespace.h
··· 128 128 #endif 129 129 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 130 130 struct netns_nf_frag nf_frag; 131 + struct ctl_table_header *nf_frag_frags_hdr; 131 132 #endif 132 133 struct sock *nfnl; 133 134 struct sock *nfnl_stash;
-1
include/net/netns/ipv6.h
··· 109 109 110 110 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 111 111 struct netns_nf_frag { 112 - struct netns_sysctl_ipv6 sysctl; 113 112 struct netns_frags frags; 114 113 }; 115 114 #endif
+5
include/net/pkt_cls.h
··· 111 111 { 112 112 } 113 113 114 + static inline bool tcf_block_shared(struct tcf_block *block) 115 + { 116 + return false; 117 + } 118 + 114 119 static inline struct Qdisc *tcf_block_q(struct tcf_block *block) 115 120 { 116 121 return NULL;
+23 -5
include/uapi/linux/bpf.h
··· 1857 1857 * is resolved), the nexthop address is returned in ipv4_dst 1858 1858 * or ipv6_dst based on family, smac is set to mac address of 1859 1859 * egress device, dmac is set to nexthop mac address, rt_metric 1860 - * is set to metric from route (IPv4/IPv6 only). 1860 + * is set to metric from route (IPv4/IPv6 only), and ifindex 1861 + * is set to the device index of the nexthop from the FIB lookup. 1861 1862 * 1862 1863 * *plen* argument is the size of the passed in struct. 1863 1864 * *flags* argument can be a combination of one or more of the ··· 1874 1873 * *ctx* is either **struct xdp_md** for XDP programs or 1875 1874 * **struct sk_buff** tc cls_act programs. 1876 1875 * Return 1877 - * Egress device index on success, 0 if packet needs to continue 1878 - * up the stack for further processing or a negative error in case 1879 - * of failure. 1876 + * * < 0 if any input argument is invalid 1877 + * * 0 on success (packet is forwarded, nexthop neighbor exists) 1878 + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the 1879 + * * packet is not forwarded or needs assist from full stack 1880 1880 * 1881 1881 * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) 1882 1882 * Description ··· 2614 2612 #define BPF_FIB_LOOKUP_DIRECT BIT(0) 2615 2613 #define BPF_FIB_LOOKUP_OUTPUT BIT(1) 2616 2614 2615 + enum { 2616 + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ 2617 + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ 2618 + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ 2619 + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ 2620 + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ 2621 + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ 2622 + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ 2623 + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ 2624 + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ 2625 + }; 2626 + 2617 2627 struct bpf_fib_lookup { 2618 2628 /* input: network family for lookup (AF_INET, AF_INET6) 2619 2629 * output: network family of egress nexthop ··· 2639 2625 2640 2626 /* total length of packet from network header - used for MTU check */ 2641 2627 __u16 tot_len; 2642 - __u32 ifindex; /* L3 device index for lookup */ 2628 + 2629 + /* input: L3 device index for lookup 2630 + * output: device index from FIB lookup 2631 + */ 2632 + __u32 ifindex; 2643 2633 2644 2634 union { 2645 2635 /* inputs to lookup */
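With the reworked documentation above, bpf_fib_lookup() no longer returns an egress ifindex: 0 now means success with params->ifindex set to the nexthop device, and positive values are BPF_FIB_LKUP_RET_* reason codes. A minimal XDP sketch of the new calling convention; it assumes the usual libbpf SEC() macro and helper declarations, and elides header parsing:

	/* Sketch only: program name is illustrative, parsing elided. */
	SEC("xdp")
	int xdp_fwd_example(struct xdp_md *ctx)
	{
		struct bpf_fib_lookup params = {};
		int rc;

		/* ... fill params.family, the addresses, tot_len and the
		 * ingress params.ifindex from the packet headers ... */

		rc = bpf_fib_lookup(ctx, &params, sizeof(params), 0);
		if (rc < 0)		/* invalid argument */
			return XDP_ABORTED;
		if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
			/* ifindex is now an output: the nexthop device. Rewrite
			 * the MACs from params.smac/params.dmac, then redirect. */
			return bpf_redirect(params.ifindex, 0);
		}
		return XDP_PASS;	/* other BPF_FIB_LKUP_RET_* codes: let the stack handle it */
	}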
+54
kernel/bpf/cgroup.c
··· 428 428 return ret; 429 429 } 430 430 431 + int cgroup_bpf_prog_attach(const union bpf_attr *attr, 432 + enum bpf_prog_type ptype, struct bpf_prog *prog) 433 + { 434 + struct cgroup *cgrp; 435 + int ret; 436 + 437 + cgrp = cgroup_get_from_fd(attr->target_fd); 438 + if (IS_ERR(cgrp)) 439 + return PTR_ERR(cgrp); 440 + 441 + ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, 442 + attr->attach_flags); 443 + cgroup_put(cgrp); 444 + return ret; 445 + } 446 + 447 + int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 448 + { 449 + struct bpf_prog *prog; 450 + struct cgroup *cgrp; 451 + int ret; 452 + 453 + cgrp = cgroup_get_from_fd(attr->target_fd); 454 + if (IS_ERR(cgrp)) 455 + return PTR_ERR(cgrp); 456 + 457 + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 458 + if (IS_ERR(prog)) 459 + prog = NULL; 460 + 461 + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); 462 + if (prog) 463 + bpf_prog_put(prog); 464 + 465 + cgroup_put(cgrp); 466 + return ret; 467 + } 468 + 469 + int cgroup_bpf_prog_query(const union bpf_attr *attr, 470 + union bpf_attr __user *uattr) 471 + { 472 + struct cgroup *cgrp; 473 + int ret; 474 + 475 + cgrp = cgroup_get_from_fd(attr->query.target_fd); 476 + if (IS_ERR(cgrp)) 477 + return PTR_ERR(cgrp); 478 + 479 + ret = cgroup_bpf_query(cgrp, attr, uattr); 480 + 481 + cgroup_put(cgrp); 482 + return ret; 483 + } 484 + 431 485 /** 432 486 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering 433 487 * @sk: The socket sending or receiving traffic
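These wrappers absorb the cgroup fd handling that the kernel/bpf/syscall.c hunk below removes; the user-space entry point is unchanged. A minimal sketch of attaching a program to a cgroup through bpf(2), with error handling trimmed:

	/* Sketch only: cgroup_fd is an open fd on a cgroup v2 directory and
	 * prog_fd a loaded program of a matching type. */
	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int attach_to_cgroup(int cgroup_fd, int prog_fd)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.target_fd = cgroup_fd;	/* consumed by cgroup_bpf_prog_attach() */
		attr.attach_bpf_fd = prog_fd;
		attr.attach_type = BPF_CGROUP_INET_INGRESS;

		return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
	}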
-28
kernel/bpf/core.c
··· 598 598 bpf_fill_ill_insns(hdr, size); 599 599 600 600 hdr->pages = size / PAGE_SIZE; 601 - hdr->locked = 0; 602 - 603 601 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), 604 602 PAGE_SIZE - sizeof(*hdr)); 605 603 start = (get_random_int() % hole) & ~(alignment - 1); ··· 1448 1450 return 0; 1449 1451 } 1450 1452 1451 - static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp) 1452 - { 1453 - #ifdef CONFIG_ARCH_HAS_SET_MEMORY 1454 - int i, err; 1455 - 1456 - for (i = 0; i < fp->aux->func_cnt; i++) { 1457 - err = bpf_prog_check_pages_ro_single(fp->aux->func[i]); 1458 - if (err) 1459 - return err; 1460 - } 1461 - 1462 - return bpf_prog_check_pages_ro_single(fp); 1463 - #endif 1464 - return 0; 1465 - } 1466 - 1467 1453 static void bpf_prog_select_func(struct bpf_prog *fp) 1468 1454 { 1469 1455 #ifndef CONFIG_BPF_JIT_ALWAYS_ON ··· 1506 1524 * all eBPF JITs might immediately support all features. 1507 1525 */ 1508 1526 *err = bpf_check_tail_call(fp); 1509 - if (*err) 1510 - return fp; 1511 1527 1512 - /* Checkpoint: at this point onwards any cBPF -> eBPF or 1513 - * native eBPF program is read-only. If we failed to change 1514 - * the page attributes (e.g. allocation failure from 1515 - * splitting large pages), then reject the whole program 1516 - * in order to guarantee not ending up with any W+X pages 1517 - * from BPF side in kernel. 1518 - */ 1519 - *err = bpf_prog_check_pages_ro_locked(fp); 1520 1528 return fp; 1521 1529 } 1522 1530 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
+184 -70
kernel/bpf/sockmap.c
··· 72 72 u32 n_buckets; 73 73 u32 elem_size; 74 74 struct bpf_sock_progs progs; 75 + struct rcu_head rcu; 75 76 }; 76 77 77 78 struct htab_elem { ··· 90 89 struct smap_psock_map_entry { 91 90 struct list_head list; 92 91 struct sock **entry; 93 - struct htab_elem *hash_link; 94 - struct bpf_htab *htab; 92 + struct htab_elem __rcu *hash_link; 93 + struct bpf_htab __rcu *htab; 95 94 }; 96 95 97 96 struct smap_psock { ··· 121 120 struct bpf_prog *bpf_parse; 122 121 struct bpf_prog *bpf_verdict; 123 122 struct list_head maps; 123 + spinlock_t maps_lock; 124 124 125 125 /* Back reference used when sock callback trigger sockmap operations */ 126 126 struct sock *sock; ··· 142 140 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); 143 141 static int bpf_tcp_sendpage(struct sock *sk, struct page *page, 144 142 int offset, size_t size, int flags); 143 + static void bpf_tcp_close(struct sock *sk, long timeout); 145 144 146 145 static inline struct smap_psock *smap_psock_sk(const struct sock *sk) 147 146 { ··· 164 161 return !empty; 165 162 } 166 163 167 - static struct proto tcp_bpf_proto; 164 + enum { 165 + SOCKMAP_IPV4, 166 + SOCKMAP_IPV6, 167 + SOCKMAP_NUM_PROTS, 168 + }; 169 + 170 + enum { 171 + SOCKMAP_BASE, 172 + SOCKMAP_TX, 173 + SOCKMAP_NUM_CONFIGS, 174 + }; 175 + 176 + static struct proto *saved_tcpv6_prot __read_mostly; 177 + static DEFINE_SPINLOCK(tcpv6_prot_lock); 178 + static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS]; 179 + static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], 180 + struct proto *base) 181 + { 182 + prot[SOCKMAP_BASE] = *base; 183 + prot[SOCKMAP_BASE].close = bpf_tcp_close; 184 + prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; 185 + prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; 186 + 187 + prot[SOCKMAP_TX] = prot[SOCKMAP_BASE]; 188 + prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg; 189 + prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage; 190 + } 191 + 192 + static void update_sk_prot(struct sock *sk, struct smap_psock *psock) 193 + { 194 + int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4; 195 + int conf = psock->bpf_tx_msg ? 
SOCKMAP_TX : SOCKMAP_BASE; 196 + 197 + sk->sk_prot = &bpf_tcp_prots[family][conf]; 198 + } 199 + 168 200 static int bpf_tcp_init(struct sock *sk) 169 201 { 170 202 struct smap_psock *psock; ··· 219 181 psock->save_close = sk->sk_prot->close; 220 182 psock->sk_proto = sk->sk_prot; 221 183 222 - if (psock->bpf_tx_msg) { 223 - tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; 224 - tcp_bpf_proto.sendpage = bpf_tcp_sendpage; 225 - tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; 226 - tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; 184 + /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */ 185 + if (sk->sk_family == AF_INET6 && 186 + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { 187 + spin_lock_bh(&tcpv6_prot_lock); 188 + if (likely(sk->sk_prot != saved_tcpv6_prot)) { 189 + build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot); 190 + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); 191 + } 192 + spin_unlock_bh(&tcpv6_prot_lock); 227 193 } 228 - 229 - sk->sk_prot = &tcp_bpf_proto; 194 + update_sk_prot(sk, psock); 230 195 rcu_read_unlock(); 231 196 return 0; 232 197 } ··· 260 219 rcu_read_unlock(); 261 220 } 262 221 222 + static struct htab_elem *lookup_elem_raw(struct hlist_head *head, 223 + u32 hash, void *key, u32 key_size) 224 + { 225 + struct htab_elem *l; 226 + 227 + hlist_for_each_entry_rcu(l, head, hash_node) { 228 + if (l->hash == hash && !memcmp(&l->key, key, key_size)) 229 + return l; 230 + } 231 + 232 + return NULL; 233 + } 234 + 235 + static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 236 + { 237 + return &htab->buckets[hash & (htab->n_buckets - 1)]; 238 + } 239 + 240 + static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) 241 + { 242 + return &__select_bucket(htab, hash)->head; 243 + } 244 + 263 245 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 264 246 { 265 247 atomic_dec(&htab->count); 266 248 kfree_rcu(l, rcu); 267 249 } 268 250 251 + static struct smap_psock_map_entry *psock_map_pop(struct sock *sk, 252 + struct smap_psock *psock) 253 + { 254 + struct smap_psock_map_entry *e; 255 + 256 + spin_lock_bh(&psock->maps_lock); 257 + e = list_first_entry_or_null(&psock->maps, 258 + struct smap_psock_map_entry, 259 + list); 260 + if (e) 261 + list_del(&e->list); 262 + spin_unlock_bh(&psock->maps_lock); 263 + return e; 264 + } 265 + 269 266 static void bpf_tcp_close(struct sock *sk, long timeout) 270 267 { 271 268 void (*close_fun)(struct sock *sk, long timeout); 272 - struct smap_psock_map_entry *e, *tmp; 269 + struct smap_psock_map_entry *e; 273 270 struct sk_msg_buff *md, *mtmp; 274 271 struct smap_psock *psock; 275 272 struct sock *osk; ··· 326 247 */ 327 248 close_fun = psock->save_close; 328 249 329 - write_lock_bh(&sk->sk_callback_lock); 330 250 if (psock->cork) { 331 251 free_start_sg(psock->sock, psock->cork); 332 252 kfree(psock->cork); ··· 338 260 kfree(md); 339 261 } 340 262 341 - list_for_each_entry_safe(e, tmp, &psock->maps, list) { 263 + e = psock_map_pop(sk, psock); 264 + while (e) { 342 265 if (e->entry) { 343 266 osk = cmpxchg(e->entry, sk, NULL); 344 267 if (osk == sk) { 345 - list_del(&e->list); 346 268 smap_release_sock(psock, sk); 347 269 } 348 270 } else { 349 - hlist_del_rcu(&e->hash_link->hash_node); 350 - smap_release_sock(psock, e->hash_link->sk); 351 - free_htab_elem(e->htab, e->hash_link); 271 + struct htab_elem *link = rcu_dereference(e->hash_link); 272 + struct bpf_htab *htab = rcu_dereference(e->htab); 273 + struct hlist_head *head; 274 + struct 
htab_elem *l; 275 + struct bucket *b; 276 + 277 + b = __select_bucket(htab, link->hash); 278 + head = &b->head; 279 + raw_spin_lock_bh(&b->lock); 280 + l = lookup_elem_raw(head, 281 + link->hash, link->key, 282 + htab->map.key_size); 283 + /* If another thread deleted this object skip deletion. 284 + * The refcnt on psock may or may not be zero. 285 + */ 286 + if (l) { 287 + hlist_del_rcu(&link->hash_node); 288 + smap_release_sock(psock, link->sk); 289 + free_htab_elem(htab, link); 290 + } 291 + raw_spin_unlock_bh(&b->lock); 352 292 } 293 + e = psock_map_pop(sk, psock); 353 294 } 354 - write_unlock_bh(&sk->sk_callback_lock); 355 295 rcu_read_unlock(); 356 296 close_fun(sk, timeout); 357 297 } ··· 1207 1111 1208 1112 static int bpf_tcp_ulp_register(void) 1209 1113 { 1210 - tcp_bpf_proto = tcp_prot; 1211 - tcp_bpf_proto.close = bpf_tcp_close; 1114 + build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot); 1212 1115 /* Once BPF TX ULP is registered it is never unregistered. It 1213 1116 * will be in the ULP list for the lifetime of the system. Doing 1214 1117 * duplicate registers is not a problem. ··· 1452 1357 { 1453 1358 if (refcount_dec_and_test(&psock->refcnt)) { 1454 1359 tcp_cleanup_ulp(sock); 1360 + write_lock_bh(&sock->sk_callback_lock); 1455 1361 smap_stop_sock(psock, sock); 1362 + write_unlock_bh(&sock->sk_callback_lock); 1456 1363 clear_bit(SMAP_TX_RUNNING, &psock->state); 1457 1364 rcu_assign_sk_user_data(sock, NULL); 1458 1365 call_rcu_sched(&psock->rcu, smap_destroy_psock); ··· 1605 1508 INIT_LIST_HEAD(&psock->maps); 1606 1509 INIT_LIST_HEAD(&psock->ingress); 1607 1510 refcount_set(&psock->refcnt, 1); 1511 + spin_lock_init(&psock->maps_lock); 1608 1512 1609 1513 rcu_assign_sk_user_data(sock, psock); 1610 1514 sock_hold(sock); ··· 1662 1564 return ERR_PTR(err); 1663 1565 } 1664 1566 1665 - static void smap_list_remove(struct smap_psock *psock, 1666 - struct sock **entry, 1667 - struct htab_elem *hash_link) 1567 + static void smap_list_map_remove(struct smap_psock *psock, 1568 + struct sock **entry) 1668 1569 { 1669 1570 struct smap_psock_map_entry *e, *tmp; 1670 1571 1572 + spin_lock_bh(&psock->maps_lock); 1671 1573 list_for_each_entry_safe(e, tmp, &psock->maps, list) { 1672 - if (e->entry == entry || e->hash_link == hash_link) { 1574 + if (e->entry == entry) 1673 1575 list_del(&e->list); 1674 - break; 1675 - } 1676 1576 } 1577 + spin_unlock_bh(&psock->maps_lock); 1578 + } 1579 + 1580 + static void smap_list_hash_remove(struct smap_psock *psock, 1581 + struct htab_elem *hash_link) 1582 + { 1583 + struct smap_psock_map_entry *e, *tmp; 1584 + 1585 + spin_lock_bh(&psock->maps_lock); 1586 + list_for_each_entry_safe(e, tmp, &psock->maps, list) { 1587 + struct htab_elem *c = rcu_dereference(e->hash_link); 1588 + 1589 + if (c == hash_link) 1590 + list_del(&e->list); 1591 + } 1592 + spin_unlock_bh(&psock->maps_lock); 1677 1593 } 1678 1594 1679 1595 static void sock_map_free(struct bpf_map *map) ··· 1713 1601 if (!sock) 1714 1602 continue; 1715 1603 1716 - write_lock_bh(&sock->sk_callback_lock); 1717 1604 psock = smap_psock_sk(sock); 1718 1605 /* This check handles a racing sock event that can get the 1719 1606 * sk_callback_lock before this case but after xchg happens ··· 1720 1609 * to be null and queued for garbage collection. 
1721 1610 */ 1722 1611 if (likely(psock)) { 1723 - smap_list_remove(psock, &stab->sock_map[i], NULL); 1612 + smap_list_map_remove(psock, &stab->sock_map[i]); 1724 1613 smap_release_sock(psock, sock); 1725 1614 } 1726 - write_unlock_bh(&sock->sk_callback_lock); 1727 1615 } 1728 1616 rcu_read_unlock(); 1729 1617 ··· 1771 1661 if (!sock) 1772 1662 return -EINVAL; 1773 1663 1774 - write_lock_bh(&sock->sk_callback_lock); 1775 1664 psock = smap_psock_sk(sock); 1776 1665 if (!psock) 1777 1666 goto out; 1778 1667 1779 1668 if (psock->bpf_parse) 1780 1669 smap_stop_sock(psock, sock); 1781 - smap_list_remove(psock, &stab->sock_map[k], NULL); 1670 + smap_list_map_remove(psock, &stab->sock_map[k]); 1782 1671 smap_release_sock(psock, sock); 1783 1672 out: 1784 - write_unlock_bh(&sock->sk_callback_lock); 1785 1673 return 0; 1786 1674 } 1787 1675 ··· 1860 1752 } 1861 1753 } 1862 1754 1863 - write_lock_bh(&sock->sk_callback_lock); 1864 1755 psock = smap_psock_sk(sock); 1865 1756 1866 1757 /* 2. Do not allow inheriting programs if psock exists and has ··· 1916 1809 if (err) 1917 1810 goto out_free; 1918 1811 smap_init_progs(psock, verdict, parse); 1812 + write_lock_bh(&sock->sk_callback_lock); 1919 1813 smap_start_sock(psock, sock); 1814 + write_unlock_bh(&sock->sk_callback_lock); 1920 1815 } 1921 1816 1922 1817 /* 4. Place psock in sockmap for use and stop any programs on ··· 1928 1819 */ 1929 1820 if (map_link) { 1930 1821 e->entry = map_link; 1822 + spin_lock_bh(&psock->maps_lock); 1931 1823 list_add_tail(&e->list, &psock->maps); 1824 + spin_unlock_bh(&psock->maps_lock); 1932 1825 } 1933 - write_unlock_bh(&sock->sk_callback_lock); 1934 1826 return err; 1935 1827 out_free: 1936 1828 smap_release_sock(psock, sock); ··· 1942 1832 } 1943 1833 if (tx_msg) 1944 1834 bpf_prog_put(tx_msg); 1945 - write_unlock_bh(&sock->sk_callback_lock); 1946 1835 kfree(e); 1947 1836 return err; 1948 1837 } ··· 1978 1869 if (osock) { 1979 1870 struct smap_psock *opsock = smap_psock_sk(osock); 1980 1871 1981 - write_lock_bh(&osock->sk_callback_lock); 1982 - smap_list_remove(opsock, &stab->sock_map[i], NULL); 1872 + smap_list_map_remove(opsock, &stab->sock_map[i]); 1983 1873 smap_release_sock(opsock, osock); 1984 - write_unlock_bh(&osock->sk_callback_lock); 1985 1874 } 1986 1875 out: 1987 1876 return err; ··· 2020 1913 bpf_prog_put(orig); 2021 1914 2022 1915 return 0; 1916 + } 1917 + 1918 + int sockmap_get_from_fd(const union bpf_attr *attr, int type, 1919 + struct bpf_prog *prog) 1920 + { 1921 + int ufd = attr->target_fd; 1922 + struct bpf_map *map; 1923 + struct fd f; 1924 + int err; 1925 + 1926 + f = fdget(ufd); 1927 + map = __bpf_map_get(f); 1928 + if (IS_ERR(map)) 1929 + return PTR_ERR(map); 1930 + 1931 + err = sock_map_prog(map, prog, attr->attach_type); 1932 + fdput(f); 1933 + return err; 2023 1934 } 2024 1935 2025 1936 static void *sock_map_lookup(struct bpf_map *map, void *key) ··· 2168 2043 return ERR_PTR(err); 2169 2044 } 2170 2045 2171 - static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 2046 + static void __bpf_htab_free(struct rcu_head *rcu) 2172 2047 { 2173 - return &htab->buckets[hash & (htab->n_buckets - 1)]; 2174 - } 2048 + struct bpf_htab *htab; 2175 2049 2176 - static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) 2177 - { 2178 - return &__select_bucket(htab, hash)->head; 2050 + htab = container_of(rcu, struct bpf_htab, rcu); 2051 + bpf_map_area_free(htab->buckets); 2052 + kfree(htab); 2179 2053 } 2180 2054 2181 2055 static void sock_hash_free(struct bpf_map 
*map) ··· 2193 2069 */ 2194 2070 rcu_read_lock(); 2195 2071 for (i = 0; i < htab->n_buckets; i++) { 2196 - struct hlist_head *head = select_bucket(htab, i); 2072 + struct bucket *b = __select_bucket(htab, i); 2073 + struct hlist_head *head; 2197 2074 struct hlist_node *n; 2198 2075 struct htab_elem *l; 2199 2076 2077 + raw_spin_lock_bh(&b->lock); 2078 + head = &b->head; 2200 2079 hlist_for_each_entry_safe(l, n, head, hash_node) { 2201 2080 struct sock *sock = l->sk; 2202 2081 struct smap_psock *psock; 2203 2082 2204 2083 hlist_del_rcu(&l->hash_node); 2205 - write_lock_bh(&sock->sk_callback_lock); 2206 2084 psock = smap_psock_sk(sock); 2207 2085 /* This check handles a racing sock event that can get 2208 2086 * the sk_callback_lock before this case but after xchg ··· 2212 2086 * (psock) to be null and queued for garbage collection. 2213 2087 */ 2214 2088 if (likely(psock)) { 2215 - smap_list_remove(psock, NULL, l); 2089 + smap_list_hash_remove(psock, l); 2216 2090 smap_release_sock(psock, sock); 2217 2091 } 2218 - write_unlock_bh(&sock->sk_callback_lock); 2219 - kfree(l); 2092 + free_htab_elem(htab, l); 2220 2093 } 2094 + raw_spin_unlock_bh(&b->lock); 2221 2095 } 2222 2096 rcu_read_unlock(); 2223 - bpf_map_area_free(htab->buckets); 2224 - kfree(htab); 2097 + call_rcu(&htab->rcu, __bpf_htab_free); 2225 2098 } 2226 2099 2227 2100 static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, ··· 2245 2120 l_new->sk = sk; 2246 2121 l_new->hash = hash; 2247 2122 return l_new; 2248 - } 2249 - 2250 - static struct htab_elem *lookup_elem_raw(struct hlist_head *head, 2251 - u32 hash, void *key, u32 key_size) 2252 - { 2253 - struct htab_elem *l; 2254 - 2255 - hlist_for_each_entry_rcu(l, head, hash_node) { 2256 - if (l->hash == hash && !memcmp(&l->key, key, key_size)) 2257 - return l; 2258 - } 2259 - 2260 - return NULL; 2261 2123 } 2262 2124 2263 2125 static inline u32 htab_map_hash(const void *key, u32 key_len) ··· 2366 2254 goto bucket_err; 2367 2255 } 2368 2256 2369 - e->hash_link = l_new; 2370 - e->htab = container_of(map, struct bpf_htab, map); 2257 + rcu_assign_pointer(e->hash_link, l_new); 2258 + rcu_assign_pointer(e->htab, 2259 + container_of(map, struct bpf_htab, map)); 2260 + spin_lock_bh(&psock->maps_lock); 2371 2261 list_add_tail(&e->list, &psock->maps); 2262 + spin_unlock_bh(&psock->maps_lock); 2372 2263 2373 2264 /* add new element to the head of the list, so that 2374 2265 * concurrent search will find it before old elem ··· 2381 2266 psock = smap_psock_sk(l_old->sk); 2382 2267 2383 2268 hlist_del_rcu(&l_old->hash_node); 2384 - smap_list_remove(psock, NULL, l_old); 2269 + smap_list_hash_remove(psock, l_old); 2385 2270 smap_release_sock(psock, l_old->sk); 2386 2271 free_htab_elem(htab, l_old); 2387 2272 } ··· 2441 2326 struct smap_psock *psock; 2442 2327 2443 2328 hlist_del_rcu(&l->hash_node); 2444 - write_lock_bh(&sock->sk_callback_lock); 2445 2329 psock = smap_psock_sk(sock); 2446 2330 /* This check handles a racing sock event that can get the 2447 2331 * sk_callback_lock before this case but after xchg happens ··· 2448 2334 * to be null and queued for garbage collection. 
2449 2335 */ 2450 2336 if (likely(psock)) { 2451 - smap_list_remove(psock, NULL, l); 2337 + smap_list_hash_remove(psock, l); 2452 2338 smap_release_sock(psock, sock); 2453 2339 } 2454 - write_unlock_bh(&sock->sk_callback_lock); 2455 2340 free_htab_elem(htab, l); 2456 2341 ret = 0; 2457 2342 } ··· 2496 2383 .map_get_next_key = sock_hash_get_next_key, 2497 2384 .map_update_elem = sock_hash_update_elem, 2498 2385 .map_delete_elem = sock_hash_delete_elem, 2386 + .map_release_uref = sock_map_release, 2499 2387 }; 2500 2388 2501 2389 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
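The IPv6 branch in bpf_tcp_init() above publishes the rebuilt proto table with a double-checked pattern: a smp_load_acquire() fast path, then lock, re-check, rebuild, and smp_store_release(). Stripped to its skeleton, with names that are illustrative rather than from the patch:

	/* Skeleton of the publish pattern; rebuild_from() stands in for
	 * build_protos() and is hypothetical. */
	static void rebuild_from(struct proto *base);

	static struct proto *saved_base __read_mostly;
	static DEFINE_SPINLOCK(saved_lock);

	static void example_update(struct proto *base)
	{
		if (unlikely(base != smp_load_acquire(&saved_base))) {
			spin_lock_bh(&saved_lock);
			if (likely(base != saved_base)) {
				rebuild_from(base);
				smp_store_release(&saved_base, base);
			}
			spin_unlock_bh(&saved_lock);
		}
	}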
+21 -78
kernel/bpf/syscall.c
··· 1483 1483 return err; 1484 1484 } 1485 1485 1486 - #ifdef CONFIG_CGROUP_BPF 1487 - 1488 1486 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 1489 1487 enum bpf_attach_type attach_type) 1490 1488 { ··· 1497 1499 1498 1500 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 1499 1501 1500 - static int sockmap_get_from_fd(const union bpf_attr *attr, 1501 - int type, bool attach) 1502 - { 1503 - struct bpf_prog *prog = NULL; 1504 - int ufd = attr->target_fd; 1505 - struct bpf_map *map; 1506 - struct fd f; 1507 - int err; 1508 - 1509 - f = fdget(ufd); 1510 - map = __bpf_map_get(f); 1511 - if (IS_ERR(map)) 1512 - return PTR_ERR(map); 1513 - 1514 - if (attach) { 1515 - prog = bpf_prog_get_type(attr->attach_bpf_fd, type); 1516 - if (IS_ERR(prog)) { 1517 - fdput(f); 1518 - return PTR_ERR(prog); 1519 - } 1520 - } 1521 - 1522 - err = sock_map_prog(map, prog, attr->attach_type); 1523 - if (err) { 1524 - fdput(f); 1525 - if (prog) 1526 - bpf_prog_put(prog); 1527 - return err; 1528 - } 1529 - 1530 - fdput(f); 1531 - return 0; 1532 - } 1533 - 1534 1502 #define BPF_F_ATTACH_MASK \ 1535 1503 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) 1536 1504 ··· 1504 1540 { 1505 1541 enum bpf_prog_type ptype; 1506 1542 struct bpf_prog *prog; 1507 - struct cgroup *cgrp; 1508 1543 int ret; 1509 1544 1510 1545 if (!capable(CAP_NET_ADMIN)) ··· 1540 1577 ptype = BPF_PROG_TYPE_CGROUP_DEVICE; 1541 1578 break; 1542 1579 case BPF_SK_MSG_VERDICT: 1543 - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true); 1580 + ptype = BPF_PROG_TYPE_SK_MSG; 1581 + break; 1544 1582 case BPF_SK_SKB_STREAM_PARSER: 1545 1583 case BPF_SK_SKB_STREAM_VERDICT: 1546 - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true); 1584 + ptype = BPF_PROG_TYPE_SK_SKB; 1585 + break; 1547 1586 case BPF_LIRC_MODE2: 1548 - return lirc_prog_attach(attr); 1587 + ptype = BPF_PROG_TYPE_LIRC_MODE2; 1588 + break; 1549 1589 default: 1550 1590 return -EINVAL; 1551 1591 } ··· 1562 1596 return -EINVAL; 1563 1597 } 1564 1598 1565 - cgrp = cgroup_get_from_fd(attr->target_fd); 1566 - if (IS_ERR(cgrp)) { 1567 - bpf_prog_put(prog); 1568 - return PTR_ERR(cgrp); 1599 + switch (ptype) { 1600 + case BPF_PROG_TYPE_SK_SKB: 1601 + case BPF_PROG_TYPE_SK_MSG: 1602 + ret = sockmap_get_from_fd(attr, ptype, prog); 1603 + break; 1604 + case BPF_PROG_TYPE_LIRC_MODE2: 1605 + ret = lirc_prog_attach(attr, prog); 1606 + break; 1607 + default: 1608 + ret = cgroup_bpf_prog_attach(attr, ptype, prog); 1569 1609 } 1570 1610 1571 - ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, 1572 - attr->attach_flags); 1573 1611 if (ret) 1574 1612 bpf_prog_put(prog); 1575 - cgroup_put(cgrp); 1576 - 1577 1613 return ret; 1578 1614 } 1579 1615 ··· 1584 1616 static int bpf_prog_detach(const union bpf_attr *attr) 1585 1617 { 1586 1618 enum bpf_prog_type ptype; 1587 - struct bpf_prog *prog; 1588 - struct cgroup *cgrp; 1589 - int ret; 1590 1619 1591 1620 if (!capable(CAP_NET_ADMIN)) 1592 1621 return -EPERM; ··· 1616 1651 ptype = BPF_PROG_TYPE_CGROUP_DEVICE; 1617 1652 break; 1618 1653 case BPF_SK_MSG_VERDICT: 1619 - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false); 1654 + return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL); 1620 1655 case BPF_SK_SKB_STREAM_PARSER: 1621 1656 case BPF_SK_SKB_STREAM_VERDICT: 1622 - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false); 1657 + return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL); 1623 1658 case BPF_LIRC_MODE2: 1624 1659 return lirc_prog_detach(attr); 1625 1660 default: 1626 1661 return -EINVAL; 
1627 1662 } 1628 1663 1629 - cgrp = cgroup_get_from_fd(attr->target_fd); 1630 - if (IS_ERR(cgrp)) 1631 - return PTR_ERR(cgrp); 1632 - 1633 - prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1634 - if (IS_ERR(prog)) 1635 - prog = NULL; 1636 - 1637 - ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); 1638 - if (prog) 1639 - bpf_prog_put(prog); 1640 - cgroup_put(cgrp); 1641 - return ret; 1664 + return cgroup_bpf_prog_detach(attr, ptype); 1642 1665 } 1643 1666 1644 1667 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt ··· 1634 1681 static int bpf_prog_query(const union bpf_attr *attr, 1635 1682 union bpf_attr __user *uattr) 1636 1683 { 1637 - struct cgroup *cgrp; 1638 - int ret; 1639 - 1640 1684 if (!capable(CAP_NET_ADMIN)) 1641 1685 return -EPERM; 1642 1686 if (CHECK_ATTR(BPF_PROG_QUERY)) ··· 1661 1711 default: 1662 1712 return -EINVAL; 1663 1713 } 1664 - cgrp = cgroup_get_from_fd(attr->query.target_fd); 1665 - if (IS_ERR(cgrp)) 1666 - return PTR_ERR(cgrp); 1667 - ret = cgroup_bpf_query(cgrp, attr, uattr); 1668 - cgroup_put(cgrp); 1669 - return ret; 1714 + 1715 + return cgroup_bpf_prog_query(attr, uattr); 1670 1716 } 1671 - #endif /* CONFIG_CGROUP_BPF */ 1672 1717 1673 1718 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 1674 1719 ··· 2310 2365 case BPF_OBJ_GET: 2311 2366 err = bpf_obj_get(&attr); 2312 2367 break; 2313 - #ifdef CONFIG_CGROUP_BPF 2314 2368 case BPF_PROG_ATTACH: 2315 2369 err = bpf_prog_attach(&attr); 2316 2370 break; ··· 2319 2375 case BPF_PROG_QUERY: 2320 2376 err = bpf_prog_query(&attr, uattr); 2321 2377 break; 2322 - #endif 2323 2378 case BPF_PROG_TEST_RUN: 2324 2379 err = bpf_prog_test_run(&attr, uattr); 2325 2380 break;
+20
lib/test_bpf.c
··· 5282 5282 { /* Mainly checking JIT here. */ 5283 5283 "BPF_MAXINSNS: Ctx heavy transformations", 5284 5284 { }, 5285 + #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5286 + CLASSIC | FLAG_EXPECTED_FAIL, 5287 + #else 5285 5288 CLASSIC, 5289 + #endif 5286 5290 { }, 5287 5291 { 5288 5292 { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, 5289 5293 { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } 5290 5294 }, 5291 5295 .fill_helper = bpf_fill_maxinsns6, 5296 + .expected_errcode = -ENOTSUPP, 5292 5297 }, 5293 5298 { /* Mainly checking JIT here. */ 5294 5299 "BPF_MAXINSNS: Call heavy transformations", 5295 5300 { }, 5301 + #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5302 + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, 5303 + #else 5296 5304 CLASSIC | FLAG_NO_DATA, 5305 + #endif 5297 5306 { }, 5298 5307 { { 1, 0 }, { 10, 0 } }, 5299 5308 .fill_helper = bpf_fill_maxinsns7, 5309 + .expected_errcode = -ENOTSUPP, 5300 5310 }, 5301 5311 { /* Mainly checking JIT here. */ 5302 5312 "BPF_MAXINSNS: Jump heavy test", ··· 5357 5347 { 5358 5348 "BPF_MAXINSNS: exec all MSH", 5359 5349 { }, 5350 + #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5351 + CLASSIC | FLAG_EXPECTED_FAIL, 5352 + #else 5360 5353 CLASSIC, 5354 + #endif 5361 5355 { 0xfa, 0xfb, 0xfc, 0xfd, }, 5362 5356 { { 4, 0xababab83 } }, 5363 5357 .fill_helper = bpf_fill_maxinsns13, 5358 + .expected_errcode = -ENOTSUPP, 5364 5359 }, 5365 5360 { 5366 5361 "BPF_MAXINSNS: ld_abs+get_processor_id", 5367 5362 { }, 5363 + #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5364 + CLASSIC | FLAG_EXPECTED_FAIL, 5365 + #else 5368 5366 CLASSIC, 5367 + #endif 5369 5368 { }, 5370 5369 { { 1, 0xbee } }, 5371 5370 .fill_helper = bpf_fill_ld_abs_get_processor_id, 5371 + .expected_errcode = -ENOTSUPP, 5372 5372 }, 5373 5373 /* 5374 5374 * LD_IND / LD_ABS on fragmented SKBs
+1 -1
net/8021q/vlan.c
··· 693 693 out_unlock: 694 694 rcu_read_unlock(); 695 695 out: 696 - NAPI_GRO_CB(skb)->flush |= flush; 696 + skb_gro_flush_final(skb, pp, flush); 697 697 698 698 return pp; 699 699 }
-4
net/Makefile
··· 20 20 obj-$(CONFIG_XFRM) += xfrm/ 21 21 obj-$(CONFIG_UNIX) += unix/ 22 22 obj-$(CONFIG_NET) += ipv6/ 23 - ifneq ($(CC_CAN_LINK),y) 24 - $(warning CC cannot link executables. Skipping bpfilter.) 25 - else 26 23 obj-$(CONFIG_BPFILTER) += bpfilter/ 27 - endif 28 24 obj-$(CONFIG_PACKET) += packet/ 29 25 obj-$(CONFIG_NET_KEY) += key/ 30 26 obj-$(CONFIG_BRIDGE) += bridge/
+1 -1
net/bpfilter/Kconfig
··· 1 1 menuconfig BPFILTER 2 2 bool "BPF based packet filtering framework (BPFILTER)" 3 - default n 4 3 depends on NET && BPF && INET 5 4 help 6 5 This builds experimental bpfilter framework that is aiming to ··· 8 9 if BPFILTER 9 10 config BPFILTER_UMH 10 11 tristate "bpfilter kernel module with user mode helper" 12 + depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC)) 11 13 default m 12 14 help 13 15 This builds bpfilter kernel module with embedded user mode helper
+2 -15
net/bpfilter/Makefile
··· 15 15 HOSTLDFLAGS += -static 16 16 endif 17 17 18 - # a bit of elf magic to convert bpfilter_umh binary into a binary blob 19 - # inside bpfilter_umh.o elf file referenced by 20 - # _binary_net_bpfilter_bpfilter_umh_start symbol 21 - # which bpfilter_kern.c passes further into umh blob loader at run-time 22 - quiet_cmd_copy_umh = GEN $@ 23 - cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \ 24 - $(OBJCOPY) -I binary \ 25 - `LC_ALL=C $(OBJDUMP) -f net/bpfilter/bpfilter_umh \ 26 - |awk -F' |,' '/file format/{print "-O",$$NF} \ 27 - /^architecture:/{print "-B",$$2}'` \ 28 - --rename-section .data=.init.rodata $< $@ 29 - 30 - $(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh 31 - $(call cmd,copy_umh) 18 + $(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh 32 19 33 20 obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o 34 - bpfilter-objs += bpfilter_kern.o bpfilter_umh.o 21 + bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
+5 -6
net/bpfilter/bpfilter_kern.c
··· 10 10 #include <linux/file.h> 11 11 #include "msgfmt.h" 12 12 13 - #define UMH_start _binary_net_bpfilter_bpfilter_umh_start 14 - #define UMH_end _binary_net_bpfilter_bpfilter_umh_end 15 - 16 - extern char UMH_start; 17 - extern char UMH_end; 13 + extern char bpfilter_umh_start; 14 + extern char bpfilter_umh_end; 18 15 19 16 static struct umh_info info; 20 17 /* since ip_getsockopt() can run in parallel, serialize access to umh */ ··· 90 93 int err; 91 94 92 95 /* fork usermode process */ 93 - err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); 96 + err = fork_usermode_blob(&bpfilter_umh_start, 97 + &bpfilter_umh_end - &bpfilter_umh_start, 98 + &info); 94 99 if (err) 95 100 return err; 96 101 pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
+7
net/bpfilter/bpfilter_umh_blob.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + .section .init.rodata, "a" 3 + .global bpfilter_umh_start 4 + bpfilter_umh_start: 5 + .incbin "net/bpfilter/bpfilter_umh" 6 + .global bpfilter_umh_end 7 + bpfilter_umh_end:
+2 -9
net/core/dev_ioctl.c
··· 285 285 if (ifr->ifr_qlen < 0) 286 286 return -EINVAL; 287 287 if (dev->tx_queue_len ^ ifr->ifr_qlen) { 288 - unsigned int orig_len = dev->tx_queue_len; 289 - 290 - dev->tx_queue_len = ifr->ifr_qlen; 291 - err = call_netdevice_notifiers( 292 - NETDEV_CHANGE_TX_QUEUE_LEN, dev); 293 - err = notifier_to_errno(err); 294 - if (err) { 295 - dev->tx_queue_len = orig_len; 288 + err = dev_change_tx_queue_len(dev, ifr->ifr_qlen); 289 + if (err) 296 290 return err; 297 - } 298 291 } 299 292 return 0; 300 293
+79 -1
net/core/fib_rules.c
··· 416 416 if (rule->mark && r->mark != rule->mark) 417 417 continue; 418 418 419 + if (rule->suppress_ifgroup != -1 && 420 + r->suppress_ifgroup != rule->suppress_ifgroup) 421 + continue; 422 + 423 + if (rule->suppress_prefixlen != -1 && 424 + r->suppress_prefixlen != rule->suppress_prefixlen) 425 + continue; 426 + 419 427 if (rule->mark_mask && r->mark_mask != rule->mark_mask) 420 428 continue; 421 429 ··· 442 434 continue; 443 435 444 436 if (rule->ip_proto && r->ip_proto != rule->ip_proto) 437 + continue; 438 + 439 + if (rule->proto && r->proto != rule->proto) 445 440 continue; 446 441 447 442 if (fib_rule_port_range_set(&rule->sport_range) && ··· 656 645 return err; 657 646 } 658 647 648 + static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, 649 + struct nlattr **tb, struct fib_rule *rule) 650 + { 651 + struct fib_rule *r; 652 + 653 + list_for_each_entry(r, &ops->rules_list, list) { 654 + if (r->action != rule->action) 655 + continue; 656 + 657 + if (r->table != rule->table) 658 + continue; 659 + 660 + if (r->pref != rule->pref) 661 + continue; 662 + 663 + if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) 664 + continue; 665 + 666 + if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) 667 + continue; 668 + 669 + if (r->mark != rule->mark) 670 + continue; 671 + 672 + if (r->suppress_ifgroup != rule->suppress_ifgroup) 673 + continue; 674 + 675 + if (r->suppress_prefixlen != rule->suppress_prefixlen) 676 + continue; 677 + 678 + if (r->mark_mask != rule->mark_mask) 679 + continue; 680 + 681 + if (r->tun_id != rule->tun_id) 682 + continue; 683 + 684 + if (r->fr_net != rule->fr_net) 685 + continue; 686 + 687 + if (r->l3mdev != rule->l3mdev) 688 + continue; 689 + 690 + if (!uid_eq(r->uid_range.start, rule->uid_range.start) || 691 + !uid_eq(r->uid_range.end, rule->uid_range.end)) 692 + continue; 693 + 694 + if (r->ip_proto != rule->ip_proto) 695 + continue; 696 + 697 + if (r->proto != rule->proto) 698 + continue; 699 + 700 + if (!fib_rule_port_range_compare(&r->sport_range, 701 + &rule->sport_range)) 702 + continue; 703 + 704 + if (!fib_rule_port_range_compare(&r->dport_range, 705 + &rule->dport_range)) 706 + continue; 707 + 708 + if (!ops->compare(r, frh, tb)) 709 + continue; 710 + return 1; 711 + } 712 + return 0; 713 + } 714 + 659 715 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, 660 716 struct netlink_ext_ack *extack) 661 717 { ··· 757 679 goto errout; 758 680 759 681 if ((nlh->nlmsg_flags & NLM_F_EXCL) && 760 - rule_find(ops, frh, tb, rule, user_priority)) { 682 + rule_exists(ops, frh, tb, rule)) { 761 683 err = -EEXIST; 762 684 goto errout_free; 763 685 }
+54 -32
net/core/filter.c
··· 4073 4073 memcpy(params->smac, dev->dev_addr, ETH_ALEN); 4074 4074 params->h_vlan_TCI = 0; 4075 4075 params->h_vlan_proto = 0; 4076 + params->ifindex = dev->ifindex; 4076 4077 4077 - return dev->ifindex; 4078 + return 0; 4078 4079 } 4079 4080 #endif 4080 4081 ··· 4099 4098 /* verify forwarding is enabled on this interface */ 4100 4099 in_dev = __in_dev_get_rcu(dev); 4101 4100 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) 4102 - return 0; 4101 + return BPF_FIB_LKUP_RET_FWD_DISABLED; 4103 4102 4104 4103 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4105 4104 fl4.flowi4_iif = 1; ··· 4124 4123 4125 4124 tb = fib_get_table(net, tbid); 4126 4125 if (unlikely(!tb)) 4127 - return 0; 4126 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4128 4127 4129 4128 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); 4130 4129 } else { ··· 4136 4135 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); 4137 4136 } 4138 4137 4139 - if (err || res.type != RTN_UNICAST) 4140 - return 0; 4138 + if (err) { 4139 + /* map fib lookup errors to RTN_ type */ 4140 + if (err == -EINVAL) 4141 + return BPF_FIB_LKUP_RET_BLACKHOLE; 4142 + if (err == -EHOSTUNREACH) 4143 + return BPF_FIB_LKUP_RET_UNREACHABLE; 4144 + if (err == -EACCES) 4145 + return BPF_FIB_LKUP_RET_PROHIBIT; 4146 + 4147 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4148 + } 4149 + 4150 + if (res.type != RTN_UNICAST) 4151 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4141 4152 4142 4153 if (res.fi->fib_nhs > 1) 4143 4154 fib_select_path(net, &res, &fl4, NULL); ··· 4157 4144 if (check_mtu) { 4158 4145 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); 4159 4146 if (params->tot_len > mtu) 4160 - return 0; 4147 + return BPF_FIB_LKUP_RET_FRAG_NEEDED; 4161 4148 } 4162 4149 4163 4150 nh = &res.fi->fib_nh[res.nh_sel]; 4164 4151 4165 4152 /* do not handle lwt encaps right now */ 4166 4153 if (nh->nh_lwtstate) 4167 - return 0; 4154 + return BPF_FIB_LKUP_RET_UNSUPP_LWT; 4168 4155 4169 4156 dev = nh->nh_dev; 4170 - if (unlikely(!dev)) 4171 - return 0; 4172 - 4173 4157 if (nh->nh_gw) 4174 4158 params->ipv4_dst = nh->nh_gw; 4175 4159 ··· 4176 4166 * rcu_read_lock_bh is not needed here 4177 4167 */ 4178 4168 neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); 4179 - if (neigh) 4180 - return bpf_fib_set_fwd_params(params, neigh, dev); 4169 + if (!neigh) 4170 + return BPF_FIB_LKUP_RET_NO_NEIGH; 4181 4171 4182 - return 0; 4172 + return bpf_fib_set_fwd_params(params, neigh, dev); 4183 4173 } 4184 4174 #endif 4185 4175 ··· 4200 4190 4201 4191 /* link local addresses are never forwarded */ 4202 4192 if (rt6_need_strict(dst) || rt6_need_strict(src)) 4203 - return 0; 4193 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4204 4194 4205 4195 dev = dev_get_by_index_rcu(net, params->ifindex); 4206 4196 if (unlikely(!dev)) ··· 4208 4198 4209 4199 idev = __in6_dev_get_safely(dev); 4210 4200 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) 4211 - return 0; 4201 + return BPF_FIB_LKUP_RET_FWD_DISABLED; 4212 4202 4213 4203 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4214 4204 fl6.flowi6_iif = 1; ··· 4235 4225 4236 4226 tb = ipv6_stub->fib6_get_table(net, tbid); 4237 4227 if (unlikely(!tb)) 4238 - return 0; 4228 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4239 4229 4240 4230 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); 4241 4231 } else { ··· 4248 4238 } 4249 4239 4250 4240 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) 4251 - return 0; 4241 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4252 4242 4253 - if (unlikely(f6i->fib6_flags & RTF_REJECT || 4254 - f6i->fib6_type != 
RTN_UNICAST)) 4255 - return 0; 4243 + if (unlikely(f6i->fib6_flags & RTF_REJECT)) { 4244 + switch (f6i->fib6_type) { 4245 + case RTN_BLACKHOLE: 4246 + return BPF_FIB_LKUP_RET_BLACKHOLE; 4247 + case RTN_UNREACHABLE: 4248 + return BPF_FIB_LKUP_RET_UNREACHABLE; 4249 + case RTN_PROHIBIT: 4250 + return BPF_FIB_LKUP_RET_PROHIBIT; 4251 + default: 4252 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4253 + } 4254 + } 4255 + 4256 + if (f6i->fib6_type != RTN_UNICAST) 4257 + return BPF_FIB_LKUP_RET_NOT_FWDED; 4256 4258 4257 4259 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) 4258 4260 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, ··· 4274 4252 if (check_mtu) { 4275 4253 mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); 4276 4254 if (params->tot_len > mtu) 4277 - return 0; 4255 + return BPF_FIB_LKUP_RET_FRAG_NEEDED; 4278 4256 } 4279 4257 4280 4258 if (f6i->fib6_nh.nh_lwtstate) 4281 - return 0; 4259 + return BPF_FIB_LKUP_RET_UNSUPP_LWT; 4282 4260 4283 4261 if (f6i->fib6_flags & RTF_GATEWAY) 4284 4262 *dst = f6i->fib6_nh.nh_gw; ··· 4292 4270 */ 4293 4271 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, 4294 4272 ndisc_hashfn, dst, dev); 4295 - if (neigh) 4296 - return bpf_fib_set_fwd_params(params, neigh, dev); 4273 + if (!neigh) 4274 + return BPF_FIB_LKUP_RET_NO_NEIGH; 4297 4275 4298 - return 0; 4276 + return bpf_fib_set_fwd_params(params, neigh, dev); 4299 4277 } 4300 4278 #endif 4301 4279 ··· 4337 4315 struct bpf_fib_lookup *, params, int, plen, u32, flags) 4338 4316 { 4339 4317 struct net *net = dev_net(skb->dev); 4340 - int index = -EAFNOSUPPORT; 4318 + int rc = -EAFNOSUPPORT; 4341 4319 4342 4320 if (plen < sizeof(*params)) 4343 4321 return -EINVAL; ··· 4348 4326 switch (params->family) { 4349 4327 #if IS_ENABLED(CONFIG_INET) 4350 4328 case AF_INET: 4351 - index = bpf_ipv4_fib_lookup(net, params, flags, false); 4329 + rc = bpf_ipv4_fib_lookup(net, params, flags, false); 4352 4330 break; 4353 4331 #endif 4354 4332 #if IS_ENABLED(CONFIG_IPV6) 4355 4333 case AF_INET6: 4356 - index = bpf_ipv6_fib_lookup(net, params, flags, false); 4334 + rc = bpf_ipv6_fib_lookup(net, params, flags, false); 4357 4335 break; 4358 4336 #endif 4359 4337 } 4360 4338 4361 - if (index > 0) { 4339 + if (!rc) { 4362 4340 struct net_device *dev; 4363 4341 4364 - dev = dev_get_by_index_rcu(net, index); 4342 + dev = dev_get_by_index_rcu(net, params->ifindex); 4365 4343 if (!is_skb_forwardable(dev, skb)) 4366 - index = 0; 4344 + rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; 4367 4345 } 4368 4346 4369 - return index; 4347 + return rc; 4370 4348 } 4371 4349 4372 4350 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
+1 -2
net/core/skbuff.c
··· 5276 5276 if (npages >= 1 << order) { 5277 5277 page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | 5278 5278 __GFP_COMP | 5279 - __GFP_NOWARN | 5280 - __GFP_NORETRY, 5279 + __GFP_NOWARN, 5281 5280 order); 5282 5281 if (page) 5283 5282 goto fill_page;
+5 -2
net/core/sock.c
··· 3243 3243 3244 3244 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, 3245 3245 rsk_prot->obj_size, 0, 3246 - prot->slab_flags, NULL); 3246 + SLAB_ACCOUNT | prot->slab_flags, 3247 + NULL); 3247 3248 3248 3249 if (!rsk_prot->slab) { 3249 3250 pr_crit("%s: Can't create request sock SLAB cache!\n", ··· 3259 3258 if (alloc_slab) { 3260 3259 prot->slab = kmem_cache_create_usercopy(prot->name, 3261 3260 prot->obj_size, 0, 3262 - SLAB_HWCACHE_ALIGN | prot->slab_flags, 3261 + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | 3262 + prot->slab_flags, 3263 3263 prot->useroffset, prot->usersize, 3264 3264 NULL); 3265 3265 ··· 3283 3281 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 3284 3282 prot->twsk_prot->twsk_obj_size, 3285 3283 0, 3284 + SLAB_ACCOUNT | 3286 3285 prot->slab_flags, 3287 3286 NULL); 3288 3287 if (prot->twsk_prot->twsk_slab == NULL)
+1 -3
net/ipv4/fou.c
··· 448 448 out_unlock: 449 449 rcu_read_unlock(); 450 450 out: 451 - NAPI_GRO_CB(skb)->flush |= flush; 452 - skb_gro_remcsum_cleanup(skb, &grc); 453 - skb->remcsum_offload = 0; 451 + skb_gro_flush_final_remcsum(skb, pp, flush, &grc); 454 452 455 453 return pp; 456 454 }
+1 -1
net/ipv4/gre_offload.c
··· 223 223 out_unlock: 224 224 rcu_read_unlock(); 225 225 out: 226 - NAPI_GRO_CB(skb)->flush |= flush; 226 + skb_gro_flush_final(skb, pp, flush); 227 227 228 228 return pp; 229 229 }
+13 -5
net/ipv4/sysctl_net_ipv4.c
··· 265 265 ipv4.sysctl_tcp_fastopen); 266 266 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; 267 267 struct tcp_fastopen_context *ctxt; 268 - int ret; 269 268 u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ 269 + __le32 key[4]; 270 + int ret, i; 270 271 271 272 tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); 272 273 if (!tbl.data) ··· 276 275 rcu_read_lock(); 277 276 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); 278 277 if (ctxt) 279 - memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); 278 + memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); 280 279 else 281 - memset(user_key, 0, sizeof(user_key)); 280 + memset(key, 0, sizeof(key)); 282 281 rcu_read_unlock(); 282 + 283 + for (i = 0; i < ARRAY_SIZE(key); i++) 284 + user_key[i] = le32_to_cpu(key[i]); 283 285 284 286 snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", 285 287 user_key[0], user_key[1], user_key[2], user_key[3]); ··· 294 290 ret = -EINVAL; 295 291 goto bad_key; 296 292 } 297 - tcp_fastopen_reset_cipher(net, NULL, user_key, 293 + 294 + for (i = 0; i < ARRAY_SIZE(user_key); i++) 295 + key[i] = cpu_to_le32(user_key[i]); 296 + 297 + tcp_fastopen_reset_cipher(net, NULL, key, 298 298 TCP_FASTOPEN_KEY_LENGTH); 299 299 } 300 300 301 301 bad_key: 302 302 pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", 303 - user_key[0], user_key[1], user_key[2], user_key[3], 303 + user_key[0], user_key[1], user_key[2], user_key[3], 304 304 (char *)tbl.data, ret); 305 305 kfree(tbl.data); 306 306 return ret;
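The fix pins the stored TCP Fast Open key to little endian, so the "%08x-%08x-%08x-%08x" sysctl string is identical on big- and little-endian hosts; previously the raw key bytes were interpreted as host-order words. A sketch of the read-side conversion the hunk adds, with a hypothetical helper name:

	/* Sketch mirroring the read side of the hunk: turn the stored __le32
	 * key words into the host-order values used for the sysctl string. */
	static void tfo_key_to_user(const __le32 key[4], u32 user_key[4])
	{
		int i;

		for (i = 0; i < 4; i++)
			user_key[i] = le32_to_cpu(key[i]);	/* same string on any host */
	}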
+11 -2
net/ipv4/tcp_input.c
··· 265 265 * it is probably a retransmit.
266 266 */
267 267 if (tp->ecn_flags & TCP_ECN_SEEN)
268 - tcp_enter_quickack_mode(sk, 1);
268 + tcp_enter_quickack_mode(sk, 2);
269 269 break;
270 270 case INET_ECN_CE:
271 271 if (tcp_ca_needs_ecn(sk))
··· 273 273
274 274 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
275 275 /* Better not delay acks, sender can have a very low cwnd */
276 - tcp_enter_quickack_mode(sk, 1);
276 + tcp_enter_quickack_mode(sk, 2);
277 277 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
278 278 }
279 279 tp->ecn_flags |= TCP_ECN_SEEN;
··· 3181 3181
3182 3182 if (tcp_is_reno(tp)) {
3183 3183 tcp_remove_reno_sacks(sk, pkts_acked);
3184 +
3185 + /* If any of the cumulatively ACKed segments was
3186 + * retransmitted, the non-SACK case cannot confirm that
3187 + * progress was due to the original transmission, since
3188 + * there are no TCPCB_SACKED_ACKED bits, even though some
3189 + * of the packets may never have been retransmitted.
3190 + */
3191 + if (flag & FLAG_RETRANS_DATA_ACKED)
3192 + flag &= ~FLAG_ORIG_SACK_ACKED;
3184 3193 } else {
3185 3194 int delta;
3186 3195
+1 -1
net/ipv4/udp_offload.c
··· 394 394 out_unlock: 395 395 rcu_read_unlock(); 396 396 out: 397 - NAPI_GRO_CB(skb)->flush |= flush; 397 + skb_gro_flush_final(skb, pp, flush); 398 398 return pp; 399 399 } 400 400 EXPORT_SYMBOL(udp_gro_receive);
+6 -3
net/ipv6/addrconf.c
··· 4528 4528 unsigned long expires, u32 flags) 4529 4529 { 4530 4530 struct fib6_info *f6i; 4531 + u32 prio; 4531 4532 4532 4533 f6i = addrconf_get_prefix_route(&ifp->addr, 4533 4534 ifp->prefix_len, ··· 4537 4536 if (!f6i) 4538 4537 return -ENOENT; 4539 4538 4540 - if (f6i->fib6_metric != ifp->rt_priority) { 4539 + prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF; 4540 + if (f6i->fib6_metric != prio) { 4541 + /* delete old one */ 4542 + ip6_del_rt(dev_net(ifp->idev->dev), f6i); 4543 + 4541 4544 /* add new one */ 4542 4545 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, 4543 4546 ifp->rt_priority, ifp->idev->dev, 4544 4547 expires, flags, GFP_KERNEL); 4545 - /* delete old one */ 4546 - ip6_del_rt(dev_net(ifp->idev->dev), f6i); 4547 4548 } else { 4548 4549 if (!expires) 4549 4550 fib6_clean_expires(f6i);
+3 -3
net/ipv6/netfilter/nf_conntrack_reasm.c
··· 107 107 if (hdr == NULL) 108 108 goto err_reg; 109 109 110 - net->nf_frag.sysctl.frags_hdr = hdr; 110 + net->nf_frag_frags_hdr = hdr; 111 111 return 0; 112 112 113 113 err_reg: ··· 121 121 { 122 122 struct ctl_table *table; 123 123 124 - table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; 125 - unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); 124 + table = net->nf_frag_frags_hdr->ctl_table_arg; 125 + unregister_net_sysctl_table(net->nf_frag_frags_hdr); 126 126 if (!net_eq(net, &init_net)) 127 127 kfree(table); 128 128 }
+1 -1
net/ipv6/seg6_hmac.c
··· 373 373 return -ENOMEM; 374 374 375 375 for_each_possible_cpu(cpu) { 376 - tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); 376 + tfm = crypto_alloc_shash(algo->name, 0, 0); 377 377 if (IS_ERR(tfm)) 378 378 return PTR_ERR(tfm); 379 379 p_tfm = per_cpu_ptr(algo->tfms, cpu);
+2
net/mac80211/tx.c
··· 4845 4845 skb_reset_network_header(skb); 4846 4846 skb_reset_mac_header(skb); 4847 4847 4848 + local_bh_disable(); 4848 4849 __ieee80211_subif_start_xmit(skb, skb->dev, flags); 4850 + local_bh_enable(); 4849 4851 4850 4852 return 0; 4851 4853 }
+47 -5
net/netfilter/nf_conncount.c
··· 47 47 struct hlist_node node; 48 48 struct nf_conntrack_tuple tuple; 49 49 struct nf_conntrack_zone zone; 50 + int cpu; 51 + u32 jiffies32; 50 52 }; 51 53 52 54 struct nf_conncount_rb { ··· 93 91 return false; 94 92 conn->tuple = *tuple; 95 93 conn->zone = *zone; 94 + conn->cpu = raw_smp_processor_id(); 95 + conn->jiffies32 = (u32)jiffies; 96 96 hlist_add_head(&conn->node, head); 97 97 return true; 98 98 } 99 99 EXPORT_SYMBOL_GPL(nf_conncount_add); 100 + 101 + static const struct nf_conntrack_tuple_hash * 102 + find_or_evict(struct net *net, struct nf_conncount_tuple *conn) 103 + { 104 + const struct nf_conntrack_tuple_hash *found; 105 + unsigned long a, b; 106 + int cpu = raw_smp_processor_id(); 107 + __s32 age; 108 + 109 + found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); 110 + if (found) 111 + return found; 112 + b = conn->jiffies32; 113 + a = (u32)jiffies; 114 + 115 + /* conn might have been added just before by another cpu and 116 + * might still be unconfirmed. In this case, nf_conntrack_find() 117 + * returns no result. Thus only evict if this cpu added the 118 + * stale entry or if the entry is older than two jiffies. 119 + */ 120 + age = a - b; 121 + if (conn->cpu == cpu || age >= 2) { 122 + hlist_del(&conn->node); 123 + kmem_cache_free(conncount_conn_cachep, conn); 124 + return ERR_PTR(-ENOENT); 125 + } 126 + 127 + return ERR_PTR(-EAGAIN); 128 + } 100 129 101 130 unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, 102 131 const struct nf_conntrack_tuple *tuple, ··· 136 103 { 137 104 const struct nf_conntrack_tuple_hash *found; 138 105 struct nf_conncount_tuple *conn; 139 - struct hlist_node *n; 140 106 struct nf_conn *found_ct; 107 + struct hlist_node *n; 141 108 unsigned int length = 0; 142 109 143 110 *addit = tuple ? true : false; 144 111 145 112 /* check the saved connections */ 146 113 hlist_for_each_entry_safe(conn, n, head, node) { 147 - found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); 148 - if (found == NULL) { 149 - hlist_del(&conn->node); 150 - kmem_cache_free(conncount_conn_cachep, conn); 114 + found = find_or_evict(net, conn); 115 + if (IS_ERR(found)) { 116 + /* Not found, but might be about to be confirmed */ 117 + if (PTR_ERR(found) == -EAGAIN) { 118 + length++; 119 + if (!tuple) 120 + continue; 121 + 122 + if (nf_ct_tuple_equal(&conn->tuple, tuple) && 123 + nf_ct_zone_id(&conn->zone, conn->zone.dir) == 124 + nf_ct_zone_id(zone, zone->dir)) 125 + *addit = false; 126 + } 151 127 continue; 152 128 } 153 129
+5
net/netfilter/nf_conntrack_helper.c
··· 465 465 
 466 466 
 467 467 nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
 468 + 
 469 + /* Someone may still have looked up the helper while the unhelp
 470 + * iteration above was running, so wait for those users to finish.
 471 + */
 472 + synchronize_rcu();
 468 473 }
 469 474 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
 470 475
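The added synchronize_rcu() closes a window in which a packet-processing path has already looked the helper up under rcu_read_lock(); without the wait, module unload could free code that is still executing. The underlying publish/unpublish pattern (a generic sketch, names hypothetical):

    hlist_del_rcu(&obj->node);   /* unpublish: new readers can't find it */
    synchronize_rcu();           /* wait for readers already in flight */
    kfree(obj);                  /* now provably unreachable */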
+10 -3
net/netfilter/nf_log.c
··· 424 424 if (write) { 425 425 struct ctl_table tmp = *table; 426 426 427 + /* proc_dostring() can append to existing strings, so we need to 428 + * initialize it as an empty string. 429 + */ 430 + buf[0] = '\0'; 427 431 tmp.data = buf; 428 432 r = proc_dostring(&tmp, write, buffer, lenp, ppos); 429 433 if (r) ··· 446 442 rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); 447 443 mutex_unlock(&nf_log_mutex); 448 444 } else { 445 + struct ctl_table tmp = *table; 446 + 447 + tmp.data = buf; 449 448 mutex_lock(&nf_log_mutex); 450 449 logger = nft_log_dereference(net->nf.nf_loggers[tindex]); 451 450 if (!logger) 452 - table->data = "NONE"; 451 + strlcpy(buf, "NONE", sizeof(buf)); 453 452 else 454 - table->data = logger->name; 455 - r = proc_dostring(table, write, buffer, lenp, ppos); 453 + strlcpy(buf, logger->name, sizeof(buf)); 456 454 mutex_unlock(&nf_log_mutex); 455 + r = proc_dostring(&tmp, write, buffer, lenp, ppos); 457 456 } 458 457 459 458 return r;
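As the new comment says, proc_dostring() appends to whatever string the buffer already holds, so handing it uninitialized stack memory leaked stack bytes; the read side additionally must not call it while table->data points at strings selected under nf_log_mutex. Hence the fix copies into a zero-initialized local buffer under the mutex and only then lets proc_dostring() touch userspace. The core of the pattern (a sketch; the buffer declaration is assumed from the surrounding function):

    char buf[NFLOGGER_NAME_LEN];   /* assumed from the surrounding code */
    struct ctl_table tmp = *table;

    buf[0] = '\0';                 /* proc_dostring() appends, never resets */
    tmp.data = buf;
    r = proc_dostring(&tmp, write, buffer, lenp, ppos);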
+10 -1
net/rds/connection.c
··· 659 659 660 660 int rds_conn_init(void) 661 661 { 662 + int ret; 663 + 664 + ret = rds_loop_net_init(); /* register pernet callback */ 665 + if (ret) 666 + return ret; 667 + 662 668 rds_conn_slab = kmem_cache_create("rds_connection", 663 669 sizeof(struct rds_connection), 664 670 0, 0, NULL); 665 - if (!rds_conn_slab) 671 + if (!rds_conn_slab) { 672 + rds_loop_net_exit(); 666 673 return -ENOMEM; 674 + } 667 675 668 676 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 669 677 rds_info_register_func(RDS_INFO_SEND_MESSAGES, ··· 684 676 685 677 void rds_conn_exit(void) 686 678 { 679 + rds_loop_net_exit(); /* unregister pernet callback */ 687 680 rds_loop_exit(); 688 681 689 682 WARN_ON(!hlist_empty(rds_conn_hash));
+56
net/rds/loop.c
··· 33 33 #include <linux/kernel.h> 34 34 #include <linux/slab.h> 35 35 #include <linux/in.h> 36 + #include <net/net_namespace.h> 37 + #include <net/netns/generic.h> 36 38 37 39 #include "rds_single_path.h" 38 40 #include "rds.h" ··· 42 40 43 41 static DEFINE_SPINLOCK(loop_conns_lock); 44 42 static LIST_HEAD(loop_conns); 43 + static atomic_t rds_loop_unloading = ATOMIC_INIT(0); 44 + 45 + static void rds_loop_set_unloading(void) 46 + { 47 + atomic_set(&rds_loop_unloading, 1); 48 + } 49 + 50 + static bool rds_loop_is_unloading(struct rds_connection *conn) 51 + { 52 + return atomic_read(&rds_loop_unloading) != 0; 53 + } 45 54 46 55 /* 47 56 * This 'loopback' transport is a special case for flows that originate ··· 178 165 struct rds_loop_connection *lc, *_lc; 179 166 LIST_HEAD(tmp_list); 180 167 168 + rds_loop_set_unloading(); 169 + synchronize_rcu(); 181 170 /* avoid calling conn_destroy with irqs off */ 182 171 spin_lock_irq(&loop_conns_lock); 183 172 list_splice(&loop_conns, &tmp_list); ··· 190 175 WARN_ON(lc->conn->c_passive); 191 176 rds_conn_destroy(lc->conn); 192 177 } 178 + } 179 + 180 + static void rds_loop_kill_conns(struct net *net) 181 + { 182 + struct rds_loop_connection *lc, *_lc; 183 + LIST_HEAD(tmp_list); 184 + 185 + spin_lock_irq(&loop_conns_lock); 186 + list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node) { 187 + struct net *c_net = read_pnet(&lc->conn->c_net); 188 + 189 + if (net != c_net) 190 + continue; 191 + list_move_tail(&lc->loop_node, &tmp_list); 192 + } 193 + spin_unlock_irq(&loop_conns_lock); 194 + 195 + list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) { 196 + WARN_ON(lc->conn->c_passive); 197 + rds_conn_destroy(lc->conn); 198 + } 199 + } 200 + 201 + static void __net_exit rds_loop_exit_net(struct net *net) 202 + { 203 + rds_loop_kill_conns(net); 204 + } 205 + 206 + static struct pernet_operations rds_loop_net_ops = { 207 + .exit = rds_loop_exit_net, 208 + }; 209 + 210 + int rds_loop_net_init(void) 211 + { 212 + return register_pernet_device(&rds_loop_net_ops); 213 + } 214 + 215 + void rds_loop_net_exit(void) 216 + { 217 + unregister_pernet_device(&rds_loop_net_ops); 193 218 } 194 219 195 220 /* ··· 249 194 .inc_free = rds_loop_inc_free, 250 195 .t_name = "loopback", 251 196 .t_type = RDS_TRANS_LOOP, 197 + .t_unloading = rds_loop_is_unloading, 252 198 };
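Two mechanisms cooperate in this file: the pernet exit hook tears down loopback connections belonging to a dying network namespace, while the unloading flag plus synchronize_rcu() keeps module unload from racing with connection creation (the t_unloading callback lets the RDS core check for this). The unload-side ordering in isolation (sketch):

    atomic_set(&unloading, 1);   /* new lookups will now refuse this transport */
    synchronize_rcu();           /* flush readers that sampled the old value */
    /* only then destroy the remaining connections */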
+2
net/rds/loop.h
··· 5 5 /* loop.c */ 6 6 extern struct rds_transport rds_loop_transport; 7 7 8 + int rds_loop_net_init(void); 9 + void rds_loop_net_exit(void); 8 10 void rds_loop_exit(void); 9 11 10 12 #endif
+63 -33
net/smc/af_smc.c
··· 45 45 */
 46 46 
 47 47 static void smc_tcp_listen_work(struct work_struct *);
 48 + static void smc_connect_work(struct work_struct *);
 48 49 
 49 50 static void smc_set_keepalive(struct sock *sk, int val)
 50 51 {
··· 123 122 goto out;
 124 123 
 125 124 smc = smc_sk(sk);
 125 + 
 126 + /* cleanup for a dangling non-blocking connect */
 127 + flush_work(&smc->connect_work);
 128 + kfree(smc->connect_info);
 129 + smc->connect_info = NULL;
 130 + 
 126 131 if (sk->sk_state == SMC_LISTEN)
 127 132 /* smc_close_non_accepted() is called and acquires
 128 133 * sock lock for child sockets again
··· 193 186 sk->sk_protocol = protocol;
 194 187 smc = smc_sk(sk);
 195 188 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 189 + INIT_WORK(&smc->connect_work, smc_connect_work);
 196 190 INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 197 191 INIT_LIST_HEAD(&smc->accept_q);
 198 192 spin_lock_init(&smc->accept_q_lock);
··· 584 576 return 0;
 585 577 }
 586 578 
 579 + static void smc_connect_work(struct work_struct *work)
 580 + {
 581 + struct smc_sock *smc = container_of(work, struct smc_sock,
 582 + connect_work);
 583 + int rc;
 584 + 
 585 + lock_sock(&smc->sk);
 586 + rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
 587 + smc->connect_info->alen, smc->connect_info->flags);
 588 + if (smc->clcsock->sk->sk_err) {
 589 + smc->sk.sk_err = smc->clcsock->sk->sk_err;
 590 + goto out;
 591 + }
 592 + if (rc < 0) {
 593 + smc->sk.sk_err = -rc;
 594 + goto out;
 595 + }
 596 + 
 597 + rc = __smc_connect(smc);
 598 + if (rc < 0)
 599 + smc->sk.sk_err = -rc;
 600 + 
 601 + out:
 602 + smc->sk.sk_state_change(&smc->sk);
 603 + kfree(smc->connect_info);
 604 + smc->connect_info = NULL;
 605 + release_sock(&smc->sk);
 606 + }
 607 + 
 587 608 static int smc_connect(struct socket *sock, struct sockaddr *addr,
 588 609 int alen, int flags)
 589 610 {
··· 642 605 
 643 606 smc_copy_sock_settings_to_clc(smc);
 644 607 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 645 - rc = kernel_connect(smc->clcsock, addr, alen, flags);
 646 - if (rc)
 647 - goto out;
 608 + if (flags & O_NONBLOCK) {
 609 + if (smc->connect_info) {
 610 + rc = -EALREADY;
 611 + goto out;
 612 + }
 613 + smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
 614 + if (!smc->connect_info) {
 615 + rc = -ENOMEM;
 616 + goto out;
 617 + }
 618 + smc->connect_info->alen = alen;
 619 + smc->connect_info->flags = flags ^ O_NONBLOCK;
 620 + memcpy(&smc->connect_info->addr, addr, alen);
 621 + schedule_work(&smc->connect_work);
 622 + rc = -EINPROGRESS;
 623 + } else {
 624 + rc = kernel_connect(smc->clcsock, addr, alen, flags);
 625 + if (rc)
 626 + goto out;
 648 627 
 649 - rc = __smc_connect(smc);
 650 - if (rc < 0)
 651 - goto out;
 652 - else
 653 - rc = 0; /* success cases including fallback */
 628 + rc = __smc_connect(smc);
 629 + if (rc < 0)
 630 + goto out;
 631 + else
 632 + rc = 0; /* success cases including fallback */
 633 + }
 654 634 
 655 635 out:
 656 636 release_sock(sk);
··· 1333 1279 struct sock *sk = sock->sk;
 1334 1280 __poll_t mask = 0;
 1335 1281 struct smc_sock *smc;
 1336 - int rc;
 1337 1282 
 1338 1283 if (!sk)
 1339 1284 return EPOLLNVAL;
 1340 1285 
 1341 1286 smc = smc_sk(sock->sk);
 1342 - sock_hold(sk);
 1343 - lock_sock(sk);
 1344 1287 if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
 1345 1288 /* delegate to CLC child sock */
 1346 - release_sock(sk);
 1347 1289 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
 1348 - lock_sock(sk);
 1349 1290 sk->sk_err = smc->clcsock->sk->sk_err;
 1350 - if (sk->sk_err) {
 1291 + if (sk->sk_err)
 1351 1292 mask |= EPOLLERR;
 1352 - } else {
 1353 - /* if non-blocking connect finished ... */
 1354 - if (sk->sk_state == SMC_INIT &&
 1355 - mask & EPOLLOUT &&
 1356 - smc->clcsock->sk->sk_state != TCP_CLOSE) {
 1357 - rc = __smc_connect(smc);
 1358 - if (rc < 0)
 1359 - mask |= EPOLLERR;
 1360 - /* success cases including fallback */
 1361 - mask |= EPOLLOUT | EPOLLWRNORM;
 1362 - }
 1363 - }
 1364 1293 } else {
 1365 - if (sk->sk_state != SMC_CLOSED) {
 1366 - release_sock(sk);
 1367 - sock_poll_wait(file, sk_sleep(sk), wait);
 1368 - lock_sock(sk);
 1369 - }
 1370 1294 if (sk->sk_err)
 1371 1295 mask |= EPOLLERR;
 1372 1296 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
··· 1372 1340 mask |= EPOLLPRI;
 1373 1341 
 1374 1342 }
 1375 - release_sock(sk);
 1376 - sock_put(sk);
 1377 1343 
 1378 1344 return mask;
 1379 1345 }
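After this rework, a non-blocking connect() on an AF_SMC socket follows the familiar TCP contract from user space: the syscall returns with EINPROGRESS, smc_connect_work() finishes the handshake, and sk_state_change() wakes pollers. Roughly (a userspace sketch; address setup omitted, SMCPROTO_SMC assumed as in the kernel headers):

    int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);

    fcntl(fd, F_SETFL, O_NONBLOCK);
    if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0 &&
        errno == EINPROGRESS) {
        /* wait for POLLOUT, then check SO_ERROR, exactly as with TCP */
    }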
+8
net/smc/smc.h
··· 187 187 struct work_struct close_work; /* peer sent some closing */ 188 188 }; 189 189 190 + struct smc_connect_info { 191 + int flags; 192 + int alen; 193 + struct sockaddr addr; 194 + }; 195 + 190 196 struct smc_sock { /* smc sock container */ 191 197 struct sock sk; 192 198 struct socket *clcsock; /* internal tcp socket */ 193 199 struct smc_connection conn; /* smc connection */ 194 200 struct smc_sock *listen_smc; /* listen parent */ 201 + struct smc_connect_info *connect_info; /* connect address & flags */ 202 + struct work_struct connect_work; /* handle non-blocking connect*/ 195 203 struct work_struct tcp_listen_work;/* handle tcp socket accepts */ 196 204 struct work_struct smc_listen_work;/* prepare new accept socket */ 197 205 struct list_head accept_q; /* sockets to be accepted */
+1 -16
net/strparser/strparser.c
··· 35 35 */ 36 36 struct strp_msg strp; 37 37 int accum_len; 38 - int early_eaten; 39 38 }; 40 39 41 40 static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) ··· 114 115 head = strp->skb_head; 115 116 if (head) { 116 117 /* Message already in progress */ 117 - 118 - stm = _strp_msg(head); 119 - if (unlikely(stm->early_eaten)) { 120 - /* Already some number of bytes on the receive sock 121 - * data saved in skb_head, just indicate they 122 - * are consumed. 123 - */ 124 - eaten = orig_len <= stm->early_eaten ? 125 - orig_len : stm->early_eaten; 126 - stm->early_eaten -= eaten; 127 - 128 - return eaten; 129 - } 130 - 131 118 if (unlikely(orig_offset)) { 132 119 /* Getting data with a non-zero offset when a message is 133 120 * in progress is not expected. If it does happen, we ··· 282 297 } 283 298 284 299 stm->accum_len += cand_len; 300 + eaten += cand_len; 285 301 strp->need_bytes = stm->strp.full_len - 286 302 stm->accum_len; 287 - stm->early_eaten = cand_len; 288 303 STRP_STATS_ADD(strp->stats.bytes, cand_len); 289 304 desc->count = 0; /* Stop reading socket */ 290 305 break;
+14 -21
net/wireless/nl80211.c
··· 6231 6231 nl80211_check_s32); 6232 6232 /* 6233 6233 * Check HT operation mode based on 6234 - * IEEE 802.11 2012 8.4.2.59 HT Operation element. 6234 + * IEEE 802.11-2016 9.4.2.57 HT Operation element. 6235 6235 */ 6236 6236 if (tb[NL80211_MESHCONF_HT_OPMODE]) { 6237 6237 ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); ··· 6241 6241 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) 6242 6242 return -EINVAL; 6243 6243 6244 - if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) && 6245 - (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) 6246 - return -EINVAL; 6244 + /* NON_HT_STA bit is reserved, but some programs set it */ 6245 + ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; 6247 6246 6248 - switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) { 6249 - case IEEE80211_HT_OP_MODE_PROTECTION_NONE: 6250 - case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: 6251 - if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT) 6252 - return -EINVAL; 6253 - break; 6254 - case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: 6255 - case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: 6256 - if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) 6257 - return -EINVAL; 6258 - break; 6259 - } 6260 6247 cfg->ht_opmode = ht_opmode; 6261 6248 mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); 6262 6249 } ··· 10949 10962 rem) { 10950 10963 u8 *mask_pat; 10951 10964 10952 - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, 10953 - nl80211_packet_pattern_policy, 10954 - info->extack); 10965 + err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, 10966 + nl80211_packet_pattern_policy, 10967 + info->extack); 10968 + if (err) 10969 + goto error; 10970 + 10955 10971 err = -EINVAL; 10956 10972 if (!pat_tb[NL80211_PKTPAT_MASK] || 10957 10973 !pat_tb[NL80211_PKTPAT_PATTERN]) ··· 11203 11213 rem) { 11204 11214 u8 *mask_pat; 11205 11215 11206 - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, 11207 - nl80211_packet_pattern_policy, NULL); 11216 + err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, 11217 + nl80211_packet_pattern_policy, NULL); 11218 + if (err) 11219 + return err; 11220 + 11208 11221 if (!pat_tb[NL80211_PKTPAT_MASK] || 11209 11222 !pat_tb[NL80211_PKTPAT_PATTERN]) 11210 11223 return -EINVAL;
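nla_parse_nested() reports malformed attributes through its return value; the old code dropped that value, so the subsequent tb[] checks could run on partially parsed input. The pattern both hunks now follow (sketch, with stand-in names for the call-site arguments):

    err = nla_parse_nested(tb, MAX_ATTR, nest, policy, extack);
    if (err)
        return err;     /* never inspect tb[] after a parse failure */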
+4 -4
samples/bpf/xdp_fwd_kern.c
··· 48 48 struct ethhdr *eth = data; 49 49 struct ipv6hdr *ip6h; 50 50 struct iphdr *iph; 51 - int out_index; 52 51 u16 h_proto; 53 52 u64 nh_off; 53 + int rc; 54 54 55 55 nh_off = sizeof(*eth); 56 56 if (data + nh_off > data_end) ··· 101 101 102 102 fib_params.ifindex = ctx->ingress_ifindex; 103 103 104 - out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); 104 + rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); 105 105 106 106 /* verify egress index has xdp support 107 107 * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with ··· 109 109 * NOTE: without verification that egress index supports XDP 110 110 * forwarding packets are dropped. 111 111 */ 112 - if (out_index > 0) { 112 + if (rc == 0) { 113 113 if (h_proto == htons(ETH_P_IP)) 114 114 ip_decrease_ttl(iph); 115 115 else if (h_proto == htons(ETH_P_IPV6)) ··· 117 117 118 118 memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); 119 119 memcpy(eth->h_source, fib_params.smac, ETH_ALEN); 120 - return bpf_redirect_map(&tx_port, out_index, 0); 120 + return bpf_redirect_map(&tx_port, fib_params.ifindex, 0); 121 121 } 122 122 123 123 return XDP_PASS;
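The sample was conflating two things: bpf_fib_lookup() returns a status code, 0 meaning the lookup succeeded, while the egress ifindex comes back inside the params struct it fills in. The corrected calling convention (sketch):

    rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
    if (rc == 0)    /* lookup succeeded; fib_params now holds the result */
        return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);

    return XDP_PASS;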
+1 -1
scripts/cc-can-link.sh
··· 1 1 #!/bin/sh 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 4 - cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 && echo "y" 4 + cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 5 5 #include <stdio.h> 6 6 int main(void) 7 7 {
+8 -4
tools/bpf/bpftool/prog.c
··· 694 694 return -1; 695 695 } 696 696 697 - if (do_pin_fd(prog_fd, argv[1])) { 698 - p_err("failed to pin program"); 699 - return -1; 700 - } 697 + if (do_pin_fd(prog_fd, argv[1])) 698 + goto err_close_obj; 701 699 702 700 if (json_output) 703 701 jsonw_null(json_wtr); 704 702 703 + bpf_object__close(obj); 704 + 705 705 return 0; 706 + 707 + err_close_obj: 708 + bpf_object__close(obj); 709 + return -1; 706 710 } 707 711 708 712 static int do_help(int argc, char **argv)
+1
tools/testing/selftests/bpf/config
··· 6 6 CONFIG_CGROUP_BPF=y 7 7 CONFIG_NETDEVSIM=m 8 8 CONFIG_NET_CLS_ACT=y 9 + CONFIG_NET_SCHED=y 9 10 CONFIG_NET_SCH_INGRESS=y 10 11 CONFIG_NET_IPIP=y 11 12 CONFIG_IPV6=y
+9
tools/testing/selftests/bpf/test_kmod.sh
··· 1 1 #!/bin/sh 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 4 + # Kselftest framework requirement - SKIP code is 4. 5 + ksft_skip=4 6 + 7 + msg="skip all tests:" 8 + if [ "$(id -u)" != "0" ]; then 9 + echo $msg please run this as root >&2 10 + exit $ksft_skip 11 + fi 12 + 4 13 SRC_TREE=../../../../ 5 14 6 15 test_run()
+9
tools/testing/selftests/bpf/test_lirc_mode2.sh
··· 1 1 #!/bin/bash 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 4 + # Kselftest framework requirement - SKIP code is 4. 5 + ksft_skip=4 6 + 7 + msg="skip all tests:" 8 + if [ $UID != 0 ]; then 9 + echo $msg please run this as root >&2 10 + exit $ksft_skip 11 + fi 12 + 4 13 GREEN='\033[0;92m' 5 14 RED='\033[0;31m' 6 15 NC='\033[0m' # No Color
+9
tools/testing/selftests/bpf/test_lwt_seg6local.sh
··· 21 21 # An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this 22 22 # datagram can be read on NS6 when binding to fb00::6. 23 23 24 + # Kselftest framework requirement - SKIP code is 4. 25 + ksft_skip=4 26 + 27 + msg="skip all tests:" 28 + if [ $UID != 0 ]; then 29 + echo $msg please run this as root >&2 30 + exit $ksft_skip 31 + fi 32 + 24 33 TMP_FILE="/tmp/selftest_lwt_seg6local.txt" 25 34 26 35 cleanup()
-6
tools/testing/selftests/bpf/test_sockmap.c
··· 1413 1413 1414 1414 int main(int argc, char **argv) 1415 1415 { 1416 - struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; 1417 1416 int iov_count = 1, length = 1024, rate = 1; 1418 1417 struct sockmap_options options = {0}; 1419 1418 int opt, longindex, err, cg_fd = 0; 1420 1419 char *bpf_file = BPF_SOCKMAP_FILENAME; 1421 1420 int test = PING_PONG; 1422 - 1423 - if (setrlimit(RLIMIT_MEMLOCK, &r)) { 1424 - perror("setrlimit(RLIMIT_MEMLOCK)"); 1425 - return 1; 1426 - } 1427 1421 1428 1422 if (argc < 2) 1429 1423 return test_suite();
tools/testing/selftests/net/fib_tests.sh