Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from David Miller:

1) Limit xt_hashlimit hash table size to avoid OOM or hung tasks, from
Cong Wang.

2) Fix deadlock in xsk by publishing global consumer pointers when NAPI
is finished, from Magnus Karlsson.

3) Set table field properly to RT_TABLE_COMPAT when necessary, from
Jethro Beekman.

4) NLA_STRING attributes are not necessarily NULL terminated, deal with
that in IFLA_ALT_IFNAME. From Eric Dumazet.

5) Fix checksum handling in atlantic driver, from Dmitry Bezrukov.

6) Handle mtu==0 devices properly in wireguard, from Jason A.
Donenfeld.

7) Fix several lockdep warnings in bonding, from Taehee Yoo.

8) Fix cls_flower port blocking, from Jason Baron.

9) Sanitize internal map names in libbpf, from Toke Høiland-Jørgensen.

10) Fix RDMA race in qede driver, from Michal Kalderon.

11) Fix several false lockdep warnings by adding conditions to
list_for_each_entry_rcu(), from Madhuparna Bhowmik.

12) Fix sleep in atomic in mlx5 driver, from Huy Nguyen.

13) Fix potential deadlock in bpf_map_do_batch(), from Yonghong Song.

14) Hey, variables declared in a switch statement before any case
statements are not initialized. I learn something every day. Get
rid of this stuff in several parts of the networking, from Kees
Cook.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (99 commits)
bnxt_en: Issue PCIe FLR in kdump kernel to cleanup pending DMAs.
bnxt_en: Improve device shutdown method.
net: netlink: cap max groups which will be considered in netlink_bind()
net: thunderx: workaround BGX TX Underflow issue
ionic: fix fw_status read
net: disable BRIDGE_NETFILTER by default
net: macb: Properly handle phylink on at91rm9200
s390/qeth: fix off-by-one in RX copybreak check
s390/qeth: don't warn for napi with 0 budget
s390/qeth: vnicc Fix EOPNOTSUPP precedence
openvswitch: Distribute switch variables for initialization
net: ip6_gre: Distribute switch variables for initialization
net: core: Distribute switch variables for initialization
udp: rehash on disconnect
net/tls: Fix to avoid gettig invalid tls record
bpf: Fix a potential deadlock with bpf_map_do_batch
bpf: Do not grab the bucket spinlock by default on htab batch ops
ice: Wait for VF to be reset/ready before configuration
ice: Don't tell the OS that link is going down
ice: Don't reject odd values of usecs set by user
...

+1132 -506
+52 -3
drivers/net/bonding/bond_main.c
··· 3526 3526 } 3527 3527 } 3528 3528 3529 + #ifdef CONFIG_LOCKDEP 3530 + static int bond_get_lowest_level_rcu(struct net_device *dev) 3531 + { 3532 + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 3533 + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 3534 + int cur = 0, max = 0; 3535 + 3536 + now = dev; 3537 + iter = &dev->adj_list.lower; 3538 + 3539 + while (1) { 3540 + next = NULL; 3541 + while (1) { 3542 + ldev = netdev_next_lower_dev_rcu(now, &iter); 3543 + if (!ldev) 3544 + break; 3545 + 3546 + next = ldev; 3547 + niter = &ldev->adj_list.lower; 3548 + dev_stack[cur] = now; 3549 + iter_stack[cur++] = iter; 3550 + if (max <= cur) 3551 + max = cur; 3552 + break; 3553 + } 3554 + 3555 + if (!next) { 3556 + if (!cur) 3557 + return max; 3558 + next = dev_stack[--cur]; 3559 + niter = iter_stack[cur]; 3560 + } 3561 + 3562 + now = next; 3563 + iter = niter; 3564 + } 3565 + 3566 + return max; 3567 + } 3568 + #endif 3569 + 3529 3570 static void bond_get_stats(struct net_device *bond_dev, 3530 3571 struct rtnl_link_stats64 *stats) 3531 3572 { ··· 3574 3533 struct rtnl_link_stats64 temp; 3575 3534 struct list_head *iter; 3576 3535 struct slave *slave; 3536 + int nest_level = 0; 3577 3537 3578 - spin_lock(&bond->stats_lock); 3579 - memcpy(stats, &bond->bond_stats, sizeof(*stats)); 3580 3538 3581 3539 rcu_read_lock(); 3540 + #ifdef CONFIG_LOCKDEP 3541 + nest_level = bond_get_lowest_level_rcu(bond_dev); 3542 + #endif 3543 + 3544 + spin_lock_nested(&bond->stats_lock, nest_level); 3545 + memcpy(stats, &bond->bond_stats, sizeof(*stats)); 3546 + 3582 3547 bond_for_each_slave_rcu(bond, slave, iter) { 3583 3548 const struct rtnl_link_stats64 *new = 3584 3549 dev_get_stats(slave->dev, &temp); ··· 3594 3547 /* save off the slave stats for the next run */ 3595 3548 memcpy(&slave->slave_stats, new, sizeof(*new)); 3596 3549 } 3597 - rcu_read_unlock(); 3598 3550 3599 3551 memcpy(&bond->bond_stats, stats, sizeof(*stats)); 3600 3552 
spin_unlock(&bond->stats_lock); 3553 + rcu_read_unlock(); 3601 3554 } 3602 3555 3603 3556 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) ··· 3687 3640 case BOND_RELEASE_OLD: 3688 3641 case SIOCBONDRELEASE: 3689 3642 res = bond_release(bond_dev, slave_dev); 3643 + if (!res) 3644 + netdev_update_lockdep_key(slave_dev); 3690 3645 break; 3691 3646 case BOND_SETHWADDR_OLD: 3692 3647 case SIOCBONDSETHWADDR:
+2
drivers/net/bonding/bond_options.c
··· 1398 1398 case '-': 1399 1399 slave_dbg(bond->dev, dev, "Releasing interface\n"); 1400 1400 ret = bond_release(bond->dev, dev); 1401 + if (!ret) 1402 + netdev_update_lockdep_key(dev); 1401 1403 break; 1402 1404 1403 1405 default:
+3
drivers/net/dsa/b53/b53_common.c
··· 1366 1366 1367 1367 b53_get_vlan_entry(dev, vid, vl); 1368 1368 1369 + if (vid == 0 && vid == b53_default_pvid(dev)) 1370 + untagged = true; 1371 + 1369 1372 vl->members |= BIT(port); 1370 1373 if (untagged && !dsa_is_cpu_port(ds, port)) 1371 1374 vl->untag |= BIT(port);
+5
drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
··· 722 722 if (flags & ~AQ_PRIV_FLAGS_MASK) 723 723 return -EOPNOTSUPP; 724 724 725 + if (hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) { 726 + netdev_info(ndev, "Can't enable more than one loopback simultaneously\n"); 727 + return -EINVAL; 728 + } 729 + 725 730 cfg->priv_flags = flags; 726 731 727 732 if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
+1 -1
drivers/net/ethernet/aquantia/atlantic/aq_filters.c
··· 163 163 } 164 164 165 165 if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) && 166 - (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci), 166 + (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK, 167 167 aq_nic->active_vlans))) { 168 168 netdev_err(aq_nic->ndev, 169 169 "ethtool: unknown vlan-id specified");
+2
drivers/net/ethernet/aquantia/atlantic/aq_hw.h
··· 337 337 338 338 void (*enable_ptp)(struct aq_hw_s *self, int enable); 339 339 340 + void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj); 341 + 340 342 int (*set_eee_rate)(struct aq_hw_s *self, u32 speed); 341 343 342 344 int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
+3 -5
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
··· 533 533 dx_buff->len, 534 534 DMA_TO_DEVICE); 535 535 536 - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) 536 + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { 537 + ret = 0; 537 538 goto exit; 539 + } 538 540 539 541 first = dx_buff; 540 542 dx_buff->len_pkt = skb->len; ··· 657 655 if (likely(frags)) { 658 656 err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, 659 657 ring, frags); 660 - if (err >= 0) { 661 - ++ring->stats.tx.packets; 662 - ring->stats.tx.bytes += skb->len; 663 - } 664 658 } else { 665 659 err = NETDEV_TX_BUSY; 666 660 }
+8 -5
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
··· 359 359 netif_device_detach(nic->ndev); 360 360 netif_tx_stop_all_queues(nic->ndev); 361 361 362 - aq_nic_stop(nic); 362 + if (netif_running(nic->ndev)) 363 + aq_nic_stop(nic); 363 364 364 365 if (deep) { 365 366 aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); ··· 376 375 { 377 376 struct pci_dev *pdev = to_pci_dev(dev); 378 377 struct aq_nic_s *nic; 379 - int ret; 378 + int ret = 0; 380 379 381 380 nic = pci_get_drvdata(pdev); 382 381 ··· 391 390 goto err_exit; 392 391 } 393 392 394 - ret = aq_nic_start(nic); 395 - if (ret) 396 - goto err_exit; 393 + if (netif_running(nic->ndev)) { 394 + ret = aq_nic_start(nic); 395 + if (ret) 396 + goto err_exit; 397 + } 397 398 398 399 netif_device_attach(nic->ndev); 399 400 netif_tx_start_all_queues(nic->ndev);
+7 -3
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
··· 272 272 } 273 273 } 274 274 275 - if (unlikely(buff->is_eop)) 276 - dev_kfree_skb_any(buff->skb); 275 + if (unlikely(buff->is_eop)) { 276 + ++self->stats.rx.packets; 277 + self->stats.tx.bytes += buff->skb->len; 277 278 279 + dev_kfree_skb_any(buff->skb); 280 + } 278 281 buff->pa = 0U; 279 282 buff->eop_index = 0xffffU; 280 283 self->sw_head = aq_ring_next_dx(self, self->sw_head); ··· 354 351 err = 0; 355 352 goto err_exit; 356 353 } 357 - if (buff->is_error || buff->is_cso_err) { 354 + if (buff->is_error || 355 + (buff->is_lro && buff->is_cso_err)) { 358 356 buff_ = buff; 359 357 do { 360 358 next_ = buff_->next,
+2 -1
drivers/net/ethernet/aquantia/atlantic/aq_ring.h
··· 78 78 u32 is_cleaned:1; 79 79 u32 is_error:1; 80 80 u32 is_vlan:1; 81 - u32 rsvd3:4; 81 + u32 is_lro:1; 82 + u32 rsvd3:3; 82 83 u16 eop_index; 83 84 u16 rsvd4; 84 85 };
+14 -8
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
··· 823 823 } 824 824 } 825 825 826 + buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT & 827 + rxd_wb->status); 826 828 if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { 827 829 buff->len = rxd_wb->pkt_len % 828 830 AQ_CFG_RX_FRAME_MAX; ··· 837 835 rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? 838 836 AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; 839 837 840 - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & 841 - rxd_wb->status) { 838 + if (buff->is_lro) { 842 839 /* LRO */ 843 840 buff->next = rxd_wb->next_desc_ptr; 844 841 ++ring->stats.rx.lro_packets; ··· 885 884 { 886 885 struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; 887 886 unsigned int i = 0U; 887 + u32 vlan_promisc; 888 + u32 l2_promisc; 888 889 889 - hw_atl_rpfl2promiscuous_mode_en_set(self, 890 - IS_FILTER_ENABLED(IFF_PROMISC)); 890 + l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) || 891 + !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)); 892 + vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc; 891 893 892 - hw_atl_rpf_vlan_prom_mode_en_set(self, 893 - IS_FILTER_ENABLED(IFF_PROMISC) || 894 - cfg->is_vlan_force_promisc); 894 + hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc); 895 + 896 + hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc); 895 897 896 898 hw_atl_rpfl2multicast_flr_en_set(self, 897 899 IS_FILTER_ENABLED(IFF_ALLMULTI) && ··· 1165 1161 { 1166 1162 self->ptp_clk_offset += delta; 1167 1163 1164 + self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset); 1165 + 1168 1166 return 0; 1169 1167 } 1170 1168 ··· 1217 1211 fwreq.ptp_gpio_ctrl.index = index; 1218 1212 fwreq.ptp_gpio_ctrl.period = period; 1219 1213 /* Apply time offset */ 1220 - fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset; 1214 + fwreq.ptp_gpio_ctrl.start = start; 1221 1215 1222 1216 size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl); 1223 1217 return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
+17 -2
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
··· 22 22 #define HW_ATL_MIF_ADDR 0x0208U 23 23 #define HW_ATL_MIF_VAL 0x020CU 24 24 25 + #define HW_ATL_MPI_RPC_ADDR 0x0334U 25 26 #define HW_ATL_RPC_CONTROL_ADR 0x0338U 26 27 #define HW_ATL_RPC_STATE_ADR 0x033CU 27 28 ··· 54 53 }; 55 54 56 55 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); 57 - 58 56 static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, 59 57 enum hal_atl_utils_fw_state_e state); 60 - 61 58 static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self); 62 59 static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self); 63 60 static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self); 64 61 static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self); 65 62 static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self); 63 + static u32 aq_fw1x_rpc_get(struct aq_hw_s *self); 66 64 67 65 int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) 68 66 { ··· 476 476 self, self->mbox_addr, 477 477 self->mbox_addr != 0U, 478 478 1000U, 10000U); 479 + err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self, 480 + self->rpc_addr, 481 + self->rpc_addr != 0U, 482 + 1000U, 100000U); 479 483 480 484 return err; 481 485 } ··· 535 531 self, fw.val, 536 532 sw.tid == fw.tid, 537 533 1000U, 100000U); 534 + if (err < 0) 535 + goto err_exit; 536 + 537 + err = aq_hw_err_from_flags(self); 538 + if (err < 0) 539 + goto err_exit; 538 540 539 541 if (fw.len == 0xFFFFU) { 540 542 err = hw_atl_utils_fw_rpc_call(self, sw.len); ··· 1033 1023 static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self) 1034 1024 { 1035 1025 return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR); 1026 + } 1027 + 1028 + static u32 aq_fw1x_rpc_get(struct aq_hw_s *self) 1029 + { 1030 + return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR); 1036 1031 } 1037 1032 1038 1033 const struct aq_fw_ops aq_fw_1x_ops = {
+12
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
··· 30 30 #define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378 31 31 #define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c 32 32 33 + #define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0 34 + #define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4 35 + 33 36 #define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) 34 37 #define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) 35 38 #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) ··· 478 475 aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts); 479 476 } 480 477 478 + static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj) 479 + { 480 + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR, 481 + (adj >> 0) & 0xffffffff); 482 + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR, 483 + (adj >> 32) & 0xffffffff); 484 + } 485 + 481 486 static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode) 482 487 { 483 488 if (self->fw_ver_actual < HW_ATL_FW_VER_LED) ··· 644 633 .enable_ptp = aq_fw3x_enable_ptp, 645 634 .led_control = aq_fw2x_led_control, 646 635 .set_phyloopback = aq_fw2x_set_phyloopback, 636 + .adjust_ptp = aq_fw3x_adjust_ptp, 647 637 };
+10 -2
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 11786 11786 if (version_printed++ == 0) 11787 11787 pr_info("%s", version); 11788 11788 11789 + /* Clear any pending DMA transactions from crash kernel 11790 + * while loading driver in capture kernel. 11791 + */ 11792 + if (is_kdump_kernel()) { 11793 + pci_clear_master(pdev); 11794 + pcie_flr(pdev); 11795 + } 11796 + 11789 11797 max_irqs = bnxt_get_max_irq(pdev); 11790 11798 dev = alloc_etherdev_mq(sizeof(*bp), max_irqs); 11791 11799 if (!dev) ··· 11991 11983 dev_close(dev); 11992 11984 11993 11985 bnxt_ulp_shutdown(bp); 11986 + bnxt_clear_int_mode(bp); 11987 + pci_disable_device(pdev); 11994 11988 11995 11989 if (system_state == SYSTEM_POWER_OFF) { 11996 - bnxt_clear_int_mode(bp); 11997 - pci_disable_device(pdev); 11998 11990 pci_wake_from_d3(pdev, bp->wol); 11999 11991 pci_set_power_state(pdev, PCI_D3hot); 12000 11992 }
+2 -2
drivers/net/ethernet/broadcom/cnic_defs.h
··· 543 543 #define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2 544 544 #endif 545 545 #if defined(__BIG_ENDIAN) 546 - u16 reserverd3; 546 + u16 reserved3; 547 547 u8 da0; 548 548 u8 da1; 549 549 #elif defined(__LITTLE_ENDIAN) 550 550 u8 da1; 551 551 u8 da0; 552 - u16 reserverd3; 552 + u16 reserved3; 553 553 #endif 554 554 #if defined(__BIG_ENDIAN) 555 555 u8 da2;
+1
drivers/net/ethernet/cadence/macb.h
··· 652 652 #define MACB_CAPS_GEM_HAS_PTP 0x00000040 653 653 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 654 654 #define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 655 + #define MACB_CAPS_MACB_IS_EMAC 0x08000000 655 656 #define MACB_CAPS_FIFO_MODE 0x10000000 656 657 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 657 658 #define MACB_CAPS_SG_DISABLED 0x40000000
+37 -29
drivers/net/ethernet/cadence/macb_main.c
··· 572 572 old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR); 573 573 574 574 /* Clear all the bits we might set later */ 575 - ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE) | 576 - GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); 575 + ctrl &= ~(MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE)); 576 + 577 + if (bp->caps & MACB_CAPS_MACB_IS_EMAC) { 578 + if (state->interface == PHY_INTERFACE_MODE_RMII) 579 + ctrl |= MACB_BIT(RM9200_RMII); 580 + } else { 581 + ctrl &= ~(GEM_BIT(GBE) | GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); 582 + 583 + /* We do not support MLO_PAUSE_RX yet */ 584 + if (state->pause & MLO_PAUSE_TX) 585 + ctrl |= MACB_BIT(PAE); 586 + 587 + if (state->interface == PHY_INTERFACE_MODE_SGMII) 588 + ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); 589 + } 577 590 578 591 if (state->speed == SPEED_1000) 579 592 ctrl |= GEM_BIT(GBE); ··· 595 582 596 583 if (state->duplex) 597 584 ctrl |= MACB_BIT(FD); 598 - 599 - /* We do not support MLO_PAUSE_RX yet */ 600 - if (state->pause & MLO_PAUSE_TX) 601 - ctrl |= MACB_BIT(PAE); 602 - 603 - if (state->interface == PHY_INTERFACE_MODE_SGMII) 604 - ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); 605 585 606 586 /* Apply the new configuration, if any */ 607 587 if (old_ctrl ^ ctrl) ··· 614 608 unsigned int q; 615 609 u32 ctrl; 616 610 617 - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) 618 - queue_writel(queue, IDR, 619 - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); 611 + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) 612 + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) 613 + queue_writel(queue, IDR, 614 + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); 620 615 621 616 /* Disable Rx and Tx */ 622 617 ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE)); ··· 634 627 struct macb_queue *queue; 635 628 unsigned int q; 636 629 637 - macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); 630 + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { 631 + macb_set_tx_clk(bp->tx_clk, bp->speed, 
ndev); 638 632 639 - /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down 640 - * cleared the pipeline and control registers. 641 - */ 642 - bp->macbgem_ops.mog_init_rings(bp); 643 - macb_init_buffers(bp); 633 + /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down 634 + * cleared the pipeline and control registers. 635 + */ 636 + bp->macbgem_ops.mog_init_rings(bp); 637 + macb_init_buffers(bp); 644 638 645 - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) 646 - queue_writel(queue, IER, 647 - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); 639 + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) 640 + queue_writel(queue, IER, 641 + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); 642 + } 648 643 649 644 /* Enable Rx and Tx */ 650 645 macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); ··· 3799 3790 u32 ctl; 3800 3791 int ret; 3801 3792 3793 + ret = pm_runtime_get_sync(&lp->pdev->dev); 3794 + if (ret < 0) 3795 + return ret; 3796 + 3802 3797 /* Clear internal statistics */ 3803 3798 ctl = macb_readl(lp, NCR); 3804 3799 macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); ··· 3867 3854 q->rx_buffers, q->rx_buffers_dma); 3868 3855 q->rx_buffers = NULL; 3869 3856 3870 - return 0; 3857 + return pm_runtime_put(&lp->pdev->dev); 3871 3858 } 3872 3859 3873 3860 /* Transmit packet */ ··· 4050 4037 struct net_device *dev = platform_get_drvdata(pdev); 4051 4038 struct macb *bp = netdev_priv(dev); 4052 4039 int err; 4053 - u32 reg; 4054 4040 4055 4041 bp->queues[0].bp = bp; 4056 4042 ··· 4063 4051 4064 4052 macb_writel(bp, NCR, 0); 4065 4053 4066 - reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); 4067 - if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) 4068 - reg |= MACB_BIT(RM9200_RMII); 4069 - 4070 - macb_writel(bp, NCFGR, reg); 4054 + macb_writel(bp, NCFGR, MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG)); 4071 4055 4072 4056 return 0; 4073 4057 } ··· 4222 4214 }; 4223 4215 4224 4216 
static const struct macb_config emac_config = { 4225 - .caps = MACB_CAPS_NEEDS_RSTONUBR, 4217 + .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, 4226 4218 .clk_init = at91ether_clk_init, 4227 4219 .init = at91ether_init, 4228 4220 };
+59 -3
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
··· 410 410 lmac = &bgx->lmac[lmacid]; 411 411 412 412 cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); 413 - if (enable) 413 + if (enable) { 414 414 cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; 415 - else 415 + 416 + /* enable TX FIFO Underflow interrupt */ 417 + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, 418 + GMI_TXX_INT_UNDFLW); 419 + } else { 416 420 cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); 421 + 422 + /* Disable TX FIFO Underflow interrupt */ 423 + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, 424 + GMI_TXX_INT_UNDFLW); 425 + } 417 426 bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); 418 427 419 428 if (bgx->is_rgx) ··· 1544 1535 return bgx_init_of_phy(bgx); 1545 1536 } 1546 1537 1538 + static irqreturn_t bgx_intr_handler(int irq, void *data) 1539 + { 1540 + struct bgx *bgx = (struct bgx *)data; 1541 + u64 status, val; 1542 + int lmac; 1543 + 1544 + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { 1545 + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); 1546 + if (status & GMI_TXX_INT_UNDFLW) { 1547 + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", 1548 + bgx->bgx_id, lmac); 1549 + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); 1550 + val &= ~CMR_EN; 1551 + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); 1552 + val |= CMR_EN; 1553 + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); 1554 + } 1555 + /* clear interrupts */ 1556 + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); 1557 + } 1558 + 1559 + return IRQ_HANDLED; 1560 + } 1561 + 1562 + static void bgx_register_intr(struct pci_dev *pdev) 1563 + { 1564 + struct bgx *bgx = pci_get_drvdata(pdev); 1565 + int ret; 1566 + 1567 + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, 1568 + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); 1569 + if (ret < 0) { 1570 + pci_err(pdev, "Req for #%d msix vectors failed\n", 1571 + BGX_LMAC_VEC_OFFSET); 1572 + return; 1573 + } 1574 + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, 1575 + bgx, "BGX%d", bgx->bgx_id); 1576 + if (ret) 1577 + 
pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); 1578 + } 1579 + 1547 1580 static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1548 1581 { 1549 1582 int err; ··· 1601 1550 1602 1551 pci_set_drvdata(pdev, bgx); 1603 1552 1604 - err = pci_enable_device(pdev); 1553 + err = pcim_enable_device(pdev); 1605 1554 if (err) { 1606 1555 dev_err(dev, "Failed to enable PCI device\n"); 1607 1556 pci_set_drvdata(pdev, NULL); ··· 1655 1604 1656 1605 bgx_init_hw(bgx); 1657 1606 1607 + bgx_register_intr(pdev); 1608 + 1658 1609 /* Enable all LMACs */ 1659 1610 for (lmac = 0; lmac < bgx->lmac_count; lmac++) { 1660 1611 err = bgx_lmac_enable(bgx, lmac); ··· 1673 1620 1674 1621 err_enable: 1675 1622 bgx_vnic[bgx->bgx_id] = NULL; 1623 + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); 1676 1624 err_release_regions: 1677 1625 pci_release_regions(pdev); 1678 1626 err_disable_device: ··· 1690 1636 /* Disable all LMACs */ 1691 1637 for (lmac = 0; lmac < bgx->lmac_count; lmac++) 1692 1638 bgx_lmac_disable(bgx, lmac); 1639 + 1640 + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); 1693 1641 1694 1642 bgx_vnic[bgx->bgx_id] = NULL; 1695 1643 pci_release_regions(pdev);
+9
drivers/net/ethernet/cavium/thunder/thunder_bgx.h
··· 180 180 #define BGX_GMP_GMI_TXX_BURST 0x38228 181 181 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 182 182 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 183 + #define BGX_GMP_GMI_TXX_INT 0x38500 184 + #define BGX_GMP_GMI_TXX_INT_W1S 0x38508 185 + #define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 186 + #define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 187 + #define GMI_TXX_INT_PTP_LOST BIT_ULL(4) 188 + #define GMI_TXX_INT_LATE_COL BIT_ULL(3) 189 + #define GMI_TXX_INT_XSDEF BIT_ULL(2) 190 + #define GMI_TXX_INT_XSCOL BIT_ULL(1) 191 + #define GMI_TXX_INT_UNDFLW BIT_ULL(0) 183 192 184 193 #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ 185 194 #define BGX_MSIX_VEC_0_29_CTL 0x400008
+2
drivers/net/ethernet/davicom/dm9000.c
··· 1405 1405 mac_addr = of_get_mac_address(np); 1406 1406 if (!IS_ERR(mac_addr)) 1407 1407 ether_addr_copy(pdata->dev_addr, mac_addr); 1408 + else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) 1409 + return ERR_CAST(mac_addr); 1408 1410 1409 1411 return pdata; 1410 1412 }
+38 -18
drivers/net/ethernet/intel/ice/ice_ethtool.c
··· 2936 2936 else 2937 2937 return -EINVAL; 2938 2938 2939 - /* Tell the OS link is going down, the link will go back up when fw 2940 - * says it is ready asynchronously 2941 - */ 2942 - ice_print_link_msg(vsi, false); 2943 - netif_carrier_off(netdev); 2944 - netif_tx_stop_all_queues(netdev); 2945 - 2946 2939 /* Set the FC mode and only restart AN if link is up */ 2947 2940 status = ice_set_fc(pi, &aq_failures, link_up); 2948 2941 ··· 3482 3489 return -EINVAL; 3483 3490 } 3484 3491 3485 - /* hardware only supports an ITR granularity of 2us */ 3486 - if (coalesce_usecs % 2 != 0) { 3487 - netdev_info(vsi->netdev, "Invalid value, %s-usecs must be even\n", 3488 - c_type_str); 3489 - return -EINVAL; 3490 - } 3491 - 3492 3492 if (use_adaptive_coalesce) { 3493 3493 rc->itr_setting |= ICE_ITR_DYNAMIC; 3494 3494 } else { 3495 - /* store user facing value how it was set */ 3495 + /* save the user set usecs */ 3496 3496 rc->itr_setting = coalesce_usecs; 3497 - /* set to static and convert to value HW understands */ 3498 - rc->target_itr = 3499 - ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting)); 3497 + /* device ITR granularity is in 2 usec increments */ 3498 + rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); 3500 3499 } 3501 3500 3502 3501 return 0; ··· 3582 3597 } 3583 3598 3584 3599 /** 3600 + * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs 3601 + * @netdev: netdev used for print 3602 + * @itr_setting: previous user setting 3603 + * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled 3604 + * @coalesce_usecs: requested value of [tx|rx]-usecs 3605 + * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs 3606 + */ 3607 + static void 3608 + ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, 3609 + u32 use_adaptive_coalesce, u32 coalesce_usecs, 3610 + const char *c_type_str) 3611 + { 3612 + if (use_adaptive_coalesce) 3613 + return; 3614 + 3615 + itr_setting = ITR_TO_REG(itr_setting); 3616 + 
3617 + if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) 3618 + netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n", 3619 + c_type_str, coalesce_usecs, c_type_str, 3620 + ITR_REG_ALIGN(coalesce_usecs)); 3621 + } 3622 + 3623 + /** 3585 3624 * __ice_set_coalesce - set ITR/INTRL values for the device 3586 3625 * @netdev: pointer to the netdev associated with this query 3587 3626 * @ec: ethtool structure to fill with driver's coalesce settings ··· 3625 3616 return -EINVAL; 3626 3617 3627 3618 if (q_num < 0) { 3619 + struct ice_q_vector *q_vector = vsi->q_vectors[0]; 3628 3620 int v_idx; 3621 + 3622 + if (q_vector) { 3623 + ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting, 3624 + ec->use_adaptive_rx_coalesce, 3625 + ec->rx_coalesce_usecs, "rx"); 3626 + 3627 + ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting, 3628 + ec->use_adaptive_tx_coalesce, 3629 + ec->tx_coalesce_usecs, "tx"); 3630 + } 3629 3631 3630 3632 ice_for_each_q_vector(vsi, v_idx) { 3631 3633 /* In some cases if DCB is configured the num_[rx|tx]q
+1 -1
drivers/net/ethernet/intel/ice/ice_txrx.h
··· 222 222 #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ 223 223 #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) 224 224 #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ 225 - #define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) 225 + #define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) 226 226 227 227 #define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 228 228 #define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002
+74 -60
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
··· 1874 1874 } 1875 1875 1876 1876 /** 1877 + * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset 1878 + * @vf: The VF being resseting 1879 + * 1880 + * The max poll time is about ~800ms, which is about the maximum time it takes 1881 + * for a VF to be reset and/or a VF driver to be removed. 1882 + */ 1883 + static void ice_wait_on_vf_reset(struct ice_vf *vf) 1884 + { 1885 + int i; 1886 + 1887 + for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) { 1888 + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) 1889 + break; 1890 + msleep(ICE_MAX_VF_RESET_SLEEP_MS); 1891 + } 1892 + } 1893 + 1894 + /** 1895 + * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried 1896 + * @vf: VF to check if it's ready to be configured/queried 1897 + * 1898 + * The purpose of this function is to make sure the VF is not in reset, not 1899 + * disabled, and initialized so it can be configured and/or queried by a host 1900 + * administrator. 1901 + */ 1902 + static int ice_check_vf_ready_for_cfg(struct ice_vf *vf) 1903 + { 1904 + struct ice_pf *pf; 1905 + 1906 + ice_wait_on_vf_reset(vf); 1907 + 1908 + if (ice_is_vf_disabled(vf)) 1909 + return -EINVAL; 1910 + 1911 + pf = vf->pf; 1912 + if (ice_check_vf_init(pf, vf)) 1913 + return -EBUSY; 1914 + 1915 + return 0; 1916 + } 1917 + 1918 + /** 1877 1919 * ice_set_vf_spoofchk 1878 1920 * @netdev: network interface device structure 1879 1921 * @vf_id: VF identifier ··· 1932 1890 enum ice_status status; 1933 1891 struct device *dev; 1934 1892 struct ice_vf *vf; 1935 - int ret = 0; 1893 + int ret; 1936 1894 1937 1895 dev = ice_pf_to_dev(pf); 1938 1896 if (ice_validate_vf_id(pf, vf_id)) 1939 1897 return -EINVAL; 1940 1898 1941 1899 vf = &pf->vf[vf_id]; 1942 - 1943 - if (ice_check_vf_init(pf, vf)) 1944 - return -EBUSY; 1900 + ret = ice_check_vf_ready_for_cfg(vf); 1901 + if (ret) 1902 + return ret; 1945 1903 1946 1904 vf_vsi = pf->vsi[vf->lan_vsi_idx]; 1947 1905 if (!vf_vsi) { ··· 2738 2696 struct ice_vsi *vsi; 2739 
2697 struct device *dev; 2740 2698 struct ice_vf *vf; 2741 - int ret = 0; 2699 + int ret; 2742 2700 2743 2701 dev = ice_pf_to_dev(pf); 2744 2702 if (ice_validate_vf_id(pf, vf_id)) ··· 2756 2714 2757 2715 vf = &pf->vf[vf_id]; 2758 2716 vsi = pf->vsi[vf->lan_vsi_idx]; 2759 - if (ice_check_vf_init(pf, vf)) 2760 - return -EBUSY; 2717 + 2718 + ret = ice_check_vf_ready_for_cfg(vf); 2719 + if (ret) 2720 + return ret; 2761 2721 2762 2722 if (le16_to_cpu(vsi->info.pvid) == vlanprio) { 2763 2723 /* duplicate request, so just return success */ 2764 2724 dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); 2765 - return ret; 2725 + return 0; 2766 2726 } 2767 2727 2768 2728 /* If PVID, then remove all filters on the old VLAN */ ··· 2775 2731 if (vlan_id || qos) { 2776 2732 ret = ice_vsi_manage_pvid(vsi, vlanprio, true); 2777 2733 if (ret) 2778 - goto error_set_pvid; 2734 + return ret; 2779 2735 } else { 2780 2736 ice_vsi_manage_pvid(vsi, 0, false); 2781 2737 vsi->info.pvid = 0; ··· 2788 2744 /* add new VLAN filter for each MAC */ 2789 2745 ret = ice_vsi_add_vlan(vsi, vlan_id); 2790 2746 if (ret) 2791 - goto error_set_pvid; 2747 + return ret; 2792 2748 } 2793 2749 2794 2750 /* The Port VLAN needs to be saved across resets the same as the ··· 2796 2752 */ 2797 2753 vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); 2798 2754 2799 - error_set_pvid: 2800 - return ret; 2755 + return 0; 2801 2756 } 2802 2757 2803 2758 /** ··· 3280 3237 } 3281 3238 3282 3239 /** 3283 - * ice_wait_on_vf_reset 3284 - * @vf: The VF being resseting 3285 - * 3286 - * Poll to make sure a given VF is ready after reset 3287 - */ 3288 - static void ice_wait_on_vf_reset(struct ice_vf *vf) 3289 - { 3290 - int i; 3291 - 3292 - for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) { 3293 - if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) 3294 - break; 3295 - msleep(20); 3296 - } 3297 - } 3298 - 3299 - /** 3300 3240 * ice_set_vf_mac 3301 3241 * @netdev: network interface device structure 3302 3242 * @vf_id: VF identifier ··· 
3291 3265 { 3292 3266 struct ice_pf *pf = ice_netdev_to_pf(netdev); 3293 3267 struct ice_vf *vf; 3294 - int ret = 0; 3268 + int ret; 3295 3269 3296 3270 if (ice_validate_vf_id(pf, vf_id)) 3297 3271 return -EINVAL; 3298 - 3299 - vf = &pf->vf[vf_id]; 3300 - /* Don't set MAC on disabled VF */ 3301 - if (ice_is_vf_disabled(vf)) 3302 - return -EINVAL; 3303 - 3304 - /* In case VF is in reset mode, wait until it is completed. Depending 3305 - * on factors like queue disabling routine, this could take ~250ms 3306 - */ 3307 - ice_wait_on_vf_reset(vf); 3308 - 3309 - if (ice_check_vf_init(pf, vf)) 3310 - return -EBUSY; 3311 3272 3312 3273 if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) { 3313 3274 netdev_err(netdev, "%pM not a valid unicast address\n", mac); 3314 3275 return -EINVAL; 3315 3276 } 3277 + 3278 + vf = &pf->vf[vf_id]; 3279 + ret = ice_check_vf_ready_for_cfg(vf); 3280 + if (ret) 3281 + return ret; 3316 3282 3317 3283 /* copy MAC into dflt_lan_addr and trigger a VF reset. The reset 3318 3284 * flow will use the updated dflt_lan_addr and add a MAC filter ··· 3317 3299 vf_id, mac); 3318 3300 3319 3301 ice_vc_reset_vf(vf); 3320 - return ret; 3302 + return 0; 3321 3303 } 3322 3304 3323 3305 /** ··· 3332 3314 { 3333 3315 struct ice_pf *pf = ice_netdev_to_pf(netdev); 3334 3316 struct ice_vf *vf; 3317 + int ret; 3335 3318 3336 3319 if (ice_validate_vf_id(pf, vf_id)) 3337 3320 return -EINVAL; 3338 3321 3339 3322 vf = &pf->vf[vf_id]; 3340 - /* Don't set Trusted Mode on disabled VF */ 3341 - if (ice_is_vf_disabled(vf)) 3342 - return -EINVAL; 3343 - 3344 - /* In case VF is in reset mode, wait until it is completed. 
Depending 3345 - * on factors like queue disabling routine, this could take ~250ms 3346 - */ 3347 - ice_wait_on_vf_reset(vf); 3348 - 3349 - if (ice_check_vf_init(pf, vf)) 3350 - return -EBUSY; 3323 + ret = ice_check_vf_ready_for_cfg(vf); 3324 + if (ret) 3325 + return ret; 3351 3326 3352 3327 /* Check if already trusted */ 3353 3328 if (trusted == vf->trusted) ··· 3366 3355 { 3367 3356 struct ice_pf *pf = ice_netdev_to_pf(netdev); 3368 3357 struct ice_vf *vf; 3358 + int ret; 3369 3359 3370 3360 if (ice_validate_vf_id(pf, vf_id)) 3371 3361 return -EINVAL; 3372 3362 3373 3363 vf = &pf->vf[vf_id]; 3374 - if (ice_check_vf_init(pf, vf)) 3375 - return -EBUSY; 3364 + ret = ice_check_vf_ready_for_cfg(vf); 3365 + if (ret) 3366 + return ret; 3376 3367 3377 3368 switch (link_state) { 3378 3369 case IFLA_VF_LINK_STATE_AUTO: ··· 3410 3397 struct ice_eth_stats *stats; 3411 3398 struct ice_vsi *vsi; 3412 3399 struct ice_vf *vf; 3400 + int ret; 3413 3401 3414 3402 if (ice_validate_vf_id(pf, vf_id)) 3415 3403 return -EINVAL; 3416 3404 3417 3405 vf = &pf->vf[vf_id]; 3418 - 3419 - if (ice_check_vf_init(pf, vf)) 3420 - return -EBUSY; 3406 + ret = ice_check_vf_ready_for_cfg(vf); 3407 + if (ret) 3408 + return ret; 3421 3409 3422 3410 vsi = pf->vsi[vf->lan_vsi_idx]; 3423 3411 if (!vsi)
+2 -1
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
··· 38 38 #define ICE_MAX_POLICY_INTR_PER_VF 33 39 39 #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) 40 40 #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) 41 - #define ICE_MAX_VF_RESET_WAIT 15 41 + #define ICE_MAX_VF_RESET_TRIES 40 42 + #define ICE_MAX_VF_RESET_SLEEP_MS 20 42 43 43 44 #define ice_for_each_vf(pf, i) \ 44 45 for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++)
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/health.c
··· 200 200 netdev_err(priv->netdev, err_str); 201 201 202 202 if (!reporter) 203 - return err_ctx->recover(&err_ctx->ctx); 203 + return err_ctx->recover(err_ctx->ctx); 204 204 205 205 return devlink_health_report(reporter, err_str, err_ctx); 206 206 }
+8
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
··· 179 179 } 180 180 } 181 181 182 + static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) 183 + { 184 + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) 185 + mlx5_wq_ll_reset(&rq->mpwqe.wq); 186 + else 187 + mlx5_wq_cyc_reset(&rq->wqe.wq); 188 + } 189 + 182 190 /* SW parser related functions */ 183 191 184 192 struct mlx5e_swp_spec {
+3
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 712 712 if (!in) 713 713 return -ENOMEM; 714 714 715 + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) 716 + mlx5e_rqwq_reset(rq); 717 + 715 718 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 716 719 717 720 MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+8 -12
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
··· 459 459 460 460 static int esw_legacy_enable(struct mlx5_eswitch *esw) 461 461 { 462 - int ret; 462 + struct mlx5_vport *vport; 463 + int ret, i; 463 464 464 465 ret = esw_create_legacy_table(esw); 465 466 if (ret) 466 467 return ret; 468 + 469 + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) 470 + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; 467 471 468 472 ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); 469 473 if (ret) ··· 2456 2452 2457 2453 int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) 2458 2454 { 2459 - int err = 0; 2460 - 2461 2455 if (!esw) 2462 2456 return -EOPNOTSUPP; 2463 2457 2464 2458 if (!ESW_ALLOWED(esw)) 2465 2459 return -EPERM; 2466 2460 2467 - mutex_lock(&esw->state_lock); 2468 - if (esw->mode != MLX5_ESWITCH_LEGACY) { 2469 - err = -EOPNOTSUPP; 2470 - goto out; 2471 - } 2461 + if (esw->mode != MLX5_ESWITCH_LEGACY) 2462 + return -EOPNOTSUPP; 2472 2463 2473 2464 *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; 2474 - 2475 - out: 2476 - mutex_unlock(&esw->state_lock); 2477 - return err; 2465 + return 0; 2478 2466 } 2479 2467 2480 2468 int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
··· 1172 1172 return -EINVAL; 1173 1173 } 1174 1174 1175 - mlx5_eswitch_disable(esw, true); 1175 + mlx5_eswitch_disable(esw, false); 1176 1176 mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); 1177 1177 err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); 1178 1178 if (err) { ··· 2065 2065 { 2066 2066 int err, err1; 2067 2067 2068 - mlx5_eswitch_disable(esw, true); 2068 + mlx5_eswitch_disable(esw, false); 2069 2069 err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); 2070 2070 if (err) { 2071 2071 NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
··· 35 35 static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, 36 36 1 * 1024 * 1024, 37 37 64 * 1024, 38 - 4 * 1024, }; 38 + 128 }; 39 39 40 40 struct mlx5_esw_chains_priv { 41 41 struct rhashtable chains_ht;
+4 -1
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
··· 2307 2307 struct mlx5dr_cmd_vport_cap *vport_cap; 2308 2308 struct mlx5dr_domain *dmn = sb->dmn; 2309 2309 struct mlx5dr_cmd_caps *caps; 2310 + u8 *bit_mask = sb->bit_mask; 2310 2311 u8 *tag = hw_ste->tag; 2312 + bool source_gvmi_set; 2311 2313 2312 2314 DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); 2313 2315 ··· 2330 2328 if (!vport_cap) 2331 2329 return -EINVAL; 2332 2330 2333 - if (vport_cap->vport_gvmi) 2331 + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); 2332 + if (vport_cap->vport_gvmi && source_gvmi_set) 2334 2333 MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); 2335 2334 2336 2335 misc->source_eswitch_owner_vhca_id = 0;
+7 -2
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
··· 66 66 struct mlx5_flow_table *next_ft) 67 67 { 68 68 struct mlx5dr_table *tbl; 69 + u32 flags; 69 70 int err; 70 71 71 72 if (mlx5_dr_is_fw_table(ft->flags)) 72 73 return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, 73 74 log_size, 74 75 next_ft); 76 + flags = ft->flags; 77 + /* turn off encap/decap if not supported for sw-str by fw */ 78 + if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported)) 79 + flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | 80 + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); 75 81 76 - tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, 77 - ft->level, ft->flags); 82 + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags); 78 83 if (!tbl) { 79 84 mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); 80 85 return -EINVAL;
+30 -9
drivers/net/ethernet/mellanox/mlx5/core/wq.c
··· 94 94 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); 95 95 } 96 96 97 + void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) 98 + { 99 + wq->wqe_ctr = 0; 100 + wq->cur_sz = 0; 101 + mlx5_wq_cyc_update_db_record(wq); 102 + } 103 + 97 104 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, 98 105 void *qpc, struct mlx5_wq_qp *wq, 99 106 struct mlx5_wq_ctrl *wq_ctrl) ··· 199 192 return err; 200 193 } 201 194 195 + static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) 196 + { 197 + struct mlx5_wqe_srq_next_seg *next_seg; 198 + int i; 199 + 200 + for (i = 0; i < wq->fbc.sz_m1; i++) { 201 + next_seg = mlx5_wq_ll_get_wqe(wq, i); 202 + next_seg->next_wqe_index = cpu_to_be16(i + 1); 203 + } 204 + next_seg = mlx5_wq_ll_get_wqe(wq, i); 205 + wq->tail_next = &next_seg->next_wqe_index; 206 + } 207 + 202 208 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, 203 209 void *wqc, struct mlx5_wq_ll *wq, 204 210 struct mlx5_wq_ctrl *wq_ctrl) ··· 219 199 u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); 220 200 u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); 221 201 struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; 222 - struct mlx5_wqe_srq_next_seg *next_seg; 223 202 int err; 224 - int i; 225 203 226 204 err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); 227 205 if (err) { ··· 238 220 239 221 mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); 240 222 241 - for (i = 0; i < fbc->sz_m1; i++) { 242 - next_seg = mlx5_wq_ll_get_wqe(wq, i); 243 - next_seg->next_wqe_index = cpu_to_be16(i + 1); 244 - } 245 - next_seg = mlx5_wq_ll_get_wqe(wq, i); 246 - wq->tail_next = &next_seg->next_wqe_index; 247 - 223 + mlx5_wq_ll_init_list(wq); 248 224 wq_ctrl->mdev = mdev; 249 225 250 226 return 0; ··· 247 235 mlx5_db_free(mdev, &wq_ctrl->db); 248 236 249 237 return err; 238 + } 239 + 240 + void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) 241 + { 242 + wq->head = 0; 243 + wq->wqe_ctr = 0; 244 + 
wq->cur_sz = 0; 245 + mlx5_wq_ll_init_list(wq); 246 + mlx5_wq_ll_update_db_record(wq); 250 247 } 251 248 252 249 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
+2
drivers/net/ethernet/mellanox/mlx5/core/wq.h
··· 80 80 void *wqc, struct mlx5_wq_cyc *wq, 81 81 struct mlx5_wq_ctrl *wq_ctrl); 82 82 void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); 83 + void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); 83 84 84 85 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, 85 86 void *qpc, struct mlx5_wq_qp *wq, ··· 93 92 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, 94 93 void *wqc, struct mlx5_wq_ll *wq, 95 94 struct mlx5_wq_ctrl *wq_ctrl); 95 + void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); 96 96 97 97 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); 98 98
+9 -44
drivers/net/ethernet/micrel/ks8851_mll.c
··· 157 157 */ 158 158 159 159 /** 160 - * ks_rdreg8 - read 8 bit register from device 161 - * @ks : The chip information 162 - * @offset: The register address 163 - * 164 - * Read a 8bit register from the chip, returning the result 165 - */ 166 - static u8 ks_rdreg8(struct ks_net *ks, int offset) 167 - { 168 - u16 data; 169 - u8 shift_bit = offset & 0x03; 170 - u8 shift_data = (offset & 1) << 3; 171 - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); 172 - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); 173 - data = ioread16(ks->hw_addr); 174 - return (u8)(data >> shift_data); 175 - } 176 - 177 - /** 178 160 * ks_rdreg16 - read 16 bit register from device 179 161 * @ks : The chip information 180 162 * @offset: The register address ··· 166 184 167 185 static u16 ks_rdreg16(struct ks_net *ks, int offset) 168 186 { 169 - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); 187 + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); 170 188 iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); 171 189 return ioread16(ks->hw_addr); 172 - } 173 - 174 - /** 175 - * ks_wrreg8 - write 8bit register value to chip 176 - * @ks: The chip information 177 - * @offset: The register address 178 - * @value: The value to write 179 - * 180 - */ 181 - static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) 182 - { 183 - u8 shift_bit = (offset & 0x03); 184 - u16 value_write = (u16)(value << ((offset & 1) << 3)); 185 - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); 186 - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); 187 - iowrite16(value_write, ks->hw_addr); 188 190 } 189 191 190 192 /** ··· 181 215 182 216 static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) 183 217 { 184 - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); 218 + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); 185 219 iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); 186 220 iowrite16(value, ks->hw_addr); 187 221 } ··· 197 231 { 
198 232 len >>= 1; 199 233 while (len--) 200 - *wptr++ = (u16)ioread16(ks->hw_addr); 234 + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); 201 235 } 202 236 203 237 /** ··· 211 245 { 212 246 len >>= 1; 213 247 while (len--) 214 - iowrite16(*wptr++, ks->hw_addr); 248 + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); 215 249 } 216 250 217 251 static void ks_disable_int(struct ks_net *ks) ··· 290 324 u16 reg_data = 0; 291 325 292 326 /* Regardless of bus width, 8 bit read should always work.*/ 293 - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; 294 - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; 327 + reg_data = ks_rdreg16(ks, KS_CCR); 295 328 296 329 /* addr/data bus are multiplexed */ 297 330 ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; ··· 394 429 395 430 /* 1. set sudo DMA mode */ 396 431 ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); 397 - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); 432 + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); 398 433 399 434 /* 2. read prepend data */ 400 435 /** ··· 411 446 ks_inblk(ks, buf, ALIGN(len, 4)); 412 447 413 448 /* 4. reset sudo DMA Mode */ 414 - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); 449 + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); 415 450 } 416 451 417 452 /** ··· 644 679 ks->txh.txw[1] = cpu_to_le16(len); 645 680 646 681 /* 1. set sudo-DMA mode */ 647 - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); 682 + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); 648 683 /* 2. write status/lenth info */ 649 684 ks_outblk(ks, ks->txh.txw, 4); 650 685 /* 3. write pkt data */ 651 686 ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); 652 687 /* 4. reset sudo-DMA mode */ 653 - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); 688 + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); 654 689 /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ 655 690 ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); 656 691 /* 6. wait until TXQCR_METFE is auto-cleared */
+8
drivers/net/ethernet/mscc/ocelot_board.c
··· 114 114 if (err != 4) 115 115 break; 116 116 117 + /* At this point the IFH was read correctly, so it is safe to 118 + * presume that there is no error. The err needs to be reset 119 + * otherwise a frame could come in CPU queue between the while 120 + * condition and the check for error later on. And in that case 121 + * the new frame is just removed and not processed. 122 + */ 123 + err = 0; 124 + 117 125 ocelot_parse_ifh(ifh, &info); 118 126 119 127 ocelot_port = ocelot->ports[info.port];
+7 -4
drivers/net/ethernet/pensando/ionic/ionic_dev.c
··· 103 103 { 104 104 struct ionic_dev *idev = &ionic->idev; 105 105 unsigned long hb_time; 106 - u32 fw_status; 106 + u8 fw_status; 107 107 u32 hb; 108 108 109 109 /* wait a little more than one second before testing again */ ··· 111 111 if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period))) 112 112 return 0; 113 113 114 - /* firmware is useful only if fw_status is non-zero */ 115 - fw_status = ioread32(&idev->dev_info_regs->fw_status); 116 - if (!fw_status) 114 + /* firmware is useful only if the running bit is set and 115 + * fw_status != 0xff (bad PCI read) 116 + */ 117 + fw_status = ioread8(&idev->dev_info_regs->fw_status); 118 + if (fw_status == 0xff || 119 + !(fw_status & IONIC_FW_STS_F_RUNNING)) 117 120 return -ENXIO; 118 121 119 122 /* early FW has no heartbeat, else FW will return non-zero */
+1
drivers/net/ethernet/pensando/ionic/ionic_if.h
··· 2445 2445 u8 version; 2446 2446 u8 asic_type; 2447 2447 u8 asic_rev; 2448 + #define IONIC_FW_STS_F_RUNNING 0x1 2448 2449 u8 fw_status; 2449 2450 u32 fw_heartbeat; 2450 2451 char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
+2
drivers/net/ethernet/qlogic/qede/qede.h
··· 163 163 struct list_head entry; 164 164 struct list_head rdma_event_list; 165 165 struct workqueue_struct *rdma_wq; 166 + struct kref refcnt; 167 + struct completion event_comp; 166 168 bool exp_recovery; 167 169 }; 168 170
+28 -1
drivers/net/ethernet/qlogic/qede/qede_rdma.c
··· 59 59 static int qede_rdma_create_wq(struct qede_dev *edev) 60 60 { 61 61 INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); 62 + kref_init(&edev->rdma_info.refcnt); 63 + init_completion(&edev->rdma_info.event_comp); 64 + 62 65 edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); 63 66 if (!edev->rdma_info.rdma_wq) { 64 67 DP_NOTICE(edev, "qedr: Could not create workqueue\n"); ··· 86 83 } 87 84 } 88 85 86 + static void qede_rdma_complete_event(struct kref *ref) 87 + { 88 + struct qede_rdma_dev *rdma_dev = 89 + container_of(ref, struct qede_rdma_dev, refcnt); 90 + 91 + /* no more events will be added after this */ 92 + complete(&rdma_dev->event_comp); 93 + } 94 + 89 95 static void qede_rdma_destroy_wq(struct qede_dev *edev) 90 96 { 97 + /* Avoid race with add_event flow, make sure it finishes before 98 + * we start accessing the list and cleaning up the work 99 + */ 100 + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); 101 + wait_for_completion(&edev->rdma_info.event_comp); 102 + 91 103 qede_rdma_cleanup_event(edev); 92 104 destroy_workqueue(edev->rdma_info.rdma_wq); 93 105 } ··· 328 310 if (!edev->rdma_info.qedr_dev) 329 311 return; 330 312 313 + /* We don't want the cleanup flow to start while we're allocating and 314 + * scheduling the work 315 + */ 316 + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) 317 + return; /* already being destroyed */ 318 + 331 319 event_node = qede_rdma_get_free_event_node(edev); 332 320 if (!event_node) 333 - return; 321 + goto out; 334 322 335 323 event_node->event = event; 336 324 event_node->ptr = edev; 337 325 338 326 INIT_WORK(&event_node->work, qede_rdma_handle_event); 339 327 queue_work(edev->rdma_info.rdma_wq, &event_node->work); 328 + 329 + out: 330 + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); 340 331 } 341 332 342 333 void qede_rdma_dev_event_open(struct qede_dev *edev)
+2 -2
drivers/net/phy/broadcom.c
··· 410 410 struct device_node *np = phydev->mdio.dev.of_node; 411 411 int ret; 412 412 413 - /* Aneg firsly. */ 413 + /* Aneg firstly. */ 414 414 ret = genphy_config_aneg(phydev); 415 415 416 416 /* Then we can set up the delay. */ ··· 463 463 { 464 464 int ret; 465 465 466 - /* Aneg firsly. */ 466 + /* Aneg firstly. */ 467 467 if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) 468 468 ret = genphy_c37_config_aneg(phydev); 469 469 else
+20
drivers/net/phy/mdio-bcm-iproc.c
··· 178 178 return 0; 179 179 } 180 180 181 + #ifdef CONFIG_PM_SLEEP 182 + int iproc_mdio_resume(struct device *dev) 183 + { 184 + struct platform_device *pdev = to_platform_device(dev); 185 + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); 186 + 187 + /* restore the mii clock configuration */ 188 + iproc_mdio_config_clk(priv->base); 189 + 190 + return 0; 191 + } 192 + 193 + static const struct dev_pm_ops iproc_mdio_pm_ops = { 194 + .resume = iproc_mdio_resume 195 + }; 196 + #endif /* CONFIG_PM_SLEEP */ 197 + 181 198 static const struct of_device_id iproc_mdio_of_match[] = { 182 199 { .compatible = "brcm,iproc-mdio", }, 183 200 { /* sentinel */ }, ··· 205 188 .driver = { 206 189 .name = "iproc-mdio", 207 190 .of_match_table = iproc_mdio_of_match, 191 + #ifdef CONFIG_PM_SLEEP 192 + .pm = &iproc_mdio_pm_ops, 193 + #endif 208 194 }, 209 195 .probe = iproc_mdio_probe, 210 196 .remove = iproc_mdio_remove,
+4 -3
drivers/net/wireguard/device.c
··· 258 258 enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | 259 259 NETIF_F_SG | NETIF_F_GSO | 260 260 NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; 261 + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + 262 + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); 261 263 262 264 dev->netdev_ops = &netdev_ops; 263 265 dev->hard_header_len = 0; ··· 273 271 dev->features |= WG_NETDEV_FEATURES; 274 272 dev->hw_features |= WG_NETDEV_FEATURES; 275 273 dev->hw_enc_features |= WG_NETDEV_FEATURES; 276 - dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - 277 - sizeof(struct udphdr) - 278 - max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); 274 + dev->mtu = ETH_DATA_LEN - overhead; 275 + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; 279 276 280 277 SET_NETDEV_DEVTYPE(dev, &device_type); 281 278
+5 -2
drivers/net/wireguard/receive.c
··· 118 118 119 119 under_load = skb_queue_len(&wg->incoming_handshakes) >= 120 120 MAX_QUEUED_INCOMING_HANDSHAKES / 8; 121 - if (under_load) 121 + if (under_load) { 122 122 last_under_load = ktime_get_coarse_boottime_ns(); 123 - else if (last_under_load) 123 + } else if (last_under_load) { 124 124 under_load = !wg_birthdate_has_expired(last_under_load, 1); 125 + if (!under_load) 126 + last_under_load = 0; 127 + } 125 128 mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, 126 129 under_load); 127 130 if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
+11 -5
drivers/net/wireguard/send.c
··· 143 143 144 144 static unsigned int calculate_skb_padding(struct sk_buff *skb) 145 145 { 146 + unsigned int padded_size, last_unit = skb->len; 147 + 148 + if (unlikely(!PACKET_CB(skb)->mtu)) 149 + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; 150 + 146 151 /* We do this modulo business with the MTU, just in case the networking 147 152 * layer gives us a packet that's bigger than the MTU. In that case, we 148 153 * wouldn't want the final subtraction to overflow in the case of the 149 - * padded_size being clamped. 154 + * padded_size being clamped. Fortunately, that's very rarely the case, 155 + * so we optimize for that not happening. 150 156 */ 151 - unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; 152 - unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); 157 + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) 158 + last_unit %= PACKET_CB(skb)->mtu; 153 159 154 - if (padded_size > PACKET_CB(skb)->mtu) 155 - padded_size = PACKET_CB(skb)->mtu; 160 + padded_size = min(PACKET_CB(skb)->mtu, 161 + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); 156 162 return padded_size - last_unit; 157 163 } 158 164
-1
drivers/net/wireguard/socket.c
··· 432 432 wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); 433 433 mutex_unlock(&wg->socket_update_lock); 434 434 synchronize_rcu(); 435 - synchronize_net(); 436 435 sock_free(old4); 437 436 sock_free(old6); 438 437 }
+1
drivers/nfc/pn544/i2c.c
··· 225 225 226 226 out: 227 227 gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); 228 + usleep_range(10000, 15000); 228 229 } 229 230 230 231 static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode)
+1 -1
drivers/nfc/pn544/pn544.c
··· 682 682 static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, 683 683 struct nfc_target *target) 684 684 { 685 - pr_debug("supported protocol %d\b", target->supported_protocols); 685 + pr_debug("supported protocol %d\n", target->supported_protocols); 686 686 if (target->supported_protocols & (NFC_PROTO_ISO14443_MASK | 687 687 NFC_PROTO_ISO14443_B_MASK)) { 688 688 return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+1 -2
drivers/s390/net/qeth_core_main.c
··· 5344 5344 } 5345 5345 5346 5346 use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) || 5347 - ((skb_len >= card->options.rx_sg_cb) && 5347 + (skb_len > card->options.rx_sg_cb && 5348 5348 !atomic_read(&card->force_alloc_skb) && 5349 5349 !IS_OSN(card)); 5350 5350 ··· 5447 5447 { 5448 5448 int work_done = 0; 5449 5449 5450 - WARN_ON_ONCE(!budget); 5451 5450 *done = false; 5452 5451 5453 5452 while (budget) {
+13 -16
drivers/s390/net/qeth_l2_main.c
··· 1707 1707 1708 1708 QETH_CARD_TEXT(card, 2, "vniccsch"); 1709 1709 1710 - /* do not change anything if BridgePort is enabled */ 1711 - if (qeth_bridgeport_is_in_use(card)) 1712 - return -EBUSY; 1713 - 1714 1710 /* check if characteristic and enable/disable are supported */ 1715 1711 if (!(card->options.vnicc.sup_chars & vnicc) || 1716 1712 !(card->options.vnicc.set_char_sup & vnicc)) 1717 1713 return -EOPNOTSUPP; 1714 + 1715 + if (qeth_bridgeport_is_in_use(card)) 1716 + return -EBUSY; 1718 1717 1719 1718 /* set enable/disable command and store wanted characteristic */ 1720 1719 if (state) { ··· 1760 1761 1761 1762 QETH_CARD_TEXT(card, 2, "vniccgch"); 1762 1763 1763 - /* do not get anything if BridgePort is enabled */ 1764 - if (qeth_bridgeport_is_in_use(card)) 1765 - return -EBUSY; 1766 - 1767 1764 /* check if characteristic is supported */ 1768 1765 if (!(card->options.vnicc.sup_chars & vnicc)) 1769 1766 return -EOPNOTSUPP; 1767 + 1768 + if (qeth_bridgeport_is_in_use(card)) 1769 + return -EBUSY; 1770 1770 1771 1771 /* if card is ready, query current VNICC state */ 1772 1772 if (qeth_card_hw_is_reachable(card)) ··· 1784 1786 1785 1787 QETH_CARD_TEXT(card, 2, "vniccsto"); 1786 1788 1787 - /* do not change anything if BridgePort is enabled */ 1788 - if (qeth_bridgeport_is_in_use(card)) 1789 - return -EBUSY; 1790 - 1791 1789 /* check if characteristic and set_timeout are supported */ 1792 1790 if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || 1793 1791 !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) 1794 1792 return -EOPNOTSUPP; 1793 + 1794 + if (qeth_bridgeport_is_in_use(card)) 1795 + return -EBUSY; 1795 1796 1796 1797 /* do we need to do anything? 
*/ 1797 1798 if (card->options.vnicc.learning_timeout == timeout) ··· 1820 1823 1821 1824 QETH_CARD_TEXT(card, 2, "vniccgto"); 1822 1825 1823 - /* do not get anything if BridgePort is enabled */ 1824 - if (qeth_bridgeport_is_in_use(card)) 1825 - return -EBUSY; 1826 - 1827 1826 /* check if characteristic and get_timeout are supported */ 1828 1827 if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || 1829 1828 !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) 1830 1829 return -EOPNOTSUPP; 1830 + 1831 + if (qeth_bridgeport_is_in_use(card)) 1832 + return -EBUSY; 1833 + 1831 1834 /* if card is ready, get timeout. Otherwise, just return stored value */ 1832 1835 *timeout = card->options.vnicc.learning_timeout; 1833 1836 if (qeth_card_hw_is_reachable(card))
+4 -1
include/linux/mlx5/mlx5_ifc.h
··· 688 688 u8 nic_rx_multi_path_tirs[0x1]; 689 689 u8 nic_rx_multi_path_tirs_fts[0x1]; 690 690 u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; 691 - u8 reserved_at_3[0x1d]; 691 + u8 reserved_at_3[0x4]; 692 + u8 sw_owner_reformat_supported[0x1]; 693 + u8 reserved_at_8[0x18]; 694 + 692 695 u8 encap_general_header[0x1]; 693 696 u8 reserved_at_21[0xa]; 694 697 u8 log_max_packet_reformat_context[0x5];
+3 -4
include/linux/netdevice.h
··· 72 72 #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ 73 73 #define NET_RX_DROP 1 /* packet dropped */ 74 74 75 + #define MAX_NEST_DEV 8 76 + 75 77 /* 76 78 * Transmit return codes: transmit return codes originate from three different 77 79 * namespaces: ··· 4391 4389 ldev; \ 4392 4390 ldev = netdev_lower_get_next(dev, &(iter))) 4393 4391 4394 - struct net_device *netdev_all_lower_get_next(struct net_device *dev, 4392 + struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, 4395 4393 struct list_head **iter); 4396 - struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, 4397 - struct list_head **iter); 4398 - 4399 4394 int netdev_walk_all_lower_dev(struct net_device *dev, 4400 4395 int (*fn)(struct net_device *lower_dev, 4401 4396 void *data),
+7
include/linux/rculist_nulls.h
··· 145 145 } 146 146 } 147 147 148 + /* after that hlist_nulls_del will work */ 149 + static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) 150 + { 151 + n->pprev = &n->next; 152 + n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); 153 + } 154 + 148 155 /** 149 156 * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type 150 157 * @tpos: the type * to use as a loop cursor.
+30
include/linux/skbuff.h
··· 611 611 * @next: Next buffer in list 612 612 * @prev: Previous buffer in list 613 613 * @tstamp: Time we arrived/left 614 + * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point 615 + * for retransmit timer 614 616 * @rbnode: RB tree node, alternative to next/prev for netem/tcp 617 + * @list: queue head 615 618 * @sk: Socket we are owned by 619 + * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in 620 + * fragmentation management 616 621 * @dev: Device we arrived on/are leaving by 622 + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL 617 623 * @cb: Control buffer. Free for use by every layer. Put private vars here 618 624 * @_skb_refdst: destination entry (with norefcount bit) 619 625 * @sp: the security path, used for xfrm ··· 638 632 * @pkt_type: Packet class 639 633 * @fclone: skbuff clone status 640 634 * @ipvs_property: skbuff is owned by ipvs 635 + * @inner_protocol_type: whether the inner protocol is 636 + * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO 637 + * @remcsum_offload: remote checksum offload is enabled 641 638 * @offload_fwd_mark: Packet was L2-forwarded in hardware 642 639 * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware 643 640 * @tc_skip_classify: do not classify packet. set by IFB device ··· 659 650 * @tc_index: Traffic control index 660 651 * @hash: the packet hash 661 652 * @queue_mapping: Queue mapping for multiqueue devices 653 + * @head_frag: skb was allocated from page fragments, 654 + * not allocated by kmalloc() or vmalloc(). 
662 655 * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves 663 656 * @active_extensions: active extensions (skb_ext_id types) 664 657 * @ndisc_nodetype: router type (from link layer) ··· 671 660 * @wifi_acked_valid: wifi_acked was set 672 661 * @wifi_acked: whether frame was acked on wifi or not 673 662 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS 663 + * @encapsulation: indicates the inner headers in the skbuff are valid 664 + * @encap_hdr_csum: software checksum is needed 665 + * @csum_valid: checksum is already valid 674 666 * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL 667 + * @csum_complete_sw: checksum was completed by software 668 + * @csum_level: indicates the number of consecutive checksums found in 669 + * the packet minus one that have been verified as 670 + * CHECKSUM_UNNECESSARY (max 3) 675 671 * @dst_pending_confirm: need to confirm neighbour 676 672 * @decrypted: Decrypted SKB 677 673 * @napi_id: id of the NAPI struct this skb came from 674 + * @sender_cpu: (aka @napi_id) source CPU in XPS 678 675 * @secmark: security marking 679 676 * @mark: Generic packet mark 677 + * @reserved_tailroom: (aka @mark) number of bytes of free space available 678 + * at the tail of an sk_buff 679 + * @vlan_present: VLAN tag is present 680 680 * @vlan_proto: vlan encapsulation protocol 681 681 * @vlan_tci: vlan tag control information 682 682 * @inner_protocol: Protocol (encapsulation) 683 + * @inner_ipproto: (aka @inner_protocol) stores ipproto when 684 + * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; 683 685 * @inner_transport_header: Inner transport layer header (encapsulation) 684 686 * @inner_network_header: Network layer header (encapsulation) 685 687 * @inner_mac_header: Link layer header (encapsulation) ··· 774 750 #endif 775 751 #define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) 776 752 753 + /* private: */ 777 754 __u8 __cloned_offset[0]; 755 + /* public: */ 778 756 __u8 cloned:1, 779 757 nohdr:1, 780 758 
fclone:2, ··· 801 775 #endif 802 776 #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) 803 777 778 + /* private: */ 804 779 __u8 __pkt_type_offset[0]; 780 + /* public: */ 805 781 __u8 pkt_type:3; 806 782 __u8 ignore_df:1; 807 783 __u8 nf_trace:1; ··· 826 798 #define PKT_VLAN_PRESENT_BIT 0 827 799 #endif 828 800 #define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) 801 + /* private: */ 829 802 __u8 __pkt_vlan_present_offset[0]; 803 + /* public: */ 830 804 __u8 vlan_present:1; 831 805 __u8 csum_complete_sw:1; 832 806 __u8 csum_level:2;
+9
include/net/flow_dissector.h
··· 5 5 #include <linux/types.h> 6 6 #include <linux/in6.h> 7 7 #include <linux/siphash.h> 8 + #include <linux/string.h> 8 9 #include <uapi/linux/if_ether.h> 9 10 10 11 struct sk_buff; ··· 348 347 void *data; 349 348 void *data_end; 350 349 }; 350 + 351 + static inline void 352 + flow_dissector_init_keys(struct flow_dissector_key_control *key_control, 353 + struct flow_dissector_key_basic *key_basic) 354 + { 355 + memset(key_control, 0, sizeof(*key_control)); 356 + memset(key_basic, 0, sizeof(*key_basic)); 357 + } 351 358 352 359 #endif
+33 -5
include/net/sock.h
··· 117 117 * struct sock_common - minimal network layer representation of sockets 118 118 * @skc_daddr: Foreign IPv4 addr 119 119 * @skc_rcv_saddr: Bound local IPv4 addr 120 + * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr 120 121 * @skc_hash: hash value used with various protocol lookup tables 121 122 * @skc_u16hashes: two u16 hash values used by UDP lookup tables 122 123 * @skc_dport: placeholder for inet_dport/tw_dport 123 124 * @skc_num: placeholder for inet_num/tw_num 125 + * @skc_portpair: __u32 union of @skc_dport & @skc_num 124 126 * @skc_family: network address family 125 127 * @skc_state: Connection state 126 128 * @skc_reuse: %SO_REUSEADDR setting 127 129 * @skc_reuseport: %SO_REUSEPORT setting 130 + * @skc_ipv6only: socket is IPV6 only 131 + * @skc_net_refcnt: socket is using net ref counting 128 132 * @skc_bound_dev_if: bound device index if != 0 129 133 * @skc_bind_node: bind hash linkage for various protocol lookup tables 130 134 * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol 131 135 * @skc_prot: protocol handlers inside a network family 132 136 * @skc_net: reference to the network namespace of this socket 137 + * @skc_v6_daddr: IPV6 destination address 138 + * @skc_v6_rcv_saddr: IPV6 source address 139 + * @skc_cookie: socket's cookie value 133 140 * @skc_node: main hash linkage for various protocol lookup tables 134 141 * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol 135 142 * @skc_tx_queue_mapping: tx queue number for this connection ··· 144 137 * @skc_flags: place holder for sk_flags 145 138 * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, 146 139 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings 140 + * @skc_listener: connection request listener socket (aka rsk_listener) 141 + * [union with @skc_flags] 142 + * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row 143 + * [union with @skc_flags] 147 144 * @skc_incoming_cpu: record/match cpu processing incoming 
packets 145 + * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled) 146 + * [union with @skc_incoming_cpu] 147 + * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number 148 + * [union with @skc_incoming_cpu] 148 149 * @skc_refcnt: reference count 149 150 * 150 151 * This is the minimal network layer representation of sockets, the header ··· 260 245 * @sk_dst_cache: destination cache 261 246 * @sk_dst_pending_confirm: need to confirm neighbour 262 247 * @sk_policy: flow policy 248 + * @sk_rx_skb_cache: cache copy of recently accessed RX skb 263 249 * @sk_receive_queue: incoming packets 264 250 * @sk_wmem_alloc: transmit queue bytes committed 265 251 * @sk_tsq_flags: TCP Small Queues flags ··· 281 265 * @sk_no_check_rx: allow zero checksum in RX packets 282 266 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) 283 267 * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) 268 + * @sk_route_forced_caps: static, forced route capabilities 269 + * (set in tcp_init_sock()) 284 270 * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) 285 271 * @sk_gso_max_size: Maximum GSO segment size to build 286 272 * @sk_gso_max_segs: Maximum number of GSO segments ··· 321 303 * @sk_frag: cached page frag 322 304 * @sk_peek_off: current peek_offset value 323 305 * @sk_send_head: front of stuff to transmit 306 + * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] 307 + * @sk_tx_skb_cache: cache copy of recently accessed TX skb 324 308 * @sk_security: used by security modules 325 309 * @sk_mark: generic packet mark 326 310 * @sk_cgrp_data: cgroup data for this cgroup ··· 333 313 * @sk_write_space: callback to indicate there is bf sending space available 334 314 * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) 335 315 * @sk_backlog_rcv: callback to process the backlog 316 + * @sk_validate_xmit_skb: ptr to an optional validate function 336 317 * @sk_destruct: called at sock freeing time, i.e. 
when all refcnt == 0 337 318 * @sk_reuseport_cb: reuseport group container 319 + * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage 338 320 * @sk_rcu: used during RCU grace period 339 321 * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) 340 322 * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME 323 + * @sk_txtime_report_errors: set report errors mode for SO_TXTIME 341 324 * @sk_txtime_unused: unused txtime flags 342 325 */ 343 326 struct sock { ··· 416 393 struct sk_filter __rcu *sk_filter; 417 394 union { 418 395 struct socket_wq __rcu *sk_wq; 396 + /* private: */ 419 397 struct socket_wq *sk_wq_raw; 398 + /* public: */ 420 399 }; 421 400 #ifdef CONFIG_XFRM 422 401 struct xfrm_policy __rcu *sk_policy[2]; ··· 2042 2017 * sk_wmem_alloc_get - returns write allocations 2043 2018 * @sk: socket 2044 2019 * 2045 - * Returns sk_wmem_alloc minus initial offset of one 2020 + * Return: sk_wmem_alloc minus initial offset of one 2046 2021 */ 2047 2022 static inline int sk_wmem_alloc_get(const struct sock *sk) 2048 2023 { ··· 2053 2028 * sk_rmem_alloc_get - returns read allocations 2054 2029 * @sk: socket 2055 2030 * 2056 - * Returns sk_rmem_alloc 2031 + * Return: sk_rmem_alloc 2057 2032 */ 2058 2033 static inline int sk_rmem_alloc_get(const struct sock *sk) 2059 2034 { ··· 2064 2039 * sk_has_allocations - check if allocations are outstanding 2065 2040 * @sk: socket 2066 2041 * 2067 - * Returns true if socket has write or read allocations 2042 + * Return: true if socket has write or read allocations 2068 2043 */ 2069 2044 static inline bool sk_has_allocations(const struct sock *sk) 2070 2045 { ··· 2075 2050 * skwq_has_sleeper - check if there are any waiting processes 2076 2051 * @wq: struct socket_wq 2077 2052 * 2078 - * Returns true if socket_wq has waiting processes 2053 + * Return: true if socket_wq has waiting processes 2079 2054 * 2080 2055 * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory 2081 2056 * 
barrier call. They were added due to the race found within the tcp code. ··· 2263 2238 * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest 2264 2239 * inside other socket operations and end up recursing into sk_page_frag() 2265 2240 * while it's already in use. 2241 + * 2242 + * Return: a per task page_frag if context allows that, 2243 + * otherwise a per socket one. 2266 2244 */ 2267 2245 static inline struct page_frag *sk_page_frag(struct sock *sk) 2268 2246 { ··· 2460 2432 &skb_shinfo(skb)->tskey); 2461 2433 } 2462 2434 2435 + DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); 2463 2436 /** 2464 2437 * sk_eat_skb - Release a skb if it is no longer needed 2465 2438 * @sk: socket to eat this skb from ··· 2469 2440 * This routine must be called with interrupts disabled or with the socket 2470 2441 * locked so that the sk_buff queue operation is ok. 2471 2442 */ 2472 - DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); 2473 2443 static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) 2474 2444 { 2475 2445 __skb_unlink(skb, &sk->sk_receive_queue);
+7 -9
include/uapi/linux/bpf.h
··· 1045 1045 * supports redirection to the egress interface, and accepts no 1046 1046 * flag at all. 1047 1047 * 1048 - * The same effect can be attained with the more generic 1049 - * **bpf_redirect_map**\ (), which requires specific maps to be 1050 - * used but offers better performance. 1048 + * The same effect can also be attained with the more generic 1049 + * **bpf_redirect_map**\ (), which uses a BPF map to store the 1050 + * redirect target instead of providing it directly to the helper. 1051 1051 * Return 1052 1052 * For XDP, the helper returns **XDP_REDIRECT** on success or 1053 1053 * **XDP_ABORTED** on error. For other program types, the values ··· 1611 1611 * the caller. Any higher bits in the *flags* argument must be 1612 1612 * unset. 1613 1613 * 1614 - * When used to redirect packets to net devices, this helper 1615 - * provides a high performance increase over **bpf_redirect**\ (). 1616 - * This is due to various implementation details of the underlying 1617 - * mechanisms, one of which is the fact that **bpf_redirect_map**\ 1618 - * () tries to send packet as a "bulk" to the device. 1614 + * See also bpf_redirect(), which only supports redirecting to an 1615 + * ifindex, but doesn't require a map to do so. 1619 1616 * Return 1620 - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. 1617 + * **XDP_REDIRECT** on success, or the value of the two lower bits 1618 + * of the **flags* argument on error. 1621 1619 * 1622 1620 * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) 1623 1621 * Description
+11 -1
include/uapi/linux/netfilter/nf_conntrack_common.h
··· 97 97 IPS_UNTRACKED_BIT = 12, 98 98 IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), 99 99 100 + #ifdef __KERNEL__ 101 + /* Re-purposed for in-kernel use: 102 + * Tags a conntrack entry that clashed with an existing entry 103 + * on insert. 104 + */ 105 + IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT, 106 + IPS_NAT_CLASH = IPS_UNTRACKED, 107 + #endif 108 + 100 109 /* Conntrack got a helper explicitly attached via CT target. */ 101 110 IPS_HELPER_BIT = 13, 102 111 IPS_HELPER = (1 << IPS_HELPER_BIT), ··· 119 110 */ 120 111 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | 121 112 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | 122 - IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), 113 + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED | 114 + IPS_OFFLOAD), 123 115 124 116 __IPS_MAX_BIT = 15, 125 117 };
+3 -3
kernel/bpf/btf.c
··· 4142 4142 * EFAULT - verifier bug 4143 4143 * 0 - 99% match. The last 1% is validated by the verifier. 4144 4144 */ 4145 - int btf_check_func_type_match(struct bpf_verifier_log *log, 4146 - struct btf *btf1, const struct btf_type *t1, 4147 - struct btf *btf2, const struct btf_type *t2) 4145 + static int btf_check_func_type_match(struct bpf_verifier_log *log, 4146 + struct btf *btf1, const struct btf_type *t1, 4147 + struct btf *btf2, const struct btf_type *t2) 4148 4148 { 4149 4149 const struct btf_param *args1, *args2; 4150 4150 const char *fn1, *fn2, *s1, *s2;
+53 -5
kernel/bpf/hashtab.c
··· 56 56 union { 57 57 struct bpf_htab *htab; 58 58 struct pcpu_freelist_node fnode; 59 + struct htab_elem *batch_flink; 59 60 }; 60 61 }; 61 62 }; ··· 127 126 bpf_map_area_free(htab->elems); 128 127 } 129 128 129 + /* The LRU list has a lock (lru_lock). Each htab bucket has a lock 130 + * (bucket_lock). If both locks need to be acquired together, the lock 131 + * order is always lru_lock -> bucket_lock and this only happens in 132 + * bpf_lru_list.c logic. For example, certain code path of 133 + * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(), 134 + * will acquire lru_lock first followed by acquiring bucket_lock. 135 + * 136 + * In hashtab.c, to avoid deadlock, lock acquisition of 137 + * bucket_lock followed by lru_lock is not allowed. In such cases, 138 + * bucket_lock needs to be released first before acquiring lru_lock. 139 + */ 130 140 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, 131 141 u32 hash) 132 142 { ··· 1268 1256 void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1269 1257 void *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1270 1258 u32 batch, max_count, size, bucket_size; 1259 + struct htab_elem *node_to_free = NULL; 1271 1260 u64 elem_map_flags, map_flags; 1272 1261 struct hlist_nulls_head *head; 1273 1262 struct hlist_nulls_node *n; 1274 - unsigned long flags; 1263 + unsigned long flags = 0; 1264 + bool locked = false; 1275 1265 struct htab_elem *l; 1276 1266 struct bucket *b; 1277 1267 int ret = 0; ··· 1333 1319 dst_val = values; 1334 1320 b = &htab->buckets[batch]; 1335 1321 head = &b->head; 1336 - raw_spin_lock_irqsave(&b->lock, flags); 1322 + /* do not grab the lock unless need it (bucket_cnt > 0). 
*/ 1323 + if (locked) 1324 + raw_spin_lock_irqsave(&b->lock, flags); 1337 1325 1338 1326 bucket_cnt = 0; 1339 1327 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 1340 1328 bucket_cnt++; 1341 1329 1330 + if (bucket_cnt && !locked) { 1331 + locked = true; 1332 + goto again_nocopy; 1333 + } 1334 + 1342 1335 if (bucket_cnt > (max_count - total)) { 1343 1336 if (total == 0) 1344 1337 ret = -ENOSPC; 1338 + /* Note that since bucket_cnt > 0 here, it is implicit 1339 + * that the locked was grabbed, so release it. 1340 + */ 1345 1341 raw_spin_unlock_irqrestore(&b->lock, flags); 1346 1342 rcu_read_unlock(); 1347 1343 this_cpu_dec(bpf_prog_active); ··· 1361 1337 1362 1338 if (bucket_cnt > bucket_size) { 1363 1339 bucket_size = bucket_cnt; 1340 + /* Note that since bucket_cnt > 0 here, it is implicit 1341 + * that the locked was grabbed, so release it. 1342 + */ 1364 1343 raw_spin_unlock_irqrestore(&b->lock, flags); 1365 1344 rcu_read_unlock(); 1366 1345 this_cpu_dec(bpf_prog_active); ··· 1372 1345 kvfree(values); 1373 1346 goto alloc; 1374 1347 } 1348 + 1349 + /* Next block is only safe to run if you have grabbed the lock */ 1350 + if (!locked) 1351 + goto next_batch; 1375 1352 1376 1353 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1377 1354 memcpy(dst_key, l->key, key_size); ··· 1401 1370 } 1402 1371 if (do_delete) { 1403 1372 hlist_nulls_del_rcu(&l->hash_node); 1404 - if (is_lru_map) 1405 - bpf_lru_push_free(&htab->lru, &l->lru_node); 1406 - else 1373 + 1374 + /* bpf_lru_push_free() will acquire lru_lock, which 1375 + * may cause deadlock. See comments in function 1376 + * prealloc_lru_pop(). Let us do bpf_lru_push_free() 1377 + * after releasing the bucket lock. 
1378 + */ 1379 + if (is_lru_map) { 1380 + l->batch_flink = node_to_free; 1381 + node_to_free = l; 1382 + } else { 1407 1383 free_htab_elem(htab, l); 1384 + } 1408 1385 } 1409 1386 dst_key += key_size; 1410 1387 dst_val += value_size; 1411 1388 } 1412 1389 1413 1390 raw_spin_unlock_irqrestore(&b->lock, flags); 1391 + locked = false; 1392 + 1393 + while (node_to_free) { 1394 + l = node_to_free; 1395 + node_to_free = node_to_free->batch_flink; 1396 + bpf_lru_push_free(&htab->lru, &l->lru_node); 1397 + } 1398 + 1399 + next_batch: 1414 1400 /* If we are not copying data, we can go to next bucket and avoid 1415 1401 * unlocking the rcu. 1416 1402 */
+1 -1
kernel/bpf/offload.c
··· 321 321 322 322 ulen = info->jited_prog_len; 323 323 info->jited_prog_len = aux->offload->jited_len; 324 - if (info->jited_prog_len & ulen) { 324 + if (info->jited_prog_len && ulen) { 325 325 uinsns = u64_to_user_ptr(info->jited_prog_insns); 326 326 ulen = min_t(u32, info->jited_prog_len, ulen); 327 327 if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
-1
net/Kconfig
··· 189 189 depends on NETFILTER_ADVANCED 190 190 select NETFILTER_FAMILY_BRIDGE 191 191 select SKB_EXTENSIONS 192 - default m 193 192 ---help--- 194 193 Enabling this option will let arptables resp. iptables see bridged 195 194 ARP resp. IP traffic. If you want a bridging firewall, you probably
+2 -1
net/bridge/br_stp.c
··· 63 63 { 64 64 struct net_bridge_port *p; 65 65 66 - list_for_each_entry_rcu(p, &br->port_list, list) { 66 + list_for_each_entry_rcu(p, &br->port_list, list, 67 + lockdep_is_held(&br->lock)) { 67 68 if (p->port_no == port_no) 68 69 return p; 69 70 }
+11 -23
net/core/dev.c
··· 146 146 #include "net-sysfs.h" 147 147 148 148 #define MAX_GRO_SKBS 8 149 - #define MAX_NEST_DEV 8 150 149 151 150 /* This should be increased if a protocol with a bigger head is added. */ 152 151 #define GRO_MAX_HEAD (MAX_HEADER + 128) ··· 330 331 name_node = netdev_name_node_lookup(net, name); 331 332 if (!name_node) 332 333 return -ENOENT; 334 + /* lookup might have found our primary name or a name belonging 335 + * to another device. 336 + */ 337 + if (name_node == dev->name_node || name_node->dev != dev) 338 + return -EINVAL; 339 + 333 340 __netdev_name_node_alt_destroy(name_node); 334 341 335 342 return 0; ··· 3662 3657 qdisc_calculate_pkt_len(skb, q); 3663 3658 3664 3659 if (q->flags & TCQ_F_NOLOCK) { 3665 - if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && 3666 - qdisc_run_begin(q)) { 3667 - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, 3668 - &q->state))) { 3669 - __qdisc_drop(skb, &to_free); 3670 - rc = NET_XMIT_DROP; 3671 - goto end_run; 3672 - } 3673 - qdisc_bstats_cpu_update(q, skb); 3674 - 3675 - rc = NET_XMIT_SUCCESS; 3676 - if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) 3677 - __qdisc_run(q); 3678 - 3679 - end_run: 3680 - qdisc_run_end(q); 3681 - } else { 3682 - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; 3683 - qdisc_run(q); 3684 - } 3660 + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; 3661 + qdisc_run(q); 3685 3662 3686 3663 if (unlikely(to_free)) 3687 3664 kfree_skb_list(to_free); ··· 7188 7201 return 0; 7189 7202 } 7190 7203 7191 - static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, 7192 - struct list_head **iter) 7204 + struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, 7205 + struct list_head **iter) 7193 7206 { 7194 7207 struct netdev_adjacent *lower; 7195 7208 ··· 7201 7214 7202 7215 return lower->dev; 7203 7216 } 7217 + EXPORT_SYMBOL(netdev_next_lower_dev_rcu); 7204 7218 7205 7219 static u8 __netdev_upper_depth(struct net_device *dev) 7206 7220 {
+1 -1
net/core/fib_rules.c
··· 974 974 975 975 frh = nlmsg_data(nlh); 976 976 frh->family = ops->family; 977 - frh->table = rule->table; 977 + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; 978 978 if (nla_put_u32(skb, FRA_TABLE, rule->table)) 979 979 goto nla_put_failure; 980 980 if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
+12 -14
net/core/rtnetlink.c
··· 3504 3504 if (err) 3505 3505 return err; 3506 3506 3507 - alt_ifname = nla_data(attr); 3507 + alt_ifname = nla_strdup(attr, GFP_KERNEL); 3508 + if (!alt_ifname) 3509 + return -ENOMEM; 3510 + 3508 3511 if (cmd == RTM_NEWLINKPROP) { 3509 - alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); 3510 - if (!alt_ifname) 3511 - return -ENOMEM; 3512 3512 err = netdev_name_node_alt_create(dev, alt_ifname); 3513 - if (err) { 3514 - kfree(alt_ifname); 3515 - return err; 3516 - } 3513 + if (!err) 3514 + alt_ifname = NULL; 3517 3515 } else if (cmd == RTM_DELLINKPROP) { 3518 3516 err = netdev_name_node_alt_destroy(dev, alt_ifname); 3519 - if (err) 3520 - return err; 3521 3517 } else { 3522 - WARN_ON(1); 3523 - return 0; 3518 + WARN_ON_ONCE(1); 3519 + err = -EINVAL; 3524 3520 } 3525 3521 3526 - *changed = true; 3527 - return 0; 3522 + kfree(alt_ifname); 3523 + if (!err) 3524 + *changed = true; 3525 + return err; 3528 3526 } 3529 3527 3530 3528 static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
+2 -4
net/core/skbuff.c
··· 467 467 return NULL; 468 468 } 469 469 470 - /* use OR instead of assignment to avoid clearing of bits in mask */ 471 470 if (pfmemalloc) 472 471 skb->pfmemalloc = 1; 473 472 skb->head_frag = 1; ··· 526 527 return NULL; 527 528 } 528 529 529 - /* use OR instead of assignment to avoid clearing of bits in mask */ 530 530 if (nc->page.pfmemalloc) 531 531 skb->pfmemalloc = 1; 532 532 skb->head_frag = 1; ··· 4803 4805 typeof(IPPROTO_IP) proto, 4804 4806 unsigned int off) 4805 4807 { 4806 - switch (proto) { 4807 - int err; 4808 + int err; 4808 4809 4810 + switch (proto) { 4809 4811 case IPPROTO_TCP: 4810 4812 err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), 4811 4813 off + MAX_TCP_HDR_LEN);
+3
net/ethtool/bitset.c
··· 447 447 "mask only allowed in compact bitset"); 448 448 return -EINVAL; 449 449 } 450 + 450 451 no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; 452 + if (no_mask) 453 + ethnl_bitmap32_clear(bitmap, 0, nbits, mod); 451 454 452 455 nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { 453 456 bool old_val, new_val;
+2 -1
net/hsr/hsr_framereg.c
··· 155 155 new_node->seq_out[i] = seq_out; 156 156 157 157 spin_lock_bh(&hsr->list_lock); 158 - list_for_each_entry_rcu(node, node_db, mac_list) { 158 + list_for_each_entry_rcu(node, node_db, mac_list, 159 + lockdep_is_held(&hsr->list_lock)) { 159 160 if (ether_addr_equal(node->macaddress_A, addr)) 160 161 goto out; 161 162 if (ether_addr_equal(node->macaddress_B, addr))
+5 -1
net/ipv4/udp.c
··· 1857 1857 inet->inet_dport = 0; 1858 1858 sock_rps_reset_rxhash(sk); 1859 1859 sk->sk_bound_dev_if = 0; 1860 - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1860 + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { 1861 1861 inet_reset_saddr(sk); 1862 + if (sk->sk_prot->rehash && 1863 + (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 1864 + sk->sk_prot->rehash(sk); 1865 + } 1862 1866 1863 1867 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { 1864 1868 sk->sk_prot->unhash(sk);
+4 -3
net/ipv6/ip6_fib.c
··· 1102 1102 found++; 1103 1103 break; 1104 1104 } 1105 - if (rt_can_ecmp) 1106 - fallback_ins = fallback_ins ?: ins; 1105 + fallback_ins = fallback_ins ?: ins; 1107 1106 goto next_iter; 1108 1107 } 1109 1108 ··· 1145 1146 } 1146 1147 1147 1148 if (fallback_ins && !found) { 1148 - /* No ECMP-able route found, replace first non-ECMP one */ 1149 + /* No matching route with same ecmp-able-ness found, replace 1150 + * first matching route 1151 + */ 1149 1152 ins = fallback_ins; 1150 1153 iter = rcu_dereference_protected(*ins, 1151 1154 lockdep_is_held(&rt->fib6_table->tb6_lock));
+5 -3
net/ipv6/ip6_gre.c
··· 437 437 return -ENOENT; 438 438 439 439 switch (type) { 440 - struct ipv6_tlv_tnl_enc_lim *tel; 441 - __u32 teli; 442 440 case ICMPV6_DEST_UNREACH: 443 441 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", 444 442 t->parms.name); ··· 450 452 break; 451 453 } 452 454 return 0; 453 - case ICMPV6_PARAMPROB: 455 + case ICMPV6_PARAMPROB: { 456 + struct ipv6_tlv_tnl_enc_lim *tel; 457 + __u32 teli; 458 + 454 459 teli = 0; 455 460 if (code == ICMPV6_HDR_FIELD) 456 461 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); ··· 469 468 t->parms.name); 470 469 } 471 470 return 0; 471 + } 472 472 case ICMPV6_PKT_TOOBIG: 473 473 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); 474 474 return 0;
+9 -4
net/ipv6/ip6_tunnel.c
··· 517 517 err = 0; 518 518 519 519 switch (*type) { 520 - struct ipv6_tlv_tnl_enc_lim *tel; 521 - __u32 mtu, teli; 522 520 case ICMPV6_DEST_UNREACH: 523 521 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", 524 522 t->parms.name); ··· 529 531 rel_msg = 1; 530 532 } 531 533 break; 532 - case ICMPV6_PARAMPROB: 534 + case ICMPV6_PARAMPROB: { 535 + struct ipv6_tlv_tnl_enc_lim *tel; 536 + __u32 teli; 537 + 533 538 teli = 0; 534 539 if ((*code) == ICMPV6_HDR_FIELD) 535 540 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); ··· 549 548 t->parms.name); 550 549 } 551 550 break; 552 - case ICMPV6_PKT_TOOBIG: 551 + } 552 + case ICMPV6_PKT_TOOBIG: { 553 + __u32 mtu; 554 + 553 555 ip6_update_pmtu(skb, net, htonl(*info), 0, 0, 554 556 sock_net_uid(net, NULL)); 555 557 mtu = *info - offset; ··· 566 562 rel_msg = 1; 567 563 } 568 564 break; 565 + } 569 566 case NDISC_REDIRECT: 570 567 ip6_redirect(skb, net, skb->dev->ifindex, 0, 571 568 sock_net_uid(net, NULL));
+1
net/ipv6/route.c
··· 5198 5198 */ 5199 5199 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 5200 5200 NLM_F_REPLACE); 5201 + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; 5201 5202 nhn++; 5202 5203 } 5203 5204
+1
net/mptcp/Kconfig
··· 4 4 depends on INET 5 5 select SKB_EXTENSIONS 6 6 select CRYPTO_LIB_SHA256 7 + select CRYPTO 7 8 help 8 9 Multipath TCP (MPTCP) connections send and receive data over multiple 9 10 subflows in order to utilize multiple network paths. Each subflow
+19 -29
net/mptcp/protocol.c
··· 755 755 char __user *optval, unsigned int optlen) 756 756 { 757 757 struct mptcp_sock *msk = mptcp_sk(sk); 758 - int ret = -EOPNOTSUPP; 759 758 struct socket *ssock; 760 - struct sock *ssk; 761 759 762 760 pr_debug("msk=%p", msk); 763 761 764 762 /* @@ the meaning of setsockopt() when the socket is connected and 765 - * there are multiple subflows is not defined. 763 + * there are multiple subflows is not yet defined. It is up to the 764 + * MPTCP-level socket to configure the subflows until the subflow 765 + * is in TCP fallback, when TCP socket options are passed through 766 + * to the one remaining subflow. 766 767 */ 767 768 lock_sock(sk); 768 - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); 769 - if (IS_ERR(ssock)) { 770 - release_sock(sk); 771 - return ret; 772 - } 769 + ssock = __mptcp_tcp_fallback(msk); 770 + if (ssock) 771 + return tcp_setsockopt(ssock->sk, level, optname, optval, 772 + optlen); 773 773 774 - ssk = ssock->sk; 775 - sock_hold(ssk); 776 774 release_sock(sk); 777 775 778 - ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 779 - sock_put(ssk); 780 - 781 - return ret; 776 + return -EOPNOTSUPP; 782 777 } 783 778 784 779 static int mptcp_getsockopt(struct sock *sk, int level, int optname, 785 780 char __user *optval, int __user *option) 786 781 { 787 782 struct mptcp_sock *msk = mptcp_sk(sk); 788 - int ret = -EOPNOTSUPP; 789 783 struct socket *ssock; 790 - struct sock *ssk; 791 784 792 785 pr_debug("msk=%p", msk); 793 786 794 - /* @@ the meaning of getsockopt() when the socket is connected and 795 - * there are multiple subflows is not defined. 787 + /* @@ the meaning of setsockopt() when the socket is connected and 788 + * there are multiple subflows is not yet defined. It is up to the 789 + * MPTCP-level socket to configure the subflows until the subflow 790 + * is in TCP fallback, when socket options are passed through 791 + * to the one remaining subflow. 
796 792 */ 797 793 lock_sock(sk); 798 - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); 799 - if (IS_ERR(ssock)) { 800 - release_sock(sk); 801 - return ret; 802 - } 794 + ssock = __mptcp_tcp_fallback(msk); 795 + if (ssock) 796 + return tcp_getsockopt(ssock->sk, level, optname, optval, 797 + option); 803 798 804 - ssk = ssock->sk; 805 - sock_hold(ssk); 806 799 release_sock(sk); 807 800 808 - ret = tcp_getsockopt(ssk, level, optname, optval, option); 809 - sock_put(ssk); 810 - 811 - return ret; 801 + return -EOPNOTSUPP; 812 802 } 813 803 814 804 static int mptcp_get_port(struct sock *sk, unsigned short snum)
+2 -2
net/mptcp/protocol.h
··· 56 56 #define MPTCP_DSS_FLAG_MASK (0x1F) 57 57 58 58 /* MPTCP socket flags */ 59 - #define MPTCP_DATA_READY BIT(0) 60 - #define MPTCP_SEND_SPACE BIT(1) 59 + #define MPTCP_DATA_READY 0 60 + #define MPTCP_SEND_SPACE 1 61 61 62 62 /* MPTCP connection sock */ 63 63 struct mptcp_sock {
+164 -28
net/netfilter/nf_conntrack_core.c
··· 894 894 } 895 895 } 896 896 897 - /* Resolve race on insertion if this protocol allows this. */ 897 + static void __nf_conntrack_insert_prepare(struct nf_conn *ct) 898 + { 899 + struct nf_conn_tstamp *tstamp; 900 + 901 + atomic_inc(&ct->ct_general.use); 902 + ct->status |= IPS_CONFIRMED; 903 + 904 + /* set conntrack timestamp, if enabled. */ 905 + tstamp = nf_conn_tstamp_find(ct); 906 + if (tstamp) 907 + tstamp->start = ktime_get_real_ns(); 908 + } 909 + 910 + static int __nf_ct_resolve_clash(struct sk_buff *skb, 911 + struct nf_conntrack_tuple_hash *h) 912 + { 913 + /* This is the conntrack entry already in hashes that won race. */ 914 + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 915 + enum ip_conntrack_info ctinfo; 916 + struct nf_conn *loser_ct; 917 + 918 + loser_ct = nf_ct_get(skb, &ctinfo); 919 + 920 + if (nf_ct_is_dying(ct)) 921 + return NF_DROP; 922 + 923 + if (!atomic_inc_not_zero(&ct->ct_general.use)) 924 + return NF_DROP; 925 + 926 + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || 927 + nf_ct_match(ct, loser_ct)) { 928 + struct net *net = nf_ct_net(ct); 929 + 930 + nf_ct_acct_merge(ct, ctinfo, loser_ct); 931 + nf_ct_add_to_dying_list(loser_ct); 932 + nf_conntrack_put(&loser_ct->ct_general); 933 + nf_ct_set(skb, ct, ctinfo); 934 + 935 + NF_CT_STAT_INC(net, insert_failed); 936 + return NF_ACCEPT; 937 + } 938 + 939 + nf_ct_put(ct); 940 + return NF_DROP; 941 + } 942 + 943 + /** 944 + * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry 945 + * 946 + * @skb: skb that causes the collision 947 + * @repl_idx: hash slot for reply direction 948 + * 949 + * Called when origin or reply direction had a clash. 950 + * The skb can be handled without packet drop provided the reply direction 951 + * is unique or there the existing entry has the identical tuple in both 952 + * directions. 953 + * 954 + * Caller must hold conntrack table locks to prevent concurrent updates. 955 + * 956 + * Returns NF_DROP if the clash could not be handled. 
957 + */ 958 + static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) 959 + { 960 + struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb); 961 + const struct nf_conntrack_zone *zone; 962 + struct nf_conntrack_tuple_hash *h; 963 + struct hlist_nulls_node *n; 964 + struct net *net; 965 + 966 + zone = nf_ct_zone(loser_ct); 967 + net = nf_ct_net(loser_ct); 968 + 969 + /* Reply direction must never result in a clash, unless both origin 970 + * and reply tuples are identical. 971 + */ 972 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) { 973 + if (nf_ct_key_equal(h, 974 + &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple, 975 + zone, net)) 976 + return __nf_ct_resolve_clash(skb, h); 977 + } 978 + 979 + /* We want the clashing entry to go away real soon: 1 second timeout. */ 980 + loser_ct->timeout = nfct_time_stamp + HZ; 981 + 982 + /* IPS_NAT_CLASH removes the entry automatically on the first 983 + * reply. Also prevents UDP tracker from moving the entry to 984 + * ASSURED state, i.e. the entry can always be evicted under 985 + * pressure. 986 + */ 987 + loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH; 988 + 989 + __nf_conntrack_insert_prepare(loser_ct); 990 + 991 + /* fake add for ORIGINAL dir: we want lookups to only find the entry 992 + * already in the table. This also hides the clashing entry from 993 + * ctnetlink iteration, i.e. conntrack -L won't show them. 
994 + */ 995 + hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 996 + 997 + hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, 998 + &nf_conntrack_hash[repl_idx]); 999 + return NF_ACCEPT; 1000 + } 1001 + 1002 + /** 1003 + * nf_ct_resolve_clash - attempt to handle clash without packet drop 1004 + * 1005 + * @skb: skb that causes the clash 1006 + * @h: tuplehash of the clashing entry already in table 1007 + * @hash_reply: hash slot for reply direction 1008 + * 1009 + * A conntrack entry can be inserted to the connection tracking table 1010 + * if there is no existing entry with an identical tuple. 1011 + * 1012 + * If there is one, @skb (and the assocated, unconfirmed conntrack) has 1013 + * to be dropped. In case @skb is retransmitted, next conntrack lookup 1014 + * will find the already-existing entry. 1015 + * 1016 + * The major problem with such packet drop is the extra delay added by 1017 + * the packet loss -- it will take some time for a retransmit to occur 1018 + * (or the sender to time out when waiting for a reply). 1019 + * 1020 + * This function attempts to handle the situation without packet drop. 1021 + * 1022 + * If @skb has no NAT transformation or if the colliding entries are 1023 + * exactly the same, only the to-be-confirmed conntrack entry is discarded 1024 + * and @skb is associated with the conntrack entry already in the table. 1025 + * 1026 + * Failing that, the new, unconfirmed conntrack is still added to the table 1027 + * provided that the collision only occurs in the ORIGINAL direction. 1028 + * The new entry will be added after the existing one in the hash list, 1029 + * so packets in the ORIGINAL direction will continue to match the existing 1030 + * entry. The new entry will also have a fixed timeout so it expires -- 1031 + * due to the collision, it will not see bidirectional traffic. 1032 + * 1033 + * Returns NF_DROP if the clash could not be resolved. 
1034 + */ 898 1035 static __cold noinline int 899 - nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, 900 - enum ip_conntrack_info ctinfo, 901 - struct nf_conntrack_tuple_hash *h) 1036 + nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h, 1037 + u32 reply_hash) 902 1038 { 903 1039 /* This is the conntrack entry already in hashes that won race. */ 904 1040 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 905 1041 const struct nf_conntrack_l4proto *l4proto; 906 - enum ip_conntrack_info oldinfo; 907 - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); 1042 + enum ip_conntrack_info ctinfo; 1043 + struct nf_conn *loser_ct; 1044 + struct net *net; 1045 + int ret; 1046 + 1047 + loser_ct = nf_ct_get(skb, &ctinfo); 1048 + net = nf_ct_net(loser_ct); 908 1049 909 1050 l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); 910 - if (l4proto->allow_clash && 911 - !nf_ct_is_dying(ct) && 912 - atomic_inc_not_zero(&ct->ct_general.use)) { 913 - if (((ct->status & IPS_NAT_DONE_MASK) == 0) || 914 - nf_ct_match(ct, loser_ct)) { 915 - nf_ct_acct_merge(ct, ctinfo, loser_ct); 916 - nf_conntrack_put(&loser_ct->ct_general); 917 - nf_ct_set(skb, ct, oldinfo); 918 - return NF_ACCEPT; 919 - } 920 - nf_ct_put(ct); 921 - } 1051 + if (!l4proto->allow_clash) 1052 + goto drop; 1053 + 1054 + ret = __nf_ct_resolve_clash(skb, h); 1055 + if (ret == NF_ACCEPT) 1056 + return ret; 1057 + 1058 + ret = nf_ct_resolve_clash_harder(skb, reply_hash); 1059 + if (ret == NF_ACCEPT) 1060 + return ret; 1061 + 1062 + drop: 1063 + nf_ct_add_to_dying_list(loser_ct); 922 1064 NF_CT_STAT_INC(net, drop); 1065 + NF_CT_STAT_INC(net, insert_failed); 923 1066 return NF_DROP; 924 1067 } 925 1068 ··· 1075 932 struct nf_conntrack_tuple_hash *h; 1076 933 struct nf_conn *ct; 1077 934 struct nf_conn_help *help; 1078 - struct nf_conn_tstamp *tstamp; 1079 935 struct hlist_nulls_node *n; 1080 936 enum ip_conntrack_info ctinfo; 1081 937 struct net *net; ··· 1131 989 1132 990 if 
(unlikely(nf_ct_is_dying(ct))) { 1133 991 nf_ct_add_to_dying_list(ct); 992 + NF_CT_STAT_INC(net, insert_failed); 1134 993 goto dying; 1135 994 } 1136 995 ··· 1152 1009 setting time, otherwise we'd get timer wrap in 1153 1010 weird delay cases. */ 1154 1011 ct->timeout += nfct_time_stamp; 1155 - atomic_inc(&ct->ct_general.use); 1156 - ct->status |= IPS_CONFIRMED; 1157 1012 1158 - /* set conntrack timestamp, if enabled. */ 1159 - tstamp = nf_conn_tstamp_find(ct); 1160 - if (tstamp) 1161 - tstamp->start = ktime_get_real_ns(); 1013 + __nf_conntrack_insert_prepare(ct); 1162 1014 1163 1015 /* Since the lookup is lockless, hash insertion must be done after 1164 1016 * starting the timer and setting the CONFIRMED bit. The RCU barriers ··· 1173 1035 return NF_ACCEPT; 1174 1036 1175 1037 out: 1176 - nf_ct_add_to_dying_list(ct); 1177 - ret = nf_ct_resolve_clash(net, skb, ctinfo, h); 1038 + ret = nf_ct_resolve_clash(skb, h, reply_hash); 1178 1039 dying: 1179 1040 nf_conntrack_double_unlock(hash, reply_hash); 1180 - NF_CT_STAT_INC(net, insert_failed); 1181 1041 local_bh_enable(); 1182 1042 return ret; 1183 1043 }
+16 -4
net/netfilter/nf_conntrack_proto_udp.c
··· 81 81 return false; 82 82 } 83 83 84 + static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct, 85 + struct sk_buff *skb, 86 + enum ip_conntrack_info ctinfo, 87 + u32 extra_jiffies) 88 + { 89 + if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY && 90 + ct->status & IPS_NAT_CLASH)) 91 + nf_ct_kill(ct); 92 + else 93 + nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies); 94 + } 95 + 84 96 /* Returns verdict for packet, and may modify conntracktype */ 85 97 int nf_conntrack_udp_packet(struct nf_conn *ct, 86 98 struct sk_buff *skb, ··· 128 116 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) 129 117 nf_conntrack_event_cache(IPCT_ASSURED, ct); 130 118 } else { 131 - nf_ct_refresh_acct(ct, ctinfo, skb, 132 - timeouts[UDP_CT_UNREPLIED]); 119 + nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, 120 + timeouts[UDP_CT_UNREPLIED]); 133 121 } 134 122 return NF_ACCEPT; 135 123 } ··· 210 198 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) 211 199 nf_conntrack_event_cache(IPCT_ASSURED, ct); 212 200 } else { 213 - nf_ct_refresh_acct(ct, ctinfo, skb, 214 - timeouts[UDP_CT_UNREPLIED]); 201 + nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, 202 + timeouts[UDP_CT_UNREPLIED]); 215 203 } 216 204 return NF_ACCEPT; 217 205 }
+3 -3
net/netfilter/nf_flow_table_offload.c
··· 847 847 { 848 848 int err; 849 849 850 - if (!nf_flowtable_hw_offload(flowtable)) 851 - return 0; 852 - 853 850 if (!dev->netdev_ops->ndo_setup_tc) 854 851 return -EOPNOTSUPP; 855 852 ··· 872 875 struct netlink_ext_ack extack = {}; 873 876 struct flow_block_offload bo; 874 877 int err; 878 + 879 + if (!nf_flowtable_hw_offload(flowtable)) 880 + return 0; 875 881 876 882 err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); 877 883 if (err < 0)
+3 -3
net/netfilter/nft_set_pipapo.c
··· 203 203 * :: 204 204 * 205 205 * rule indices in last field: 0 1 206 - * map to elements: 0x42 0x66 206 + * map to elements: 0x66 0x42 207 207 * 208 208 * 209 209 * Matching ··· 298 298 * :: 299 299 * 300 300 * rule indices in last field: 0 1 301 - * map to elements: 0x42 0x66 301 + * map to elements: 0x66 0x42 302 302 * 303 303 * the matching element is at 0x42. 304 304 * ··· 503 503 return -1; 504 504 } 505 505 506 - if (unlikely(match_only)) { 506 + if (match_only) { 507 507 bitmap_clear(map, i, 1); 508 508 return i; 509 509 }
+16 -6
net/netfilter/xt_hashlimit.c
··· 36 36 #include <linux/netfilter_ipv6/ip6_tables.h> 37 37 #include <linux/mutex.h> 38 38 #include <linux/kernel.h> 39 + #include <linux/refcount.h> 39 40 #include <uapi/linux/netfilter/xt_hashlimit.h> 40 41 41 42 #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ ··· 115 114 116 115 struct xt_hashlimit_htable { 117 116 struct hlist_node node; /* global list of all htables */ 118 - int use; 117 + refcount_t use; 119 118 u_int8_t family; 120 119 bool rnd_initialized; 121 120 ··· 316 315 for (i = 0; i < hinfo->cfg.size; i++) 317 316 INIT_HLIST_HEAD(&hinfo->hash[i]); 318 317 319 - hinfo->use = 1; 318 + refcount_set(&hinfo->use, 1); 320 319 hinfo->count = 0; 321 320 hinfo->family = family; 322 321 hinfo->rnd_initialized = false; ··· 421 420 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { 422 421 if (!strcmp(name, hinfo->name) && 423 422 hinfo->family == family) { 424 - hinfo->use++; 423 + refcount_inc(&hinfo->use); 425 424 return hinfo; 426 425 } 427 426 } ··· 430 429 431 430 static void htable_put(struct xt_hashlimit_htable *hinfo) 432 431 { 433 - mutex_lock(&hashlimit_mutex); 434 - if (--hinfo->use == 0) { 432 + if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) { 435 433 hlist_del(&hinfo->node); 434 + mutex_unlock(&hashlimit_mutex); 436 435 htable_destroy(hinfo); 437 436 } 438 - mutex_unlock(&hashlimit_mutex); 439 437 } 440 438 441 439 /* The algorithm used is the Simple Token Bucket Filter (TBF) ··· 837 837 return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); 838 838 } 839 839 840 + #define HASHLIMIT_MAX_SIZE 1048576 841 + 840 842 static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, 841 843 struct xt_hashlimit_htable **hinfo, 842 844 struct hashlimit_cfg3 *cfg, ··· 849 847 850 848 if (cfg->gc_interval == 0 || cfg->expire == 0) 851 849 return -EINVAL; 850 + if (cfg->size > HASHLIMIT_MAX_SIZE) { 851 + cfg->size = HASHLIMIT_MAX_SIZE; 852 + pr_info_ratelimited("size too large, truncated to 
%u\n", cfg->size); 853 + } 854 + if (cfg->max > HASHLIMIT_MAX_SIZE) { 855 + cfg->max = HASHLIMIT_MAX_SIZE; 856 + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); 857 + } 852 858 if (par->family == NFPROTO_IPV4) { 853 859 if (cfg->srcmask > 32 || cfg->dstmask > 32) 854 860 return -EINVAL;
+2 -1
net/netlabel/netlabel_domainhash.c
··· 143 143 if (domain != NULL) { 144 144 bkt = netlbl_domhsh_hash(domain); 145 145 bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt]; 146 - list_for_each_entry_rcu(iter, bkt_list, list) 146 + list_for_each_entry_rcu(iter, bkt_list, list, 147 + lockdep_is_held(&netlbl_domhsh_lock)) 147 148 if (iter->valid && 148 149 netlbl_family_match(iter->family, family) && 149 150 strcmp(iter->domain, domain) == 0)
+2 -1
net/netlabel/netlabel_unlabeled.c
··· 207 207 208 208 bkt = netlbl_unlhsh_hash(ifindex); 209 209 bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; 210 - list_for_each_entry_rcu(iter, bkt_list, list) 210 + list_for_each_entry_rcu(iter, bkt_list, list, 211 + lockdep_is_held(&netlbl_unlhsh_lock)) 211 212 if (iter->valid && iter->ifindex == ifindex) 212 213 return iter; 213 214
+3 -2
net/netlink/af_netlink.c
··· 1014 1014 if (nlk->netlink_bind && groups) { 1015 1015 int group; 1016 1016 1017 - for (group = 0; group < nlk->ngroups; group++) { 1017 + /* nl_groups is a u32, so cap the maximum groups we can bind */ 1018 + for (group = 0; group < BITS_PER_TYPE(u32); group++) { 1018 1019 if (!test_bit(group, &groups)) 1019 1020 continue; 1020 1021 err = nlk->netlink_bind(net, group + 1); ··· 1034 1033 netlink_insert(sk, nladdr->nl_pid) : 1035 1034 netlink_autobind(sock); 1036 1035 if (err) { 1037 - netlink_undo_bind(nlk->ngroups, groups, sk); 1036 + netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); 1038 1037 goto unlock; 1039 1038 } 1040 1039 }
+6 -3
net/openvswitch/datapath.c
··· 179 179 struct hlist_head *head; 180 180 181 181 head = vport_hash_bucket(dp, port_no); 182 - hlist_for_each_entry_rcu(vport, head, dp_hash_node) { 182 + hlist_for_each_entry_rcu(vport, head, dp_hash_node, 183 + lockdep_ovsl_is_held()) { 183 184 if (vport->port_no == port_no) 184 185 return vport; 185 186 } ··· 2043 2042 int i; 2044 2043 2045 2044 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2046 - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 2045 + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, 2046 + lockdep_ovsl_is_held()) { 2047 2047 dev = vport->dev; 2048 2048 dev_headroom = netdev_get_fwd_headroom(dev); 2049 2049 if (dev_headroom > max_headroom) ··· 2063 2061 2064 2062 dp->max_headroom = new_headroom; 2065 2063 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 2066 - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) 2064 + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, 2065 + lockdep_ovsl_is_held()) 2067 2066 netdev_set_rx_headroom(vport->dev, new_headroom); 2068 2067 } 2069 2068
+10 -8
net/openvswitch/flow_netlink.c
··· 2708 2708 return -EINVAL; 2709 2709 2710 2710 switch (key_type) { 2711 - const struct ovs_key_ipv4 *ipv4_key; 2712 - const struct ovs_key_ipv6 *ipv6_key; 2713 - int err; 2714 - 2715 2711 case OVS_KEY_ATTR_PRIORITY: 2716 2712 case OVS_KEY_ATTR_SKB_MARK: 2717 2713 case OVS_KEY_ATTR_CT_MARK: ··· 2719 2723 return -EINVAL; 2720 2724 break; 2721 2725 2722 - case OVS_KEY_ATTR_TUNNEL: 2726 + case OVS_KEY_ATTR_TUNNEL: { 2727 + int err; 2728 + 2723 2729 if (masked) 2724 2730 return -EINVAL; /* Masked tunnel set not supported. */ 2725 2731 ··· 2730 2732 if (err) 2731 2733 return err; 2732 2734 break; 2735 + } 2736 + case OVS_KEY_ATTR_IPV4: { 2737 + const struct ovs_key_ipv4 *ipv4_key; 2733 2738 2734 - case OVS_KEY_ATTR_IPV4: 2735 2739 if (eth_type != htons(ETH_P_IP)) 2736 2740 return -EINVAL; 2737 2741 ··· 2753 2753 return -EINVAL; 2754 2754 } 2755 2755 break; 2756 + } 2757 + case OVS_KEY_ATTR_IPV6: { 2758 + const struct ovs_key_ipv6 *ipv6_key; 2756 2759 2757 - case OVS_KEY_ATTR_IPV6: 2758 2760 if (eth_type != htons(ETH_P_IPV6)) 2759 2761 return -EINVAL; 2760 2762 ··· 2783 2781 return -EINVAL; 2784 2782 2785 2783 break; 2786 - 2784 + } 2787 2785 case OVS_KEY_ATTR_TCP: 2788 2786 if ((eth_type != htons(ETH_P_IP) && 2789 2787 eth_type != htons(ETH_P_IPV6)) ||
+4 -2
net/openvswitch/flow_table.c
··· 585 585 head = find_bucket(ti, hash); 586 586 (*n_mask_hit)++; 587 587 588 - hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { 588 + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver], 589 + lockdep_ovsl_is_held()) { 589 590 if (flow->mask == mask && flow->flow_table.hash == hash && 590 591 flow_cmp_masked_key(flow, &masked_key, &mask->range)) 591 592 return flow; ··· 770 769 771 770 hash = ufid_hash(ufid); 772 771 head = find_bucket(ti, hash); 773 - hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { 772 + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver], 773 + lockdep_ovsl_is_held()) { 774 774 if (flow->ufid_table.hash == hash && 775 775 ovs_flow_cmp_ufid(flow, ufid)) 776 776 return flow;
+2 -1
net/openvswitch/meter.c
··· 61 61 struct hlist_head *head; 62 62 63 63 head = meter_hash_bucket(dp, meter_id); 64 - hlist_for_each_entry_rcu(meter, head, dp_hash_node) { 64 + hlist_for_each_entry_rcu(meter, head, dp_hash_node, 65 + lockdep_ovsl_is_held()) { 65 66 if (meter->id == meter_id) 66 67 return meter; 67 68 }
+2 -1
net/openvswitch/vport.c
··· 96 96 struct hlist_head *bucket = hash_bucket(net, name); 97 97 struct vport *vport; 98 98 99 - hlist_for_each_entry_rcu(vport, bucket, hash_node) 99 + hlist_for_each_entry_rcu(vport, bucket, hash_node, 100 + lockdep_ovsl_is_held()) 100 101 if (!strcmp(name, ovs_vport_name(vport)) && 101 102 net_eq(ovs_dp_get_net(vport->dp), net)) 102 103 return vport;
+12 -12
net/rds/rdma.c
··· 162 162 if (write) 163 163 gup_flags |= FOLL_WRITE; 164 164 165 - ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages); 165 + ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages); 166 166 if (ret >= 0 && ret < nr_pages) { 167 - while (ret--) 168 - put_page(pages[ret]); 167 + unpin_user_pages(pages, ret); 169 168 ret = -EFAULT; 170 169 } 171 170 ··· 299 300 * to release anything. 300 301 */ 301 302 if (!need_odp) { 302 - for (i = 0 ; i < nents; i++) 303 - put_page(sg_page(&sg[i])); 303 + unpin_user_pages(pages, nr_pages); 304 304 kfree(sg); 305 305 } 306 306 ret = PTR_ERR(trans_private); ··· 323 325 if (cookie_ret) 324 326 *cookie_ret = cookie; 325 327 326 - if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) { 328 + if (args->cookie_addr && 329 + put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { 330 + if (!need_odp) { 331 + unpin_user_pages(pages, nr_pages); 332 + kfree(sg); 333 + } 327 334 ret = -EFAULT; 328 335 goto out; 329 336 } ··· 499 496 * is the case for a RDMA_READ which copies from remote 500 497 * to local memory 501 498 */ 502 - if (!ro->op_write) 503 - set_page_dirty(page); 504 - put_page(page); 499 + unpin_user_pages_dirty_lock(&page, 1, !ro->op_write); 505 500 } 506 501 } 507 502 ··· 516 515 /* Mark page dirty if it was possibly modified, which 517 516 * is the case for a RDMA_READ which copies from remote 518 517 * to local memory */ 519 - set_page_dirty(page); 520 - put_page(page); 518 + unpin_user_pages_dirty_lock(&page, 1, true); 521 519 522 520 kfree(ao->op_notifier); 523 521 ao->op_notifier = NULL; ··· 944 944 return ret; 945 945 err: 946 946 if (page) 947 - put_page(page); 947 + unpin_user_page(page); 948 948 rm->atomic.op_active = 0; 949 949 kfree(rm->atomic.op_notifier); 950 950
+1
net/sched/cls_flower.c
··· 305 305 struct cls_fl_filter *f; 306 306 307 307 list_for_each_entry_rcu(mask, &head->masks, list) { 308 + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); 308 309 fl_clear_masked_range(&skb_key, mask); 309 310 310 311 skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
+20 -9
net/sctp/sm_statefuns.c
··· 170 170 return true; 171 171 } 172 172 173 + /* Check for format error in an ABORT chunk */ 174 + static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) 175 + { 176 + struct sctp_errhdr *err; 177 + 178 + sctp_walk_errors(err, chunk->chunk_hdr); 179 + 180 + return (void *)err == (void *)chunk->chunk_end; 181 + } 182 + 173 183 /********************************************************** 174 184 * These are the state functions for handling chunk events. 175 185 **********************************************************/ ··· 2265 2255 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) 2266 2256 return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); 2267 2257 2258 + if (!sctp_err_chunk_valid(chunk)) 2259 + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); 2260 + 2268 2261 return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); 2269 2262 } 2270 2263 ··· 2310 2297 if (SCTP_ADDR_DEL == 2311 2298 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) 2312 2299 return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); 2300 + 2301 + if (!sctp_err_chunk_valid(chunk)) 2302 + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); 2313 2303 2314 2304 /* Stop the T2-shutdown timer. */ 2315 2305 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, ··· 2581 2565 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) 2582 2566 return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); 2583 2567 2568 + if (!sctp_err_chunk_valid(chunk)) 2569 + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); 2570 + 2584 2571 return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); 2585 2572 } 2586 2573 ··· 2601 2582 2602 2583 /* See if we have an error cause code in the chunk. 
*/ 2603 2584 len = ntohs(chunk->chunk_hdr->length); 2604 - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { 2605 - struct sctp_errhdr *err; 2606 - 2607 - sctp_walk_errors(err, chunk->chunk_hdr); 2608 - if ((void *)err != (void *)chunk->chunk_end) 2609 - return sctp_sf_pdiscard(net, ep, asoc, type, arg, 2610 - commands); 2611 - 2585 + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) 2612 2586 error = ((struct sctp_errhdr *)chunk->skb->data)->cause; 2613 - } 2614 2587 2615 2588 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); 2616 2589 /* ASSOC_FAILED will DELETE_TCB. */
+19 -1
net/tls/tls_device.c
··· 593 593 u32 seq, u64 *p_record_sn) 594 594 { 595 595 u64 record_sn = context->hint_record_sn; 596 - struct tls_record_info *info; 596 + struct tls_record_info *info, *last; 597 597 598 598 info = context->retransmit_hint; 599 599 if (!info || ··· 605 605 struct tls_record_info, list); 606 606 if (!info) 607 607 return NULL; 608 + /* send the start_marker record if seq number is before the 609 + * tls offload start marker sequence number. This record is 610 + * required to handle TCP packets which are before TLS offload 611 + * started. 612 + * And if it's not start marker, look if this seq number 613 + * belongs to the list. 614 + */ 615 + if (likely(!tls_record_is_start_marker(info))) { 616 + /* we have the first record, get the last record to see 617 + * if this seq number belongs to the list. 618 + */ 619 + last = list_last_entry(&context->records_list, 620 + struct tls_record_info, list); 621 + 622 + if (!between(seq, tls_record_start_seq(info), 623 + last->end_seq)) 624 + return NULL; 625 + } 608 626 record_sn = context->unacked_record_sn; 609 627 } 610 628
+2
net/xdp/xsk.c
··· 217 217 static void xsk_flush(struct xdp_sock *xs) 218 218 { 219 219 xskq_prod_submit(xs->rx); 220 + __xskq_cons_release(xs->umem->fq); 220 221 sock_def_readable(&xs->sk); 221 222 } 222 223 ··· 305 304 306 305 rcu_read_lock(); 307 306 list_for_each_entry_rcu(xs, &umem->xsk_list, list) { 307 + __xskq_cons_release(xs->tx); 308 308 xs->sk.sk_write_space(&xs->sk); 309 309 } 310 310 rcu_read_unlock();
+2 -1
net/xdp/xsk_queue.h
··· 271 271 { 272 272 /* To improve performance, only update local state here. 273 273 * Reflect this to global state when we get new entries 274 - * from the ring in xskq_cons_get_entries(). 274 + * from the ring in xskq_cons_get_entries() and whenever 275 + * Rx or Tx processing are completed in the NAPI loop. 275 276 */ 276 277 q->cached_cons++; 277 278 }
+7 -9
tools/include/uapi/linux/bpf.h
··· 1045 1045 * supports redirection to the egress interface, and accepts no 1046 1046 * flag at all. 1047 1047 * 1048 - * The same effect can be attained with the more generic 1049 - * **bpf_redirect_map**\ (), which requires specific maps to be 1050 - * used but offers better performance. 1048 + * The same effect can also be attained with the more generic 1049 + * **bpf_redirect_map**\ (), which uses a BPF map to store the 1050 + * redirect target instead of providing it directly to the helper. 1051 1051 * Return 1052 1052 * For XDP, the helper returns **XDP_REDIRECT** on success or 1053 1053 * **XDP_ABORTED** on error. For other program types, the values ··· 1611 1611 * the caller. Any higher bits in the *flags* argument must be 1612 1612 * unset. 1613 1613 * 1614 - * When used to redirect packets to net devices, this helper 1615 - * provides a high performance increase over **bpf_redirect**\ (). 1616 - * This is due to various implementation details of the underlying 1617 - * mechanisms, one of which is the fact that **bpf_redirect_map**\ 1618 - * () tries to send packet as a "bulk" to the device. 1614 + * See also bpf_redirect(), which only supports redirecting to an 1615 + * ifindex, but doesn't require a map to do so. 1619 1616 * Return 1620 - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. 1617 + * **XDP_REDIRECT** on success, or the value of the two lower bits 1618 + * of the **flags* argument on error. 1621 1619 * 1622 1620 * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) 1623 1621 * Description
+7 -1
tools/lib/bpf/libbpf.c
··· 24 24 #include <endian.h> 25 25 #include <fcntl.h> 26 26 #include <errno.h> 27 + #include <ctype.h> 27 28 #include <asm/unistd.h> 28 29 #include <linux/err.h> 29 30 #include <linux/kernel.h> ··· 1284 1283 static char *internal_map_name(struct bpf_object *obj, 1285 1284 enum libbpf_map_type type) 1286 1285 { 1287 - char map_name[BPF_OBJ_NAME_LEN]; 1286 + char map_name[BPF_OBJ_NAME_LEN], *p; 1288 1287 const char *sfx = libbpf_type_to_btf_name[type]; 1289 1288 int sfx_len = max((size_t)7, strlen(sfx)); 1290 1289 int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, ··· 1292 1291 1293 1292 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, 1294 1293 sfx_len, libbpf_type_to_btf_name[type]); 1294 + 1295 + /* sanitise map name to characters allowed by kernel */ 1296 + for (p = map_name; *p && p < map_name + sizeof(map_name); p++) 1297 + if (!isalnum(*p) && *p != '_' && *p != '.') 1298 + *p = '_'; 1295 1299 1296 1300 return strdup(map_name); 1297 1301 }
+6 -2
tools/testing/selftests/bpf/prog_tests/select_reuseport.c
··· 506 506 .pass_on_failure = 0, 507 507 }; 508 508 509 - if (type != SOCK_STREAM) 509 + if (type != SOCK_STREAM) { 510 + test__skip(); 510 511 return; 512 + } 511 513 512 514 /* 513 515 * +1 for TCP-SYN and ··· 824 822 goto out; 825 823 826 824 saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); 825 + if (saved_tcp_fo < 0) 826 + goto out; 827 827 saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL); 828 - if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0) 828 + if (saved_tcp_syncookie < 0) 829 829 goto out; 830 830 831 831 if (enable_fastopen())
+5
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
··· 3 3 4 4 #include "test_progs.h" 5 5 6 + #define TCP_REPAIR 19 /* TCP sock is under repair right now */ 7 + 8 + #define TCP_REPAIR_ON 1 9 + #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ 10 + 6 11 static int connected_socket_v4(void) 7 12 { 8 13 struct sockaddr_in addr = {
+6
tools/testing/selftests/net/fib_tests.sh
··· 910 910 check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" 911 911 log_test $? 0 "Multipath with single path via multipath attribute" 912 912 913 + # multipath with dev-only 914 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" 915 + run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1" 916 + check_route6 "2001:db8:104::/64 dev veth1 metric 1024" 917 + log_test $? 0 "Multipath with dev-only" 918 + 913 919 # route replace fails - invalid nexthop 1 914 920 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" 915 921 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+13 -12
tools/testing/selftests/net/forwarding/mirror_gre.sh
··· 63 63 { 64 64 local tundev=$1; shift 65 65 local direction=$1; shift 66 - local prot=$1; shift 67 66 local what=$1; shift 68 67 69 - local swp3mac=$(mac_get $swp3) 70 - local h3mac=$(mac_get $h3) 68 + case "$direction" in 69 + ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2) 70 + ;; 71 + egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1) 72 + ;; 73 + esac 71 74 72 75 RET=0 73 76 74 77 mirror_install $swp1 $direction $tundev "matchall $tcflags" 75 - tc filter add dev $h3 ingress pref 77 prot $prot \ 76 - flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ 77 - action pass 78 + icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" 78 79 79 - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 80 + mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 80 81 81 - tc filter del dev $h3 ingress pref 77 82 + icmp_capture_uninstall h3-${tundev} 82 83 mirror_uninstall $swp1 $direction 83 84 84 85 log_test "$direction $what: envelope MAC ($tcflags)" ··· 121 120 122 121 test_gretap_mac() 123 122 { 124 - test_span_gre_mac gt4 ingress ip "mirror to gretap" 125 - test_span_gre_mac gt4 egress ip "mirror to gretap" 123 + test_span_gre_mac gt4 ingress "mirror to gretap" 124 + test_span_gre_mac gt4 egress "mirror to gretap" 126 125 } 127 126 128 127 test_ip6gretap_mac() 129 128 { 130 - test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" 131 - test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" 129 + test_span_gre_mac gt6 ingress "mirror to ip6gretap" 130 + test_span_gre_mac gt6 egress "mirror to ip6gretap" 132 131 } 133 132 134 133 test_all()
+3 -3
tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
··· 516 516 RET=0 517 517 518 518 tc filter add dev v1 egress pref 77 prot ip \ 519 - flower ip_tos 0x40 action pass 520 - vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 521 - vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 519 + flower ip_tos 0x14 action pass 520 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x14" v1 egress 77 10 521 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x18" v1 egress 77 0 522 522 tc filter del dev v1 egress pref 77 prot ip 523 523 524 524 log_test "VXLAN: envelope TOS inheritance"
+14 -24
tools/testing/selftests/wireguard/qemu/Makefile
··· 38 38 define file_download = 39 39 $(DISTFILES_PATH)/$(1): 40 40 mkdir -p $(DISTFILES_PATH) 41 - flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' 42 - if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi 41 + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' 43 42 endef 44 43 45 44 $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) 46 - $(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) 47 45 $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) 48 46 $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) 49 47 $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) 50 48 $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) 51 49 $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) 52 50 $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) 53 - $(eval $(call 
tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) 51 + $(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) 54 52 55 53 KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) 56 54 rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) ··· 293 295 $(MAKE) -C $(IPERF_PATH) 294 296 $(STRIP) -s $@ 295 297 296 - $(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) 297 - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< 298 - touch $@ 299 - 300 - $(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) 301 - cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared 302 - $(MAKE) -C $(LIBMNL_PATH) 303 - sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc 304 - 305 298 $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) 299 + mkdir -p $(BUILD_PATH) 306 300 flock -s $<.lock tar -C $(BUILD_PATH) -xf $< 307 301 touch $@ 308 302 309 - $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) 310 - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg 303 + $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS) 304 + $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg 311 305 $(STRIP) -s $@ 312 306 313 307 $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) ··· 330 340 $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) 331 341 mkdir -p $(BUILD_PATH) 332 342 flock -s $<.lock tar -C $(BUILD_PATH) -xf $< 333 - printf 
'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk 343 + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk 334 344 printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile 335 345 touch $@ 336 346 337 - $(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) 338 - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip 339 - $(STRIP) -s $(IPROUTE2_PATH)/ip/ip 347 + $(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) 348 + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip 349 + $(STRIP) -s $@ 340 350 341 - $(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) 342 - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss 343 - $(STRIP) -s $(IPROUTE2_PATH)/misc/ss 351 + $(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) 352 + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss 353 + $(STRIP) -s $@ 344 354 345 355 $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) 346 356 mkdir -p $(BUILD_PATH) ··· 348 358 sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure 349 359 touch $@ 350 360 351 - $(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) 352 - cd 
$(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include 361 + $(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS) 362 + cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include 353 363 $(MAKE) -C $(IPTABLES_PATH) 354 364 $(STRIP) -s $@ 355 365