
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

1) Ensure that mtu is at least IPV6_MIN_MTU in ipv6 VTI tunnel driver,
from Steffen Klassert.

2) Fix crashes when a user tries to get_next_key on an LPM bpf map, from
Alexei Starovoitov.

3) Fix detection of VLAN filtering feature for bnx2x VF devices, from
Michal Schmidt.

4) We can get a divide by zero when TCP sockets are morphed into
listening state, fix from Eric Dumazet.

5) Fix socket refcounting bugs in skb_complete_wifi_ack() and
skb_complete_tx_timestamp(). From Eric Dumazet.

6) Use after free in dccp_feat_activate_values(), also from Eric
Dumazet.

7) Like bonding, team needs to use ETH_MAX_MTU as netdev->max_mtu, from
Jarod Wilson.

8) Fix use after free in vrf_xmit(), from David Ahern.

9) Don't do UDP Fragmentation Offload on IPComp ipsec packets, from
Alexey Kodanev.

10) Properly check napi_complete_done() return value in order to decide
whether to re-enable IRQs or not in amd-xgbe driver, from Thomas
Lendacky.

11) Fix double free of hwmon device in marvell phy driver, from Andrew
Lunn.

12) Don't crash on malformed netlink attributes in act_connmark, from
Etienne Noss.

13) Don't remove routes with a higher metric in ipv6 ECMP route replace,
from Sabrina Dubroca.

14) Don't write into a cloned SKB in ipv6 fragmentation handling, from
Florian Westphal.

15) Fix routing redirect races in dccp and tcp, basically the ICMP
handler can't modify the socket's cached route if it's locked by the
user at this moment. From Jon Maxwell.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (108 commits)
qed: Enable iSCSI Out-of-Order
qed: Correct out-of-bound access in OOO history
qed: Fix interrupt flags on Rx LL2
qed: Free previous connections when releasing iSCSI
qed: Fix mapping leak on LL2 rx flow
qed: Prevent creation of too-big u32-chains
qed: Align CIDs according to DORQ requirement
mlxsw: reg: Fix SPVMLR max record count
mlxsw: reg: Fix SPVM max record count
net: Resend IGMP memberships upon peer notification.
dccp: fix memory leak during tear-down of unsuccessful connection request
tun: fix premature POLLOUT notification on tun devices
dccp/tcp: fix routing redirect race
ucc/hdlc: fix two little issue
vxlan: fix ovs support
net: use net->count to check whether a netns is alive or not
bridge: drop netfilter fake rtable unconditionally
ipv6: avoid write to a possibly cloned skb
net: wimax/i2400m: fix NULL-deref at probe
isdn/gigaset: fix NULL-deref at probe
...

+1447 -826
+60 -2
Documentation/devicetree/bindings/powerpc/4xx/emac.txt
··· 71 71 For Axon it can be absent, though my current driver 72 72 doesn't handle phy-address yet so for now, keep 73 73 0x00ffffff in it. 74 + - phy-handle : Used to describe configurations where a external PHY 75 + is used. Please refer to: 76 + Documentation/devicetree/bindings/net/ethernet.txt 74 77 - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec 75 78 operations (if absent the value is the same as 76 79 rx-fifo-size). For Axon, either absent or 2048. ··· 84 81 offload, phandle of the TAH device node. 85 82 - tah-channel : 1 cell, optional. If appropriate, channel used on the 86 83 TAH engine. 84 + - fixed-link : Fixed-link subnode describing a link to a non-MDIO 85 + managed entity. See 86 + Documentation/devicetree/bindings/net/fixed-link.txt 87 + for details. 88 + - mdio subnode : When the EMAC has a phy connected to its local 89 + mdio, which us supported by the kernel's network 90 + PHY library in drivers/net/phy, there must be device 91 + tree subnode with the following required properties: 92 + - #address-cells: Must be <1>. 93 + - #size-cells: Must be <0>. 
87 94 88 - Example: 95 + For PHY definitions: Please refer to 96 + Documentation/devicetree/bindings/net/phy.txt and 97 + Documentation/devicetree/bindings/net/ethernet.txt 98 + 99 + Examples: 89 100 90 101 EMAC0: ethernet@40000800 { 91 102 device_type = "network"; ··· 120 103 zmii-device = <&ZMII0>; 121 104 zmii-channel = <0>; 122 105 }; 106 + 107 + EMAC1: ethernet@ef600c00 { 108 + device_type = "network"; 109 + compatible = "ibm,emac-apm821xx", "ibm,emac4sync"; 110 + interrupt-parent = <&EMAC1>; 111 + interrupts = <0 1>; 112 + #interrupt-cells = <1>; 113 + #address-cells = <0>; 114 + #size-cells = <0>; 115 + interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */ 116 + 1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>; 117 + reg = <0xef600c00 0x000000c4>; 118 + local-mac-address = [000000000000]; /* Filled in by U-Boot */ 119 + mal-device = <&MAL0>; 120 + mal-tx-channel = <0>; 121 + mal-rx-channel = <0>; 122 + cell-index = <0>; 123 + max-frame-size = <9000>; 124 + rx-fifo-size = <16384>; 125 + tx-fifo-size = <2048>; 126 + fifo-entry-size = <10>; 127 + phy-mode = "rgmii"; 128 + phy-handle = <&phy0>; 129 + phy-map = <0x00000000>; 130 + rgmii-device = <&RGMII0>; 131 + rgmii-channel = <0>; 132 + tah-device = <&TAH0>; 133 + tah-channel = <0>; 134 + has-inverted-stacr-oc; 135 + has-new-stacr-staopc; 136 + 137 + mdio { 138 + #address-cells = <1>; 139 + #size-cells = <0>; 140 + 141 + phy0: ethernet-phy@0 { 142 + compatible = "ethernet-phy-ieee802.3-c22"; 143 + reg = <0>; 144 + }; 145 + }; 146 + }; 147 + 123 148 124 149 ii) McMAL node 125 150 ··· 204 145 - revision : as provided by the RGMII new version register if 205 146 available. 206 147 For Axon: 0x0000012a 207 -
+2 -1
Documentation/networking/ip-sysctl.txt
··· 1006 1006 FALSE (router) 1007 1007 1008 1008 forwarding - BOOLEAN 1009 - Enable IP forwarding on this interface. 1009 + Enable IP forwarding on this interface. This controls whether packets 1010 + received _on_ this interface can be forwarded. 1010 1011 1011 1012 mc_forwarding - BOOLEAN 1012 1013 Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
+5 -4
crypto/af_alg.c
··· 266 266 return err; 267 267 } 268 268 269 - int af_alg_accept(struct sock *sk, struct socket *newsock) 269 + int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) 270 270 { 271 271 struct alg_sock *ask = alg_sk(sk); 272 272 const struct af_alg_type *type; ··· 281 281 if (!type) 282 282 goto unlock; 283 283 284 - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); 284 + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern); 285 285 err = -ENOMEM; 286 286 if (!sk2) 287 287 goto unlock; ··· 323 323 } 324 324 EXPORT_SYMBOL_GPL(af_alg_accept); 325 325 326 - static int alg_accept(struct socket *sock, struct socket *newsock, int flags) 326 + static int alg_accept(struct socket *sock, struct socket *newsock, int flags, 327 + bool kern) 327 328 { 328 - return af_alg_accept(sock->sk, newsock); 329 + return af_alg_accept(sock->sk, newsock, kern); 329 330 } 330 331 331 332 static const struct proto_ops alg_proto_ops = {
+5 -4
crypto/algif_hash.c
··· 239 239 return err ?: len; 240 240 } 241 241 242 - static int hash_accept(struct socket *sock, struct socket *newsock, int flags) 242 + static int hash_accept(struct socket *sock, struct socket *newsock, int flags, 243 + bool kern) 243 244 { 244 245 struct sock *sk = sock->sk; 245 246 struct alg_sock *ask = alg_sk(sk); ··· 261 260 if (err) 262 261 return err; 263 262 264 - err = af_alg_accept(ask->parent, newsock); 263 + err = af_alg_accept(ask->parent, newsock, kern); 265 264 if (err) 266 265 return err; 267 266 ··· 379 378 } 380 379 381 380 static int hash_accept_nokey(struct socket *sock, struct socket *newsock, 382 - int flags) 381 + int flags, bool kern) 383 382 { 384 383 int err; 385 384 ··· 387 386 if (err) 388 387 return err; 389 388 390 - return hash_accept(sock, newsock, flags); 389 + return hash_accept(sock, newsock, flags, kern); 391 390 } 392 391 393 392 static struct proto_ops algif_hash_ops_nokey = {
+3
drivers/isdn/gigaset/bas-gigaset.c
··· 2317 2317 return -ENODEV; 2318 2318 } 2319 2319 2320 + if (hostif->desc.bNumEndpoints < 1) 2321 + return -ENODEV; 2322 + 2320 2323 dev_info(&udev->dev, 2321 2324 "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n", 2322 2325 __func__, le16_to_cpu(udev->descriptor.idVendor),
+2 -8
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
··· 2272 2272 processed = xgbe_rx_poll(channel, budget); 2273 2273 2274 2274 /* If we processed everything, we are done */ 2275 - if (processed < budget) { 2276 - /* Turn off polling */ 2277 - napi_complete_done(napi, processed); 2278 - 2275 + if ((processed < budget) && napi_complete_done(napi, processed)) { 2279 2276 /* Enable Tx and Rx interrupts */ 2280 2277 if (pdata->channel_irq_mode) 2281 2278 xgbe_enable_rx_tx_int(pdata, channel); ··· 2314 2317 } while ((processed < budget) && (processed != last_processed)); 2315 2318 2316 2319 /* If we processed everything, we are done */ 2317 - if (processed < budget) { 2318 - /* Turn off polling */ 2319 - napi_complete_done(napi, processed); 2320 - 2320 + if ((processed < budget) && napi_complete_done(napi, processed)) { 2321 2321 /* Enable Tx and Rx interrupts */ 2322 2322 xgbe_enable_rx_tx_ints(pdata); 2323 2323 }
+1 -1
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
··· 213 213 if (!((1U << i) & self->msix_entry_mask)) 214 214 continue; 215 215 216 - free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); 217 216 if (pdev->msix_enabled) 218 217 irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL); 218 + free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); 219 219 self->msix_entry_mask &= ~(1U << i); 220 220 } 221 221 }
+30 -6
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
··· 13292 13292 dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 13293 13293 NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA; 13294 13294 13295 - /* VF with OLD Hypervisor or old PF do not support filtering */ 13296 13295 if (IS_PF(bp)) { 13297 13296 if (chip_is_e1x) 13298 13297 bp->accept_any_vlan = true; 13299 13298 else 13300 13299 dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; 13301 - #ifdef CONFIG_BNX2X_SRIOV 13302 - } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { 13303 - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; 13304 - #endif 13305 13300 } 13301 + /* For VF we'll know whether to enable VLAN filtering after 13302 + * getting a response to CHANNEL_TLV_ACQUIRE from PF. 13303 + */ 13306 13304 13307 13305 dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; 13308 13306 dev->features |= NETIF_F_HIGHDMA; ··· 13736 13738 if (!netif_running(bp->dev)) { 13737 13739 DP(BNX2X_MSG_PTP, 13738 13740 "PTP adjfreq called while the interface is down\n"); 13739 - return -EFAULT; 13741 + return -ENETDOWN; 13740 13742 } 13741 13743 13742 13744 if (ppb < 0) { ··· 13795 13797 { 13796 13798 struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); 13797 13799 13800 + if (!netif_running(bp->dev)) { 13801 + DP(BNX2X_MSG_PTP, 13802 + "PTP adjtime called while the interface is down\n"); 13803 + return -ENETDOWN; 13804 + } 13805 + 13798 13806 DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta); 13799 13807 13800 13808 timecounter_adjtime(&bp->timecounter, delta); ··· 13812 13808 { 13813 13809 struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); 13814 13810 u64 ns; 13811 + 13812 + if (!netif_running(bp->dev)) { 13813 + DP(BNX2X_MSG_PTP, 13814 + "PTP gettime called while the interface is down\n"); 13815 + return -ENETDOWN; 13816 + } 13815 13817 13816 13818 ns = timecounter_read(&bp->timecounter); 13817 13819 ··· 13833 13823 { 13834 13824 struct bnx2x *bp = container_of(ptp, 
struct bnx2x, ptp_clock_info); 13835 13825 u64 ns; 13826 + 13827 + if (!netif_running(bp->dev)) { 13828 + DP(BNX2X_MSG_PTP, 13829 + "PTP settime called while the interface is down\n"); 13830 + return -ENETDOWN; 13831 + } 13836 13832 13837 13833 ns = timespec64_to_ns(ts); 13838 13834 ··· 14007 13991 rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); 14008 13992 if (rc) 14009 13993 goto init_one_freemem; 13994 + 13995 + #ifdef CONFIG_BNX2X_SRIOV 13996 + /* VF with OLD Hypervisor or old PF do not support filtering */ 13997 + if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { 13998 + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; 13999 + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 14000 + } 14001 + #endif 14010 14002 } 14011 14003 14012 14004 /* Enable SRIOV if capability found in configuration space */
+16 -8
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
··· 434 434 435 435 /* Add/Remove the filter */ 436 436 rc = bnx2x_config_vlan_mac(bp, &ramrod); 437 - if (rc && rc != -EEXIST) { 437 + if (rc == -EEXIST) 438 + return 0; 439 + if (rc) { 438 440 BNX2X_ERR("Failed to %s %s\n", 439 441 filter->add ? "add" : "delete", 440 442 (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ? ··· 445 443 "MAC" : "VLAN"); 446 444 return rc; 447 445 } 446 + 447 + filter->applied = true; 448 448 449 449 return 0; 450 450 } ··· 473 469 /* Rollback if needed */ 474 470 if (i != filters->count) { 475 471 BNX2X_ERR("Managed only %d/%d filters - rolling back\n", 476 - i, filters->count + 1); 472 + i, filters->count); 477 473 while (--i >= 0) { 474 + if (!filters->filters[i].applied) 475 + continue; 478 476 filters->filters[i].add = !filters->filters[i].add; 479 477 bnx2x_vf_mac_vlan_config(bp, vf, qid, 480 478 &filters->filters[i], ··· 1905 1899 continue; 1906 1900 } 1907 1901 1908 - DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid); 1902 + DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS), 1903 + "add addresses for vf %d\n", vf->abs_vfid); 1909 1904 for_each_vfq(vf, j) { 1910 1905 struct bnx2x_vf_queue *rxq = vfq_get(vf, j); 1911 1906 ··· 1927 1920 cpu_to_le32(U64_HI(q_stats_addr)); 1928 1921 cur_query_entry->address.lo = 1929 1922 cpu_to_le32(U64_LO(q_stats_addr)); 1930 - DP(BNX2X_MSG_IOV, 1931 - "added address %x %x for vf %d queue %d client %d\n", 1932 - cur_query_entry->address.hi, 1933 - cur_query_entry->address.lo, cur_query_entry->funcID, 1934 - j, cur_query_entry->index); 1923 + DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS), 1924 + "added address %x %x for vf %d queue %d client %d\n", 1925 + cur_query_entry->address.hi, 1926 + cur_query_entry->address.lo, 1927 + cur_query_entry->funcID, 1928 + j, cur_query_entry->index); 1935 1929 cur_query_entry++; 1936 1930 cur_data_offset += sizeof(struct per_queue_stats); 1937 1931 stats_count++;
+1
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
··· 114 114 (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/ 115 115 116 116 bool add; 117 + bool applied; 117 118 u8 *mac; 118 119 u16 vid; 119 120 };
+28 -12
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
··· 868 868 struct bnx2x *bp = netdev_priv(dev); 869 869 struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; 870 870 struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; 871 - int rc, i = 0; 871 + int rc = 0, i = 0; 872 872 struct netdev_hw_addr *ha; 873 873 874 874 if (bp->state != BNX2X_STATE_OPEN) { ··· 883 883 /* Get Rx mode requested */ 884 884 DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags); 885 885 886 + /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */ 887 + if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) { 888 + DP(NETIF_MSG_IFUP, 889 + "VF supports not more than %d multicast MAC addresses\n", 890 + PFVF_MAX_MULTICAST_PER_VF); 891 + rc = -EINVAL; 892 + goto out; 893 + } 894 + 886 895 netdev_for_each_mc_addr(ha, dev) { 887 896 DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n", 888 897 bnx2x_mc_addr(ha)); 889 898 memcpy(req->multicast[i], bnx2x_mc_addr(ha), ETH_ALEN); 890 899 i++; 891 - } 892 - 893 - /* We support four PFVF_MAX_MULTICAST_PER_VF mcast 894 - * addresses tops 895 - */ 896 - if (i >= PFVF_MAX_MULTICAST_PER_VF) { 897 - DP(NETIF_MSG_IFUP, 898 - "VF supports not more than %d multicast MAC addresses\n", 899 - PFVF_MAX_MULTICAST_PER_VF); 900 - return -EINVAL; 901 900 } 902 901 903 902 req->n_multicast = i; ··· 923 924 out: 924 925 bnx2x_vfpf_finalize(bp, &req->first_tlv); 925 926 926 - return 0; 927 + return rc; 927 928 } 928 929 929 930 /* request pf to add a vlan for the vf */ ··· 1770 1771 1771 1772 if (fl) { 1772 1773 /* set mac list */ 1774 + rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl, 1775 + msg->vf_qid, 1776 + false); 1777 + if (rc) 1778 + goto op_err; 1779 + } 1780 + 1781 + /* build vlan list */ 1782 + fl = NULL; 1783 + 1784 + rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl, 1785 + VFPF_VLAN_FILTER); 1786 + if (rc) 1787 + goto op_err; 1788 + 1789 + if (fl) { 1790 + /* set vlan list */ 1773 1791 rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl, 1774 1792 msg->vf_qid, 1775 1793 false);
+18 -7
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 4465 4465 vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK; 4466 4466 } 4467 4467 #endif 4468 + if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) & 4469 + FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)) 4470 + bp->flags |= BNXT_FLAG_FW_LLDP_AGENT; 4471 + 4468 4472 switch (resp->port_partition_type) { 4469 4473 case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: 4470 4474 case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5: ··· 5511 5507 bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & 5512 5508 PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK; 5513 5509 } 5514 - link_info->support_auto_speeds = 5515 - le16_to_cpu(resp->supported_speeds_auto_mode); 5510 + if (resp->supported_speeds_auto_mode) 5511 + link_info->support_auto_speeds = 5512 + le16_to_cpu(resp->supported_speeds_auto_mode); 5516 5513 5517 5514 hwrm_phy_qcaps_exit: 5518 5515 mutex_unlock(&bp->hwrm_cmd_lock); ··· 6500 6495 if (!silent) 6501 6496 bnxt_dbg_dump_states(bp); 6502 6497 if (netif_running(bp->dev)) { 6498 + int rc; 6499 + 6500 + if (!silent) 6501 + bnxt_ulp_stop(bp); 6503 6502 bnxt_close_nic(bp, false, false); 6504 - bnxt_open_nic(bp, false, false); 6503 + rc = bnxt_open_nic(bp, false, false); 6504 + if (!silent && !rc) 6505 + bnxt_ulp_start(bp); 6505 6506 } 6506 6507 } 6507 6508 ··· 7455 7444 if (rc) 7456 7445 goto init_err_pci_clean; 7457 7446 7447 + rc = bnxt_hwrm_func_reset(bp); 7448 + if (rc) 7449 + goto init_err_pci_clean; 7450 + 7458 7451 bnxt_hwrm_fw_set_time(bp); 7459 7452 7460 7453 dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | ··· 7566 7551 bp->flags |= BNXT_FLAG_STRIP_VLAN; 7567 7552 7568 7553 rc = bnxt_probe_phy(bp); 7569 - if (rc) 7570 - goto init_err_pci_clean; 7571 - 7572 - rc = bnxt_hwrm_func_reset(bp); 7573 7554 if (rc) 7574 7555 goto init_err_pci_clean; 7575 7556
+1
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 993 993 BNXT_FLAG_ROCEV2_CAP) 994 994 #define BNXT_FLAG_NO_AGG_RINGS 0x20000 995 995 #define BNXT_FLAG_RX_PAGE_MODE 0x40000 996 + #define BNXT_FLAG_FW_LLDP_AGENT 0x80000 996 997 #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000 997 998 998 999 #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
+1 -1
drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
··· 474 474 return; 475 475 476 476 bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE; 477 - if (BNXT_PF(bp)) 477 + if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT)) 478 478 bp->dcbx_cap |= DCB_CAP_DCBX_HOST; 479 479 else 480 480 bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
+147 -59
drivers/net/ethernet/broadcom/genet/bcmgenet.c
··· 1 1 /* 2 2 * Broadcom GENET (Gigabit Ethernet) controller driver 3 3 * 4 - * Copyright (c) 2014 Broadcom Corporation 4 + * Copyright (c) 2014-2017 Broadcom 5 5 * 6 6 * This program is free software; you can redistribute it and/or modify 7 7 * it under the terms of the GNU General Public License version 2 as ··· 450 450 genet_dma_ring_regs[r]); 451 451 } 452 452 453 + static int bcmgenet_begin(struct net_device *dev) 454 + { 455 + struct bcmgenet_priv *priv = netdev_priv(dev); 456 + 457 + /* Turn on the clock */ 458 + return clk_prepare_enable(priv->clk); 459 + } 460 + 461 + static void bcmgenet_complete(struct net_device *dev) 462 + { 463 + struct bcmgenet_priv *priv = netdev_priv(dev); 464 + 465 + /* Turn off the clock */ 466 + clk_disable_unprepare(priv->clk); 467 + } 468 + 453 469 static int bcmgenet_get_link_ksettings(struct net_device *dev, 454 470 struct ethtool_link_ksettings *cmd) 455 471 { ··· 794 778 STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes), 795 779 /* Misc UniMAC counters */ 796 780 STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, 797 - UMAC_RBUF_OVFL_CNT), 798 - STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), 781 + UMAC_RBUF_OVFL_CNT_V1), 782 + STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, 783 + UMAC_RBUF_ERR_CNT_V1), 799 784 STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), 800 785 STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), 801 786 STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), ··· 838 821 } 839 822 } 840 823 824 + static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset) 825 + { 826 + u16 new_offset; 827 + u32 val; 828 + 829 + switch (offset) { 830 + case UMAC_RBUF_OVFL_CNT_V1: 831 + if (GENET_IS_V2(priv)) 832 + new_offset = RBUF_OVFL_CNT_V2; 833 + else 834 + new_offset = RBUF_OVFL_CNT_V3PLUS; 835 + 836 + val = bcmgenet_rbuf_readl(priv, new_offset); 837 + /* clear if overflowed */ 838 + if (val == ~0) 839 + 
bcmgenet_rbuf_writel(priv, 0, new_offset); 840 + break; 841 + case UMAC_RBUF_ERR_CNT_V1: 842 + if (GENET_IS_V2(priv)) 843 + new_offset = RBUF_ERR_CNT_V2; 844 + else 845 + new_offset = RBUF_ERR_CNT_V3PLUS; 846 + 847 + val = bcmgenet_rbuf_readl(priv, new_offset); 848 + /* clear if overflowed */ 849 + if (val == ~0) 850 + bcmgenet_rbuf_writel(priv, 0, new_offset); 851 + break; 852 + default: 853 + val = bcmgenet_umac_readl(priv, offset); 854 + /* clear if overflowed */ 855 + if (val == ~0) 856 + bcmgenet_umac_writel(priv, 0, offset); 857 + break; 858 + } 859 + 860 + return val; 861 + } 862 + 841 863 static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) 842 864 { 843 865 int i, j = 0; ··· 892 836 case BCMGENET_STAT_NETDEV: 893 837 case BCMGENET_STAT_SOFT: 894 838 continue; 895 - case BCMGENET_STAT_MIB_RX: 896 - case BCMGENET_STAT_MIB_TX: 897 839 case BCMGENET_STAT_RUNT: 898 - if (s->type != BCMGENET_STAT_MIB_RX) 899 - offset = BCMGENET_STAT_OFFSET; 840 + offset += BCMGENET_STAT_OFFSET; 841 + /* fall through */ 842 + case BCMGENET_STAT_MIB_TX: 843 + offset += BCMGENET_STAT_OFFSET; 844 + /* fall through */ 845 + case BCMGENET_STAT_MIB_RX: 900 846 val = bcmgenet_umac_readl(priv, 901 847 UMAC_MIB_START + j + offset); 848 + offset = 0; /* Reset Offset */ 902 849 break; 903 850 case BCMGENET_STAT_MISC: 904 - val = bcmgenet_umac_readl(priv, s->reg_offset); 905 - /* clear if overflowed */ 906 - if (val == ~0) 907 - bcmgenet_umac_writel(priv, 0, s->reg_offset); 851 + if (GENET_IS_V1(priv)) { 852 + val = bcmgenet_umac_readl(priv, s->reg_offset); 853 + /* clear if overflowed */ 854 + if (val == ~0) 855 + bcmgenet_umac_writel(priv, 0, 856 + s->reg_offset); 857 + } else { 858 + val = bcmgenet_update_stat_misc(priv, 859 + s->reg_offset); 860 + } 908 861 break; 909 862 } 910 863 ··· 1038 973 1039 974 /* standard ethtool support functions. 
*/ 1040 975 static const struct ethtool_ops bcmgenet_ethtool_ops = { 976 + .begin = bcmgenet_begin, 977 + .complete = bcmgenet_complete, 1041 978 .get_strings = bcmgenet_get_strings, 1042 979 .get_sset_count = bcmgenet_get_sset_count, 1043 980 .get_ethtool_stats = bcmgenet_get_ethtool_stats, ··· 1234 1167 struct bcmgenet_priv *priv = netdev_priv(dev); 1235 1168 struct device *kdev = &priv->pdev->dev; 1236 1169 struct enet_cb *tx_cb_ptr; 1237 - struct netdev_queue *txq; 1238 1170 unsigned int pkts_compl = 0; 1239 1171 unsigned int bytes_compl = 0; 1240 1172 unsigned int c_index; ··· 1285 1219 dev->stats.tx_packets += pkts_compl; 1286 1220 dev->stats.tx_bytes += bytes_compl; 1287 1221 1288 - txq = netdev_get_tx_queue(dev, ring->queue); 1289 - netdev_tx_completed_queue(txq, pkts_compl, bytes_compl); 1290 - 1291 - if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { 1292 - if (netif_tx_queue_stopped(txq)) 1293 - netif_tx_wake_queue(txq); 1294 - } 1222 + netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue), 1223 + pkts_compl, bytes_compl); 1295 1224 1296 1225 return pkts_compl; 1297 1226 } ··· 1309 1248 struct bcmgenet_tx_ring *ring = 1310 1249 container_of(napi, struct bcmgenet_tx_ring, napi); 1311 1250 unsigned int work_done = 0; 1251 + struct netdev_queue *txq; 1252 + unsigned long flags; 1312 1253 1313 - work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring); 1254 + spin_lock_irqsave(&ring->lock, flags); 1255 + work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring); 1256 + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { 1257 + txq = netdev_get_tx_queue(ring->priv->dev, ring->queue); 1258 + netif_tx_wake_queue(txq); 1259 + } 1260 + spin_unlock_irqrestore(&ring->lock, flags); 1314 1261 1315 1262 if (work_done == 0) { 1316 1263 napi_complete(napi); ··· 2526 2457 /* Interrupt bottom half */ 2527 2458 static void bcmgenet_irq_task(struct work_struct *work) 2528 2459 { 2460 + unsigned long flags; 2461 + unsigned int status; 2529 2462 struct bcmgenet_priv *priv = 
container_of( 2530 2463 work, struct bcmgenet_priv, bcmgenet_irq_work); 2531 2464 2532 2465 netif_dbg(priv, intr, priv->dev, "%s\n", __func__); 2533 2466 2534 - if (priv->irq0_stat & UMAC_IRQ_MPD_R) { 2535 - priv->irq0_stat &= ~UMAC_IRQ_MPD_R; 2467 + spin_lock_irqsave(&priv->lock, flags); 2468 + status = priv->irq0_stat; 2469 + priv->irq0_stat = 0; 2470 + spin_unlock_irqrestore(&priv->lock, flags); 2471 + 2472 + if (status & UMAC_IRQ_MPD_R) { 2536 2473 netif_dbg(priv, wol, priv->dev, 2537 2474 "magic packet detected, waking up\n"); 2538 2475 bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); 2539 2476 } 2540 2477 2541 2478 /* Link UP/DOWN event */ 2542 - if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { 2479 + if (status & UMAC_IRQ_LINK_EVENT) 2543 2480 phy_mac_interrupt(priv->phydev, 2544 - !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); 2545 - priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; 2546 - } 2481 + !!(status & UMAC_IRQ_LINK_UP)); 2547 2482 } 2548 2483 2549 2484 /* bcmgenet_isr1: handle Rx and Tx priority queues */ ··· 2556 2483 struct bcmgenet_priv *priv = dev_id; 2557 2484 struct bcmgenet_rx_ring *rx_ring; 2558 2485 struct bcmgenet_tx_ring *tx_ring; 2559 - unsigned int index; 2486 + unsigned int index, status; 2560 2487 2561 - /* Save irq status for bottom-half processing. 
*/ 2562 - priv->irq1_stat = 2563 - bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & 2488 + /* Read irq status */ 2489 + status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & 2564 2490 ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); 2565 2491 2566 2492 /* clear interrupts */ 2567 - bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); 2493 + bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR); 2568 2494 2569 2495 netif_dbg(priv, intr, priv->dev, 2570 - "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); 2496 + "%s: IRQ=0x%x\n", __func__, status); 2571 2497 2572 2498 /* Check Rx priority queue interrupts */ 2573 2499 for (index = 0; index < priv->hw_params->rx_queues; index++) { 2574 - if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) 2500 + if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) 2575 2501 continue; 2576 2502 2577 2503 rx_ring = &priv->rx_rings[index]; ··· 2583 2511 2584 2512 /* Check Tx priority queue interrupts */ 2585 2513 for (index = 0; index < priv->hw_params->tx_queues; index++) { 2586 - if (!(priv->irq1_stat & BIT(index))) 2514 + if (!(status & BIT(index))) 2587 2515 continue; 2588 2516 2589 2517 tx_ring = &priv->tx_rings[index]; ··· 2603 2531 struct bcmgenet_priv *priv = dev_id; 2604 2532 struct bcmgenet_rx_ring *rx_ring; 2605 2533 struct bcmgenet_tx_ring *tx_ring; 2534 + unsigned int status; 2535 + unsigned long flags; 2606 2536 2607 - /* Save irq status for bottom-half processing. 
*/ 2608 - priv->irq0_stat = 2609 - bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & 2537 + /* Read irq status */ 2538 + status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & 2610 2539 ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); 2611 2540 2612 2541 /* clear interrupts */ 2613 - bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); 2542 + bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR); 2614 2543 2615 2544 netif_dbg(priv, intr, priv->dev, 2616 - "IRQ=0x%x\n", priv->irq0_stat); 2545 + "IRQ=0x%x\n", status); 2617 2546 2618 - if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) { 2547 + if (status & UMAC_IRQ_RXDMA_DONE) { 2619 2548 rx_ring = &priv->rx_rings[DESC_INDEX]; 2620 2549 2621 2550 if (likely(napi_schedule_prep(&rx_ring->napi))) { ··· 2625 2552 } 2626 2553 } 2627 2554 2628 - if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) { 2555 + if (status & UMAC_IRQ_TXDMA_DONE) { 2629 2556 tx_ring = &priv->tx_rings[DESC_INDEX]; 2630 2557 2631 2558 if (likely(napi_schedule_prep(&tx_ring->napi))) { ··· 2634 2561 } 2635 2562 } 2636 2563 2637 - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | 2638 - UMAC_IRQ_PHY_DET_F | 2639 - UMAC_IRQ_LINK_EVENT | 2640 - UMAC_IRQ_HFB_SM | 2641 - UMAC_IRQ_HFB_MM | 2642 - UMAC_IRQ_MPD_R)) { 2643 - /* all other interested interrupts handled in bottom half */ 2644 - schedule_work(&priv->bcmgenet_irq_work); 2564 + if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && 2565 + status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { 2566 + wake_up(&priv->wq); 2645 2567 } 2646 2568 2647 - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && 2648 - priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { 2649 - priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); 2650 - wake_up(&priv->wq); 2569 + /* all other interested interrupts handled in bottom half */ 2570 + status &= (UMAC_IRQ_LINK_EVENT | 2571 + UMAC_IRQ_MPD_R); 2572 + if (status) { 2573 + /* Save irq status for bottom-half processing. 
*/ 2574 + spin_lock_irqsave(&priv->lock, flags); 2575 + priv->irq0_stat |= status; 2576 + spin_unlock_irqrestore(&priv->lock, flags); 2577 + 2578 + schedule_work(&priv->bcmgenet_irq_work); 2651 2579 } 2652 2580 2653 2581 return IRQ_HANDLED; ··· 2875 2801 err_fini_dma: 2876 2802 bcmgenet_fini_dma(priv); 2877 2803 err_clk_disable: 2804 + if (priv->internal_phy) 2805 + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); 2878 2806 clk_disable_unprepare(priv->clk); 2879 2807 return ret; 2880 2808 } ··· 3253 3177 */ 3254 3178 gphy_rev = reg & 0xffff; 3255 3179 3180 + /* This is reserved so should require special treatment */ 3181 + if (gphy_rev == 0 || gphy_rev == 0x01ff) { 3182 + pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); 3183 + return; 3184 + } 3185 + 3256 3186 /* This is the good old scheme, just GPHY major, no minor nor patch */ 3257 3187 if ((gphy_rev & 0xf0) != 0) 3258 3188 priv->gphy_rev = gphy_rev << 8; ··· 3266 3184 /* This is the new scheme, GPHY major rolls over with 0x10 = rev G0 */ 3267 3185 else if ((gphy_rev & 0xff00) != 0) 3268 3186 priv->gphy_rev = gphy_rev; 3269 - 3270 - /* This is reserved so should require special treatment */ 3271 - else if (gphy_rev == 0 || gphy_rev == 0x01ff) { 3272 - pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); 3273 - return; 3274 - } 3275 3187 3276 3188 #ifdef CONFIG_PHYS_ADDR_T_64BIT 3277 3189 if (!(params->flags & GENET_HAS_40BITS)) ··· 3309 3233 const void *macaddr; 3310 3234 struct resource *r; 3311 3235 int err = -EIO; 3236 + const char *phy_mode_str; 3312 3237 3313 3238 /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */ 3314 3239 dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1, ··· 3352 3275 err = PTR_ERR(priv->base); 3353 3276 goto err; 3354 3277 } 3278 + 3279 + spin_lock_init(&priv->lock); 3355 3280 3356 3281 SET_NETDEV_DEV(dev, &pdev->dev); 3357 3282 dev_set_drvdata(&pdev->dev, dev); ··· 3416 3337 dev_warn(&priv->pdev->dev, "failed to get enet-eee clock\n"); 3417 3338 
priv->clk_eee = NULL; 3418 3339 } 3340 + 3341 + /* If this is an internal GPHY, power it on now, before UniMAC is 3342 + * brought out of reset as absolutely no UniMAC activity is allowed 3343 + */ 3344 + if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) && 3345 + !strcasecmp(phy_mode_str, "internal")) 3346 + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); 3419 3347 3420 3348 err = reset_umac(priv); 3421 3349 if (err) ··· 3588 3502 return 0; 3589 3503 3590 3504 out_clk_disable: 3505 + if (priv->internal_phy) 3506 + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); 3591 3507 clk_disable_unprepare(priv->clk); 3592 3508 return ret; 3593 3509 }
+11 -5
drivers/net/ethernet/broadcom/genet/bcmgenet.h
··· 1 1 /* 2 - * Copyright (c) 2014 Broadcom Corporation 2 + * Copyright (c) 2014-2017 Broadcom 3 3 * 4 4 * This program is free software; you can redistribute it and/or modify 5 5 * it under the terms of the GNU General Public License version 2 as ··· 214 214 #define MDIO_REG_SHIFT 16 215 215 #define MDIO_REG_MASK 0x1F 216 216 217 - #define UMAC_RBUF_OVFL_CNT 0x61C 217 + #define UMAC_RBUF_OVFL_CNT_V1 0x61C 218 + #define RBUF_OVFL_CNT_V2 0x80 219 + #define RBUF_OVFL_CNT_V3PLUS 0x94 218 220 219 221 #define UMAC_MPD_CTRL 0x620 220 222 #define MPD_EN (1 << 0) ··· 226 224 227 225 #define UMAC_MPD_PW_MS 0x624 228 226 #define UMAC_MPD_PW_LS 0x628 229 - #define UMAC_RBUF_ERR_CNT 0x634 227 + #define UMAC_RBUF_ERR_CNT_V1 0x634 228 + #define RBUF_ERR_CNT_V2 0x84 229 + #define RBUF_ERR_CNT_V3PLUS 0x98 230 230 #define UMAC_MDF_ERR_CNT 0x638 231 231 #define UMAC_MDF_CTRL 0x650 232 232 #define UMAC_MDF_ADDR 0x654 ··· 623 619 struct work_struct bcmgenet_irq_work; 624 620 int irq0; 625 621 int irq1; 626 - unsigned int irq0_stat; 627 - unsigned int irq1_stat; 628 622 int wol_irq; 629 623 bool wol_irq_disabled; 624 + 625 + /* shared status */ 626 + spinlock_t lock; 627 + unsigned int irq0_stat; 630 628 631 629 /* HW descriptors/checksum variables */ 632 630 bool desc_64b_en;
+55 -55
drivers/net/ethernet/cavium/liquidio/lio_main.c
··· 152 152 */ 153 153 struct octeon_sg_entry *sg; 154 154 155 - u64 sg_dma_ptr; 155 + dma_addr_t sg_dma_ptr; 156 156 }; 157 157 158 158 struct handshake { ··· 734 734 struct octnic_gather *g; 735 735 int i; 736 736 737 + kfree(lio->glist_lock); 738 + lio->glist_lock = NULL; 739 + 737 740 if (!lio->glist) 738 741 return; 739 742 ··· 744 741 do { 745 742 g = (struct octnic_gather *) 746 743 list_delete_head(&lio->glist[i]); 747 - if (g) { 748 - if (g->sg) { 749 - dma_unmap_single(&lio->oct_dev-> 750 - pci_dev->dev, 751 - g->sg_dma_ptr, 752 - g->sg_size, 753 - DMA_TO_DEVICE); 754 - kfree((void *)((unsigned long)g->sg - 755 - g->adjust)); 756 - } 744 + if (g) 757 745 kfree(g); 758 - } 759 746 } while (g); 747 + 748 + if (lio->glists_virt_base && lio->glists_virt_base[i]) { 749 + lio_dma_free(lio->oct_dev, 750 + lio->glist_entry_size * lio->tx_qsize, 751 + lio->glists_virt_base[i], 752 + lio->glists_dma_base[i]); 753 + } 760 754 } 761 755 762 - kfree((void *)lio->glist); 763 - kfree((void *)lio->glist_lock); 756 + kfree(lio->glists_virt_base); 757 + lio->glists_virt_base = NULL; 758 + 759 + kfree(lio->glists_dma_base); 760 + lio->glists_dma_base = NULL; 761 + 762 + kfree(lio->glist); 763 + lio->glist = NULL; 764 764 } 765 765 766 766 /** ··· 778 772 lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock), 779 773 GFP_KERNEL); 780 774 if (!lio->glist_lock) 781 - return 1; 775 + return -ENOMEM; 782 776 783 777 lio->glist = kcalloc(num_iqs, sizeof(*lio->glist), 784 778 GFP_KERNEL); 785 779 if (!lio->glist) { 786 - kfree((void *)lio->glist_lock); 787 - return 1; 780 + kfree(lio->glist_lock); 781 + lio->glist_lock = NULL; 782 + return -ENOMEM; 783 + } 784 + 785 + lio->glist_entry_size = 786 + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); 787 + 788 + /* allocate memory to store virtual and dma base address of 789 + * per glist consistent memory 790 + */ 791 + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), 792 + GFP_KERNEL); 793 + 
lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), 794 + GFP_KERNEL); 795 + 796 + if (!lio->glists_virt_base || !lio->glists_dma_base) { 797 + delete_glists(lio); 798 + return -ENOMEM; 788 799 } 789 800 790 801 for (i = 0; i < num_iqs; i++) { ··· 811 788 812 789 INIT_LIST_HEAD(&lio->glist[i]); 813 790 791 + lio->glists_virt_base[i] = 792 + lio_dma_alloc(oct, 793 + lio->glist_entry_size * lio->tx_qsize, 794 + &lio->glists_dma_base[i]); 795 + 796 + if (!lio->glists_virt_base[i]) { 797 + delete_glists(lio); 798 + return -ENOMEM; 799 + } 800 + 814 801 for (j = 0; j < lio->tx_qsize; j++) { 815 802 g = kzalloc_node(sizeof(*g), GFP_KERNEL, 816 803 numa_node); ··· 829 796 if (!g) 830 797 break; 831 798 832 - g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * 833 - OCT_SG_ENTRY_SIZE); 799 + g->sg = lio->glists_virt_base[i] + 800 + (j * lio->glist_entry_size); 834 801 835 - g->sg = kmalloc_node(g->sg_size + 8, 836 - GFP_KERNEL, numa_node); 837 - if (!g->sg) 838 - g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL); 839 - if (!g->sg) { 840 - kfree(g); 841 - break; 842 - } 843 - 844 - /* The gather component should be aligned on 64-bit 845 - * boundary 846 - */ 847 - if (((unsigned long)g->sg) & 7) { 848 - g->adjust = 8 - (((unsigned long)g->sg) & 7); 849 - g->sg = (struct octeon_sg_entry *) 850 - ((unsigned long)g->sg + g->adjust); 851 - } 852 - g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev, 853 - g->sg, g->sg_size, 854 - DMA_TO_DEVICE); 855 - if (dma_mapping_error(&oct->pci_dev->dev, 856 - g->sg_dma_ptr)) { 857 - kfree((void *)((unsigned long)g->sg - 858 - g->adjust)); 859 - kfree(g); 860 - break; 861 - } 802 + g->sg_dma_ptr = lio->glists_dma_base[i] + 803 + (j * lio->glist_entry_size); 862 804 863 805 list_add_tail(&g->list, &lio->glist[i]); 864 806 } 865 807 866 808 if (j != lio->tx_qsize) { 867 809 delete_glists(lio); 868 - return 1; 810 + return -ENOMEM; 869 811 } 870 812 } 871 813 ··· 1893 1885 i++; 1894 1886 } 1895 1887 1896 - 
dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev, 1897 - g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE); 1898 - 1899 1888 iq = skb_iq(lio, skb); 1900 1889 spin_lock(&lio->glist_lock[iq]); 1901 1890 list_add_tail(&g->list, &lio->glist[iq]); ··· 1937 1932 frag->size, DMA_TO_DEVICE); 1938 1933 i++; 1939 1934 } 1940 - 1941 - dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev, 1942 - g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE); 1943 1935 1944 1936 iq = skb_iq(lio, skb); 1945 1937 ··· 3275 3273 i++; 3276 3274 } 3277 3275 3278 - dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr, 3279 - g->sg_size, DMA_TO_DEVICE); 3280 3276 dptr = g->sg_dma_ptr; 3281 3277 3282 3278 if (OCTEON_CN23XX_PF(oct))
+55 -49
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
··· 108 108 * received from the IP layer. 109 109 */ 110 110 struct octeon_sg_entry *sg; 111 + 112 + dma_addr_t sg_dma_ptr; 111 113 }; 112 114 113 115 struct octeon_device_priv { ··· 492 490 struct octnic_gather *g; 493 491 int i; 494 492 493 + kfree(lio->glist_lock); 494 + lio->glist_lock = NULL; 495 + 495 496 if (!lio->glist) 496 497 return; 497 498 ··· 502 497 do { 503 498 g = (struct octnic_gather *) 504 499 list_delete_head(&lio->glist[i]); 505 - if (g) { 506 - if (g->sg) 507 - kfree((void *)((unsigned long)g->sg - 508 - g->adjust)); 500 + if (g) 509 501 kfree(g); 510 - } 511 502 } while (g); 503 + 504 + if (lio->glists_virt_base && lio->glists_virt_base[i]) { 505 + lio_dma_free(lio->oct_dev, 506 + lio->glist_entry_size * lio->tx_qsize, 507 + lio->glists_virt_base[i], 508 + lio->glists_dma_base[i]); 509 + } 512 510 } 513 511 512 + kfree(lio->glists_virt_base); 513 + lio->glists_virt_base = NULL; 514 + 515 + kfree(lio->glists_dma_base); 516 + lio->glists_dma_base = NULL; 517 + 514 518 kfree(lio->glist); 515 - kfree(lio->glist_lock); 519 + lio->glist = NULL; 516 520 } 517 521 518 522 /** ··· 536 522 lio->glist_lock = 537 523 kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL); 538 524 if (!lio->glist_lock) 539 - return 1; 525 + return -ENOMEM; 540 526 541 527 lio->glist = 542 528 kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL); 543 529 if (!lio->glist) { 544 530 kfree(lio->glist_lock); 545 - return 1; 531 + lio->glist_lock = NULL; 532 + return -ENOMEM; 533 + } 534 + 535 + lio->glist_entry_size = 536 + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); 537 + 538 + /* allocate memory to store virtual and dma base address of 539 + * per glist consistent memory 540 + */ 541 + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), 542 + GFP_KERNEL); 543 + lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), 544 + GFP_KERNEL); 545 + 546 + if (!lio->glists_virt_base || !lio->glists_dma_base) { 547 + 
delete_glists(lio); 548 + return -ENOMEM; 546 549 } 547 550 548 551 for (i = 0; i < num_iqs; i++) { ··· 567 536 568 537 INIT_LIST_HEAD(&lio->glist[i]); 569 538 539 + lio->glists_virt_base[i] = 540 + lio_dma_alloc(lio->oct_dev, 541 + lio->glist_entry_size * lio->tx_qsize, 542 + &lio->glists_dma_base[i]); 543 + 544 + if (!lio->glists_virt_base[i]) { 545 + delete_glists(lio); 546 + return -ENOMEM; 547 + } 548 + 570 549 for (j = 0; j < lio->tx_qsize; j++) { 571 550 g = kzalloc(sizeof(*g), GFP_KERNEL); 572 551 if (!g) 573 552 break; 574 553 575 - g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * 576 - OCT_SG_ENTRY_SIZE); 554 + g->sg = lio->glists_virt_base[i] + 555 + (j * lio->glist_entry_size); 577 556 578 - g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL); 579 - if (!g->sg) { 580 - kfree(g); 581 - break; 582 - } 557 + g->sg_dma_ptr = lio->glists_dma_base[i] + 558 + (j * lio->glist_entry_size); 583 559 584 - /* The gather component should be aligned on 64-bit 585 - * boundary 586 - */ 587 - if (((unsigned long)g->sg) & 7) { 588 - g->adjust = 8 - (((unsigned long)g->sg) & 7); 589 - g->sg = (struct octeon_sg_entry *) 590 - ((unsigned long)g->sg + g->adjust); 591 - } 592 560 list_add_tail(&g->list, &lio->glist[i]); 593 561 } 594 562 595 563 if (j != lio->tx_qsize) { 596 564 delete_glists(lio); 597 - return 1; 565 + return -ENOMEM; 598 566 } 599 567 } 600 568 ··· 1354 1324 i++; 1355 1325 } 1356 1326 1357 - dma_unmap_single(&lio->oct_dev->pci_dev->dev, 1358 - finfo->dptr, g->sg_size, 1359 - DMA_TO_DEVICE); 1360 - 1361 1327 iq = skb_iq(lio, skb); 1362 1328 1363 1329 spin_lock(&lio->glist_lock[iq]); ··· 1399 1373 frag->size, DMA_TO_DEVICE); 1400 1374 i++; 1401 1375 } 1402 - 1403 - dma_unmap_single(&lio->oct_dev->pci_dev->dev, 1404 - finfo->dptr, g->sg_size, 1405 - DMA_TO_DEVICE); 1406 1376 1407 1377 iq = skb_iq(lio, skb); 1408 1378 ··· 2404 2382 i++; 2405 2383 } 2406 2384 2407 - dptr = dma_map_single(&oct->pci_dev->dev, 2408 - g->sg, g->sg_size, 2409 - DMA_TO_DEVICE); 2410 - if 
(dma_mapping_error(&oct->pci_dev->dev, dptr)) { 2411 - dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n", 2412 - __func__); 2413 - dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0], 2414 - skb->len - skb->data_len, 2415 - DMA_TO_DEVICE); 2416 - for (j = 1; j <= frags; j++) { 2417 - frag = &skb_shinfo(skb)->frags[j - 1]; 2418 - dma_unmap_page(&oct->pci_dev->dev, 2419 - g->sg[j >> 2].ptr[j & 3], 2420 - frag->size, DMA_TO_DEVICE); 2421 - } 2422 - return NETDEV_TX_BUSY; 2423 - } 2385 + dptr = g->sg_dma_ptr; 2424 2386 2425 2387 ndata.cmd.cmd3.dptr = dptr; 2426 2388 finfo->dptr = dptr;
+3 -3
drivers/net/ethernet/cavium/liquidio/octeon_config.h
···
71 71 #define CN23XX_MAX_RINGS_PER_VF 8
72 72
73 73 #define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
74 - #define CN23XX_MAX_IQ_DESCRIPTORS 2048
74 + #define CN23XX_MAX_IQ_DESCRIPTORS 512
75 75 #define CN23XX_DB_MIN 1
76 76 #define CN23XX_DB_MAX 8
77 77 #define CN23XX_DB_TIMEOUT 1
78 78
79 79 #define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
80 - #define CN23XX_MAX_OQ_DESCRIPTORS 2048
80 + #define CN23XX_MAX_OQ_DESCRIPTORS 512
81 81 #define CN23XX_OQ_BUF_SIZE 1536
82 82 #define CN23XX_OQ_PKTSPER_INTR 128
83 83 /*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
84 - #define CN23XX_OQ_REFIL_THRESHOLD 128
84 + #define CN23XX_OQ_REFIL_THRESHOLD 16
85 85
86 86 #define CN23XX_OQ_INTR_PKT 64
87 87 #define CN23XX_OQ_INTR_TIME 100
+2 -15
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
··· 155 155 recv_buffer_destroy(droq->recv_buf_list[i].buffer, 156 156 pg_info); 157 157 158 - if (droq->desc_ring && droq->desc_ring[i].info_ptr) 159 - lio_unmap_ring_info(oct->pci_dev, 160 - (u64)droq-> 161 - desc_ring[i].info_ptr, 162 - OCT_DROQ_INFO_SIZE); 163 158 droq->recv_buf_list[i].buffer = NULL; 164 159 } 165 160 ··· 206 211 vfree(droq->recv_buf_list); 207 212 208 213 if (droq->info_base_addr) 209 - cnnic_free_aligned_dma(oct->pci_dev, droq->info_list, 210 - droq->info_alloc_size, 211 - droq->info_base_addr, 212 - droq->info_list_dma); 214 + lio_free_info_buffer(oct, droq); 213 215 214 216 if (droq->desc_ring) 215 217 lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE), ··· 286 294 dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no, 287 295 droq->max_count); 288 296 289 - droq->info_list = 290 - cnnic_numa_alloc_aligned_dma((droq->max_count * 291 - OCT_DROQ_INFO_SIZE), 292 - &droq->info_alloc_size, 293 - &droq->info_base_addr, 294 - numa_node); 297 + droq->info_list = lio_alloc_info_buffer(oct, droq); 295 298 if (!droq->info_list) { 296 299 dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n"); 297 300 lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
+2 -2
drivers/net/ethernet/cavium/liquidio/octeon_droq.h
···
325 325 size_t desc_ring_dma;
326 326
327 327 /** Info ptr list are allocated at this virtual address. */
328 - size_t info_base_addr;
328 + void *info_base_addr;
329 329
330 330 /** DMA mapped address of the info list */
331 - size_t info_list_dma;
331 + dma_addr_t info_list_dma;
332 332
333 333 /** Allocated size of info list. */
334 334 u32 info_alloc_size;
-42
drivers/net/ethernet/cavium/liquidio/octeon_main.h
··· 140 140 return 1; 141 141 } 142 142 143 - static inline void * 144 - cnnic_numa_alloc_aligned_dma(u32 size, 145 - u32 *alloc_size, 146 - size_t *orig_ptr, 147 - int numa_node) 148 - { 149 - int retries = 0; 150 - void *ptr = NULL; 151 - 152 - #define OCTEON_MAX_ALLOC_RETRIES 1 153 - do { 154 - struct page *page = NULL; 155 - 156 - page = alloc_pages_node(numa_node, 157 - GFP_KERNEL, 158 - get_order(size)); 159 - if (!page) 160 - page = alloc_pages(GFP_KERNEL, 161 - get_order(size)); 162 - ptr = (void *)page_address(page); 163 - if ((unsigned long)ptr & 0x07) { 164 - __free_pages(page, get_order(size)); 165 - ptr = NULL; 166 - /* Increment the size required if the first 167 - * attempt failed. 168 - */ 169 - if (!retries) 170 - size += 7; 171 - } 172 - retries++; 173 - } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr); 174 - 175 - *alloc_size = size; 176 - *orig_ptr = (unsigned long)ptr; 177 - if ((unsigned long)ptr & 0x07) 178 - ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL)); 179 - return ptr; 180 - } 181 - 182 - #define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \ 183 - free_pages(orig_ptr, get_order(size)) 184 - 185 143 static inline int 186 144 sleep_cond(wait_queue_head_t *wait_queue, int *condition) 187 145 {
+27 -16
drivers/net/ethernet/cavium/liquidio/octeon_network.h
··· 62 62 63 63 /** Array of gather component linked lists */ 64 64 struct list_head *glist; 65 + void **glists_virt_base; 66 + dma_addr_t *glists_dma_base; 67 + u32 glist_entry_size; 65 68 66 69 /** Pointer to the NIC properties for the Octeon device this network 67 70 * interface is associated with. ··· 347 344 #define lio_dma_free(oct, size, virt_addr, dma_addr) \ 348 345 dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr) 349 346 347 + static inline void * 348 + lio_alloc_info_buffer(struct octeon_device *oct, 349 + struct octeon_droq *droq) 350 + { 351 + void *virt_ptr; 352 + 353 + virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE), 354 + &droq->info_list_dma); 355 + if (virt_ptr) { 356 + droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE; 357 + droq->info_base_addr = virt_ptr; 358 + } 359 + 360 + return virt_ptr; 361 + } 362 + 363 + static inline void lio_free_info_buffer(struct octeon_device *oct, 364 + struct octeon_droq *droq) 365 + { 366 + lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr, 367 + droq->info_list_dma); 368 + } 369 + 350 370 static inline 351 371 void *get_rbd(struct sk_buff *skb) 352 372 { ··· 385 359 static inline u64 386 360 lio_map_ring_info(struct octeon_droq *droq, u32 i) 387 361 { 388 - dma_addr_t dma_addr; 389 - struct octeon_device *oct = droq->oct_dev; 390 - 391 - dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i], 392 - OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE); 393 - 394 - WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr)); 395 - 396 - return (u64)dma_addr; 397 - } 398 - 399 - static inline void 400 - lio_unmap_ring_info(struct pci_dev *pci_dev, 401 - u64 info_ptr, u32 size) 402 - { 403 - dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE); 362 + return droq->info_list_dma + (i * sizeof(struct octeon_droq_info)); 404 363 } 405 364 406 365 static inline u64
+1
drivers/net/ethernet/cavium/thunder/nic.h
···
269 269 #define MAX_QUEUES_PER_QSET 8
270 270 struct queue_set *qs;
271 271 struct nicvf_cq_poll *napi[8];
272 + void *iommu_domain;
272 273 u8 vf_id;
273 274 u8 sqs_id;
274 275 bool sqs_mode;
+11 -1
drivers/net/ethernet/cavium/thunder/nicvf_main.c
··· 16 16 #include <linux/log2.h> 17 17 #include <linux/prefetch.h> 18 18 #include <linux/irq.h> 19 + #include <linux/iommu.h> 19 20 20 21 #include "nic_reg.h" 21 22 #include "nic.h" ··· 526 525 /* Get actual TSO descriptors and free them */ 527 526 tso_sqe = 528 527 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); 528 + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, 529 + tso_sqe->subdesc_cnt); 529 530 nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); 531 + } else { 532 + nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 533 + hdr->subdesc_cnt); 530 534 } 531 535 nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); 532 536 prefetch(skb); ··· 582 576 { 583 577 struct sk_buff *skb; 584 578 struct nicvf *nic = netdev_priv(netdev); 579 + struct nicvf *snic = nic; 585 580 int err = 0; 586 581 int rq_idx; 587 582 ··· 599 592 if (err && !cqe_rx->rb_cnt) 600 593 return; 601 594 602 - skb = nicvf_get_rcv_skb(nic, cqe_rx); 595 + skb = nicvf_get_rcv_skb(snic, cqe_rx); 603 596 if (!skb) { 604 597 netdev_dbg(nic->netdev, "Packet not received\n"); 605 598 return; ··· 1649 1642 1650 1643 if (!pass1_silicon(nic->pdev)) 1651 1644 nic->hw_tso = true; 1645 + 1646 + /* Get iommu domain for iova to physical addr conversion */ 1647 + nic->iommu_domain = iommu_get_domain_for_dev(dev); 1652 1648 1653 1649 pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); 1654 1650 if (sdevid == 0xA134)
+145 -39
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
··· 10 10 #include <linux/netdevice.h> 11 11 #include <linux/ip.h> 12 12 #include <linux/etherdevice.h> 13 + #include <linux/iommu.h> 13 14 #include <net/ip.h> 14 15 #include <net/tso.h> 15 16 ··· 18 17 #include "nic.h" 19 18 #include "q_struct.h" 20 19 #include "nicvf_queues.h" 20 + 21 + #define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0) 22 + 23 + static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) 24 + { 25 + /* Translation is installed only when IOMMU is present */ 26 + if (nic->iommu_domain) 27 + return iommu_iova_to_phys(nic->iommu_domain, dma_addr); 28 + return dma_addr; 29 + } 21 30 22 31 static void nicvf_get_page(struct nicvf *nic) 23 32 { ··· 98 87 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, 99 88 u32 buf_len, u64 **rbuf) 100 89 { 101 - int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0; 90 + int order = NICVF_PAGE_ORDER; 102 91 103 92 /* Check if request can be accomodated in previous allocated page */ 104 93 if (nic->rb_page && ··· 108 97 } 109 98 110 99 nicvf_get_page(nic); 111 - nic->rb_page = NULL; 112 100 113 101 /* Allocate a new page */ 102 + nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 103 + order); 114 104 if (!nic->rb_page) { 115 - nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 116 - order); 117 - if (!nic->rb_page) { 118 - this_cpu_inc(nic->pnicvf->drv_stats-> 119 - rcv_buffer_alloc_failures); 120 - return -ENOMEM; 121 - } 122 - nic->rb_page_offset = 0; 105 + this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); 106 + return -ENOMEM; 123 107 } 124 - 108 + nic->rb_page_offset = 0; 125 109 ret: 126 - *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset); 110 + /* HW will ensure data coherency, CPU sync not required */ 111 + *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, 112 + nic->rb_page_offset, buf_len, 113 + DMA_FROM_DEVICE, 114 + DMA_ATTR_SKIP_CPU_SYNC)); 115 + if 
(dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { 116 + if (!nic->rb_page_offset) 117 + __free_pages(nic->rb_page, order); 118 + nic->rb_page = NULL; 119 + return -ENOMEM; 120 + } 127 121 nic->rb_page_offset += buf_len; 128 122 129 123 return 0; ··· 174 158 rbdr->dma_size = buf_size; 175 159 rbdr->enable = true; 176 160 rbdr->thresh = RBDR_THRESH; 161 + rbdr->head = 0; 162 + rbdr->tail = 0; 177 163 178 164 nic->rb_page = NULL; 179 165 for (idx = 0; idx < ring_len; idx++) { 180 166 err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, 181 167 &rbuf); 182 - if (err) 168 + if (err) { 169 + /* To free already allocated and mapped ones */ 170 + rbdr->tail = idx - 1; 183 171 return err; 172 + } 184 173 185 174 desc = GET_RBDR_DESC(rbdr, idx); 186 - desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; 175 + desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; 187 176 } 188 177 189 178 nicvf_get_page(nic); ··· 200 179 static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) 201 180 { 202 181 int head, tail; 203 - u64 buf_addr; 182 + u64 buf_addr, phys_addr; 204 183 struct rbdr_entry_t *desc; 205 184 206 185 if (!rbdr) ··· 213 192 head = rbdr->head; 214 193 tail = rbdr->tail; 215 194 216 - /* Free SKBs */ 195 + /* Release page references */ 217 196 while (head != tail) { 218 197 desc = GET_RBDR_DESC(rbdr, head); 219 - buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; 220 - put_page(virt_to_page(phys_to_virt(buf_addr))); 198 + buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; 199 + phys_addr = nicvf_iova_to_phys(nic, buf_addr); 200 + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, 201 + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 202 + if (phys_addr) 203 + put_page(virt_to_page(phys_to_virt(phys_addr))); 221 204 head++; 222 205 head &= (rbdr->dmem.q_len - 1); 223 206 } 224 - /* Free SKB of tail desc */ 207 + /* Release buffer of tail desc */ 225 208 desc = GET_RBDR_DESC(rbdr, tail); 226 - buf_addr = desc->buf_addr << 
NICVF_RCV_BUF_ALIGN; 227 - put_page(virt_to_page(phys_to_virt(buf_addr))); 209 + buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; 210 + phys_addr = nicvf_iova_to_phys(nic, buf_addr); 211 + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, 212 + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 213 + if (phys_addr) 214 + put_page(virt_to_page(phys_to_virt(phys_addr))); 228 215 229 216 /* Free RBDR ring */ 230 217 nicvf_free_q_desc_mem(nic, &rbdr->dmem); ··· 279 250 break; 280 251 281 252 desc = GET_RBDR_DESC(rbdr, tail); 282 - desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; 253 + desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; 283 254 refill_rb_cnt--; 284 255 new_rb++; 285 256 } ··· 390 361 return 0; 391 362 } 392 363 364 + void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, 365 + int hdr_sqe, u8 subdesc_cnt) 366 + { 367 + u8 idx; 368 + struct sq_gather_subdesc *gather; 369 + 370 + /* Unmap DMA mapped skb data buffers */ 371 + for (idx = 0; idx < subdesc_cnt; idx++) { 372 + hdr_sqe++; 373 + hdr_sqe &= (sq->dmem.q_len - 1); 374 + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe); 375 + /* HW will ensure data coherency, CPU sync not required */ 376 + dma_unmap_page_attrs(&nic->pdev->dev, gather->addr, 377 + gather->size, DMA_TO_DEVICE, 378 + DMA_ATTR_SKIP_CPU_SYNC); 379 + } 380 + } 381 + 393 382 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) 394 383 { 395 384 struct sk_buff *skb; 385 + struct sq_hdr_subdesc *hdr; 386 + struct sq_hdr_subdesc *tso_sqe; 396 387 397 388 if (!sq) 398 389 return; ··· 428 379 smp_rmb(); 429 380 while (sq->head != sq->tail) { 430 381 skb = (struct sk_buff *)sq->skbuff[sq->head]; 431 - if (skb) 432 - dev_kfree_skb_any(skb); 382 + if (!skb) 383 + goto next; 384 + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); 385 + /* Check for dummy descriptor used for HW TSO offload on 88xx */ 386 + if (hdr->dont_send) { 387 + /* Get actual TSO descriptors and 
unmap them */ 388 + tso_sqe = 389 + (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); 390 + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, 391 + tso_sqe->subdesc_cnt); 392 + } else { 393 + nicvf_unmap_sndq_buffers(nic, sq, sq->head, 394 + hdr->subdesc_cnt); 395 + } 396 + dev_kfree_skb_any(skb); 397 + next: 433 398 sq->head++; 434 399 sq->head &= (sq->dmem.q_len - 1); 435 400 } ··· 622 559 nicvf_send_msg_to_pf(nic, &mbx); 623 560 624 561 if (!nic->sqs_mode && (qidx == 0)) { 625 - /* Enable checking L3/L4 length and TCP/UDP checksums */ 562 + /* Enable checking L3/L4 length and TCP/UDP checksums 563 + * Also allow IPv6 pkts with zero UDP checksum. 564 + */ 626 565 nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 627 - (BIT(24) | BIT(23) | BIT(21))); 566 + (BIT(24) | BIT(23) | BIT(21) | BIT(20))); 628 567 nicvf_config_vlan_stripping(nic, nic->netdev->features); 629 568 } 630 569 ··· 945 880 sq->tail &= (sq->dmem.q_len - 1); 946 881 947 882 return qentry; 883 + } 884 + 885 + /* Rollback to previous tail pointer when descriptors not used */ 886 + static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, 887 + int qentry, int desc_cnt) 888 + { 889 + sq->tail = qentry; 890 + atomic_add(desc_cnt, &sq->free_cnt); 948 891 } 949 892 950 893 /* Free descriptor back to SQ for future use */ ··· 1280 1207 struct sk_buff *skb, u8 sq_num) 1281 1208 { 1282 1209 int i, size; 1283 - int subdesc_cnt, tso_sqe = 0; 1210 + int subdesc_cnt, hdr_sqe = 0; 1284 1211 int qentry; 1212 + u64 dma_addr; 1285 1213 1286 1214 subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); 1287 1215 if (subdesc_cnt > atomic_read(&sq->free_cnt)) ··· 1297 1223 /* Add SQ header subdesc */ 1298 1224 nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1, 1299 1225 skb, skb->len); 1300 - tso_sqe = qentry; 1226 + hdr_sqe = qentry; 1301 1227 1302 1228 /* Add SQ gather subdescs */ 1303 1229 qentry = nicvf_get_nxt_sqentry(sq, qentry); 1304 1230 size = skb_is_nonlinear(skb) ? 
skb_headlen(skb) : skb->len; 1305 - nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data)); 1231 + /* HW will ensure data coherency, CPU sync not required */ 1232 + dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data), 1233 + offset_in_page(skb->data), size, 1234 + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 1235 + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { 1236 + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); 1237 + return 0; 1238 + } 1239 + 1240 + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); 1306 1241 1307 1242 /* Check for scattered buffer */ 1308 1243 if (!skb_is_nonlinear(skb)) ··· 1324 1241 1325 1242 qentry = nicvf_get_nxt_sqentry(sq, qentry); 1326 1243 size = skb_frag_size(frag); 1327 - nicvf_sq_add_gather_subdesc(sq, qentry, size, 1328 - virt_to_phys( 1329 - skb_frag_address(frag))); 1244 + dma_addr = dma_map_page_attrs(&nic->pdev->dev, 1245 + skb_frag_page(frag), 1246 + frag->page_offset, size, 1247 + DMA_TO_DEVICE, 1248 + DMA_ATTR_SKIP_CPU_SYNC); 1249 + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { 1250 + /* Free entire chain of mapped buffers 1251 + * here 'i' = frags mapped + above mapped skb->data 1252 + */ 1253 + nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i); 1254 + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); 1255 + return 0; 1256 + } 1257 + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); 1330 1258 } 1331 1259 1332 1260 doorbell: 1333 1261 if (nic->t88 && skb_shinfo(skb)->gso_size) { 1334 1262 qentry = nicvf_get_nxt_sqentry(sq, qentry); 1335 - nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); 1263 + nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb); 1336 1264 } 1337 1265 1338 1266 nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); ··· 1376 1282 int offset; 1377 1283 u16 *rb_lens = NULL; 1378 1284 u64 *rb_ptrs = NULL; 1285 + u64 phys_addr; 1379 1286 1380 1287 rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); 1381 1288 /* Except 88xx pass1 on all other chips CQE_RX2_S is added to ··· 
1391 1296 else 1392 1297 rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64)); 1393 1298 1394 - netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", 1395 - __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); 1396 - 1397 1299 for (frag = 0; frag < cqe_rx->rb_cnt; frag++) { 1398 1300 payload_len = rb_lens[frag_num(frag)]; 1301 + phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs); 1302 + if (!phys_addr) { 1303 + if (skb) 1304 + dev_kfree_skb_any(skb); 1305 + return NULL; 1306 + } 1307 + 1399 1308 if (!frag) { 1400 1309 /* First fragment */ 1310 + dma_unmap_page_attrs(&nic->pdev->dev, 1311 + *rb_ptrs - cqe_rx->align_pad, 1312 + RCV_FRAG_LEN, DMA_FROM_DEVICE, 1313 + DMA_ATTR_SKIP_CPU_SYNC); 1401 1314 skb = nicvf_rb_ptr_to_skb(nic, 1402 - *rb_ptrs - cqe_rx->align_pad, 1315 + phys_addr - cqe_rx->align_pad, 1403 1316 payload_len); 1404 1317 if (!skb) 1405 1318 return NULL; ··· 1415 1312 skb_put(skb, payload_len); 1416 1313 } else { 1417 1314 /* Add fragments */ 1418 - page = virt_to_page(phys_to_virt(*rb_ptrs)); 1419 - offset = phys_to_virt(*rb_ptrs) - page_address(page); 1315 + dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs, 1316 + RCV_FRAG_LEN, DMA_FROM_DEVICE, 1317 + DMA_ATTR_SKIP_CPU_SYNC); 1318 + page = virt_to_page(phys_to_virt(phys_addr)); 1319 + offset = phys_to_virt(phys_addr) - page_address(page); 1420 1320 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 1421 1321 offset, payload_len, RCV_FRAG_LEN); 1422 1322 }
+3 -1
drivers/net/ethernet/cavium/thunder/nicvf_queues.h
···
87 87 #define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13))
88 88 #define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13))
89 89 #define RBDR_THRESH (RCV_BUF_COUNT / 2)
90 - #define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */
90 + #define DMA_BUFFER_LEN 1536 /* In multiples of 128bytes */
91 91 #define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
92 92 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
93 93
···
301 301
302 302 #define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
303 303
304 + void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
305 + int hdr_sqe, u8 subdesc_cnt);
304 306 void nicvf_config_vlan_stripping(struct nicvf *nic,
305 307 netdev_features_t features);
306 308 int nicvf_set_qset_resources(struct nicvf *nic);
+45 -19
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
···
 	return 1;
 }
 
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+	u16 sdevid;
+
+	if (max_bgx_per_node)
+		return;
+
+	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	switch (sdevid) {
+	case PCI_SUBSYS_DEVID_81XX_BGX:
+		max_bgx_per_node = MAX_BGX_PER_CN81XX;
+		break;
+	case PCI_SUBSYS_DEVID_83XX_BGX:
+		max_bgx_per_node = MAX_BGX_PER_CN83XX;
+		break;
+	case PCI_SUBSYS_DEVID_88XX_BGX:
+	default:
+		max_bgx_per_node = MAX_BGX_PER_CN88XX;
+		break;
+	}
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+	int idx = (node * max_bgx_per_node) + bgx_idx;
+
+	return bgx_vnic[idx];
+}
+
 /* Return number of BGX present in HW */
 unsigned bgx_get_map(int node)
 {
 	int i;
 	unsigned map = 0;
 
-	for (i = 0; i < MAX_BGX_PER_NODE; i++) {
-		if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
+	for (i = 0; i < max_bgx_per_node; i++) {
+		if (bgx_vnic[(node * max_bgx_per_node) + i])
 			map |= (1 << i);
 	}
···
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (bgx)
 		return bgx->lmac_count;
···
 	struct bgx *bgx;
 	struct lmac *lmac;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return;
···
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 
 	if (bgx)
 		return bgx->lmac[lmacid].mac;
···
 void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 
 	if (!bgx)
 		return;
···
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 	struct lmac *lmac;
 	u64 cfg;
···
 void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
 {
 	struct pfc *pfc = (struct pfc *)pause;
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 	struct lmac *lmac;
 	u64 cfg;
···
 void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
 {
 	struct pfc *pfc = (struct pfc *)pause;
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 	struct lmac *lmac;
 	u64 cfg;
···
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return 0;
···
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return 0;
···
 	struct lmac *lmac;
 	u64 cfg;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return;
···
 		dev_info(dev, "%s: 40G_KR4\n", (char *)str);
 		break;
 	case BGX_MODE_QSGMII:
-		if ((lmacid == 0) &&
-		    (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
-			return;
-		if ((lmacid == 2) &&
-		    (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
-			return;
 		dev_info(dev, "%s: QSGMII\n", (char *)str);
 		break;
 	case BGX_MODE_RGMII:
···
 		goto err_release_regions;
 	}
 
+	set_max_bgx_per_node(pdev);
+
 	pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
 	if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
 		bgx->bgx_id = (pci_resource_start(pdev,
 			PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
-		bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+		bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
 		bgx->max_lmac = MAX_LMAC_PER_BGX;
 		bgx_vnic[bgx->bgx_id] = bgx;
 	} else {
-1
drivers/net/ethernet/cavium/thunder/thunder_bgx.h
···
 #define    MAX_BGX_PER_CN88XX    2
 #define    MAX_BGX_PER_CN81XX    3 /* 2 BGXs + 1 RGX */
 #define    MAX_BGX_PER_CN83XX    4
-#define    MAX_BGX_PER_NODE      4
 #define    MAX_LMAC_PER_BGX      4
 #define    MAX_BGX_CHANS_PER_LMAC 16
 #define    MAX_DMAC_PER_LMAC     8
+17 -8
drivers/net/ethernet/ibm/emac/core.c
···
 static int emac_dt_phy_connect(struct emac_instance *dev,
 			       struct device_node *phy_handle)
 {
-	int res;
-
 	dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def),
 				    GFP_KERNEL);
 	if (!dev->phy.def)
···
 {
 	struct device_node *np = dev->ofdev->dev.of_node;
 	struct device_node *phy_handle;
-	int res = 0;
+	int res = 1;
 
 	phy_handle = of_parse_phandle(np, "phy-handle", 0);
···
 	if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) {
 		int res = emac_dt_phy_probe(dev);
 
-		mutex_unlock(&emac_phy_map_lock);
-		if (!res)
+		switch (res) {
+		case 1:
+			/* No phy-handle property configured.
+			 * Continue with the existing phy probe
+			 * and setup code.
+			 */
+			break;
+
+		case 0:
+			mutex_unlock(&emac_phy_map_lock);
 			goto init_phy;
 
-		dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
-			res);
-		return res;
+		default:
+			mutex_unlock(&emac_phy_map_lock);
+			dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
+				res);
+			return res;
+		}
 	}
 
 	if (dev->phy_address != 0xffffffff)
+34 -9
drivers/net/ethernet/ibm/ibmvnic.c
···
 	send_map_query(adapter);
 	for (i = 0; i < rxadd_subcrqs; i++) {
 		init_rx_pool(adapter, &adapter->rx_pool[i],
-			     IBMVNIC_BUFFS_PER_POOL, i,
+			     adapter->req_rx_add_entries_per_subcrq, i,
 			     be64_to_cpu(size_array[i]), 1);
 		if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
 			dev_err(dev, "Couldn't alloc rx pool\n");
···
 	for (i = 0; i < tx_subcrqs; i++) {
 		tx_pool = &adapter->tx_pool[i];
 		tx_pool->tx_buff =
-			kcalloc(adapter->max_tx_entries_per_subcrq,
+			kcalloc(adapter->req_tx_entries_per_subcrq,
 				sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
 		if (!tx_pool->tx_buff)
 			goto tx_pool_alloc_failed;
 
 		if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-					 adapter->max_tx_entries_per_subcrq *
+					 adapter->req_tx_entries_per_subcrq *
 					 adapter->req_mtu))
 			goto tx_ltb_alloc_failed;
 
 		tx_pool->free_map =
-			kcalloc(adapter->max_tx_entries_per_subcrq,
+			kcalloc(adapter->req_tx_entries_per_subcrq,
 				sizeof(int), GFP_KERNEL);
 		if (!tx_pool->free_map)
 			goto tx_fm_alloc_failed;
 
-		for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+		for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
 			tx_pool->free_map[j] = j;
 
 		tx_pool->consumer_index = 0;
···
 	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_tx_buff *tx_buff = NULL;
+	struct ibmvnic_sub_crq_queue *tx_scrq;
 	struct ibmvnic_tx_pool *tx_pool;
 	unsigned int tx_send_failed = 0;
 	unsigned int tx_map_failed = 0;
···
 	int ret = 0;
 
 	tx_pool = &adapter->tx_pool[queue_num];
+	tx_scrq = adapter->tx_scrq[queue_num];
 	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
 	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
 			       be32_to_cpu(adapter->login_rsp_buf->
···
 	tx_pool->consumer_index =
 		(tx_pool->consumer_index + 1) %
-			adapter->max_tx_entries_per_subcrq;
+			adapter->req_tx_entries_per_subcrq;
 
 	tx_buff = &tx_pool->tx_buff[index];
 	tx_buff->skb = skb;
···
 		if (tx_pool->consumer_index == 0)
 			tx_pool->consumer_index =
-				adapter->max_tx_entries_per_subcrq - 1;
+				adapter->req_tx_entries_per_subcrq - 1;
 		else
 			tx_pool->consumer_index--;
···
 		ret = NETDEV_TX_BUSY;
 		goto out;
 	}
+
+	atomic_inc(&tx_scrq->used);
+
+	if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+		netdev_info(netdev, "Stopping queue %d\n", queue_num);
+		netif_stop_subqueue(netdev, queue_num);
+	}
+
 	tx_packets++;
 	tx_bytes += skb->len;
 	txq->trans_start = jiffies;
···
 	scrq->adapter = adapter;
 	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
 	scrq->cur = 0;
+	atomic_set(&scrq->used, 0);
 	scrq->rx_skb_top = NULL;
 	spin_lock_init(&scrq->lock);
···
 				       DMA_TO_DEVICE);
 			}
 
-			if (txbuff->last_frag)
+			if (txbuff->last_frag) {
+				atomic_dec(&scrq->used);
+
+				if (atomic_read(&scrq->used) <=
+				    (adapter->req_tx_entries_per_subcrq / 2) &&
+				    netif_subqueue_stopped(adapter->netdev,
+							   txbuff->skb)) {
+					netif_wake_subqueue(adapter->netdev,
+							    scrq->pool_index);
+					netdev_dbg(adapter->netdev,
+						   "Started queue %d\n",
+						   scrq->pool_index);
+				}
+
 				dev_kfree_skb_any(txbuff->skb);
+			}
 
 			adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
 						producer_index] = index;
 			adapter->tx_pool[pool].producer_index =
 				(adapter->tx_pool[pool].producer_index + 1) %
-					adapter->max_tx_entries_per_subcrq;
+					adapter->req_tx_entries_per_subcrq;
 		}
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
+1
drivers/net/ethernet/ibm/ibmvnic.h
···
 	spinlock_t lock;
 	struct sk_buff *rx_skb_top;
 	struct ibmvnic_adapter *adapter;
+	atomic_t used;
 };
 
 struct ibmvnic_long_term_buff {
+1
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
···
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	depends on IPV6=y || IPV6=n || MLX5_CORE=m
 	imply PTP_1588_CLOCK
 	default n
 	---help---
+5 -5
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
···
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_dcbx *dcbx = &priv->dcbx;
 
+	if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+		return 1;
+
 	if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
 		if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
 			return 0;
···
 		return 1;
 	}
 
-	if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+	if (!(mode & DCB_CAP_DCBX_HOST))
 		return 1;
 
-	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-	    !(mode & DCB_CAP_DCBX_VER_CEE) ||
-	    !(mode & DCB_CAP_DCBX_VER_IEEE) ||
-	    !(mode & DCB_CAP_DCBX_HOST))
+	if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
 		return 1;
 
 	return 0;
+1 -4
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
···
 	struct iphdr *iph;
 
 	/* We are only going to peek, no need to clone the SKB */
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
-
 	if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
 		goto out;
···
 	lbtp->loopback_ok = false;
 	init_completion(&lbtp->comp);
 
-	lbtp->pt.type = htons(ETH_P_ALL);
+	lbtp->pt.type = htons(ETH_P_IP);
 	lbtp->pt.func = mlx5e_test_loopback_validate;
 	lbtp->pt.dev = priv->netdev;
 	lbtp->pt.af_packet_priv = lbtp;
+18 -15
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
···
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+	MLX5E_TC_FLOW_ESWITCH	= BIT(0),
+};
+
 struct mlx5e_tc_flow {
 	struct rhash_head	node;
 	u64			cookie;
+	u8			flags;
 	struct mlx5_flow_handle *rule;
 	struct list_head	encap; /* flows sharing the same encap */
 	struct mlx5_esw_flow_attr *attr;
···
 		mlx5_fc_destroy(priv->mdev, counter);
 	}
 
-	if (esw && esw->mode == SRIOV_OFFLOADS) {
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 		mlx5_eswitch_del_vlan_action(esw, flow->attr);
 		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
 			mlx5e_detach_encap(priv, flow);
···
 }
 
 static int parse_cls_flower(struct mlx5e_priv *priv,
+			    struct mlx5e_tc_flow *flow,
 			    struct mlx5_flow_spec *spec,
 			    struct tc_cls_flower_offload *f)
 {
···
 	err = __parse_cls_flower(priv, spec, f, &min_inline);
 
-	if (!err && esw->mode == SRIOV_OFFLOADS &&
+	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
 	    rep->vport != FDB_UPLINK_VPORT) {
 		if (min_inline > esw->offloads.inline_mode) {
 			netdev_warn(priv->netdev,
···
 			   struct tc_cls_flower_offload *f)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
-	int err = 0;
-	bool fdb_flow = false;
+	int err, attr_size = 0;
 	u32 flow_tag, action;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_flow_spec *spec;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	u8 flow_flags = 0;
 
-	if (esw && esw->mode == SRIOV_OFFLOADS)
-		fdb_flow = true;
+	if (esw && esw->mode == SRIOV_OFFLOADS) {
+		flow_flags = MLX5E_TC_FLOW_ESWITCH;
+		attr_size = sizeof(struct mlx5_esw_flow_attr);
+	}
 
-	if (fdb_flow)
-		flow = kzalloc(sizeof(*flow) +
-			       sizeof(struct mlx5_esw_flow_attr),
-			       GFP_KERNEL);
-	else
-		flow = kzalloc(sizeof(*flow), GFP_KERNEL);
-
+	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec || !flow) {
 		err = -ENOMEM;
···
 	}
 
 	flow->cookie = f->cookie;
+	flow->flags = flow_flags;
 
-	err = parse_cls_flower(priv, spec, f);
+	err = parse_cls_flower(priv, flow, spec, f);
 	if (err < 0)
 		goto err_free;
 
-	if (fdb_flow) {
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 		flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
 		err = parse_tc_fdb_actions(priv, f->exts, flow);
 		if (err < 0)
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
···
 				u32 *match_criteria)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct list_head *prev = ft->node.children.prev;
+	struct list_head *prev = &ft->node.children;
 	unsigned int candidate_index = 0;
 	struct mlx5_flow_group *fg;
 	void *match_criteria_addr;
+3 -2
drivers/net/ethernet/mellanox/mlx5/core/main.c
···
 	if (err)
 		goto clean_load;
 
+	pci_save_state(pdev);
 	return 0;
 
 clean_load:
···
 	mlx5_enter_error_state(dev);
 	mlx5_unload_one(dev, priv, false);
-	/* In case of kernel call save the pci state and drain the health wq */
+	/* In case of kernel call drain the health wq */
 	if (state) {
-		pci_save_state(pdev);
 		mlx5_drain_health_wq(dev);
 		mlx5_pci_disable_device(dev);
 	}
···
 	pci_set_master(pdev);
 	pci_restore_state(pdev);
+	pci_save_state(pdev);
 
 	if (wait_vital(pdev)) {
 		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+2 -2
drivers/net/ethernet/mellanox/mlxsw/reg.h
···
 #define MLXSW_REG_SPVM_ID 0x200F
 #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
 #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \
 		MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
···
 #define MLXSW_REG_SPVMLR_ID 0x2020
 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
 #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
 			      MLXSW_REG_SPVMLR_REC_LEN * \
 			      MLXSW_REG_SPVMLR_REC_MAX_COUNT)
+2 -2
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
···
 	ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
 					   ingress,
 					   MLXSW_SP_ACL_PROFILE_FLOWER);
-	if (WARN_ON(IS_ERR(ruleset)))
+	if (IS_ERR(ruleset))
 		return;
 
 	rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
-	if (!WARN_ON(!rule)) {
+	if (rule) {
 		mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
 		mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
 	}
+2 -1
drivers/net/ethernet/qlogic/qed/qed_cxt.c
···
 		u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
 		u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
 		u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+		u32 align = elems_per_page * DQ_RANGE_ALIGN;
 
-		p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+		p_conn->cid_count = roundup(p_conn->cid_count, align);
 	}
 }
+2 -3
drivers/net/ethernet/qlogic/qed/qed_dev.c
···
 	 * size/capacity fields are of a u32 type.
 	 */
 	if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
-	     chain_size > 0x10000) ||
-	    (cnt_type == QED_CHAIN_CNT_TYPE_U32 &&
-	     chain_size > 0x100000000ULL)) {
+	     chain_size > ((u32)U16_MAX + 1)) ||
+	    (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
 		DP_NOTICE(cdev,
 			  "The actual chain size (0x%llx) is larger than the maximal possible value\n",
 			  chain_size);
+31
drivers/net/ethernet/qlogic/qed/qed_iscsi.c
···
 	p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
 	p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
 	p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+	p_init->ooo_enable = p_params->ooo_enable;
+	p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] +
+				  p_params->ll2_ooo_queue_id;
 	p_init->func_params.log_page_size = p_params->log_page_size;
 	val = p_params->num_tasks;
 	p_init->func_params.num_tasks = cpu_to_le16(val);
···
 	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
 }
 
+void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+			       struct qed_iscsi_conn *p_conn)
+{
+	qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+	qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+	qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct tcp_upload_params),
+			  p_conn->tcp_upload_params_virt_addr,
+			  p_conn->tcp_upload_params_phys_addr);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct scsi_terminate_extra_params),
+			  p_conn->queue_cnts_virt_addr,
+			  p_conn->queue_cnts_phys_addr);
+	kfree(p_conn);
+}
+
 struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
 {
 	struct qed_iscsi_info *p_iscsi_info;
···
 void qed_iscsi_free(struct qed_hwfn *p_hwfn,
 		    struct qed_iscsi_info *p_iscsi_info)
 {
+	struct qed_iscsi_conn *p_conn = NULL;
+
+	while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) {
+		p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+					  struct qed_iscsi_conn, list_entry);
+		if (p_conn) {
+			list_del(&p_conn->list_entry);
+			qed_iscsi_free_connection(p_hwfn, p_conn);
+		}
+	}
+
 	kfree(p_iscsi_info);
}
+8 -5
drivers/net/ethernet/qlogic/qed/qed_ll2.c
···
 	/* If need to reuse or there's no replacement buffer, repost this */
 	if (rc)
 		goto out_post;
+	dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+			 cdev->ll2->rx_size, DMA_FROM_DEVICE);
 
 	skb = build_skb(buffer->data, 0);
 	if (!skb) {
···
 static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
 				      struct qed_ll2_info *p_ll2_conn,
 				      union core_rx_cqe_union *p_cqe,
-				      unsigned long lock_flags,
+				      unsigned long *p_lock_flags,
 				      bool b_last_cqe)
 {
 	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
···
 		  "Mismatch between active_descq and the LL2 Rx chain\n");
 	list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-	spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+	spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
 	qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
 				    p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
-	spin_lock_irqsave(&p_rx->lock, lock_flags);
+	spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
 
 	return 0;
 }
···
 			break;
 		case CORE_RX_CQE_TYPE_REGULAR:
 			rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
-							cqe, flags, b_last_cqe);
+							cqe, &flags,
+							b_last_cqe);
 			break;
 		default:
 			rc = -EIO;
···
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
-	struct qed_ll2_conn ll2_info;
+	struct qed_ll2_conn ll2_info = { 0 };
 	int rc;
 
 	ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
+2
drivers/net/ethernet/qlogic/qed/qed_ooo.c
···
 	if (!p_ooo_info->ooo_history.p_cqes)
 		goto no_history_mem;
 
+	p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
 	return p_ooo_info;
 
 no_history_mem:
+25 -22
drivers/net/ethernet/smsc/smc91x.c
···
  * Ethtool support
  */
 static int
-smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_get_link_ksettings(struct net_device *dev,
+			       struct ethtool_link_ksettings *cmd)
 {
 	struct smc_local *lp = netdev_priv(dev);
 	int ret;
 
-	cmd->maxtxpkt = 1;
-	cmd->maxrxpkt = 1;
-
 	if (lp->phy_type != 0) {
 		spin_lock_irq(&lp->lock);
-		ret = mii_ethtool_gset(&lp->mii, cmd);
+		ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irq(&lp->lock);
 	} else {
-		cmd->supported = SUPPORTED_10baseT_Half |
+		u32 supported = SUPPORTED_10baseT_Half |
 				SUPPORTED_10baseT_Full |
 				SUPPORTED_TP | SUPPORTED_AUI;
 
 		if (lp->ctl_rspeed == 10)
-			ethtool_cmd_speed_set(cmd, SPEED_10);
+			cmd->base.speed = SPEED_10;
 		else if (lp->ctl_rspeed == 100)
-			ethtool_cmd_speed_set(cmd, SPEED_100);
+			cmd->base.speed = SPEED_100;
 
-		cmd->autoneg = AUTONEG_DISABLE;
-		cmd->transceiver = XCVR_INTERNAL;
-		cmd->port = 0;
-		cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF;
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		cmd->base.port = 0;
+		cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ?
+			DUPLEX_FULL : DUPLEX_HALF;
+
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported, supported);
 
 		ret = 0;
 	}
···
 }
 
 static int
-smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_set_link_ksettings(struct net_device *dev,
+			       const struct ethtool_link_ksettings *cmd)
 {
 	struct smc_local *lp = netdev_priv(dev);
 	int ret;
 
 	if (lp->phy_type != 0) {
 		spin_lock_irq(&lp->lock);
-		ret = mii_ethtool_sset(&lp->mii, cmd);
+		ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irq(&lp->lock);
 	} else {
-		if (cmd->autoneg != AUTONEG_DISABLE ||
-		    cmd->speed != SPEED_10 ||
-		    (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
-		    (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+		if (cmd->base.autoneg != AUTONEG_DISABLE ||
+		    cmd->base.speed != SPEED_10 ||
+		    (cmd->base.duplex != DUPLEX_HALF &&
+		     cmd->base.duplex != DUPLEX_FULL) ||
+		    (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI))
 			return -EINVAL;
 
-//		lp->port = cmd->port;
-		lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+//		lp->port = cmd->base.port;
+		lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
 
 //		if (netif_running(dev))
 //			smc_set_port(dev);
···
 
 static const struct ethtool_ops smc_ethtool_ops = {
-	.get_settings	= smc_ethtool_getsettings,
-	.set_settings	= smc_ethtool_setsettings,
 	.get_drvinfo	= smc_ethtool_getdrvinfo,
 
 	.get_msglevel	= smc_ethtool_getmsglevel,
···
 	.get_eeprom_len	= smc_ethtool_geteeprom_len,
 	.get_eeprom	= smc_ethtool_geteeprom,
 	.set_eeprom	= smc_ethtool_seteeprom,
+	.get_link_ksettings	= smc_ethtool_get_link_ksettings,
+	.set_link_ksettings	= smc_ethtool_set_link_ksettings,
 };
 
 static const struct net_device_ops smc_netdev_ops = {
+2 -1
drivers/net/hyperv/hyperv_net.h
···
 	u32 tx_checksum_mask;
 
+	u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+
 	/* Ethtool settings */
 	u8 duplex;
 	u32 speed;
···
 	struct nvsp_message revoke_packet;
 
-	u32 send_table[VRSS_SEND_TAB_SIZE];
 	u32 max_chn;
 	u32 num_chn;
 	spinlock_t sc_lock; /* Protects num_sc_offered variable */
+2 -6
drivers/net/hyperv/netvsc.c
···
 static void netvsc_send_table(struct hv_device *hdev,
 			      struct nvsp_message *nvmsg)
 {
-	struct netvsc_device *nvscdev;
 	struct net_device *ndev = hv_get_drvdata(hdev);
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 	int i;
 	u32 count, *tab;
-
-	nvscdev = get_outbound_net_device(hdev);
-	if (!nvscdev)
-		return;
 
 	count = nvmsg->msg.v5_msg.send_table.count;
 	if (count != VRSS_SEND_TAB_SIZE) {
···
 		      nvmsg->msg.v5_msg.send_table.offset);
 
 	for (i = 0; i < count; i++)
-		nvscdev->send_table[i] = tab[i];
+		net_device_ctx->tx_send_table[i] = tab[i];
 }
 
 static void netvsc_send_vf(struct net_device_context *net_device_ctx,
+3 -8
drivers/net/hyperv/netvsc_drv.c
···
 			void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
-	struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+	unsigned int num_tx_queues = ndev->real_num_tx_queues;
 	struct sock *sk = skb->sk;
 	int q_idx = sk_tx_queue_get(sk);
 
-	if (q_idx < 0 || skb->ooo_okay ||
-	    q_idx >= ndev->real_num_tx_queues) {
+	if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
 		u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
 		int new_idx;
 
-		new_idx = nvsc_dev->send_table[hash]
-			% nvsc_dev->num_chn;
+		new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
 
 		if (q_idx != new_idx && sk &&
 		    sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
···
 
 		q_idx = new_idx;
 	}
-
-	if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
-		q_idx = 0;
 
 	return q_idx;
 }
-15
drivers/net/phy/marvell.c
···
 	return m88e1510_hwmon_probe(phydev);
 }
 
-static void marvell_remove(struct phy_device *phydev)
-{
-#ifdef CONFIG_HWMON
-
-	struct marvell_priv *priv = phydev->priv;
-
-	if (priv && priv->hwmon_dev)
-		hwmon_device_unregister(priv->hwmon_dev);
-#endif
-}
-
 static struct phy_driver marvell_drivers[] = {
 	{
 		.phy_id = MARVELL_PHY_ID_88E1101,
···
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1121_probe,
-		.remove = &marvell_remove,
 		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
···
 		.features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1510_probe,
-		.remove = &marvell_remove,
 		.config_init = &m88e1510_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
···
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = m88e1510_probe,
-		.remove = &marvell_remove,
 		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
···
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1545",
 		.probe = m88e1510_probe,
-		.remove = &marvell_remove,
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &marvell_config_init,
+1 -1
drivers/net/phy/phy_device.c
···
 	.phy_id		= 0xffffffff,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Generic PHY",
-	.soft_reset	= genphy_soft_reset,
+	.soft_reset	= genphy_no_soft_reset,
 	.config_init	= genphy_config_init,
 	.features	= PHY_GBIT_FEATURES | SUPPORTED_MII |
 			  SUPPORTED_AUI | SUPPORTED_FIBRE |
+2 -1
drivers/net/phy/spi_ks8995.c
···
 	if (err)
 		return err;
 
-	ks->regs_attr.size = ks->chip->regs_size;
 	memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+	ks->regs_attr.size = ks->chip->regs_size;
 
 	err = ks8995_reset(ks);
 	if (err)
 		return err;
 
+	sysfs_attr_init(&ks->regs_attr.attr);
 	err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
 	if (err) {
 		dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
+1
drivers/net/team/team.c
···
 static void team_setup(struct net_device *dev)
 {
 	ether_setup(dev);
+	dev->max_mtu = ETH_MAX_MTU;
 
 	dev->netdev_ops = &team_netdev_ops;
 	dev->ethtool_ops = &team_ethtool_ops;
+15 -4
drivers/net/tun.c
···
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
+	struct tun_struct *tun = netdev_priv(dev);
+	int i;
+
 	netif_tx_start_all_queues(dev);
+
+	for (i = 0; i < tun->numqueues; i++) {
+		struct tun_file *tfile;
+
+		tfile = rtnl_dereference(tun->tfiles[i]);
+		tfile->socket.sk->sk_write_space(tfile->socket.sk);
+	}
+
 	return 0;
 }
···
 	if (!skb_array_empty(&tfile->tx_array))
 		mask |= POLLIN | POLLRDNORM;
 
-	if (sock_writeable(sk) ||
-	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
-	     sock_writeable(sk)))
+	if (tun->dev->flags & IFF_UP &&
+	    (sock_writeable(sk) ||
+	     (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+	      sock_writeable(sk))))
 		mask |= POLLOUT | POLLWRNORM;
 
 	if (tun->dev->reg_state != NETREG_REGISTERED)
···
 	int ret = 0;
 
 	pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
-	pr_info("%s\n", DRV_COPYRIGHT);
 
 	ret = rtnl_link_register(&tun_link_ops);
 	if (ret) {
+2 -1
drivers/net/vrf.c
···
 static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	int len = skb->len;
 	netdev_tx_t ret = is_ip_tx_frame(skb, dev);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
···
 		u64_stats_update_begin(&dstats->syncp);
 		dstats->tx_pkts++;
-		dstats->tx_bytes += skb->len;
+		dstats->tx_bytes += len;
 		u64_stats_update_end(&dstats->syncp);
 	} else {
 		this_cpu_inc(dev->dstats->tx_drps);
+40 -33
drivers/net/vxlan.c
··· 2976 2976 return 0; 2977 2977 } 2978 2978 2979 + static int __vxlan_dev_create(struct net *net, struct net_device *dev, 2980 + struct vxlan_config *conf) 2981 + { 2982 + struct vxlan_net *vn = net_generic(net, vxlan_net_id); 2983 + struct vxlan_dev *vxlan = netdev_priv(dev); 2984 + int err; 2985 + 2986 + err = vxlan_dev_configure(net, dev, conf, false); 2987 + if (err) 2988 + return err; 2989 + 2990 + dev->ethtool_ops = &vxlan_ethtool_ops; 2991 + 2992 + /* create an fdb entry for a valid default destination */ 2993 + if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { 2994 + err = vxlan_fdb_create(vxlan, all_zeros_mac, 2995 + &vxlan->default_dst.remote_ip, 2996 + NUD_REACHABLE | NUD_PERMANENT, 2997 + NLM_F_EXCL | NLM_F_CREATE, 2998 + vxlan->cfg.dst_port, 2999 + vxlan->default_dst.remote_vni, 3000 + vxlan->default_dst.remote_vni, 3001 + vxlan->default_dst.remote_ifindex, 3002 + NTF_SELF); 3003 + if (err) 3004 + return err; 3005 + } 3006 + 3007 + err = register_netdevice(dev); 3008 + if (err) { 3009 + vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); 3010 + return err; 3011 + } 3012 + 3013 + list_add(&vxlan->next, &vn->vxlan_list); 3014 + return 0; 3015 + } 3016 + 2979 3017 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], 2980 3018 struct net_device *dev, struct vxlan_config *conf, 2981 3019 bool changelink) ··· 3210 3172 static int vxlan_newlink(struct net *src_net, struct net_device *dev, 3211 3173 struct nlattr *tb[], struct nlattr *data[]) 3212 3174 { 3213 - struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); 3214 - struct vxlan_dev *vxlan = netdev_priv(dev); 3215 3175 struct vxlan_config conf; 3216 3176 int err; 3217 3177 ··· 3217 3181 if (err) 3218 3182 return err; 3219 3183 3220 - err = vxlan_dev_configure(src_net, dev, &conf, false); 3221 - if (err) 3222 - return err; 3223 - 3224 - dev->ethtool_ops = &vxlan_ethtool_ops; 3225 - 3226 - /* create an fdb entry for a valid default destination */ 3227 - if 
(!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { 3228 - err = vxlan_fdb_create(vxlan, all_zeros_mac, 3229 - &vxlan->default_dst.remote_ip, 3230 - NUD_REACHABLE | NUD_PERMANENT, 3231 - NLM_F_EXCL | NLM_F_CREATE, 3232 - vxlan->cfg.dst_port, 3233 - vxlan->default_dst.remote_vni, 3234 - vxlan->default_dst.remote_vni, 3235 - vxlan->default_dst.remote_ifindex, 3236 - NTF_SELF); 3237 - if (err) 3238 - return err; 3239 - } 3240 - 3241 - err = register_netdevice(dev); 3242 - if (err) { 3243 - vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); 3244 - return err; 3245 - } 3246 - 3247 - list_add(&vxlan->next, &vn->vxlan_list); 3248 - 3249 - return 0; 3184 + return __vxlan_dev_create(src_net, dev, &conf); 3250 3185 } 3251 3186 3252 3187 static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], ··· 3447 3440 if (IS_ERR(dev)) 3448 3441 return dev; 3449 3442 3450 - err = vxlan_dev_configure(net, dev, conf, false); 3443 + err = __vxlan_dev_create(net, dev, conf); 3451 3444 if (err < 0) { 3452 3445 free_netdev(dev); 3453 3446 return ERR_PTR(err);
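The hunk above factors the netlink create path into `__vxlan_dev_create()` so the same configure / default-fdb / register / rollback sequence is shared with `vxlan_dev_create()`, which previously called only `vxlan_dev_configure()` and so skipped both the default fdb entry and the cleanup when `register_netdevice()` fails. A minimal userspace sketch of that rollback shape (all names here are illustrative, not kernel APIs):

```c
#include <assert.h>
#include <stdbool.h>

/* Illustrative sketch only: a hypothetical create helper showing the
 * shared shape of __vxlan_dev_create(): configure, add a default entry,
 * then undo the entry if the final registration step fails.
 */
struct fake_dev {
	bool configured;
	bool has_default_fdb;
	bool registered;
};

static int fake_register(struct fake_dev *d, bool fail)
{
	if (fail)
		return -1;	/* stand-in for a register_netdevice() error */
	d->registered = true;
	return 0;
}

static int fake_dev_create(struct fake_dev *d, bool register_fails)
{
	int err;

	d->configured = true;		/* vxlan_dev_configure() */
	d->has_default_fdb = true;	/* vxlan_fdb_create() for default dst */

	err = fake_register(d, register_fails);
	if (err) {
		d->has_default_fdb = false; /* vxlan_fdb_delete_default() */
		return err;
	}
	return 0;
}
```

Sharing one helper means both entry points get the error-path cleanup for free, instead of each reimplementing (or forgetting) it.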
+2 -2
drivers/net/wan/fsl_ucc_hdlc.c
··· 381 381 /* set bd status and length */ 382 382 bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S; 383 383 384 - iowrite16be(bd_status, &bd->status); 385 384 iowrite16be(skb->len, &bd->length); 385 + iowrite16be(bd_status, &bd->status); 386 386 387 387 /* Move to next BD in the ring */ 388 388 if (!(bd_status & T_W_S)) ··· 457 457 struct sk_buff *skb; 458 458 hdlc_device *hdlc = dev_to_hdlc(dev); 459 459 struct qe_bd *bd; 460 - u32 bd_status; 460 + u16 bd_status; 461 461 u16 length, howmany = 0; 462 462 u8 *bdbuffer; 463 463 int i;
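The fsl_ucc_hdlc fix swaps the two `iowrite16be()` calls so `bd->length` is written before `bd->status`: the status write sets the ready bit that hands the buffer descriptor to the controller, so writing status first could let the hardware see a stale length (it also narrows `bd_status` to `u16` to match the 16-bit field). A sketch of the hand-off ordering, using a C11 release/acquire pair as a userspace stand-in for the MMIO accessors:

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

/* Sketch of the descriptor hand-off ordering: fill in all data fields
 * first, then publish ownership with a release store on the status word
 * (a stand-in for the iowrite16be() that sets the ready bit).
 */
#define BD_READY 0x8000u

struct fake_bd {
	uint16_t length;
	_Atomic uint16_t status;
};

static void bd_submit(struct fake_bd *bd, uint16_t len, uint16_t flags)
{
	bd->length = len;			/* data first */
	atomic_store_explicit(&bd->status, flags | BD_READY,
			      memory_order_release);	/* then hand off */
}

static int bd_poll(struct fake_bd *bd, uint16_t *len)
{
	uint16_t s = atomic_load_explicit(&bd->status,
					  memory_order_acquire);
	if (!(s & BD_READY))
		return 0;
	*len = bd->length;	/* safe: acquire pairs with the release */
	return 1;
}
```

The consumer (hardware, in the real driver) only examines the other fields after observing the ready bit, so the publish order is what makes the length reliable.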
+3
drivers/net/wimax/i2400m/usb.c
··· 467 467 struct i2400mu *i2400mu; 468 468 struct usb_device *usb_dev = interface_to_usbdev(iface); 469 469 470 + if (iface->cur_altsetting->desc.bNumEndpoints < 4) 471 + return -ENODEV; 472 + 470 473 if (usb_dev->speed != USB_SPEED_HIGH) 471 474 dev_err(dev, "device not connected as high speed\n"); 472 475
+17 -9
drivers/net/xen-netback/interface.c
··· 165 165 { 166 166 struct xenvif *vif = netdev_priv(dev); 167 167 struct xenvif_queue *queue = NULL; 168 - unsigned int num_queues = vif->num_queues; 168 + unsigned int num_queues; 169 169 u16 index; 170 170 struct xenvif_rx_cb *cb; 171 171 172 172 BUG_ON(skb->dev != dev); 173 173 174 - /* Drop the packet if queues are not set up */ 174 + /* Drop the packet if queues are not set up. 175 + * This handler should be called inside an RCU read section 176 + * so we don't need to enter it here explicitly. 177 + */ 178 + num_queues = READ_ONCE(vif->num_queues); 175 179 if (num_queues < 1) 176 180 goto drop; 177 181 ··· 226 222 { 227 223 struct xenvif *vif = netdev_priv(dev); 228 224 struct xenvif_queue *queue = NULL; 225 + unsigned int num_queues; 229 226 u64 rx_bytes = 0; 230 227 u64 rx_packets = 0; 231 228 u64 tx_bytes = 0; 232 229 u64 tx_packets = 0; 233 230 unsigned int index; 234 231 235 - spin_lock(&vif->lock); 236 - if (vif->queues == NULL) 237 - goto out; 232 + rcu_read_lock(); 233 + num_queues = READ_ONCE(vif->num_queues); 238 234 239 235 /* Aggregate tx and rx stats from each queue */ 240 - for (index = 0; index < vif->num_queues; ++index) { 236 + for (index = 0; index < num_queues; ++index) { 241 237 queue = &vif->queues[index]; 242 238 rx_bytes += queue->stats.rx_bytes; 243 239 rx_packets += queue->stats.rx_packets; ··· 245 241 tx_packets += queue->stats.tx_packets; 246 242 } 247 243 248 - out: 249 - spin_unlock(&vif->lock); 244 + rcu_read_unlock(); 250 245 251 246 vif->dev->stats.rx_bytes = rx_bytes; 252 247 vif->dev->stats.rx_packets = rx_packets; ··· 381 378 struct ethtool_stats *stats, u64 * data) 382 379 { 383 380 struct xenvif *vif = netdev_priv(dev); 384 - unsigned int num_queues = vif->num_queues; 381 + unsigned int num_queues; 385 382 int i; 386 383 unsigned int queue_index; 384 + 385 + rcu_read_lock(); 386 + num_queues = READ_ONCE(vif->num_queues); 387 387 388 388 for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) { 389 389 unsigned long accum = 0; 
··· 396 390 } 397 391 data[i] = accum; 398 392 } 393 + 394 + rcu_read_unlock(); 399 395 } 400 396 401 397 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
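The interface.c changes replace `vif->lock` with an RCU read section and snapshot `vif->num_queues` exactly once through `READ_ONCE()`, so the loop bound cannot change underneath the iteration while the teardown path concurrently publishes zero. A sketch of the snapshot idiom, with a C11 relaxed atomic load standing in for `READ_ONCE()`:

```c
#include <assert.h>
#include <stdatomic.h>

/* Sketch of the READ_ONCE() snapshot idiom: read the shared counter
 * exactly once and use the local copy as the loop bound, so a concurrent
 * writer shrinking it cannot change the bound mid-iteration.
 */
#define MAX_QUEUES 8

static _Atomic unsigned int num_queues;	/* written by the teardown path */
static unsigned long queue_bytes[MAX_QUEUES];

static unsigned long sum_queue_bytes(void)
{
	/* one snapshot, like READ_ONCE(vif->num_queues) */
	unsigned int n = atomic_load_explicit(&num_queues,
					      memory_order_relaxed);
	unsigned long total = 0;

	for (unsigned int i = 0; i < n && i < MAX_QUEUES; i++)
		total += queue_bytes[i];
	return total;
}
```

Without the snapshot, the compiler is free to reload the plain field on every loop test, and a reader can index past an array the writer has already begun tearing down.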
+1 -1
drivers/net/xen-netback/netback.c
··· 214 214 netdev_err(vif->dev, "fatal error; disabling device\n"); 215 215 vif->disabled = true; 216 216 /* Disable the vif from queue 0's kthread */ 217 - if (vif->queues) 217 + if (vif->num_queues) 218 218 xenvif_kick_thread(&vif->queues[0]); 219 219 } 220 220
+10 -10
drivers/net/xen-netback/xenbus.c
··· 495 495 struct xenvif *vif = be->vif; 496 496 497 497 if (vif) { 498 + unsigned int num_queues = vif->num_queues; 498 499 unsigned int queue_index; 499 - struct xenvif_queue *queues; 500 500 501 501 xen_unregister_watchers(vif); 502 502 #ifdef CONFIG_DEBUG_FS 503 503 xenvif_debugfs_delif(vif); 504 504 #endif /* CONFIG_DEBUG_FS */ 505 505 xenvif_disconnect_data(vif); 506 - for (queue_index = 0; 507 - queue_index < vif->num_queues; 508 - ++queue_index) 506 + 507 + /* At this point some of the handlers may still be active 508 + * so we need to have additional synchronization here. 509 + */ 510 + vif->num_queues = 0; 511 + synchronize_net(); 512 + 513 + for (queue_index = 0; queue_index < num_queues; ++queue_index) 509 514 xenvif_deinit_queue(&vif->queues[queue_index]); 510 515 511 - spin_lock(&vif->lock); 512 - queues = vif->queues; 513 - vif->num_queues = 0; 516 + vfree(vif->queues); 514 517 vif->queues = NULL; 515 - spin_unlock(&vif->lock); 516 - 517 - vfree(queues); 518 518 519 519 xenvif_disconnect_ctrl(vif); 520 520 }
+2 -2
drivers/staging/lustre/lnet/lnet/lib-socket.c
··· 532 532 533 533 newsock->ops = sock->ops; 534 534 535 - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); 535 + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); 536 536 if (rc == -EAGAIN) { 537 537 /* Nothing ready, so wait for activity */ 538 538 init_waitqueue_entry(&wait, current); ··· 540 540 set_current_state(TASK_INTERRUPTIBLE); 541 541 schedule(); 542 542 remove_wait_queue(sk_sleep(sock->sk), &wait); 543 - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); 543 + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); 544 544 } 545 545 546 546 if (rc)
+1 -1
fs/dlm/lowcomms.c
··· 743 743 newsock->type = con->sock->type; 744 744 newsock->ops = con->sock->ops; 745 745 746 - result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); 746 + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true); 747 747 if (result < 0) 748 748 goto accept_err; 749 749
+1 -1
fs/ocfs2/cluster/tcp.c
··· 1863 1863 1864 1864 new_sock->type = sock->type; 1865 1865 new_sock->ops = sock->ops; 1866 - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); 1866 + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); 1867 1867 if (ret < 0) 1868 1868 goto out; 1869 1869
+1 -1
include/crypto/if_alg.h
··· 73 73 74 74 int af_alg_release(struct socket *sock); 75 75 void af_alg_release_parent(struct sock *sk); 76 - int af_alg_accept(struct sock *sk, struct socket *newsock); 76 + int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern); 77 77 78 78 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); 79 79 void af_alg_free_sg(struct af_alg_sgl *sgl);
+1
include/linux/dccp.h
··· 163 163 __u64 dreq_isr; 164 164 __u64 dreq_gsr; 165 165 __be32 dreq_service; 166 + spinlock_t dreq_lock; 166 167 struct list_head dreq_featneg; 167 168 __u32 dreq_timestamp_echo; 168 169 __u32 dreq_timestamp_time;
+12 -4
include/linux/filter.h
··· 409 409 u16 pages; /* Number of allocated pages */ 410 410 kmemcheck_bitfield_begin(meta); 411 411 u16 jited:1, /* Is our filter JIT'ed? */ 412 + locked:1, /* Program image locked? */ 412 413 gpl_compatible:1, /* Is filter GPL compatible? */ 413 414 cb_access:1, /* Is control block accessed? */ 414 415 dst_needed:1, /* Do we need dst entry? */ ··· 555 554 #ifdef CONFIG_ARCH_HAS_SET_MEMORY 556 555 static inline void bpf_prog_lock_ro(struct bpf_prog *fp) 557 556 { 558 - set_memory_ro((unsigned long)fp, fp->pages); 557 + fp->locked = 1; 558 + WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); 559 559 } 560 560 561 561 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) 562 562 { 563 - set_memory_rw((unsigned long)fp, fp->pages); 563 + if (fp->locked) { 564 + WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); 565 + /* In case set_memory_rw() fails, we want to be the first 566 + * to crash here instead of some random place later on. 567 + */ 568 + fp->locked = 0; 569 + } 564 570 } 565 571 566 572 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) 567 573 { 568 - set_memory_ro((unsigned long)hdr, hdr->pages); 574 + WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); 569 575 } 570 576 571 577 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) 572 578 { 573 - set_memory_rw((unsigned long)hdr, hdr->pages); 579 + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); 574 580 } 575 581 #else 576 582 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
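The filter.h hunk records in a `locked` bit whether the image was actually made read-only, so `bpf_prog_unlock_ro()` only calls `set_memory_rw()` on images that were locked, never twice, and the return values are now checked with `WARN_ON_ONCE()` instead of being ignored. A sketch of the guarded lock/unlock pair (names illustrative):

```c
#include <assert.h>
#include <stdbool.h>

/* Sketch of the "remember whether we locked it" guard from
 * bpf_prog_lock_ro()/bpf_prog_unlock_ro(): only undo the protection
 * change if it was actually applied, and never undo it twice.
 */
struct prog_image {
	bool locked;	/* mirrors the new fp->locked bit */
	int ro_calls;	/* counts protection changes (test instrumentation) */
	int rw_calls;
};

static void image_lock_ro(struct prog_image *p)
{
	p->locked = true;
	p->ro_calls++;		/* stand-in for set_memory_ro() */
}

static void image_unlock_rw(struct prog_image *p)
{
	if (p->locked) {
		p->rw_calls++;	/* stand-in for set_memory_rw() */
		p->locked = false; /* make a second unlock a no-op */
	}
}
```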
+5
include/linux/list_nulls.h
··· 29 29 ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) 30 30 31 31 #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) 32 + 33 + #define hlist_nulls_entry_safe(ptr, type, member) \ 34 + ({ typeof(ptr) ____ptr = (ptr); \ 35 + !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \ 36 + }) 32 37 /** 33 38 * ptr_is_a_nulls - Test if a ptr is a nulls 34 39 * @ptr: ptr to be tested
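`hlist_nulls_entry_safe()` mirrors `hlist_entry_safe()` but must test for a "nulls" marker first: the terminator of a nulls list is not NULL but an odd-tagged value encoding an id (typically the bucket index), so dereferencing it would be a bug. A sketch of the tagged-pointer encoding, matching the kernel's `NULLS_MARKER()`:

```c
#include <assert.h>
#include <stdint.h>

/* Sketch of the hlist_nulls pointer encoding: list terminators are odd
 * "nulls" values (2*id + 1) rather than NULL, so a walker can tell which
 * bucket it ended in and must check is_a_nulls() before dereferencing.
 */
#define NULLS_MARKER(id) ((void *)(((uintptr_t)(id) << 1) | 1))

static int is_a_nulls(const void *p)
{
	return (uintptr_t)p & 1;
}

static unsigned long get_nulls_value(const void *p)
{
	return (uintptr_t)p >> 1;
}
```

Real node pointers are at least 2-byte aligned, so their low bit is always clear; the tag bit cleanly separates the two cases.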
+1 -1
include/linux/net.h
··· 146 146 int (*socketpair)(struct socket *sock1, 147 147 struct socket *sock2); 148 148 int (*accept) (struct socket *sock, 149 - struct socket *newsock, int flags); 149 + struct socket *newsock, int flags, bool kern); 150 150 int (*getname) (struct socket *sock, 151 151 struct sockaddr *addr, 152 152 int *sockaddr_len, int peer);
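The `proto_ops->accept()` signature grows a `bool kern` argument, threaded through every in-tree implementation in this pull (dlm passes true; lustre, ocfs2 and the protocol handlers propagate their caller's value), so a kernel-internal accept can be classed separately from a userspace one for lockdep. A sketch of threading such a flag through an ops table (names illustrative):

```c
#include <assert.h>
#include <stdbool.h>

/* Sketch of the proto_ops change: accept() gains a `bool kern` flag so
 * sockets accepted on behalf of the kernel can be distinguished from
 * ones accepted for userspace. Names here are illustrative only.
 */
struct fake_sock {
	bool kern;	/* mirrors the new sk->sk_kern_sock bit */
};

struct fake_proto_ops {
	int (*accept)(struct fake_sock *newsock, int flags, bool kern);
};

static int fake_accept(struct fake_sock *newsock, int flags, bool kern)
{
	(void)flags;
	newsock->kern = kern;	/* new socket inherits the caller's context */
	return 0;
}

static const struct fake_proto_ops ops = { .accept = fake_accept };
```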
+4
include/linux/phy.h
··· 837 837 int genphy_suspend(struct phy_device *phydev); 838 838 int genphy_resume(struct phy_device *phydev); 839 839 int genphy_soft_reset(struct phy_device *phydev); 840 + static inline int genphy_no_soft_reset(struct phy_device *phydev) 841 + { 842 + return 0; 843 + } 840 844 void phy_driver_unregister(struct phy_driver *drv); 841 845 void phy_drivers_unregister(struct phy_driver *drv, int n); 842 846 int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
+14
include/linux/rculist_nulls.h
··· 156 156 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ 157 157 pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) 158 158 159 + /** 160 + * hlist_nulls_for_each_entry_safe - 161 + * iterate over list of given type safe against removal of list entry 162 + * @tpos: the type * to use as a loop cursor. 163 + * @pos: the &struct hlist_nulls_node to use as a loop cursor. 164 + * @head: the head for your list. 165 + * @member: the name of the hlist_nulls_node within the struct. 166 + */ 167 + #define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ 168 + for (({barrier();}), \ 169 + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ 170 + (!is_a_nulls(pos)) && \ 171 + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ 172 + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) 159 173 #endif 160 174 #endif
+2 -1
include/net/inet_common.h
··· 20 20 int addr_len, int flags, int is_sendmsg); 21 21 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, 22 22 int addr_len, int flags); 23 - int inet_accept(struct socket *sock, struct socket *newsock, int flags); 23 + int inet_accept(struct socket *sock, struct socket *newsock, int flags, 24 + bool kern); 24 25 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); 25 26 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 26 27 size_t size, int flags);
+1 -1
include/net/inet_connection_sock.h
··· 258 258 return (unsigned long)min_t(u64, when, max_when); 259 259 } 260 260 261 - struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); 261 + struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); 262 262 263 263 int inet_csk_get_port(struct sock *sk, unsigned short snum); 264 264
+2 -1
include/net/sctp/structs.h
··· 476 476 int (*send_verify) (struct sctp_sock *, union sctp_addr *); 477 477 int (*supported_addrs)(const struct sctp_sock *, __be16 *); 478 478 struct sock *(*create_accept_sk) (struct sock *sk, 479 - struct sctp_association *asoc); 479 + struct sctp_association *asoc, 480 + bool kern); 480 481 int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); 481 482 void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); 482 483 void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
+6 -3
include/net/sock.h
··· 236 236 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN 237 237 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 238 238 * @sk_lock: synchronizer 239 + * @sk_kern_sock: True if sock is using kernel lock classes 239 240 * @sk_rcvbuf: size of receive buffer in bytes 240 241 * @sk_wq: sock wait queue and async head 241 242 * @sk_rx_dst: receive input route used by early demux ··· 431 430 #endif 432 431 433 432 kmemcheck_bitfield_begin(flags); 434 - unsigned int sk_padding : 2, 433 + unsigned int sk_padding : 1, 434 + sk_kern_sock : 1, 435 435 sk_no_check_tx : 1, 436 436 sk_no_check_rx : 1, 437 437 sk_userlocks : 4, ··· 1017 1015 int addr_len); 1018 1016 int (*disconnect)(struct sock *sk, int flags); 1019 1017 1020 - struct sock * (*accept)(struct sock *sk, int flags, int *err); 1018 + struct sock * (*accept)(struct sock *sk, int flags, int *err, 1019 + bool kern); 1021 1020 1022 1021 int (*ioctl)(struct sock *sk, int cmd, 1023 1022 unsigned long arg); ··· 1576 1573 int sock_no_bind(struct socket *, struct sockaddr *, int); 1577 1574 int sock_no_connect(struct socket *, struct sockaddr *, int, int); 1578 1575 int sock_no_socketpair(struct socket *, struct socket *); 1579 - int sock_no_accept(struct socket *, struct socket *, int); 1576 + int sock_no_accept(struct socket *, struct socket *, int, bool); 1580 1577 int sock_no_getname(struct socket *, struct sockaddr *, int *, int); 1581 1578 unsigned int sock_no_poll(struct file *, struct socket *, 1582 1579 struct poll_table_struct *);
+1 -1
include/uapi/linux/packet_diag.h
··· 64 64 __u32 pdmc_count; 65 65 __u16 pdmc_type; 66 66 __u16 pdmc_alen; 67 - __u8 pdmc_addr[MAX_ADDR_LEN]; 67 + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ 68 68 }; 69 69 70 70 struct packet_diag_ring {
+80 -39
kernel/bpf/hashtab.c
··· 13 13 #include <linux/bpf.h> 14 14 #include <linux/jhash.h> 15 15 #include <linux/filter.h> 16 + #include <linux/rculist_nulls.h> 16 17 #include "percpu_freelist.h" 17 18 #include "bpf_lru_list.h" 18 19 19 20 struct bucket { 20 - struct hlist_head head; 21 + struct hlist_nulls_head head; 21 22 raw_spinlock_t lock; 22 23 }; 23 24 ··· 45 44 /* each htab element is struct htab_elem + key + value */ 46 45 struct htab_elem { 47 46 union { 48 - struct hlist_node hash_node; 49 - struct bpf_htab *htab; 50 - struct pcpu_freelist_node fnode; 47 + struct hlist_nulls_node hash_node; 48 + struct { 49 + void *padding; 50 + union { 51 + struct bpf_htab *htab; 52 + struct pcpu_freelist_node fnode; 53 + }; 54 + }; 51 55 }; 52 56 union { 53 57 struct rcu_head rcu; ··· 168 162 offsetof(struct htab_elem, lru_node), 169 163 htab->elem_size, htab->map.max_entries); 170 164 else 171 - pcpu_freelist_populate(&htab->freelist, htab->elems, 165 + pcpu_freelist_populate(&htab->freelist, 166 + htab->elems + offsetof(struct htab_elem, fnode), 172 167 htab->elem_size, htab->map.max_entries); 173 168 174 169 return 0; ··· 223 216 struct bpf_htab *htab; 224 217 int err, i; 225 218 u64 cost; 219 + 220 + BUILD_BUG_ON(offsetof(struct htab_elem, htab) != 221 + offsetof(struct htab_elem, hash_node.pprev)); 222 + BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != 223 + offsetof(struct htab_elem, hash_node.pprev)); 226 224 227 225 if (lru && !capable(CAP_SYS_ADMIN)) 228 226 /* LRU implementation is much complicated than other ··· 338 326 goto free_htab; 339 327 340 328 for (i = 0; i < htab->n_buckets; i++) { 341 - INIT_HLIST_HEAD(&htab->buckets[i].head); 329 + INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); 342 330 raw_spin_lock_init(&htab->buckets[i].lock); 343 331 } 344 332 ··· 378 366 return &htab->buckets[hash & (htab->n_buckets - 1)]; 379 367 } 380 368 381 - static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) 369 + static inline struct hlist_nulls_head 
*select_bucket(struct bpf_htab *htab, u32 hash) 382 370 { 383 371 return &__select_bucket(htab, hash)->head; 384 372 } 385 373 386 - static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, 374 + /* this lookup function can only be called with bucket lock taken */ 375 + static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, 387 376 void *key, u32 key_size) 388 377 { 378 + struct hlist_nulls_node *n; 389 379 struct htab_elem *l; 390 380 391 - hlist_for_each_entry_rcu(l, head, hash_node) 381 + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 392 382 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 393 383 return l; 384 + 385 + return NULL; 386 + } 387 + 388 + /* can be called without bucket lock. it will repeat the loop in 389 + * the unlikely event when elements moved from one bucket into another 390 + * while link list is being walked 391 + */ 392 + static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, 393 + u32 hash, void *key, 394 + u32 key_size, u32 n_buckets) 395 + { 396 + struct hlist_nulls_node *n; 397 + struct htab_elem *l; 398 + 399 + again: 400 + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 401 + if (l->hash == hash && !memcmp(&l->key, key, key_size)) 402 + return l; 403 + 404 + if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) 405 + goto again; 394 406 395 407 return NULL; 396 408 } ··· 423 387 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) 424 388 { 425 389 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 426 - struct hlist_head *head; 390 + struct hlist_nulls_head *head; 427 391 struct htab_elem *l; 428 392 u32 hash, key_size; 429 393 ··· 436 400 437 401 head = select_bucket(htab, hash); 438 402 439 - l = lookup_elem_raw(head, hash, key, key_size); 403 + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 440 404 441 405 return l; 442 406 } ··· 469 433 static bool htab_lru_map_delete_node(void 
*arg, struct bpf_lru_node *node) 470 434 { 471 435 struct bpf_htab *htab = (struct bpf_htab *)arg; 472 - struct htab_elem *l, *tgt_l; 473 - struct hlist_head *head; 436 + struct htab_elem *l = NULL, *tgt_l; 437 + struct hlist_nulls_head *head; 438 + struct hlist_nulls_node *n; 474 439 unsigned long flags; 475 440 struct bucket *b; 476 441 ··· 481 444 482 445 raw_spin_lock_irqsave(&b->lock, flags); 483 446 484 - hlist_for_each_entry_rcu(l, head, hash_node) 447 + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 485 448 if (l == tgt_l) { 486 - hlist_del_rcu(&l->hash_node); 449 + hlist_nulls_del_rcu(&l->hash_node); 487 450 break; 488 451 } 489 452 ··· 496 459 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 497 460 { 498 461 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 499 - struct hlist_head *head; 462 + struct hlist_nulls_head *head; 500 463 struct htab_elem *l, *next_l; 501 464 u32 hash, key_size; 502 465 int i; ··· 510 473 head = select_bucket(htab, hash); 511 474 512 475 /* lookup the key */ 513 - l = lookup_elem_raw(head, hash, key, key_size); 476 + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 514 477 515 478 if (!l) { 516 479 i = 0; ··· 518 481 } 519 482 520 483 /* key was found, get next key in the same bucket */ 521 - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), 484 + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), 522 485 struct htab_elem, hash_node); 523 486 524 487 if (next_l) { ··· 537 500 head = select_bucket(htab, i); 538 501 539 502 /* pick first element in the bucket */ 540 - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), 503 + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), 541 504 struct htab_elem, hash_node); 542 505 if (next_l) { 543 506 /* if it's not empty, just return it */ ··· 619 582 int err = 0; 620 583 621 584 if (prealloc) { 622 
- l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); 623 - if (!l_new) 585 + struct pcpu_freelist_node *l; 586 + 587 + l = pcpu_freelist_pop(&htab->freelist); 588 + if (!l) 624 589 err = -E2BIG; 590 + else 591 + l_new = container_of(l, struct htab_elem, fnode); 625 592 } else { 626 593 if (atomic_inc_return(&htab->count) > htab->map.max_entries) { 627 594 atomic_dec(&htab->count); ··· 702 661 { 703 662 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 704 663 struct htab_elem *l_new = NULL, *l_old; 705 - struct hlist_head *head; 664 + struct hlist_nulls_head *head; 706 665 unsigned long flags; 707 666 struct bucket *b; 708 667 u32 key_size, hash; ··· 741 700 /* add new element to the head of the list, so that 742 701 * concurrent search will find it before old elem 743 702 */ 744 - hlist_add_head_rcu(&l_new->hash_node, head); 703 + hlist_nulls_add_head_rcu(&l_new->hash_node, head); 745 704 if (l_old) { 746 - hlist_del_rcu(&l_old->hash_node); 705 + hlist_nulls_del_rcu(&l_old->hash_node); 747 706 free_htab_elem(htab, l_old); 748 707 } 749 708 ret = 0; ··· 757 716 { 758 717 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 759 718 struct htab_elem *l_new, *l_old = NULL; 760 - struct hlist_head *head; 719 + struct hlist_nulls_head *head; 761 720 unsigned long flags; 762 721 struct bucket *b; 763 722 u32 key_size, hash; ··· 798 757 /* add new element to the head of the list, so that 799 758 * concurrent search will find it before old elem 800 759 */ 801 - hlist_add_head_rcu(&l_new->hash_node, head); 760 + hlist_nulls_add_head_rcu(&l_new->hash_node, head); 802 761 if (l_old) { 803 762 bpf_lru_node_set_ref(&l_new->lru_node); 804 - hlist_del_rcu(&l_old->hash_node); 763 + hlist_nulls_del_rcu(&l_old->hash_node); 805 764 } 806 765 ret = 0; 807 766 ··· 822 781 { 823 782 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 824 783 struct htab_elem *l_new = NULL, *l_old; 825 - struct hlist_head *head; 784 + struct 
hlist_nulls_head *head; 826 785 unsigned long flags; 827 786 struct bucket *b; 828 787 u32 key_size, hash; ··· 861 820 ret = PTR_ERR(l_new); 862 821 goto err; 863 822 } 864 - hlist_add_head_rcu(&l_new->hash_node, head); 823 + hlist_nulls_add_head_rcu(&l_new->hash_node, head); 865 824 } 866 825 ret = 0; 867 826 err: ··· 875 834 { 876 835 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 877 836 struct htab_elem *l_new = NULL, *l_old; 878 - struct hlist_head *head; 837 + struct hlist_nulls_head *head; 879 838 unsigned long flags; 880 839 struct bucket *b; 881 840 u32 key_size, hash; ··· 923 882 } else { 924 883 pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), 925 884 value, onallcpus); 926 - hlist_add_head_rcu(&l_new->hash_node, head); 885 + hlist_nulls_add_head_rcu(&l_new->hash_node, head); 927 886 l_new = NULL; 928 887 } 929 888 ret = 0; ··· 951 910 static int htab_map_delete_elem(struct bpf_map *map, void *key) 952 911 { 953 912 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 954 - struct hlist_head *head; 913 + struct hlist_nulls_head *head; 955 914 struct bucket *b; 956 915 struct htab_elem *l; 957 916 unsigned long flags; ··· 971 930 l = lookup_elem_raw(head, hash, key, key_size); 972 931 973 932 if (l) { 974 - hlist_del_rcu(&l->hash_node); 933 + hlist_nulls_del_rcu(&l->hash_node); 975 934 free_htab_elem(htab, l); 976 935 ret = 0; 977 936 } ··· 983 942 static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) 984 943 { 985 944 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 986 - struct hlist_head *head; 945 + struct hlist_nulls_head *head; 987 946 struct bucket *b; 988 947 struct htab_elem *l; 989 948 unsigned long flags; ··· 1003 962 l = lookup_elem_raw(head, hash, key, key_size); 1004 963 1005 964 if (l) { 1006 - hlist_del_rcu(&l->hash_node); 965 + hlist_nulls_del_rcu(&l->hash_node); 1007 966 ret = 0; 1008 967 } 1009 968 ··· 1018 977 int i; 1019 978 1020 979 for (i = 0; i < htab->n_buckets; 
i++) { 1021 - struct hlist_head *head = select_bucket(htab, i); 1022 - struct hlist_node *n; 980 + struct hlist_nulls_head *head = select_bucket(htab, i); 981 + struct hlist_nulls_node *n; 1023 982 struct htab_elem *l; 1024 983 1025 - hlist_for_each_entry_safe(l, n, head, hash_node) { 1026 - hlist_del_rcu(&l->hash_node); 984 + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 985 + hlist_nulls_del_rcu(&l->hash_node); 1027 986 if (l->state != HTAB_EXTRA_ELEM_USED) 1028 987 htab_elem_free(htab, l); 1029 988 }
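The whole hashtab conversion to `hlist_nulls` exists so a lockless lookup racing with an element being freed and reinserted into a different bucket can detect the migration: if the terminating nulls value does not match the bucket the walk started in, `lookup_nulls_elem_raw()` retries. A single-threaded sketch of that retry rule using the same tagged terminator:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of lookup_nulls_elem_raw()'s retry rule: walk a bucket, and if
 * the walk ends on a nulls marker for a *different* bucket (meaning an
 * element we traversed moved under us), start over.
 */
#define NULLS(id) ((void *)(((uintptr_t)(id) << 1) | 1))

struct nulls_node {
	int key;
	void *next;	/* either a node pointer or a NULLS(id) marker */
};

static struct nulls_node *lookup(void *head, int key, unsigned long bucket)
{
	int retries = 1;	/* the kernel retries unboundedly under RCU;
				 * bounded here since nothing mutates the list */
	void *p;
again:
	for (p = head; !((uintptr_t)p & 1); p = ((struct nulls_node *)p)->next)
		if (((struct nulls_node *)p)->key == key)
			return p;
	if (((uintptr_t)p >> 1) != bucket && retries--)
		goto again;	/* ended in the wrong bucket: retry */
	return NULL;
}
```

This is also why the `BUILD_BUG_ON`s in the hunk pin the freelist/htab fields over `hash_node.pprev` rather than the head pointer: a reused element's first word must stay a valid-looking list pointer for walkers still traversing it.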
+6
kernel/bpf/lpm_trie.c
··· 500 500 raw_spin_unlock(&trie->lock); 501 501 } 502 502 503 + static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) 504 + { 505 + return -ENOTSUPP; 506 + } 507 + 503 508 static const struct bpf_map_ops trie_ops = { 504 509 .map_alloc = trie_alloc, 505 510 .map_free = trie_free, 511 + .map_get_next_key = trie_get_next_key, 506 512 .map_lookup_elem = trie_lookup_elem, 507 513 .map_update_elem = trie_update_elem, 508 514 .map_delete_elem = trie_delete_elem,
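The LPM-trie crash (item 2 in the pull description) came from `map_get_next_key` simply being absent from `trie_ops`, leaving a NULL function pointer that the generic map syscall path called; the fix installs a stub returning `-ENOTSUPP`. A sketch of why an explicit stub beats a NULL slot in an ops table (names illustrative):

```c
#include <assert.h>
#include <stddef.h>

/* Sketch of the lpm_trie fix: unsupported operations in an ops table get
 * an explicit stub returning an error instead of a NULL pointer the
 * generic caller would jump through and crash on.
 */
#define ENOTSUPP_ERR 524	/* the kernel's ENOTSUPP value, for illustration */

struct fake_map_ops {
	int (*get_next_key)(void *key, void *next_key);
};

static int stub_get_next_key(void *key, void *next_key)
{
	(void)key;
	(void)next_key;
	return -ENOTSUPP_ERR;
}

static const struct fake_map_ops trie_like_ops = {
	.get_next_key = stub_get_next_key,	/* never NULL */
};

static int map_get_next_key(const struct fake_map_ops *ops,
			    void *key, void *next_key)
{
	/* the generic path can call unconditionally; no NULL check needed */
	return ops->get_next_key(key, next_key);
}
```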
+3 -2
net/atm/svc.c
··· 318 318 return error; 319 319 } 320 320 321 - static int svc_accept(struct socket *sock, struct socket *newsock, int flags) 321 + static int svc_accept(struct socket *sock, struct socket *newsock, int flags, 322 + bool kern) 322 323 { 323 324 struct sock *sk = sock->sk; 324 325 struct sk_buff *skb; ··· 330 329 331 330 lock_sock(sk); 332 331 333 - error = svc_create(sock_net(sk), newsock, 0, 0); 332 + error = svc_create(sock_net(sk), newsock, 0, kern); 334 333 if (error) 335 334 goto out; 336 335
+2 -1
net/ax25/af_ax25.c
··· 1320 1320 return err; 1321 1321 } 1322 1322 1323 - static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) 1323 + static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, 1324 + bool kern) 1324 1325 { 1325 1326 struct sk_buff *skb; 1326 1327 struct sock *newsk;
+1 -1
net/bluetooth/l2cap_sock.c
··· 301 301 } 302 302 303 303 static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, 304 - int flags) 304 + int flags, bool kern) 305 305 { 306 306 DEFINE_WAIT_FUNC(wait, woken_wake_function); 307 307 struct sock *sk = sock->sk, *nsk;
+2 -1
net/bluetooth/rfcomm/sock.c
··· 471 471 return err; 472 472 } 473 473 474 - static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) 474 + static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, 475 + bool kern) 475 476 { 476 477 DEFINE_WAIT_FUNC(wait, woken_wake_function); 477 478 struct sock *sk = sock->sk, *nsk;
+1 -1
net/bluetooth/sco.c
··· 627 627 } 628 628 629 629 static int sco_sock_accept(struct socket *sock, struct socket *newsock, 630 - int flags) 630 + int flags, bool kern) 631 631 { 632 632 DEFINE_WAIT_FUNC(wait, woken_wake_function); 633 633 struct sock *sk = sock->sk, *ch;
+1
net/bridge/br_input.c
··· 30 30 static int 31 31 br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) 32 32 { 33 + br_drop_fake_rtable(skb); 33 34 return netif_receive_skb(skb); 34 35 } 35 36
-21
net/bridge/br_netfilter_hooks.c
··· 521 521 } 522 522 523 523 524 - /* PF_BRIDGE/LOCAL_IN ************************************************/ 525 - /* The packet is locally destined, which requires a real 526 - * dst_entry, so detach the fake one. On the way up, the 527 - * packet would pass through PRE_ROUTING again (which already 528 - * took place when the packet entered the bridge), but we 529 - * register an IPv4 PRE_ROUTING 'sabotage' hook that will 530 - * prevent this from happening. */ 531 - static unsigned int br_nf_local_in(void *priv, 532 - struct sk_buff *skb, 533 - const struct nf_hook_state *state) 534 - { 535 - br_drop_fake_rtable(skb); 536 - return NF_ACCEPT; 537 - } 538 - 539 524 /* PF_BRIDGE/FORWARD *************************************************/ 540 525 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 541 526 { ··· 890 905 .hook = br_nf_pre_routing, 891 906 .pf = NFPROTO_BRIDGE, 892 907 .hooknum = NF_BR_PRE_ROUTING, 893 - .priority = NF_BR_PRI_BRNF, 894 - }, 895 - { 896 - .hook = br_nf_local_in, 897 - .pf = NFPROTO_BRIDGE, 898 - .hooknum = NF_BR_LOCAL_IN, 899 908 .priority = NF_BR_PRI_BRNF, 900 909 }, 901 910 {
+1
net/core/dev.c
··· 1304 1304 { 1305 1305 rtnl_lock(); 1306 1306 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); 1307 + call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); 1307 1308 rtnl_unlock(); 1308 1309 } 1309 1310 EXPORT_SYMBOL(netdev_notify_peers);
+3 -3
net/core/net-sysfs.c
··· 953 953 while (--i >= new_num) { 954 954 struct kobject *kobj = &dev->_rx[i].kobj; 955 955 956 - if (!list_empty(&dev_net(dev)->exit_list)) 956 + if (!atomic_read(&dev_net(dev)->count)) 957 957 kobj->uevent_suppress = 1; 958 958 if (dev->sysfs_rx_queue_group) 959 959 sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); ··· 1371 1371 while (--i >= new_num) { 1372 1372 struct netdev_queue *queue = dev->_tx + i; 1373 1373 1374 - if (!list_empty(&dev_net(dev)->exit_list)) 1374 + if (!atomic_read(&dev_net(dev)->count)) 1375 1375 queue->kobj.uevent_suppress = 1; 1376 1376 #ifdef CONFIG_BQL 1377 1377 sysfs_remove_group(&queue->kobj, &dql_group); ··· 1558 1558 { 1559 1559 struct device *dev = &(ndev->dev); 1560 1560 1561 - if (!list_empty(&dev_net(ndev)->exit_list)) 1561 + if (!atomic_read(&dev_net(ndev)->count)) 1562 1562 dev_set_uevent_suppress(dev, 1); 1563 1563 1564 1564 kobject_get(&dev->kobj);
+16 -14
net/core/skbuff.c
··· 3828 3828 if (!skb_may_tx_timestamp(sk, false)) 3829 3829 return; 3830 3830 3831 - /* take a reference to prevent skb_orphan() from freeing the socket */ 3832 - sock_hold(sk); 3833 - 3834 - *skb_hwtstamps(skb) = *hwtstamps; 3835 - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); 3836 - 3837 - sock_put(sk); 3831 + /* Take a reference to prevent skb_orphan() from freeing the socket, 3832 + * but only if the socket refcount is not zero. 3833 + */ 3834 + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { 3835 + *skb_hwtstamps(skb) = *hwtstamps; 3836 + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); 3837 + sock_put(sk); 3838 + } 3838 3839 } 3839 3840 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); 3840 3841 ··· 3894 3893 { 3895 3894 struct sock *sk = skb->sk; 3896 3895 struct sock_exterr_skb *serr; 3897 - int err; 3896 + int err = 1; 3898 3897 3899 3898 skb->wifi_acked_valid = 1; 3900 3899 skb->wifi_acked = acked; ··· 3904 3903 serr->ee.ee_errno = ENOMSG; 3905 3904 serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; 3906 3905 3907 - /* take a reference to prevent skb_orphan() from freeing the socket */ 3908 - sock_hold(sk); 3909 - 3910 - err = sock_queue_err_skb(sk, skb); 3906 + /* Take a reference to prevent skb_orphan() from freeing the socket, 3907 + * but only if the socket refcount is not zero. 3908 + */ 3909 + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { 3910 + err = sock_queue_err_skb(sk, skb); 3911 + sock_put(sk); 3912 + } 3911 3913 if (err) 3912 3914 kfree_skb(skb); 3913 - 3914 - sock_put(sk); 3915 3915 } 3916 3916 EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); 3917 3917
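Both completion paths above now take the socket reference with `atomic_inc_not_zero(&sk->sk_refcnt)`: they run from TX-completion context, by which point the last reference may already have been dropped, and an unconditional `sock_hold()` would resurrect a freed socket. A sketch of the inc-not-zero pattern as a C11 CAS loop:

```c
#include <assert.h>
#include <stdatomic.h>

/* Sketch of atomic_inc_not_zero(): take a reference only if the count is
 * still nonzero, so an object already on its way to being freed
 * (refcount dropped to 0) is never revived.
 */
static int refcount_inc_not_zero(_Atomic int *refs)
{
	int old = atomic_load(refs);

	while (old != 0) {
		if (atomic_compare_exchange_weak(refs, &old, old + 1))
			return 1;	/* got a reference */
		/* old was reloaded by the failed CAS; retry */
	}
	return 0;	/* object already dying; leave it alone */
}
```

Callers then do the work and drop the reference only inside the success branch, exactly as the skbuff hunks restructure `skb_complete_tx_timestamp()` and `skb_complete_wifi_ack()`.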
+57 -49
net/core/sock.c
@@ -197 +197 @@
 
 /*
  * Each address family might have different locking rules, so we have
- * one slock key per address family:
+ * one slock key per address family and separate keys for internal and
+ * userspace sockets.
  */
 static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_kern_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
+static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
 
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
  * locks is fast):
  */
+
+#define _sock_locks(x) \
+  x "AF_UNSPEC",  x "AF_UNIX"     ,  x "AF_APPLETALK" and the rest: \
+  x "AF_UNSPEC",  x "AF_UNIX"     ,  x "AF_INET"     , \
+  x "AF_AX25"  ,  x "AF_IPX"      ,  x "AF_APPLETALK", \
+  x "AF_NETROM",  x "AF_BRIDGE"   ,  x "AF_ATMPVC"   , \
+  x "AF_X25"   ,  x "AF_INET6"    ,  x "AF_ROSE"     , \
+  x "AF_DECnet",  x "AF_NETBEUI"  ,  x "AF_SECURITY" , \
+  x "AF_KEY"   ,  x "AF_NETLINK"  ,  x "AF_PACKET"   , \
+  x "AF_ASH"   ,  x "AF_ECONET"   ,  x "AF_ATMSVC"   , \
+  x "AF_RDS"   ,  x "AF_SNA"      ,  x "AF_IRDA"     , \
+  x "AF_PPPOX" ,  x "AF_WANPIPE"  ,  x "AF_LLC"      , \
+  x "27"       ,  x "28"          ,  x "AF_CAN"      , \
+  x "AF_TIPC"  ,  x "AF_BLUETOOTH",  x "IUCV"        , \
+  x "AF_RXRPC" ,  x "AF_ISDN"     ,  x "AF_PHONET"   , \
+  x "AF_IEEE802154", x "AF_CAIF"  ,  x "AF_ALG"      , \
+  x "AF_NFC"   ,  x "AF_VSOCK"    ,  x "AF_KCM"      , \
+  x "AF_QIPCRTR", x "AF_SMC"      ,  x "AF_MAX"
+
 static const char *const af_family_key_strings[AF_MAX+1] = {
-    "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
-    "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
-    "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
-    "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
-    "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
-    "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
-    "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
-    "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
-    "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
-    "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
-    "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-    "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-    "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
-    "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
-    "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC"     , "sk_lock-AF_MAX"
+    _sock_locks("sk_lock-")
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
-    "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
-    "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
-    "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
-    "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
-    "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
-    "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
-    "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
-    "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
-    "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
-    "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
-    "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-    "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-    "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
-    "slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_KCM"      ,
-    "slock-AF_QIPCRTR", "slock-AF_SMC"     , "slock-AF_MAX"
+    _sock_locks("slock-")
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
-    "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
-    "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
-    "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
-    "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
-    "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
-    "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
-    "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
-    "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
-    "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-    "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
-    "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-    "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-    "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
-    "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
-    "clock-AF_QIPCRTR", "clock-AF_SMC"     , "clock-AF_MAX"
+    _sock_locks("clock-")
+};
+
+static const char *const af_family_kern_key_strings[AF_MAX+1] = {
+    _sock_locks("k-sk_lock-")
+};
+static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
+    _sock_locks("k-slock-")
+};
+static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
+    _sock_locks("k-clock-")
 };
 
 /*
@@ -253 +264 @@
  * so split the lock classes by using a per-AF key:
  */
 static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_kern_callback_keys[AF_MAX];
 
 /* Take into consideration the size of the struct sk_buff overhead in the
  * determination of these values, since that is non-constant across
@@ -1283 +1293 @@
  */
 static inline void sock_lock_init(struct sock *sk)
 {
-    sock_lock_init_class_and_name(sk,
+    if (sk->sk_kern_sock)
+        sock_lock_init_class_and_name(
+            sk,
+            af_family_kern_slock_key_strings[sk->sk_family],
+            af_family_kern_slock_keys + sk->sk_family,
+            af_family_kern_key_strings[sk->sk_family],
+            af_family_kern_keys + sk->sk_family);
+    else
+        sock_lock_init_class_and_name(
+            sk,
             af_family_slock_key_strings[sk->sk_family],
             af_family_slock_keys + sk->sk_family,
             af_family_key_strings[sk->sk_family],
@@ -1398 +1399 @@
      * why we need sk_prot_creator -acme
      */
     sk->sk_prot = sk->sk_prot_creator = prot;
+    sk->sk_kern_sock = kern;
     sock_lock_init(sk);
     sk->sk_net_refcnt = kern ? 0 : 1;
     if (likely(sk->sk_net_refcnt))
@@ -2277 +2277 @@
 }
 EXPORT_SYMBOL(sock_no_socketpair);
 
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
+           bool kern)
 {
     return -EOPNOTSUPP;
 }
@@ -2482 +2481 @@
     }
 
     rwlock_init(&sk->sk_callback_lock);
-    lockdep_set_class_and_name(&sk->sk_callback_lock,
+    if (sk->sk_kern_sock)
+        lockdep_set_class_and_name(
+            &sk->sk_callback_lock,
+            af_kern_callback_keys + sk->sk_family,
+            af_family_kern_clock_key_strings[sk->sk_family]);
+    else
+        lockdep_set_class_and_name(
+            &sk->sk_callback_lock,
             af_callback_keys + sk->sk_family,
             af_family_clock_key_strings[sk->sk_family]);
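The `_sock_locks()` change above leans on a compile-time trick: adjacent C string literals are concatenated, so one macro can carry the whole AF_* list while each table supplies only a prefix. A minimal standalone sketch of the pattern, using a trimmed three-entry stand-in for the real address-family table:

```c
#include <string.h>

/* One macro holds the family list; string-literal concatenation glues
 * the caller's prefix onto every entry at compile time, so the six
 * lockdep name tables in sock.c collapse into one list definition. */
#define _sock_locks(x) \
	x "AF_UNSPEC",	x "AF_UNIX",	x "AF_INET"

static const char *const key_strings[] = {
	_sock_locks("sk_lock-")		/* userspace socket names */
};
static const char *const kern_key_strings[] = {
	_sock_locks("k-sk_lock-")	/* kernel-internal socket names */
};
```

Adding a family then means editing one macro instead of six parallel arrays, which is the maintenance point of the patch.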
net/dccp/ccids/ccid2.c (+1)

@@ -749 +749 @@
 	for (i = 0; i < hc->tx_seqbufc; i++)
 		kfree(hc->tx_seqbuf[i]);
 	hc->tx_seqbufc = 0;
+	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
net/dccp/ipv4.c (+2 -1)

@@ -289 +289 @@
 
 	switch (type) {
 	case ICMP_REDIRECT:
-		dccp_do_redirect(skb, sk);
+		if (!sock_owned_by_user(sk))
+			dccp_do_redirect(skb, sk);
 		goto out;
 	case ICMP_SOURCE_QUENCH:
 		/* Just silently ignore these. */
net/dccp/ipv6.c (+5 -3)

@@ -122 +122 @@
 	np = inet6_sk(sk);
 
 	if (type == NDISC_REDIRECT) {
-		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+		if (!sock_owned_by_user(sk)) {
+			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-		if (dst)
-			dst->ops->redirect(dst, sk, skb);
+			if (dst)
+				dst->ops->redirect(dst, sk, skb);
+		}
 		goto out;
 	}
 
net/dccp/minisocks.c (+16 -8)

@@ -142 +142 @@
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 	bool own_req;
 
+	/* TCP/DCCP listeners became lockless.
+	 * DCCP stores complex state in its request_sock, so we need
+	 * a protection for them, now this code runs without being protected
+	 * by the parent (listener) lock.
+	 */
+	spin_lock_bh(&dreq->dreq_lock);
+
 	/* Check for retransmitted REQUEST */
 	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
 
@@ -156 +163 @@
 		inet_rtx_syn_ack(sk, req);
 	}
 	/* Network Duplicate, discard packet */
-	return NULL;
+	goto out;
 }
 
 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
@@ -182 +189 @@
 
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
 							 req, &own_req);
-	if (!child)
-		goto listen_overflow;
+	if (child) {
+		child = inet_csk_complete_hashdance(sk, child, req, own_req);
+		goto out;
+	}
 
-	return inet_csk_complete_hashdance(sk, child, req, own_req);
-
-listen_overflow:
-	dccp_pr_debug("listen_overflow!\n");
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
 drop:
 	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
 		req->rsk_ops->send_reset(sk, skb);
 
 	inet_csk_reqsk_queue_drop(sk, req);
-	return NULL;
+out:
+	spin_unlock_bh(&dreq->dreq_lock);
+	return child;
 }
 
 EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -246 +253 @@
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
+	spin_lock_init(&dreq->dreq_lock);
 	inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
 	inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
 	inet_rsk(req)->acked = 0;
net/decnet/af_decnet.c (+3 -2)

@@ -1070 +1070 @@
 	return skb == NULL ? ERR_PTR(err) : skb;
 }
 
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+		     bool kern)
 {
 	struct sock *sk = sock->sk, *newsk;
 	struct sk_buff *skb = NULL;
@@ -1099 +1100 @@
 
 	cb = DN_SKB_CB(skb);
 	sk->sk_ack_backlog--;
-	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
+	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
 	if (newsk == NULL) {
 		release_sock(sk);
 		kfree_skb(skb);
net/ipv4/af_inet.c (+6 -3)

@@ -689 +689 @@
  * Accept a pending connection. The TCP layer now gives BSD semantics.
  */
 
-int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+		bool kern)
 {
 	struct sock *sk1 = sock->sk;
 	int err = -EINVAL;
-	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
 
 	if (!sk2)
 		goto do_err;
@@ -1487 +1488 @@
 	int proto = iph->protocol;
 	int err = -ENOSYS;
 
-	if (skb->encapsulation)
+	if (skb->encapsulation) {
+		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
 		skb_set_inner_network_header(skb, nhoff);
+	}
 
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
net/ipv4/inet_connection_sock.c (+1 -1)

@@ -424 +424 @@
 /*
  * This will accept the next outstanding connection.
  */
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
net/ipv4/ip_output.c (+1 -1)

@@ -966 +966 @@
 	cork->length += length;
 	if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
 					 hh_len, fragheaderlen, transhdrlen,
net/ipv4/tcp_ipv4.c (+7 -3)

@@ -279 +279 @@
  */
 void tcp_v4_mtu_reduced(struct sock *sk)
 {
-	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
-	u32 mtu = tcp_sk(sk)->mtu_info;
+	struct dst_entry *dst;
+	u32 mtu;
 
+	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+		return;
+	mtu = tcp_sk(sk)->mtu_info;
 	dst = inet_csk_update_pmtu(sk, mtu);
 	if (!dst)
 		return;
@@ -428 +431 @@
 
 	switch (type) {
 	case ICMP_REDIRECT:
-		do_redirect(icmp_skb, sk);
+		if (!sock_owned_by_user(sk))
+			do_redirect(icmp_skb, sk);
 		goto out;
 	case ICMP_SOURCE_QUENCH:
 		/* Just silently ignore these. */
net/ipv4/tcp_timer.c (+4 -2)

@@ -249 +249 @@
 
 	sk_mem_reclaim_partial(sk);
 
-	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
 		goto out;
 
 	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552 +553 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
-	if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+	    !icsk->icsk_pending)
 		goto out;
 
 	if (time_after(icsk->icsk_timeout, jiffies)) {
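The timer hunks above use a common kernel idiom: instead of comparing `sk_state` against each excluded value, shift 1 by the state and AND against a mask of `TCPF_*` flags (where `TCPF_FOO == 1 << TCP_FOO`), so one test covers any set of states. A standalone sketch with enum values mirroring the kernel's (this is an illustration, not the kernel headers):

```c
/* State numbers as in the kernel's include/net/tcp_states.h. */
enum { TCP_ESTABLISHED = 1, TCP_CLOSE = 7, TCP_LISTEN = 10 };

/* Flag form: one bit per state, so sets of states become one mask. */
enum { TCPF_CLOSE = 1 << TCP_CLOSE, TCPF_LISTEN = 1 << TCP_LISTEN };

/* The fixed timers bail out for CLOSE *and* LISTEN with a single
 * AND, which is how the listener divide-by-zero path gets excluded. */
static int timer_should_bail(int sk_state)
{
	return ((1 << sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) != 0;
}
```

The point of the fix is that the old `== TCP_CLOSE` test silently let LISTEN sockets through; widening the check is a one-bit change to the mask.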
net/ipv6/af_inet6.c (+5 -5)

@@ -920 +920 @@
 	err = register_pernet_subsys(&inet6_net_ops);
 	if (err)
 		goto register_pernet_fail;
-	err = icmpv6_init();
-	if (err)
-		goto icmp_fail;
 	err = ip6_mr_init();
 	if (err)
 		goto ipmr_fail;
+	err = icmpv6_init();
+	if (err)
+		goto icmp_fail;
 	err = ndisc_init();
 	if (err)
 		goto ndisc_fail;
@@ -1061 +1061 @@
 	ndisc_cleanup();
 ndisc_fail:
 	ip6_mr_cleanup();
-ipmr_fail:
-	icmpv6_cleanup();
 icmp_fail:
 	unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+	icmpv6_cleanup();
 register_pernet_fail:
 	sock_unregister(PF_INET6);
 	rtnl_unregister_all(PF_INET6);
net/ipv6/ip6_fib.c (+2)

@@ -923 +923 @@
 	ins = &rt->dst.rt6_next;
 	iter = *ins;
 	while (iter) {
+		if (iter->rt6i_metric > rt->rt6i_metric)
+			break;
 		if (rt6_qualify_for_ecmp(iter)) {
 			*ins = iter->dst.rt6_next;
 			fib6_purge_rt(iter, fn, info->nl_net);
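The ip6_fib.c fix relies on routes within a fib6 node being kept sorted by metric: the ECMP replace loop can therefore stop as soon as it meets a larger metric, rather than purging routes beyond the one being replaced. A loose standalone model of that early-break scan, with a plain singly linked list of metrics standing in for the route chain (names here are illustrative, not kernel API):

```c
/* Simplified stand-in for the sorted per-node route chain. */
struct rt { int metric; struct rt *next; };

/* Walk the sorted list and count entries at or before new_metric's
 * position, stopping at the first larger metric exactly as the fixed
 * loop does; entries past the break are never touched. */
static int ecmp_scan_len(const struct rt *head, int new_metric)
{
	int n = 0;

	for (; head; head = head->next) {
		if (head->metric > new_metric)
			break;	/* the fix: higher-metric routes survive */
		n++;
	}
	return n;
}
```

Without the break, a replace at metric 10 would also visit (and in the buggy kernel path, delete) qualifying routes at metric 20.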
net/ipv6/ip6_offload.c (+3 -1)

@@ -294 +294 @@
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
 	int err = -ENOSYS;
 
-	if (skb->encapsulation)
+	if (skb->encapsulation) {
+		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
 		skb_set_inner_network_header(skb, nhoff);
+	}
 
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
net/ipv6/ip6_output.c (+7 -2)

@@ -768 +768 @@
 	 *	Fragment the datagram.
 	 */
 
-	*prevhdr = NEXTHDR_FRAGMENT;
 	troom = rt->dst.dev->needed_tailroom;
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
 	while (left > 0) {
+		u8 *fragnexthdr_offset;
+
 		len = left;
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
@@ -818 +819 @@
 		 *	Copy the packet header into the new buffer.
 		 */
 		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
+
+		fragnexthdr_offset = skb_network_header(frag);
+		fragnexthdr_offset += prevhdr - skb_network_header(skb);
+		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
 
 		/*
 		 *	Build fragment header.
@@ -1385 +1390 @@
 	if ((((length + fragheaderlen) > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen, exthdrlen,
net/ipv6/ip6_vti.c (+6 -2)

@@ -485 +485 @@
 	if (!skb->ignore_df && skb->len > mtu) {
 		skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
 
-		if (skb->protocol == htons(ETH_P_IPV6))
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		else
+		} else {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 				  htonl(mtu));
+		}
 
 		return -EMSGSIZE;
 	}
net/ipv6/route.c (+6 -5)

@@ -3299 +3299 @@
 	nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
 		    + NLA_ALIGN(sizeof(struct rtnexthop))
 		    + nla_total_size(16) /* RTA_GATEWAY */
-		    + nla_total_size(4)  /* RTA_OIF */
 		    + lwtunnel_get_encap_size(rt->dst.lwtstate);
 
 	nexthop_len *= rt->rt6i_nsiblings;
@@ -3323 +3322 @@
 }
 
 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
-			    unsigned int *flags)
+			    unsigned int *flags, bool skip_oif)
 {
 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
 		*flags |= RTNH_F_LINKDOWN;
@@ -3336 +3335 @@
 			goto nla_put_failure;
 	}
 
-	if (rt->dst.dev &&
+	/* not needed for multipath encoding b/c it has a rtnexthop struct */
+	if (!skip_oif && rt->dst.dev &&
 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
 		goto nla_put_failure;
 
@@ -3350 +3350 @@
 	return -EMSGSIZE;
 }
 
+/* add multipath next hop */
 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 {
 	struct rtnexthop *rtnh;
@@ -3362 +3363 @@
 	rtnh->rtnh_hops = 0;
 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
 
-	if (rt6_nexthop_info(skb, rt, &flags) < 0)
+	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
 		goto nla_put_failure;
 
 	rtnh->rtnh_flags = flags;
@@ -3515 +3516 @@
 
 		nla_nest_end(skb, mp);
 	} else {
-		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
+		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
 			goto nla_put_failure;
 	}
 
net/ipv6/tcp_ipv6.c (+5 -3)

@@ -391 +391 @@
 	np = inet6_sk(sk);
 
 	if (type == NDISC_REDIRECT) {
-		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+		if (!sock_owned_by_user(sk)) {
+			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-		if (dst)
-			dst->ops->redirect(dst, sk, skb);
+			if (dst)
+				dst->ops->redirect(dst, sk, skb);
+		}
 		goto out;
 	}
 
net/irda/af_irda.c (+3 -2)

@@ -828 +828 @@
  *    Wait for incoming connection
  *
  */
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *new, *self = irda_sk(sk);
@@ -836 +837 @@
 	struct sk_buff *skb = NULL;
 	int err;
 
-	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
+	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
 	if (err)
 		return err;
net/iucv/af_iucv.c (+1 -1)

@@ -938 +938 @@
 
 /* Accept a pending connection */
 static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
-			    int flags)
+			    int flags, bool kern)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct sock *sk = sock->sk, *nsk;
net/llc/af_llc.c (+3 -1)

@@ -641 +641 @@
  *	@sock: Socket which connections arrive on.
  *	@newsock: Socket to move incoming connection to.
  *	@flags: User specified operational flags.
+ *	@kern: If the socket is kernel internal
  *
  *	Accept a new incoming connection.
  *	Returns 0 upon success, negative otherwise.
  */
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
+			 bool kern)
 {
 	struct sock *sk = sock->sk, *newsk;
 	struct llc_sock *llc, *newllc;
net/mpls/af_mpls.c (+3 -1)

@@ -1288 +1288 @@
 		/* fall through */
 	case NETDEV_CHANGE:
 		nh->nh_flags |= RTNH_F_LINKDOWN;
-		ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+		if (event != NETDEV_UNREGISTER)
+			ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
 		break;
 	}
 	if (event == NETDEV_UNREGISTER)
@@ -2028 +2029 @@
 	for (index = 0; index < platform_labels; index++) {
 		struct mpls_route *rt = rtnl_dereference(platform_label[index]);
 		RCU_INIT_POINTER(platform_label[index], NULL);
+		mpls_notify_route(net, index, rt, NULL, NULL);
 		mpls_rt_free(rt);
 	}
 	rtnl_unlock();
net/netrom/af_netrom.c (+2 -1)

@@ -765 +765 @@
 	return err;
 }
 
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
+		     bool kern)
 {
 	struct sk_buff *skb;
 	struct sock *newsk;
net/nfc/llcp_sock.c (+1 -1)

@@ -441 +441 @@
 }
 
 static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
-			    int flags)
+			    int flags, bool kern)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct sock *sk = sock->sk, *new_sk;
net/phonet/pep.c (+4 -2)

@@ -772 +772 @@
 	sock_put(sk);
 }
 
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+				    bool kern)
 {
 	struct pep_sock *pn = pep_sk(sk), *newpn;
 	struct sock *newsk = NULL;
@@ -846 +847 @@
 	}
 
 	/* Create a new to-be-accepted sock */
-	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
+	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
+			 kern);
 	if (!newsk) {
 		pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
 		err = -ENOBUFS;
net/phonet/socket.c (+2 -2)

@@ -305 +305 @@
 }
 
 static int pn_socket_accept(struct socket *sock, struct socket *newsock,
-			    int flags)
+			    int flags, bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sock *newsk;
@@ -314 +314 @@
 	if (unlikely(sk->sk_state != TCP_LISTEN))
 		return -EINVAL;
 
-	newsk = sk->sk_prot->accept(sk, flags, &err);
+	newsk = sk->sk_prot->accept(sk, flags, &err, kern);
 	if (!newsk)
 		return err;
 
net/rds/connection.c (+1)

@@ -429 +429 @@
 	 */
 	rds_cong_remove_conn(conn);
 
+	put_net(conn->c_net);
 	kmem_cache_free(rds_conn_slab, conn);
 
 	spin_lock_irqsave(&rds_conn_lock, flags);
net/rds/ib_cm.c (+36 -11)

@@ -442 +442 @@
 		ic->i_send_cq = NULL;
 		ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
 		rdsdebug("ib_create_cq send failed: %d\n", ret);
-		goto out;
+		goto rds_ibdev_out;
 	}
 
 	ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
@@ -456 +456 @@
 		ic->i_recv_cq = NULL;
 		ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
 		rdsdebug("ib_create_cq recv failed: %d\n", ret);
-		goto out;
+		goto send_cq_out;
 	}
 
 	ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
 	if (ret) {
 		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
-		goto out;
+		goto recv_cq_out;
 	}
 
 	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
 	if (ret) {
 		rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
-		goto out;
+		goto recv_cq_out;
 	}
 
 	/* XXX negotiate max send/recv with remote? */
@@ -494 +494 @@
 	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
 	if (ret) {
 		rdsdebug("rdma_create_qp failed: %d\n", ret);
-		goto out;
+		goto recv_cq_out;
 	}
 
 	ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
@@ -504 +504 @@
 	if (!ic->i_send_hdrs) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent send failed\n");
-		goto out;
+		goto qp_out;
 	}
 
 	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
@@ -514 +514 @@
 	if (!ic->i_recv_hdrs) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent recv failed\n");
-		goto out;
+		goto send_hdrs_dma_out;
 	}
 
 	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
@@ -522 +522 @@
 	if (!ic->i_ack) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent ack failed\n");
-		goto out;
+		goto recv_hdrs_dma_out;
 	}
 
 	ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
@@ -530 +530 @@
 	if (!ic->i_sends) {
 		ret = -ENOMEM;
 		rdsdebug("send allocation failed\n");
-		goto out;
+		goto ack_dma_out;
 	}
 
 	ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
@@ -538 +538 @@
 	if (!ic->i_recvs) {
 		ret = -ENOMEM;
 		rdsdebug("recv allocation failed\n");
-		goto out;
+		goto sends_out;
 	}
 
 	rds_ib_recv_init_ack(ic);
@@ -546 +546 @@
 	rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
 		 ic->i_send_cq, ic->i_recv_cq);
 
-out:
+	return ret;
+
+sends_out:
+	vfree(ic->i_sends);
+ack_dma_out:
+	ib_dma_free_coherent(dev, sizeof(struct rds_header),
+			     ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+	ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+					sizeof(struct rds_header),
+			     ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+	ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+					sizeof(struct rds_header),
+			     ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+	rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+	if (!ib_destroy_cq(ic->i_recv_cq))
+		ic->i_recv_cq = NULL;
+send_cq_out:
+	if (!ib_destroy_cq(ic->i_send_cq))
+		ic->i_send_cq = NULL;
+rds_ibdev_out:
+	rds_ib_remove_conn(rds_ibdev, conn);
 	rds_ib_dev_put(rds_ibdev);
+
 	return ret;
 }
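The ib_cm.c patch replaces a single catch-all `out:` label with the classic labelled-unwind ladder: each acquisition gets its own exit label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal sketch of the pattern with plain heap buffers standing in for the CQs, QP and DMA rings (function and label names here are illustrative):

```c
#include <stdlib.h>

/* Acquire two resources; on failure, unwind only what succeeded.
 * Each label frees one resource, and control falls through the
 * labels in reverse acquisition order. */
static int setup_conn(char **a_out, char **b_out)
{
	char *a, *b;

	a = malloc(64);
	if (!a)
		goto out;		/* nothing to undo yet */
	b = malloc(64);
	if (!b)
		goto free_a;		/* undo the first allocation */

	*a_out = a;
	*b_out = b;
	return 0;

free_a:
	free(a);
out:
	return -1;
}
```

The gain over one shared `out:` is precisely what the fix needed: a failure midway no longer leaks the earlier CQs and DMA buffers, and no cleanup runs on a resource that was never created.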
net/rds/rds.h (+3 -3)

@@ -147 +147 @@
 
 	/* Protocol version */
 	unsigned int		c_version;
-	possible_net_t		c_net;
+	struct net		*c_net;
 
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
@@ -162 +162 @@
 static inline
 struct net *rds_conn_net(struct rds_connection *conn)
 {
-	return read_pnet(&conn->c_net);
+	return conn->c_net;
 }
 
 static inline
 void rds_conn_net_set(struct rds_connection *conn, struct net *net)
 {
-	write_pnet(&conn->c_net, net);
+	conn->c_net = get_net(net);
 }
 
 #define RDS_FLAG_CONG_BITMAP	0x01
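Together with the `put_net()` added in connection.c above, this change establishes a get/put pairing: setting `c_net` now takes a reference on the network namespace, and freeing the connection drops it, so the netns cannot disappear while a connection still points at it. A toy model of the pairing, with a plain counter standing in for the netns refcount (all names here are simplified stand-ins, not the kernel API):

```c
/* Stand-in for struct net with an explicit reference count. */
struct net { int count; };

static struct net *get_net(struct net *net) { net->count++; return net; }
static void put_net(struct net *net) { net->count--; }

struct conn { struct net *c_net; };

/* Mirror of rds_conn_net_set(): pin the namespace when storing it. */
static void conn_net_set(struct conn *conn, struct net *net)
{
	conn->c_net = get_net(net);
}

/* Mirror of the connection-free path: drop the pin before the
 * connection itself goes away. */
static void conn_free(struct conn *conn)
{
	put_net(conn->c_net);
	conn->c_net = 0;
}
```

The invariant to check is symmetry: every `conn_net_set()` is balanced by exactly one `conn_free()`, leaving the namespace count where it started.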
net/rds/tcp.c (+21 -17)

@@ -484 +484 @@
 	 * we do need to clean up the listen socket here.
 	 */
 	if (rtn->rds_tcp_listen_sock) {
-		rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+		struct socket *lsock = rtn->rds_tcp_listen_sock;
+
 		rtn->rds_tcp_listen_sock = NULL;
-		flush_work(&rtn->rds_tcp_accept_w);
+		rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	}
 }
 
@@ -523 +524 @@
 	struct rds_tcp_connection *tc, *_tc;
 	LIST_HEAD(tmp_list);
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-	rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
 	rtn->rds_tcp_listen_sock = NULL;
-	flush_work(&rtn->rds_tcp_accept_w);
+	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+		struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
 		if (net != c_net || !tc->t_sock)
 			continue;
@@ -546 +547 @@
 void *rds_tcp_listen_sock_def_readable(struct net *net)
 {
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-	return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+	if (!lsock)
+		return NULL;
+
+	return lsock->sk->sk_user_data;
 }
 
 static int rds_tcp_dev_event(struct notifier_block *this,
@@ -584 +589 @@
 
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+		struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
 		if (net != c_net || !tc->t_sock)
 			continue;
@@ -638 +643 @@
 		goto out;
 	}
 
-	ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
-	if (ret) {
-		pr_warn("could not register rds_tcp_dev_notifier\n");
+	ret = rds_tcp_recv_init();
+	if (ret)
 		goto out_slab;
-	}
 
 	ret = register_pernet_subsys(&rds_tcp_net_ops);
 	if (ret)
-		goto out_notifier;
+		goto out_recv;
 
-	ret = rds_tcp_recv_init();
-	if (ret)
+	ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+	if (ret) {
+		pr_warn("could not register rds_tcp_dev_notifier\n");
 		goto out_pernet;
+	}
 
 	rds_trans_register(&rds_tcp_transport);
 
@@ -660 +665 @@
 
 out_pernet:
 	unregister_pernet_subsys(&rds_tcp_net_ops);
-out_notifier:
-	if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
-		pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_recv:
+	rds_tcp_recv_exit();
out_slab:
 	kmem_cache_destroy(rds_tcp_conn_slab);
 out:
net/rds/tcp.h (+1 -1)

@@ -66 +66 @@
 
 /* tcp_listen.c */
 struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
 int rds_tcp_keepalive(struct socket *sock);
net/rds/tcp_listen.c (+8 -3)

@@ -133 +133 @@
 
 	new_sock->type = sock->type;
 	new_sock->ops = sock->ops;
-	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
 	if (ret < 0)
 		goto out;
 
@@ -223 +223 @@
 	 * before it has been accepted and the accepter has set up their
 	 * data_ready.. we only want to queue listen work for our listening
 	 * socket
+	 *
+	 * (*ready)() may be null if we are racing with netns delete, and
+	 * the listen socket is being torn down.
 	 */
 	if (sk->sk_state == TCP_LISTEN)
 		rds_tcp_accept_work(sk);
@@ -231 +234 @@
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
-	ready(sk);
+	if (ready)
+		ready(sk);
 }
 
 struct socket *rds_tcp_listen_init(struct net *net)
@@ -271 +275 @@
 	return NULL;
 }
 
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
 {
 	struct sock *sk;
 
@@ -292 +296 @@
 
 	/* wait for accepts to stop and close the socket */
 	flush_workqueue(rds_wq);
+	flush_work(acceptor);
 	sock_release(sock);
 }
net/rose/af_rose.c (+2 -1)

@@ -871 +871 @@
 	return err;
 }
 
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sk_buff *skb;
 	struct sock *newsk;
net/rxrpc/input.c (+19 -8)

@@ -420 +420 @@
 			     u16 skew)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	enum rxrpc_call_state state;
 	unsigned int offset = sizeof(struct rxrpc_wire_header);
 	unsigned int ix;
 	rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
@@ -434 +435 @@
 	_proto("Rx DATA %%%u { #%u f=%02x }",
 	       sp->hdr.serial, seq, sp->hdr.flags);
 
-	if (call->state >= RXRPC_CALL_COMPLETE)
+	state = READ_ONCE(call->state);
+	if (state >= RXRPC_CALL_COMPLETE)
 		return;
 
 	/* Received data implicitly ACKs all of the request packets we sent
 	 * when we're acting as a client.
 	 */
-	if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
-	     call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+	if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+	     state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
 	    !rxrpc_receiving_reply(call))
 		return;
 
@@ -650 +652 @@
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_peer *peer;
 	unsigned int mtu;
+	bool wake = false;
 	u32 rwind = ntohl(ackinfo->rwind);
 
 	_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
@@ -657 +660 @@
 	       ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
 	       rwind, ntohl(ackinfo->jumbo_max));
 
-	if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
-		rwind = RXRPC_RXTX_BUFF_SIZE - 1;
-	call->tx_winsize = rwind;
+	if (call->tx_winsize != rwind) {
+		if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+			rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+		if (rwind > call->tx_winsize)
+			wake = true;
+		call->tx_winsize = rwind;
+	}
+
 	if (call->cong_ssthresh > rwind)
 		call->cong_ssthresh = rwind;
 
@@ -673 +681 @@
 		spin_unlock_bh(&peer->lock);
 		_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
 	}
+
+	if (wake)
+		wake_up(&call->waitq);
 }
 
 /*
@@ -799 +810 @@
 		return rxrpc_proto_abort("AK0", call, 0);
 
 	/* Ignore ACKs unless we are or have just been transmitting. */
-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_CLIENT_SEND_REQUEST:
 	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
 	case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -940 +951 @@
 static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
 					  struct rxrpc_call *call)
 {
-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_SERVER_AWAIT_ACK:
 		rxrpc_call_completed(call);
 		break;
+2 -2
net/rxrpc/recvmsg.c
···
 		msg->msg_namelen = len;
 	}

-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_SERVER_ACCEPTING:
 		ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
 		break;
···

 	mutex_lock(&call->user_mutex);

-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_CLIENT_RECV_REPLY:
 	case RXRPC_CALL_SERVER_RECV_REQUEST:
 	case RXRPC_CALL_SERVER_ACK_REQUEST:
+31 -18
net/rxrpc/sendmsg.c
···
 int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	__releases(&rx->sk.sk_lock.slock)
 {
+	enum rxrpc_call_state state;
 	enum rxrpc_command cmd;
 	struct rxrpc_call *call;
 	unsigned long user_call_ID = 0;
···
 			return PTR_ERR(call);
 		/* ... and we have the call lock. */
 	} else {
-		ret = -EBUSY;
-		if (call->state == RXRPC_CALL_UNINITIALISED ||
-		    call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
-		    call->state == RXRPC_CALL_SERVER_PREALLOC ||
-		    call->state == RXRPC_CALL_SERVER_SECURING ||
-		    call->state == RXRPC_CALL_SERVER_ACCEPTING)
+		switch (READ_ONCE(call->state)) {
+		case RXRPC_CALL_UNINITIALISED:
+		case RXRPC_CALL_CLIENT_AWAIT_CONN:
+		case RXRPC_CALL_SERVER_PREALLOC:
+		case RXRPC_CALL_SERVER_SECURING:
+		case RXRPC_CALL_SERVER_ACCEPTING:
+			ret = -EBUSY;
 			goto error_release_sock;
+		default:
+			break;
+		}

 		ret = mutex_lock_interruptible(&call->user_mutex);
 		release_sock(&rx->sk);
···
 		}
 	}

+	state = READ_ONCE(call->state);
 	_debug("CALL %d USR %lx ST %d on CONN %p",
-	       call->debug_id, call->user_call_ID, call->state, call->conn);
+	       call->debug_id, call->user_call_ID, state, call->conn);

-	if (call->state >= RXRPC_CALL_COMPLETE) {
+	if (state >= RXRPC_CALL_COMPLETE) {
 		/* it's too late for this call */
 		ret = -ESHUTDOWN;
 	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
···
 	} else if (cmd != RXRPC_CMD_SEND_DATA) {
 		ret = -EINVAL;
 	} else if (rxrpc_is_client_call(call) &&
-		   call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+		   state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
 		/* request phase complete for this client call */
 		ret = -EPROTO;
 	} else if (rxrpc_is_service_call(call) &&
-		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+		   state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		   state != RXRPC_CALL_SERVER_SEND_REPLY) {
 		/* Reply phase not begun or not complete for service call. */
 		ret = -EPROTO;
 	} else {
···
 	_debug("CALL %d USR %lx ST %d on CONN %p",
 	       call->debug_id, call->user_call_ID, call->state, call->conn);

-	if (call->state >= RXRPC_CALL_COMPLETE) {
-		ret = -ESHUTDOWN; /* it's too late for this call */
-	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-		ret = -EPROTO; /* request phase complete for this client call */
-	} else {
+	switch (READ_ONCE(call->state)) {
+	case RXRPC_CALL_CLIENT_SEND_REQUEST:
+	case RXRPC_CALL_SERVER_ACK_REQUEST:
+	case RXRPC_CALL_SERVER_SEND_REPLY:
 		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+		break;
+	case RXRPC_CALL_COMPLETE:
+		read_lock_bh(&call->state_lock);
+		ret = -call->error;
+		read_unlock_bh(&call->state_lock);
+		break;
+	default:
+		/* Request phase complete for this client call */
+		ret = -EPROTO;
+		break;
 	}

 	mutex_unlock(&call->user_mutex);
+3
net/sched/act_connmark.c
···
 	if (ret < 0)
 		return ret;

+	if (!tb[TCA_CONNMARK_PARMS])
+		return -EINVAL;
+
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);

 	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-1
net/sched/act_skbmod.c
···

 	return skb->len;
 nla_put_failure:
-	rcu_read_unlock();
 	nlmsg_trim(skb, b);
 	return -1;
 }
+3 -2
net/sctp/ipv6.c
···

 /* Create and initialize a new sk for the socket to be returned by accept(). */
 static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
-					     struct sctp_association *asoc)
+					     struct sctp_association *asoc,
+					     bool kern)
 {
 	struct sock *newsk;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct sctp6_sock *newsctp6sk;
 	struct ipv6_txoptions *opt;

-	newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
+	newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
 	if (!newsk)
 		goto out;
+3 -2
net/sctp/protocol.c
···

 /* Create and initialize a new sk for the socket returned by accept(). */
 static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
-					     struct sctp_association *asoc)
+					     struct sctp_association *asoc,
+					     bool kern)
 {
 	struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
-				      sk->sk_prot, 0);
+				      sk->sk_prot, kern);
 	struct inet_sock *newinet;

 	if (!newsk)
+2 -2
net/sctp/socket.c
···
  * descriptor will be returned from accept() to represent the newly
  * formed association.
  */
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 {
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
···
 	 */
 	asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);

-	newsk = sp->pf->create_accept_sk(sk, asoc);
+	newsk = sp->pf->create_accept_sk(sk, asoc, kern);
 	if (!newsk) {
 		error = -ENOMEM;
 		goto out;
+1 -1
net/smc/af_smc.c
···
 }

 static int smc_accept(struct socket *sock, struct socket *new_sock,
-		      int flags)
+		      int flags, bool kern)
 {
 	struct sock *sk = sock->sk, *nsk;
 	DECLARE_WAITQUEUE(wait, current);
+3 -2
net/socket.c
···
 	if (err)
 		goto out_fd;

-	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+	err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
 	if (err < 0)
 		goto out_fd;
···
 	/* We assume all kernel code knows the size of sockaddr_storage */
 	msg.msg_namelen = 0;
 	msg.msg_iocb = NULL;
+	msg.msg_flags = 0;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg, flags);
···
 	if (err < 0)
 		goto done;

-	err = sock->ops->accept(sock, *newsock, flags);
+	err = sock->ops->accept(sock, *newsock, flags, true);
 	if (err < 0) {
 		sock_release(*newsock);
 		*newsock = NULL;
+5 -3
net/tipc/socket.c
···
 static void tipc_write_space(struct sock *sk);
 static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+		       bool kern);
 static void tipc_sk_timeout(unsigned long data);
 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 			   struct tipc_name_seq const *seq);
···
  *
  * Returns 0 on success, errno otherwise
  */
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+		       bool kern)
 {
 	struct sock *new_sk, *sk = sock->sk;
 	struct sk_buff *buf;
···

 	buf = skb_peek(&sk->sk_receive_queue);

-	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
+	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
 	if (res)
 		goto exit;
 	security_sk_clone(sock->sk, new_sock->sk);
+3 -2
net/unix/af_unix.c
···
 static int unix_stream_connect(struct socket *, struct sockaddr *,
 			       int addr_len, int flags);
 static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int);
+static int unix_accept(struct socket *, struct socket *, int, bool);
 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 static unsigned int unix_dgram_poll(struct file *, struct socket *,
···
 		set_bit(SOCK_PASSSEC, &new->flags);
 }

-static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sock *tsk;
+2 -1
net/vmw_vsock/af_vsock.c
···
 	return err;
 }

-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
+			bool kern)
 {
 	struct sock *listener;
 	int err;
+2 -1
net/x25/af_x25.c
···
 	return rc;
 }

-static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
+		      bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sock *newsk;
+9 -10
net/xfrm/xfrm_policy.c
···
 }

 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
-						 const struct flowi *fl)
+						 const struct flowi *fl, u16 family)
 {
 	struct xfrm_policy *pol;
···
 again:
 	pol = rcu_dereference(sk->sk_policy[dir]);
 	if (pol != NULL) {
-		bool match = xfrm_selector_match(&pol->selector, fl,
-						 sk->sk_family);
+		bool match = xfrm_selector_match(&pol->selector, fl, family);
 		int err = 0;

 		if (match) {
···
 	sk = sk_const_to_full_sk(sk);
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
 		num_pols = 1;
-		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
 		err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 		if (err < 0)
···
 	pol = NULL;
 	sk = sk_to_full_sk(sk);
 	if (sk && sk->sk_policy[dir]) {
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
 		if (IS_ERR(pol)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 			return 0;
···
 {
 	int rv;

+	/* Initialize the per-net locks here */
+	spin_lock_init(&net->xfrm.xfrm_state_lock);
+	spin_lock_init(&net->xfrm.xfrm_policy_lock);
+	mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
 	rv = xfrm_statistics_init(net);
 	if (rv < 0)
 		goto out_statistics;
···
 	rv = flow_cache_init(net);
 	if (rv < 0)
 		goto out;
-
-	/* Initialize the per-net locks here */
-	spin_lock_init(&net->xfrm.xfrm_state_lock);
-	spin_lock_init(&net->xfrm.xfrm_policy_lock);
-	mutex_init(&net->xfrm.xfrm_cfg_mutex);

 	return 0;
+18
tools/include/uapi/linux/bpf_perf_event.h
···
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+	struct pt_regs regs;
+	__u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
+3 -1
tools/testing/selftests/bpf/Makefile
···
 LIBDIR := ../../../lib
 BPFOBJ := $(LIBDIR)/bpf/bpf.o

-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)

 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map

 TEST_PROGS := test_kmod.sh
+
+all: $(TEST_GEN_PROGS)

 .PHONY: all clean force
+4
tools/testing/selftests/bpf/test_verifier.c
···
  * License as published by the Free Software Foundation.
  */

+#include <asm/types.h>
+#include <linux/types.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
···
 	cap_flag_value_t sysadmin = CAP_CLEAR;
 	const cap_value_t cap_val = CAP_SYS_ADMIN;

+#ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
 		perror("cap_get_flag");
 		return false;
 	}
+#endif
 	caps = cap_get_proc();
 	if (!caps) {
 		perror("cap_get_proc");