Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

1) Avoid negative netdev refcount in error flow of xfrm state add, from
Aviad Yehezkel.

2) Fix tcpdump decoding of IPSEC decap'd frames by filling in the
ethernet header protocol field in xfrm{4,6}_mode_tunnel_input().
From Yossi Kuperman.

3) Fix a syzbot triggered skb_under_panic in pppoe having to do with
failing to allocate an appropriate amount of headroom. From
Guillaume Nault.

4) Fix memory leak in vmxnet3 driver, from Neil Horman.

5) Cure out-of-bounds packet memory access in em_nbyte EMATCH module,
from Wolfgang Bumiller.

6) Restrict what kinds of sockets can be bound to the KCM multiplexer
and also disallow when another layer has attached to the socket and
made use of sk_user_data. From Tom Herbert.

7) Fix use before init of IOTLB in vhost code, from Jason Wang.

8) Correct STACR register write bit definition in IBM emac driver, from
Ivan Mikhaylov.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
net/ibm/emac: wrong bit is used for STA control register write
net/ibm/emac: add 8192 rx/tx fifo size
vhost: do not try to access device IOTLB when not initialized
vhost: use mutex_lock_nested() in vhost_dev_lock_vqs()
i40e: flower: check if TC offload is enabled on a netdev
qed: Free reserved MR tid
qed: Remove reservation of dpi for kernel
kcm: Check if sk_user_data already set in kcm_attach
kcm: Only allow TCP sockets to be attached to a KCM mux
net: sched: fix TCF_LAYER_LINK case in tcf_get_base_ptr
net: sched: em_nbyte: don't add the data offset twice
mlxsw: spectrum_router: Don't log an error on missing neighbor
vmxnet3: repair memory leak
ipv6: Fix getsockopt() for sockets with default IPV6_AUTOFLOWLABEL
pppoe: take ->needed_headroom of lower device into account on xmit
xfrm: fix boolean assignment in xfrm_get_type_offload
xfrm: Fix eth_hdr(skb)->h_proto to reflect inner IP version
xfrm: fix error flow in case of add state fails
xfrm: Add SA to hardware at the end of xfrm_state_construct()

Changed files
+90 -49
drivers
net
ethernet
ibm
emac
intel
mellanox
qlogic
ppp
vmxnet3
vhost
include
net
+6
drivers/net/ethernet/ibm/emac/core.c
··· 494 494 case 16384: 495 495 ret |= EMAC_MR1_RFS_16K; 496 496 break; 497 + case 8192: 498 + ret |= EMAC4_MR1_RFS_8K; 499 + break; 497 500 case 4096: 498 501 ret |= EMAC_MR1_RFS_4K; 499 502 break; ··· 518 515 switch(tx_size) { 519 516 case 16384: 520 517 ret |= EMAC4_MR1_TFS_16K; 518 + break; 519 + case 8192: 520 + ret |= EMAC4_MR1_TFS_8K; 521 521 break; 522 522 case 4096: 523 523 ret |= EMAC4_MR1_TFS_4K;
+3 -1
drivers/net/ethernet/ibm/emac/emac.h
··· 151 151 152 152 #define EMAC4_MR1_RFS_2K 0x00100000 153 153 #define EMAC4_MR1_RFS_4K 0x00180000 154 + #define EMAC4_MR1_RFS_8K 0x00200000 154 155 #define EMAC4_MR1_RFS_16K 0x00280000 155 156 #define EMAC4_MR1_TFS_2K 0x00020000 156 157 #define EMAC4_MR1_TFS_4K 0x00030000 158 + #define EMAC4_MR1_TFS_8K 0x00040000 157 159 #define EMAC4_MR1_TFS_16K 0x00050000 158 160 #define EMAC4_MR1_TR 0x00008000 159 161 #define EMAC4_MR1_MWSW_001 0x00001000 ··· 244 242 #define EMAC_STACR_PHYE 0x00004000 245 243 #define EMAC_STACR_STAC_MASK 0x00003000 246 244 #define EMAC_STACR_STAC_READ 0x00001000 247 - #define EMAC_STACR_STAC_WRITE 0x00002000 245 + #define EMAC_STACR_STAC_WRITE 0x00000800 248 246 #define EMAC_STACR_OPBC_MASK 0x00000C00 249 247 #define EMAC_STACR_OPBC_50 0x00000000 250 248 #define EMAC_STACR_OPBC_66 0x00000400
+2
drivers/net/ethernet/intel/i40e/i40e_main.c
··· 7505 7505 { 7506 7506 struct i40e_vsi *vsi = np->vsi; 7507 7507 7508 + if (!tc_can_offload(vsi->netdev)) 7509 + return -EOPNOTSUPP; 7508 7510 if (cls_flower->common.chain_index) 7509 7511 return -EOPNOTSUPP; 7510 7512
+2 -8
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
··· 1942 1942 dipn = htonl(dip); 1943 1943 dev = mlxsw_sp->router->rifs[rif]->dev; 1944 1944 n = neigh_lookup(&arp_tbl, &dipn, dev); 1945 - if (!n) { 1946 - netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", 1947 - &dip); 1945 + if (!n) 1948 1946 return; 1949 - } 1950 1947 1951 1948 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); 1952 1949 neigh_event_send(n, NULL); ··· 1970 1973 1971 1974 dev = mlxsw_sp->router->rifs[rif]->dev; 1972 1975 n = neigh_lookup(&nd_tbl, &dip, dev); 1973 - if (!n) { 1974 - netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", 1975 - &dip); 1976 + if (!n) 1976 1977 return; 1977 - } 1978 1978 1979 1979 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); 1980 1980 neigh_event_send(n, NULL);
+17 -14
drivers/net/ethernet/qlogic/qed/qed_rdma.c
··· 358 358 kfree(p_rdma_info); 359 359 } 360 360 361 + static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) 362 + { 363 + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; 364 + 365 + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); 366 + 367 + spin_lock_bh(&p_hwfn->p_rdma_info->lock); 368 + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); 369 + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); 370 + } 371 + 372 + static void qed_rdma_free_reserved_lkey(struct qed_hwfn *p_hwfn) 373 + { 374 + qed_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey); 375 + } 376 + 361 377 static void qed_rdma_free(struct qed_hwfn *p_hwfn) 362 378 { 363 379 DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); 364 380 381 + qed_rdma_free_reserved_lkey(p_hwfn); 365 382 qed_rdma_resc_free(p_hwfn); 366 383 } 367 384 ··· 632 615 { 633 616 struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; 634 617 635 - /* The first DPI is reserved for the Kernel */ 636 - __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap); 637 - 638 618 /* Tid 0 will be used as the key for "reserved MR". 639 619 * The driver should allocate memory for it so it can be loaded but no 640 620 * ramrod should be passed on it. ··· 809 795 810 796 /* Return struct with device parameters */ 811 797 return p_hwfn->p_rdma_info->dev; 812 - } 813 - 814 - static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) 815 - { 816 - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; 817 - 818 - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); 819 - 820 - spin_lock_bh(&p_hwfn->p_rdma_info->lock); 821 - qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); 822 - spin_unlock_bh(&p_hwfn->p_rdma_info->lock); 823 798 } 824 799 825 800 static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
+6 -5
drivers/net/ppp/pppoe.c
··· 842 842 struct pppoe_hdr *ph; 843 843 struct net_device *dev; 844 844 char *start; 845 + int hlen; 845 846 846 847 lock_sock(sk); 847 848 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { ··· 861 860 if (total_len > (dev->mtu + dev->hard_header_len)) 862 861 goto end; 863 862 864 - 865 - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, 866 - 0, GFP_KERNEL); 863 + hlen = LL_RESERVED_SPACE(dev); 864 + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + 865 + dev->needed_tailroom, 0, GFP_KERNEL); 867 866 if (!skb) { 868 867 error = -ENOMEM; 869 868 goto end; 870 869 } 871 870 872 871 /* Reserve space for headers. */ 873 - skb_reserve(skb, dev->hard_header_len); 872 + skb_reserve(skb, hlen); 874 873 skb_reset_network_header(skb); 875 874 876 875 skb->dev = dev; ··· 931 930 /* Copy the data if there is no space for the header or if it's 932 931 * read-only. 933 932 */ 934 - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) 933 + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) 935 934 goto abort; 936 935 937 936 __skb_push(skb, sizeof(*ph));
+1 -1
drivers/net/vmxnet3/vmxnet3_drv.c
··· 1616 1616 rq->rx_ring[i].basePA); 1617 1617 rq->rx_ring[i].base = NULL; 1618 1618 } 1619 - rq->buf_info[i] = NULL; 1620 1619 } 1621 1620 1622 1621 if (rq->data_ring.base) { ··· 1637 1638 (rq->rx_ring[0].size + rq->rx_ring[1].size); 1638 1639 dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], 1639 1640 rq->buf_info_pa); 1641 + rq->buf_info[0] = rq->buf_info[1] = NULL; 1640 1642 } 1641 1643 } 1642 1644
+5 -1
drivers/vhost/vhost.c
··· 904 904 { 905 905 int i = 0; 906 906 for (i = 0; i < d->nvqs; ++i) 907 - mutex_lock(&d->vqs[i]->mutex); 907 + mutex_lock_nested(&d->vqs[i]->mutex, i); 908 908 } 909 909 910 910 static void vhost_dev_unlock_vqs(struct vhost_dev *d) ··· 1015 1015 vhost_iotlb_notify_vq(dev, msg); 1016 1016 break; 1017 1017 case VHOST_IOTLB_INVALIDATE: 1018 + if (!dev->iotlb) { 1019 + ret = -EFAULT; 1020 + break; 1021 + } 1018 1022 vhost_vq_meta_reset(dev); 1019 1023 vhost_del_umem_range(dev->iotlb, msg->iova, 1020 1024 msg->iova + msg->size - 1);
+1
include/net/ipv6.h
··· 331 331 int flags); 332 332 int ip6_flowlabel_init(void); 333 333 void ip6_flowlabel_cleanup(void); 334 + bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); 334 335 335 336 static inline void fl6_sock_release(struct ip6_flowlabel *fl) 336 337 {
+1 -1
include/net/pkt_cls.h
··· 522 522 { 523 523 switch (layer) { 524 524 case TCF_LAYER_LINK: 525 - return skb->data; 525 + return skb_mac_header(skb); 526 526 case TCF_LAYER_NETWORK: 527 527 return skb_network_header(skb); 528 528 case TCF_LAYER_TRANSPORT:
+1
net/ipv4/xfrm4_mode_tunnel.c
··· 92 92 93 93 skb_reset_network_header(skb); 94 94 skb_mac_header_rebuild(skb); 95 + eth_hdr(skb)->h_proto = skb->protocol; 95 96 96 97 err = 0; 97 98
+1 -1
net/ipv6/ip6_output.c
··· 166 166 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 167 167 } 168 168 169 - static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) 169 + bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) 170 170 { 171 171 if (!np->autoflowlabel_set) 172 172 return ip6_default_np_autolabel(net);
+1 -1
net/ipv6/ipv6_sockglue.c
··· 1336 1336 break; 1337 1337 1338 1338 case IPV6_AUTOFLOWLABEL: 1339 - val = np->autoflowlabel; 1339 + val = ip6_autoflowlabel(sock_net(sk), np); 1340 1340 break; 1341 1341 1342 1342 case IPV6_RECVFRAGSIZE:
+1
net/ipv6/xfrm6_mode_tunnel.c
··· 92 92 93 93 skb_reset_network_header(skb); 94 94 skb_mac_header_rebuild(skb); 95 + eth_hdr(skb)->h_proto = skb->protocol; 95 96 96 97 err = 0; 97 98
+21 -4
net/kcm/kcmsock.c
··· 1387 1387 if (!csk) 1388 1388 return -EINVAL; 1389 1389 1390 - /* We must prevent loops or risk deadlock ! */ 1391 - if (csk->sk_family == PF_KCM) 1390 + /* Only allow TCP sockets to be attached for now */ 1391 + if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || 1392 + csk->sk_protocol != IPPROTO_TCP) 1393 + return -EOPNOTSUPP; 1394 + 1395 + /* Don't allow listeners or closed sockets */ 1396 + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) 1392 1397 return -EOPNOTSUPP; 1393 1398 1394 1399 psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); ··· 1410 1405 return err; 1411 1406 } 1412 1407 1413 - sock_hold(csk); 1414 - 1415 1408 write_lock_bh(&csk->sk_callback_lock); 1409 + 1410 + /* Check if sk_user_data is aready by KCM or someone else. 1411 + * Must be done under lock to prevent race conditions. 1412 + */ 1413 + if (csk->sk_user_data) { 1414 + write_unlock_bh(&csk->sk_callback_lock); 1415 + strp_done(&psock->strp); 1416 + kmem_cache_free(kcm_psockp, psock); 1417 + return -EALREADY; 1418 + } 1419 + 1416 1420 psock->save_data_ready = csk->sk_data_ready; 1417 1421 psock->save_write_space = csk->sk_write_space; 1418 1422 psock->save_state_change = csk->sk_state_change; ··· 1429 1415 csk->sk_data_ready = psock_data_ready; 1430 1416 csk->sk_write_space = psock_write_space; 1431 1417 csk->sk_state_change = psock_state_change; 1418 + 1432 1419 write_unlock_bh(&csk->sk_callback_lock); 1420 + 1421 + sock_hold(csk); 1433 1422 1434 1423 /* Finished initialization, now add the psock to the MUX. */ 1435 1424 spin_lock_bh(&mux->lock);
+1 -1
net/sched/em_nbyte.c
··· 51 51 if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) 52 52 return 0; 53 53 54 - return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); 54 + return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len); 55 55 } 56 56 57 57 static struct tcf_ematch_ops em_nbyte_ops = {
+1
net/xfrm/xfrm_device.c
··· 102 102 103 103 err = dev->xfrmdev_ops->xdo_dev_state_add(x); 104 104 if (err) { 105 + xso->dev = NULL; 105 106 dev_put(dev); 106 107 return err; 107 108 }
+8 -4
net/xfrm/xfrm_state.c
··· 317 317 318 318 if (!type && try_load) { 319 319 request_module("xfrm-offload-%d-%d", family, proto); 320 - try_load = 0; 320 + try_load = false; 321 321 goto retry; 322 322 } 323 323 ··· 2272 2272 goto error; 2273 2273 } 2274 2274 2275 - x->km.state = XFRM_STATE_VALID; 2276 - 2277 2275 error: 2278 2276 return err; 2279 2277 } ··· 2280 2282 2281 2283 int xfrm_init_state(struct xfrm_state *x) 2282 2284 { 2283 - return __xfrm_init_state(x, true, false); 2285 + int err; 2286 + 2287 + err = __xfrm_init_state(x, true, false); 2288 + if (!err) 2289 + x->km.state = XFRM_STATE_VALID; 2290 + 2291 + return err; 2284 2292 } 2285 2293 2286 2294 EXPORT_SYMBOL(xfrm_init_state);
+11 -7
net/xfrm/xfrm_user.c
··· 598 598 goto error; 599 599 } 600 600 601 - if (attrs[XFRMA_OFFLOAD_DEV]) { 602 - err = xfrm_dev_state_add(net, x, 603 - nla_data(attrs[XFRMA_OFFLOAD_DEV])); 604 - if (err) 605 - goto error; 606 - } 607 - 608 601 if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, 609 602 attrs[XFRMA_REPLAY_ESN_VAL]))) 610 603 goto error; ··· 612 619 613 620 /* override default values from above */ 614 621 xfrm_update_ae_params(x, attrs, 0); 622 + 623 + /* configure the hardware if offload is requested */ 624 + if (attrs[XFRMA_OFFLOAD_DEV]) { 625 + err = xfrm_dev_state_add(net, x, 626 + nla_data(attrs[XFRMA_OFFLOAD_DEV])); 627 + if (err) 628 + goto error; 629 + } 615 630 616 631 return x; 617 632 ··· 662 661 __xfrm_state_put(x); 663 662 goto out; 664 663 } 664 + 665 + if (x->km.state == XFRM_STATE_VOID) 666 + x->km.state = XFRM_STATE_VALID; 665 667 666 668 c.seq = nlh->nlmsg_seq; 667 669 c.portid = nlh->nlmsg_pid;