Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

1) Fix refcount leak in act_ipt during replace, from Davide Caratti.

2) Set task state properly in tun during blocking reads, from Timur
Celik.

3) Leaked reference in DSA, from Wen Yang.

4) NULL deref in act_tunnel_key, from Vlad Buslov.

5) cipso_v4_error can reference the skb IPCB in inappropriate contexts
thus referencing garbage, from Nazarov Sergey.

6) Don't accept RTA_VIA and RTA_GATEWAY in contexts where those
attributes make no sense.

7) Fix hung sendto in tipc, from Tung Nguyen.

8) Out-of-bounds access in netlabel, from Paul Moore.

9) Grant reference leak in xen-netback, from Igor Druzhinin.

10) Fix tx stalls with lan743x, from Bryan Whitehead.

11) Fix interrupt storm with mv88e6xxx, from Heiner Kallweit.

12) Memory leak in sit on device registry failure, from Mao Wenan.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (44 commits)
net: sit: fix memory leak in sit_init_net()
net: dsa: mv88e6xxx: Fix statistics on mv88e6161
geneve: correctly handle ipv6.disable module parameter
net: dsa: mv88e6xxx: prevent interrupt storm caused by mv88e6390x_port_set_cmode
bpf: fix sanitation rewrite in case of non-pointers
ipv4: Add ICMPv6 support when parse route ipproto
MIPS: eBPF: Fix icache flush end address
lan743x: Fix TX Stall Issue
net: phy: phylink: fix uninitialized variable in phylink_get_mac_state
net: aquantia: regression on cpus with high cores: set mode with 8 queues
selftests: fixes for UDP GRO
bpf: drop refcount if bpf_map_new_fd() fails in map_create()
net: dsa: mv88e6xxx: power serdes on/off for 10G interfaces on 6390X
net: dsa: mv88e6xxx: Fix u64 statistics
xen-netback: don't populate the hash cache on XenBus disconnect
xen-netback: fix occasional leak of grant ref mappings under memory pressure
sctp: chunk.c: correct format string for size_t in printk
net: netem: fix skb length BUG_ON in __skb_to_sgvec
netlabel: fix out-of-bounds memory accesses
ipv4: Pass original device to ip_rcv_finish_core
...

+1 -1
arch/mips/net/ebpf_jit.c
··· 1819 1819 1820 1820 /* Update the icache */ 1821 1821 flush_icache_range((unsigned long)ctx.target, 1822 - (unsigned long)(ctx.target + ctx.idx * sizeof(u32))); 1822 + (unsigned long)&ctx.target[ctx.idx]); 1823 1823 1824 1824 if (bpf_jit_enable > 1) 1825 1825 /* Dump JIT code */
+6
drivers/net/dsa/lantiq_gswip.c
··· 1162 1162 1163 1163 module_platform_driver(gswip_driver); 1164 1164 1165 + MODULE_FIRMWARE("lantiq/xrx300_phy11g_a21.bin"); 1166 + MODULE_FIRMWARE("lantiq/xrx300_phy22f_a21.bin"); 1167 + MODULE_FIRMWARE("lantiq/xrx200_phy11g_a14.bin"); 1168 + MODULE_FIRMWARE("lantiq/xrx200_phy11g_a22.bin"); 1169 + MODULE_FIRMWARE("lantiq/xrx200_phy22f_a14.bin"); 1170 + MODULE_FIRMWARE("lantiq/xrx200_phy22f_a22.bin"); 1165 1171 MODULE_AUTHOR("Hauke Mehrtens <hauke@hauke-m.de>"); 1166 1172 MODULE_DESCRIPTION("Lantiq / Intel GSWIP driver"); 1167 1173 MODULE_LICENSE("GPL v2");
+12 -2
drivers/net/dsa/mv88e6xxx/chip.c
··· 896 896 default: 897 897 return U64_MAX; 898 898 } 899 - value = (((u64)high) << 16) | low; 899 + value = (((u64)high) << 32) | low; 900 900 return value; 901 901 } 902 902 ··· 3093 3093 .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, 3094 3094 .port_link_state = mv88e6352_port_link_state, 3095 3095 .port_get_cmode = mv88e6185_port_get_cmode, 3096 - .stats_snapshot = mv88e6320_g1_stats_snapshot, 3096 + .stats_snapshot = mv88e6xxx_g1_stats_snapshot, 3097 3097 .stats_set_histogram = mv88e6095_g1_stats_set_histogram, 3098 3098 .stats_get_sset_count = mv88e6095_stats_get_sset_count, 3099 3099 .stats_get_strings = mv88e6095_stats_get_strings, ··· 4595 4595 return 0; 4596 4596 } 4597 4597 4598 + static void mv88e6xxx_ports_cmode_init(struct mv88e6xxx_chip *chip) 4599 + { 4600 + int i; 4601 + 4602 + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) 4603 + chip->ports[i].cmode = MV88E6XXX_PORT_STS_CMODE_INVALID; 4604 + } 4605 + 4598 4606 static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds, 4599 4607 int port) 4600 4608 { ··· 4638 4630 err = mv88e6xxx_detect(chip); 4639 4631 if (err) 4640 4632 goto free; 4633 + 4634 + mv88e6xxx_ports_cmode_init(chip); 4641 4635 4642 4636 mutex_lock(&chip->reg_lock); 4643 4637 err = mv88e6xxx_switch_reset(chip);
+6 -2
drivers/net/dsa/mv88e6xxx/port.c
··· 398 398 cmode = 0; 399 399 } 400 400 401 + /* cmode doesn't change, nothing to do for us */ 402 + if (cmode == chip->ports[port].cmode) 403 + return 0; 404 + 401 405 lane = mv88e6390x_serdes_get_lane(chip, port); 402 406 if (lane < 0) 403 407 return lane; ··· 412 408 return err; 413 409 } 414 410 415 - err = mv88e6390_serdes_power(chip, port, false); 411 + err = mv88e6390x_serdes_power(chip, port, false); 416 412 if (err) 417 413 return err; 418 414 ··· 428 424 if (err) 429 425 return err; 430 426 431 - err = mv88e6390_serdes_power(chip, port, true); 427 + err = mv88e6390x_serdes_power(chip, port, true); 432 428 if (err) 433 429 return err; 434 430
+1
drivers/net/dsa/mv88e6xxx/port.h
··· 52 52 #define MV88E6185_PORT_STS_CMODE_1000BASE_X 0x0005 53 53 #define MV88E6185_PORT_STS_CMODE_PHY 0x0006 54 54 #define MV88E6185_PORT_STS_CMODE_DISABLED 0x0007 55 + #define MV88E6XXX_PORT_STS_CMODE_INVALID 0xff 55 56 56 57 /* Offset 0x01: MAC (or PCS or Physical) Control Register */ 57 58 #define MV88E6XXX_PORT_MAC_CTL 0x01
+3
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
··· 275 275 276 276 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self) 277 277 { 278 + /* Tx TC/Queue number config */ 279 + hw_atl_rpb_tps_tx_tc_mode_set(self, 1U); 280 + 278 281 hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U); 279 282 hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U); 280 283 hw_atl_thm_lso_tcp_flag_of_last_pkt_set(self, 0x0F7FU);
+9
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
··· 1274 1274 HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en); 1275 1275 } 1276 1276 1277 + void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw, 1278 + u32 tx_traf_class_mode) 1279 + { 1280 + aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR, 1281 + HW_ATL_TPB_TX_TC_MODE_MSK, 1282 + HW_ATL_TPB_TX_TC_MODE_SHIFT, 1283 + tx_traf_class_mode); 1284 + } 1285 + 1277 1286 void hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw, 1278 1287 u32 tx_buff_hi_threshold_per_tc, 1279 1288 u32 buffer)
+4
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
··· 605 605 606 606 /* tpb */ 607 607 608 + /* set TX Traffic Class Mode */ 609 + void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw, 610 + u32 tx_traf_class_mode); 611 + 608 612 /* set tx buffer enable */ 609 613 void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en); 610 614
+13
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
··· 1948 1948 /* default value of bitfield tx_buf_en */ 1949 1949 #define HW_ATL_TPB_TX_BUF_EN_DEFAULT 0x0 1950 1950 1951 + /* register address for bitfield tx_tc_mode */ 1952 + #define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900 1953 + /* bitmask for bitfield tx_tc_mode */ 1954 + #define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100 1955 + /* inverted bitmask for bitfield tx_tc_mode */ 1956 + #define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF 1957 + /* lower bit position of bitfield tx_tc_mode */ 1958 + #define HW_ATL_TPB_TX_TC_MODE_SHIFT 8 1959 + /* width of bitfield tx_tc_mode */ 1960 + #define HW_ATL_TPB_TX_TC_MODE_WIDTH 1 1961 + /* default value of bitfield tx_tc_mode */ 1962 + #define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0 1963 + 1951 1964 /* tx tx{b}_hi_thresh[c:0] bitfield definitions 1952 1965 * preprocessor definitions for the bitfield "tx{b}_hi_thresh[c:0]". 1953 1966 * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+6
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 500 500 } 501 501 502 502 length >>= 9; 503 + if (unlikely(length >= ARRAY_SIZE(bnxt_lhint_arr))) { 504 + dev_warn_ratelimited(&pdev->dev, "Dropped oversize %d bytes TX packet.\n", 505 + skb->len); 506 + i = 0; 507 + goto tx_dma_error; 508 + } 503 509 flags |= bnxt_lhint_arr[length]; 504 510 txbd->tx_bd_len_flags_type = cpu_to_le32(flags); 505 511
+1 -1
drivers/net/ethernet/microchip/enc28j60.c
··· 1681 1681 MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>"); 1682 1682 MODULE_LICENSE("GPL"); 1683 1683 module_param_named(debug, debug.msg_enable, int, 0); 1684 - MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., ffff=all)"); 1684 + MODULE_PARM_DESC(debug, "Debug verbosity level in amount of bits set (0=none, ..., 31=all)"); 1685 1685 MODULE_ALIAS("spi:" DRV_NAME);
+12 -4
drivers/net/ethernet/microchip/lan743x_main.c
··· 1400 1400 } 1401 1401 1402 1402 static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, 1403 - unsigned int frame_length) 1403 + unsigned int frame_length, 1404 + int nr_frags) 1404 1405 { 1405 1406 /* called only from within lan743x_tx_xmit_frame. 1406 1407 * assuming tx->ring_lock has already been acquired. ··· 1411 1410 1412 1411 /* wrap up previous descriptor */ 1413 1412 tx->frame_data0 |= TX_DESC_DATA0_EXT_; 1413 + if (nr_frags <= 0) { 1414 + tx->frame_data0 |= TX_DESC_DATA0_LS_; 1415 + tx->frame_data0 |= TX_DESC_DATA0_IOC_; 1416 + } 1414 1417 tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; 1415 1418 tx_descriptor->data0 = tx->frame_data0; 1416 1419 ··· 1519 1514 u32 tx_tail_flags = 0; 1520 1515 1521 1516 /* wrap up previous descriptor */ 1522 - tx->frame_data0 |= TX_DESC_DATA0_LS_; 1523 - tx->frame_data0 |= TX_DESC_DATA0_IOC_; 1517 + if ((tx->frame_data0 & TX_DESC_DATA0_DTYPE_MASK_) == 1518 + TX_DESC_DATA0_DTYPE_DATA_) { 1519 + tx->frame_data0 |= TX_DESC_DATA0_LS_; 1520 + tx->frame_data0 |= TX_DESC_DATA0_IOC_; 1521 + } 1524 1522 1525 1523 tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; 1526 1524 buffer_info = &tx->buffer_info[tx->frame_tail]; ··· 1608 1600 } 1609 1601 1610 1602 if (gso) 1611 - lan743x_tx_frame_add_lso(tx, frame_length); 1603 + lan743x_tx_frame_add_lso(tx, frame_length, nr_frags); 1612 1604 1613 1605 if (nr_frags <= 0) 1614 1606 goto finish;
+8 -3
drivers/net/geneve.c
··· 692 692 static int geneve_open(struct net_device *dev) 693 693 { 694 694 struct geneve_dev *geneve = netdev_priv(dev); 695 - bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6); 696 695 bool metadata = geneve->collect_md; 696 + bool ipv4, ipv6; 697 697 int ret = 0; 698 698 699 + ipv6 = geneve->info.mode & IP_TUNNEL_INFO_IPV6 || metadata; 700 + ipv4 = !ipv6 || metadata; 699 701 #if IS_ENABLED(CONFIG_IPV6) 700 - if (ipv6 || metadata) 702 + if (ipv6) { 701 703 ret = geneve_sock_add(geneve, true); 704 + if (ret < 0 && ret != -EAFNOSUPPORT) 705 + ipv4 = false; 706 + } 702 707 #endif 703 - if (!ret && (!ipv6 || metadata)) 708 + if (ipv4) 704 709 ret = geneve_sock_add(geneve, false); 705 710 if (ret < 0) 706 711 geneve_sock_release(geneve);
+19 -3
drivers/net/hyperv/netvsc_drv.c
··· 744 744 schedule_delayed_work(&ndev_ctx->dwork, 0); 745 745 } 746 746 747 + static void netvsc_comp_ipcsum(struct sk_buff *skb) 748 + { 749 + struct iphdr *iph = (struct iphdr *)skb->data; 750 + 751 + iph->check = 0; 752 + iph->check = ip_fast_csum(iph, iph->ihl); 753 + } 754 + 747 755 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, 748 756 struct netvsc_channel *nvchan) 749 757 { ··· 778 770 /* skb is already created with CHECKSUM_NONE */ 779 771 skb_checksum_none_assert(skb); 780 772 781 - /* 782 - * In Linux, the IP checksum is always checked. 783 - * Do L4 checksum offload if enabled and present. 773 + /* Incoming packets may have IP header checksum verified by the host. 774 + * They may not have IP header checksum computed after coalescing. 775 + * We compute it here if the flags are set, because on Linux, the IP 776 + * checksum is always checked. 777 + */ 778 + if (csum_info && csum_info->receive.ip_checksum_value_invalid && 779 + csum_info->receive.ip_checksum_succeeded && 780 + skb->protocol == htons(ETH_P_IP)) 781 + netvsc_comp_ipcsum(skb); 782 + 783 + /* Do L4 checksum offload if enabled and present. 784 784 */ 785 785 if (csum_info && (net->features & NETIF_F_RXCSUM)) { 786 786 if (csum_info->receive.tcp_checksum_succeeded ||
+3
drivers/net/phy/dp83867.c
··· 19 19 #include <linux/module.h> 20 20 #include <linux/of.h> 21 21 #include <linux/phy.h> 22 + #include <linux/delay.h> 22 23 23 24 #include <dt-bindings/net/ti-dp83867.h> 24 25 ··· 325 324 err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); 326 325 if (err < 0) 327 326 return err; 327 + 328 + usleep_range(10, 20); 328 329 329 330 return dp83867_config_init(phydev); 330 331 }
+12 -1
drivers/net/phy/micrel.c
··· 344 344 return genphy_config_aneg(phydev); 345 345 } 346 346 347 + static int ksz8061_config_init(struct phy_device *phydev) 348 + { 349 + int ret; 350 + 351 + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); 352 + if (ret) 353 + return ret; 354 + 355 + return kszphy_config_init(phydev); 356 + } 357 + 347 358 static int ksz9021_load_values_from_of(struct phy_device *phydev, 348 359 const struct device_node *of_node, 349 360 u16 reg, ··· 1051 1040 .name = "Micrel KSZ8061", 1052 1041 .phy_id_mask = MICREL_PHY_ID_MASK, 1053 1042 .features = PHY_BASIC_FEATURES, 1054 - .config_init = kszphy_config_init, 1043 + .config_init = ksz8061_config_init, 1055 1044 .ack_interrupt = kszphy_ack_interrupt, 1056 1045 .config_intr = kszphy_config_intr, 1057 1046 .suspend = genphy_suspend,
+4
drivers/net/phy/phylink.c
··· 320 320 linkmode_zero(state->lp_advertising); 321 321 state->interface = pl->link_config.interface; 322 322 state->an_enabled = pl->link_config.an_enabled; 323 + state->speed = SPEED_UNKNOWN; 324 + state->duplex = DUPLEX_UNKNOWN; 325 + state->pause = MLO_PAUSE_NONE; 326 + state->an_complete = 0; 323 327 state->link = 1; 324 328 325 329 return pl->ops->mac_link_state(ndev, state);
+2 -2
drivers/net/tun.c
··· 2167 2167 } 2168 2168 2169 2169 add_wait_queue(&tfile->wq.wait, &wait); 2170 - current->state = TASK_INTERRUPTIBLE; 2171 2170 2172 2171 while (1) { 2172 + set_current_state(TASK_INTERRUPTIBLE); 2173 2173 ptr = ptr_ring_consume(&tfile->tx_ring); 2174 2174 if (ptr) 2175 2175 break; ··· 2185 2185 schedule(); 2186 2186 } 2187 2187 2188 - current->state = TASK_RUNNING; 2188 + __set_current_state(TASK_RUNNING); 2189 2189 remove_wait_queue(&tfile->wq.wait, &wait); 2190 2190 2191 2191 out:
+2
drivers/net/xen-netback/hash.c
··· 454 454 if (xenvif_hash_cache_size == 0) 455 455 return; 456 456 457 + BUG_ON(vif->hash.cache.count); 458 + 457 459 spin_lock_init(&vif->hash.cache.lock); 458 460 INIT_LIST_HEAD(&vif->hash.cache.list); 459 461 }
+7
drivers/net/xen-netback/interface.c
··· 153 153 { 154 154 struct xenvif *vif = netdev_priv(dev); 155 155 unsigned int size = vif->hash.size; 156 + unsigned int num_queues; 157 + 158 + /* If queues are not set up internally - always return 0 159 + * as the packet going to be dropped anyway */ 160 + num_queues = READ_ONCE(vif->num_queues); 161 + if (num_queues < 1) 162 + return 0; 156 163 157 164 if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) 158 165 return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
+5 -5
drivers/net/xen-netback/netback.c
··· 1072 1072 skb_frag_size_set(&frags[i], len); 1073 1073 } 1074 1074 1075 - /* Copied all the bits from the frag list -- free it. */ 1076 - skb_frag_list_init(skb); 1077 - xenvif_skb_zerocopy_prepare(queue, nskb); 1078 - kfree_skb(nskb); 1079 - 1080 1075 /* Release all the original (foreign) frags. */ 1081 1076 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1082 1077 skb_frag_unref(skb, f); ··· 1140 1145 xenvif_fill_frags(queue, skb); 1141 1146 1142 1147 if (unlikely(skb_has_frag_list(skb))) { 1148 + struct sk_buff *nskb = skb_shinfo(skb)->frag_list; 1149 + xenvif_skb_zerocopy_prepare(queue, nskb); 1143 1150 if (xenvif_handle_frag_list(queue, skb)) { 1144 1151 if (net_ratelimit()) 1145 1152 netdev_err(queue->vif->dev, ··· 1150 1153 kfree_skb(skb); 1151 1154 continue; 1152 1155 } 1156 + /* Copied all the bits from the frag list -- free it. */ 1157 + skb_frag_list_init(skb); 1158 + kfree_skb(nskb); 1153 1159 } 1154 1160 1155 1161 skb->dev = queue->vif->dev;
+1 -1
include/linux/netdevice.h
··· 3861 3861 if (debug_value == 0) /* no output */ 3862 3862 return 0; 3863 3863 /* set low N bits */ 3864 - return (1 << debug_value) - 1; 3864 + return (1U << debug_value) - 1; 3865 3865 } 3866 3866 3867 3867 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
+8 -1
include/net/icmp.h
··· 22 22 23 23 #include <net/inet_sock.h> 24 24 #include <net/snmp.h> 25 + #include <net/ip.h> 25 26 26 27 struct icmp_err { 27 28 int errno; ··· 40 39 struct sk_buff; 41 40 struct net; 42 41 43 - void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); 42 + void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, 43 + const struct ip_options *opt); 44 + static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) 45 + { 46 + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); 47 + } 48 + 44 49 int icmp_rcv(struct sk_buff *skb); 45 50 int icmp_err(struct sk_buff *skb, u32 info); 46 51 int icmp_init(void);
+3 -1
include/net/ip.h
··· 667 667 } 668 668 669 669 void ip_options_fragment(struct sk_buff *skb); 670 + int __ip_options_compile(struct net *net, struct ip_options *opt, 671 + struct sk_buff *skb, __be32 *info); 670 672 int ip_options_compile(struct net *net, struct ip_options *opt, 671 673 struct sk_buff *skb); 672 674 int ip_options_get(struct net *net, struct ip_options_rcu **optp, ··· 718 716 int ip_misc_proc_init(void); 719 717 #endif 720 718 721 - int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, 719 + int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, 722 720 struct netlink_ext_ack *extack); 723 721 724 722 #endif /* _IP_H */
+3 -3
kernel/bpf/syscall.c
··· 559 559 err = bpf_map_new_fd(map, f_flags); 560 560 if (err < 0) { 561 561 /* failed to allocate fd. 562 - * bpf_map_put() is needed because the above 562 + * bpf_map_put_with_uref() is needed because the above 563 563 * bpf_map_alloc_id() has published the map 564 564 * to the userspace and the userspace may 565 565 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 566 566 */ 567 - bpf_map_put(map); 567 + bpf_map_put_with_uref(map); 568 568 return err; 569 569 } 570 570 ··· 1986 1986 1987 1987 fd = bpf_map_new_fd(map, f_flags); 1988 1988 if (fd < 0) 1989 - bpf_map_put(map); 1989 + bpf_map_put_with_uref(map); 1990 1990 1991 1991 return fd; 1992 1992 }
+2 -1
kernel/bpf/verifier.c
··· 6920 6920 u32 off_reg; 6921 6921 6922 6922 aux = &env->insn_aux_data[i + delta]; 6923 - if (!aux->alu_state) 6923 + if (!aux->alu_state || 6924 + aux->alu_state == BPF_ALU_NON_POINTER) 6924 6925 continue; 6925 6926 6926 6927 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
+10 -6
net/dsa/dsa2.c
··· 612 612 { 613 613 struct device_node *ports, *port; 614 614 struct dsa_port *dp; 615 + int err = 0; 615 616 u32 reg; 616 - int err; 617 617 618 618 ports = of_get_child_by_name(dn, "ports"); 619 619 if (!ports) { ··· 624 624 for_each_available_child_of_node(ports, port) { 625 625 err = of_property_read_u32(port, "reg", &reg); 626 626 if (err) 627 - return err; 627 + goto out_put_node; 628 628 629 - if (reg >= ds->num_ports) 630 - return -EINVAL; 629 + if (reg >= ds->num_ports) { 630 + err = -EINVAL; 631 + goto out_put_node; 632 + } 631 633 632 634 dp = &ds->ports[reg]; 633 635 634 636 err = dsa_port_parse_of(dp, port); 635 637 if (err) 636 - return err; 638 + goto out_put_node; 637 639 } 638 640 639 - return 0; 641 + out_put_node: 642 + of_node_put(ports); 643 + return err; 640 644 } 641 645 642 646 static int dsa_switch_parse_member_of(struct dsa_switch *ds,
+1
net/dsa/port.c
··· 292 292 return ERR_PTR(-EPROBE_DEFER); 293 293 } 294 294 295 + of_node_put(phy_dn); 295 296 return phydev; 296 297 } 297 298
+17 -3
net/ipv4/cipso_ipv4.c
··· 667 667 case CIPSO_V4_MAP_PASS: 668 668 return 0; 669 669 case CIPSO_V4_MAP_TRANS: 670 - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) 670 + if ((level < doi_def->map.std->lvl.cipso_size) && 671 + (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) 671 672 return 0; 672 673 break; 673 674 } ··· 1736 1735 */ 1737 1736 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) 1738 1737 { 1738 + unsigned char optbuf[sizeof(struct ip_options) + 40]; 1739 + struct ip_options *opt = (struct ip_options *)optbuf; 1740 + 1739 1741 if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) 1740 1742 return; 1741 1743 1744 + /* 1745 + * We might be called above the IP layer, 1746 + * so we can not use icmp_send and IPCB here. 1747 + */ 1748 + 1749 + memset(opt, 0, sizeof(struct ip_options)); 1750 + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); 1751 + if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) 1752 + return; 1753 + 1742 1754 if (gateway) 1743 - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); 1755 + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); 1744 1756 else 1745 - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); 1757 + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); 1746 1758 } 1747 1759 1748 1760 /**
+4
net/ipv4/fib_frontend.c
··· 710 710 case RTA_GATEWAY: 711 711 cfg->fc_gw = nla_get_be32(attr); 712 712 break; 713 + case RTA_VIA: 714 + NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute"); 715 + err = -EINVAL; 716 + goto errout; 713 717 case RTA_PRIORITY: 714 718 cfg->fc_priority = nla_get_u32(attr); 715 719 break;
+4 -3
net/ipv4/icmp.c
··· 570 570 * MUST reply to only the first fragment. 571 571 */ 572 572 573 - void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) 573 + void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, 574 + const struct ip_options *opt) 574 575 { 575 576 struct iphdr *iph; 576 577 int room; ··· 692 691 iph->tos; 693 692 mark = IP4_REPLY_MARK(net, skb_in->mark); 694 693 695 - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) 694 + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) 696 695 goto out_unlock; 697 696 698 697 ··· 743 742 local_bh_enable(); 744 743 out:; 745 744 } 746 - EXPORT_SYMBOL(icmp_send); 745 + EXPORT_SYMBOL(__icmp_send); 747 746 748 747 749 748 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
+5 -4
net/ipv4/ip_input.c
··· 307 307 } 308 308 309 309 static int ip_rcv_finish_core(struct net *net, struct sock *sk, 310 - struct sk_buff *skb) 310 + struct sk_buff *skb, struct net_device *dev) 311 311 { 312 312 const struct iphdr *iph = ip_hdr(skb); 313 313 int (*edemux)(struct sk_buff *skb); 314 - struct net_device *dev = skb->dev; 315 314 struct rtable *rt; 316 315 int err; 317 316 ··· 399 400 400 401 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 401 402 { 403 + struct net_device *dev = skb->dev; 402 404 int ret; 403 405 404 406 /* if ingress device is enslaved to an L3 master device pass the ··· 409 409 if (!skb) 410 410 return NET_RX_SUCCESS; 411 411 412 - ret = ip_rcv_finish_core(net, sk, skb); 412 + ret = ip_rcv_finish_core(net, sk, skb, dev); 413 413 if (ret != NET_RX_DROP) 414 414 ret = dst_input(skb); 415 415 return ret; ··· 545 545 546 546 INIT_LIST_HEAD(&sublist); 547 547 list_for_each_entry_safe(skb, next, head, list) { 548 + struct net_device *dev = skb->dev; 548 549 struct dst_entry *dst; 549 550 550 551 skb_list_del_init(skb); ··· 555 554 skb = l3mdev_ip_rcv(skb); 556 555 if (!skb) 557 556 continue; 558 - if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP) 557 + if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP) 559 558 continue; 560 559 561 560 dst = skb_dst(skb);
+17 -5
net/ipv4/ip_options.c
··· 251 251 * If opt == NULL, then skb->data should point to IP header. 252 252 */ 253 253 254 - int ip_options_compile(struct net *net, 255 - struct ip_options *opt, struct sk_buff *skb) 254 + int __ip_options_compile(struct net *net, 255 + struct ip_options *opt, struct sk_buff *skb, 256 + __be32 *info) 256 257 { 257 258 __be32 spec_dst = htonl(INADDR_ANY); 258 259 unsigned char *pp_ptr = NULL; ··· 469 468 return 0; 470 469 471 470 error: 472 - if (skb) { 473 - icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); 474 - } 471 + if (info) 472 + *info = htonl((pp_ptr-iph)<<24); 475 473 return -EINVAL; 474 + } 475 + 476 + int ip_options_compile(struct net *net, 477 + struct ip_options *opt, struct sk_buff *skb) 478 + { 479 + int ret; 480 + __be32 info; 481 + 482 + ret = __ip_options_compile(net, opt, skb, &info); 483 + if (ret != 0 && skb) 484 + icmp_send(skb, ICMP_PARAMETERPROB, 0, info); 485 + return ret; 476 486 } 477 487 EXPORT_SYMBOL(ip_options_compile); 478 488
+14 -5
net/ipv4/netlink.c
··· 3 3 #include <linux/types.h> 4 4 #include <net/net_namespace.h> 5 5 #include <net/netlink.h> 6 + #include <linux/in6.h> 6 7 #include <net/ip.h> 7 8 8 - int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, 9 + int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, 9 10 struct netlink_ext_ack *extack) 10 11 { 11 12 *ip_proto = nla_get_u8(attr); ··· 14 13 switch (*ip_proto) { 15 14 case IPPROTO_TCP: 16 15 case IPPROTO_UDP: 17 - case IPPROTO_ICMP: 18 16 return 0; 19 - default: 20 - NL_SET_ERR_MSG(extack, "Unsupported ip proto"); 21 - return -EOPNOTSUPP; 17 + case IPPROTO_ICMP: 18 + if (family != AF_INET) 19 + break; 20 + return 0; 21 + #if IS_ENABLED(CONFIG_IPV6) 22 + case IPPROTO_ICMPV6: 23 + if (family != AF_INET6) 24 + break; 25 + return 0; 26 + #endif 22 27 } 28 + NL_SET_ERR_MSG(extack, "Unsupported ip proto"); 29 + return -EOPNOTSUPP; 23 30 } 24 31 EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
+1 -1
net/ipv4/route.c
··· 2803 2803 2804 2804 if (tb[RTA_IP_PROTO]) { 2805 2805 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 2806 - &ip_proto, extack); 2806 + &ip_proto, AF_INET, extack); 2807 2807 if (err) 2808 2808 return err; 2809 2809 }
+6 -1
net/ipv6/route.c
··· 4182 4182 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 4183 4183 cfg->fc_flags |= RTF_GATEWAY; 4184 4184 } 4185 + if (tb[RTA_VIA]) { 4186 + NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); 4187 + goto errout; 4188 + } 4185 4189 4186 4190 if (tb[RTA_DST]) { 4187 4191 int plen = (rtm->rtm_dst_len + 7) >> 3; ··· 4893 4889 4894 4890 if (tb[RTA_IP_PROTO]) { 4895 4891 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 4896 - &fl6.flowi6_proto, extack); 4892 + &fl6.flowi6_proto, AF_INET6, 4893 + extack); 4897 4894 if (err) 4898 4895 goto errout; 4899 4896 }
+1
net/ipv6/sit.c
··· 1873 1873 1874 1874 err_reg_dev: 1875 1875 ipip6_dev_free(sitn->fb_tunnel_dev); 1876 + free_netdev(sitn->fb_tunnel_dev); 1876 1877 err_alloc_dev: 1877 1878 return err; 1878 1879 }
+3
net/mpls/af_mpls.c
··· 1838 1838 goto errout; 1839 1839 break; 1840 1840 } 1841 + case RTA_GATEWAY: 1842 + NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute"); 1843 + goto errout; 1841 1844 case RTA_VIA: 1842 1845 { 1843 1846 if (nla_get_via(nla, &cfg->rc_via_alen,
+2 -1
net/netlabel/netlabel_kapi.c
··· 903 903 (state == 0 && (byte & bitmask) == 0)) 904 904 return bit_spot; 905 905 906 - bit_spot++; 906 + if (++bit_spot >= bitmap_len) 907 + return -1; 907 908 bitmask >>= 1; 908 909 if (bitmask == 0) { 909 910 byte = bitmap[++byte_offset];
+20
net/nfc/llcp_commands.c
··· 419 419 sock->service_name, 420 420 sock->service_name_len, 421 421 &service_name_tlv_length); 422 + if (!service_name_tlv) { 423 + err = -ENOMEM; 424 + goto error_tlv; 425 + } 422 426 size += service_name_tlv_length; 423 427 } 424 428 ··· 433 429 434 430 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, 435 431 &miux_tlv_length); 432 + if (!miux_tlv) { 433 + err = -ENOMEM; 434 + goto error_tlv; 435 + } 436 436 size += miux_tlv_length; 437 437 438 438 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); 439 + if (!rw_tlv) { 440 + err = -ENOMEM; 441 + goto error_tlv; 442 + } 439 443 size += rw_tlv_length; 440 444 441 445 pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); ··· 496 484 497 485 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, 498 486 &miux_tlv_length); 487 + if (!miux_tlv) { 488 + err = -ENOMEM; 489 + goto error_tlv; 490 + } 499 491 size += miux_tlv_length; 500 492 501 493 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); 494 + if (!rw_tlv) { 495 + err = -ENOMEM; 496 + goto error_tlv; 497 + } 502 498 size += rw_tlv_length; 503 499 504 500 skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
+20 -4
net/nfc/llcp_core.c
··· 532 532 533 533 static int nfc_llcp_build_gb(struct nfc_llcp_local *local) 534 534 { 535 - u8 *gb_cur, *version_tlv, version, version_length; 536 - u8 *lto_tlv, lto_length; 537 - u8 *wks_tlv, wks_length; 538 - u8 *miux_tlv, miux_length; 535 + u8 *gb_cur, version, version_length; 536 + u8 lto_length, wks_length, miux_length; 537 + u8 *version_tlv = NULL, *lto_tlv = NULL, 538 + *wks_tlv = NULL, *miux_tlv = NULL; 539 539 __be16 wks = cpu_to_be16(local->local_wks); 540 540 u8 gb_len = 0; 541 541 int ret = 0; ··· 543 543 version = LLCP_VERSION_11; 544 544 version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, 545 545 1, &version_length); 546 + if (!version_tlv) { 547 + ret = -ENOMEM; 548 + goto out; 549 + } 546 550 gb_len += version_length; 547 551 548 552 lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, &lto_length); 553 + if (!lto_tlv) { 554 + ret = -ENOMEM; 555 + goto out; 556 + } 549 557 gb_len += lto_length; 550 558 551 559 pr_debug("Local wks 0x%lx\n", local->local_wks); 552 560 wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); 561 + if (!wks_tlv) { 562 + ret = -ENOMEM; 563 + goto out; 564 + } 553 565 gb_len += wks_length; 554 566 555 567 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, 556 568 &miux_length); 569 + if (!miux_tlv) { 570 + ret = -ENOMEM; 571 + goto out; 572 + } 557 573 gb_len += miux_length; 558 574 559 575 gb_len += ARRAY_SIZE(llcp_magic);
+1 -2
net/sched/act_ipt.c
··· 199 199 err2: 200 200 kfree(tname); 201 201 err1: 202 - if (ret == ACT_P_CREATED) 203 - tcf_idr_release(*a, bind); 202 + tcf_idr_release(*a, bind); 204 203 return err; 205 204 } 206 205
+1 -2
net/sched/act_skbedit.c
··· 189 189 190 190 params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); 191 191 if (unlikely(!params_new)) { 192 - if (ret == ACT_P_CREATED) 193 - tcf_idr_release(*a, bind); 192 + tcf_idr_release(*a, bind); 194 193 return -ENOMEM; 195 194 } 196 195
+2 -1
net/sched/act_tunnel_key.c
··· 377 377 return ret; 378 378 379 379 release_tun_meta: 380 - dst_release(&metadata->dst); 380 + if (metadata) 381 + dst_release(&metadata->dst); 381 382 382 383 err_out: 383 384 if (exists)
+7 -3
net/sched/sch_netem.c
··· 447 447 int nb = 0; 448 448 int count = 1; 449 449 int rc = NET_XMIT_SUCCESS; 450 + int rc_drop = NET_XMIT_DROP; 450 451 451 452 /* Do not fool qdisc_drop_all() */ 452 453 skb->prev = NULL; ··· 487 486 q->duplicate = 0; 488 487 rootq->enqueue(skb2, rootq, to_free); 489 488 q->duplicate = dupsave; 489 + rc_drop = NET_XMIT_SUCCESS; 490 490 } 491 491 492 492 /* ··· 500 498 if (skb_is_gso(skb)) { 501 499 segs = netem_segment(skb, sch, to_free); 502 500 if (!segs) 503 - return NET_XMIT_DROP; 501 + return rc_drop; 504 502 } else { 505 503 segs = skb; 506 504 } ··· 523 521 1<<(prandom_u32() % 8); 524 522 } 525 523 526 - if (unlikely(sch->q.qlen >= sch->limit)) 527 - return qdisc_drop_all(skb, sch, to_free); 524 + if (unlikely(sch->q.qlen >= sch->limit)) { 525 + qdisc_drop_all(skb, sch, to_free); 526 + return rc_drop; 527 + } 528 528 529 529 qdisc_qstats_backlog_inc(sch, skb); 530 530
+1 -1
net/sctp/chunk.c
··· 192 192 if (unlikely(!max_data)) { 193 193 max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk), 194 194 sctp_datachk_len(&asoc->stream)); 195 - pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%Zu)", 195 + pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%zu)", 196 196 __func__, asoc, max_data); 197 197 } 198 198
+1
net/socket.c
··· 577 577 if (inode) 578 578 inode_lock(inode); 579 579 sock->ops->release(sock); 580 + sock->sk = NULL; 580 581 if (inode) 581 582 inode_unlock(inode); 582 583 sock->ops = NULL;
+5 -1
net/tipc/socket.c
··· 379 379 380 380 #define tipc_wait_for_cond(sock_, timeo_, condition_) \ 381 381 ({ \ 382 + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ 382 383 struct sock *sk_; \ 383 384 int rc_; \ 384 385 \ 385 386 while ((rc_ = !(condition_))) { \ 386 - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ 387 + /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \ 388 + smp_rmb(); \ 387 389 sk_ = (sock_)->sk; \ 388 390 rc_ = tipc_sk_sock_err((sock_), timeo_); \ 389 391 if (rc_) \ ··· 1985 1983 return; 1986 1984 case SOCK_WAKEUP: 1987 1985 tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); 1986 + /* coupled with smp_rmb() in tipc_wait_for_cond() */ 1987 + smp_wmb(); 1988 1988 tsk->cong_link_cnt--; 1989 1989 wakeup = true; 1990 1990 break;
+80 -16
tools/testing/selftests/net/pmtu.sh
··· 103 103 # and check that configured MTU is used on link creation and changes, and 104 104 # that MTU is properly calculated instead when MTU is not configured from 105 105 # userspace 106 + # 107 + # - cleanup_ipv4_exception 108 + # Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU 109 + # exceptions on multiple CPUs and check that the veth device tear-down 110 + # happens in a timely manner 111 + # 112 + # - cleanup_ipv6_exception 113 + # Same as above, but use IPv6 transport from A to B 114 + 106 115 107 116 # Kselftest framework requirement - SKIP code is 4. 108 117 ksft_skip=4 ··· 144 135 pmtu_vti6_default_mtu vti6: default MTU assignment 145 136 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 146 137 pmtu_vti6_link_add_mtu vti6: MTU setting on link creation 147 - pmtu_vti6_link_change_mtu vti6: MTU changes on link changes" 138 + pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 139 + cleanup_ipv4_exception ipv4: cleanup of cached exceptions 140 + cleanup_ipv6_exception ipv6: cleanup of cached exceptions" 148 141 149 142 NS_A="ns-$(mktemp -u XXXXXX)" 150 143 NS_B="ns-$(mktemp -u XXXXXX)" ··· 274 263 275 264 ${ns_a} ip link set ${encap}_a up 276 265 ${ns_b} ip link set ${encap}_b up 277 - 278 - sleep 1 279 266 } 280 267 281 268 setup_fou44() { ··· 311 302 setup_namespaces() { 312 303 for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do 313 304 ip netns add ${n} || return 1 305 + 306 + # Disable DAD, so that we don't have to wait to use the 307 + # configured IPv6 addresses 308 + ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 314 309 done 315 310 } 316 311 ··· 350 337 351 338 ${ns_a} ip link set vti${proto}_a up 352 339 ${ns_b} ip link set vti${proto}_b up 353 - 354 - sleep 1 355 340 } 356 341 357 342 setup_vti4() { ··· 386 375 387 376 ${ns_a} ip link set ${type}_a up 388 377 ${ns_b} ip link set ${type}_b up 389 - 390 - sleep 1 391 378 } 392 379 393 380 setup_geneve4() { ··· 597 588 mtu "${ns_b}" veth_B-R2 1500 
598 589 599 590 # Create route exceptions 600 - ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null 601 - ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null 591 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} > /dev/null 592 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null 602 593 603 594 # Check that exceptions have been created with the correct PMTU 604 595 pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" ··· 630 621 # Decrease remote MTU on path via R2, get new exception 631 622 mtu "${ns_r2}" veth_R2-B 400 632 623 mtu "${ns_b}" veth_B-R2 400 633 - ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null 624 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null 634 625 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 635 626 check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 636 627 ··· 647 638 check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1 648 639 649 640 # Get new exception 650 - ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null 641 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null 651 642 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 652 643 check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 653 644 } ··· 696 687 697 688 mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) 698 689 mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) 699 - ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null 690 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null 700 691 701 692 # Check that exception was created 702 693 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" ··· 776 767 777 768 mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) 778 769 mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) 779 - ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 
500)) ${dst} > /dev/null 770 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null 780 771 781 772 # Check that exception was created 782 773 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" ··· 834 825 835 826 # Send DF packet without exceeding link layer MTU, check that no 836 827 # exception is created 837 - ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null 828 + ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null 838 829 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 839 830 check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 840 831 841 832 # Now exceed link layer MTU by one byte, check that exception is created 842 833 # with the right PMTU value 843 - ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null 834 + ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null 844 835 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 845 836 check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" 846 837 } ··· 856 847 mtu "${ns_b}" veth_b 4000 857 848 mtu "${ns_a}" vti6_a 5000 858 849 mtu "${ns_b}" vti6_b 5000 859 - ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${tunnel6_b_addr} > /dev/null 850 + ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null 860 851 861 852 # Check that exception was created 862 853 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" ··· 1015 1006 fi 1016 1007 1017 1008 return ${fail} 1009 + } 1010 + 1011 + check_command() { 1012 + cmd=${1} 1013 + 1014 + if ! 
which ${cmd} > /dev/null 2>&1; then 1015 + err " missing required command: '${cmd}'" 1016 + return 1 1017 + fi 1018 + return 0 1019 + } 1020 + 1021 + test_cleanup_vxlanX_exception() { 1022 + outer="${1}" 1023 + encap="vxlan" 1024 + ll_mtu=4000 1025 + 1026 + check_command taskset || return 2 1027 + cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) 1028 + 1029 + setup namespaces routing ${encap}${outer} || return 2 1030 + trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ 1031 + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ 1032 + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B 1033 + 1034 + # Create route exception by exceeding link layer MTU 1035 + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) 1036 + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) 1037 + mtu "${ns_b}" veth_B-R1 ${ll_mtu} 1038 + mtu "${ns_r1}" veth_R1-B ${ll_mtu} 1039 + 1040 + mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) 1041 + mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) 1042 + 1043 + # Fill exception cache for multiple CPUs (2) 1044 + # we can always use inner IPv4 for that 1045 + for cpu in ${cpu_list}; do 1046 + taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null 1047 + done 1048 + 1049 + ${ns_a} ip link del dev veth_A-R1 & 1050 + iplink_pid=$! 1051 + sleep 1 1052 + if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then 1053 + err " can't delete veth device in a timely manner, PMTU dst likely leaked" 1054 + return 1 1055 + fi 1056 + } 1057 + 1058 + test_cleanup_ipv6_exception() { 1059 + test_cleanup_vxlanX_exception 6 1060 + } 1061 + 1062 + test_cleanup_ipv4_exception() { 1063 + test_cleanup_vxlanX_exception 4 1018 1064 } 1019 1065 1020 1066 usage() {
+4 -4
tools/testing/selftests/net/udpgro.sh
··· 37 37 38 38 cfg_veth 39 39 40 - ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ 40 + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ 41 41 echo "ok" || \ 42 42 echo "failed" & 43 43 ··· 81 81 # will land on the 'plain' one 82 82 ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & 83 83 pid=$! 84 - ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${family} -b ${addr2%/*} ${rx_args} && \ 84 + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ 85 85 echo "ok" || \ 86 86 echo "failed"& 87 87 ··· 99 99 100 100 cfg_veth 101 101 102 - ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -p 12345 & 103 - ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ 102 + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & 103 + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ 104 104 echo "ok" || \ 105 105 echo "failed" & 106 106
+29 -13
tools/testing/selftests/net/udpgso_bench_rx.c
··· 45 45 static int cfg_expected_pkt_nr; 46 46 static int cfg_expected_pkt_len; 47 47 static int cfg_expected_gso_size; 48 + static int cfg_connect_timeout_ms; 49 + static int cfg_rcv_timeout_ms; 48 50 static struct sockaddr_storage cfg_bind_addr; 49 51 50 52 static bool interrupted; ··· 89 87 return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); 90 88 } 91 89 92 - static void do_poll(int fd) 90 + static void do_poll(int fd, int timeout_ms) 93 91 { 94 92 struct pollfd pfd; 95 93 int ret; ··· 104 102 break; 105 103 if (ret == -1) 106 104 error(1, errno, "poll"); 107 - if (ret == 0) 108 - continue; 105 + if (ret == 0) { 106 + if (!timeout_ms) 107 + continue; 108 + 109 + timeout_ms -= 10; 110 + if (timeout_ms <= 0) { 111 + interrupted = true; 112 + break; 113 + } 114 + } 109 115 if (pfd.revents != POLLIN) 110 116 error(1, errno, "poll: 0x%x expected 0x%x\n", 111 117 pfd.revents, POLLIN); ··· 144 134 if (listen(accept_fd, 1)) 145 135 error(1, errno, "listen"); 146 136 147 - do_poll(accept_fd); 137 + do_poll(accept_fd, cfg_connect_timeout_ms); 148 138 if (interrupted) 149 139 exit(0); 150 140 ··· 283 273 284 274 static void usage(const char *filepath) 285 275 { 286 - error(1, 0, "Usage: %s [-Grtv] [-b addr] [-p port] [-l pktlen] [-n packetnr] [-S gsosize]", filepath); 276 + error(1, 0, "Usage: %s [-C connect_timeout] [-Grtv] [-b addr] [-p port]" 277 + " [-l pktlen] [-n packetnr] [-R rcv_timeout] [-S gsosize]", 278 + filepath); 287 279 } 288 280 289 281 static void parse_opts(int argc, char **argv) ··· 294 282 295 283 /* bind to any by default */ 296 284 setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); 297 - while ((c = getopt(argc, argv, "4b:Gl:n:p:rS:tv")) != -1) { 285 + while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) { 298 286 switch (c) { 299 287 case '4': 300 288 cfg_family = PF_INET; ··· 303 291 break; 304 292 case 'b': 305 293 setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); 294 + break; 295 + case 'C': 296 + cfg_connect_timeout_ms = strtoul(optarg, 
NULL, 0); 306 297 break; 307 298 case 'G': 308 299 cfg_gro_segment = true; ··· 321 306 break; 322 307 case 'r': 323 308 cfg_read_all = true; 309 + break; 310 + case 'R': 311 + cfg_rcv_timeout_ms = strtoul(optarg, NULL, 0); 324 312 break; 325 313 case 'S': 326 314 cfg_expected_gso_size = strtol(optarg, NULL, 0); ··· 347 329 348 330 static void do_recv(void) 349 331 { 332 + int timeout_ms = cfg_tcp ? cfg_rcv_timeout_ms : cfg_connect_timeout_ms; 350 333 unsigned long tnow, treport; 351 - int fd, loop = 0; 334 + int fd; 352 335 353 336 fd = do_socket(cfg_tcp); 354 337 ··· 361 342 362 343 treport = gettimeofday_ms() + 1000; 363 344 do { 364 - /* force termination after the second poll(); this cope both 365 - * with sender slower than receiver and missing packet errors 366 - */ 367 - if (cfg_expected_pkt_nr && loop++) 368 - interrupted = true; 369 - do_poll(fd); 345 + do_poll(fd, timeout_ms); 370 346 371 347 if (cfg_tcp) 372 348 do_flush_tcp(fd); ··· 378 364 bytes = packets = 0; 379 365 treport = tnow + 1000; 380 366 } 367 + 368 + timeout_ms = cfg_rcv_timeout_ms; 381 369 382 370 } while (!interrupted); 383 371