Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Merge in overtime fixes, no conflicts.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Jakub Kicinski 4 years ago 89695196 764f4eb6

+444 -162

26 changed files

expand all collapse all

MAINTAINERS

drivers

net

ethernet

broadcom

genet

bcmgenet.c

ibm

ibmvnic.c

ibmvnic.h

intel

ice

ice.h

ice_idc.c

ice_main.c

wwan

qcom_bam_dmux.c

include

net

netfilter

nf_flow_table.h

net

ax25

af_ax25.c

ax25_subr.c

dsa

dsa2.c

ipv4

route.c

tcp_output.c

mptcp

protocol.c

netfilter

nf_flow_table_inet.c

nf_flow_table_ip.c

nf_tables_api.c

nf_tables_core.c

netlink

af_netlink.c

openvswitch

conntrack.c

tipc

socket.c

unix

af_unix.c

xdp

xsk.c

tools

testing

selftests

net

af_unix

test_unix_oob.c

pmtu.sh

-1

MAINTAINERS

reviewed

··· 10770 10770 M: John Fastabend <john.fastabend@gmail.com> 10771 10771 M: Daniel Borkmann <daniel@iogearbox.net> 10772 10772 M: Jakub Sitnicki <jakub@cloudflare.com> 10773 10773 - M: Lorenz Bauer <lmb@cloudflare.com> 10774 10773 L: netdev@vger.kernel.org 10775 10774 L: bpf@vger.kernel.org 10776 10775 S: Maintained

+2 -2

drivers/net/ethernet/broadcom/genet/bcmgenet.c

reviewed

··· 76 76 if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) 77 77 __raw_writel(value, offset); 78 78 else 79 79 - writel_relaxed(value, offset); 79 79 + writel(value, offset); 80 80 } 81 81 82 82 static inline u32 bcmgenet_readl(void __iomem *offset) ··· 84 84 if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) 85 85 return __raw_readl(offset); 86 86 else 87 87 - return readl_relaxed(offset); 87 87 + return readl(offset); 88 88 } 89 89 90 90 static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,

+50 -13

drivers/net/ethernet/ibm/ibmvnic.c

reviewed

··· 1430 1430 return rc; 1431 1431 } 1432 1432 1433 1433 + adapter->tx_queues_active = true; 1434 1434 + 1435 1435 + /* Since queues were stopped until now, there shouldn't be any 1436 1436 + * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we 1437 1437 + * don't need the synchronize_rcu()? Leaving it for consistency 1438 1438 + * with setting ->tx_queues_active = false. 1439 1439 + */ 1440 1440 + synchronize_rcu(); 1441 1441 + 1433 1442 netif_tx_start_all_queues(netdev); 1434 1443 1435 1444 if (prev_state == VNIC_CLOSED) { ··· 1613 1604 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 1614 1605 1615 1606 /* ensure that transmissions are stopped if called by do_reset */ 1607 1607 + 1608 1608 + adapter->tx_queues_active = false; 1609 1609 + 1610 1610 + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active 1611 1611 + * update so they don't restart a queue after we stop it below. 1612 1612 + */ 1613 1613 + synchronize_rcu(); 1614 1614 + 1616 1615 if (test_bit(0, &adapter->resetting)) 1617 1616 netif_tx_disable(netdev); 1618 1617 else ··· 1860 1843 tx_buff->skb = NULL; 1861 1844 adapter->netdev->stats.tx_dropped++; 1862 1845 } 1846 1846 + 1863 1847 ind_bufp->index = 0; 1848 1848 + 1864 1849 if (atomic_sub_return(entries, &tx_scrq->used) <= 1865 1850 (adapter->req_tx_entries_per_subcrq / 2) && 1866 1866 - __netif_subqueue_stopped(adapter->netdev, queue_num) && 1867 1867 - !test_bit(0, &adapter->resetting)) { 1868 1868 - netif_wake_subqueue(adapter->netdev, queue_num); 1869 1869 - netdev_dbg(adapter->netdev, "Started queue %d\n", 1870 1870 - queue_num); 1851 1851 + __netif_subqueue_stopped(adapter->netdev, queue_num)) { 1852 1852 + rcu_read_lock(); 1853 1853 + 1854 1854 + if (adapter->tx_queues_active) { 1855 1855 + netif_wake_subqueue(adapter->netdev, queue_num); 1856 1856 + netdev_dbg(adapter->netdev, "Started queue %d\n", 1857 1857 + queue_num); 1858 1858 + } 1859 1859 + 1860 1860 + rcu_read_unlock(); 1871 1861 } 1872 1862 } 1873 1863 ··· 
1929 1905 int index = 0; 1930 1906 u8 proto = 0; 1931 1907 1932 1932 - tx_scrq = adapter->tx_scrq[queue_num]; 1933 1933 - txq = netdev_get_tx_queue(netdev, queue_num); 1934 1934 - ind_bufp = &tx_scrq->ind_buf; 1935 1935 - 1936 1936 - if (test_bit(0, &adapter->resetting)) { 1908 1908 + /* If a reset is in progress, drop the packet since 1909 1909 + * the scrqs may get torn down. Otherwise use the 1910 1910 + * rcu to ensure reset waits for us to complete. 1911 1911 + */ 1912 1912 + rcu_read_lock(); 1913 1913 + if (!adapter->tx_queues_active) { 1937 1914 dev_kfree_skb_any(skb); 1938 1915 1939 1916 tx_send_failed++; ··· 1943 1918 goto out; 1944 1919 } 1945 1920 1921 1921 + tx_scrq = adapter->tx_scrq[queue_num]; 1922 1922 + txq = netdev_get_tx_queue(netdev, queue_num); 1923 1923 + ind_bufp = &tx_scrq->ind_buf; 1924 1924 + 1946 1925 if (ibmvnic_xmit_workarounds(skb, netdev)) { 1947 1926 tx_dropped++; 1948 1927 tx_send_failed++; ··· 1954 1925 ibmvnic_tx_scrq_flush(adapter, tx_scrq); 1955 1926 goto out; 1956 1927 } 1928 1928 + 1957 1929 if (skb_is_gso(skb)) 1958 1930 tx_pool = &adapter->tso_pool[queue_num]; 1959 1931 else ··· 2109 2079 netif_carrier_off(netdev); 2110 2080 } 2111 2081 out: 2082 2082 + rcu_read_unlock(); 2112 2083 netdev->stats.tx_dropped += tx_dropped; 2113 2084 netdev->stats.tx_bytes += tx_bytes; 2114 2085 netdev->stats.tx_packets += tx_packets; ··· 3780 3749 (adapter->req_tx_entries_per_subcrq / 2) && 3781 3750 __netif_subqueue_stopped(adapter->netdev, 3782 3751 scrq->pool_index)) { 3783 3783 - netif_wake_subqueue(adapter->netdev, scrq->pool_index); 3784 3784 - netdev_dbg(adapter->netdev, "Started queue %d\n", 3785 3785 - scrq->pool_index); 3752 3752 + rcu_read_lock(); 3753 3753 + if (adapter->tx_queues_active) { 3754 3754 + netif_wake_subqueue(adapter->netdev, 3755 3755 + scrq->pool_index); 3756 3756 + netdev_dbg(adapter->netdev, 3757 3757 + "Started queue %d\n", 3758 3758 + scrq->pool_index); 3759 3759 + } 3760 3760 + rcu_read_unlock(); 3786 3761 } 
3787 3762 } 3788 3763

+5 -2

drivers/net/ethernet/ibm/ibmvnic.h

reviewed

··· 1006 1006 struct work_struct ibmvnic_reset; 1007 1007 struct delayed_work ibmvnic_delayed_reset; 1008 1008 unsigned long resetting; 1009 1009 - bool napi_enabled, from_passive_init; 1010 1010 - bool login_pending; 1011 1009 /* last device reset time */ 1012 1010 unsigned long last_reset_time; 1013 1011 1012 1012 + bool napi_enabled; 1013 1013 + bool from_passive_init; 1014 1014 + bool login_pending; 1015 1015 + /* protected by rcu */ 1016 1016 + bool tx_queues_active; 1014 1017 bool failover_pending; 1015 1018 bool force_reset_recovery; 1016 1019

drivers/net/ethernet/intel/ice/ice.h

reviewed

··· 290 290 ICE_LINK_DEFAULT_OVERRIDE_PENDING, 291 291 ICE_PHY_INIT_COMPLETE, 292 292 ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ 293 293 + ICE_AUX_ERR_PENDING, 293 294 ICE_STATE_NBITS /* must be last */ 294 295 }; 295 296 ··· 558 557 wait_queue_head_t reset_wait_queue; 559 558 560 559 u32 hw_csum_rx_error; 560 560 + u32 oicr_err_reg; 561 561 u16 oicr_idx; /* Other interrupt cause MSIX vector index */ 562 562 u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ 563 563 u16 max_pf_txqs; /* Total Tx queues PF wide */

drivers/net/ethernet/intel/ice/ice_idc.c

reviewed

··· 34 34 { 35 35 struct iidc_auxiliary_drv *iadrv; 36 36 37 37 + if (WARN_ON_ONCE(!in_task())) 38 38 + return; 39 39 + 37 40 if (!pf->adev) 38 41 return; 39 42

+15 -10

drivers/net/ethernet/intel/ice/ice_main.c

reviewed

··· 2278 2278 return; 2279 2279 } 2280 2280 2281 2281 + if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { 2282 2282 + struct iidc_event *event; 2283 2283 + 2284 2284 + event = kzalloc(sizeof(*event), GFP_KERNEL); 2285 2285 + if (event) { 2286 2286 + set_bit(IIDC_EVENT_CRIT_ERR, event->type); 2287 2287 + /* report the entire OICR value to AUX driver */ 2288 2288 + swap(event->reg, pf->oicr_err_reg); 2289 2289 + ice_send_event_to_aux(pf, event); 2290 2290 + kfree(event); 2291 2291 + } 2292 2292 + } 2293 2293 + 2281 2294 if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) { 2282 2295 /* Plug aux device per request */ 2283 2296 ice_plug_aux_dev(pf); ··· 3077 3064 3078 3065 #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) 3079 3066 if (oicr & ICE_AUX_CRIT_ERR) { 3080 3080 - struct iidc_event *event; 3081 3081 - 3067 3067 + pf->oicr_err_reg |= oicr; 3068 3068 + set_bit(ICE_AUX_ERR_PENDING, pf->state); 3082 3069 ena_mask &= ~ICE_AUX_CRIT_ERR; 3083 3083 - event = kzalloc(sizeof(*event), GFP_ATOMIC); 3084 3084 - if (event) { 3085 3085 - set_bit(IIDC_EVENT_CRIT_ERR, event->type); 3086 3086 - /* report the entire OICR value to AUX driver */ 3087 3087 - event->reg = oicr; 3088 3088 - ice_send_event_to_aux(pf, event); 3089 3089 - kfree(event); 3090 3090 - } 3091 3070 } 3092 3071 3093 3072 /* Report any remaining unexpected interrupts */

+1 -1

drivers/net/wwan/qcom_bam_dmux.c

reviewed

··· 755 755 return 0; 756 756 757 757 dmux->tx = dma_request_chan(dev, "tx"); 758 758 - if (IS_ERR(dmux->rx)) { 758 758 + if (IS_ERR(dmux->tx)) { 759 759 dev_err(dev, "Failed to request TX DMA channel: %pe\n", dmux->tx); 760 760 dmux->tx = NULL; 761 761 bam_dmux_runtime_suspend(dev);

+18

include/net/netfilter/nf_flow_table.h

reviewed

··· 10 10 #include <linux/netfilter/nf_conntrack_tuple_common.h> 11 11 #include <net/flow_offload.h> 12 12 #include <net/dst.h> 13 13 + #include <linux/if_pppox.h> 14 14 + #include <linux/ppp_defs.h> 13 15 14 16 struct nf_flowtable; 15 17 struct nf_flow_rule; ··· 318 316 319 317 int nf_flow_table_offload_init(void); 320 318 void nf_flow_table_offload_exit(void); 319 319 + 320 320 + static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) 321 321 + { 322 322 + __be16 proto; 323 323 + 324 324 + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 325 325 + sizeof(struct pppoe_hdr))); 326 326 + switch (proto) { 327 327 + case htons(PPP_IP): 328 328 + return htons(ETH_P_IP); 329 329 + case htons(PPP_IPV6): 330 330 + return htons(ETH_P_IPV6); 331 331 + } 332 332 + 333 333 + return 0; 334 334 + } 321 335 322 336 #endif /* _NF_FLOW_TABLE_H */

+13 -5

net/ax25/af_ax25.c

reviewed

··· 89 89 sk = s->sk; 90 90 if (!sk) { 91 91 spin_unlock_bh(&ax25_list_lock); 92 92 - s->ax25_dev = NULL; 93 92 ax25_disconnect(s, ENETUNREACH); 93 93 + s->ax25_dev = NULL; 94 94 spin_lock_bh(&ax25_list_lock); 95 95 goto again; 96 96 } 97 97 sock_hold(sk); 98 98 spin_unlock_bh(&ax25_list_lock); 99 99 lock_sock(sk); 100 100 - s->ax25_dev = NULL; 101 101 - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); 102 102 - ax25_dev_put(ax25_dev); 103 100 ax25_disconnect(s, ENETUNREACH); 101 101 + s->ax25_dev = NULL; 102 102 + if (sk->sk_socket) { 103 103 + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); 104 104 + ax25_dev_put(ax25_dev); 105 105 + } 104 106 release_sock(sk); 105 107 spin_lock_bh(&ax25_list_lock); 106 108 sock_put(sk); ··· 981 979 { 982 980 struct sock *sk = sock->sk; 983 981 ax25_cb *ax25; 982 982 + ax25_dev *ax25_dev; 984 983 985 984 if (sk == NULL) 986 985 return 0; 987 986 988 987 sock_hold(sk); 989 989 - sock_orphan(sk); 990 988 lock_sock(sk); 989 989 + sock_orphan(sk); 991 990 ax25 = sk_to_ax25(sk); 991 991 + ax25_dev = ax25->ax25_dev; 992 992 + if (ax25_dev) { 993 993 + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); 994 994 + ax25_dev_put(ax25_dev); 995 995 + } 992 996 993 997 if (sk->sk_type == SOCK_SEQPACKET) { 994 998 switch (ax25->state) {

+14 -6

net/ax25/ax25_subr.c

reviewed

··· 261 261 { 262 262 ax25_clear_queues(ax25); 263 263 264 264 - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) 265 265 - ax25_stop_heartbeat(ax25); 266 266 - ax25_stop_t1timer(ax25); 267 267 - ax25_stop_t2timer(ax25); 268 268 - ax25_stop_t3timer(ax25); 269 269 - ax25_stop_idletimer(ax25); 264 264 + if (reason == ENETUNREACH) { 265 265 + del_timer_sync(&ax25->timer); 266 266 + del_timer_sync(&ax25->t1timer); 267 267 + del_timer_sync(&ax25->t2timer); 268 268 + del_timer_sync(&ax25->t3timer); 269 269 + del_timer_sync(&ax25->idletimer); 270 270 + } else { 271 271 + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) 272 272 + ax25_stop_heartbeat(ax25); 273 273 + ax25_stop_t1timer(ax25); 274 274 + ax25_stop_t2timer(ax25); 275 275 + ax25_stop_t3timer(ax25); 276 276 + ax25_stop_idletimer(ax25); 277 277 + } 270 278 271 279 ax25->state = AX25_STATE_0; 272 280

net/dsa/dsa2.c

reviewed

··· 1786 1786 struct dsa_port *dp; 1787 1787 1788 1788 mutex_lock(&dsa2_mutex); 1789 1789 + 1790 1790 + if (!ds->setup) 1791 1791 + goto out; 1792 1792 + 1789 1793 rtnl_lock(); 1790 1794 1791 1795 dsa_switch_for_each_user_port(dp, ds) { ··· 1806 1802 dp->master->dsa_ptr = NULL; 1807 1803 1808 1804 rtnl_unlock(); 1805 1805 + out: 1809 1806 mutex_unlock(&dsa2_mutex); 1810 1807 } 1811 1808 EXPORT_SYMBOL_GPL(dsa_switch_shutdown);

+14 -4

net/ipv4/route.c

reviewed

··· 498 498 } 499 499 EXPORT_SYMBOL(__ip_select_ident); 500 500 501 501 + static void ip_rt_fix_tos(struct flowi4 *fl4) 502 502 + { 503 503 + __u8 tos = RT_FL_TOS(fl4); 504 504 + 505 505 + fl4->flowi4_tos = tos & IPTOS_RT_MASK; 506 506 + fl4->flowi4_scope = tos & RTO_ONLINK ? 507 507 + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; 508 508 + } 509 509 + 501 510 static void __build_flow_key(const struct net *net, struct flowi4 *fl4, 502 511 const struct sock *sk, 503 512 const struct iphdr *iph, ··· 832 823 rt = (struct rtable *) dst; 833 824 834 825 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); 826 826 + ip_rt_fix_tos(&fl4); 835 827 __ip_do_redirect(rt, skb, &fl4, true); 836 828 } 837 829 ··· 1057 1047 struct flowi4 fl4; 1058 1048 1059 1049 ip_rt_build_flow_key(&fl4, sk, skb); 1050 1050 + ip_rt_fix_tos(&fl4); 1060 1051 1061 1052 /* Don't make lookup fail for bridged encapsulations */ 1062 1053 if (skb && netif_is_any_bridge_port(skb->dev)) ··· 1132 1121 goto out; 1133 1122 1134 1123 new = true; 1124 1124 + } else { 1125 1125 + ip_rt_fix_tos(&fl4); 1135 1126 } 1136 1127 1137 1128 __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); ··· 2622 2609 struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, 2623 2610 const struct sk_buff *skb) 2624 2611 { 2625 2625 - __u8 tos = RT_FL_TOS(fl4); 2626 2612 struct fib_result res = { 2627 2613 .type = RTN_UNSPEC, 2628 2614 .fi = NULL, ··· 2631 2619 struct rtable *rth; 2632 2620 2633 2621 fl4->flowi4_iif = LOOPBACK_IFINDEX; 2634 2634 - fl4->flowi4_tos = tos & IPTOS_RT_MASK; 2635 2635 - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 2636 2636 - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); 2622 2622 + ip_rt_fix_tos(fl4); 2637 2623 2638 2624 rcu_read_lock(); 2639 2625 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);

+4 -1

net/ipv4/tcp_output.c

reviewed

··· 3730 3730 */ 3731 3731 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) 3732 3732 { 3733 3733 + struct inet_connection_sock *icsk = inet_csk(sk); 3733 3734 struct tcp_sock *tp = tcp_sk(sk); 3734 3735 struct tcp_fastopen_request *fo = tp->fastopen_req; 3735 3736 int space, err = 0; ··· 3745 3744 * private TCP options. The cost is reduced data space in SYN :( 3746 3745 */ 3747 3746 tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); 3747 3747 + /* Sync mss_cache after updating the mss_clamp */ 3748 3748 + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 3748 3749 3749 3749 - space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - 3750 3750 + space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) - 3750 3751 MAX_TCP_OPTION_SPACE; 3751 3752 3752 3753 space = min_t(size_t, space, fo->size);

net/mptcp/protocol.c

reviewed

··· 1199 1199 tcp_skb_entail(ssk, skb); 1200 1200 return skb; 1201 1201 } 1202 1202 + tcp_skb_tsorted_anchor_cleanup(skb); 1202 1203 kfree_skb(skb); 1203 1204 return NULL; 1204 1205 }

+17

net/netfilter/nf_flow_table_inet.c

reviewed

··· 6 6 #include <linux/rhashtable.h> 7 7 #include <net/netfilter/nf_flow_table.h> 8 8 #include <net/netfilter/nf_tables.h> 9 9 + #include <linux/if_vlan.h> 9 10 10 11 static unsigned int 11 12 nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, 12 13 const struct nf_hook_state *state) 13 14 { 15 15 + struct vlan_ethhdr *veth; 16 16 + __be16 proto; 17 17 + 14 18 switch (skb->protocol) { 19 19 + case htons(ETH_P_8021Q): 20 20 + veth = (struct vlan_ethhdr *)skb_mac_header(skb); 21 21 + proto = veth->h_vlan_encapsulated_proto; 22 22 + break; 23 23 + case htons(ETH_P_PPP_SES): 24 24 + proto = nf_flow_pppoe_proto(skb); 25 25 + break; 26 26 + default: 27 27 + proto = skb->protocol; 28 28 + break; 29 29 + } 30 30 + 31 31 + switch (proto) { 15 32 case htons(ETH_P_IP): 16 33 return nf_flow_offload_ip_hook(priv, skb, state); 17 34 case htons(ETH_P_IPV6):

-18

net/netfilter/nf_flow_table_ip.c

reviewed

··· 8 8 #include <linux/ipv6.h> 9 9 #include <linux/netdevice.h> 10 10 #include <linux/if_ether.h> 11 11 - #include <linux/if_pppox.h> 12 12 - #include <linux/ppp_defs.h> 13 11 #include <net/ip.h> 14 12 #include <net/ipv6.h> 15 13 #include <net/ip6_route.h> ··· 256 258 skb_dst_set_noref(skb, dst); 257 259 dst_output(state->net, state->sk, skb); 258 260 return NF_STOLEN; 259 259 - } 260 260 - 261 261 - static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) 262 262 - { 263 263 - __be16 proto; 264 264 - 265 265 - proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 266 266 - sizeof(struct pppoe_hdr))); 267 267 - switch (proto) { 268 268 - case htons(PPP_IP): 269 269 - return htons(ETH_P_IP); 270 270 - case htons(PPP_IPV6): 271 271 - return htons(ETH_P_IPV6); 272 272 - } 273 273 - 274 274 - return 0; 275 261 } 276 262 277 263 static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,

+17 -5

net/netfilter/nf_tables_api.c

reviewed

··· 9363 9363 } 9364 9364 EXPORT_SYMBOL_GPL(nft_parse_u32_check); 9365 9365 9366 9366 - static unsigned int nft_parse_register(const struct nlattr *attr) 9366 9366 + static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg) 9367 9367 { 9368 9368 unsigned int reg; 9369 9369 9370 9370 reg = ntohl(nla_get_be32(attr)); 9371 9371 switch (reg) { 9372 9372 case NFT_REG_VERDICT...NFT_REG_4: 9373 9373 - return reg * NFT_REG_SIZE / NFT_REG32_SIZE; 9373 9373 + *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; 9374 9374 + break; 9375 9375 + case NFT_REG32_00...NFT_REG32_15: 9376 9376 + *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; 9377 9377 + break; 9374 9378 default: 9375 9375 - return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; 9379 9379 + return -ERANGE; 9376 9380 } 9381 9381 + 9382 9382 + return 0; 9377 9383 } 9378 9384 9379 9385 /** ··· 9421 9415 u32 reg; 9422 9416 int err; 9423 9417 9424 9424 - reg = nft_parse_register(attr); 9418 9418 + err = nft_parse_register(attr, &reg); 9419 9419 + if (err < 0) 9420 9420 + return err; 9421 9421 + 9425 9422 err = nft_validate_register_load(reg, len); 9426 9423 if (err < 0) 9427 9424 return err; ··· 9479 9470 int err; 9480 9471 u32 reg; 9481 9472 9482 9482 - reg = nft_parse_register(attr); 9473 9473 + err = nft_parse_register(attr, &reg); 9474 9474 + if (err < 0) 9475 9475 + return err; 9476 9476 + 9483 9477 err = nft_validate_register_store(ctx, reg, data, type, len); 9484 9478 if (err < 0) 9485 9479 return err;

+1 -1

net/netfilter/nf_tables_core.c

reviewed

··· 215 215 const struct nft_rule_dp *rule, *last_rule; 216 216 const struct net *net = nft_net(pkt); 217 217 const struct nft_expr *expr, *last; 218 218 - struct nft_regs regs; 218 218 + struct nft_regs regs = {}; 219 219 unsigned int stackptr = 0; 220 220 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; 221 221 bool genbit = READ_ONCE(net->nft.gencursor);

net/netlink/af_netlink.c

reviewed

··· 159 159 160 160 static inline u32 netlink_group_mask(u32 group) 161 161 { 162 162 + if (group > 32) 163 163 + return 0; 162 164 return group ? 1 << (group - 1) : 0; 163 165 } 164 166

+59 -59

net/openvswitch/conntrack.c

reviewed

··· 734 734 } 735 735 736 736 #if IS_ENABLED(CONFIG_NF_NAT) 737 737 + static void ovs_nat_update_key(struct sw_flow_key *key, 738 738 + const struct sk_buff *skb, 739 739 + enum nf_nat_manip_type maniptype) 740 740 + { 741 741 + if (maniptype == NF_NAT_MANIP_SRC) { 742 742 + __be16 src; 743 743 + 744 744 + key->ct_state |= OVS_CS_F_SRC_NAT; 745 745 + if (key->eth.type == htons(ETH_P_IP)) 746 746 + key->ipv4.addr.src = ip_hdr(skb)->saddr; 747 747 + else if (key->eth.type == htons(ETH_P_IPV6)) 748 748 + memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, 749 749 + sizeof(key->ipv6.addr.src)); 750 750 + else 751 751 + return; 752 752 + 753 753 + if (key->ip.proto == IPPROTO_UDP) 754 754 + src = udp_hdr(skb)->source; 755 755 + else if (key->ip.proto == IPPROTO_TCP) 756 756 + src = tcp_hdr(skb)->source; 757 757 + else if (key->ip.proto == IPPROTO_SCTP) 758 758 + src = sctp_hdr(skb)->source; 759 759 + else 760 760 + return; 761 761 + 762 762 + key->tp.src = src; 763 763 + } else { 764 764 + __be16 dst; 765 765 + 766 766 + key->ct_state |= OVS_CS_F_DST_NAT; 767 767 + if (key->eth.type == htons(ETH_P_IP)) 768 768 + key->ipv4.addr.dst = ip_hdr(skb)->daddr; 769 769 + else if (key->eth.type == htons(ETH_P_IPV6)) 770 770 + memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, 771 771 + sizeof(key->ipv6.addr.dst)); 772 772 + else 773 773 + return; 774 774 + 775 775 + if (key->ip.proto == IPPROTO_UDP) 776 776 + dst = udp_hdr(skb)->dest; 777 777 + else if (key->ip.proto == IPPROTO_TCP) 778 778 + dst = tcp_hdr(skb)->dest; 779 779 + else if (key->ip.proto == IPPROTO_SCTP) 780 780 + dst = sctp_hdr(skb)->dest; 781 781 + else 782 782 + return; 783 783 + 784 784 + key->tp.dst = dst; 785 785 + } 786 786 + } 787 787 + 737 788 /* Modelled after nf_nat_ipv[46]_fn(). 738 789 * range is only used for new, uninitialized NAT state. 739 790 * Returns either NF_ACCEPT or NF_DROP. 
··· 792 741 static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 793 742 enum ip_conntrack_info ctinfo, 794 743 const struct nf_nat_range2 *range, 795 795 - enum nf_nat_manip_type maniptype) 744 744 + enum nf_nat_manip_type maniptype, struct sw_flow_key *key) 796 745 { 797 746 int hooknum, nh_off, err = NF_ACCEPT; 798 747 ··· 864 813 push: 865 814 skb_push_rcsum(skb, nh_off); 866 815 816 816 + /* Update the flow key if NAT successful. */ 817 817 + if (err == NF_ACCEPT) 818 818 + ovs_nat_update_key(key, skb, maniptype); 819 819 + 867 820 return err; 868 868 - } 869 869 - 870 870 - static void ovs_nat_update_key(struct sw_flow_key *key, 871 871 - const struct sk_buff *skb, 872 872 - enum nf_nat_manip_type maniptype) 873 873 - { 874 874 - if (maniptype == NF_NAT_MANIP_SRC) { 875 875 - __be16 src; 876 876 - 877 877 - key->ct_state |= OVS_CS_F_SRC_NAT; 878 878 - if (key->eth.type == htons(ETH_P_IP)) 879 879 - key->ipv4.addr.src = ip_hdr(skb)->saddr; 880 880 - else if (key->eth.type == htons(ETH_P_IPV6)) 881 881 - memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, 882 882 - sizeof(key->ipv6.addr.src)); 883 883 - else 884 884 - return; 885 885 - 886 886 - if (key->ip.proto == IPPROTO_UDP) 887 887 - src = udp_hdr(skb)->source; 888 888 - else if (key->ip.proto == IPPROTO_TCP) 889 889 - src = tcp_hdr(skb)->source; 890 890 - else if (key->ip.proto == IPPROTO_SCTP) 891 891 - src = sctp_hdr(skb)->source; 892 892 - else 893 893 - return; 894 894 - 895 895 - key->tp.src = src; 896 896 - } else { 897 897 - __be16 dst; 898 898 - 899 899 - key->ct_state |= OVS_CS_F_DST_NAT; 900 900 - if (key->eth.type == htons(ETH_P_IP)) 901 901 - key->ipv4.addr.dst = ip_hdr(skb)->daddr; 902 902 - else if (key->eth.type == htons(ETH_P_IPV6)) 903 903 - memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, 904 904 - sizeof(key->ipv6.addr.dst)); 905 905 - else 906 906 - return; 907 907 - 908 908 - if (key->ip.proto == IPPROTO_UDP) 909 909 - dst = udp_hdr(skb)->dest; 910 910 - else if 
(key->ip.proto == IPPROTO_TCP) 911 911 - dst = tcp_hdr(skb)->dest; 912 912 - else if (key->ip.proto == IPPROTO_SCTP) 913 913 - dst = sctp_hdr(skb)->dest; 914 914 - else 915 915 - return; 916 916 - 917 917 - key->tp.dst = dst; 918 918 - } 919 821 } 920 822 921 823 /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ ··· 910 906 } else { 911 907 return NF_ACCEPT; /* Connection is not NATed. */ 912 908 } 913 913 - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); 909 909 + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); 914 910 915 911 if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { 916 912 if (ct->status & IPS_SRC_NAT) { ··· 920 916 maniptype = NF_NAT_MANIP_SRC; 921 917 922 918 err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, 923 923 - maniptype); 919 919 + maniptype, key); 924 920 } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { 925 921 err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, 926 926 - NF_NAT_MANIP_SRC); 922 922 + NF_NAT_MANIP_SRC, key); 927 923 } 928 924 } 929 929 - 930 930 - /* Mark NAT done if successful and update the flow key. */ 931 931 - if (err == NF_ACCEPT) 932 932 - ovs_nat_update_key(key, skb, maniptype); 933 925 934 926 return err; 935 927 }

+2 -1

net/tipc/socket.c

reviewed

··· 2852 2852 2853 2853 /* Try again later if dest link is congested */ 2854 2854 if (tsk->cong_link_cnt) { 2855 2855 - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); 2855 2855 + sk_reset_timer(sk, &sk->sk_timer, 2856 2856 + jiffies + msecs_to_jiffies(100)); 2856 2857 return; 2857 2858 } 2858 2859 /* Prepare SYN for retransmit */

+9 -7

net/unix/af_unix.c

reviewed

··· 2084 2084 if (ousk->oob_skb) 2085 2085 consume_skb(ousk->oob_skb); 2086 2086 2087 2087 - ousk->oob_skb = skb; 2087 2087 + WRITE_ONCE(ousk->oob_skb, skb); 2088 2088 2089 2089 scm_stat_add(other, skb); 2090 2090 skb_queue_tail(&other->sk_receive_queue, skb); ··· 2602 2602 2603 2603 oob_skb = u->oob_skb; 2604 2604 2605 2605 - if (!(state->flags & MSG_PEEK)) { 2606 2606 - u->oob_skb = NULL; 2607 2607 - } 2605 2605 + if (!(state->flags & MSG_PEEK)) 2606 2606 + WRITE_ONCE(u->oob_skb, NULL); 2608 2607 2609 2608 unix_state_unlock(sk); 2610 2609 ··· 2638 2639 skb = NULL; 2639 2640 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2640 2641 if (!(flags & MSG_PEEK)) { 2641 2641 - u->oob_skb = NULL; 2642 2642 + WRITE_ONCE(u->oob_skb, NULL); 2642 2643 consume_skb(skb); 2643 2644 } 2644 2645 } else if (!(flags & MSG_PEEK)) { ··· 3093 3094 case SIOCATMARK: 3094 3095 { 3095 3096 struct sk_buff *skb; 3096 3096 - struct unix_sock *u = unix_sk(sk); 3097 3097 int answ = 0; 3098 3098 3099 3099 skb = skb_peek(&sk->sk_receive_queue); 3100 3100 - if (skb && skb == u->oob_skb) 3100 3100 + if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3101 3101 answ = 1; 3102 3102 err = put_user(answ, (int __user *)arg); 3103 3103 } ··· 3137 3139 mask |= EPOLLIN | EPOLLRDNORM; 3138 3140 if (sk_is_readable(sk)) 3139 3141 mask |= EPOLLIN | EPOLLRDNORM; 3142 3142 + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3143 3143 + if (READ_ONCE(unix_sk(sk)->oob_skb)) 3144 3144 + mask |= EPOLLPRI; 3145 3145 + #endif 3140 3146 3141 3147 /* Connection-based need to check for termination and startup */ 3142 3148 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&

+50 -19

net/xdp/xsk.c

reviewed

··· 402 402 static int xsk_wakeup(struct xdp_sock *xs, u8 flags) 403 403 { 404 404 struct net_device *dev = xs->dev; 405 405 - int err; 406 405 407 407 - rcu_read_lock(); 408 408 - err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); 409 409 - rcu_read_unlock(); 410 410 - 411 411 - return err; 412 412 - } 413 413 - 414 414 - static int xsk_zc_xmit(struct xdp_sock *xs) 415 415 - { 416 416 - return xsk_wakeup(xs, XDP_WAKEUP_TX); 406 406 + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); 417 407 } 418 408 419 409 static void xsk_destruct_skb(struct sk_buff *skb) ··· 522 532 523 533 mutex_lock(&xs->mutex); 524 534 535 535 + /* Since we dropped the RCU read lock, the socket state might have changed. */ 536 536 + if (unlikely(!xsk_is_bound(xs))) { 537 537 + err = -ENXIO; 538 538 + goto out; 539 539 + } 540 540 + 525 541 if (xs->queue_id >= xs->dev->real_num_tx_queues) 526 542 goto out; 527 543 ··· 591 595 return err; 592 596 } 593 597 594 594 - static int __xsk_sendmsg(struct sock *sk) 598 598 + static int xsk_xmit(struct sock *sk) 595 599 { 596 600 struct xdp_sock *xs = xdp_sk(sk); 601 601 + int ret; 597 602 598 603 if (unlikely(!(xs->dev->flags & IFF_UP))) 599 604 return -ENETDOWN; 600 605 if (unlikely(!xs->tx)) 601 606 return -ENOBUFS; 602 607 603 603 - return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk); 608 608 + if (xs->zc) 609 609 + return xsk_wakeup(xs, XDP_WAKEUP_TX); 610 610 + 611 611 + /* Drop the RCU lock since the SKB path might sleep. */ 612 612 + rcu_read_unlock(); 613 613 + ret = xsk_generic_xmit(sk); 614 614 + /* Reaquire RCU lock before going into common code. 
*/ 615 615 + rcu_read_lock(); 616 616 + 617 617 + return ret; 604 618 } 605 619 606 620 static bool xsk_no_wakeup(struct sock *sk) ··· 624 618 #endif 625 619 } 626 620 627 627 - static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 621 621 + static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 628 622 { 629 623 bool need_wait = !(m->msg_flags & MSG_DONTWAIT); 630 624 struct sock *sk = sock->sk; ··· 644 638 645 639 pool = xs->pool; 646 640 if (pool->cached_need_wakeup & XDP_WAKEUP_TX) 647 647 - return __xsk_sendmsg(sk); 641 641 + return xsk_xmit(sk); 648 642 return 0; 649 643 } 650 644 651 651 - static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) 645 645 + static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 646 646 + { 647 647 + int ret; 648 648 + 649 649 + rcu_read_lock(); 650 650 + ret = __xsk_sendmsg(sock, m, total_len); 651 651 + rcu_read_unlock(); 652 652 + 653 653 + return ret; 654 654 + } 655 655 + 656 656 + static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) 652 657 { 653 658 bool need_wait = !(flags & MSG_DONTWAIT); 654 659 struct sock *sk = sock->sk; ··· 685 668 return 0; 686 669 } 687 670 671 671 + static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) 672 672 + { 673 673 + int ret; 674 674 + 675 675 + rcu_read_lock(); 676 676 + ret = __xsk_recvmsg(sock, m, len, flags); 677 677 + rcu_read_unlock(); 678 678 + 679 679 + return ret; 680 680 + } 681 681 + 688 682 static __poll_t xsk_poll(struct file *file, struct socket *sock, 689 683 struct poll_table_struct *wait) 690 684 { ··· 706 678 707 679 sock_poll_wait(file, sock, wait); 708 680 709 709 - if (unlikely(!xsk_is_bound(xs))) 681 681 + rcu_read_lock(); 682 682 + if (unlikely(!xsk_is_bound(xs))) { 683 683 + rcu_read_unlock(); 710 684 return mask; 685 685 + } 711 686 712 687 pool = xs->pool; 713 688 ··· 719 688 xsk_wakeup(xs, 
pool->cached_need_wakeup); 720 689 else 721 690 /* Poll needs to drive Tx also in copy mode */ 722 722 - __xsk_sendmsg(sk); 691 691 + xsk_xmit(sk); 723 692 } 724 693 725 694 if (xs->rx && !xskq_prod_is_empty(xs->rx)) ··· 727 696 if (xs->tx && xsk_tx_writeable(xs)) 728 697 mask |= EPOLLOUT | EPOLLWRNORM; 729 698 699 699 + rcu_read_unlock(); 730 700 return mask; 731 701 } 732 702 ··· 759 727 760 728 /* Wait for driver to stop using the xdp socket. */ 761 729 xp_del_xsk(xs->pool, xs); 762 762 - xs->dev = NULL; 763 730 synchronize_net(); 764 731 dev_put(dev); 765 732 }

+3 -3

tools/testing/selftests/net/af_unix/test_unix_oob.c

reviewed

··· 218 218 219 219 /* Test 1: 220 220 * verify that SIGURG is 221 221 - * delivered and 63 bytes are 222 222 - * read and oob is '@' 221 221 + * delivered, 63 bytes are 222 222 + * read, oob is '@', and POLLPRI works. 223 223 */ 224 224 - wait_for_data(pfd, POLLIN | POLLPRI); 224 224 + wait_for_data(pfd, POLLPRI); 225 225 read_oob(pfd, &oob); 226 226 len = read_data(pfd, buf, 1024); 227 227 if (!signal_recvd || len != 63 || oob != '@') {

+137 -4

tools/testing/selftests/net/pmtu.sh

reviewed

··· 26 26 # - pmtu_ipv6 27 27 # Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 28 28 # 29 29 + # - pmtu_ipv4_dscp_icmp_exception 30 30 + # Set up the same network topology as pmtu_ipv4, but use non-default 31 31 + # routing table in A. A fib-rule is used to jump to this routing table 32 32 + # based on DSCP. Send ICMPv4 packets with the expected DSCP value and 33 33 + # verify that ECN doesn't interfere with the creation of PMTU exceptions. 34 34 + # 35 35 + # - pmtu_ipv4_dscp_udp_exception 36 36 + # Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP. 37 37 + # 29 38 # - pmtu_ipv4_vxlan4_exception 30 39 # Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel 31 40 # over IPv4 between A and B, routed via R1. On the link between R1 and B, ··· 212 203 tests=" 213 204 pmtu_ipv4_exception ipv4: PMTU exceptions 1 214 205 pmtu_ipv6_exception ipv6: PMTU exceptions 1 206 206 + pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1 207 207 + pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1 215 208 pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1 216 209 pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1 217 210 pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1 ··· 334 323 B 6 default 61 335 324 " 336 325 326 326 + policy_mark=0x04 327 327 + rt_table=main 328 328 + 337 329 veth4_a_addr="192.168.1.1" 338 330 veth4_b_addr="192.168.1.2" 339 331 veth4_c_addr="192.168.2.10" ··· 360 346 err_buf= 361 347 tcpdump_pids= 362 348 nettest_pids= 349 349 + socat_pids= 363 350 364 351 err() { 365 352 err_buf="${err_buf}${1} ··· 738 723 739 724 ns_name="$(nsname ${ns})" 740 725 741 741 - ip -n ${ns_name} route add ${addr} via ${gw} 726 726 + ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}" 742 727 743 728 ns=""; addr=""; gw="" 744 729 done ··· 768 753 769 754 ns_name="$(nsname ${ns})" 770 755 771 771 - ip -n ${ns_name} -${fam} route add ${addr} 
nhid ${nhid} 756 756 + ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}" 772 757 773 758 ns=""; fam=""; addr=""; nhid="" 774 759 done ··· 811 796 fi 812 797 813 798 return 0 799 799 + } 800 800 + 801 801 + setup_policy_routing() { 802 802 + setup_routing 803 803 + 804 804 + ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \ 805 805 + table "${rt_table}" 806 806 + 807 807 + # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to 808 808 + # have an option to do it. 809 809 + tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio 810 810 + tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio 811 811 + tc -netns "${NS_A}" filter add dev veth_A-R1 \ 812 812 + protocol ipv4 flower ip_proto udp \ 813 813 + action pedit ex munge ip df set 0x40 pipe csum ip and udp 814 814 + tc -netns "${NS_A}" filter add dev veth_A-R2 \ 815 815 + protocol ipv4 flower ip_proto udp \ 816 816 + action pedit ex munge ip df set 0x40 pipe csum ip and udp 814 817 } 815 818 816 819 setup_bridge() { ··· 936 903 done 937 904 nettest_pids= 938 905 906 906 + for pid in ${socat_pids}; do 907 907 + kill "${pid}" 908 908 + done 909 909 + socat_pids= 910 910 + 939 911 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do 940 912 ip netns del ${n} 2> /dev/null 941 913 done ··· 988 950 route_get_dst_exception() { 989 951 ns_cmd="${1}" 990 952 dst="${2}" 953 953 + dsfield="${3}" 991 954 992 992 - ${ns_cmd} ip route get "${dst}" 955 955 + if [ -z "${dsfield}" ]; then 956 956 + dsfield=0 957 957 + fi 958 958 + 959 959 + ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" 993 960 } 994 961 995 962 route_get_dst_pmtu_from_exception() { 996 963 ns_cmd="${1}" 997 964 dst="${2}" 965 965 + dsfield="${3}" 998 966 999 999 - mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" 967 967 + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" 1000 968 } 1001 969 1002 970 check_pmtu_value() { ··· 1110 1066 1111 
test_pmtu_ipv6_exception() { 1112 1068 test_pmtu_ipvX 6 1069 1069 + } 1070 1070 + 1071 1071 + test_pmtu_ipv4_dscp_icmp_exception() { 1072 1072 + rt_table=100 1073 1073 + 1074 1074 + setup namespaces policy_routing || return $ksft_skip 1075 1075 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ 1076 1076 + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ 1077 1077 + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ 1078 1078 + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 1079 1079 + 1080 1080 + # Set up initial MTU values 1081 1081 + mtu "${ns_a}" veth_A-R1 2000 1082 1082 + mtu "${ns_r1}" veth_R1-A 2000 1083 1083 + mtu "${ns_r1}" veth_R1-B 1400 1084 1084 + mtu "${ns_b}" veth_B-R1 1400 1085 1085 + 1086 1086 + mtu "${ns_a}" veth_A-R2 2000 1087 1087 + mtu "${ns_r2}" veth_R2-A 2000 1088 1088 + mtu "${ns_r2}" veth_R2-B 1500 1089 1089 + mtu "${ns_b}" veth_B-R2 1500 1090 1090 + 1091 1091 + len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1 1092 1092 + 1093 1093 + dst1="${prefix4}.${b_r1}.1" 1094 1094 + dst2="${prefix4}.${b_r2}.1" 1095 1095 + 1096 1096 + # Create route exceptions 1097 1097 + dsfield=${policy_mark} # No ECN bit set (Not-ECT) 1098 1098 + run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}" 1099 1099 + 1100 1100 + dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0)) 1101 1101 + run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" 1102 1102 + 1103 1103 + # Check that exceptions have been created with the correct PMTU 1104 1104 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" 1105 1105 + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 1106 1106 + 1107 1107 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" 1108 1108 + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 1109 1109 + } 1110 1110 + 1111 1111 + test_pmtu_ipv4_dscp_udp_exception() { 1112 1112 + rt_table=100 1113 1113 + 1114 1114 + if ! 
which socat > /dev/null 2>&1; then 1115 1115 + echo "'socat' command not found; skipping tests" 1116 1116 + return $ksft_skip 1117 1117 + fi 1118 1118 + 1119 1119 + setup namespaces policy_routing || return $ksft_skip 1120 1120 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ 1121 1121 + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ 1122 1122 + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ 1123 1123 + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 1124 1124 + 1125 1125 + # Set up initial MTU values 1126 1126 + mtu "${ns_a}" veth_A-R1 2000 1127 1127 + mtu "${ns_r1}" veth_R1-A 2000 1128 1128 + mtu "${ns_r1}" veth_R1-B 1400 1129 1129 + mtu "${ns_b}" veth_B-R1 1400 1130 1130 + 1131 1131 + mtu "${ns_a}" veth_A-R2 2000 1132 1132 + mtu "${ns_r2}" veth_R2-A 2000 1133 1133 + mtu "${ns_r2}" veth_R2-B 1500 1134 1134 + mtu "${ns_b}" veth_B-R2 1500 1135 1135 + 1136 1136 + len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1 1137 1137 + 1138 1138 + dst1="${prefix4}.${b_r1}.1" 1139 1139 + dst2="${prefix4}.${b_r2}.1" 1140 1140 + 1141 1141 + # Create route exceptions 1142 1142 + run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1 1143 1143 + socat_pids="${socat_pids} $!" 
1144 1144 + 1145 1145 + dsfield=${policy_mark} # No ECN bit set (Not-ECT) 1146 1146 + run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \ 1147 1147 + UDP:"${dst1}":50000,tos="${dsfield}" 1148 1148 + 1149 1149 + dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0)) 1150 1150 + run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \ 1151 1151 + UDP:"${dst2}":50000,tos="${dsfield}" 1152 1152 + 1153 1153 + # Check that exceptions have been created with the correct PMTU 1154 1154 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" 1155 1155 + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 1156 1156 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" 1157 1157 + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 1113 1158 } 1114 1159 1115 1160 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {