···14301430 return rc;14311431 }1432143214331433+ adapter->tx_queues_active = true;14341434+14351435+ /* Since queues were stopped until now, there shouldn't be any14361436+ * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we14371437+ * don't need the synchronize_rcu()? Leaving it for consistency14381438+ * with setting ->tx_queues_active = false.14391439+ */14401440+ synchronize_rcu();14411441+14331442 netif_tx_start_all_queues(netdev);1434144314351444 if (prev_state == VNIC_CLOSED) {···16131604 struct ibmvnic_adapter *adapter = netdev_priv(netdev);1614160516151606 /* ensure that transmissions are stopped if called by do_reset */16071607+16081608+ adapter->tx_queues_active = false;16091609+16101610+ /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active16111611+ * update so they don't restart a queue after we stop it below.16121612+ */16131613+ synchronize_rcu();16141614+16161615 if (test_bit(0, &adapter->resetting))16171616 netif_tx_disable(netdev);16181617 else···18601843 tx_buff->skb = NULL;18611844 adapter->netdev->stats.tx_dropped++;18621845 }18461846+18631847 ind_bufp->index = 0;18481848+18641849 if (atomic_sub_return(entries, &tx_scrq->used) <=18651850 (adapter->req_tx_entries_per_subcrq / 2) &&18661866- __netif_subqueue_stopped(adapter->netdev, queue_num) &&18671867- !test_bit(0, &adapter->resetting)) {18681868- netif_wake_subqueue(adapter->netdev, queue_num);18691869- netdev_dbg(adapter->netdev, "Started queue %d\n",18701870- queue_num);18511851+ __netif_subqueue_stopped(adapter->netdev, queue_num)) {18521852+ rcu_read_lock();18531853+18541854+ if (adapter->tx_queues_active) {18551855+ netif_wake_subqueue(adapter->netdev, queue_num);18561856+ netdev_dbg(adapter->netdev, "Started queue %d\n",18571857+ queue_num);18581858+ }18591859+18601860+ rcu_read_unlock();18711861 }18721862}18731863···19291905 int index = 0;19301906 u8 proto = 0;1931190719321932- tx_scrq = adapter->tx_scrq[queue_num];19331933- txq = netdev_get_tx_queue(netdev, queue_num);19341934- ind_bufp = &tx_scrq->ind_buf;19351935-19361936- if (test_bit(0, &adapter->resetting)) {19081908+ /* If a reset is in progress, drop the packet since19091909+ * the scrqs may get torn down. Otherwise use the19101910+ * rcu to ensure reset waits for us to complete.19111911+ */19121912+ rcu_read_lock();19131913+ if (!adapter->tx_queues_active) {19371914 dev_kfree_skb_any(skb);1938191519391916 tx_send_failed++;···19431918 goto out;19441919 }1945192019211921+ tx_scrq = adapter->tx_scrq[queue_num];19221922+ txq = netdev_get_tx_queue(netdev, queue_num);19231923+ ind_bufp = &tx_scrq->ind_buf;19241924+19461925 if (ibmvnic_xmit_workarounds(skb, netdev)) {19471926 tx_dropped++;19481927 tx_send_failed++;···19541925 ibmvnic_tx_scrq_flush(adapter, tx_scrq);19551926 goto out;19561927 }19281928+19571929 if (skb_is_gso(skb))19581930 tx_pool = &adapter->tso_pool[queue_num];19591931 else···21092079 netif_carrier_off(netdev);21102080 }21112081out:20822082+ rcu_read_unlock();21122083 netdev->stats.tx_dropped += tx_dropped;21132084 netdev->stats.tx_bytes += tx_bytes;21142085 netdev->stats.tx_packets += tx_packets;···37803749 (adapter->req_tx_entries_per_subcrq / 2) &&37813750 __netif_subqueue_stopped(adapter->netdev,37823751 scrq->pool_index)) {37833783- netif_wake_subqueue(adapter->netdev, scrq->pool_index);37843784- netdev_dbg(adapter->netdev, "Started queue %d\n",37853785- scrq->pool_index);37523752+ rcu_read_lock();37533753+ if (adapter->tx_queues_active) {37543754+ netif_wake_subqueue(adapter->netdev,37553755+ scrq->pool_index);37563756+ netdev_dbg(adapter->netdev,37573757+ "Started queue %d\n",37583758+ scrq->pool_index);37593759+ }37603760+ rcu_read_unlock();37863761 }37873762 }37883763
+5-2
drivers/net/ethernet/ibm/ibmvnic.h
···10061006 struct work_struct ibmvnic_reset;10071007 struct delayed_work ibmvnic_delayed_reset;10081008 unsigned long resetting;10091009- bool napi_enabled, from_passive_init;10101010- bool login_pending;10111009 /* last device reset time */10121010 unsigned long last_reset_time;1013101110121012+ bool napi_enabled;10131013+ bool from_passive_init;10141014+ bool login_pending;10151015+ /* protected by rcu */10161016+ bool tx_queues_active;10141017 bool failover_pending;10151018 bool force_reset_recovery;10161019
+2
drivers/net/ethernet/intel/ice/ice.h
···290290 ICE_LINK_DEFAULT_OVERRIDE_PENDING,291291 ICE_PHY_INIT_COMPLETE,292292 ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */293293+ ICE_AUX_ERR_PENDING,293294 ICE_STATE_NBITS /* must be last */294295};295296···558557 wait_queue_head_t reset_wait_queue;559558560559 u32 hw_csum_rx_error;560560+ u32 oicr_err_reg;561561 u16 oicr_idx; /* Other interrupt cause MSIX vector index */562562 u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */563563 u16 max_pf_txqs; /* Total Tx queues PF wide */
+3
drivers/net/ethernet/intel/ice/ice_idc.c
···3434{3535 struct iidc_auxiliary_drv *iadrv;36363737+ if (WARN_ON_ONCE(!in_task()))3838+ return;3939+3740 if (!pf->adev)3841 return;3942
+15-10
drivers/net/ethernet/intel/ice/ice_main.c
···22782278 return;22792279 }2280228022812281+ if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {22822282+ struct iidc_event *event;22832283+22842284+ event = kzalloc(sizeof(*event), GFP_KERNEL);22852285+ if (event) {22862286+ set_bit(IIDC_EVENT_CRIT_ERR, event->type);22872287+ /* report the entire OICR value to AUX driver */22882288+ swap(event->reg, pf->oicr_err_reg);22892289+ ice_send_event_to_aux(pf, event);22902290+ kfree(event);22912291+ }22922292+ }22932293+22812294 if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {22822295 /* Plug aux device per request */22832296 ice_plug_aux_dev(pf);···3077306430783065#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)30793066 if (oicr & ICE_AUX_CRIT_ERR) {30803080- struct iidc_event *event;30813081-30673067+ pf->oicr_err_reg |= oicr;30683068+ set_bit(ICE_AUX_ERR_PENDING, pf->state);30823069 ena_mask &= ~ICE_AUX_CRIT_ERR;30833083- event = kzalloc(sizeof(*event), GFP_ATOMIC);30843084- if (event) {30853085- set_bit(IIDC_EVENT_CRIT_ERR, event->type);30863086- /* report the entire OICR value to AUX driver */30873087- event->reg = oicr;30883088- ice_send_event_to_aux(pf, event);30893089- kfree(event);30903090- }30913070 }3092307130933072 /* Report any remaining unexpected interrupts */
+1-1
drivers/net/wwan/qcom_bam_dmux.c
···755755 return 0;756756757757 dmux->tx = dma_request_chan(dev, "tx");758758- if (IS_ERR(dmux->rx)) {758758+ if (IS_ERR(dmux->tx)) {759759 dev_err(dev, "Failed to request TX DMA channel: %pe\n", dmux->tx);760760 dmux->tx = NULL;761761 bam_dmux_runtime_suspend(dev);
···734734}735735736736#if IS_ENABLED(CONFIG_NF_NAT)737737+static void ovs_nat_update_key(struct sw_flow_key *key,738738+ const struct sk_buff *skb,739739+ enum nf_nat_manip_type maniptype)740740+{741741+ if (maniptype == NF_NAT_MANIP_SRC) {742742+ __be16 src;743743+744744+ key->ct_state |= OVS_CS_F_SRC_NAT;745745+ if (key->eth.type == htons(ETH_P_IP))746746+ key->ipv4.addr.src = ip_hdr(skb)->saddr;747747+ else if (key->eth.type == htons(ETH_P_IPV6))748748+ memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,749749+ sizeof(key->ipv6.addr.src));750750+ else751751+ return;752752+753753+ if (key->ip.proto == IPPROTO_UDP)754754+ src = udp_hdr(skb)->source;755755+ else if (key->ip.proto == IPPROTO_TCP)756756+ src = tcp_hdr(skb)->source;757757+ else if (key->ip.proto == IPPROTO_SCTP)758758+ src = sctp_hdr(skb)->source;759759+ else760760+ return;761761+762762+ key->tp.src = src;763763+ } else {764764+ __be16 dst;765765+766766+ key->ct_state |= OVS_CS_F_DST_NAT;767767+ if (key->eth.type == htons(ETH_P_IP))768768+ key->ipv4.addr.dst = ip_hdr(skb)->daddr;769769+ else if (key->eth.type == htons(ETH_P_IPV6))770770+ memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,771771+ sizeof(key->ipv6.addr.dst));772772+ else773773+ return;774774+775775+ if (key->ip.proto == IPPROTO_UDP)776776+ dst = udp_hdr(skb)->dest;777777+ else if (key->ip.proto == IPPROTO_TCP)778778+ dst = tcp_hdr(skb)->dest;779779+ else if (key->ip.proto == IPPROTO_SCTP)780780+ dst = sctp_hdr(skb)->dest;781781+ else782782+ return;783783+784784+ key->tp.dst = dst;785785+ }786786+}787787+737788/* Modelled after nf_nat_ipv[46]_fn().738789 * range is only used for new, uninitialized NAT state.739790 * Returns either NF_ACCEPT or NF_DROP.···792741static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,793742 enum ip_conntrack_info ctinfo,794743 const struct nf_nat_range2 *range,795795- enum nf_nat_manip_type maniptype)744744+ enum nf_nat_manip_type maniptype, struct sw_flow_key *key)796745{797746 int hooknum, nh_off, err = NF_ACCEPT;798747···864813push:865814 skb_push_rcsum(skb, nh_off);866815816816+ /* Update the flow key if NAT successful. */817817+ if (err == NF_ACCEPT)818818+ ovs_nat_update_key(key, skb, maniptype);819819+867820 return err;868868-}869869-870870-static void ovs_nat_update_key(struct sw_flow_key *key,871871- const struct sk_buff *skb,872872- enum nf_nat_manip_type maniptype)873873-{874874- if (maniptype == NF_NAT_MANIP_SRC) {875875- __be16 src;876876-877877- key->ct_state |= OVS_CS_F_SRC_NAT;878878- if (key->eth.type == htons(ETH_P_IP))879879- key->ipv4.addr.src = ip_hdr(skb)->saddr;880880- else if (key->eth.type == htons(ETH_P_IPV6))881881- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,882882- sizeof(key->ipv6.addr.src));883883- else884884- return;885885-886886- if (key->ip.proto == IPPROTO_UDP)887887- src = udp_hdr(skb)->source;888888- else if (key->ip.proto == IPPROTO_TCP)889889- src = tcp_hdr(skb)->source;890890- else if (key->ip.proto == IPPROTO_SCTP)891891- src = sctp_hdr(skb)->source;892892- else893893- return;894894-895895- key->tp.src = src;896896- } else {897897- __be16 dst;898898-899899- key->ct_state |= OVS_CS_F_DST_NAT;900900- if (key->eth.type == htons(ETH_P_IP))901901- key->ipv4.addr.dst = ip_hdr(skb)->daddr;902902- else if (key->eth.type == htons(ETH_P_IPV6))903903- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,904904- sizeof(key->ipv6.addr.dst));905905- else906906- return;907907-908908- if (key->ip.proto == IPPROTO_UDP)909909- dst = udp_hdr(skb)->dest;910910- else if (key->ip.proto == IPPROTO_TCP)911911- dst = tcp_hdr(skb)->dest;912912- else if (key->ip.proto == IPPROTO_SCTP)913913- dst = sctp_hdr(skb)->dest;914914- else915915- return;916916-917917- key->tp.dst = dst;918918- }919821}920822921823/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */···910906 } else {911907 return NF_ACCEPT; /* Connection is not NATed. */912908 }913913- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);909909+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);914910915911 if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {916912 if (ct->status & IPS_SRC_NAT) {···920916 maniptype = NF_NAT_MANIP_SRC;921917922918 err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,923923- maniptype);919919+ maniptype, key);924920 } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {925921 err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,926926- NF_NAT_MANIP_SRC);922922+ NF_NAT_MANIP_SRC, key);927923 }928924 }929929-930930- /* Mark NAT done if successful and update the flow key. */931931- if (err == NF_ACCEPT)932932- ovs_nat_update_key(key, skb, maniptype);933925934926 return err;935927}
+2-1
net/tipc/socket.c
···2852285228532853 /* Try again later if dest link is congested */28542854 if (tsk->cong_link_cnt) {28552855- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));28552855+ sk_reset_timer(sk, &sk->sk_timer,28562856+ jiffies + msecs_to_jiffies(100));28562857 return;28572858 }28582859 /* Prepare SYN for retransmit */
+9-7
net/unix/af_unix.c
···20842084 if (ousk->oob_skb)20852085 consume_skb(ousk->oob_skb);2086208620872087- ousk->oob_skb = skb;20872087+ WRITE_ONCE(ousk->oob_skb, skb);2088208820892089 scm_stat_add(other, skb);20902090 skb_queue_tail(&other->sk_receive_queue, skb);···2602260226032603 oob_skb = u->oob_skb;2604260426052605- if (!(state->flags & MSG_PEEK)) {26062606- u->oob_skb = NULL;26072607- }26052605+ if (!(state->flags & MSG_PEEK))26062606+ WRITE_ONCE(u->oob_skb, NULL);2608260726092608 unix_state_unlock(sk);26102609···26382639 skb = NULL;26392640 } else if (sock_flag(sk, SOCK_URGINLINE)) {26402641 if (!(flags & MSG_PEEK)) {26412641- u->oob_skb = NULL;26422642+ WRITE_ONCE(u->oob_skb, NULL);26422643 consume_skb(skb);26432644 }26442645 } else if (!(flags & MSG_PEEK)) {···30933094 case SIOCATMARK:30943095 {30953096 struct sk_buff *skb;30963096- struct unix_sock *u = unix_sk(sk);30973097 int answ = 0;3098309830993099 skb = skb_peek(&sk->sk_receive_queue);31003100- if (skb && skb == u->oob_skb)31003100+ if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))31013101 answ = 1;31023102 err = put_user(answ, (int __user *)arg);31033103 }···31373139 mask |= EPOLLIN | EPOLLRDNORM;31383140 if (sk_is_readable(sk))31393141 mask |= EPOLLIN | EPOLLRDNORM;31423142+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)31433143+ if (READ_ONCE(unix_sk(sk)->oob_skb))31443144+ mask |= EPOLLPRI;31453145+#endif3140314631413147 /* Connection-based need to check for termination and startup */31423148 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+50-19
net/xdp/xsk.c
···402402static int xsk_wakeup(struct xdp_sock *xs, u8 flags)403403{404404 struct net_device *dev = xs->dev;405405- int err;406405407407- rcu_read_lock();408408- err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);409409- rcu_read_unlock();410410-411411- return err;412412-}413413-414414-static int xsk_zc_xmit(struct xdp_sock *xs)415415-{416416- return xsk_wakeup(xs, XDP_WAKEUP_TX);406406+ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);417407}418408419409static void xsk_destruct_skb(struct sk_buff *skb)···522532523533 mutex_lock(&xs->mutex);524534535535+ /* Since we dropped the RCU read lock, the socket state might have changed. */536536+ if (unlikely(!xsk_is_bound(xs))) {537537+ err = -ENXIO;538538+ goto out;539539+ }540540+525541 if (xs->queue_id >= xs->dev->real_num_tx_queues)526542 goto out;527543···591595 return err;592596}593597594594-static int __xsk_sendmsg(struct sock *sk)598598+static int xsk_xmit(struct sock *sk)595599{596600 struct xdp_sock *xs = xdp_sk(sk);601601+ int ret;597602598603 if (unlikely(!(xs->dev->flags & IFF_UP)))599604 return -ENETDOWN;600605 if (unlikely(!xs->tx))601606 return -ENOBUFS;602607603603- return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);608608+ if (xs->zc)609609+ return xsk_wakeup(xs, XDP_WAKEUP_TX);610610+611611+ /* Drop the RCU lock since the SKB path might sleep. */612612+ rcu_read_unlock();613613+ ret = xsk_generic_xmit(sk);614614+ /* Reaquire RCU lock before going into common code. */615615+ rcu_read_lock();616616+617617+ return ret;604618}605619606620static bool xsk_no_wakeup(struct sock *sk)···624618#endif625619}626620627627-static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)621621+static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)628622{629623 bool need_wait = !(m->msg_flags & MSG_DONTWAIT);630624 struct sock *sk = sock->sk;···644638645639 pool = xs->pool;646640 if (pool->cached_need_wakeup & XDP_WAKEUP_TX)647647- return __xsk_sendmsg(sk);641641+ return xsk_xmit(sk);648642 return 0;649643}650644651651-static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)645645+static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)646646+{647647+ int ret;648648+649649+ rcu_read_lock();650650+ ret = __xsk_sendmsg(sock, m, total_len);651651+ rcu_read_unlock();652652+653653+ return ret;654654+}655655+656656+static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)652657{653658 bool need_wait = !(flags & MSG_DONTWAIT);654659 struct sock *sk = sock->sk;···685668 return 0;686669}687670671671+static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)672672+{673673+ int ret;674674+675675+ rcu_read_lock();676676+ ret = __xsk_recvmsg(sock, m, len, flags);677677+ rcu_read_unlock();678678+679679+ return ret;680680+}681681+688682static __poll_t xsk_poll(struct file *file, struct socket *sock,689683 struct poll_table_struct *wait)690684{···706678707679 sock_poll_wait(file, sock, wait);708680709709- if (unlikely(!xsk_is_bound(xs)))681681+ rcu_read_lock();682682+ if (unlikely(!xsk_is_bound(xs))) {683683+ rcu_read_unlock();710684 return mask;685685+ }711686712687 pool = xs->pool;713688···719688 xsk_wakeup(xs, pool->cached_need_wakeup);720689 else721690 /* Poll needs to drive Tx also in copy mode */722722- __xsk_sendmsg(sk);691691+ xsk_xmit(sk);723692 }724693725694 if (xs->rx && !xskq_prod_is_empty(xs->rx))···727696 if (xs->tx && xsk_tx_writeable(xs))728697 mask |= EPOLLOUT | EPOLLWRNORM;729698699699+ rcu_read_unlock();730700 return mask;731701}732702···759727760728 /* Wait for driver to stop using the xdp socket. */761729 xp_del_xsk(xs->pool, xs);762762- xs->dev = NULL;763730 synchronize_net();764731 dev_put(dev);765732}
···218218219219 /* Test 1:220220 * veriyf that SIGURG is221221- * delivered and 63 bytes are222222- * read and oob is '@'221221+ * delivered, 63 bytes are222222+ * read, oob is '@', and POLLPRI works.223223 */224224- wait_for_data(pfd, POLLIN | POLLPRI);224224+ wait_for_data(pfd, POLLPRI);225225 read_oob(pfd, &oob);226226 len = read_data(pfd, buf, 1024);227227 if (!signal_recvd || len != 63 || oob != '@') {
+137-4
tools/testing/selftests/net/pmtu.sh
···2626# - pmtu_ipv62727# Same as pmtu_ipv4, except for locked PMTU tests, using IPv62828#2929+# - pmtu_ipv4_dscp_icmp_exception3030+# Set up the same network topology as pmtu_ipv4, but use non-default3131+# routing table in A. A fib-rule is used to jump to this routing table3232+# based on DSCP. Send ICMPv4 packets with the expected DSCP value and3333+# verify that ECN doesn't interfere with the creation of PMTU exceptions.3434+#3535+# - pmtu_ipv4_dscp_udp_exception3636+# Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.3737+#2938# - pmtu_ipv4_vxlan4_exception3039# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel3140# over IPv4 between A and B, routed via R1. On the link between R1 and B,···212203tests="213204 pmtu_ipv4_exception ipv4: PMTU exceptions 1214205 pmtu_ipv6_exception ipv6: PMTU exceptions 1206206+ pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1207207+ pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1215208 pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1216209 pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1217210 pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1···334323 B 6 default 61335324"336325326326+policy_mark=0x04327327+rt_table=main328328+337329veth4_a_addr="192.168.1.1"338330veth4_b_addr="192.168.1.2"339331veth4_c_addr="192.168.2.10"···360346err_buf=361347tcpdump_pids=362348nettest_pids=349349+socat_pids=363350364351err() {365352 err_buf="${err_buf}${1}···738723739724 ns_name="$(nsname ${ns})"740725741741- ip -n ${ns_name} route add ${addr} via ${gw}726726+ ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"742727743728 ns=""; addr=""; gw=""744729 done···768753769754 ns_name="$(nsname ${ns})"770755771771- ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}756756+ ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"772757773758 ns=""; fam=""; addr=""; nhid=""774759 done···811796 fi812797813798 return 0799799+}800800+801801+setup_policy_routing() {802802+ setup_routing803803+804804+ ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \805805+ table "${rt_table}"806806+807807+ # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to808808+ # have an option do to it.809809+ tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio810810+ tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio811811+ tc -netns "${NS_A}" filter add dev veth_A-R1 \812812+ protocol ipv4 flower ip_proto udp \813813+ action pedit ex munge ip df set 0x40 pipe csum ip and udp814814+ tc -netns "${NS_A}" filter add dev veth_A-R2 \815815+ protocol ipv4 flower ip_proto udp \816816+ action pedit ex munge ip df set 0x40 pipe csum ip and udp814817}815818816819setup_bridge() {···936903 done937904 nettest_pids=938905906906+ for pid in ${socat_pids}; do907907+ kill "${pid}"908908+ done909909+ socat_pids=910910+939911 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do940912 ip netns del ${n} 2> /dev/null941913 done···988950route_get_dst_exception() {989951 ns_cmd="${1}"990952 dst="${2}"953953+ dsfield="${3}"991954992992- ${ns_cmd} ip route get "${dst}"955955+ if [ -z "${dsfield}" ]; then956956+ dsfield=0957957+ fi958958+959959+ ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"993960}994961995962route_get_dst_pmtu_from_exception() {996963 ns_cmd="${1}"997964 dst="${2}"965965+ dsfield="${3}"998966999999- mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"967967+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"1000968}10019691002970check_pmtu_value() {···1110106611111067test_pmtu_ipv6_exception() {11121068 test_pmtu_ipvX 610691069+}10701070+10711071+test_pmtu_ipv4_dscp_icmp_exception() {10721072+ rt_table=10010731073+10741074+ setup namespaces policy_routing || return $ksft_skip10751075+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \10761076+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \10771077+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \10781078+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R210791079+10801080+ # Set up initial MTU values10811081+ mtu "${ns_a}" veth_A-R1 200010821082+ mtu "${ns_r1}" veth_R1-A 200010831083+ mtu "${ns_r1}" veth_R1-B 140010841084+ mtu "${ns_b}" veth_B-R1 140010851085+10861086+ mtu "${ns_a}" veth_A-R2 200010871087+ mtu "${ns_r2}" veth_R2-A 200010881088+ mtu "${ns_r2}" veth_R2-B 150010891089+ mtu "${ns_b}" veth_B-R2 150010901090+10911091+ len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R110921092+10931093+ dst1="${prefix4}.${b_r1}.1"10941094+ dst2="${prefix4}.${b_r2}.1"10951095+10961096+ # Create route exceptions10971097+ dsfield=${policy_mark} # No ECN bit set (Not-ECT)10981098+ run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"10991099+11001100+ dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))11011101+ run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"11021102+11031103+ # Check that exceptions have been created with the correct PMTU11041104+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"11051105+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 111061106+11071107+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"11081108+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 111091109+}11101110+11111111+test_pmtu_ipv4_dscp_udp_exception() {11121112+ rt_table=10011131113+11141114+ if ! which socat > /dev/null 2>&1; then11151115+ echo "'socat' command not found; skipping tests"11161116+ return $ksft_skip11171117+ fi11181118+11191119+ setup namespaces policy_routing || return $ksft_skip11201120+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \11211121+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \11221122+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \11231123+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R211241124+11251125+ # Set up initial MTU values11261126+ mtu "${ns_a}" veth_A-R1 200011271127+ mtu "${ns_r1}" veth_R1-A 200011281128+ mtu "${ns_r1}" veth_R1-B 140011291129+ mtu "${ns_b}" veth_B-R1 140011301130+11311131+ mtu "${ns_a}" veth_A-R2 200011321132+ mtu "${ns_r2}" veth_R2-A 200011331133+ mtu "${ns_r2}" veth_R2-B 150011341134+ mtu "${ns_b}" veth_B-R2 150011351135+11361136+ len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R111371137+11381138+ dst1="${prefix4}.${b_r1}.1"11391139+ dst2="${prefix4}.${b_r2}.1"11401140+11411141+ # Create route exceptions11421142+ run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=111431143+ socat_pids="${socat_pids} $!"11441144+11451145+ dsfield=${policy_mark} # No ECN bit set (Not-ECT)11461146+ run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \11471147+ UDP:"${dst1}":50000,tos="${dsfield}"11481148+11491149+ dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))11501150+ run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \11511151+ UDP:"${dst2}":50000,tos="${dsfield}"11521152+11531153+ # Check that exceptions have been created with the correct PMTU11541154+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"11551155+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 111561156+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"11571157+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 111131158}1114115911151160test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {