Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: Rename mono_delivery_time to tstamp_type for scalability

mono_delivery_time was added to check if skb->tstamp has the delivery
time in mono clock base (i.e. EDT); otherwise skb->tstamp has the
timestamp at ingress and the delivery_time at egress.

Renaming the bitfield from mono_delivery_time to tstamp_type is for
extensibility for other timestamps such as a userspace timestamp
(i.e. SO_TXTIME) set via sock opts.

As we are renaming the mono_delivery_time to tstamp_type, it makes
sense to start assigning tstamp_type based on enum defined
in this commit.

Earlier we used a bool arg flag to check if the tstamp is mono in the
function skb_set_delivery_time. Now the signature of the function
accepts tstamp_type to distinguish between mono and real time.

Also skb_set_delivery_type_by_clockid is a new function which accepts
clockid to determine the tstamp_type.

In the future, tstamp_type:1 can be extended to support a userspace
timestamp by increasing the bitfield.

Signed-off-by: Abhishek Chauhan <quic_abchauha@quicinc.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>

authored by

Abhishek Chauhan and committed by
Martin KaFai Lau
4d25ca2d a87f34e7

+78 -51
+39 -13
include/linux/skbuff.h
··· 706 706 typedef unsigned char *sk_buff_data_t; 707 707 #endif 708 708 709 + enum skb_tstamp_type { 710 + SKB_CLOCK_REALTIME, 711 + SKB_CLOCK_MONOTONIC, 712 + }; 713 + 709 714 /** 710 715 * DOC: Basic sk_buff geometry 711 716 * ··· 828 823 * @dst_pending_confirm: need to confirm neighbour 829 824 * @decrypted: Decrypted SKB 830 825 * @slow_gro: state present at GRO time, slower prepare step required 831 - * @mono_delivery_time: When set, skb->tstamp has the 832 - * delivery_time in mono clock base (i.e. EDT). Otherwise, the 833 - * skb->tstamp has the (rcv) timestamp at ingress and 834 - * delivery_time at egress. 826 + * @tstamp_type: When set, skb->tstamp has the 827 + * delivery_time clock base of skb->tstamp. 835 828 * @napi_id: id of the NAPI struct this skb came from 836 829 * @sender_cpu: (aka @napi_id) source CPU in XPS 837 830 * @alloc_cpu: CPU which did the skb allocation. ··· 957 954 /* private: */ 958 955 __u8 __mono_tc_offset[0]; 959 956 /* public: */ 960 - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ 957 + __u8 tstamp_type:1; /* See skb_tstamp_type */ 961 958 #ifdef CONFIG_NET_XGRESS 962 959 __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ 963 960 __u8 tc_skip_classify:1; ··· 4186 4183 static inline void __net_timestamp(struct sk_buff *skb) 4187 4184 { 4188 4185 skb->tstamp = ktime_get_real(); 4189 - skb->mono_delivery_time = 0; 4186 + skb->tstamp_type = SKB_CLOCK_REALTIME; 4190 4187 } 4191 4188 4192 4189 static inline ktime_t net_timedelta(ktime_t t) ··· 4195 4192 } 4196 4193 4197 4194 static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, 4198 - bool mono) 4195 + u8 tstamp_type) 4199 4196 { 4200 4197 skb->tstamp = kt; 4201 - skb->mono_delivery_time = kt && mono; 4198 + 4199 + if (kt) 4200 + skb->tstamp_type = tstamp_type; 4201 + else 4202 + skb->tstamp_type = SKB_CLOCK_REALTIME; 4203 + } 4204 + 4205 + static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, 4206 + ktime_t kt, clockid_t 
clockid) 4207 + { 4208 + u8 tstamp_type = SKB_CLOCK_REALTIME; 4209 + 4210 + switch (clockid) { 4211 + case CLOCK_REALTIME: 4212 + break; 4213 + case CLOCK_MONOTONIC: 4214 + tstamp_type = SKB_CLOCK_MONOTONIC; 4215 + break; 4216 + default: 4217 + WARN_ON_ONCE(1); 4218 + kt = 0; 4219 + } 4220 + 4221 + skb_set_delivery_time(skb, kt, tstamp_type); 4202 4222 } 4203 4223 4204 4224 DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); ··· 4231 4205 */ 4232 4206 static inline void skb_clear_delivery_time(struct sk_buff *skb) 4233 4207 { 4234 - if (skb->mono_delivery_time) { 4235 - skb->mono_delivery_time = 0; 4208 + if (skb->tstamp_type) { 4209 + skb->tstamp_type = SKB_CLOCK_REALTIME; 4236 4210 if (static_branch_unlikely(&netstamp_needed_key)) 4237 4211 skb->tstamp = ktime_get_real(); 4238 4212 else ··· 4242 4216 4243 4217 static inline void skb_clear_tstamp(struct sk_buff *skb) 4244 4218 { 4245 - if (skb->mono_delivery_time) 4219 + if (skb->tstamp_type) 4246 4220 return; 4247 4221 4248 4222 skb->tstamp = 0; ··· 4250 4224 4251 4225 static inline ktime_t skb_tstamp(const struct sk_buff *skb) 4252 4226 { 4253 - if (skb->mono_delivery_time) 4227 + if (skb->tstamp_type) 4254 4228 return 0; 4255 4229 4256 4230 return skb->tstamp; ··· 4258 4232 4259 4233 static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) 4260 4234 { 4261 - if (!skb->mono_delivery_time && skb->tstamp) 4235 + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) 4262 4236 return skb->tstamp; 4263 4237 4264 4238 if (static_branch_unlikely(&netstamp_needed_key) || cond)
+2 -2
include/net/inet_frag.h
··· 76 76 * @stamp: timestamp of the last received fragment 77 77 * @len: total length of the original datagram 78 78 * @meat: length of received fragments so far 79 - * @mono_delivery_time: stamp has a mono delivery time (EDT) 79 + * @tstamp_type: stamp has a mono delivery time (EDT) 80 80 * @flags: fragment queue flags 81 81 * @max_size: maximum received fragment size 82 82 * @fqdir: pointer to struct fqdir ··· 97 97 ktime_t stamp; 98 98 int len; 99 99 int meat; 100 - u8 mono_delivery_time; 100 + u8 tstamp_type; 101 101 __u8 flags; 102 102 u16 max_size; 103 103 struct fqdir *fqdir;
+3 -3
net/bridge/netfilter/nf_conntrack_bridge.c
··· 32 32 struct sk_buff *)) 33 33 { 34 34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 35 - bool mono_delivery_time = skb->mono_delivery_time; 35 + u8 tstamp_type = skb->tstamp_type; 36 36 unsigned int hlen, ll_rs, mtu; 37 37 ktime_t tstamp = skb->tstamp; 38 38 struct ip_frag_state state; ··· 82 82 if (iter.frag) 83 83 ip_fraglist_prepare(skb, &iter); 84 84 85 - skb_set_delivery_time(skb, tstamp, mono_delivery_time); 85 + skb_set_delivery_time(skb, tstamp, tstamp_type); 86 86 err = output(net, sk, data, skb); 87 87 if (err || !iter.frag) 88 88 break; ··· 113 113 goto blackhole; 114 114 } 115 115 116 - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); 116 + skb_set_delivery_time(skb2, tstamp, tstamp_type); 117 117 err = output(net, sk, data, skb2); 118 118 if (err) 119 119 goto blackhole;
+1 -1
net/core/dev.c
··· 2160 2160 static inline void net_timestamp_set(struct sk_buff *skb) 2161 2161 { 2162 2162 skb->tstamp = 0; 2163 - skb->mono_delivery_time = 0; 2163 + skb->tstamp_type = SKB_CLOCK_REALTIME; 2164 2164 if (static_branch_unlikely(&netstamp_needed_key)) 2165 2165 skb->tstamp = ktime_get_real(); 2166 2166 }
+5 -5
net/core/filter.c
··· 7730 7730 if (!tstamp) 7731 7731 return -EINVAL; 7732 7732 skb->tstamp = tstamp; 7733 - skb->mono_delivery_time = 1; 7733 + skb->tstamp_type = SKB_CLOCK_MONOTONIC; 7734 7734 break; 7735 7735 case BPF_SKB_TSTAMP_UNSPEC: 7736 7736 if (tstamp) 7737 7737 return -EINVAL; 7738 7738 skb->tstamp = 0; 7739 - skb->mono_delivery_time = 0; 7739 + skb->tstamp_type = SKB_CLOCK_REALTIME; 7740 7740 break; 7741 7741 default: 7742 7742 return -EINVAL; ··· 9443 9443 TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); 9444 9444 *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, 9445 9445 TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); 9446 - /* skb->tc_at_ingress && skb->mono_delivery_time, 9446 + /* skb->tc_at_ingress && skb->tstamp_type, 9447 9447 * read 0 as the (rcv) timestamp. 9448 9448 */ 9449 9449 *insn++ = BPF_MOV64_IMM(value_reg, 0); ··· 9468 9468 * the bpf prog is aware the tstamp could have delivery time. 9469 9469 * Thus, write skb->tstamp as is if tstamp_type_access is true. 9470 9470 * Otherwise, writing at ingress will have to clear the 9471 - * mono_delivery_time bit also. 9471 + * skb->tstamp_type bit also. 9472 9472 */ 9473 9473 if (!prog->tstamp_type_access) { 9474 9474 __u8 tmp_reg = BPF_REG_AX; ··· 9478 9478 *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); 9479 9479 /* goto <store> */ 9480 9480 *insn++ = BPF_JMP_A(2); 9481 - /* <clear>: mono_delivery_time */ 9481 + /* <clear>: skb->tstamp_type */ 9482 9482 *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); 9483 9483 *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); 9484 9484 }
+1 -1
net/ieee802154/6lowpan/reassembly.c
··· 130 130 goto err; 131 131 132 132 fq->q.stamp = skb->tstamp; 133 - fq->q.mono_delivery_time = skb->mono_delivery_time; 133 + fq->q.tstamp_type = skb->tstamp_type; 134 134 if (frag_type == LOWPAN_DISPATCH_FRAG1) 135 135 fq->q.flags |= INET_FRAG_FIRST_IN; 136 136
+1 -1
net/ipv4/inet_fragment.c
··· 619 619 skb_mark_not_on_list(head); 620 620 head->prev = NULL; 621 621 head->tstamp = q->stamp; 622 - head->mono_delivery_time = q->mono_delivery_time; 622 + head->tstamp_type = q->tstamp_type; 623 623 624 624 if (sk) 625 625 refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
+1 -1
net/ipv4/ip_fragment.c
··· 355 355 qp->iif = dev->ifindex; 356 356 357 357 qp->q.stamp = skb->tstamp; 358 - qp->q.mono_delivery_time = skb->mono_delivery_time; 358 + qp->q.tstamp_type = skb->tstamp_type; 359 359 qp->q.meat += skb->len; 360 360 qp->ecn |= ecn; 361 361 add_frag_mem_limit(qp->q.fqdir, skb->truesize);
+5 -4
net/ipv4/ip_output.c
··· 764 764 { 765 765 struct iphdr *iph; 766 766 struct sk_buff *skb2; 767 - bool mono_delivery_time = skb->mono_delivery_time; 767 + u8 tstamp_type = skb->tstamp_type; 768 768 struct rtable *rt = skb_rtable(skb); 769 769 unsigned int mtu, hlen, ll_rs; 770 770 struct ip_fraglist_iter iter; ··· 856 856 } 857 857 } 858 858 859 - skb_set_delivery_time(skb, tstamp, mono_delivery_time); 859 + skb_set_delivery_time(skb, tstamp, tstamp_type); 860 860 err = output(net, sk, skb); 861 861 862 862 if (!err) ··· 912 912 /* 913 913 * Put this fragment into the sending queue. 914 914 */ 915 - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); 915 + skb_set_delivery_time(skb2, tstamp, tstamp_type); 916 916 err = output(net, sk, skb2); 917 917 if (err) 918 918 goto fail; ··· 1649 1649 arg->csumoffset) = csum_fold(csum_add(nskb->csum, 1650 1650 arg->csum)); 1651 1651 nskb->ip_summed = CHECKSUM_NONE; 1652 - nskb->mono_delivery_time = !!transmit_time; 1652 + if (transmit_time) 1653 + nskb->tstamp_type = SKB_CLOCK_MONOTONIC; 1653 1654 if (txhash) 1654 1655 skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); 1655 1656 ip_push_pending_frames(sk, &fl4);
+7 -7
net/ipv4/tcp_output.c
··· 1301 1301 tp = tcp_sk(sk); 1302 1302 prior_wstamp = tp->tcp_wstamp_ns; 1303 1303 tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); 1304 - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); 1304 + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); 1305 1305 if (clone_it) { 1306 1306 oskb = skb; 1307 1307 ··· 1655 1655 1656 1656 skb_split(skb, buff, len); 1657 1657 1658 - skb_set_delivery_time(buff, skb->tstamp, true); 1658 + skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC); 1659 1659 tcp_fragment_tstamp(skb, buff); 1660 1660 1661 1661 old_factor = tcp_skb_pcount(skb); ··· 2764 2764 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { 2765 2765 /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ 2766 2766 tp->tcp_wstamp_ns = tp->tcp_clock_cache; 2767 - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); 2767 + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); 2768 2768 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); 2769 2769 tcp_init_tso_segs(skb, mss_now); 2770 2770 goto repair; /* Skip network transmission */ ··· 3752 3752 #ifdef CONFIG_SYN_COOKIES 3753 3753 if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) 3754 3754 skb_set_delivery_time(skb, cookie_init_timestamp(req, now), 3755 - true); 3755 + SKB_CLOCK_MONOTONIC); 3756 3756 else 3757 3757 #endif 3758 3758 { 3759 - skb_set_delivery_time(skb, now, true); 3759 + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); 3760 3760 if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ 3761 3761 tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); 3762 3762 } ··· 3843 3843 bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, 3844 3844 synack_type, &opts); 3845 3845 3846 - skb_set_delivery_time(skb, now, true); 3846 + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); 3847 3847 tcp_add_tx_delay(skb, tp); 3848 3848 3849 3849 return skb; ··· 4027 4027 4028 4028 err = 
tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); 4029 4029 4030 - skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true); 4030 + skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC); 4031 4031 4032 4032 /* Now full SYN+DATA was cloned and sent (or not), 4033 4033 * remove the SYN from the original skb (syn_data)
+3 -3
net/ipv6/ip6_output.c
··· 859 859 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 860 860 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 861 861 inet6_sk(skb->sk) : NULL; 862 - bool mono_delivery_time = skb->mono_delivery_time; 862 + u8 tstamp_type = skb->tstamp_type; 863 863 struct ip6_frag_state state; 864 864 unsigned int mtu, hlen, nexthdr_offset; 865 865 ktime_t tstamp = skb->tstamp; ··· 955 955 if (iter.frag) 956 956 ip6_fraglist_prepare(skb, &iter); 957 957 958 - skb_set_delivery_time(skb, tstamp, mono_delivery_time); 958 + skb_set_delivery_time(skb, tstamp, tstamp_type); 959 959 err = output(net, sk, skb); 960 960 if (!err) 961 961 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), ··· 1016 1016 /* 1017 1017 * Put this fragment into the sending queue. 1018 1018 */ 1019 - skb_set_delivery_time(frag, tstamp, mono_delivery_time); 1019 + skb_set_delivery_time(frag, tstamp, tstamp_type); 1020 1020 err = output(net, sk, frag); 1021 1021 if (err) 1022 1022 goto fail;
+3 -3
net/ipv6/netfilter.c
··· 126 126 struct sk_buff *)) 127 127 { 128 128 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 129 - bool mono_delivery_time = skb->mono_delivery_time; 129 + u8 tstamp_type = skb->tstamp_type; 130 130 ktime_t tstamp = skb->tstamp; 131 131 struct ip6_frag_state state; 132 132 u8 *prevhdr, nexthdr = 0; ··· 192 192 if (iter.frag) 193 193 ip6_fraglist_prepare(skb, &iter); 194 194 195 - skb_set_delivery_time(skb, tstamp, mono_delivery_time); 195 + skb_set_delivery_time(skb, tstamp, tstamp_type); 196 196 err = output(net, sk, data, skb); 197 197 if (err || !iter.frag) 198 198 break; ··· 225 225 goto blackhole; 226 226 } 227 227 228 - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); 228 + skb_set_delivery_time(skb2, tstamp, tstamp_type); 229 229 err = output(net, sk, data, skb2); 230 230 if (err) 231 231 goto blackhole;
+1 -1
net/ipv6/netfilter/nf_conntrack_reasm.c
··· 263 263 fq->iif = dev->ifindex; 264 264 265 265 fq->q.stamp = skb->tstamp; 266 - fq->q.mono_delivery_time = skb->mono_delivery_time; 266 + fq->q.tstamp_type = skb->tstamp_type; 267 267 fq->q.meat += skb->len; 268 268 fq->ecn |= ecn; 269 269 if (payload_len > fq->q.max_size)
+1 -1
net/ipv6/reassembly.c
··· 198 198 fq->iif = dev->ifindex; 199 199 200 200 fq->q.stamp = skb->tstamp; 201 - fq->q.mono_delivery_time = skb->mono_delivery_time; 201 + fq->q.tstamp_type = skb->tstamp_type; 202 202 fq->q.meat += skb->len; 203 203 fq->ecn |= ecn; 204 204 add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+1 -1
net/ipv6/tcp_ipv6.c
··· 975 975 mark = inet_twsk(sk)->tw_mark; 976 976 else 977 977 mark = READ_ONCE(sk->sk_mark); 978 - skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 978 + skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); 979 979 } 980 980 if (txhash) { 981 981 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
+2 -2
net/sched/act_bpf.c
··· 54 54 bpf_compute_data_pointers(skb); 55 55 filter_res = bpf_prog_run(filter, skb); 56 56 } 57 - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) 58 - skb->mono_delivery_time = 0; 57 + if (unlikely(!skb->tstamp && skb->tstamp_type)) 58 + skb->tstamp_type = SKB_CLOCK_REALTIME; 59 59 if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) 60 60 skb_orphan(skb); 61 61
+2 -2
net/sched/cls_bpf.c
··· 104 104 bpf_compute_data_pointers(skb); 105 105 filter_res = bpf_prog_run(prog->filter, skb); 106 106 } 107 - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) 108 - skb->mono_delivery_time = 0; 107 + if (unlikely(!skb->tstamp && skb->tstamp_type)) 108 + skb->tstamp_type = SKB_CLOCK_REALTIME; 109 109 110 110 if (prog->exts_integrated) { 111 111 res->class = 0;