Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: tun: track dropped skb via kfree_skb_reason()

TUN can be used as a vhost-net backend. E.g., tun_net_xmit() is the
interface that forwards the skb from TUN to vhost-net/virtio-net.

However, there are many "goto drop" paths in the TUN driver. Therefore,
kfree_skb_reason() is called at each "goto drop" to help userspace
ftrace/eBPF tools track the reason for the loss of packets.

The following drop reasons are introduced:

- SKB_DROP_REASON_DEV_READY
- SKB_DROP_REASON_NOMEM
- SKB_DROP_REASON_HDR_TRUNC
- SKB_DROP_REASON_TAP_FILTER
- SKB_DROP_REASON_TAP_TXFILTER

Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Dongli Zhang and committed by
David S. Miller
4b4f052e 45a15d89

+51 -9
+28 -9
drivers/net/tun.c
··· 1058 1058 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) 1059 1059 { 1060 1060 struct tun_struct *tun = netdev_priv(dev); 1061 + enum skb_drop_reason drop_reason; 1061 1062 int txq = skb->queue_mapping; 1062 1063 struct netdev_queue *queue; 1063 1064 struct tun_file *tfile; ··· 1068 1067 tfile = rcu_dereference(tun->tfiles[txq]); 1069 1068 1070 1069 /* Drop packet if interface is not attached */ 1071 - if (!tfile) 1070 + if (!tfile) { 1071 + drop_reason = SKB_DROP_REASON_DEV_READY; 1072 1072 goto drop; 1073 + } 1073 1074 1074 1075 if (!rcu_dereference(tun->steering_prog)) 1075 1076 tun_automq_xmit(tun, skb); ··· 1081 1078 /* Drop if the filter does not like it. 1082 1079 * This is a noop if the filter is disabled. 1083 1080 * Filter can be enabled only for the TAP devices. */ 1084 - if (!check_filter(&tun->txflt, skb)) 1081 + if (!check_filter(&tun->txflt, skb)) { 1082 + drop_reason = SKB_DROP_REASON_TAP_TXFILTER; 1085 1083 goto drop; 1084 + } 1086 1085 1087 1086 if (tfile->socket.sk->sk_filter && 1088 - sk_filter(tfile->socket.sk, skb)) 1087 + sk_filter(tfile->socket.sk, skb)) { 1088 + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1089 1089 goto drop; 1090 + } 1090 1091 1091 1092 len = run_ebpf_filter(tun, skb, len); 1092 - if (len == 0) 1093 + if (len == 0) { 1094 + drop_reason = SKB_DROP_REASON_TAP_FILTER; 1093 1095 goto drop; 1096 + } 1094 1097 1095 - if (pskb_trim(skb, len)) 1098 + if (pskb_trim(skb, len)) { 1099 + drop_reason = SKB_DROP_REASON_NOMEM; 1096 1100 goto drop; 1101 + } 1097 1102 1098 - if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) 1103 + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { 1104 + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; 1099 1105 goto drop; 1106 + } 1100 1107 1101 1108 skb_tx_timestamp(skb); 1102 1109 ··· 1117 1104 1118 1105 nf_reset_ct(skb); 1119 1106 1120 - if (ptr_ring_produce(&tfile->tx_ring, skb)) 1107 + if (ptr_ring_produce(&tfile->tx_ring, skb)) { 1108 + drop_reason = 
SKB_DROP_REASON_FULL_RING; 1121 1109 goto drop; 1110 + } 1122 1111 1123 1112 /* NETIF_F_LLTX requires to do our own update of trans_start */ 1124 1113 queue = netdev_get_tx_queue(dev, txq); ··· 1137 1122 drop: 1138 1123 atomic_long_inc(&dev->tx_dropped); 1139 1124 skb_tx_error(skb); 1140 - kfree_skb(skb); 1125 + kfree_skb_reason(skb, drop_reason); 1141 1126 rcu_read_unlock(); 1142 1127 return NET_XMIT_DROP; 1143 1128 } ··· 1735 1720 u32 rxhash = 0; 1736 1721 int skb_xdp = 1; 1737 1722 bool frags = tun_napi_frags_enabled(tfile); 1723 + enum skb_drop_reason drop_reason; 1738 1724 1739 1725 if (!(tun->flags & IFF_NO_PI)) { 1740 1726 if (len < sizeof(pi)) ··· 1839 1823 1840 1824 if (err) { 1841 1825 err = -EFAULT; 1826 + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; 1842 1827 drop: 1843 1828 atomic_long_inc(&tun->dev->rx_dropped); 1844 - kfree_skb(skb); 1829 + kfree_skb_reason(skb, drop_reason); 1845 1830 if (frags) { 1846 1831 tfile->napi.skb = NULL; 1847 1832 mutex_unlock(&tfile->napi_mutex); ··· 1889 1872 case IFF_TAP: 1890 1873 if (frags && !pskb_may_pull(skb, ETH_HLEN)) { 1891 1874 err = -ENOMEM; 1875 + drop_reason = SKB_DROP_REASON_HDR_TRUNC; 1892 1876 goto drop; 1893 1877 } 1894 1878 skb->protocol = eth_type_trans(skb, tun->dev); ··· 1943 1925 if (unlikely(!(tun->dev->flags & IFF_UP))) { 1944 1926 err = -EIO; 1945 1927 rcu_read_unlock(); 1928 + drop_reason = SKB_DROP_REASON_DEV_READY; 1946 1929 goto drop; 1947 1930 } 1948 1931
+18
include/linux/skbuff.h
··· 424 424 SKB_DROP_REASON_DEV_HDR, /* device driver specific 425 425 * header/metadata is invalid 426 426 */ 427 + /* the device is not ready to xmit/recv due to any of its data 428 + * structure that is not up/ready/initialized, e.g., the IFF_UP is 429 + * not set, or driver specific tun->tfiles[txq] is not initialized 430 + */ 431 + SKB_DROP_REASON_DEV_READY, 427 432 SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ 433 + SKB_DROP_REASON_NOMEM, /* error due to OOM */ 434 + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header 435 + * from networking data, e.g., failed 436 + * to pull the protocol header from 437 + * frags via pskb_may_pull() 438 + */ 439 + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly 440 + * attached to tun/tap, e.g., via 441 + * TUNSETFILTEREBPF 442 + */ 443 + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented 444 + * at tun/tap, e.g., check_filter() 445 + */ 428 446 SKB_DROP_REASON_MAX, 429 447 }; 430 448
+5
include/trace/events/skb.h
··· 55 55 EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ 56 56 EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ 57 57 EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ 58 + EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ 58 59 EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ 60 + EM(SKB_DROP_REASON_NOMEM, NOMEM) \ 61 + EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ 62 + EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ 63 + EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ 59 64 EMe(SKB_DROP_REASON_MAX, MAX) 60 65 61 66 #undef EM