Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dropmon: add ability to detect when hardware drops rx packets

Patch to add the ability to detect drops in hardware interfaces via dropwatch.
Adds a tracepoint to net_rx_action to signal every time a napi instance is
polled. The dropmon code then periodically checks to see if the rx_dropped
counter has changed, and if so, adds a drop notification to the netlink
protocol, using the reserved all-0's vector to indicate the drop location was in
hardware, rather than somewhere in the code.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

include/linux/net_dropmon.h | 8 ++
include/trace/napi.h | 11 +++
net/core/dev.c | 5 +
net/core/drop_monitor.c | 124 ++++++++++++++++++++++++++++++++++++++++++--
net/core/net-traces.c | 4 +
net/core/netpoll.c | 2
6 files changed, 149 insertions(+), 5 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Neil Horman and committed by
David S. Miller
4ea7e386 d95ed927

+149 -5
+8
include/linux/net_dropmon.h
··· 2 2 #define __NET_DROPMON_H 3 3 4 4 #include <linux/netlink.h> 5 + #include <linux/types.h> 5 6 6 7 struct net_dm_drop_point { 7 8 __u8 pc[8]; 8 9 __u32 count; 9 10 }; 11 + 12 + #define is_drop_point_hw(x) do {\ 13 + int ____i, ____j;\ 14 + for (____i = 0; ____i < 8; i ____i++)\ 15 + ____j |= x[____i];\ 16 + ____j;\ 17 + } while (0) 10 18 11 19 #define NET_DM_CFG_VERSION 0 12 20 #define NET_DM_CFG_ALERT_COUNT 1
+11
include/trace/napi.h
··· 1 + #ifndef _TRACE_NAPI_H_ 2 + #define _TRACE_NAPI_H_ 3 + 4 + #include <linux/netdevice.h> 5 + #include <linux/tracepoint.h> 6 + 7 + DECLARE_TRACE(napi_poll, 8 + TP_PROTO(struct napi_struct *napi), 9 + TP_ARGS(napi)); 10 + 11 + #endif
+4 -1
net/core/dev.c
··· 126 126 #include <linux/in.h> 127 127 #include <linux/jhash.h> 128 128 #include <linux/random.h> 129 + #include <trace/napi.h> 129 130 130 131 #include "net-sysfs.h" 131 132 ··· 2772 2771 * accidently calling ->poll() when NAPI is not scheduled. 2773 2772 */ 2774 2773 work = 0; 2775 - if (test_bit(NAPI_STATE_SCHED, &n->state)) 2774 + if (test_bit(NAPI_STATE_SCHED, &n->state)) { 2776 2775 work = n->poll(n, weight); 2776 + trace_napi_poll(n); 2777 + } 2777 2778 2778 2779 WARN_ON_ONCE(work > weight); 2779 2780
+120 -4
net/core/drop_monitor.c
··· 22 22 #include <linux/timer.h> 23 23 #include <linux/bitops.h> 24 24 #include <net/genetlink.h> 25 + #include <net/netevent.h> 25 26 26 27 #include <trace/skb.h> 28 + #include <trace/napi.h> 27 29 28 30 #include <asm/unaligned.h> 29 31 ··· 40 38 * and the work handle that will send up 41 39 * netlink alerts 42 40 */ 43 - struct sock *dm_sock; 41 + static int trace_state = TRACE_OFF; 42 + static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; 44 43 45 44 struct per_cpu_dm_data { 46 45 struct work_struct dm_alert_work; 47 46 struct sk_buff *skb; 48 47 atomic_t dm_hit_count; 49 48 struct timer_list send_timer; 49 + }; 50 + 51 + struct dm_hw_stat_delta { 52 + struct net_device *dev; 53 + struct list_head list; 54 + struct rcu_head rcu; 55 + unsigned long last_drop_val; 50 56 }; 51 57 52 58 static struct genl_family net_drop_monitor_family = { ··· 69 59 70 60 static int dm_hit_limit = 64; 71 61 static int dm_delay = 1; 72 - 62 + static unsigned long dm_hw_check_delta = 2*HZ; 63 + static LIST_HEAD(hw_stats_list); 73 64 74 65 static void reset_per_cpu_data(struct per_cpu_dm_data *data) 75 66 { ··· 126 115 schedule_work(&data->dm_alert_work); 127 116 } 128 117 129 - static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) 118 + static void trace_drop_common(struct sk_buff *skb, void *location) 130 119 { 131 120 struct net_dm_alert_msg *msg; 132 121 struct nlmsghdr *nlh; ··· 170 159 return; 171 160 } 172 161 162 + static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) 163 + { 164 + trace_drop_common(skb, location); 165 + } 166 + 167 + static void trace_napi_poll_hit(struct napi_struct *napi) 168 + { 169 + struct dm_hw_stat_delta *new_stat; 170 + 171 + /* 172 + * Ratelimit our check time to dm_hw_check_delta jiffies 173 + */ 174 + if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta)) 175 + return; 176 + 177 + rcu_read_lock(); 178 + list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { 179 + if ((new_stat->dev == napi->dev) && 
180 + (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { 181 + trace_drop_common(NULL, NULL); 182 + new_stat->last_drop_val = napi->dev->stats.rx_dropped; 183 + break; 184 + } 185 + } 186 + rcu_read_unlock(); 187 + } 188 + 189 + 190 + static void free_dm_hw_stat(struct rcu_head *head) 191 + { 192 + struct dm_hw_stat_delta *n; 193 + n = container_of(head, struct dm_hw_stat_delta, rcu); 194 + kfree(n); 195 + } 196 + 173 197 static int set_all_monitor_traces(int state) 174 198 { 175 199 int rc = 0; 200 + struct dm_hw_stat_delta *new_stat = NULL; 201 + struct dm_hw_stat_delta *temp; 202 + 203 + spin_lock(&trace_state_lock); 176 204 177 205 switch (state) { 178 206 case TRACE_ON: 179 207 rc |= register_trace_kfree_skb(trace_kfree_skb_hit); 208 + rc |= register_trace_napi_poll(trace_napi_poll_hit); 180 209 break; 181 210 case TRACE_OFF: 182 211 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); 212 + rc |= unregister_trace_napi_poll(trace_napi_poll_hit); 183 213 184 214 tracepoint_synchronize_unregister(); 215 + 216 + /* 217 + * Clean the device list 218 + */ 219 + list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { 220 + if (new_stat->dev == NULL) { 221 + list_del_rcu(&new_stat->list); 222 + call_rcu(&new_stat->rcu, free_dm_hw_stat); 223 + } 224 + } 185 225 break; 186 226 default: 187 227 rc = 1; 188 228 break; 189 229 } 230 + 231 + if (!rc) 232 + trace_state = state; 233 + 234 + spin_unlock(&trace_state_lock); 190 235 191 236 if (rc) 192 237 return -EINPROGRESS; ··· 271 204 return -ENOTSUPP; 272 205 } 273 206 207 + static int dropmon_net_event(struct notifier_block *ev_block, 208 + unsigned long event, void *ptr) 209 + { 210 + struct net_device *dev = ptr; 211 + struct dm_hw_stat_delta *new_stat = NULL; 212 + struct dm_hw_stat_delta *tmp; 213 + 214 + switch (event) { 215 + case NETDEV_REGISTER: 216 + new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); 217 + 218 + if (!new_stat) 219 + goto out; 220 + 221 + new_stat->dev = dev; 222 
+ INIT_RCU_HEAD(&new_stat->rcu); 223 + spin_lock(&trace_state_lock); 224 + list_add_rcu(&new_stat->list, &hw_stats_list); 225 + spin_unlock(&trace_state_lock); 226 + break; 227 + case NETDEV_UNREGISTER: 228 + spin_lock(&trace_state_lock); 229 + list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { 230 + if (new_stat->dev == dev) { 231 + new_stat->dev = NULL; 232 + if (trace_state == TRACE_OFF) { 233 + list_del_rcu(&new_stat->list); 234 + call_rcu(&new_stat->rcu, free_dm_hw_stat); 235 + break; 236 + } 237 + } 238 + } 239 + spin_unlock(&trace_state_lock); 240 + break; 241 + } 242 + out: 243 + return NOTIFY_DONE; 244 + } 274 245 275 246 static struct genl_ops dropmon_ops[] = { 276 247 { ··· 323 218 .cmd = NET_DM_CMD_STOP, 324 219 .doit = net_dm_cmd_trace, 325 220 }, 221 + }; 222 + 223 + static struct notifier_block dropmon_net_notifier = { 224 + .notifier_call = dropmon_net_event 326 225 }; 327 226 328 227 static int __init init_net_drop_monitor(void) ··· 352 243 ret = genl_register_ops(&net_drop_monitor_family, 353 244 &dropmon_ops[i]); 354 245 if (ret) { 355 - printk(KERN_CRIT "failed to register operation %d\n", 246 + printk(KERN_CRIT "Failed to register operation %d\n", 356 247 dropmon_ops[i].cmd); 357 248 goto out_unreg; 358 249 } 250 + } 251 + 252 + rc = register_netdevice_notifier(&dropmon_net_notifier); 253 + if (rc < 0) { 254 + printk(KERN_CRIT "Failed to register netdevice notifier\n"); 255 + goto out_unreg; 359 256 } 360 257 361 258 rc = 0; ··· 374 259 data->send_timer.data = cpu; 375 260 data->send_timer.function = sched_send_work; 376 261 } 262 + 377 263 goto out; 378 264 379 265 out_unreg:
+4
net/core/net-traces.c
··· 20 20 #include <linux/netlink.h> 21 21 #include <linux/net_dropmon.h> 22 22 #include <trace/skb.h> 23 + #include <trace/napi.h> 23 24 24 25 #include <asm/unaligned.h> 25 26 #include <asm/bitops.h> ··· 28 27 29 28 DEFINE_TRACE(kfree_skb); 30 29 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 30 + 31 + DEFINE_TRACE(napi_poll); 32 + EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+2
net/core/netpoll.c
··· 24 24 #include <net/tcp.h> 25 25 #include <net/udp.h> 26 26 #include <asm/unaligned.h> 27 + #include <trace/napi.h> 27 28 28 29 /* 29 30 * We maintain a small pool of fully-sized skbs, to make sure the ··· 138 137 set_bit(NAPI_STATE_NPSVC, &napi->state); 139 138 140 139 work = napi->poll(napi, budget); 140 + trace_napi_poll(napi->dev); 141 141 142 142 clear_bit(NAPI_STATE_NPSVC, &napi->state); 143 143 atomic_dec(&trapped);