/*
 * Monitoring code for network dropped packet alerts
 *
 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
 */

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
#include <net/genetlink.h>
#include <net/netevent.h>

#include <trace/events/skb.h>
#include <trace/events/napi.h>

#include <asm/unaligned.h>

#define TRACE_ON 1
#define TRACE_OFF 0

static void send_dm_alert(struct work_struct *unused);


/*
 * Globals, our netlink socket pointer
 * and the work handle that will send up
 * netlink alerts
 */
static int trace_state = TRACE_OFF;
static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED;

struct per_cpu_dm_data {
	struct work_struct dm_alert_work;
	struct sk_buff *skb;
	atomic_t dm_hit_count;
	struct timer_list send_timer;
};

struct dm_hw_stat_delta {
	struct net_device *dev;
	struct list_head list;
	struct rcu_head rcu;
	unsigned long last_drop_val;
};

static struct genl_family net_drop_monitor_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = "NET_DM",
	.version = 2,
	.maxattr = NET_DM_CMD_MAX,
};

static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);

static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);

static void reset_per_cpu_data(struct per_cpu_dm_data *data)
{
	size_t al;
	struct net_dm_alert_msg *msg;
	struct nlattr *nla;

	al = sizeof(struct net_dm_alert_msg);
	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
	al += sizeof(struct nlattr);

	data->skb = genlmsg_new(al, GFP_KERNEL);
	genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
			0, NET_DM_CMD_ALERT);
	nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
	msg = nla_data(nla);
	memset(msg, 0, al);
	atomic_set(&data->dm_hit_count, dm_hit_limit);
}

static void send_dm_alert(struct work_struct *unused)
{
	struct sk_buff *skb;
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	/*
	 * Grab the skb we're about to send
	 */
	skb = data->skb;

	/*
	 * Replace it with a new one
	 */
	reset_per_cpu_data(data);

	/*
	 * Ship it!
	 */
	genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);

}
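/*
 * For reference, a sketch of the per-cpu alert skb assembled in
 * reset_per_cpu_data() above (offsets are illustrative; the real layout,
 * including alignment padding, is whatever genlmsg_put()/nla_reserve()
 * produce):
 *
 *	struct nlmsghdr			netlink header
 *	struct genlmsghdr		cmd = NET_DM_CMD_ALERT, version 2
 *	struct nlattr			type NLA_UNSPEC
 *	struct net_dm_alert_msg		entries count, then points[]
 *	struct net_dm_drop_point[]	grows up to dm_hit_limit entries
 *
 * trace_drop_common() below grows the nlattr by one drop point at a
 * time, so dm_hit_count caps the number of hits recorded per alert
 * interval at dm_hit_limit; send_dm_alert() multicasts the filled skb
 * and swaps in a fresh one.
 */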
/*
 * This is the timer function to delay the sending of an alert
 * in the event that more drops will arrive during the
 * hysteresis period.  Note that it operates under the timer interrupt
 * so we don't need to disable preemption here
 */
static void sched_send_work(unsigned long unused)
{
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	schedule_work(&data->dm_alert_work);
}

static void trace_drop_common(struct sk_buff *skb, void *location)
{
	struct net_dm_alert_msg *msg;
	struct nlmsghdr *nlh;
	struct nlattr *nla;
	int i;
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);


	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
		/*
		 * we're already at zero, discard this hit
		 */
		goto out;
	}

	nlh = (struct nlmsghdr *)data->skb->data;
	nla = genlmsg_data(nlmsg_data(nlh));
	msg = nla_data(nla);
	for (i = 0; i < msg->entries; i++) {
		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
			msg->points[i].count++;
			goto out;
		}
	}

	/*
	 * We need to create a new entry
	 */
	__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
	msg->points[msg->entries].count = 1;
	msg->entries++;

	if (!timer_pending(&data->send_timer)) {
		data->send_timer.expires = jiffies + dm_delay * HZ;
		add_timer_on(&data->send_timer, smp_processor_id());
	}

out:
	return;
}

static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
{
	trace_drop_common(skb, location);
}

static void trace_napi_poll_hit(struct napi_struct *napi)
{
	struct dm_hw_stat_delta *new_stat;

	/*
	 * Ratelimit our check time to dm_hw_check_delta jiffies
	 */
	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
		if ((new_stat->dev == napi->dev) &&
		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
			break;
		}
	}
	rcu_read_unlock();
}


static void free_dm_hw_stat(struct rcu_head *head)
{
	struct dm_hw_stat_delta *n;
	n = container_of(head, struct dm_hw_stat_delta, rcu);
	kfree(n);
}

static int set_all_monitor_traces(int state)
{
	int rc = 0;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *temp;

	spin_lock(&trace_state_lock);

	switch (state) {
	case TRACE_ON:
		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
		rc |= register_trace_napi_poll(trace_napi_poll_hit);
		break;
	case TRACE_OFF:
		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);

		tracepoint_synchronize_unregister();

		/*
		 * Clean the device list
		 */
		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
			if (new_stat->dev == NULL) {
				list_del_rcu(&new_stat->list);
				call_rcu(&new_stat->rcu, free_dm_hw_stat);
			}
		}
		break;
	default:
		rc = 1;
		break;
	}

	if (!rc)
		trace_state = state;

	spin_unlock(&trace_state_lock);

	if (rc)
		return -EINPROGRESS;
	return rc;
}


static int net_dm_cmd_config(struct sk_buff *skb,
			struct genl_info *info)
{
	return -ENOTSUPP;
}
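/*
 * Lifetime of a dm_hw_stat_delta entry, summarizing the code in
 * set_all_monitor_traces() above and the netdevice notifier below
 * (a reading of this file, not a separately documented protocol):
 *
 *	NETDEV_REGISTER   -> kzalloc() + list_add_rcu() under
 *			     trace_state_lock
 *	NETDEV_UNREGISTER -> ->dev cleared; freed via call_rcu()
 *			     immediately if tracing is off, otherwise
 *			     deferred until set_all_monitor_traces(TRACE_OFF)
 *			     sweeps the list after
 *			     tracepoint_synchronize_unregister()
 *
 * Readers (trace_napi_poll_hit) walk hw_stats_list under
 * rcu_read_lock() only, hence the RCU list primitives throughout.
 */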
static int net_dm_cmd_trace(struct sk_buff *skb,
			struct genl_info *info)
{
	switch (info->genlhdr->cmd) {
	case NET_DM_CMD_START:
		return set_all_monitor_traces(TRACE_ON);
		break;
	case NET_DM_CMD_STOP:
		return set_all_monitor_traces(TRACE_OFF);
		break;
	}

	return -ENOTSUPP;
}

static int dropmon_net_event(struct notifier_block *ev_block,
			unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *tmp;

	switch (event) {
	case NETDEV_REGISTER:
		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);

		if (!new_stat)
			goto out;

		new_stat->dev = dev;
		INIT_RCU_HEAD(&new_stat->rcu);
		spin_lock(&trace_state_lock);
		list_add_rcu(&new_stat->list, &hw_stats_list);
		spin_unlock(&trace_state_lock);
		break;
	case NETDEV_UNREGISTER:
		spin_lock(&trace_state_lock);
		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
			if (new_stat->dev == dev) {
				new_stat->dev = NULL;
				if (trace_state == TRACE_OFF) {
					list_del_rcu(&new_stat->list);
					call_rcu(&new_stat->rcu, free_dm_hw_stat);
					break;
				}
			}
		}
		spin_unlock(&trace_state_lock);
		break;
	}
out:
	return NOTIFY_DONE;
}

static struct genl_ops dropmon_ops[] = {
	{
		.cmd = NET_DM_CMD_CONFIG,
		.doit = net_dm_cmd_config,
	},
	{
		.cmd = NET_DM_CMD_START,
		.doit = net_dm_cmd_trace,
	},
	{
		.cmd = NET_DM_CMD_STOP,
		.doit = net_dm_cmd_trace,
	},
};

static struct notifier_block dropmon_net_notifier = {
	.notifier_call = dropmon_net_event
};

static int __init init_net_drop_monitor(void)
{
	int cpu;
	int rc, i, ret;
	struct per_cpu_dm_data *data;
	printk(KERN_INFO "Initializing network drop monitor service\n");

	if (sizeof(void *) > 8) {
		printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
		return -ENOSPC;
	}

	if (genl_register_family(&net_drop_monitor_family) < 0) {
		printk(KERN_ERR "Could not create drop monitor netlink family\n");
		return -EFAULT;
	}

	rc = -EFAULT;

	for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
		ret = genl_register_ops(&net_drop_monitor_family,
					&dropmon_ops[i]);
		if (ret) {
			printk(KERN_CRIT "Failed to register operation %d\n",
				dropmon_ops[i].cmd);
			goto out_unreg;
		}
	}

	rc = register_netdevice_notifier(&dropmon_net_notifier);
	if (rc < 0) {
		printk(KERN_CRIT "Failed to register netdevice notifier\n");
		goto out_unreg;
	}

	rc = 0;

	for_each_present_cpu(cpu) {
		data = &per_cpu(dm_cpu_data, cpu);
		reset_per_cpu_data(data);
		INIT_WORK(&data->dm_alert_work, send_dm_alert);
		init_timer(&data->send_timer);
		data->send_timer.data = cpu;
		data->send_timer.function = sched_send_work;
	}

	goto out;

out_unreg:
	genl_unregister_family(&net_drop_monitor_family);
out:
	return rc;
}

late_initcall(init_net_drop_monitor);
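/*
 * Hypothetical userspace sketch (not part of this file) showing how a
 * monitor such as dropwatch could drive this interface.  It assumes
 * libnl's genl helpers (nl_socket_alloc(), genl_connect(),
 * genl_ctrl_resolve(), genl_send_simple(), nl_socket_add_membership());
 * error handling is omitted:
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	int family;
 *
 *	genl_connect(sk);
 *	family = genl_ctrl_resolve(sk, "NET_DM");
 *	nl_socket_add_membership(sk, NET_DM_GRP_ALERT);
 *
 *	// Ask the kernel to attach the tracepoints (net_dm_cmd_trace ->
 *	// set_all_monitor_traces(TRACE_ON)), then block waiting for
 *	// NET_DM_CMD_ALERT multicasts.  Each alert carries one
 *	// net_dm_alert_msg whose points[] entries pair a drop location
 *	// (pc) with a hit count, as built by trace_drop_common().
 *	genl_send_simple(sk, family, NET_DM_CMD_START, 2, 0);
 *	nl_recvmsgs_default(sk);
 */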