Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

packet: nlmon: virtual netlink monitoring device for packet sockets

Currently, there is no good way to debug netlink traffic that
is being exchanged between kernel and user space. Therefore, this patch
implements a virtual netlink device, so that netlink messages are
made visible to PF_PACKET sockets. There was once an approach with a
similar idea [1], but it was somehow forgotten.

I think it makes the most sense to accept the "overhead" of an extra
netlink net device rather than re-implementing the functionality of
PF_PACKET sockets in netlink sockets. We have BPF filters that can
already be easily applied and that even have netlink extensions, we have
RX_RING zero-copy between kernel and user space that can be reused,
and many more features. So instead of re-implementing all of this, we
simply pass the skb to a given PF_PACKET socket for further analysis.

Another nice benefit that comes from that is that no code needs to be
changed in user space packet analyzers (maybe adding a dissector, but
not more), thus out of the box, we can already capture pcap files of
netlink traffic to debug/troubleshoot netlink problems.

Thanks also go to Thomas Graf, Flavio Leitner, and Jesper Dangaard Brouer.

[1] http://marc.info/?l=linux-netdev&m=113813401516110

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Daniel Borkmann and committed by
David S. Miller
e4fc408e bcbde0d4

+181
+10
drivers/net/Kconfig
··· 240 240 This is the virtual network driver for virtio. It can be used with 241 241 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. 242 242 243 + config NLMON 244 + tristate "Virtual netlink monitoring device" 245 + ---help--- 246 + This option enables a monitoring net device for netlink skbs. The 247 + purpose of this is to analyze netlink messages with packet sockets. 248 + Thus applications like tcpdump will be able to see local netlink 249 + messages if they tap into the netlink device, record pcaps for further 250 + diagnostics, etc. This is mostly intended for developers or support 251 + to debug netlink issues. If unsure, say N. 252 + 243 253 endif # NET_CORE 244 254 245 255 config SUNGEM_PHY
+1
drivers/net/Makefile
··· 22 22 obj-$(CONFIG_VETH) += veth.o 23 23 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o 24 24 obj-$(CONFIG_VXLAN) += vxlan.o 25 + obj-$(CONFIG_NLMON) += nlmon.o 25 26 26 27 # 27 28 # Networking Drivers
+170
drivers/net/nlmon.c
··· 1 + #include <linux/module.h> 2 + #include <linux/kernel.h> 3 + #include <linux/netdevice.h> 4 + #include <linux/netlink.h> 5 + #include <net/net_namespace.h> 6 + #include <linux/if_arp.h> 7 + 8 + struct pcpu_lstats { 9 + u64 packets; 10 + u64 bytes; 11 + struct u64_stats_sync syncp; 12 + }; 13 + 14 + static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev) 15 + { 16 + int len = skb->len; 17 + struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats); 18 + 19 + u64_stats_update_begin(&stats->syncp); 20 + stats->bytes += len; 21 + stats->packets++; 22 + u64_stats_update_end(&stats->syncp); 23 + 24 + dev_kfree_skb(skb); 25 + 26 + return NETDEV_TX_OK; 27 + } 28 + 29 + static int nlmon_is_valid_mtu(int new_mtu) 30 + { 31 + return new_mtu >= sizeof(struct nlmsghdr) && new_mtu <= INT_MAX; 32 + } 33 + 34 + static int nlmon_change_mtu(struct net_device *dev, int new_mtu) 35 + { 36 + if (!nlmon_is_valid_mtu(new_mtu)) 37 + return -EINVAL; 38 + 39 + dev->mtu = new_mtu; 40 + return 0; 41 + } 42 + 43 + static int nlmon_dev_init(struct net_device *dev) 44 + { 45 + dev->lstats = alloc_percpu(struct pcpu_lstats); 46 + 47 + return dev->lstats == NULL ? 
-ENOMEM : 0; 48 + } 49 + 50 + static void nlmon_dev_uninit(struct net_device *dev) 51 + { 52 + free_percpu(dev->lstats); 53 + } 54 + 55 + static struct netlink_tap nlmon_tap; 56 + 57 + static int nlmon_open(struct net_device *dev) 58 + { 59 + return netlink_add_tap(&nlmon_tap); 60 + } 61 + 62 + static int nlmon_close(struct net_device *dev) 63 + { 64 + return netlink_remove_tap(&nlmon_tap); 65 + } 66 + 67 + static struct rtnl_link_stats64 * 68 + nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) 69 + { 70 + int i; 71 + u64 bytes = 0, packets = 0; 72 + 73 + for_each_possible_cpu(i) { 74 + const struct pcpu_lstats *nl_stats; 75 + u64 tbytes, tpackets; 76 + unsigned int start; 77 + 78 + nl_stats = per_cpu_ptr(dev->lstats, i); 79 + 80 + do { 81 + start = u64_stats_fetch_begin_bh(&nl_stats->syncp); 82 + tbytes = nl_stats->bytes; 83 + tpackets = nl_stats->packets; 84 + } while (u64_stats_fetch_retry_bh(&nl_stats->syncp, start)); 85 + 86 + packets += tpackets; 87 + bytes += tbytes; 88 + } 89 + 90 + stats->rx_packets = packets; 91 + stats->tx_packets = 0; 92 + 93 + stats->rx_bytes = bytes; 94 + stats->tx_bytes = 0; 95 + 96 + return stats; 97 + } 98 + 99 + static u32 always_on(struct net_device *dev) 100 + { 101 + return 1; 102 + } 103 + 104 + static const struct ethtool_ops nlmon_ethtool_ops = { 105 + .get_link = always_on, 106 + }; 107 + 108 + static const struct net_device_ops nlmon_ops = { 109 + .ndo_init = nlmon_dev_init, 110 + .ndo_uninit = nlmon_dev_uninit, 111 + .ndo_open = nlmon_open, 112 + .ndo_stop = nlmon_close, 113 + .ndo_start_xmit = nlmon_xmit, 114 + .ndo_get_stats64 = nlmon_get_stats64, 115 + .ndo_change_mtu = nlmon_change_mtu, 116 + }; 117 + 118 + static struct netlink_tap nlmon_tap __read_mostly = { 119 + .module = THIS_MODULE, 120 + }; 121 + 122 + static void nlmon_setup(struct net_device *dev) 123 + { 124 + dev->type = ARPHRD_NETLINK; 125 + dev->tx_queue_len = 0; 126 + 127 + dev->netdev_ops = &nlmon_ops; 128 + dev->ethtool_ops = 
&nlmon_ethtool_ops; 129 + dev->destructor = free_netdev; 130 + 131 + dev->features = NETIF_F_FRAGLIST | NETIF_F_HIGHDMA; 132 + dev->flags = IFF_NOARP; 133 + 134 + /* That's rather a softlimit here, which, of course, 135 + * can be altered. Not a real MTU, but what is to be 136 + * expected in most cases. 137 + */ 138 + dev->mtu = NLMSG_GOODSIZE; 139 + } 140 + 141 + static __init int nlmon_register(void) 142 + { 143 + int err; 144 + struct net_device *nldev; 145 + 146 + nldev = nlmon_tap.dev = alloc_netdev(0, "netlink", nlmon_setup); 147 + if (unlikely(nldev == NULL)) 148 + return -ENOMEM; 149 + 150 + err = register_netdev(nldev); 151 + if (unlikely(err)) 152 + free_netdev(nldev); 153 + 154 + return err; 155 + } 156 + 157 + static __exit void nlmon_unregister(void) 158 + { 159 + struct net_device *nldev = nlmon_tap.dev; 160 + 161 + unregister_netdev(nldev); 162 + } 163 + 164 + module_init(nlmon_register); 165 + module_exit(nlmon_unregister); 166 + 167 + MODULE_LICENSE("GPL v2"); 168 + MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); 169 + MODULE_AUTHOR("Mathieu Geli <geli@enseirb.fr>"); 170 + MODULE_DESCRIPTION("Netlink monitoring device");