net/core/dev.c at v2.6.27-rc8 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / net / core / dev.c
at v2.6.27-rc8 4896 lines 121 kB view raw
   1/*
   2 * 	NET3	Protocol independent device support routines.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 *	Derived from the non IP parts of dev.c 1.0.19
  10 * 		Authors:	Ross Biro
  11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *	Additional Authors:
  15 *		Florian la Roche <rzsfl@rz.uni-sb.de>
  16 *		Alan Cox <gw4pts@gw4pts.ampr.org>
  17 *		David Hinds <dahinds@users.sourceforge.net>
  18 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19 *		Adam Sulmicki <adam@cfar.umd.edu>
  20 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21 *
  22 *	Changes:
  23 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24 *              			to 2 if register_netdev gets called
  25 *              			before net_dev_init & also removed a
  26 *              			few lines of code in the process.
  27 *		Alan Cox	:	device private ioctl copies fields back.
  28 *		Alan Cox	:	Transmit queue code does relevant
  29 *					stunts to keep the queue safe.
  30 *		Alan Cox	:	Fixed double lock.
  31 *		Alan Cox	:	Fixed promisc NULL pointer trap
  32 *		????????	:	Support the full private ioctl range
  33 *		Alan Cox	:	Moved ioctl permission check into
  34 *					drivers
  35 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
  36 *		Alan Cox	:	100 backlog just doesn't cut it when
  37 *					you start doing multicast video 8)
  38 *		Alan Cox	:	Rewrote net_bh and list manager.
  39 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
  40 *		Alan Cox	:	Took out transmit every packet pass
  41 *					Saved a few bytes in the ioctl handler
  42 *		Alan Cox	:	Network driver sets packet type before
  43 *					calling netif_rx. Saves a function
  44 *					call a packet.
  45 *		Alan Cox	:	Hashed net_bh()
  46 *		Richard Kooijman:	Timestamp fixes.
  47 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
  48 *		Alan Cox	:	Device lock protection.
  49 *		Alan Cox	: 	Fixed nasty side effect of device close
  50 *					changes.
  51 *		Rudi Cilibrasi	:	Pass the right thing to
  52 *					set_mac_address()
  53 *		Dave Miller	:	32bit quantity for the device lock to
  54 *					make it work out on a Sparc.
  55 *		Bjorn Ekwall	:	Added KERNELD hack.
  56 *		Alan Cox	:	Cleaned up the backlog initialise.
  57 *		Craig Metz	:	SIOCGIFCONF fix if space for under
  58 *					1 device.
  59 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
  60 *					is no device open function.
  61 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
  62 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
  63 *		Cyrus Durgin	:	Cleaned for KMOD
  64 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
  65 *					A network device unload needs to purge
  66 *					the backlog queue.
  67 *	Paul Rusty Russell	:	SIOCSIFNAME
  68 *              Pekka Riikonen  :	Netdev boot-time settings code
  69 *              Andrew Morton   :       Make unregister_netdevice wait
  70 *              			indefinitely on dev->refcnt
  71 * 		J Hadi Salim	:	- Backlog queue sampling
  72 *				        - netif_rx() feedback
  73 */
  74
  75#include <asm/uaccess.h>
  76#include <asm/system.h>
  77#include <linux/bitops.h>
  78#include <linux/capability.h>
  79#include <linux/cpu.h>
  80#include <linux/types.h>
  81#include <linux/kernel.h>
  82#include <linux/sched.h>
  83#include <linux/mutex.h>
  84#include <linux/string.h>
  85#include <linux/mm.h>
  86#include <linux/socket.h>
  87#include <linux/sockios.h>
  88#include <linux/errno.h>
  89#include <linux/interrupt.h>
  90#include <linux/if_ether.h>
  91#include <linux/netdevice.h>
  92#include <linux/etherdevice.h>
  93#include <linux/ethtool.h>
  94#include <linux/notifier.h>
  95#include <linux/skbuff.h>
  96#include <net/net_namespace.h>
  97#include <net/sock.h>
  98#include <linux/rtnetlink.h>
  99#include <linux/proc_fs.h>
 100#include <linux/seq_file.h>
 101#include <linux/stat.h>
 102#include <linux/if_bridge.h>
 103#include <linux/if_macvlan.h>
 104#include <net/dst.h>
 105#include <net/pkt_sched.h>
 106#include <net/checksum.h>
 107#include <linux/highmem.h>
 108#include <linux/init.h>
 109#include <linux/kmod.h>
 110#include <linux/module.h>
 111#include <linux/kallsyms.h>
 112#include <linux/netpoll.h>
 113#include <linux/rcupdate.h>
 114#include <linux/delay.h>
 115#include <net/wext.h>
 116#include <net/iw_handler.h>
 117#include <asm/current.h>
 118#include <linux/audit.h>
 119#include <linux/dmaengine.h>
 120#include <linux/err.h>
 121#include <linux/ctype.h>
 122#include <linux/if_arp.h>
 123#include <linux/if_vlan.h>
 124#include <linux/ip.h>
 125#include <net/ip.h>
 126#include <linux/ipv6.h>
 127#include <linux/in.h>
 128#include <linux/jhash.h>
 129#include <linux/random.h>
 130
 131#include "net-sysfs.h"
 132
 133/*
 134 *	The list of packet types we will receive (as opposed to discard)
 135 *	and the routines to invoke.
 136 *
 137 *	Why 16. Because with 16 the only overlap we get on a hash of the
 138 *	low nibble of the protocol value is RARP/SNAP/X.25.
 139 *
 140 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 141 *             sure which should go first, but I bet it won't make much
 142 *             difference if we are running VLANs.  The good news is that
 143 *             this protocol won't be in the list unless compiled in, so
 144 *             the average user (w/out VLANs) will not be adversely affected.
 145 *             --BLG
 146 *
 147 *		0800	IP
 148 *		8100    802.1Q VLAN
 149 *		0001	802.3
 150 *		0002	AX.25
 151 *		0004	802.2
 152 *		8035	RARP
 153 *		0005	SNAP
 154 *		0805	X.25
 155 *		0806	ARP
 156 *		8137	IPX
 157 *		0009	Localtalk
 158 *		86DD	IPv6
 159 */
 160
/*
 * Number of buckets in the protocol handler hash.  It has to stay a power
 * of two because handlers are bucketed with "& PTYPE_HASH_MASK".
 */
#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

/* Taken by dev_add_pack() and __dev_remove_pack() while they edit the lists. */
static DEFINE_SPINLOCK(ptype_lock);
/* Handlers for one specific protocol, bucketed by protocol number. */
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
/* Handlers registered for ETH_P_ALL. */
static struct list_head ptype_all __read_mostly;	/* Taps */
 167
#ifdef CONFIG_NET_DMA
/* State kept for the DMA engine client; only built with CONFIG_NET_DMA. */
struct net_dma {
	struct dma_client client;
	spinlock_t lock;
	cpumask_t channel_mask;
	struct dma_chan **channels;
};

/* Forward declaration: the net_dma initializer below needs the callback. */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

/* The single net_dma instance; its client callback is netdev_dma_event(). */
static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif
 186
 187/*
 188 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 189 * semaphore.
 190 *
 191 * Pure readers hold dev_base_lock for reading.
 192 *
 193 * Writers must hold the rtnl semaphore while they loop through the
 194 * dev_base_head list, and hold dev_base_lock for writing when they do the
 195 * actual updates.  This allows pure readers to access the list even
 196 * while a writer is preparing to update it.
 197 *
 198 * To put it another way, dev_base_lock is held for writing only to
 199 * protect against pure readers; the rtnl semaphore provides the
 200 * protection against other writers.
 201 *
 202 * See, for example usages, register_netdevice() and
 203 * unregister_netdevice(), which must be called with the rtnl
 204 * semaphore held.
 205 */
/* Reader/writer lock for the device lists; see the locking rules above. */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

/*
 * The per-namespace name hash and ifindex hash are both indexed with the
 * low NETDEV_HASHBITS bits of the hash value.
 */
#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 212
 213static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 214{
 215	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 216	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 217}
 218
 219static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 220{
 221	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 222}
 223
 224/* Device list insertion */
 225static int list_netdevice(struct net_device *dev)
 226{
 227	struct net *net = dev_net(dev);
 228
 229	ASSERT_RTNL();
 230
 231	write_lock_bh(&dev_base_lock);
 232	list_add_tail(&dev->dev_list, &net->dev_base_head);
 233	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 234	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 235	write_unlock_bh(&dev_base_lock);
 236	return 0;
 237}
 238
/*
 * Device list removal.
 *
 * Takes @dev off the device list and out of the name and ifindex hashes.
 * The caller must hold RTNL; dev_base_lock is taken for writing around the
 * updates.
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}
 251
/*
 *	Our notifier list.  This is a raw notifier head.
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.  There is one softnet_data
 *	instance per CPU.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
 264
 265#ifdef CONFIG_LOCKDEP
 266/*
 267 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 268 * according to dev->type
 269 */
/*
 * Device types that get a lockdep class of their own.  The position of a
 * type in this table is the index used for netdev_lock_name[] and for the
 * two key arrays below.  netdev_lock_pos() maps every type that is not
 * listed to the last entry (ARPHRD_NONE).
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

/*
 * Lock class names.  Entry i names the class used for netdev_lock_type[i],
 * so both tables must list the types in the same order.
 */
static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

/* One lockdep key per table entry, for the xmit lock and the address list lock. */
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 306
 307static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 308{
 309	int i;
 310
 311	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 312		if (netdev_lock_type[i] == dev_type)
 313			return i;
 314	/* the last key is used by default */
 315	return ARRAY_SIZE(netdev_lock_type) - 1;
 316}
 317
 318static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 319						 unsigned short dev_type)
 320{
 321	int i;
 322
 323	i = netdev_lock_pos(dev_type);
 324	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 325				   netdev_lock_name[i]);
 326}
 327
 328static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 329{
 330	int i;
 331
 332	i = netdev_lock_pos(dev->type);
 333	lockdep_set_class_and_name(&dev->addr_list_lock,
 334				   &netdev_addr_lock_key[i],
 335				   netdev_lock_name[i]);
 336}
 337#else
/* Without CONFIG_LOCKDEP there is no lock class to set: both helpers are no-ops. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
 345#endif
 346
 347/*******************************************************************************
 348
 349		Protocol management and registration routines
 350
 351*******************************************************************************/
 352
 353/*
 354 *	Add a protocol ID to the list. Now that the input handler is
 355 *	smarter we can dispense with all the messy stuff that used to be
 356 *	here.
 357 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the check of protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a protocol handler that mangles packets comes
 *	first on the list, it cannot sense that the packet is cloned
 *	and should be copied-on-write, so it will change the packet
 *	and subsequent readers will get a broken packet.
 *							--ANK (980803)
 367 */
 368
 369/**
 370 *	dev_add_pack - add packet handler
 371 *	@pt: packet type declaration
 372 *
 373 *	Add a protocol handler to the networking stack. The passed &packet_type
 374 *	is linked into kernel lists and may not be freed until it has been
 375 *	removed from the kernel lists.
 376 *
 377 *	This call does not sleep therefore it can not
 378 *	guarantee all CPU's that are in middle of receiving packets
 379 *	will see the new packet type (until the next received packet).
 380 */
 381
 382void dev_add_pack(struct packet_type *pt)
 383{
 384	int hash;
 385
 386	spin_lock_bh(&ptype_lock);
 387	if (pt->type == htons(ETH_P_ALL))
 388		list_add_rcu(&pt->list, &ptype_all);
 389	else {
 390		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 391		list_add_rcu(&pt->list, &ptype_base[hash]);
 392	}
 393	spin_unlock_bh(&ptype_lock);
 394}
 395
 396/**
 397 *	__dev_remove_pack	 - remove packet handler
 398 *	@pt: packet type declaration
 399 *
 400 *	Remove a protocol handler that was previously added to the kernel
 401 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 402 *	from the kernel lists and can be freed or reused once this function
 403 *	returns.
 404 *
 405 *      The packet type might still be in use by receivers
 406 *	and must not be freed until after all the CPU's have gone
 407 *	through a quiescent state.
 408 */
 409void __dev_remove_pack(struct packet_type *pt)
 410{
 411	struct list_head *head;
 412	struct packet_type *pt1;
 413
 414	spin_lock_bh(&ptype_lock);
 415
 416	if (pt->type == htons(ETH_P_ALL))
 417		head = &ptype_all;
 418	else
 419		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 420
 421	list_for_each_entry(pt1, head, list) {
 422		if (pt == pt1) {
 423			list_del_rcu(&pt->list);
 424			goto out;
 425		}
 426	}
 427
 428	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 429out:
 430	spin_unlock_bh(&ptype_lock);
 431}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first ... */
	__dev_remove_pack(pt);

	/* ... then wait, so that the caller may free @pt on return. */
	synchronize_net();
}
 450
 451/******************************************************************************
 452
 453		      Device Boot-time Settings Routines
 454
 455*******************************************************************************/
 456
/*
 * Boot time configuration table.  A slot whose name starts with '\0' or
 * ' ' is treated as free by netdev_boot_setup_add().
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 459
 460/**
 461 *	netdev_boot_setup_add	- add new setup entry
 462 *	@name: name of the device
 463 *	@map: configured settings for the device
 464 *
 465 *	Adds new setup entry to the dev_boot_setup list.  The function
 466 *	returns 0 on error and 1 on success.  This is a generic routine to
 467 *	all netdevices.
 468 */
 469static int netdev_boot_setup_add(char *name, struct ifmap *map)
 470{
 471	struct netdev_boot_setup *s;
 472	int i;
 473
 474	s = dev_boot_setup;
 475	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 476		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 477			memset(s[i].name, 0, sizeof(s[i].name));
 478			strlcpy(s[i].name, name, IFNAMSIZ);
 479			memcpy(&s[i].map, map, sizeof(s[i].map));
 480			break;
 481		}
 482	}
 483
 484	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 485}
 486
 487/**
 488 *	netdev_boot_setup_check	- check boot time settings
 489 *	@dev: the netdevice
 490 *
 491 * 	Check boot time settings for the device.
 492 *	The found settings are set for the device to be used
 493 *	later in the device probing.
 494 *	Returns 0 if no settings found, 1 if they are.
 495 */
 496int netdev_boot_setup_check(struct net_device *dev)
 497{
 498	struct netdev_boot_setup *s = dev_boot_setup;
 499	int i;
 500
 501	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 502		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 503		    !strcmp(dev->name, s[i].name)) {
 504			dev->irq 	= s[i].map.irq;
 505			dev->base_addr 	= s[i].map.base_addr;
 506			dev->mem_start 	= s[i].map.mem_start;
 507			dev->mem_end 	= s[i].map.mem_end;
 508			return 1;
 509		}
 510	}
 511	return 0;
 512}
 513
 514
 515/**
 516 *	netdev_boot_base	- get address from boot time settings
 517 *	@prefix: prefix for network device
 518 *	@unit: id for network device
 519 *
 520 * 	Check boot time settings for the base address of device.
 521 *	The found settings are set for the device to be used
 522 *	later in the device probing.
 523 *	Returns 0 if no settings found.
 524 */
 525unsigned long netdev_boot_base(const char *prefix, int unit)
 526{
 527	const struct netdev_boot_setup *s = dev_boot_setup;
 528	char name[IFNAMSIZ];
 529	int i;
 530
 531	sprintf(name, "%s%d", prefix, unit);
 532
 533	/*
 534	 * If device already registered then return base of 1
 535	 * to indicate not to probe for this interface
 536	 */
 537	if (__dev_get_by_name(&init_net, name))
 538		return 1;
 539
 540	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 541		if (!strcmp(name, s[i].name))
 542			return s[i].map.base_addr;
 543	return 0;
 544}
 545
 546/*
 547 * Saves at boot time configured settings for any netdevice.
 548 */
 549int __init netdev_boot_setup(char *str)
 550{
 551	int ints[5];
 552	struct ifmap map;
 553
 554	str = get_options(str, ARRAY_SIZE(ints), ints);
 555	if (!str || !*str)
 556		return 0;
 557
 558	/* Save settings */
 559	memset(&map, 0, sizeof(map));
 560	if (ints[0] > 0)
 561		map.irq = ints[1];
 562	if (ints[0] > 1)
 563		map.base_addr = ints[2];
 564	if (ints[0] > 2)
 565		map.mem_start = ints[3];
 566	if (ints[0] > 3)
 567		map.mem_end = ints[4];
 568
 569	/* Add new entry to the list */
 570	return netdev_boot_setup_add(str, &map);
 571}
 572
 573__setup("netdev=", netdev_boot_setup);
 574
 575/*******************************************************************************
 576
 577			    Device Interface Subroutines
 578
 579*******************************************************************************/
 580
 581/**
 582 *	__dev_get_by_name	- find a device by its name
 583 *	@net: the applicable net namespace
 584 *	@name: name to find
 585 *
 586 *	Find an interface by name. Must be called under RTNL semaphore
 587 *	or @dev_base_lock. If the name is found a pointer to the device
 588 *	is returned. If the name is not found then %NULL is returned. The
 589 *	reference counters are not incremented so the caller must be
 590 *	careful with locks.
 591 */
 592
 593struct net_device *__dev_get_by_name(struct net *net, const char *name)
 594{
 595	struct hlist_node *p;
 596
 597	hlist_for_each(p, dev_name_hash(net, name)) {
 598		struct net_device *dev
 599			= hlist_entry(p, struct net_device, name_hlist);
 600		if (!strncmp(dev->name, name, IFNAMSIZ))
 601			return dev;
 602	}
 603	return NULL;
 604}
 605
 606/**
 607 *	dev_get_by_name		- find a device by its name
 608 *	@net: the applicable net namespace
 609 *	@name: name to find
 610 *
 611 *	Find an interface by name. This can be called from any
 612 *	context and does its own locking. The returned handle has
 613 *	the usage count incremented and the caller must use dev_put() to
 614 *	release it when it is no longer needed. %NULL is returned if no
 615 *	matching device is found.
 616 */
 617
 618struct net_device *dev_get_by_name(struct net *net, const char *name)
 619{
 620	struct net_device *dev;
 621
 622	read_lock(&dev_base_lock);
 623	dev = __dev_get_by_name(net, name);
 624	if (dev)
 625		dev_hold(dev);
 626	read_unlock(&dev_base_lock);
 627	return dev;
 628}
 629
 630/**
 631 *	__dev_get_by_index - find a device by its ifindex
 632 *	@net: the applicable net namespace
 633 *	@ifindex: index of device
 634 *
 635 *	Search for an interface by index. Returns %NULL if the device
 636 *	is not found or a pointer to the device. The device has not
 637 *	had its reference counter increased so the caller must be careful
 638 *	about locking. The caller must hold either the RTNL semaphore
 639 *	or @dev_base_lock.
 640 */
 641
 642struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 643{
 644	struct hlist_node *p;
 645
 646	hlist_for_each(p, dev_index_hash(net, ifindex)) {
 647		struct net_device *dev
 648			= hlist_entry(p, struct net_device, index_hlist);
 649		if (dev->ifindex == ifindex)
 650			return dev;
 651	}
 652	return NULL;
 653}
 654
 655
 656/**
 657 *	dev_get_by_index - find a device by its ifindex
 658 *	@net: the applicable net namespace
 659 *	@ifindex: index of device
 660 *
 661 *	Search for an interface by index. Returns NULL if the device
 662 *	is not found or a pointer to the device. The device returned has
 663 *	had a reference added and the pointer is safe until the user calls
 664 *	dev_put to indicate they have finished with it.
 665 */
 666
 667struct net_device *dev_get_by_index(struct net *net, int ifindex)
 668{
 669	struct net_device *dev;
 670
 671	read_lock(&dev_base_lock);
 672	dev = __dev_get_by_index(net, ifindex);
 673	if (dev)
 674		dev_hold(dev);
 675	read_unlock(&dev_base_lock);
 676	return dev;
 677}
 678
 679/**
 680 *	dev_getbyhwaddr - find a device by its hardware address
 681 *	@net: the applicable net namespace
 682 *	@type: media type of device
 683 *	@ha: hardware address
 684 *
 685 *	Search for an interface by MAC address. Returns NULL if the device
 686 *	is not found or a pointer to the device. The caller must hold the
 687 *	rtnl semaphore. The returned device has not had its ref count increased
 688 *	and the caller must therefore be careful about locking
 689 *
 690 *	BUGS:
 691 *	If the API was consistent this would be __dev_get_by_hwaddr
 692 */
 693
 694struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 695{
 696	struct net_device *dev;
 697
 698	ASSERT_RTNL();
 699
 700	for_each_netdev(net, dev)
 701		if (dev->type == type &&
 702		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 703			return dev;
 704
 705	return NULL;
 706}
 707
 708EXPORT_SYMBOL(dev_getbyhwaddr);
 709
 710struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 711{
 712	struct net_device *dev;
 713
 714	ASSERT_RTNL();
 715	for_each_netdev(net, dev)
 716		if (dev->type == type)
 717			return dev;
 718
 719	return NULL;
 720}
 721
 722EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 723
 724struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 725{
 726	struct net_device *dev;
 727
 728	rtnl_lock();
 729	dev = __dev_getfirstbyhwtype(net, type);
 730	if (dev)
 731		dev_hold(dev);
 732	rtnl_unlock();
 733	return dev;
 734}
 735
 736EXPORT_SYMBOL(dev_getfirstbyhwtype);
 737
 738/**
 739 *	dev_get_by_flags - find any device with given flags
 740 *	@net: the applicable net namespace
 741 *	@if_flags: IFF_* values
 742 *	@mask: bitmask of bits in if_flags to check
 743 *
 744 *	Search for any interface with the given flags. Returns NULL if a device
 745 *	is not found or a pointer to the device. The device returned has
 746 *	had a reference added and the pointer is safe until the user calls
 747 *	dev_put to indicate they have finished with it.
 748 */
 749
 750struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 751{
 752	struct net_device *dev, *ret;
 753
 754	ret = NULL;
 755	read_lock(&dev_base_lock);
 756	for_each_netdev(net, dev) {
 757		if (((dev->flags ^ if_flags) & mask) == 0) {
 758			dev_hold(dev);
 759			ret = dev;
 760			break;
 761		}
 762	}
 763	read_unlock(&dev_base_lock);
 764	return ret;
 765}
 766
 767/**
 768 *	dev_valid_name - check if name is okay for network device
 769 *	@name: name string
 770 *
 771 *	Network device names need to be valid file names to
 772 *	to allow sysfs to work.  We also disallow any kind of
 773 *	whitespace.
 774 */
 775int dev_valid_name(const char *name)
 776{
 777	if (*name == '\0')
 778		return 0;
 779	if (strlen(name) >= IFNAMSIZ)
 780		return 0;
 781	if (!strcmp(name, ".") || !strcmp(name, ".."))
 782		return 0;
 783
 784	while (*name) {
 785		if (*name == '/' || isspace(*name))
 786			return 0;
 787		name++;
 788	}
 789	return 1;
 790}
 791
 792/**
 793 *	__dev_alloc_name - allocate a name for a device
 794 *	@net: network namespace to allocate the device name in
 795 *	@name: name format string
 796 *	@buf:  scratch buffer and result name string
 797 *
 798 *	Passed a format string - eg "lt%d" it will try and find a suitable
 799 *	id. It scans list of devices to build up a free map, then chooses
 800 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 801 *	while allocating the name and adding the device in order to avoid
 802 *	duplicates.
 803 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 804 *	Returns the number of the unit assigned or a negative errno code.
 805 */
 806
 807static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 808{
 809	int i = 0;
 810	const char *p;
 811	const int max_netdevices = 8*PAGE_SIZE;
 812	unsigned long *inuse;
 813	struct net_device *d;
 814
 815	p = strnchr(name, IFNAMSIZ-1, '%');
 816	if (p) {
 817		/*
 818		 * Verify the string as this thing may have come from
 819		 * the user.  There must be either one "%d" and no other "%"
 820		 * characters.
 821		 */
 822		if (p[1] != 'd' || strchr(p + 2, '%'))
 823			return -EINVAL;
 824
 825		/* Use one page as a bit array of possible slots */
 826		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
 827		if (!inuse)
 828			return -ENOMEM;
 829
 830		for_each_netdev(net, d) {
 831			if (!sscanf(d->name, name, &i))
 832				continue;
 833			if (i < 0 || i >= max_netdevices)
 834				continue;
 835
 836			/*  avoid cases where sscanf is not exact inverse of printf */
 837			snprintf(buf, IFNAMSIZ, name, i);
 838			if (!strncmp(buf, d->name, IFNAMSIZ))
 839				set_bit(i, inuse);
 840		}
 841
 842		i = find_first_zero_bit(inuse, max_netdevices);
 843		free_page((unsigned long) inuse);
 844	}
 845
 846	snprintf(buf, IFNAMSIZ, name, i);
 847	if (!__dev_get_by_name(net, buf))
 848		return i;
 849
 850	/* It is possible to run out of possible slots
 851	 * when the name is long and there isn't enough space left
 852	 * for the digits, or if all bits are used.
 853	 */
 854	return -ENFILE;
 855}
 856
 857/**
 858 *	dev_alloc_name - allocate a name for a device
 859 *	@dev: device
 860 *	@name: name format string
 861 *
 862 *	Passed a format string - eg "lt%d" it will try and find a suitable
 863 *	id. It scans list of devices to build up a free map, then chooses
 864 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 865 *	while allocating the name and adding the device in order to avoid
 866 *	duplicates.
 867 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 868 *	Returns the number of the unit assigned or a negative errno code.
 869 */
 870
 871int dev_alloc_name(struct net_device *dev, const char *name)
 872{
 873	char buf[IFNAMSIZ];
 874	struct net *net;
 875	int ret;
 876
 877	BUG_ON(!dev_net(dev));
 878	net = dev_net(dev);
 879	ret = __dev_alloc_name(net, name, buf);
 880	if (ret >= 0)
 881		strlcpy(dev->name, buf, IFNAMSIZ);
 882	return ret;
 883}
 884
 885
 886/**
 887 *	dev_change_name - change name of a device
 888 *	@dev: device
 889 *	@newname: name (or format string) must be at least IFNAMSIZ
 890 *
 891 *	Change name of a device, can pass format strings "eth%d".
 892 *	for wildcarding.
 893 */
 894int dev_change_name(struct net_device *dev, char *newname)
 895{
 896	char oldname[IFNAMSIZ];
 897	int err = 0;
 898	int ret;
 899	struct net *net;
 900
 901	ASSERT_RTNL();
 902	BUG_ON(!dev_net(dev));
 903
 904	net = dev_net(dev);
 905	if (dev->flags & IFF_UP)
 906		return -EBUSY;
 907
 908	if (!dev_valid_name(newname))
 909		return -EINVAL;
 910
 911	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 912		return 0;
 913
 914	memcpy(oldname, dev->name, IFNAMSIZ);
 915
 916	if (strchr(newname, '%')) {
 917		err = dev_alloc_name(dev, newname);
 918		if (err < 0)
 919			return err;
 920		strcpy(newname, dev->name);
 921	}
 922	else if (__dev_get_by_name(net, newname))
 923		return -EEXIST;
 924	else
 925		strlcpy(dev->name, newname, IFNAMSIZ);
 926
 927rollback:
 928	err = device_rename(&dev->dev, dev->name);
 929	if (err) {
 930		memcpy(dev->name, oldname, IFNAMSIZ);
 931		return err;
 932	}
 933
 934	write_lock_bh(&dev_base_lock);
 935	hlist_del(&dev->name_hlist);
 936	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 937	write_unlock_bh(&dev_base_lock);
 938
 939	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 940	ret = notifier_to_errno(ret);
 941
 942	if (ret) {
 943		if (err) {
 944			printk(KERN_ERR
 945			       "%s: name change rollback failed: %d.\n",
 946			       dev->name, ret);
 947		} else {
 948			err = ret;
 949			memcpy(dev->name, oldname, IFNAMSIZ);
 950			goto rollback;
 951		}
 952	}
 953
 954	return err;
 955}
 956
/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.  Sends a
 *	%NETDEV_FEAT_CHANGE event for @dev to the netdevice notifiers.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
 968
 969/**
 970 *	netdev_state_change - device changes state
 971 *	@dev: device to cause notification
 972 *
 973 *	Called to indicate a device has changed state. This function calls
 974 *	the notifier chains for netdev_chain and sends a NEWLINK message
 975 *	to the routing socket.
 976 */
 977void netdev_state_change(struct net_device *dev)
 978{
 979	if (dev->flags & IFF_UP) {
 980		call_netdevice_notifiers(NETDEV_CHANGE, dev);
 981		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 982	}
 983}
 984
/* Send a NETDEV_BONDING_FAILOVER event for @dev to the netdevice notifiers. */
void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
 990
 991/**
 992 *	dev_load 	- load a network module
 993 *	@net: the applicable net namespace
 994 *	@name: name of interface
 995 *
 996 *	If a network interface is not present and the process has suitable
 997 *	privileges this function loads the module. If module loading is not
 998 *	available in this kernel then it becomes a nop.
 999 */
1000
1001void dev_load(struct net *net, const char *name)
1002{
1003	struct net_device *dev;
1004
1005	read_lock(&dev_base_lock);
1006	dev = __dev_get_by_name(net, name);
1007	read_unlock(&dev_base_lock);
1008
1009	if (!dev && capable(CAP_SYS_MODULE))
1010		request_module("%s", name);
1011}
1012
1013/**
1014 *	dev_open	- prepare an interface for use.
1015 *	@dev:	device to open
1016 *
1017 *	Takes a device from down to up state. The device's private open
1018 *	function is invoked and then the multicast lists are loaded. Finally
1019 *	the device is moved into the up state and a %NETDEV_UP message is
1020 *	sent to the netdev notifier chain.
1021 *
1022 *	Calling this function on an active interface is a nop. On a failure
1023 *	a negative errno code is returned.
1024 */
1025int dev_open(struct net_device *dev)
1026{
1027	int ret = 0;
1028
1029	ASSERT_RTNL();
1030
1031	/*
1032	 *	Is it already up?
1033	 */
1034
1035	if (dev->flags & IFF_UP)
1036		return 0;
1037
1038	/*
1039	 *	Is it even present?
1040	 */
1041	if (!netif_device_present(dev))
1042		return -ENODEV;
1043
1044	/*
1045	 *	Call device private open method
1046	 */
1047	set_bit(__LINK_STATE_START, &dev->state);
1048
1049	if (dev->validate_addr)
1050		ret = dev->validate_addr(dev);
1051
1052	if (!ret && dev->open)
1053		ret = dev->open(dev);
1054
1055	/*
1056	 *	If it went open OK then:
1057	 */
1058
1059	if (ret)
1060		clear_bit(__LINK_STATE_START, &dev->state);
1061	else {
1062		/*
1063		 *	Set the flags.
1064		 */
1065		dev->flags |= IFF_UP;
1066
1067		/*
1068		 *	Initialize multicasting status
1069		 */
1070		dev_set_rx_mode(dev);
1071
1072		/*
1073		 *	Wakeup transmit queue engine
1074		 */
1075		dev_activate(dev);
1076
1077		/*
1078		 *	... and announce new interface.
1079		 */
1080		call_netdevice_notifiers(NETDEV_UP, dev);
1081	}
1082
1083	return ret;
1084}
1085
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 *
 *	Calling this function on a device that is not up is a nop.  Must be
 *	called with the RTNL held, from a context that may sleep.  Always
 *	returns 0.
 */
int dev_close(struct net_device *dev)
{
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for it while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}
1145
1146
1147/**
1148 *	dev_disable_lro - disable Large Receive Offload on a device
1149 *	@dev: device
1150 *
1151 *	Disable Large Receive Offload (LRO) on a net device.  Must be
1152 *	called under RTNL.  This is needed if received packets may be
1153 *	forwarded to another interface.
1154 */
1155void dev_disable_lro(struct net_device *dev)
1156{
1157	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1158	    dev->ethtool_ops->set_flags) {
1159		u32 flags = dev->ethtool_ops->get_flags(dev);
1160		if (flags & ETH_FLAG_LRO) {
1161			flags &= ~ETH_FLAG_LRO;
1162			dev->ethtool_ops->set_flags(dev, flags);
1163		}
1164	}
1165	WARN_ON(dev->features & NETIF_F_LRO);
1166}
1167EXPORT_SYMBOL(dev_disable_lro);
1168
1169
1170static int dev_boot_phase = 1;
1171
1172/*
1173 *	Device change register/unregister. These are not inline or static
1174 *	as we export them to the world.
1175 */
1176
1177/**
1178 *	register_netdevice_notifier - register a network notifier block
1179 *	@nb: notifier
1180 *
1181 *	Register a notifier to be called when network device events occur.
1182 *	The notifier passed is linked into the kernel structures and must
1183 *	not be reused until it has been unregistered. A negative errno code
1184 *	is returned on a failure.
1185 *
1186 * 	When registered all registration and up events are replayed
1187 *	to the new notifier to allow device to have a race free
1188 *	view of the network device list.
1189 */
1190
1191int register_netdevice_notifier(struct notifier_block *nb)
1192{
1193	struct net_device *dev;
1194	struct net_device *last;
1195	struct net *net;
1196	int err;
1197
1198	rtnl_lock();
1199	err = raw_notifier_chain_register(&netdev_chain, nb);
1200	if (err)
1201		goto unlock;
1202	if (dev_boot_phase)
1203		goto unlock;
1204	for_each_net(net) {
1205		for_each_netdev(net, dev) {
1206			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1207			err = notifier_to_errno(err);
1208			if (err)
1209				goto rollback;
1210
1211			if (!(dev->flags & IFF_UP))
1212				continue;
1213
1214			nb->notifier_call(nb, NETDEV_UP, dev);
1215		}
1216	}
1217
1218unlock:
1219	rtnl_unlock();
1220	return err;
1221
1222rollback:
1223	last = dev;
1224	for_each_net(net) {
1225		for_each_netdev(net, dev) {
1226			if (dev == last)
1227				break;
1228
1229			if (dev->flags & IFF_UP) {
1230				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1231				nb->notifier_call(nb, NETDEV_DOWN, dev);
1232			}
1233			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1234		}
1235	}
1236
1237	raw_notifier_chain_unregister(&netdev_chain, nb);
1238	goto unlock;
1239}
1240
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 *
 *	The RTNL is taken around the removal from the chain.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}
1260
/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1274
1275/* When > 0 there are consumers of rx skb time stamps */
1276static atomic_t netstamp_needed = ATOMIC_INIT(0);
1277
/*
 *	Register one more consumer of rx skb time stamps by bumping
 *	netstamp_needed.
 */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
1282
/*
 *	Drop one consumer of rx skb time stamps by decrementing
 *	netstamp_needed.
 */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
1287
1288static inline void net_timestamp(struct sk_buff *skb)
1289{
1290	if (atomic_read(&netstamp_needed))
1291		__net_timestamp(skb);
1292	else
1293		skb->tstamp.tv64 = 0;
1294}
1295
1296/*
1297 *	Support routine. Sends outgoing frames to any network
1298 *	taps currently in use.
1299 */
1300
1301static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1302{
1303	struct packet_type *ptype;
1304
1305	net_timestamp(skb);
1306
1307	rcu_read_lock();
1308	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1309		/* Never send packets back to the socket
1310		 * they originated from - MvS (miquels@drinkel.ow.org)
1311		 */
1312		if ((ptype->dev == dev || !ptype->dev) &&
1313		    (ptype->af_packet_priv == NULL ||
1314		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1315			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1316			if (!skb2)
1317				break;
1318
1319			/* skb->nh should be correctly
1320			   set by sender, so that the second statement is
1321			   just protection against buggy protocols.
1322			 */
1323			skb_reset_mac_header(skb2);
1324
1325			if (skb_network_header(skb2) < skb2->data ||
1326			    skb2->network_header > skb2->tail) {
1327				if (net_ratelimit())
1328					printk(KERN_CRIT "protocol %04x is "
1329					       "buggy, dev %s\n",
1330					       skb2->protocol, dev->name);
1331				skb_reset_network_header(skb2);
1332			}
1333
1334			skb2->transport_header = skb2->network_header;
1335			skb2->pkt_type = PACKET_OUTGOING;
1336			ptype->func(skb2, skb->dev, ptype, skb->dev);
1337		}
1338	}
1339	rcu_read_unlock();
1340}
1341
1342
1343static inline void __netif_reschedule(struct Qdisc *q)
1344{
1345	struct softnet_data *sd;
1346	unsigned long flags;
1347
1348	local_irq_save(flags);
1349	sd = &__get_cpu_var(softnet_data);
1350	q->next_sched = sd->output_queue;
1351	sd->output_queue = q;
1352	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1353	local_irq_restore(flags);
1354}
1355
1356void __netif_schedule(struct Qdisc *q)
1357{
1358	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1359		__netif_reschedule(q);
1360}
1361EXPORT_SYMBOL(__netif_schedule);
1362
1363void dev_kfree_skb_irq(struct sk_buff *skb)
1364{
1365	if (atomic_dec_and_test(&skb->users)) {
1366		struct softnet_data *sd;
1367		unsigned long flags;
1368
1369		local_irq_save(flags);
1370		sd = &__get_cpu_var(softnet_data);
1371		skb->next = sd->completion_queue;
1372		sd->completion_queue = skb;
1373		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1374		local_irq_restore(flags);
1375	}
1376}
1377EXPORT_SYMBOL(dev_kfree_skb_irq);
1378
/*
 *	Free @skb from any context: the deferred dev_kfree_skb_irq() path
 *	is taken in hard interrupt context or with interrupts disabled,
 *	dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled()) {
		dev_kfree_skb_irq(skb);
		return;
	}

	dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1387
1388
1389/**
1390 * netif_device_detach - mark device as removed
1391 * @dev: network device
1392 *
1393 * Mark device as removed from system and therefore no longer available.
1394 */
1395void netif_device_detach(struct net_device *dev)
1396{
1397	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1398	    netif_running(dev)) {
1399		netif_stop_queue(dev);
1400	}
1401}
1402EXPORT_SYMBOL(netif_device_detach);
1403
1404/**
1405 * netif_device_attach - mark device as attached
1406 * @dev: network device
1407 *
1408 * Mark device as attached from system and restart if needed.
1409 */
1410void netif_device_attach(struct net_device *dev)
1411{
1412	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1413	    netif_running(dev)) {
1414		netif_wake_queue(dev);
1415		__netdev_watchdog_up(dev);
1416	}
1417}
1418EXPORT_SYMBOL(netif_device_attach);
1419
1420static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1421{
1422	return ((features & NETIF_F_GEN_CSUM) ||
1423		((features & NETIF_F_IP_CSUM) &&
1424		 protocol == htons(ETH_P_IP)) ||
1425		((features & NETIF_F_IPV6_CSUM) &&
1426		 protocol == htons(ETH_P_IPV6)));
1427}
1428
1429static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1430{
1431	if (can_checksum_protocol(dev->features, skb->protocol))
1432		return true;
1433
1434	if (skb->protocol == htons(ETH_P_8021Q)) {
1435		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1436		if (can_checksum_protocol(dev->features & dev->vlan_features,
1437					  veh->h_vlan_encapsulated_proto))
1438			return true;
1439	}
1440
1441	return false;
1442}
1443
1444/*
1445 * Invalidate hardware checksum when packet is to be mangled, and
1446 * complete checksum manually on outgoing path.
1447 */
1448int skb_checksum_help(struct sk_buff *skb)
1449{
1450	__wsum csum;
1451	int ret = 0, offset;
1452
1453	if (skb->ip_summed == CHECKSUM_COMPLETE)
1454		goto out_set_summed;
1455
1456	if (unlikely(skb_shinfo(skb)->gso_size)) {
1457		/* Let GSO fix up the checksum. */
1458		goto out_set_summed;
1459	}
1460
1461	offset = skb->csum_start - skb_headroom(skb);
1462	BUG_ON(offset >= skb_headlen(skb));
1463	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1464
1465	offset += skb->csum_offset;
1466	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1467
1468	if (skb_cloned(skb) &&
1469	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1470		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1471		if (ret)
1472			goto out;
1473	}
1474
1475	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1476out_set_summed:
1477	skb->ip_summed = CHECKSUM_NONE;
1478out:
1479	return ret;
1480}
1481
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	An ERR_PTR() is returned on failure; in particular
 *	ERR_PTR(-EPROTONOSUPPORT) when no handler with a gso_segment
 *	method is registered for the skb's protocol.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	/* Record the MAC header length and pull the MAC header off. */
	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	/*
	 * Packets are expected to arrive here as CHECKSUM_PARTIAL; warn
	 * otherwise and make sure the header is private, since the
	 * gso_send_check() call below is then run on it.
	 */
	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	/* Find the device-independent handler for this protocol. */
	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				/*
				 * Stop on error, or with segs == NULL when
				 * the output path can take the skb as is.
				 */
				if (err || skb_gso_ok(skb, features))
					break;
				/* Move skb->data back to the network header. */
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore skb->data to the MAC header. */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1535
/*
 * Take action when hardware reception checksum errors are detected:
 * log the (rate limited) failure together with a stack dump.
 * @dev may be NULL.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	const char *name;

	if (!net_ratelimit())
		return;

	name = dev ? dev->name : "<unknown>";
	printk(KERN_ERR "%s: hw csum failure.\n", name);
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1548
/*
 * Returns 1 if @skb carries a page fragment in high memory while @dev
 * does not advertise NETIF_F_HIGHDMA, 0 otherwise.  Always 0 without
 * CONFIG_HIGHMEM.
 *
 * Actually, we should eliminate this check as soon as we know that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}

#endif
	return 0;
}
1569
/*
 * Private data kept in skb->cb of a software-segmented skb: the
 * destructor the skb had before dev_gso_segment() installed
 * dev_gso_skb_destructor() in its place.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1575
1576static void dev_gso_skb_destructor(struct sk_buff *skb)
1577{
1578	struct dev_gso_cb *cb;
1579
1580	do {
1581		struct sk_buff *nskb = skb->next;
1582
1583		skb->next = nskb->next;
1584		nskb->next = NULL;
1585		kfree_skb(nskb);
1586	} while (skb->next);
1587
1588	cb = DEV_GSO_CB(skb);
1589	if (cb->destructor)
1590		cb->destructor(skb);
1591}
1592
1593/**
1594 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1595 *	@skb: buffer to segment
1596 *
1597 *	This function segments the given skb and stores the list of segments
1598 *	in skb->next.
1599 */
1600static int dev_gso_segment(struct sk_buff *skb)
1601{
1602	struct net_device *dev = skb->dev;
1603	struct sk_buff *segs;
1604	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1605					 NETIF_F_SG : 0);
1606
1607	segs = skb_gso_segment(skb, features);
1608
1609	/* Verifying header integrity only. */
1610	if (!segs)
1611		return 0;
1612
1613	if (IS_ERR(segs))
1614		return PTR_ERR(segs);
1615
1616	skb->next = segs;
1617	DEV_GSO_CB(skb)->destructor = skb->destructor;
1618	skb->destructor = dev_gso_skb_destructor;
1619
1620	return 0;
1621}
1622
/*
 *	Hand @skb to the driver of @dev.
 *
 *	An skb without a segment list (skb->next == NULL) is first shown
 *	to the network taps, then segmented in software if
 *	netif_needs_gso() says so, and otherwise passed directly to
 *	dev->hard_start_xmit(), whose return value is returned.
 *
 *	For an skb carrying a segment list, the segments are sent one by
 *	one.  If the driver refuses one, it is linked back at the head of
 *	the list and the driver's return code is returned; if @txq gets
 *	stopped while segments remain, NETDEV_TX_BUSY is returned.  In both
 *	cases the remaining segments stay on skb->next.  Once all segments
 *	are sent the original destructor is restored, the skb is freed and
 *	0 is returned.
 *
 *	If software segmentation fails the skb is freed and 0 is returned.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			/* Segments were produced: send them one by one. */
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		/* Unlink the first segment and give it to the driver. */
		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Refused: put it back at the head of the list. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* Everything was sent: undo the destructor swap before freeing. */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}
1663
1664static u32 simple_tx_hashrnd;
1665static int simple_tx_hashrnd_initialized = 0;
1666
1667static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1668{
1669	u32 addr1, addr2, ports;
1670	u32 hash, ihl;
1671	u8 ip_proto = 0;
1672
1673	if (unlikely(!simple_tx_hashrnd_initialized)) {
1674		get_random_bytes(&simple_tx_hashrnd, 4);
1675		simple_tx_hashrnd_initialized = 1;
1676	}
1677
1678	switch (skb->protocol) {
1679	case __constant_htons(ETH_P_IP):
1680		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1681			ip_proto = ip_hdr(skb)->protocol;
1682		addr1 = ip_hdr(skb)->saddr;
1683		addr2 = ip_hdr(skb)->daddr;
1684		ihl = ip_hdr(skb)->ihl;
1685		break;
1686	case __constant_htons(ETH_P_IPV6):
1687		ip_proto = ipv6_hdr(skb)->nexthdr;
1688		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1689		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1690		ihl = (40 >> 2);
1691		break;
1692	default:
1693		return 0;
1694	}
1695
1696
1697	switch (ip_proto) {
1698	case IPPROTO_TCP:
1699	case IPPROTO_UDP:
1700	case IPPROTO_DCCP:
1701	case IPPROTO_ESP:
1702	case IPPROTO_AH:
1703	case IPPROTO_SCTP:
1704	case IPPROTO_UDPLITE:
1705		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1706		break;
1707
1708	default:
1709		ports = 0;
1710		break;
1711	}
1712
1713	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1714
1715	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1716}
1717
1718static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1719					struct sk_buff *skb)
1720{
1721	u16 queue_index = 0;
1722
1723	if (dev->select_queue)
1724		queue_index = dev->select_queue(dev, skb);
1725	else if (dev->real_num_tx_queues > 1)
1726		queue_index = simple_tx_hash(dev, skb);
1727
1728	skb_set_queue_mapping(skb, queue_index);
1729	return netdev_get_tx_queue(dev, queue_index);
1730}
1731
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 *	Notes:
 *
 *	This function can also return errors from the queue disciplines,
 *	including NET_XMIT_DROP, which is a positive value.  So, errors can
 *	also be positive.
 *
 *	Regardless of the return value, the skb is consumed, so it is
 *	currently difficult to retry a send to this function.  (You can bump
 *	the ref count before sending to hold a reference for retry if you
 *	are careful.)
 *
 *	When calling this function, interrupts MUST be enabled.  This is
 *	because the BH enable code must have IRQs enabled so that it will
 *	not deadlock.
 *          --BLG
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	/* Returned when linearizing or checksumming below fails. */
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize a frag_list skb if the device cannot handle it. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
			goto out_kfree_skb;
	}

gso:
	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	/* The qdisc has a queue: enqueue under its lock and run it. */
	if (q->enqueue) {
		spinlock_t *root_lock = qdisc_lock(q);

		spin_lock(root_lock);

		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
			kfree_skb(skb);
			rc = NET_XMIT_DROP;
		} else {
			rc = qdisc_enqueue_root(skb, q);
			qdisc_run(q);
		}
		spin_unlock(root_lock);

		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here (e.g. loopback and IP tunnels are clean, ignoring statistics
	   counters).
	   However, it is possible that they rely on the protection
	   provided by us here.

	   Check this and take the lock. It is not prone to deadlocks.
	   Alternatively, the noqueue qdisc could be dropped altogether,
	   which would be even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (txq->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev, txq)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			/* Queue stopped or driver refused the packet. */
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/* Queueless device that is down, busy or looping: drop. */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1870
1871
1872/*=======================================================================
1873			Receiver routines
1874  =======================================================================*/
1875
1876int netdev_max_backlog __read_mostly = 1000;
1877int netdev_budget __read_mostly = 300;
1878int weight_p __read_mostly = 64;            /* old backlog weight */
1879
1880DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1881
1882
1883/**
1884 *	netif_rx	-	post buffer to the network code
1885 *	@skb: buffer to post
1886 *
1887 *	This function receives a packet from a device driver and queues it for
1888 *	the upper (protocol) levels to process.  It always succeeds. The buffer
1889 *	may be dropped during processing for congestion control or by the
1890 *	protocol layers.
1891 *
1892 *	return values:
1893 *	NET_RX_SUCCESS	(no congestion)
1894 *	NET_RX_DROP     (packet was dropped)
1895 *
1896 */
1897
1898int netif_rx(struct sk_buff *skb)
1899{
1900	struct softnet_data *queue;
1901	unsigned long flags;
1902
1903	/* if netpoll wants it, pretend we never saw it */
1904	if (netpoll_rx(skb))
1905		return NET_RX_DROP;
1906
1907	if (!skb->tstamp.tv64)
1908		net_timestamp(skb);
1909
1910	/*
1911	 * The code is rearranged so that the path is the most
1912	 * short when CPU is congested, but is still operating.
1913	 */
1914	local_irq_save(flags);
1915	queue = &__get_cpu_var(softnet_data);
1916
1917	__get_cpu_var(netdev_rx_stat).total++;
1918	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1919		if (queue->input_pkt_queue.qlen) {
1920enqueue:
1921			__skb_queue_tail(&queue->input_pkt_queue, skb);
1922			local_irq_restore(flags);
1923			return NET_RX_SUCCESS;
1924		}
1925
1926		napi_schedule(&queue->backlog);
1927		goto enqueue;
1928	}
1929
1930	__get_cpu_var(netdev_rx_stat).dropped++;
1931	local_irq_restore(flags);
1932
1933	kfree_skb(skb);
1934	return NET_RX_DROP;
1935}
1936
/*
 *	netif_rx_ni - post buffer to the network code, then run softirqs
 *	@skb: buffer to post
 *
 *	Calls netif_rx() with preemption disabled and, if a softirq is
 *	pending afterwards, runs do_softirq() before preemption is enabled
 *	again.  Returns what netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);
1951
1952static void net_tx_action(struct softirq_action *h)
1953{
1954	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1955
1956	if (sd->completion_queue) {
1957		struct sk_buff *clist;
1958
1959		local_irq_disable();
1960		clist = sd->completion_queue;
1961		sd->completion_queue = NULL;
1962		local_irq_enable();
1963
1964		while (clist) {
1965			struct sk_buff *skb = clist;
1966			clist = clist->next;
1967
1968			WARN_ON(atomic_read(&skb->users));
1969			__kfree_skb(skb);
1970		}
1971	}
1972
1973	if (sd->output_queue) {
1974		struct Qdisc *head;
1975
1976		local_irq_disable();
1977		head = sd->output_queue;
1978		sd->output_queue = NULL;
1979		local_irq_enable();
1980
1981		while (head) {
1982			struct Qdisc *q = head;
1983			spinlock_t *root_lock;
1984
1985			head = head->next_sched;
1986
1987			root_lock = qdisc_lock(q);
1988			if (spin_trylock(root_lock)) {
1989				smp_mb__before_clear_bit();
1990				clear_bit(__QDISC_STATE_SCHED,
1991					  &q->state);
1992				qdisc_run(q);
1993				spin_unlock(root_lock);
1994			} else {
1995				if (!test_bit(__QDISC_STATE_DEACTIVATED,
1996					      &q->state)) {
1997					__netif_reschedule(q);
1998				} else {
1999					smp_mb__before_clear_bit();
2000					clear_bit(__QDISC_STATE_SCHED,
2001						  &q->state);
2002				}
2003			}
2004		}
2005	}
2006}
2007
/*
 *	Hand @skb to the handler of @pt_prev.  A reference is taken on
 *	the skb first, so the caller's own reference stays valid after the
 *	call.  Returns whatever the handler returns.
 */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
2015
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Pass @skb to the bridge if its device is a bridge port.  Loopback
 * packets and packets on non-bridged devices are returned untouched.
 * A pending *pt_prev handler is served first and then cleared.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2049
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Pass @skb to the macvlan hook if its device has a macvlan port;
 * otherwise return it untouched.  A pending *pt_prev handler is served
 * first and then cleared.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	struct packet_type *pending = *pt_prev;

	if (skb->dev->macvlan_port == NULL)
		return skb;

	if (pending) {
		*ret = deliver_skb(skb, pending, orig_dev);
		*pt_prev = NULL;
	}

	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2071
2072#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * whenever CONFIG_NET_CLS_ACT is set? Otherwise we pay for some useless
 * instructions (a compare and two extra stores right now) when
 * CONFIG_NET_CLS_ACT is enabled but sch_ingress is not.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
2081static int ing_filter(struct sk_buff *skb)
2082{
2083	struct net_device *dev = skb->dev;
2084	u32 ttl = G_TC_RTTL(skb->tc_verd);
2085	struct netdev_queue *rxq;
2086	int result = TC_ACT_OK;
2087	struct Qdisc *q;
2088
2089	if (MAX_RED_LOOP < ttl++) {
2090		printk(KERN_WARNING
2091		       "Redir loop detected Dropping packet (%d->%d)\n",
2092		       skb->iif, dev->ifindex);
2093		return TC_ACT_SHOT;
2094	}
2095
2096	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2097	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2098
2099	rxq = &dev->rx_queue;
2100
2101	q = rxq->qdisc;
2102	if (q != &noop_qdisc) {
2103		spin_lock(qdisc_lock(q));
2104		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2105			result = qdisc_enqueue_root(skb, q);
2106		spin_unlock(qdisc_lock(q));
2107	}
2108
2109	return result;
2110}
2111
2112static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2113					 struct packet_type **pt_prev,
2114					 int *ret, struct net_device *orig_dev)
2115{
2116	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2117		goto out;
2118
2119	if (*pt_prev) {
2120		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2121		*pt_prev = NULL;
2122	} else {
2123		/* Huh? Why does turning on AF_PACKET affect this? */
2124		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2125	}
2126
2127	switch (ing_filter(skb)) {
2128	case TC_ACT_SHOT:
2129	case TC_ACT_STOLEN:
2130		kfree_skb(skb);
2131		return NULL;
2132	}
2133
2134out:
2135	skb->tc_verd = 0;
2136	return skb;
2137}
2138#endif
2139
2140/*
2141 * 	netif_nit_deliver - deliver received packets to network taps
2142 * 	@skb: buffer
2143 *
2144 * 	This function is used to deliver incoming packets to network
2145 * 	taps. It should be used when the normal netif_receive_skb path
2146 * 	is bypassed, for example because of VLAN acceleration.
2147 */
2148void netif_nit_deliver(struct sk_buff *skb)
2149{
2150	struct packet_type *ptype;
2151
2152	if (list_empty(&ptype_all))
2153		return;
2154
2155	skb_reset_network_header(skb);
2156	skb_reset_transport_header(skb);
2157	skb->mac_len = skb->network_header - skb->mac_header;
2158
2159	rcu_read_lock();
2160	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2161		if (!ptype->dev || ptype->dev == skb->dev)
2162			deliver_skb(skb, ptype, skb->dev);
2163	}
2164	rcu_read_unlock();
2165}
2166
2167/**
2168 *	netif_receive_skb - process receive buffer from network
2169 *	@skb: buffer to process
2170 *
2171 *	netif_receive_skb() is the main receive data processing function.
2172 *	It always succeeds. The buffer may be dropped during processing
2173 *	for congestion control or by the protocol layers.
2174 *
2175 *	This function may only be called from softirq context and interrupts
2176 *	should be enabled.
2177 *
2178 *	Return values (usually ignored):
2179 *	NET_RX_SUCCESS: no congestion
2180 *	NET_RX_DROP: packet was dropped
2181 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	struct net_device *null_or_orig;
	int ret = NET_RX_DROP;
	__be16 type;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	/* Stamp the packet if no timestamp has been recorded yet. */
	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/* Record the input interface index if it is still unset. */
	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	/*
	 * null_or_orig is compared against ptype->dev in the loops below.
	 * While NULL it matches handlers not bound to a device; when set to
	 * orig_dev only handlers bound to the receiving device match.
	 */
	null_or_orig = NULL;
	orig_dev = skb->dev;
	if (orig_dev->master) {
		if (skb_bond_should_drop(skb))
			null_or_orig = orig_dev; /* deliver only exact match */
		else
			skb->dev = orig_dev->master;
	}

	/* Per-CPU receive counter. */
	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	/*
	 * pt_prev holds the most recently matched handler.  A match is only
	 * handed to deliver_skb() once a later match is found; the final
	 * match is invoked directly via ->func() at the end.
	 */
	pt_prev = NULL;

	rcu_read_lock();

	/* Don't receive packets in an exiting network namespace */
	if (!net_alive(dev_net(skb->dev)))
		goto out;

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: clear it and skip the taps and the ingress filter. */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* First pass: handlers on the ptype_all list. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		    ptype->dev == orig_dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	/* A NULL return means the ingress filter consumed the skb. */
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	/* Bridge and macvlan hooks; each returns NULL if it took the skb. */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Second pass: handlers hashed by protocol in ptype_base[]. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last matching handler: call it directly. */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		/* Nobody wanted the packet: free it and report a drop. */
		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
2279
2280/* Network device is going away, flush any packets still pending  */
2281static void flush_backlog(void *arg)
2282{
2283	struct net_device *dev = arg;
2284	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2285	struct sk_buff *skb, *tmp;
2286
2287	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2288		if (skb->dev == dev) {
2289			__skb_unlink(skb, &queue->input_pkt_queue);
2290			kfree_skb(skb);
2291		}
2292}
2293
2294static int process_backlog(struct napi_struct *napi, int quota)
2295{
2296	int work = 0;
2297	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2298	unsigned long start_time = jiffies;
2299
2300	napi->weight = weight_p;
2301	do {
2302		struct sk_buff *skb;
2303
2304		local_irq_disable();
2305		skb = __skb_dequeue(&queue->input_pkt_queue);
2306		if (!skb) {
2307			__napi_complete(napi);
2308			local_irq_enable();
2309			break;
2310		}
2311		local_irq_enable();
2312
2313		netif_receive_skb(skb);
2314	} while (++work < quota && jiffies == start_time);
2315
2316	return work;
2317}
2318
2319/**
2320 * __napi_schedule - schedule for receive
2321 * @n: entry to schedule
2322 *
2323 * The entry's receive function will be scheduled to run
2324 */
2325void __napi_schedule(struct napi_struct *n)
2326{
2327	unsigned long flags;
2328
2329	local_irq_save(flags);
2330	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2331	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2332	local_irq_restore(flags);
2333}
2334EXPORT_SYMBOL(__napi_schedule);
2335
2336
/*
 * Poll the NAPI instances queued on this CPU's softnet_data.poll_list.
 * Polling stops when the list is empty, when the netdev_budget worth of
 * work has been consumed, or when jiffies has advanced since entry; in the
 * latter two cases NET_RX_SOFTIRQ is raised again so polling resumes later.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
	unsigned long start_time = jiffies;
	int budget = netdev_budget;
	void *have;	/* cookie from netpoll_poll_lock(), passed to unlock */

	local_irq_disable();

	while (!list_empty(list)) {
		struct napi_struct *n;
		int work, weight;

		/* If softirq window is exhausted then punt.
		 *
		 * Note that this is a slight policy change from the
		 * previous NAPI code, which would allow up to 2
		 * jiffies to pass before breaking out.  The test
		 * used to be "jiffies - start_time > 1".
		 */
		if (unlikely(budget <= 0 || jiffies != start_time))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_entry(list->next, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi().  Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state))
			work = n->poll(n, weight);

		/* A ->poll() must never report more work than its weight. */
		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n)))
				__napi_complete(n);
			else
				list_move_tail(&n->poll_list, list);
		}

		netpoll_poll_unlock(have);
	}
out:
	/* Reached with interrupts disabled from both the loop exit and the
	 * softnet_break path.
	 */
	local_irq_enable();

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	if (!cpus_empty(net_dma.channel_mask)) {
		int chan_idx;
		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
			struct dma_chan *chan = net_dma.channels[chan_idx];
			if (chan)
				dma_async_memcpy_issue_pending(chan);
		}
	}
#endif

	return;

softnet_break:
	/* Budget or time slice used up: count the squeeze and re-raise the
	 * softirq (interrupts are still off here), then take the common exit.
	 */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}
2428
2429static gifconf_func_t * gifconf_list [NPROTO];
2430
2431/**
2432 *	register_gifconf	-	register a SIOCGIF handler
2433 *	@family: Address family
2434 *	@gifconf: Function handler
2435 *
2436 *	Register protocol dependent address dumping routines. The handler
2437 *	that is passed must not be freed or reused until it has been replaced
2438 *	by another handler.
2439 */
2440int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2441{
2442	if (family >= NPROTO)
2443		return -EINVAL;
2444	gifconf_list[family] = gifconf;
2445	return 0;
2446}
2447
2448
2449/*
2450 *	Map an interface index to its name (SIOCGIFNAME)
2451 */
2452
2453/*
2454 *	We need this ioctl for efficient implementation of the
2455 *	if_indextoname() function required by the IPv6 API.  Without
2456 *	it, we would have to search all the interfaces to find a
2457 *	match.  --pb
2458 */
2459
2460static int dev_ifname(struct net *net, struct ifreq __user *arg)
2461{
2462	struct net_device *dev;
2463	struct ifreq ifr;
2464
2465	/*
2466	 *	Fetch the caller's info block.
2467	 */
2468
2469	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2470		return -EFAULT;
2471
2472	read_lock(&dev_base_lock);
2473	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2474	if (!dev) {
2475		read_unlock(&dev_base_lock);
2476		return -ENODEV;
2477	}
2478
2479	strcpy(ifr.ifr_name, dev->name);
2480	read_unlock(&dev_base_lock);
2481
2482	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2483		return -EFAULT;
2484	return 0;
2485}
2486
2487/*
2488 *	Perform a SIOCGIFCONF call. This structure will change
2489 *	size eventually, and there is nothing I can do about it.
2490 *	Thus we will need a 'compatibility mode'.
2491 */
2492
2493static int dev_ifconf(struct net *net, char __user *arg)
2494{
2495	struct ifconf ifc;
2496	struct net_device *dev;
2497	char __user *pos;
2498	int len;
2499	int total;
2500	int i;
2501
2502	/*
2503	 *	Fetch the caller's info block.
2504	 */
2505
2506	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2507		return -EFAULT;
2508
2509	pos = ifc.ifc_buf;
2510	len = ifc.ifc_len;
2511
2512	/*
2513	 *	Loop over the interfaces, and write an info block for each.
2514	 */
2515
2516	total = 0;
2517	for_each_netdev(net, dev) {
2518		for (i = 0; i < NPROTO; i++) {
2519			if (gifconf_list[i]) {
2520				int done;
2521				if (!pos)
2522					done = gifconf_list[i](dev, NULL, 0);
2523				else
2524					done = gifconf_list[i](dev, pos + total,
2525							       len - total);
2526				if (done < 0)
2527					return -EFAULT;
2528				total += done;
2529			}
2530		}
2531	}
2532
2533	/*
2534	 *	All done.  Write the updated control block back to the caller.
2535	 */
2536	ifc.ifc_len = total;
2537
2538	/*
2539	 * 	Both BSD and Solaris return 0 here, so we do too.
2540	 */
2541	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2542}
2543
2544#ifdef CONFIG_PROC_FS
2545/*
2546 *	This is invoked by the /proc filesystem handler to display a device
2547 *	in detail.
2548 */
2549void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2550	__acquires(dev_base_lock)
2551{
2552	struct net *net = seq_file_net(seq);
2553	loff_t off;
2554	struct net_device *dev;
2555
2556	read_lock(&dev_base_lock);
2557	if (!*pos)
2558		return SEQ_START_TOKEN;
2559
2560	off = 1;
2561	for_each_netdev(net, dev)
2562		if (off++ == *pos)
2563			return dev;
2564
2565	return NULL;
2566}
2567
2568void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2569{
2570	struct net *net = seq_file_net(seq);
2571	++*pos;
2572	return v == SEQ_START_TOKEN ?
2573		first_net_device(net) : next_net_device((struct net_device *)v);
2574}
2575
2576void dev_seq_stop(struct seq_file *seq, void *v)
2577	__releases(dev_base_lock)
2578{
2579	read_unlock(&dev_base_lock);
2580}
2581
2582static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2583{
2584	struct net_device_stats *stats = dev->get_stats(dev);
2585
2586	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2587		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2588		   dev->name, stats->rx_bytes, stats->rx_packets,
2589		   stats->rx_errors,
2590		   stats->rx_dropped + stats->rx_missed_errors,
2591		   stats->rx_fifo_errors,
2592		   stats->rx_length_errors + stats->rx_over_errors +
2593		    stats->rx_crc_errors + stats->rx_frame_errors,
2594		   stats->rx_compressed, stats->multicast,
2595		   stats->tx_bytes, stats->tx_packets,
2596		   stats->tx_errors, stats->tx_dropped,
2597		   stats->tx_fifo_errors, stats->collisions,
2598		   stats->tx_carrier_errors +
2599		    stats->tx_aborted_errors +
2600		    stats->tx_window_errors +
2601		    stats->tx_heartbeat_errors,
2602		   stats->tx_compressed);
2603}
2604
2605/*
2606 *	Called from the PROCfs module. This now uses the new arbitrary sized
2607 *	/proc/net interface to create /proc/net/dev
2608 */
2609static int dev_seq_show(struct seq_file *seq, void *v)
2610{
2611	if (v == SEQ_START_TOKEN)
2612		seq_puts(seq, "Inter-|   Receive                            "
2613			      "                    |  Transmit\n"
2614			      " face |bytes    packets errs drop fifo frame "
2615			      "compressed multicast|bytes    packets errs "
2616			      "drop fifo colls carrier compressed\n");
2617	else
2618		dev_seq_printf_stats(seq, v);
2619	return 0;
2620}
2621
2622static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2623{
2624	struct netif_rx_stats *rc = NULL;
2625
2626	while (*pos < nr_cpu_ids)
2627		if (cpu_online(*pos)) {
2628			rc = &per_cpu(netdev_rx_stat, *pos);
2629			break;
2630		} else
2631			++*pos;
2632	return rc;
2633}
2634
2635static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2636{
2637	return softnet_get_online(pos);
2638}
2639
2640static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2641{
2642	++*pos;
2643	return softnet_get_online(pos);
2644}
2645
/* seq_file ->stop: nothing to release, start/next take no locks. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
	return;
}
2649
2650static int softnet_seq_show(struct seq_file *seq, void *v)
2651{
2652	struct netif_rx_stats *s = v;
2653
2654	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2655		   s->total, s->dropped, s->time_squeeze, 0,
2656		   0, 0, 0, 0, /* was fastroute */
2657		   s->cpu_collision );
2658	return 0;
2659}
2660
2661static const struct seq_operations dev_seq_ops = {
2662	.start = dev_seq_start,
2663	.next  = dev_seq_next,
2664	.stop  = dev_seq_stop,
2665	.show  = dev_seq_show,
2666};
2667
2668static int dev_seq_open(struct inode *inode, struct file *file)
2669{
2670	return seq_open_net(inode, file, &dev_seq_ops,
2671			    sizeof(struct seq_net_private));
2672}
2673
2674static const struct file_operations dev_seq_fops = {
2675	.owner	 = THIS_MODULE,
2676	.open    = dev_seq_open,
2677	.read    = seq_read,
2678	.llseek  = seq_lseek,
2679	.release = seq_release_net,
2680};
2681
2682static const struct seq_operations softnet_seq_ops = {
2683	.start = softnet_seq_start,
2684	.next  = softnet_seq_next,
2685	.stop  = softnet_seq_stop,
2686	.show  = softnet_seq_show,
2687};
2688
2689static int softnet_seq_open(struct inode *inode, struct file *file)
2690{
2691	return seq_open(file, &softnet_seq_ops);
2692}
2693
2694static const struct file_operations softnet_seq_fops = {
2695	.owner	 = THIS_MODULE,
2696	.open    = softnet_seq_open,
2697	.read    = seq_read,
2698	.llseek  = seq_lseek,
2699	.release = seq_release,
2700};
2701
2702static void *ptype_get_idx(loff_t pos)
2703{
2704	struct packet_type *pt = NULL;
2705	loff_t i = 0;
2706	int t;
2707
2708	list_for_each_entry_rcu(pt, &ptype_all, list) {
2709		if (i == pos)
2710			return pt;
2711		++i;
2712	}
2713
2714	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2715		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2716			if (i == pos)
2717				return pt;
2718			++i;
2719		}
2720	}
2721	return NULL;
2722}
2723
2724static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2725	__acquires(RCU)
2726{
2727	rcu_read_lock();
2728	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2729}
2730
2731static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2732{
2733	struct packet_type *pt;
2734	struct list_head *nxt;
2735	int hash;
2736
2737	++*pos;
2738	if (v == SEQ_START_TOKEN)
2739		return ptype_get_idx(0);
2740
2741	pt = v;
2742	nxt = pt->list.next;
2743	if (pt->type == htons(ETH_P_ALL)) {
2744		if (nxt != &ptype_all)
2745			goto found;
2746		hash = 0;
2747		nxt = ptype_base[0].next;
2748	} else
2749		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2750
2751	while (nxt == &ptype_base[hash]) {
2752		if (++hash >= PTYPE_HASH_SIZE)
2753			return NULL;
2754		nxt = ptype_base[hash].next;
2755	}
2756found:
2757	return list_entry(nxt, struct packet_type, list);
2758}
2759
2760static void ptype_seq_stop(struct seq_file *seq, void *v)
2761	__releases(RCU)
2762{
2763	rcu_read_unlock();
2764}
2765
/*
 * Print the handler address @sym.  With CONFIG_KALLSYMS and a successful
 * lookup the output is "[:module:]symbol+0xoffset"; otherwise the raw
 * pointer is printed in brackets.
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);
	if (symname != NULL) {
		/* The ":" delimiters only appear when a module name exists. */
		char *sep = ":";

		if (modname == NULL) {
			sep = "";
			modname = sep;
		}
		seq_printf(seq, "%s%s%s%s+0x%lx", sep, modname, sep,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2790
2791static int ptype_seq_show(struct seq_file *seq, void *v)
2792{
2793	struct packet_type *pt = v;
2794
2795	if (v == SEQ_START_TOKEN)
2796		seq_puts(seq, "Type Device      Function\n");
2797	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2798		if (pt->type == htons(ETH_P_ALL))
2799			seq_puts(seq, "ALL ");
2800		else
2801			seq_printf(seq, "%04x", ntohs(pt->type));
2802
2803		seq_printf(seq, " %-8s ",
2804			   pt->dev ? pt->dev->name : "");
2805		ptype_seq_decode(seq,  pt->func);
2806		seq_putc(seq, '\n');
2807	}
2808
2809	return 0;
2810}
2811
2812static const struct seq_operations ptype_seq_ops = {
2813	.start = ptype_seq_start,
2814	.next  = ptype_seq_next,
2815	.stop  = ptype_seq_stop,
2816	.show  = ptype_seq_show,
2817};
2818
2819static int ptype_seq_open(struct inode *inode, struct file *file)
2820{
2821	return seq_open_net(inode, file, &ptype_seq_ops,
2822			sizeof(struct seq_net_private));
2823}
2824
2825static const struct file_operations ptype_seq_fops = {
2826	.owner	 = THIS_MODULE,
2827	.open    = ptype_seq_open,
2828	.read    = seq_read,
2829	.llseek  = seq_lseek,
2830	.release = seq_release_net,
2831};
2832
2833
2834static int __net_init dev_proc_net_init(struct net *net)
2835{
2836	int rc = -ENOMEM;
2837
2838	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2839		goto out;
2840	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2841		goto out_dev;
2842	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2843		goto out_softnet;
2844
2845	if (wext_proc_init(net))
2846		goto out_ptype;
2847	rc = 0;
2848out:
2849	return rc;
2850out_ptype:
2851	proc_net_remove(net, "ptype");
2852out_softnet:
2853	proc_net_remove(net, "softnet_stat");
2854out_dev:
2855	proc_net_remove(net, "dev");
2856	goto out;
2857}
2858
2859static void __net_exit dev_proc_net_exit(struct net *net)
2860{
2861	wext_proc_exit(net);
2862
2863	proc_net_remove(net, "ptype");
2864	proc_net_remove(net, "softnet_stat");
2865	proc_net_remove(net, "dev");
2866}
2867
2868static struct pernet_operations __net_initdata dev_proc_ops = {
2869	.init = dev_proc_net_init,
2870	.exit = dev_proc_net_exit,
2871};
2872
2873static int __init dev_proc_init(void)
2874{
2875	return register_pernet_subsys(&dev_proc_ops);
2876}
2877#else
2878#define dev_proc_init() 0
2879#endif	/* CONFIG_PROC_FS */
2880
2881
2882/**
2883 *	netdev_set_master	-	set up master/slave pair
2884 *	@slave: slave device
2885 *	@master: new master device
2886 *
2887 *	Changes the master device of the slave. Pass %NULL to break the
2888 *	bonding. The caller must hold the RTNL semaphore. On a failure
2889 *	a negative errno code is returned. On success the reference counts
2890 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2891 *	function returns zero.
2892 */
2893int netdev_set_master(struct net_device *slave, struct net_device *master)
2894{
2895	struct net_device *old = slave->master;
2896
2897	ASSERT_RTNL();
2898
2899	if (master) {
2900		if (old)
2901			return -EBUSY;
2902		dev_hold(master);
2903	}
2904
2905	slave->master = master;
2906
2907	synchronize_net();
2908
2909	if (old)
2910		dev_put(old);
2911
2912	if (master)
2913		slave->flags |= IFF_SLAVE;
2914	else
2915		slave->flags &= ~IFF_SLAVE;
2916
2917	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2918	return 0;
2919}
2920
2921static int __dev_set_promiscuity(struct net_device *dev, int inc)
2922{
2923	unsigned short old_flags = dev->flags;
2924
2925	ASSERT_RTNL();
2926
2927	dev->flags |= IFF_PROMISC;
2928	dev->promiscuity += inc;
2929	if (dev->promiscuity == 0) {
2930		/*
2931		 * Avoid overflow.
2932		 * If inc causes overflow, untouch promisc and return error.
2933		 */
2934		if (inc < 0)
2935			dev->flags &= ~IFF_PROMISC;
2936		else {
2937			dev->promiscuity -= inc;
2938			printk(KERN_WARNING "%s: promiscuity touches roof, "
2939				"set promiscuity failed, promiscuity feature "
2940				"of device might be broken.\n", dev->name);
2941			return -EOVERFLOW;
2942		}
2943	}
2944	if (dev->flags != old_flags) {
2945		printk(KERN_INFO "device %s %s promiscuous mode\n",
2946		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2947							       "left");
2948		if (audit_enabled)
2949			audit_log(current->audit_context, GFP_ATOMIC,
2950				AUDIT_ANOM_PROMISCUOUS,
2951				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2952				dev->name, (dev->flags & IFF_PROMISC),
2953				(old_flags & IFF_PROMISC),
2954				audit_get_loginuid(current),
2955				current->uid, current->gid,
2956				audit_get_sessionid(current));
2957
2958		if (dev->change_rx_flags)
2959			dev->change_rx_flags(dev, IFF_PROMISC);
2960	}
2961	return 0;
2962}
2963
2964/**
2965 *	dev_set_promiscuity	- update promiscuity count on a device
2966 *	@dev: device
2967 *	@inc: modifier
2968 *
2969 *	Add or remove promiscuity from a device. While the count in the device
2970 *	remains above zero the interface remains promiscuous. Once it hits zero
2971 *	the device reverts back to normal filtering operation. A negative inc
2972 *	value is used to drop promiscuity on the device.
2973 *	Return 0 if successful or a negative errno code on error.
2974 */
2975int dev_set_promiscuity(struct net_device *dev, int inc)
2976{
2977	unsigned short old_flags = dev->flags;
2978	int err;
2979
2980	err = __dev_set_promiscuity(dev, inc);
2981	if (err < 0)
2982		return err;
2983	if (dev->flags != old_flags)
2984		dev_set_rx_mode(dev);
2985	return err;
2986}
2987
2988/**
2989 *	dev_set_allmulti	- update allmulti count on a device
2990 *	@dev: device
2991 *	@inc: modifier
2992 *
2993 *	Add or remove reception of all multicast frames to a device. While the
2994 *	count in the device remains above zero the interface remains listening
2995 *	to all interfaces. Once it hits zero the device reverts back to normal
2996 *	filtering operation. A negative @inc value is used to drop the counter
2997 *	when releasing a resource needing all multicasts.
2998 *	Return 0 if successful or a negative errno code on error.
2999 */
3000
3001int dev_set_allmulti(struct net_device *dev, int inc)
3002{
3003	unsigned short old_flags = dev->flags;
3004
3005	ASSERT_RTNL();
3006
3007	dev->flags |= IFF_ALLMULTI;
3008	dev->allmulti += inc;
3009	if (dev->allmulti == 0) {
3010		/*
3011		 * Avoid overflow.
3012		 * If inc causes overflow, untouch allmulti and return error.
3013		 */
3014		if (inc < 0)
3015			dev->flags &= ~IFF_ALLMULTI;
3016		else {
3017			dev->allmulti -= inc;
3018			printk(KERN_WARNING "%s: allmulti touches roof, "
3019				"set allmulti failed, allmulti feature of "
3020				"device might be broken.\n", dev->name);
3021			return -EOVERFLOW;
3022		}
3023	}
3024	if (dev->flags ^ old_flags) {
3025		if (dev->change_rx_flags)
3026			dev->change_rx_flags(dev, IFF_ALLMULTI);
3027		dev_set_rx_mode(dev);
3028	}
3029	return 0;
3030}
3031
3032/*
3033 *	Upload unicast and multicast address lists to device and
3034 *	configure RX filtering. When the device doesn't support unicast
3035 *	filtering it is put in promiscuous mode while unicast addresses
3036 *	are present.
3037 */
3038void __dev_set_rx_mode(struct net_device *dev)
3039{
3040	/* dev_open will call this function so the list will stay sane. */
3041	if (!(dev->flags&IFF_UP))
3042		return;
3043
3044	if (!netif_device_present(dev))
3045		return;
3046
3047	if (dev->set_rx_mode)
3048		dev->set_rx_mode(dev);
3049	else {
3050		/* Unicast addresses changes may only happen under the rtnl,
3051		 * therefore calling __dev_set_promiscuity here is safe.
3052		 */
3053		if (dev->uc_count > 0 && !dev->uc_promisc) {
3054			__dev_set_promiscuity(dev, 1);
3055			dev->uc_promisc = 1;
3056		} else if (dev->uc_count == 0 && dev->uc_promisc) {
3057			__dev_set_promiscuity(dev, -1);
3058			dev->uc_promisc = 0;
3059		}
3060
3061		if (dev->set_multicast_list)
3062			dev->set_multicast_list(dev);
3063	}
3064}
3065
/* Locked wrapper: run __dev_set_rx_mode() under the device address lock. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_addr_unlock_bh(dev);
}
3072
3073int __dev_addr_delete(struct dev_addr_list **list, int *count,
3074		      void *addr, int alen, int glbl)
3075{
3076	struct dev_addr_list *da;
3077
3078	for (; (da = *list) != NULL; list = &da->next) {
3079		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3080		    alen == da->da_addrlen) {
3081			if (glbl) {
3082				int old_glbl = da->da_gusers;
3083				da->da_gusers = 0;
3084				if (old_glbl == 0)
3085					break;
3086			}
3087			if (--da->da_users)
3088				return 0;
3089
3090			*list = da->next;
3091			kfree(da);
3092			(*count)--;
3093			return 0;
3094		}
3095	}
3096	return -ENOENT;
3097}
3098
3099int __dev_addr_add(struct dev_addr_list **list, int *count,
3100		   void *addr, int alen, int glbl)
3101{
3102	struct dev_addr_list *da;
3103
3104	for (da = *list; da != NULL; da = da->next) {
3105		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3106		    da->da_addrlen == alen) {
3107			if (glbl) {
3108				int old_glbl = da->da_gusers;
3109				da->da_gusers = 1;
3110				if (old_glbl)
3111					return 0;
3112			}
3113			da->da_users++;
3114			return 0;
3115		}
3116	}
3117
3118	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3119	if (da == NULL)
3120		return -ENOMEM;
3121	memcpy(da->da_addr, addr, alen);
3122	da->da_addrlen = alen;
3123	da->da_users = 1;
3124	da->da_gusers = glbl ? 1 : 0;
3125	da->next = *list;
3126	*list = da;
3127	(*count)++;
3128	return 0;
3129}
3130
3131/**
3132 *	dev_unicast_delete	- Release secondary unicast address.
3133 *	@dev: device
3134 *	@addr: address to delete
3135 *	@alen: length of @addr
3136 *
3137 *	Release reference to a secondary unicast address and remove it
3138 *	from the device if the reference count drops to zero.
3139 *
3140 * 	The caller must hold the rtnl_mutex.
3141 */
3142int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3143{
3144	int err;
3145
3146	ASSERT_RTNL();
3147
3148	netif_addr_lock_bh(dev);
3149	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3150	if (!err)
3151		__dev_set_rx_mode(dev);
3152	netif_addr_unlock_bh(dev);
3153	return err;
3154}
3155EXPORT_SYMBOL(dev_unicast_delete);
3156
3157/**
3158 *	dev_unicast_add		- add a secondary unicast address
3159 *	@dev: device
3160 *	@addr: address to add
3161 *	@alen: length of @addr
3162 *
3163 *	Add a secondary unicast address to the device or increase
3164 *	the reference count if it already exists.
3165 *
3166 *	The caller must hold the rtnl_mutex.
3167 */
3168int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3169{
3170	int err;
3171
3172	ASSERT_RTNL();
3173
3174	netif_addr_lock_bh(dev);
3175	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3176	if (!err)
3177		__dev_set_rx_mode(dev);
3178	netif_addr_unlock_bh(dev);
3179	return err;
3180}
3181EXPORT_SYMBOL(dev_unicast_add);
3182
3183int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3184		    struct dev_addr_list **from, int *from_count)
3185{
3186	struct dev_addr_list *da, *next;
3187	int err = 0;
3188
3189	da = *from;
3190	while (da != NULL) {
3191		next = da->next;
3192		if (!da->da_synced) {
3193			err = __dev_addr_add(to, to_count,
3194					     da->da_addr, da->da_addrlen, 0);
3195			if (err < 0)
3196				break;
3197			da->da_synced = 1;
3198			da->da_users++;
3199		} else if (da->da_users == 1) {
3200			__dev_addr_delete(to, to_count,
3201					  da->da_addr, da->da_addrlen, 0);
3202			__dev_addr_delete(from, from_count,
3203					  da->da_addr, da->da_addrlen, 0);
3204		}
3205		da = next;
3206	}
3207	return err;
3208}
3209
3210void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3211		       struct dev_addr_list **from, int *from_count)
3212{
3213	struct dev_addr_list *da, *next;
3214
3215	da = *from;
3216	while (da != NULL) {
3217		next = da->next;
3218		if (da->da_synced) {
3219			__dev_addr_delete(to, to_count,
3220					  da->da_addr, da->da_addrlen, 0);
3221			da->da_synced = 0;
3222			__dev_addr_delete(from, from_count,
3223					  da->da_addr, da->da_addrlen, 0);
3224		}
3225		da = next;
3226	}
3227}
3228
3229/**
3230 *	dev_unicast_sync - Synchronize device's unicast list to another device
3231 *	@to: destination device
3232 *	@from: source device
3233 *
3234 *	Add newly added addresses to the destination device and release
3235 *	addresses that have no users left. The source device must be
 *	locked by netif_addr_lock_bh.
3237 *
3238 *	This function is intended to be called from the dev->set_rx_mode
3239 *	function of layered software devices.
3240 */
3241int dev_unicast_sync(struct net_device *to, struct net_device *from)
3242{
3243	int err = 0;
3244
3245	netif_addr_lock_bh(to);
3246	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3247			      &from->uc_list, &from->uc_count);
3248	if (!err)
3249		__dev_set_rx_mode(to);
3250	netif_addr_unlock_bh(to);
3251	return err;
3252}
3253EXPORT_SYMBOL(dev_unicast_sync);
3254
3255/**
3256 *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3257 *	@to: destination device
3258 *	@from: source device
3259 *
3260 *	Remove all addresses that were added to the destination device by
3261 *	dev_unicast_sync(). This function is intended to be called from the
3262 *	dev->stop function of layered software devices.
3263 */
3264void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3265{
3266	netif_addr_lock_bh(from);
3267	netif_addr_lock(to);
3268
3269	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3270			  &from->uc_list, &from->uc_count);
3271	__dev_set_rx_mode(to);
3272
3273	netif_addr_unlock(to);
3274	netif_addr_unlock_bh(from);
3275}
3276EXPORT_SYMBOL(dev_unicast_unsync);
3277
3278static void __dev_addr_discard(struct dev_addr_list **list)
3279{
3280	struct dev_addr_list *tmp;
3281
3282	while (*list != NULL) {
3283		tmp = *list;
3284		*list = tmp->next;
3285		if (tmp->da_users > tmp->da_gusers)
3286			printk("__dev_addr_discard: address leakage! "
3287			       "da_users=%d\n", tmp->da_users);
3288		kfree(tmp);
3289	}
3290}
3291
3292static void dev_addr_discard(struct net_device *dev)
3293{
3294	netif_addr_lock_bh(dev);
3295
3296	__dev_addr_discard(&dev->uc_list);
3297	dev->uc_count = 0;
3298
3299	__dev_addr_discard(&dev->mc_list);
3300	dev->mc_count = 0;
3301
3302	netif_addr_unlock_bh(dev);
3303}
3304
3305unsigned dev_get_flags(const struct net_device *dev)
3306{
3307	unsigned flags;
3308
3309	flags = (dev->flags & ~(IFF_PROMISC |
3310				IFF_ALLMULTI |
3311				IFF_RUNNING |
3312				IFF_LOWER_UP |
3313				IFF_DORMANT)) |
3314		(dev->gflags & (IFF_PROMISC |
3315				IFF_ALLMULTI));
3316
3317	if (netif_running(dev)) {
3318		if (netif_oper_up(dev))
3319			flags |= IFF_RUNNING;
3320		if (netif_carrier_ok(dev))
3321			flags |= IFF_LOWER_UP;
3322		if (netif_dormant(dev))
3323			flags |= IFF_DORMANT;
3324	}
3325
3326	return flags;
3327}
3328
3329int dev_change_flags(struct net_device *dev, unsigned flags)
3330{
3331	int ret, changes;
3332	int old_flags = dev->flags;
3333
3334	ASSERT_RTNL();
3335
3336	/*
3337	 *	Set the flags on our device.
3338	 */
3339
3340	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3341			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3342			       IFF_AUTOMEDIA)) |
3343		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3344				    IFF_ALLMULTI));
3345
3346	/*
3347	 *	Load in the correct multicast list now the flags have changed.
3348	 */
3349
3350	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
3351		dev->change_rx_flags(dev, IFF_MULTICAST);
3352
3353	dev_set_rx_mode(dev);
3354
3355	/*
3356	 *	Have we downed the interface. We handle IFF_UP ourselves
3357	 *	according to user attempts to set it, rather than blindly
3358	 *	setting it.
3359	 */
3360
3361	ret = 0;
3362	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3363		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3364
3365		if (!ret)
3366			dev_set_rx_mode(dev);
3367	}
3368
3369	if (dev->flags & IFF_UP &&
3370	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3371					  IFF_VOLATILE)))
3372		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3373
3374	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3375		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3376		dev->gflags ^= IFF_PROMISC;
3377		dev_set_promiscuity(dev, inc);
3378	}
3379
3380	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3381	   is important. Some (broken) drivers set IFF_PROMISC, when
3382	   IFF_ALLMULTI is requested not asking us and not reporting.
3383	 */
3384	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3385		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3386		dev->gflags ^= IFF_ALLMULTI;
3387		dev_set_allmulti(dev, inc);
3388	}
3389
3390	/* Exclude state transition flags, already notified */
3391	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3392	if (changes)
3393		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3394
3395	return ret;
3396}
3397
3398int dev_set_mtu(struct net_device *dev, int new_mtu)
3399{
3400	int err;
3401
3402	if (new_mtu == dev->mtu)
3403		return 0;
3404
3405	/*	MTU must be positive.	 */
3406	if (new_mtu < 0)
3407		return -EINVAL;
3408
3409	if (!netif_device_present(dev))
3410		return -ENODEV;
3411
3412	err = 0;
3413	if (dev->change_mtu)
3414		err = dev->change_mtu(dev, new_mtu);
3415	else
3416		dev->mtu = new_mtu;
3417	if (!err && dev->flags & IFF_UP)
3418		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3419	return err;
3420}
3421
3422int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3423{
3424	int err;
3425
3426	if (!dev->set_mac_address)
3427		return -EOPNOTSUPP;
3428	if (sa->sa_family != dev->type)
3429		return -EINVAL;
3430	if (!netif_device_present(dev))
3431		return -ENODEV;
3432	err = dev->set_mac_address(dev, sa);
3433	if (!err)
3434		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3435	return err;
3436}
3437
3438/*
3439 *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3440 */
3441static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3442{
3443	int err;
3444	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3445
3446	if (!dev)
3447		return -ENODEV;
3448
3449	switch (cmd) {
3450		case SIOCGIFFLAGS:	/* Get interface flags */
3451			ifr->ifr_flags = dev_get_flags(dev);
3452			return 0;
3453
3454		case SIOCGIFMETRIC:	/* Get the metric on the interface
3455					   (currently unused) */
3456			ifr->ifr_metric = 0;
3457			return 0;
3458
3459		case SIOCGIFMTU:	/* Get the MTU of a device */
3460			ifr->ifr_mtu = dev->mtu;
3461			return 0;
3462
3463		case SIOCGIFHWADDR:
3464			if (!dev->addr_len)
3465				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3466			else
3467				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3468				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3469			ifr->ifr_hwaddr.sa_family = dev->type;
3470			return 0;
3471
3472		case SIOCGIFSLAVE:
3473			err = -EINVAL;
3474			break;
3475
3476		case SIOCGIFMAP:
3477			ifr->ifr_map.mem_start = dev->mem_start;
3478			ifr->ifr_map.mem_end   = dev->mem_end;
3479			ifr->ifr_map.base_addr = dev->base_addr;
3480			ifr->ifr_map.irq       = dev->irq;
3481			ifr->ifr_map.dma       = dev->dma;
3482			ifr->ifr_map.port      = dev->if_port;
3483			return 0;
3484
3485		case SIOCGIFINDEX:
3486			ifr->ifr_ifindex = dev->ifindex;
3487			return 0;
3488
3489		case SIOCGIFTXQLEN:
3490			ifr->ifr_qlen = dev->tx_queue_len;
3491			return 0;
3492
3493		default:
3494			/* dev_ioctl() should ensure this case
3495			 * is never reached
3496			 */
3497			WARN_ON(1);
3498			err = -EINVAL;
3499			break;
3500
3501	}
3502	return err;
3503}
3504
3505/*
3506 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3507 */
3508static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3509{
3510	int err;
3511	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3512
3513	if (!dev)
3514		return -ENODEV;
3515
3516	switch (cmd) {
3517		case SIOCSIFFLAGS:	/* Set interface flags */
3518			return dev_change_flags(dev, ifr->ifr_flags);
3519
3520		case SIOCSIFMETRIC:	/* Set the metric on the interface
3521					   (currently unused) */
3522			return -EOPNOTSUPP;
3523
3524		case SIOCSIFMTU:	/* Set the MTU of a device */
3525			return dev_set_mtu(dev, ifr->ifr_mtu);
3526
3527		case SIOCSIFHWADDR:
3528			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3529
3530		case SIOCSIFHWBROADCAST:
3531			if (ifr->ifr_hwaddr.sa_family != dev->type)
3532				return -EINVAL;
3533			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3534			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3535			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3536			return 0;
3537
3538		case SIOCSIFMAP:
3539			if (dev->set_config) {
3540				if (!netif_device_present(dev))
3541					return -ENODEV;
3542				return dev->set_config(dev, &ifr->ifr_map);
3543			}
3544			return -EOPNOTSUPP;
3545
3546		case SIOCADDMULTI:
3547			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3548			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3549				return -EINVAL;
3550			if (!netif_device_present(dev))
3551				return -ENODEV;
3552			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3553					  dev->addr_len, 1);
3554
3555		case SIOCDELMULTI:
3556			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3557			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3558				return -EINVAL;
3559			if (!netif_device_present(dev))
3560				return -ENODEV;
3561			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3562					     dev->addr_len, 1);
3563
3564		case SIOCSIFTXQLEN:
3565			if (ifr->ifr_qlen < 0)
3566				return -EINVAL;
3567			dev->tx_queue_len = ifr->ifr_qlen;
3568			return 0;
3569
3570		case SIOCSIFNAME:
3571			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3572			return dev_change_name(dev, ifr->ifr_newname);
3573
3574		/*
3575		 *	Unknown or private ioctl
3576		 */
3577
3578		default:
3579			if ((cmd >= SIOCDEVPRIVATE &&
3580			    cmd <= SIOCDEVPRIVATE + 15) ||
3581			    cmd == SIOCBONDENSLAVE ||
3582			    cmd == SIOCBONDRELEASE ||
3583			    cmd == SIOCBONDSETHWADDR ||
3584			    cmd == SIOCBONDSLAVEINFOQUERY ||
3585			    cmd == SIOCBONDINFOQUERY ||
3586			    cmd == SIOCBONDCHANGEACTIVE ||
3587			    cmd == SIOCGMIIPHY ||
3588			    cmd == SIOCGMIIREG ||
3589			    cmd == SIOCSMIIREG ||
3590			    cmd == SIOCBRADDIF ||
3591			    cmd == SIOCBRDELIF ||
3592			    cmd == SIOCWANDEV) {
3593				err = -EOPNOTSUPP;
3594				if (dev->do_ioctl) {
3595					if (netif_device_present(dev))
3596						err = dev->do_ioctl(dev, ifr,
3597								    cmd);
3598					else
3599						err = -ENODEV;
3600				}
3601			} else
3602				err = -EINVAL;
3603
3604	}
3605	return err;
3606}
3607
3608/*
3609 *	This function handles all "interface"-type I/O control requests. The actual
3610 *	'doing' part of this is dev_ifsioc above.
3611 */
3612
3613/**
3614 *	dev_ioctl	-	network device ioctl
3615 *	@net: the applicable net namespace
3616 *	@cmd: command to issue
3617 *	@arg: pointer to a struct ifreq in user space
3618 *
3619 *	Issue ioctl functions to devices. This is normally called by the
3620 *	user space syscall interfaces but can sometimes be useful for
3621 *	other purposes. The return value is the return from the syscall if
3622 *	positive or a negative errno code on error.
3623 */
3624
3625int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3626{
3627	struct ifreq ifr;
3628	int ret;
3629	char *colon;
3630
3631	/* One special case: SIOCGIFCONF takes ifconf argument
3632	   and requires shared lock, because it sleeps writing
3633	   to user space.
3634	 */
3635
3636	if (cmd == SIOCGIFCONF) {
3637		rtnl_lock();
3638		ret = dev_ifconf(net, (char __user *) arg);
3639		rtnl_unlock();
3640		return ret;
3641	}
3642	if (cmd == SIOCGIFNAME)
3643		return dev_ifname(net, (struct ifreq __user *)arg);
3644
3645	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3646		return -EFAULT;
3647
3648	ifr.ifr_name[IFNAMSIZ-1] = 0;
3649
3650	colon = strchr(ifr.ifr_name, ':');
3651	if (colon)
3652		*colon = 0;
3653
3654	/*
3655	 *	See which interface the caller is talking about.
3656	 */
3657
3658	switch (cmd) {
3659		/*
3660		 *	These ioctl calls:
3661		 *	- can be done by all.
3662		 *	- atomic and do not require locking.
3663		 *	- return a value
3664		 */
3665		case SIOCGIFFLAGS:
3666		case SIOCGIFMETRIC:
3667		case SIOCGIFMTU:
3668		case SIOCGIFHWADDR:
3669		case SIOCGIFSLAVE:
3670		case SIOCGIFMAP:
3671		case SIOCGIFINDEX:
3672		case SIOCGIFTXQLEN:
3673			dev_load(net, ifr.ifr_name);
3674			read_lock(&dev_base_lock);
3675			ret = dev_ifsioc_locked(net, &ifr, cmd);
3676			read_unlock(&dev_base_lock);
3677			if (!ret) {
3678				if (colon)
3679					*colon = ':';
3680				if (copy_to_user(arg, &ifr,
3681						 sizeof(struct ifreq)))
3682					ret = -EFAULT;
3683			}
3684			return ret;
3685
3686		case SIOCETHTOOL:
3687			dev_load(net, ifr.ifr_name);
3688			rtnl_lock();
3689			ret = dev_ethtool(net, &ifr);
3690			rtnl_unlock();
3691			if (!ret) {
3692				if (colon)
3693					*colon = ':';
3694				if (copy_to_user(arg, &ifr,
3695						 sizeof(struct ifreq)))
3696					ret = -EFAULT;
3697			}
3698			return ret;
3699
3700		/*
3701		 *	These ioctl calls:
3702		 *	- require superuser power.
3703		 *	- require strict serialization.
3704		 *	- return a value
3705		 */
3706		case SIOCGMIIPHY:
3707		case SIOCGMIIREG:
3708		case SIOCSIFNAME:
3709			if (!capable(CAP_NET_ADMIN))
3710				return -EPERM;
3711			dev_load(net, ifr.ifr_name);
3712			rtnl_lock();
3713			ret = dev_ifsioc(net, &ifr, cmd);
3714			rtnl_unlock();
3715			if (!ret) {
3716				if (colon)
3717					*colon = ':';
3718				if (copy_to_user(arg, &ifr,
3719						 sizeof(struct ifreq)))
3720					ret = -EFAULT;
3721			}
3722			return ret;
3723
3724		/*
3725		 *	These ioctl calls:
3726		 *	- require superuser power.
3727		 *	- require strict serialization.
3728		 *	- do not return a value
3729		 */
3730		case SIOCSIFFLAGS:
3731		case SIOCSIFMETRIC:
3732		case SIOCSIFMTU:
3733		case SIOCSIFMAP:
3734		case SIOCSIFHWADDR:
3735		case SIOCSIFSLAVE:
3736		case SIOCADDMULTI:
3737		case SIOCDELMULTI:
3738		case SIOCSIFHWBROADCAST:
3739		case SIOCSIFTXQLEN:
3740		case SIOCSMIIREG:
3741		case SIOCBONDENSLAVE:
3742		case SIOCBONDRELEASE:
3743		case SIOCBONDSETHWADDR:
3744		case SIOCBONDCHANGEACTIVE:
3745		case SIOCBRADDIF:
3746		case SIOCBRDELIF:
3747			if (!capable(CAP_NET_ADMIN))
3748				return -EPERM;
3749			/* fall through */
3750		case SIOCBONDSLAVEINFOQUERY:
3751		case SIOCBONDINFOQUERY:
3752			dev_load(net, ifr.ifr_name);
3753			rtnl_lock();
3754			ret = dev_ifsioc(net, &ifr, cmd);
3755			rtnl_unlock();
3756			return ret;
3757
3758		case SIOCGIFMEM:
3759			/* Get the per device memory space. We can add this but
3760			 * currently do not support it */
3761		case SIOCSIFMEM:
3762			/* Set the per device memory buffer space.
3763			 * Not applicable in our case */
3764		case SIOCSIFLINK:
3765			return -EINVAL;
3766
3767		/*
3768		 *	Unknown or private ioctl.
3769		 */
3770		default:
3771			if (cmd == SIOCWANDEV ||
3772			    (cmd >= SIOCDEVPRIVATE &&
3773			     cmd <= SIOCDEVPRIVATE + 15)) {
3774				dev_load(net, ifr.ifr_name);
3775				rtnl_lock();
3776				ret = dev_ifsioc(net, &ifr, cmd);
3777				rtnl_unlock();
3778				if (!ret && copy_to_user(arg, &ifr,
3779							 sizeof(struct ifreq)))
3780					ret = -EFAULT;
3781				return ret;
3782			}
3783			/* Take care of Wireless Extensions */
3784			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3785				return wext_handle_ioctl(net, &ifr, cmd, arg);
3786			return -EINVAL;
3787	}
3788}
3789
3790
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a positive interface index for which __dev_get_by_index()
 *	currently finds no device in @net. The search continues from a
 *	function-static counter that restarts at 1 once it is no longer
 *	positive. The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure the value remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
3809
3810/* Delayed registration/unregisteration */
3811static DEFINE_SPINLOCK(net_todo_list_lock);
3812static LIST_HEAD(net_todo_list);
3813
3814static void net_set_todo(struct net_device *dev)
3815{
3816	spin_lock(&net_todo_list_lock);
3817	list_add_tail(&dev->todo_list, &net_todo_list);
3818	spin_unlock(&net_todo_list_lock);
3819}
3820
3821static void rollback_registered(struct net_device *dev)
3822{
3823	BUG_ON(dev_boot_phase);
3824	ASSERT_RTNL();
3825
3826	/* Some devices call without registering for initialization unwind. */
3827	if (dev->reg_state == NETREG_UNINITIALIZED) {
3828		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3829				  "was registered\n", dev->name, dev);
3830
3831		WARN_ON(1);
3832		return;
3833	}
3834
3835	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3836
3837	/* If device is running, close it first. */
3838	dev_close(dev);
3839
3840	/* And unlink it from device chain. */
3841	unlist_netdevice(dev);
3842
3843	dev->reg_state = NETREG_UNREGISTERING;
3844
3845	synchronize_net();
3846
3847	/* Shutdown queueing discipline. */
3848	dev_shutdown(dev);
3849
3850
3851	/* Notify protocols, that we are about to destroy
3852	   this device. They should clean all the things.
3853	*/
3854	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3855
3856	/*
3857	 *	Flush the unicast and multicast chains
3858	 */
3859	dev_addr_discard(dev);
3860
3861	if (dev->uninit)
3862		dev->uninit(dev);
3863
3864	/* Notifier chain MUST detach us from master device. */
3865	WARN_ON(dev->master);
3866
3867	/* Remove entries from kobject tree */
3868	netdev_unregister_kobject(dev);
3869
3870	synchronize_net();
3871
3872	dev_put(dev);
3873}
3874
3875static void __netdev_init_queue_locks_one(struct net_device *dev,
3876					  struct netdev_queue *dev_queue,
3877					  void *_unused)
3878{
3879	spin_lock_init(&dev_queue->_xmit_lock);
3880	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3881	dev_queue->xmit_lock_owner = -1;
3882}
3883
3884static void netdev_init_queue_locks(struct net_device *dev)
3885{
3886	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3887	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3888}
3889
3890/**
3891 *	register_netdevice	- register a network device
3892 *	@dev: device to register
3893 *
3894 *	Take a completed network device structure and add it to the kernel
3895 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3896 *	chain. 0 is returned on success. A negative errno code is returned
3897 *	on a failure to set up the device, or if the name is a duplicate.
3898 *
3899 *	Callers must hold the rtnl semaphore. You may want
3900 *	register_netdev() instead of this.
3901 *
3902 *	BUGS:
3903 *	The locking appears insufficient to guarantee two parallel registers
3904 *	will not get the same name.
3905 */
3906
3907int register_netdevice(struct net_device *dev)
3908{
3909	struct hlist_head *head;
3910	struct hlist_node *p;
3911	int ret;
3912	struct net *net;
3913
3914	BUG_ON(dev_boot_phase);
3915	ASSERT_RTNL();
3916
3917	might_sleep();
3918
3919	/* When net_device's are persistent, this will be fatal. */
3920	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3921	BUG_ON(!dev_net(dev));
3922	net = dev_net(dev);
3923
3924	spin_lock_init(&dev->addr_list_lock);
3925	netdev_set_addr_lockdep_class(dev);
3926	netdev_init_queue_locks(dev);
3927
3928	dev->iflink = -1;
3929
3930	/* Init, if this function is available */
3931	if (dev->init) {
3932		ret = dev->init(dev);
3933		if (ret) {
3934			if (ret > 0)
3935				ret = -EIO;
3936			goto out;
3937		}
3938	}
3939
3940	if (!dev_valid_name(dev->name)) {
3941		ret = -EINVAL;
3942		goto err_uninit;
3943	}
3944
3945	dev->ifindex = dev_new_index(net);
3946	if (dev->iflink == -1)
3947		dev->iflink = dev->ifindex;
3948
3949	/* Check for existence of name */
3950	head = dev_name_hash(net, dev->name);
3951	hlist_for_each(p, head) {
3952		struct net_device *d
3953			= hlist_entry(p, struct net_device, name_hlist);
3954		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3955			ret = -EEXIST;
3956			goto err_uninit;
3957		}
3958	}
3959
3960	/* Fix illegal checksum combinations */
3961	if ((dev->features & NETIF_F_HW_CSUM) &&
3962	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3963		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3964		       dev->name);
3965		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3966	}
3967
3968	if ((dev->features & NETIF_F_NO_CSUM) &&
3969	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3970		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3971		       dev->name);
3972		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3973	}
3974
3975
3976	/* Fix illegal SG+CSUM combinations. */
3977	if ((dev->features & NETIF_F_SG) &&
3978	    !(dev->features & NETIF_F_ALL_CSUM)) {
3979		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3980		       dev->name);
3981		dev->features &= ~NETIF_F_SG;
3982	}
3983
3984	/* TSO requires that SG is present as well. */
3985	if ((dev->features & NETIF_F_TSO) &&
3986	    !(dev->features & NETIF_F_SG)) {
3987		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3988		       dev->name);
3989		dev->features &= ~NETIF_F_TSO;
3990	}
3991	if (dev->features & NETIF_F_UFO) {
3992		if (!(dev->features & NETIF_F_HW_CSUM)) {
3993			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3994					"NETIF_F_HW_CSUM feature.\n",
3995							dev->name);
3996			dev->features &= ~NETIF_F_UFO;
3997		}
3998		if (!(dev->features & NETIF_F_SG)) {
3999			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
4000					"NETIF_F_SG feature.\n",
4001					dev->name);
4002			dev->features &= ~NETIF_F_UFO;
4003		}
4004	}
4005
4006	/* Enable software GSO if SG is supported. */
4007	if (dev->features & NETIF_F_SG)
4008		dev->features |= NETIF_F_GSO;
4009
4010	netdev_initialize_kobject(dev);
4011	ret = netdev_register_kobject(dev);
4012	if (ret)
4013		goto err_uninit;
4014	dev->reg_state = NETREG_REGISTERED;
4015
4016	/*
4017	 *	Default initial state at registry is that the
4018	 *	device is present.
4019	 */
4020
4021	set_bit(__LINK_STATE_PRESENT, &dev->state);
4022
4023	dev_init_scheduler(dev);
4024	dev_hold(dev);
4025	list_netdevice(dev);
4026
4027	/* Notify protocols, that a new device appeared. */
4028	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4029	ret = notifier_to_errno(ret);
4030	if (ret) {
4031		rollback_registered(dev);
4032		dev->reg_state = NETREG_UNREGISTERED;
4033	}
4034
4035out:
4036	return ret;
4037
4038err_uninit:
4039	if (dev->uninit)
4040		dev->uninit(dev);
4041	goto out;
4042}
4043
4044/**
4045 *	register_netdev	- register a network device
4046 *	@dev: device to register
4047 *
4048 *	Take a completed network device structure and add it to the kernel
4049 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4050 *	chain. 0 is returned on success. A negative errno code is returned
4051 *	on a failure to set up the device, or if the name is a duplicate.
4052 *
4053 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4054 *	and expands the device name if you passed a format string to
4055 *	alloc_netdev.
4056 */
4057int register_netdev(struct net_device *dev)
4058{
4059	int err;
4060
4061	rtnl_lock();
4062
4063	/*
4064	 * If the name is a format string the caller wants us to do a
4065	 * name allocation.
4066	 */
4067	if (strchr(dev->name, '%')) {
4068		err = dev_alloc_name(dev, dev->name);
4069		if (err < 0)
4070			goto out;
4071	}
4072
4073	err = register_netdevice(dev);
4074out:
4075	rtnl_unlock();
4076	return err;
4077}
4078EXPORT_SYMBOL(register_netdev);
4079
4080/*
4081 * netdev_wait_allrefs - wait until all references are gone.
4082 *
4083 * This is called when unregistering network devices.
4084 *
4085 * Any protocol or device that holds a reference should register
4086 * for netdevice notification, and cleanup and put back the
4087 * reference if they receive an UNREGISTER event.
4088 * We can get stuck here if buggy protocols don't correctly
4089 * call dev_put.
4090 */
4091static void netdev_wait_allrefs(struct net_device *dev)
4092{
4093	unsigned long rebroadcast_time, warning_time;
4094
4095	rebroadcast_time = warning_time = jiffies;
4096	while (atomic_read(&dev->refcnt) != 0) {
4097		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4098			rtnl_lock();
4099
4100			/* Rebroadcast unregister notification */
4101			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4102
4103			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4104				     &dev->state)) {
4105				/* We must not have linkwatch events
4106				 * pending on unregister. If this
4107				 * happens, we simply run the queue
4108				 * unscheduled, resulting in a noop
4109				 * for this device.
4110				 */
4111				linkwatch_run_queue();
4112			}
4113
4114			__rtnl_unlock();
4115
4116			rebroadcast_time = jiffies;
4117		}
4118
4119		msleep(250);
4120
4121		if (time_after(jiffies, warning_time + 10 * HZ)) {
4122			printk(KERN_EMERG "unregister_netdevice: "
4123			       "waiting for %s to become free. Usage "
4124			       "count = %d\n",
4125			       dev->name, atomic_read(&dev->refcnt));
4126			warning_time = jiffies;
4127		}
4128	}
4129}
4130
4131/* The sequence is:
4132 *
4133 *	rtnl_lock();
4134 *	...
4135 *	register_netdevice(x1);
4136 *	register_netdevice(x2);
4137 *	...
4138 *	unregister_netdevice(y1);
4139 *	unregister_netdevice(y2);
4140 *      ...
4141 *	rtnl_unlock();
4142 *	free_netdev(y1);
4143 *	free_netdev(y2);
4144 *
4145 * We are invoked by rtnl_unlock() after it drops the semaphore.
4146 * This allows us to deal with problems:
4147 * 1) We can delete sysfs objects which invoke hotplug
4148 *    without deadlocking with linkwatch via keventd.
4149 * 2) Since we run with the RTNL semaphore not held, we can sleep
4150 *    safely in order to wait for the netdev refcnt to drop to zero.
4151 */
4152static DEFINE_MUTEX(net_todo_run_mutex);
4153void netdev_run_todo(void)
4154{
4155	struct list_head list;
4156
4157	/* Need to guard against multiple cpu's getting out of order. */
4158	mutex_lock(&net_todo_run_mutex);
4159
4160	/* Not safe to do outside the semaphore.  We must not return
4161	 * until all unregister events invoked by the local processor
4162	 * have been completed (either by this todo run, or one on
4163	 * another cpu).
4164	 */
4165	if (list_empty(&net_todo_list))
4166		goto out;
4167
4168	/* Snapshot list, allow later requests */
4169	spin_lock(&net_todo_list_lock);
4170	list_replace_init(&net_todo_list, &list);
4171	spin_unlock(&net_todo_list_lock);
4172
4173	while (!list_empty(&list)) {
4174		struct net_device *dev
4175			= list_entry(list.next, struct net_device, todo_list);
4176		list_del(&dev->todo_list);
4177
4178		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4179			printk(KERN_ERR "network todo '%s' but state %d\n",
4180			       dev->name, dev->reg_state);
4181			dump_stack();
4182			continue;
4183		}
4184
4185		dev->reg_state = NETREG_UNREGISTERED;
4186
4187		on_each_cpu(flush_backlog, dev, 1);
4188
4189		netdev_wait_allrefs(dev);
4190
4191		/* paranoia */
4192		BUG_ON(atomic_read(&dev->refcnt));
4193		WARN_ON(dev->ip_ptr);
4194		WARN_ON(dev->ip6_ptr);
4195		WARN_ON(dev->dn_ptr);
4196
4197		if (dev->destructor)
4198			dev->destructor(dev);
4199
4200		/* Free network device */
4201		kobject_put(&dev->dev.kobj);
4202	}
4203
4204out:
4205	mutex_unlock(&net_todo_run_mutex);
4206}
4207
4208static struct net_device_stats *internal_stats(struct net_device *dev)
4209{
4210	return &dev->stats;
4211}
4212
4213static void netdev_init_one_queue(struct net_device *dev,
4214				  struct netdev_queue *queue,
4215				  void *_unused)
4216{
4217	queue->dev = dev;
4218}
4219
4220static void netdev_init_queues(struct net_device *dev)
4221{
4222	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4223	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4224	spin_lock_init(&dev->tx_global_lock);
4225}
4226
4227/**
4228 *	alloc_netdev_mq - allocate network device
4229 *	@sizeof_priv:	size of private data to allocate space for
4230 *	@name:		device name format string
4231 *	@setup:		callback to initialize device
4232 *	@queue_count:	the number of subqueues to allocate
4233 *
4234 *	Allocates a struct net_device with private data area for driver use
4235 *	and performs basic initialization.  Also allocates subquue structs
4236 *	for each queue on the device at the end of the netdevice.
4237 */
4238struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4239		void (*setup)(struct net_device *), unsigned int queue_count)
4240{
4241	struct netdev_queue *tx;
4242	struct net_device *dev;
4243	size_t alloc_size;
4244	void *p;
4245
4246	BUG_ON(strlen(name) >= sizeof(dev->name));
4247
4248	alloc_size = sizeof(struct net_device);
4249	if (sizeof_priv) {
4250		/* ensure 32-byte alignment of private area */
4251		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4252		alloc_size += sizeof_priv;
4253	}
4254	/* ensure 32-byte alignment of whole construct */
4255	alloc_size += NETDEV_ALIGN_CONST;
4256
4257	p = kzalloc(alloc_size, GFP_KERNEL);
4258	if (!p) {
4259		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4260		return NULL;
4261	}
4262
4263	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4264	if (!tx) {
4265		printk(KERN_ERR "alloc_netdev: Unable to allocate "
4266		       "tx qdiscs.\n");
4267		kfree(p);
4268		return NULL;
4269	}
4270
4271	dev = (struct net_device *)
4272		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4273	dev->padded = (char *)dev - (char *)p;
4274	dev_net_set(dev, &init_net);
4275
4276	dev->_tx = tx;
4277	dev->num_tx_queues = queue_count;
4278	dev->real_num_tx_queues = queue_count;
4279
4280	if (sizeof_priv) {
4281		dev->priv = ((char *)dev +
4282			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4283			      & ~NETDEV_ALIGN_CONST));
4284	}
4285
4286	dev->gso_max_size = GSO_MAX_SIZE;
4287
4288	netdev_init_queues(dev);
4289
4290	dev->get_stats = internal_stats;
4291	netpoll_netdev_init(dev);
4292	setup(dev);
4293	strcpy(dev->name, name);
4294	return dev;
4295}
4296EXPORT_SYMBOL(alloc_netdev_mq);
4297
4298/**
4299 *	free_netdev - free network device
4300 *	@dev: device
4301 *
4302 *	This function does the last stage of destroying an allocated device
4303 * 	interface. The reference to the device object is released.
4304 *	If this is the last reference then it will be freed.
4305 */
4306void free_netdev(struct net_device *dev)
4307{
4308	release_net(dev_net(dev));
4309
4310	kfree(dev->_tx);
4311
4312	/*  Compatibility with error handling in drivers */
4313	if (dev->reg_state == NETREG_UNINITIALIZED) {
4314		kfree((char *)dev - dev->padded);
4315		return;
4316	}
4317
4318	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4319	dev->reg_state = NETREG_RELEASED;
4320
4321	/* will free via device release */
4322	put_device(&dev->dev);
4323}
4324
/*
 * Synchronize with packet receive processing.
 *
 * Implemented as synchronize_rcu(); annotated with might_sleep().
 */
void synchronize_net(void)
{
	might_sleep();

	synchronize_rcu();
}
4331
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. The remaining work is queued on the
 *	todo list.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Tear down now ... */
	rollback_registered(dev);

	/* ... and finish processing the unregister after unlock. */
	net_set_todo(dev);
}
4351
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore around the call.  In general you want to
 *	use this and not unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();

	unregister_netdevice(dev);

	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
4371
4372/**
4373 *	dev_change_net_namespace - move device to different nethost namespace
4374 *	@dev: device
4375 *	@net: network namespace
4376 *	@pat: If not NULL name pattern to try if the current device name
4377 *	      is already taken in the destination network namespace.
4378 *
4379 *	This function shuts down a device interface and moves it
4380 *	to a new network namespace. On success 0 is returned, on
4381 *	a failure a netagive errno code is returned.
4382 *
4383 *	Callers must hold the rtnl semaphore.
4384 */
4385
4386int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4387{
4388	char buf[IFNAMSIZ];
4389	const char *destname;
4390	int err;
4391
4392	ASSERT_RTNL();
4393
4394	/* Don't allow namespace local devices to be moved. */
4395	err = -EINVAL;
4396	if (dev->features & NETIF_F_NETNS_LOCAL)
4397		goto out;
4398
4399	/* Ensure the device has been registrered */
4400	err = -EINVAL;
4401	if (dev->reg_state != NETREG_REGISTERED)
4402		goto out;
4403
4404	/* Get out if there is nothing todo */
4405	err = 0;
4406	if (net_eq(dev_net(dev), net))
4407		goto out;
4408
4409	/* Pick the destination device name, and ensure
4410	 * we can use it in the destination network namespace.
4411	 */
4412	err = -EEXIST;
4413	destname = dev->name;
4414	if (__dev_get_by_name(net, destname)) {
4415		/* We get here if we can't use the current device name */
4416		if (!pat)
4417			goto out;
4418		if (!dev_valid_name(pat))
4419			goto out;
4420		if (strchr(pat, '%')) {
4421			if (__dev_alloc_name(net, pat, buf) < 0)
4422				goto out;
4423			destname = buf;
4424		} else
4425			destname = pat;
4426		if (__dev_get_by_name(net, destname))
4427			goto out;
4428	}
4429
4430	/*
4431	 * And now a mini version of register_netdevice unregister_netdevice.
4432	 */
4433
4434	/* If device is running close it first. */
4435	dev_close(dev);
4436
4437	/* And unlink it from device chain */
4438	err = -ENODEV;
4439	unlist_netdevice(dev);
4440
4441	synchronize_net();
4442
4443	/* Shutdown queueing discipline. */
4444	dev_shutdown(dev);
4445
4446	/* Notify protocols, that we are about to destroy
4447	   this device. They should clean all the things.
4448	*/
4449	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4450
4451	/*
4452	 *	Flush the unicast and multicast chains
4453	 */
4454	dev_addr_discard(dev);
4455
4456	/* Actually switch the network namespace */
4457	dev_net_set(dev, net);
4458
4459	/* Assign the new device name */
4460	if (destname != dev->name)
4461		strcpy(dev->name, destname);
4462
4463	/* If there is an ifindex conflict assign a new one */
4464	if (__dev_get_by_index(net, dev->ifindex)) {
4465		int iflink = (dev->iflink == dev->ifindex);
4466		dev->ifindex = dev_new_index(net);
4467		if (iflink)
4468			dev->iflink = dev->ifindex;
4469	}
4470
4471	/* Fixup kobjects */
4472	netdev_unregister_kobject(dev);
4473	err = netdev_register_kobject(dev);
4474	WARN_ON(err);
4475
4476	/* Add the device back in the hashes */
4477	list_netdevice(dev);
4478
4479	/* Notify protocols, that a new device appeared. */
4480	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4481
4482	synchronize_net();
4483	err = 0;
4484out:
4485	return err;
4486}
4487
4488static int dev_cpu_callback(struct notifier_block *nfb,
4489			    unsigned long action,
4490			    void *ocpu)
4491{
4492	struct sk_buff **list_skb;
4493	struct Qdisc **list_net;
4494	struct sk_buff *skb;
4495	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4496	struct softnet_data *sd, *oldsd;
4497
4498	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4499		return NOTIFY_OK;
4500
4501	local_irq_disable();
4502	cpu = smp_processor_id();
4503	sd = &per_cpu(softnet_data, cpu);
4504	oldsd = &per_cpu(softnet_data, oldcpu);
4505
4506	/* Find end of our completion_queue. */
4507	list_skb = &sd->completion_queue;
4508	while (*list_skb)
4509		list_skb = &(*list_skb)->next;
4510	/* Append completion queue from offline CPU. */
4511	*list_skb = oldsd->completion_queue;
4512	oldsd->completion_queue = NULL;
4513
4514	/* Find end of our output_queue. */
4515	list_net = &sd->output_queue;
4516	while (*list_net)
4517		list_net = &(*list_net)->next_sched;
4518	/* Append output queue from offline CPU. */
4519	*list_net = oldsd->output_queue;
4520	oldsd->output_queue = NULL;
4521
4522	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4523	local_irq_enable();
4524
4525	/* Process offline CPU's input_pkt_queue */
4526	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4527		netif_rx(skb);
4528
4529	return NOTIFY_OK;
4530}
4531
4532#ifdef CONFIG_NET_DMA
4533/**
4534 * net_dma_rebalance - try to maintain one DMA channel per CPU
4535 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4536 *
4537 * This is called when the number of channels allocated to the net_dma client
4538 * changes.  The net_dma client tries to have one DMA channel per CPU.
4539 */
4540
4541static void net_dma_rebalance(struct net_dma *net_dma)
4542{
4543	unsigned int cpu, i, n, chan_idx;
4544	struct dma_chan *chan;
4545
4546	if (cpus_empty(net_dma->channel_mask)) {
4547		for_each_online_cpu(cpu)
4548			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4549		return;
4550	}
4551
4552	i = 0;
4553	cpu = first_cpu(cpu_online_map);
4554
4555	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4556		chan = net_dma->channels[chan_idx];
4557
4558		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4559		   + (i < (num_online_cpus() %
4560			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4561
4562		while(n) {
4563			per_cpu(softnet_data, cpu).net_dma = chan;
4564			cpu = next_cpu(cpu, cpu_online_map);
4565			n--;
4566		}
4567		i++;
4568	}
4569}
4570
4571/**
4572 * netdev_dma_event - event callback for the net_dma_client
4573 * @client: should always be net_dma_client
4574 * @chan: DMA channel for the event
4575 * @state: DMA state to be handled
4576 */
4577static enum dma_state_client
4578netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4579	enum dma_state state)
4580{
4581	int i, found = 0, pos = -1;
4582	struct net_dma *net_dma =
4583		container_of(client, struct net_dma, client);
4584	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4585
4586	spin_lock(&net_dma->lock);
4587	switch (state) {
4588	case DMA_RESOURCE_AVAILABLE:
4589		for (i = 0; i < nr_cpu_ids; i++)
4590			if (net_dma->channels[i] == chan) {
4591				found = 1;
4592				break;
4593			} else if (net_dma->channels[i] == NULL && pos < 0)
4594				pos = i;
4595
4596		if (!found && pos >= 0) {
4597			ack = DMA_ACK;
4598			net_dma->channels[pos] = chan;
4599			cpu_set(pos, net_dma->channel_mask);
4600			net_dma_rebalance(net_dma);
4601		}
4602		break;
4603	case DMA_RESOURCE_REMOVED:
4604		for (i = 0; i < nr_cpu_ids; i++)
4605			if (net_dma->channels[i] == chan) {
4606				found = 1;
4607				pos = i;
4608				break;
4609			}
4610
4611		if (found) {
4612			ack = DMA_ACK;
4613			cpu_clear(pos, net_dma->channel_mask);
4614			net_dma->channels[i] = NULL;
4615			net_dma_rebalance(net_dma);
4616		}
4617		break;
4618	default:
4619		break;
4620	}
4621	spin_unlock(&net_dma->lock);
4622
4623	return ack;
4624}
4625
4626/**
4627 * netdev_dma_regiser - register the networking subsystem as a DMA client
4628 */
4629static int __init netdev_dma_register(void)
4630{
4631	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4632								GFP_KERNEL);
4633	if (unlikely(!net_dma.channels)) {
4634		printk(KERN_NOTICE
4635				"netdev_dma: no memory for net_dma.channels\n");
4636		return -ENOMEM;
4637	}
4638	spin_lock_init(&net_dma.lock);
4639	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4640	dma_async_client_register(&net_dma.client);
4641	dma_async_client_chan_request(&net_dma.client);
4642	return 0;
4643}
4644
4645#else
4646static int __init netdev_dma_register(void) { return -ENODEV; }
4647#endif /* CONFIG_NET_DMA */
4648
4649/**
4650 *	netdev_compute_feature - compute conjunction of two feature sets
4651 *	@all: first feature set
4652 *	@one: second feature set
4653 *
4654 *	Computes a new feature set after adding a device with feature set
4655 *	@one to the master device with current feature set @all.  Returns
4656 *	the new feature set.
4657 */
4658int netdev_compute_features(unsigned long all, unsigned long one)
4659{
4660	/* if device needs checksumming, downgrade to hw checksumming */
4661	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4662		all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4663
4664	/* if device can't do all checksum, downgrade to ipv4/ipv6 */
4665	if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4666		all ^= NETIF_F_HW_CSUM
4667			| NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4668
4669	if (one & NETIF_F_GSO)
4670		one |= NETIF_F_GSO_SOFTWARE;
4671	one |= NETIF_F_GSO;
4672
4673	/* If even one device supports robust GSO, enable it for all. */
4674	if (one & NETIF_F_GSO_ROBUST)
4675		all |= NETIF_F_GSO_ROBUST;
4676
4677	all &= one | NETIF_F_LLTX;
4678
4679	if (!(all & NETIF_F_ALL_CSUM))
4680		all &= ~NETIF_F_SG;
4681	if (!(all & NETIF_F_SG))
4682		all &= ~NETIF_F_GSO_MASK;
4683
4684	return all;
4685}
4686EXPORT_SYMBOL(netdev_compute_features);
4687
4688static struct hlist_head *netdev_create_hash(void)
4689{
4690	int i;
4691	struct hlist_head *hash;
4692
4693	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4694	if (hash != NULL)
4695		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4696			INIT_HLIST_HEAD(&hash[i]);
4697
4698	return hash;
4699}
4700
4701/* Initialize per network namespace state */
4702static int __net_init netdev_init(struct net *net)
4703{
4704	INIT_LIST_HEAD(&net->dev_base_head);
4705
4706	net->dev_name_head = netdev_create_hash();
4707	if (net->dev_name_head == NULL)
4708		goto err_name;
4709
4710	net->dev_index_head = netdev_create_hash();
4711	if (net->dev_index_head == NULL)
4712		goto err_idx;
4713
4714	return 0;
4715
4716err_idx:
4717	kfree(net->dev_name_head);
4718err_name:
4719	return -ENOMEM;
4720}
4721
4722char *netdev_drivername(struct net_device *dev, char *buffer, int len)
4723{
4724	struct device_driver *driver;
4725	struct device *parent;
4726
4727	if (len <= 0 || !buffer)
4728		return buffer;
4729	buffer[0] = 0;
4730
4731	parent = dev->dev.parent;
4732
4733	if (!parent)
4734		return buffer;
4735
4736	driver = parent->driver;
4737	if (driver && driver->name)
4738		strlcpy(buffer, driver->name, len);
4739	return buffer;
4740}
4741
4742static void __net_exit netdev_exit(struct net *net)
4743{
4744	kfree(net->dev_name_head);
4745	kfree(net->dev_index_head);
4746}
4747
/*
 * Per network namespace hooks for the core device state: netdev_init()
 * builds the device list and hash tables, netdev_exit() frees the tables.
 * Registered from net_dev_init() with register_pernet_subsys().
 */
static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};
4752
4753static void __net_exit default_device_exit(struct net *net)
4754{
4755	struct net_device *dev, *next;
4756	/*
4757	 * Push all migratable of the network devices back to the
4758	 * initial network namespace
4759	 */
4760	rtnl_lock();
4761	for_each_netdev_safe(net, dev, next) {
4762		int err;
4763		char fb_name[IFNAMSIZ];
4764
4765		/* Ignore unmoveable devices (i.e. loopback) */
4766		if (dev->features & NETIF_F_NETNS_LOCAL)
4767			continue;
4768
4769		/* Push remaing network devices to init_net */
4770		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4771		err = dev_change_net_namespace(dev, &init_net, fb_name);
4772		if (err) {
4773			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4774				__func__, dev->name, err);
4775			BUG();
4776		}
4777	}
4778	rtnl_unlock();
4779}
4780
/*
 * Exit-only per network namespace hooks: default_device_exit() pushes the
 * movable devices of a namespace back to init_net.  Registered from
 * net_dev_init() with register_pernet_device().
 */
static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};
4784
4785/*
4786 *	Initialize the DEV module. At boot time this walks the device list and
4787 *	unhooks any devices that fail to initialise (normally hardware not
4788 *	present) and leaves us with a valid list of present and active devices.
4789 *
4790 */
4791
4792/*
4793 *       This is called single threaded during boot, so no need
4794 *       to take the rtnl semaphore.
4795 */
4796static int __init net_dev_init(void)
4797{
4798	int i, rc = -ENOMEM;
4799
4800	BUG_ON(!dev_boot_phase);
4801
4802	if (dev_proc_init())
4803		goto out;
4804
4805	if (netdev_kobject_init())
4806		goto out;
4807
4808	INIT_LIST_HEAD(&ptype_all);
4809	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4810		INIT_LIST_HEAD(&ptype_base[i]);
4811
4812	if (register_pernet_subsys(&netdev_net_ops))
4813		goto out;
4814
4815	if (register_pernet_device(&default_device_ops))
4816		goto out;
4817
4818	/*
4819	 *	Initialise the packet receive queues.
4820	 */
4821
4822	for_each_possible_cpu(i) {
4823		struct softnet_data *queue;
4824
4825		queue = &per_cpu(softnet_data, i);
4826		skb_queue_head_init(&queue->input_pkt_queue);
4827		queue->completion_queue = NULL;
4828		INIT_LIST_HEAD(&queue->poll_list);
4829
4830		queue->backlog.poll = process_backlog;
4831		queue->backlog.weight = weight_p;
4832	}
4833
4834	netdev_dma_register();
4835
4836	dev_boot_phase = 0;
4837
4838	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4839	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4840
4841	hotcpu_notifier(dev_cpu_callback, 0);
4842	dst_init();
4843	dev_mcast_init();
4844	rc = 0;
4845out:
4846	return rc;
4847}
4848
4849subsys_initcall(net_dev_init);
4850
/* Symbols from this file that are exported unconditionally. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks: exported only when bridging is built in or as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

/* dev_load is exported only when CONFIG_KMOD is set. */
#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

/* The per-CPU softnet_data is exported as a per-CPU symbol. */
EXPORT_PER_CPU_SYMBOL(softnet_data);