net/core/dev.c at v2.6.31-rc4 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / net / core / dev.c
at v2.6.31-rc4 5668 lines 139 kB view raw
   1/*
   2 * 	NET3	Protocol independent device support routines.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 *	Derived from the non IP parts of dev.c 1.0.19
  10 * 		Authors:	Ross Biro
  11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *	Additional Authors:
  15 *		Florian la Roche <rzsfl@rz.uni-sb.de>
  16 *		Alan Cox <gw4pts@gw4pts.ampr.org>
  17 *		David Hinds <dahinds@users.sourceforge.net>
  18 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19 *		Adam Sulmicki <adam@cfar.umd.edu>
  20 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21 *
  22 *	Changes:
  23 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24 *              			to 2 if register_netdev gets called
  25 *              			before net_dev_init & also removed a
  26 *              			few lines of code in the process.
  27 *		Alan Cox	:	device private ioctl copies fields back.
  28 *		Alan Cox	:	Transmit queue code does relevant
  29 *					stunts to keep the queue safe.
  30 *		Alan Cox	:	Fixed double lock.
  31 *		Alan Cox	:	Fixed promisc NULL pointer trap
  32 *		????????	:	Support the full private ioctl range
  33 *		Alan Cox	:	Moved ioctl permission check into
  34 *					drivers
  35 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
  36 *		Alan Cox	:	100 backlog just doesn't cut it when
  37 *					you start doing multicast video 8)
  38 *		Alan Cox	:	Rewrote net_bh and list manager.
  39 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
  40 *		Alan Cox	:	Took out transmit every packet pass
  41 *					Saved a few bytes in the ioctl handler
  42 *		Alan Cox	:	Network driver sets packet type before
  43 *					calling netif_rx. Saves a function
  44 *					call a packet.
  45 *		Alan Cox	:	Hashed net_bh()
  46 *		Richard Kooijman:	Timestamp fixes.
  47 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
  48 *		Alan Cox	:	Device lock protection.
  49 *		Alan Cox	: 	Fixed nasty side effect of device close
  50 *					changes.
  51 *		Rudi Cilibrasi	:	Pass the right thing to
  52 *					set_mac_address()
  53 *		Dave Miller	:	32bit quantity for the device lock to
  54 *					make it work out on a Sparc.
  55 *		Bjorn Ekwall	:	Added KERNELD hack.
  56 *		Alan Cox	:	Cleaned up the backlog initialise.
  57 *		Craig Metz	:	SIOCGIFCONF fix if space for under
  58 *					1 device.
  59 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
  60 *					is no device open function.
  61 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
  62 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
  63 *		Cyrus Durgin	:	Cleaned for KMOD
  64 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
  65 *					A network device unload needs to purge
  66 *					the backlog queue.
  67 *	Paul Rusty Russell	:	SIOCSIFNAME
  68 *              Pekka Riikonen  :	Netdev boot-time settings code
  69 *              Andrew Morton   :       Make unregister_netdevice wait
  70 *              			indefinitely on dev->refcnt
  71 * 		J Hadi Salim	:	- Backlog queue sampling
  72 *				        - netif_rx() feedback
  73 */
  74
  75#include <asm/uaccess.h>
  76#include <asm/system.h>
  77#include <linux/bitops.h>
  78#include <linux/capability.h>
  79#include <linux/cpu.h>
  80#include <linux/types.h>
  81#include <linux/kernel.h>
  82#include <linux/sched.h>
  83#include <linux/mutex.h>
  84#include <linux/string.h>
  85#include <linux/mm.h>
  86#include <linux/socket.h>
  87#include <linux/sockios.h>
  88#include <linux/errno.h>
  89#include <linux/interrupt.h>
  90#include <linux/if_ether.h>
  91#include <linux/netdevice.h>
  92#include <linux/etherdevice.h>
  93#include <linux/ethtool.h>
  94#include <linux/notifier.h>
  95#include <linux/skbuff.h>
  96#include <net/net_namespace.h>
  97#include <net/sock.h>
  98#include <linux/rtnetlink.h>
  99#include <linux/proc_fs.h>
 100#include <linux/seq_file.h>
 101#include <linux/stat.h>
 102#include <linux/if_bridge.h>
 103#include <linux/if_macvlan.h>
 104#include <net/dst.h>
 105#include <net/pkt_sched.h>
 106#include <net/checksum.h>
 107#include <linux/highmem.h>
 108#include <linux/init.h>
 109#include <linux/kmod.h>
 110#include <linux/module.h>
 111#include <linux/netpoll.h>
 112#include <linux/rcupdate.h>
 113#include <linux/delay.h>
 114#include <net/wext.h>
 115#include <net/iw_handler.h>
 116#include <asm/current.h>
 117#include <linux/audit.h>
 118#include <linux/dmaengine.h>
 119#include <linux/err.h>
 120#include <linux/ctype.h>
 121#include <linux/if_arp.h>
 122#include <linux/if_vlan.h>
 123#include <linux/ip.h>
 124#include <net/ip.h>
 125#include <linux/ipv6.h>
 126#include <linux/in.h>
 127#include <linux/jhash.h>
 128#include <linux/random.h>
 129#include <trace/events/napi.h>
 130
 131#include "net-sysfs.h"
 132
 133/* Instead of increasing this, you should create a hash table. */
 134#define MAX_GRO_SKBS 8
 135
 136/* This should be increased if a protocol with a bigger head is added. */
 137#define GRO_MAX_HEAD (MAX_HEADER + 128)
 138
 139/*
 140 *	The list of packet types we will receive (as opposed to discard)
 141 *	and the routines to invoke.
 142 *
 143 *	Why 16. Because with 16 the only overlap we get on a hash of the
 144 *	low nibble of the protocol value is RARP/SNAP/X.25.
 145 *
 146 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 147 *             sure which should go first, but I bet it won't make much
 148 *             difference if we are running VLANs.  The good news is that
 149 *             this protocol won't be in the list unless compiled in, so
 150 *             the average user (w/out VLANs) will not be adversely affected.
 151 *             --BLG
 152 *
 153 *		0800	IP
 154 *		8100    802.1Q VLAN
 155 *		0001	802.3
 156 *		0002	AX.25
 157 *		0004	802.2
 158 *		8035	RARP
 159 *		0005	SNAP
 160 *		0805	X.25
 161 *		0806	ARP
 162 *		8137	IPX
 163 *		0009	Localtalk
 164 *		86DD	IPv6
 165 */
 166
 167#define PTYPE_HASH_SIZE	(16)
 168#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 169
 170static DEFINE_SPINLOCK(ptype_lock);
 171static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 172static struct list_head ptype_all __read_mostly;	/* Taps */
 173
 174/*
 175 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 176 * semaphore.
 177 *
 178 * Pure readers hold dev_base_lock for reading.
 179 *
 180 * Writers must hold the rtnl semaphore while they loop through the
 181 * dev_base_head list, and hold dev_base_lock for writing when they do the
 182 * actual updates.  This allows pure readers to access the list even
 183 * while a writer is preparing to update it.
 184 *
 185 * To put it another way, dev_base_lock is held for writing only to
 186 * protect against pure readers; the rtnl semaphore provides the
 187 * protection against other writers.
 188 *
 189 * See, for example usages, register_netdevice() and
 190 * unregister_netdevice(), which must be called with the rtnl
 191 * semaphore held.
 192 */
 193DEFINE_RWLOCK(dev_base_lock);
 194
 195EXPORT_SYMBOL(dev_base_lock);
 196
 197#define NETDEV_HASHBITS	8
 198#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 199
 200static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 201{
 202	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 203	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 204}
 205
 206static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 207{
 208	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 209}
 210
 211/* Device list insertion */
 212static int list_netdevice(struct net_device *dev)
 213{
 214	struct net *net = dev_net(dev);
 215
 216	ASSERT_RTNL();
 217
 218	write_lock_bh(&dev_base_lock);
 219	list_add_tail(&dev->dev_list, &net->dev_base_head);
 220	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 221	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 222	write_unlock_bh(&dev_base_lock);
 223	return 0;
 224}
 225
 226/* Device list removal */
 227static void unlist_netdevice(struct net_device *dev)
 228{
 229	ASSERT_RTNL();
 230
 231	/* Unlink dev from the device chain */
 232	write_lock_bh(&dev_base_lock);
 233	list_del(&dev->dev_list);
 234	hlist_del(&dev->name_hlist);
 235	hlist_del(&dev->index_hlist);
 236	write_unlock_bh(&dev_base_lock);
 237}
 238
 239/*
 240 *	Our notifier list
 241 */
 242
 243static RAW_NOTIFIER_HEAD(netdev_chain);
 244
 245/*
 246 *	Device drivers call our routines to queue packets here. We empty the
 247 *	queue in the local softnet handler.
 248 */
 249
 250DEFINE_PER_CPU(struct softnet_data, softnet_data);
 251
 252#ifdef CONFIG_LOCKDEP
 253/*
 254 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 255 * according to dev->type
 256 */
 257static const unsigned short netdev_lock_type[] =
 258	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 259	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 260	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 261	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 262	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 263	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 264	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 265	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 266	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 267	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 268	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 269	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 270	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 271	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
 272	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
 273	 ARPHRD_VOID, ARPHRD_NONE};
 274
 275static const char *netdev_lock_name[] =
 276	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 277	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 278	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 279	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 280	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 281	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 282	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 283	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 284	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 285	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 286	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 287	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 288	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 289	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
 290	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
 291	 "_xmit_VOID", "_xmit_NONE"};
 292
 293static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 294static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 295
 296static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 297{
 298	int i;
 299
 300	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 301		if (netdev_lock_type[i] == dev_type)
 302			return i;
 303	/* the last key is used by default */
 304	return ARRAY_SIZE(netdev_lock_type) - 1;
 305}
 306
 307static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 308						 unsigned short dev_type)
 309{
 310	int i;
 311
 312	i = netdev_lock_pos(dev_type);
 313	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 314				   netdev_lock_name[i]);
 315}
 316
 317static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 318{
 319	int i;
 320
 321	i = netdev_lock_pos(dev->type);
 322	lockdep_set_class_and_name(&dev->addr_list_lock,
 323				   &netdev_addr_lock_key[i],
 324				   netdev_lock_name[i]);
 325}
 326#else
 327static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 328						 unsigned short dev_type)
 329{
 330}
 331static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 332{
 333}
 334#endif
 335
 336/*******************************************************************************
 337
 338		Protocol management and registration routines
 339
 340*******************************************************************************/
 341
 342/*
 343 *	Add a protocol ID to the list. Now that the input handler is
 344 *	smarter we can dispense with all the messy stuff that used to be
 345 *	here.
 346 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the walk over protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a protocol handler that mangles packets were
 *	first on the list, it could not tell that the packet has been
 *	cloned and must be copied before being written to, so it would
 *	modify it in place and subsequent readers would get a broken packet.
 355 *							--ANK (980803)
 356 */
 357
 358/**
 359 *	dev_add_pack - add packet handler
 360 *	@pt: packet type declaration
 361 *
 362 *	Add a protocol handler to the networking stack. The passed &packet_type
 363 *	is linked into kernel lists and may not be freed until it has been
 364 *	removed from the kernel lists.
 365 *
 366 *	This call does not sleep therefore it can not
 367 *	guarantee all CPU's that are in middle of receiving packets
 368 *	will see the new packet type (until the next received packet).
 369 */
 370
 371void dev_add_pack(struct packet_type *pt)
 372{
 373	int hash;
 374
 375	spin_lock_bh(&ptype_lock);
 376	if (pt->type == htons(ETH_P_ALL))
 377		list_add_rcu(&pt->list, &ptype_all);
 378	else {
 379		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 380		list_add_rcu(&pt->list, &ptype_base[hash]);
 381	}
 382	spin_unlock_bh(&ptype_lock);
 383}
 384
 385/**
 386 *	__dev_remove_pack	 - remove packet handler
 387 *	@pt: packet type declaration
 388 *
 389 *	Remove a protocol handler that was previously added to the kernel
 390 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 391 *	from the kernel lists and can be freed or reused once this function
 392 *	returns.
 393 *
 394 *      The packet type might still be in use by receivers
 395 *	and must not be freed until after all the CPU's have gone
 396 *	through a quiescent state.
 397 */
 398void __dev_remove_pack(struct packet_type *pt)
 399{
 400	struct list_head *head;
 401	struct packet_type *pt1;
 402
 403	spin_lock_bh(&ptype_lock);
 404
 405	if (pt->type == htons(ETH_P_ALL))
 406		head = &ptype_all;
 407	else
 408		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 409
 410	list_for_each_entry(pt1, head, list) {
 411		if (pt == pt1) {
 412			list_del_rcu(&pt->list);
 413			goto out;
 414		}
 415	}
 416
 417	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 418out:
 419	spin_unlock_bh(&ptype_lock);
 420}
 421/**
 422 *	dev_remove_pack	 - remove packet handler
 423 *	@pt: packet type declaration
 424 *
 425 *	Remove a protocol handler that was previously added to the kernel
 426 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 427 *	from the kernel lists and can be freed or reused once this function
 428 *	returns.
 429 *
 430 *	This call sleeps to guarantee that no CPU is looking at the packet
 431 *	type after return.
 432 */
 433void dev_remove_pack(struct packet_type *pt)
 434{
 435	__dev_remove_pack(pt);
 436
 437	synchronize_net();
 438}
 439
 440/******************************************************************************
 441
 442		      Device Boot-time Settings Routines
 443
 444*******************************************************************************/
 445
 446/* Boot time configuration table */
 447static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 448
 449/**
 450 *	netdev_boot_setup_add	- add new setup entry
 451 *	@name: name of the device
 452 *	@map: configured settings for the device
 453 *
 454 *	Adds new setup entry to the dev_boot_setup list.  The function
 455 *	returns 0 on error and 1 on success.  This is a generic routine to
 456 *	all netdevices.
 457 */
 458static int netdev_boot_setup_add(char *name, struct ifmap *map)
 459{
 460	struct netdev_boot_setup *s;
 461	int i;
 462
 463	s = dev_boot_setup;
 464	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 465		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 466			memset(s[i].name, 0, sizeof(s[i].name));
 467			strlcpy(s[i].name, name, IFNAMSIZ);
 468			memcpy(&s[i].map, map, sizeof(s[i].map));
 469			break;
 470		}
 471	}
 472
 473	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 474}
 475
 476/**
 477 *	netdev_boot_setup_check	- check boot time settings
 478 *	@dev: the netdevice
 479 *
 480 * 	Check boot time settings for the device.
 481 *	The found settings are set for the device to be used
 482 *	later in the device probing.
 483 *	Returns 0 if no settings found, 1 if they are.
 484 */
 485int netdev_boot_setup_check(struct net_device *dev)
 486{
 487	struct netdev_boot_setup *s = dev_boot_setup;
 488	int i;
 489
 490	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 491		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 492		    !strcmp(dev->name, s[i].name)) {
 493			dev->irq 	= s[i].map.irq;
 494			dev->base_addr 	= s[i].map.base_addr;
 495			dev->mem_start 	= s[i].map.mem_start;
 496			dev->mem_end 	= s[i].map.mem_end;
 497			return 1;
 498		}
 499	}
 500	return 0;
 501}
 502
 503
 504/**
 505 *	netdev_boot_base	- get address from boot time settings
 506 *	@prefix: prefix for network device
 507 *	@unit: id for network device
 508 *
 509 * 	Check boot time settings for the base address of device.
 510 *	The found settings are set for the device to be used
 511 *	later in the device probing.
 512 *	Returns 0 if no settings found.
 513 */
 514unsigned long netdev_boot_base(const char *prefix, int unit)
 515{
 516	const struct netdev_boot_setup *s = dev_boot_setup;
 517	char name[IFNAMSIZ];
 518	int i;
 519
 520	sprintf(name, "%s%d", prefix, unit);
 521
 522	/*
 523	 * If device already registered then return base of 1
 524	 * to indicate not to probe for this interface
 525	 */
 526	if (__dev_get_by_name(&init_net, name))
 527		return 1;
 528
 529	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 530		if (!strcmp(name, s[i].name))
 531			return s[i].map.base_addr;
 532	return 0;
 533}
 534
 535/*
 536 * Saves at boot time configured settings for any netdevice.
 537 */
 538int __init netdev_boot_setup(char *str)
 539{
 540	int ints[5];
 541	struct ifmap map;
 542
 543	str = get_options(str, ARRAY_SIZE(ints), ints);
 544	if (!str || !*str)
 545		return 0;
 546
 547	/* Save settings */
 548	memset(&map, 0, sizeof(map));
 549	if (ints[0] > 0)
 550		map.irq = ints[1];
 551	if (ints[0] > 1)
 552		map.base_addr = ints[2];
 553	if (ints[0] > 2)
 554		map.mem_start = ints[3];
 555	if (ints[0] > 3)
 556		map.mem_end = ints[4];
 557
 558	/* Add new entry to the list */
 559	return netdev_boot_setup_add(str, &map);
 560}
 561
 562__setup("netdev=", netdev_boot_setup);
 563
 564/*******************************************************************************
 565
 566			    Device Interface Subroutines
 567
 568*******************************************************************************/
 569
 570/**
 571 *	__dev_get_by_name	- find a device by its name
 572 *	@net: the applicable net namespace
 573 *	@name: name to find
 574 *
 575 *	Find an interface by name. Must be called under RTNL semaphore
 576 *	or @dev_base_lock. If the name is found a pointer to the device
 577 *	is returned. If the name is not found then %NULL is returned. The
 578 *	reference counters are not incremented so the caller must be
 579 *	careful with locks.
 580 */
 581
 582struct net_device *__dev_get_by_name(struct net *net, const char *name)
 583{
 584	struct hlist_node *p;
 585
 586	hlist_for_each(p, dev_name_hash(net, name)) {
 587		struct net_device *dev
 588			= hlist_entry(p, struct net_device, name_hlist);
 589		if (!strncmp(dev->name, name, IFNAMSIZ))
 590			return dev;
 591	}
 592	return NULL;
 593}
 594
 595/**
 596 *	dev_get_by_name		- find a device by its name
 597 *	@net: the applicable net namespace
 598 *	@name: name to find
 599 *
 600 *	Find an interface by name. This can be called from any
 601 *	context and does its own locking. The returned handle has
 602 *	the usage count incremented and the caller must use dev_put() to
 603 *	release it when it is no longer needed. %NULL is returned if no
 604 *	matching device is found.
 605 */
 606
 607struct net_device *dev_get_by_name(struct net *net, const char *name)
 608{
 609	struct net_device *dev;
 610
 611	read_lock(&dev_base_lock);
 612	dev = __dev_get_by_name(net, name);
 613	if (dev)
 614		dev_hold(dev);
 615	read_unlock(&dev_base_lock);
 616	return dev;
 617}
 618
 619/**
 620 *	__dev_get_by_index - find a device by its ifindex
 621 *	@net: the applicable net namespace
 622 *	@ifindex: index of device
 623 *
 624 *	Search for an interface by index. Returns %NULL if the device
 625 *	is not found or a pointer to the device. The device has not
 626 *	had its reference counter increased so the caller must be careful
 627 *	about locking. The caller must hold either the RTNL semaphore
 628 *	or @dev_base_lock.
 629 */
 630
 631struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 632{
 633	struct hlist_node *p;
 634
 635	hlist_for_each(p, dev_index_hash(net, ifindex)) {
 636		struct net_device *dev
 637			= hlist_entry(p, struct net_device, index_hlist);
 638		if (dev->ifindex == ifindex)
 639			return dev;
 640	}
 641	return NULL;
 642}
 643
 644
 645/**
 646 *	dev_get_by_index - find a device by its ifindex
 647 *	@net: the applicable net namespace
 648 *	@ifindex: index of device
 649 *
 650 *	Search for an interface by index. Returns NULL if the device
 651 *	is not found or a pointer to the device. The device returned has
 652 *	had a reference added and the pointer is safe until the user calls
 653 *	dev_put to indicate they have finished with it.
 654 */
 655
 656struct net_device *dev_get_by_index(struct net *net, int ifindex)
 657{
 658	struct net_device *dev;
 659
 660	read_lock(&dev_base_lock);
 661	dev = __dev_get_by_index(net, ifindex);
 662	if (dev)
 663		dev_hold(dev);
 664	read_unlock(&dev_base_lock);
 665	return dev;
 666}
 667
 668/**
 669 *	dev_getbyhwaddr - find a device by its hardware address
 670 *	@net: the applicable net namespace
 671 *	@type: media type of device
 672 *	@ha: hardware address
 673 *
 674 *	Search for an interface by MAC address. Returns NULL if the device
 675 *	is not found or a pointer to the device. The caller must hold the
 676 *	rtnl semaphore. The returned device has not had its ref count increased
 677 *	and the caller must therefore be careful about locking
 678 *
 679 *	BUGS:
 680 *	If the API was consistent this would be __dev_get_by_hwaddr
 681 */
 682
 683struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 684{
 685	struct net_device *dev;
 686
 687	ASSERT_RTNL();
 688
 689	for_each_netdev(net, dev)
 690		if (dev->type == type &&
 691		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 692			return dev;
 693
 694	return NULL;
 695}
 696
 697EXPORT_SYMBOL(dev_getbyhwaddr);
 698
 699struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 700{
 701	struct net_device *dev;
 702
 703	ASSERT_RTNL();
 704	for_each_netdev(net, dev)
 705		if (dev->type == type)
 706			return dev;
 707
 708	return NULL;
 709}
 710
 711EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 712
 713struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 714{
 715	struct net_device *dev;
 716
 717	rtnl_lock();
 718	dev = __dev_getfirstbyhwtype(net, type);
 719	if (dev)
 720		dev_hold(dev);
 721	rtnl_unlock();
 722	return dev;
 723}
 724
 725EXPORT_SYMBOL(dev_getfirstbyhwtype);
 726
 727/**
 728 *	dev_get_by_flags - find any device with given flags
 729 *	@net: the applicable net namespace
 730 *	@if_flags: IFF_* values
 731 *	@mask: bitmask of bits in if_flags to check
 732 *
 733 *	Search for any interface with the given flags. Returns NULL if a device
 734 *	is not found or a pointer to the device. The device returned has
 735 *	had a reference added and the pointer is safe until the user calls
 736 *	dev_put to indicate they have finished with it.
 737 */
 738
 739struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 740{
 741	struct net_device *dev, *ret;
 742
 743	ret = NULL;
 744	read_lock(&dev_base_lock);
 745	for_each_netdev(net, dev) {
 746		if (((dev->flags ^ if_flags) & mask) == 0) {
 747			dev_hold(dev);
 748			ret = dev;
 749			break;
 750		}
 751	}
 752	read_unlock(&dev_base_lock);
 753	return ret;
 754}
 755
 756/**
 757 *	dev_valid_name - check if name is okay for network device
 758 *	@name: name string
 759 *
 760 *	Network device names need to be valid file names to
 761 *	to allow sysfs to work.  We also disallow any kind of
 762 *	whitespace.
 763 */
 764int dev_valid_name(const char *name)
 765{
 766	if (*name == '\0')
 767		return 0;
 768	if (strlen(name) >= IFNAMSIZ)
 769		return 0;
 770	if (!strcmp(name, ".") || !strcmp(name, ".."))
 771		return 0;
 772
 773	while (*name) {
 774		if (*name == '/' || isspace(*name))
 775			return 0;
 776		name++;
 777	}
 778	return 1;
 779}
 780
 781/**
 782 *	__dev_alloc_name - allocate a name for a device
 783 *	@net: network namespace to allocate the device name in
 784 *	@name: name format string
 785 *	@buf:  scratch buffer and result name string
 786 *
 787 *	Passed a format string - eg "lt%d" it will try and find a suitable
 788 *	id. It scans list of devices to build up a free map, then chooses
 789 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 790 *	while allocating the name and adding the device in order to avoid
 791 *	duplicates.
 792 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 793 *	Returns the number of the unit assigned or a negative errno code.
 794 */
 795
 796static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 797{
 798	int i = 0;
 799	const char *p;
 800	const int max_netdevices = 8*PAGE_SIZE;
 801	unsigned long *inuse;
 802	struct net_device *d;
 803
 804	p = strnchr(name, IFNAMSIZ-1, '%');
 805	if (p) {
 806		/*
 807		 * Verify the string as this thing may have come from
 808		 * the user.  There must be either one "%d" and no other "%"
 809		 * characters.
 810		 */
 811		if (p[1] != 'd' || strchr(p + 2, '%'))
 812			return -EINVAL;
 813
 814		/* Use one page as a bit array of possible slots */
 815		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
 816		if (!inuse)
 817			return -ENOMEM;
 818
 819		for_each_netdev(net, d) {
 820			if (!sscanf(d->name, name, &i))
 821				continue;
 822			if (i < 0 || i >= max_netdevices)
 823				continue;
 824
 825			/*  avoid cases where sscanf is not exact inverse of printf */
 826			snprintf(buf, IFNAMSIZ, name, i);
 827			if (!strncmp(buf, d->name, IFNAMSIZ))
 828				set_bit(i, inuse);
 829		}
 830
 831		i = find_first_zero_bit(inuse, max_netdevices);
 832		free_page((unsigned long) inuse);
 833	}
 834
 835	snprintf(buf, IFNAMSIZ, name, i);
 836	if (!__dev_get_by_name(net, buf))
 837		return i;
 838
 839	/* It is possible to run out of possible slots
 840	 * when the name is long and there isn't enough space left
 841	 * for the digits, or if all bits are used.
 842	 */
 843	return -ENFILE;
 844}
 845
 846/**
 847 *	dev_alloc_name - allocate a name for a device
 848 *	@dev: device
 849 *	@name: name format string
 850 *
 851 *	Passed a format string - eg "lt%d" it will try and find a suitable
 852 *	id. It scans list of devices to build up a free map, then chooses
 853 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 854 *	while allocating the name and adding the device in order to avoid
 855 *	duplicates.
 856 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 857 *	Returns the number of the unit assigned or a negative errno code.
 858 */
 859
 860int dev_alloc_name(struct net_device *dev, const char *name)
 861{
 862	char buf[IFNAMSIZ];
 863	struct net *net;
 864	int ret;
 865
 866	BUG_ON(!dev_net(dev));
 867	net = dev_net(dev);
 868	ret = __dev_alloc_name(net, name, buf);
 869	if (ret >= 0)
 870		strlcpy(dev->name, buf, IFNAMSIZ);
 871	return ret;
 872}
 873
 874
 875/**
 876 *	dev_change_name - change name of a device
 877 *	@dev: device
 878 *	@newname: name (or format string) must be at least IFNAMSIZ
 879 *
 880 *	Change name of a device, can pass format strings "eth%d".
 881 *	for wildcarding.
 882 */
 883int dev_change_name(struct net_device *dev, const char *newname)
 884{
 885	char oldname[IFNAMSIZ];
 886	int err = 0;
 887	int ret;
 888	struct net *net;
 889
 890	ASSERT_RTNL();
 891	BUG_ON(!dev_net(dev));
 892
 893	net = dev_net(dev);
 894	if (dev->flags & IFF_UP)
 895		return -EBUSY;
 896
 897	if (!dev_valid_name(newname))
 898		return -EINVAL;
 899
 900	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 901		return 0;
 902
 903	memcpy(oldname, dev->name, IFNAMSIZ);
 904
 905	if (strchr(newname, '%')) {
 906		err = dev_alloc_name(dev, newname);
 907		if (err < 0)
 908			return err;
 909	}
 910	else if (__dev_get_by_name(net, newname))
 911		return -EEXIST;
 912	else
 913		strlcpy(dev->name, newname, IFNAMSIZ);
 914
 915rollback:
 916	/* For now only devices in the initial network namespace
 917	 * are in sysfs.
 918	 */
 919	if (net == &init_net) {
 920		ret = device_rename(&dev->dev, dev->name);
 921		if (ret) {
 922			memcpy(dev->name, oldname, IFNAMSIZ);
 923			return ret;
 924		}
 925	}
 926
 927	write_lock_bh(&dev_base_lock);
 928	hlist_del(&dev->name_hlist);
 929	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 930	write_unlock_bh(&dev_base_lock);
 931
 932	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 933	ret = notifier_to_errno(ret);
 934
 935	if (ret) {
 936		if (err) {
 937			printk(KERN_ERR
 938			       "%s: name change rollback failed: %d.\n",
 939			       dev->name, ret);
 940		} else {
 941			err = ret;
 942			memcpy(dev->name, oldname, IFNAMSIZ);
 943			goto rollback;
 944		}
 945	}
 946
 947	return err;
 948}
 949
 950/**
 951 *	dev_set_alias - change ifalias of a device
 952 *	@dev: device
 953 *	@alias: name up to IFALIASZ
 954 *	@len: limit of bytes to copy from info
 955 *
 956 *	Set ifalias for a device,
 957 */
 958int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 959{
 960	ASSERT_RTNL();
 961
 962	if (len >= IFALIASZ)
 963		return -EINVAL;
 964
 965	if (!len) {
 966		if (dev->ifalias) {
 967			kfree(dev->ifalias);
 968			dev->ifalias = NULL;
 969		}
 970		return 0;
 971	}
 972
 973	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
 974	if (!dev->ifalias)
 975		return -ENOMEM;
 976
 977	strlcpy(dev->ifalias, alias, len+1);
 978	return len;
 979}
 980
 981
 982/**
 983 *	netdev_features_change - device changes features
 984 *	@dev: device to cause notification
 985 *
 986 *	Called to indicate a device has changed features.
 987 */
 988void netdev_features_change(struct net_device *dev)
 989{
 990	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
 991}
 992EXPORT_SYMBOL(netdev_features_change);
 993
 994/**
 995 *	netdev_state_change - device changes state
 996 *	@dev: device to cause notification
 997 *
 998 *	Called to indicate a device has changed state. This function calls
 999 *	the notifier chains for netdev_chain and sends a NEWLINK message
1000 *	to the routing socket.
1001 */
1002void netdev_state_change(struct net_device *dev)
1003{
1004	if (dev->flags & IFF_UP) {
1005		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1006		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1007	}
1008}
1009
/**
 *	netdev_bonding_change - send a bonding failover notification
 *	@dev: device to cause notification
 *
 *	Sends %NETDEV_BONDING_FAILOVER for @dev down the netdevice
 *	notifier chain.
 */
void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
1014EXPORT_SYMBOL(netdev_bonding_change);
1015
1016/**
1017 *	dev_load 	- load a network module
1018 *	@net: the applicable net namespace
1019 *	@name: name of interface
1020 *
1021 *	If a network interface is not present and the process has suitable
1022 *	privileges this function loads the module. If module loading is not
1023 *	available in this kernel then it becomes a nop.
1024 */
1025
1026void dev_load(struct net *net, const char *name)
1027{
1028	struct net_device *dev;
1029
1030	read_lock(&dev_base_lock);
1031	dev = __dev_get_by_name(net, name);
1032	read_unlock(&dev_base_lock);
1033
1034	if (!dev && capable(CAP_SYS_MODULE))
1035		request_module("%s", name);
1036}
1037
1038/**
1039 *	dev_open	- prepare an interface for use.
1040 *	@dev:	device to open
1041 *
1042 *	Takes a device from down to up state. The device's private open
1043 *	function is invoked and then the multicast lists are loaded. Finally
1044 *	the device is moved into the up state and a %NETDEV_UP message is
1045 *	sent to the netdev notifier chain.
1046 *
1047 *	Calling this function on an active interface is a nop. On a failure
1048 *	a negative errno code is returned.
1049 */
1050int dev_open(struct net_device *dev)
1051{
1052	const struct net_device_ops *ops = dev->netdev_ops;
1053	int ret;
1054
1055	ASSERT_RTNL();
1056
1057	/*
1058	 *	Is it already up?
1059	 */
1060
1061	if (dev->flags & IFF_UP)
1062		return 0;
1063
1064	/*
1065	 *	Is it even present?
1066	 */
1067	if (!netif_device_present(dev))
1068		return -ENODEV;
1069
1070	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1071	ret = notifier_to_errno(ret);
1072	if (ret)
1073		return ret;
1074
1075	/*
1076	 *	Call device private open method
1077	 */
1078	set_bit(__LINK_STATE_START, &dev->state);
1079
1080	if (ops->ndo_validate_addr)
1081		ret = ops->ndo_validate_addr(dev);
1082
1083	if (!ret && ops->ndo_open)
1084		ret = ops->ndo_open(dev);
1085
1086	/*
1087	 *	If it went open OK then:
1088	 */
1089
1090	if (ret)
1091		clear_bit(__LINK_STATE_START, &dev->state);
1092	else {
1093		/*
1094		 *	Set the flags.
1095		 */
1096		dev->flags |= IFF_UP;
1097
1098		/*
1099		 *	Enable NET_DMA
1100		 */
1101		net_dmaengine_get();
1102
1103		/*
1104		 *	Initialize multicasting status
1105		 */
1106		dev_set_rx_mode(dev);
1107
1108		/*
1109		 *	Wakeup transmit queue engine
1110		 */
1111		dev_activate(dev);
1112
1113		/*
1114		 *	... and announce new interface.
1115		 */
1116		call_netdevice_notifiers(NETDEV_UP, dev);
1117	}
1118
1119	return ret;
1120}
1121
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 *
 *	Asserts that the RTNL lock is held and may sleep.  Calling it on a
 *	device that is not up is a nop.  Always returns 0.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for it while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	/*
	 *	Shutdown NET_DMA
	 */
	net_dmaengine_put();

	return 0;
}
1187
1188
1189/**
1190 *	dev_disable_lro - disable Large Receive Offload on a device
1191 *	@dev: device
1192 *
1193 *	Disable Large Receive Offload (LRO) on a net device.  Must be
1194 *	called under RTNL.  This is needed if received packets may be
1195 *	forwarded to another interface.
1196 */
1197void dev_disable_lro(struct net_device *dev)
1198{
1199	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1200	    dev->ethtool_ops->set_flags) {
1201		u32 flags = dev->ethtool_ops->get_flags(dev);
1202		if (flags & ETH_FLAG_LRO) {
1203			flags &= ~ETH_FLAG_LRO;
1204			dev->ethtool_ops->set_flags(dev, flags);
1205		}
1206	}
1207	WARN_ON(dev->features & NETIF_F_LRO);
1208}
1209EXPORT_SYMBOL(dev_disable_lro);
1210
1211
1212static int dev_boot_phase = 1;
1213
1214/*
1215 *	Device change register/unregister. These are not inline or static
1216 *	as we export them to the world.
1217 */
1218
1219/**
1220 *	register_netdevice_notifier - register a network notifier block
1221 *	@nb: notifier
1222 *
1223 *	Register a notifier to be called when network device events occur.
1224 *	The notifier passed is linked into the kernel structures and must
1225 *	not be reused until it has been unregistered. A negative errno code
1226 *	is returned on a failure.
1227 *
1228 * 	When registered all registration and up events are replayed
1229 *	to the new notifier to allow device to have a race free
1230 *	view of the network device list.
1231 */
1232
1233int register_netdevice_notifier(struct notifier_block *nb)
1234{
1235	struct net_device *dev;
1236	struct net_device *last;
1237	struct net *net;
1238	int err;
1239
1240	rtnl_lock();
1241	err = raw_notifier_chain_register(&netdev_chain, nb);
1242	if (err)
1243		goto unlock;
1244	if (dev_boot_phase)
1245		goto unlock;
1246	for_each_net(net) {
1247		for_each_netdev(net, dev) {
1248			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1249			err = notifier_to_errno(err);
1250			if (err)
1251				goto rollback;
1252
1253			if (!(dev->flags & IFF_UP))
1254				continue;
1255
1256			nb->notifier_call(nb, NETDEV_UP, dev);
1257		}
1258	}
1259
1260unlock:
1261	rtnl_unlock();
1262	return err;
1263
1264rollback:
1265	last = dev;
1266	for_each_net(net) {
1267		for_each_netdev(net, dev) {
1268			if (dev == last)
1269				break;
1270
1271			if (dev->flags & IFF_UP) {
1272				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1273				nb->notifier_call(nb, NETDEV_DOWN, dev);
1274			}
1275			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1276		}
1277	}
1278
1279	raw_notifier_chain_unregister(&netdev_chain, nb);
1280	goto unlock;
1281}
1282
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 *
 *	Takes the RTNL lock around the removal.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}
1302
/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks registered on netdev_chain.
 *	Parameters and return value are as for raw_notifier_call_chain();
 *	callers in this file that need an errno convert the result with
 *	notifier_to_errno().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1316
1317/* When > 0 there are consumers of rx skb time stamps */
1318static atomic_t netstamp_needed = ATOMIC_INIT(0);
1319
/* Account for one more consumer of rx skb time stamps. */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
1324
/* Account for one consumer of rx skb time stamps going away. */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
1329
1330static inline void net_timestamp(struct sk_buff *skb)
1331{
1332	if (atomic_read(&netstamp_needed))
1333		__net_timestamp(skb);
1334	else
1335		skb->tstamp.tv64 = 0;
1336}
1337
1338/*
1339 *	Support routine. Sends outgoing frames to any network
1340 *	taps currently in use.
1341 */
1342
1343static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1344{
1345	struct packet_type *ptype;
1346
1347#ifdef CONFIG_NET_CLS_ACT
1348	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1349		net_timestamp(skb);
1350#else
1351	net_timestamp(skb);
1352#endif
1353
1354	rcu_read_lock();
1355	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1356		/* Never send packets back to the socket
1357		 * they originated from - MvS (miquels@drinkel.ow.org)
1358		 */
1359		if ((ptype->dev == dev || !ptype->dev) &&
1360		    (ptype->af_packet_priv == NULL ||
1361		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1362			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1363			if (!skb2)
1364				break;
1365
1366			/* skb->nh should be correctly
1367			   set by sender, so that the second statement is
1368			   just protection against buggy protocols.
1369			 */
1370			skb_reset_mac_header(skb2);
1371
1372			if (skb_network_header(skb2) < skb2->data ||
1373			    skb2->network_header > skb2->tail) {
1374				if (net_ratelimit())
1375					printk(KERN_CRIT "protocol %04x is "
1376					       "buggy, dev %s\n",
1377					       skb2->protocol, dev->name);
1378				skb_reset_network_header(skb2);
1379			}
1380
1381			skb2->transport_header = skb2->network_header;
1382			skb2->pkt_type = PACKET_OUTGOING;
1383			ptype->func(skb2, skb->dev, ptype, skb->dev);
1384		}
1385	}
1386	rcu_read_unlock();
1387}
1388
1389
/*
 * Push @q onto the head of this CPU's softnet_data output_queue and
 * raise NET_TX_SOFTIRQ.  Local interrupts are disabled around the list
 * update and restored afterwards.
 */
static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = sd->output_queue;
	sd->output_queue = q;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}
1402
1403void __netif_schedule(struct Qdisc *q)
1404{
1405	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1406		__netif_reschedule(q);
1407}
1408EXPORT_SYMBOL(__netif_schedule);
1409
1410void dev_kfree_skb_irq(struct sk_buff *skb)
1411{
1412	if (atomic_dec_and_test(&skb->users)) {
1413		struct softnet_data *sd;
1414		unsigned long flags;
1415
1416		local_irq_save(flags);
1417		sd = &__get_cpu_var(softnet_data);
1418		skb->next = sd->completion_queue;
1419		sd->completion_queue = skb;
1420		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1421		local_irq_restore(flags);
1422	}
1423}
1424EXPORT_SYMBOL(dev_kfree_skb_irq);
1425
/*
 * Free @skb from any context: use dev_kfree_skb_irq() when running in
 * hard interrupt context or with interrupts disabled, and
 * dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
1433EXPORT_SYMBOL(dev_kfree_skb_any);
1434
1435
1436/**
1437 * netif_device_detach - mark device as removed
1438 * @dev: network device
1439 *
1440 * Mark device as removed from system and therefore no longer available.
1441 */
1442void netif_device_detach(struct net_device *dev)
1443{
1444	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1445	    netif_running(dev)) {
1446		netif_tx_stop_all_queues(dev);
1447	}
1448}
1449EXPORT_SYMBOL(netif_device_detach);
1450
1451/**
1452 * netif_device_attach - mark device as attached
1453 * @dev: network device
1454 *
1455 * Mark device as attached from system and restart if needed.
1456 */
1457void netif_device_attach(struct net_device *dev)
1458{
1459	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1460	    netif_running(dev)) {
1461		netif_tx_wake_all_queues(dev);
1462		__netdev_watchdog_up(dev);
1463	}
1464}
1465EXPORT_SYMBOL(netif_device_attach);
1466
1467static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1468{
1469	return ((features & NETIF_F_GEN_CSUM) ||
1470		((features & NETIF_F_IP_CSUM) &&
1471		 protocol == htons(ETH_P_IP)) ||
1472		((features & NETIF_F_IPV6_CSUM) &&
1473		 protocol == htons(ETH_P_IPV6)) ||
1474		((features & NETIF_F_FCOE_CRC) &&
1475		 protocol == htons(ETH_P_FCOE)));
1476}
1477
1478static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1479{
1480	if (can_checksum_protocol(dev->features, skb->protocol))
1481		return true;
1482
1483	if (skb->protocol == htons(ETH_P_8021Q)) {
1484		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1485		if (can_checksum_protocol(dev->features & dev->vlan_features,
1486					  veh->h_vlan_encapsulated_proto))
1487			return true;
1488	}
1489
1490	return false;
1491}
1492
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 *
 * Returns 0 on success, in which case skb->ip_summed has been set to
 * CHECKSUM_NONE, or the error from pskb_expand_head(), in which case
 * skb->ip_summed is left untouched.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	/* Nothing to compute here: only the summed state is reset. */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	/* Checksum everything from csum_start to the end of the packet. */
	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	/* From here on @offset is where the folded checksum is stored. */
	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	/* A cloned skb whose checksum field is not writable is expanded
	 * first (presumably to obtain a private, writable header).
	 */
	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
1530
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	An ERR_PTR() is returned on failure; it carries -EPROTONOSUPPORT
 *	when no packet type handler with a gso_segment method matches
 *	skb->protocol.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	/* Record the MAC header length and pull the MAC header off. */
	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		struct net_device *dev = skb->dev;
		struct ethtool_drvinfo info = {};

		/* Collect the driver name for the warning below. */
		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
			dev->ethtool_ops->get_drvinfo(dev, &info);

		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
			"ip_summed=%d",
		     info.driver, dev ? dev->features : 0L,
		     skb->sk ? skb->sk->sk_route_caps : 0L,
		     skb->len, skb->data_len, skb->ip_summed);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		/* Only the first matching, device-independent handler is used. */
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				/* With err == 0 this leaves segs NULL. */
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Put back everything pulled since the MAC header. */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
1592
1593EXPORT_SYMBOL(skb_gso_segment);
1594
1595/* Take action when hardware reception checksum errors are detected. */
1596#ifdef CONFIG_BUG
1597void netdev_rx_csum_fault(struct net_device *dev)
1598{
1599	if (net_ratelimit()) {
1600		printk(KERN_ERR "%s: hw csum failure.\n",
1601			dev ? dev->name : "<unknown>");
1602		dump_stack();
1603	}
1604}
1605EXPORT_SYMBOL(netdev_rx_csum_fault);
1606#endif
1607
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 *
 * Returns 1 if @skb has a page fragment in high memory while @dev does
 * not advertise NETIF_F_HIGHDMA, and 0 otherwise.  Without
 * CONFIG_HIGHMEM the answer is always 0.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}
1628
/*
 * Data kept in skb->cb by dev_gso_segment(): the destructor the skb had
 * before dev_gso_skb_destructor() was installed in its place.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1634
1635static void dev_gso_skb_destructor(struct sk_buff *skb)
1636{
1637	struct dev_gso_cb *cb;
1638
1639	do {
1640		struct sk_buff *nskb = skb->next;
1641
1642		skb->next = nskb->next;
1643		nskb->next = NULL;
1644		kfree_skb(nskb);
1645	} while (skb->next);
1646
1647	cb = DEV_GSO_CB(skb);
1648	if (cb->destructor)
1649		cb->destructor(skb);
1650}
1651
1652/**
1653 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1654 *	@skb: buffer to segment
1655 *
1656 *	This function segments the given skb and stores the list of segments
1657 *	in skb->next.
1658 */
1659static int dev_gso_segment(struct sk_buff *skb)
1660{
1661	struct net_device *dev = skb->dev;
1662	struct sk_buff *segs;
1663	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1664					 NETIF_F_SG : 0);
1665
1666	segs = skb_gso_segment(skb, features);
1667
1668	/* Verifying header integrity only. */
1669	if (!segs)
1670		return 0;
1671
1672	if (IS_ERR(segs))
1673		return PTR_ERR(segs);
1674
1675	skb->next = segs;
1676	DEV_GSO_CB(skb)->destructor = skb->destructor;
1677	skb->destructor = dev_gso_skb_destructor;
1678
1679	return 0;
1680}
1681
/*
 * Hand @skb to the ndo_start_xmit method of @dev on queue @txq.
 *
 * An skb without a segment list is first shown to the network taps
 * (when any are registered) and segmented in software when
 * netif_needs_gso() says so.  An skb carrying a segment list in
 * skb->next has its segments transmitted one at a time; once they are
 * all sent, the original destructor is restored and @skb is freed.
 *
 * Returns the driver's return code.  When a segment is refused it is put
 * back at the head of the list and that code is returned; NETDEV_TX_BUSY
 * is returned when the queue gets stopped while segments remain.  A
 * failed software segmentation frees @skb and returns 0.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc;

	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			/* Segments were produced: send them one by one. */
			if (skb->next)
				goto gso;
		}

		/*
		 * If the device doesn't need skb->dst, release it right now
		 * while it's hot in this cpu cache
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		rc = ops->ndo_start_xmit(skb, dev);
		if (rc == 0)
			txq_trans_update(txq);
		/*
		 * TODO: if skb_orphan() was called by
		 * dev->hard_start_xmit() (for example, the unmodified
		 * igb driver does that; bnx2 doesn't), then
		 * skb_tx_software_timestamp() will be unable to send
		 * back the time stamp.
		 *
		 * How can this be prevented? Always create another
		 * reference to the socket before calling
		 * dev->hard_start_xmit()? Prevent that skb_orphan()
		 * does anything in dev->hard_start_xmit() by clearing
		 * the skb destructor before the call and restoring it
		 * afterwards, then doing the skb_orphan() ourselves?
		 */
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		/* Detach the first segment from the list and send it. */
		skb->next = nskb->next;
		nskb->next = NULL;
		rc = ops->ndo_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Refused: put the segment back at the list head. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* All segments are gone: restore the original destructor. */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}
1749
1750static u32 skb_tx_hashrnd;
1751
1752u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1753{
1754	u32 hash;
1755
1756	if (skb_rx_queue_recorded(skb)) {
1757		hash = skb_get_rx_queue(skb);
1758		while (unlikely (hash >= dev->real_num_tx_queues))
1759			hash -= dev->real_num_tx_queues;
1760		return hash;
1761	}
1762
1763	if (skb->sk && skb->sk->sk_hash)
1764		hash = skb->sk->sk_hash;
1765	else
1766		hash = skb->protocol;
1767
1768	hash = jhash_1word(hash, skb_tx_hashrnd);
1769
1770	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1771}
1772EXPORT_SYMBOL(skb_tx_hash);
1773
1774static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1775					struct sk_buff *skb)
1776{
1777	const struct net_device_ops *ops = dev->netdev_ops;
1778	u16 queue_index = 0;
1779
1780	if (ops->ndo_select_queue)
1781		queue_index = ops->ndo_select_queue(dev, skb);
1782	else if (dev->real_num_tx_queues > 1)
1783		queue_index = skb_tx_hash(dev, skb);
1784
1785	skb_set_queue_mapping(skb, queue_index);
1786	return netdev_get_tx_queue(dev, queue_index);
1787}
1788
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 *	Notes (BLG):
 *
 *	This function can also return errors from the queue disciplines,
 *	including NET_XMIT_DROP, which is a positive value.  So, errors can
 *	also be positive.
 *
 *	Regardless of the return value, the skb is consumed, so it is
 *	currently difficult to retry a send to this function.  (You can bump
 *	the ref count before sending to hold a reference for retry if you
 *	are careful.)
 *
 *	When calling this function, interrupts MUST be enabled.  This is
 *	because the BH enable code must have IRQs enabled so that it will
 *	not deadlock.
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize a frag list the device cannot handle. */
	if (skb_has_frags(skb) &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
			goto out_kfree_skb;
	}

gso:
	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	/* A qdisc with an enqueue method: queue the skb and run the qdisc. */
	if (q->enqueue) {
		spinlock_t *root_lock = qdisc_lock(q);

		spin_lock(root_lock);

		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
			kfree_skb(skb);
			rc = NET_XMIT_DROP;
		} else {
			rc = qdisc_enqueue_root(skb, q);
			qdisc_run(q);
		}
		spin_unlock(root_lock);

		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Original author's note: check this and remove the lock (it is
	   not prone to deadlocks), or remove the noqueue qdisc instead,
	   which is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* This CPU already owning the tx lock means recursion. */
		if (txq->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev, txq)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/* Device down, transmit refused or recursion: drop the packet. */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1927
1928
1929/*=======================================================================
1930			Receiver routines
1931  =======================================================================*/
1932
1933int netdev_max_backlog __read_mostly = 1000;
1934int netdev_budget __read_mostly = 300;
1935int weight_p __read_mostly = 64;            /* old backlog weight */
1936
1937DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1938
1939
1940/**
1941 *	netif_rx	-	post buffer to the network code
1942 *	@skb: buffer to post
1943 *
1944 *	This function receives a packet from a device driver and queues it for
1945 *	the upper (protocol) levels to process.  It always succeeds. The buffer
1946 *	may be dropped during processing for congestion control or by the
1947 *	protocol layers.
1948 *
1949 *	return values:
1950 *	NET_RX_SUCCESS	(no congestion)
1951 *	NET_RX_DROP     (packet was dropped)
1952 *
1953 */
1954
1955int netif_rx(struct sk_buff *skb)
1956{
1957	struct softnet_data *queue;
1958	unsigned long flags;
1959
1960	/* if netpoll wants it, pretend we never saw it */
1961	if (netpoll_rx(skb))
1962		return NET_RX_DROP;
1963
1964	if (!skb->tstamp.tv64)
1965		net_timestamp(skb);
1966
1967	/*
1968	 * The code is rearranged so that the path is the most
1969	 * short when CPU is congested, but is still operating.
1970	 */
1971	local_irq_save(flags);
1972	queue = &__get_cpu_var(softnet_data);
1973
1974	__get_cpu_var(netdev_rx_stat).total++;
1975	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1976		if (queue->input_pkt_queue.qlen) {
1977enqueue:
1978			__skb_queue_tail(&queue->input_pkt_queue, skb);
1979			local_irq_restore(flags);
1980			return NET_RX_SUCCESS;
1981		}
1982
1983		napi_schedule(&queue->backlog);
1984		goto enqueue;
1985	}
1986
1987	__get_cpu_var(netdev_rx_stat).dropped++;
1988	local_irq_restore(flags);
1989
1990	kfree_skb(skb);
1991	return NET_RX_DROP;
1992}
1993
/*
 * Post @skb through netif_rx() with preemption disabled, then run any
 * softirqs left pending on this CPU.  Returns netif_rx()'s result.
 * NOTE(review): the "_ni" suffix presumably stands for "non-interrupt
 * context" - confirm against callers.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}
2006
2007EXPORT_SYMBOL(netif_rx_ni);
2008
/*
 * Softirq action (presumably registered for NET_TX_SOFTIRQ, which the
 * producers of both lists raise - confirm at the registration site).
 *
 * Drains this CPU's two softnet_data lists: skbs on completion_queue
 * are freed, and qdiscs on output_queue are run.  Each list is detached
 * with local interrupts disabled and then processed with them enabled.
 */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			/* Queued skbs are expected to have no users left. */
			WARN_ON(atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
			if (spin_trylock(root_lock)) {
				/* Got the lock: clear SCHED and run the qdisc. */
				smp_mb__before_clear_bit();
				clear_bit(__QDISC_STATE_SCHED,
					  &q->state);
				qdisc_run(q);
				spin_unlock(root_lock);
			} else {
				/*
				 * Lock is busy: try again later unless the
				 * qdisc has been deactivated, in which case
				 * it is simply unmarked.
				 */
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}
2064
/*
 * Take an extra reference on @skb and pass it to the handler of
 * @pt_prev.  Returns whatever the handler returns.
 */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
2072
2073#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2074
2075#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2076/* This hook is defined here for ATM LANE */
2077int (*br_fdb_test_addr_hook)(struct net_device *dev,
2078			     unsigned char *addr) __read_mostly;
2079EXPORT_SYMBOL(br_fdb_test_addr_hook);
2080#endif
2081
2082/*
2083 * If bridge module is loaded call bridging hook.
2084 *  returns NULL if packet was consumed.
2085 */
2086struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2087					struct sk_buff *skb) __read_mostly;
2088EXPORT_SYMBOL(br_handle_frame_hook);
2089
2090static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2091					    struct packet_type **pt_prev, int *ret,
2092					    struct net_device *orig_dev)
2093{
2094	struct net_bridge_port *port;
2095
2096	if (skb->pkt_type == PACKET_LOOPBACK ||
2097	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2098		return skb;
2099
2100	if (*pt_prev) {
2101		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2102		*pt_prev = NULL;
2103	}
2104
2105	return br_handle_frame_hook(port, skb);
2106}
2107#else
2108#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2109#endif
2110
2111#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2112struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2113EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2114
/*
 * Give @skb to the macvlan hook when its device has a macvlan port.
 *
 * Packets from devices without a macvlan port are returned untouched.
 * Otherwise a pending handler in *@pt_prev is served first (its result
 * goes to *@ret and *@pt_prev is cleared), and the result of
 * macvlan_handle_frame_hook() is returned.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (skb->dev->macvlan_port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}
	return macvlan_handle_frame_hook(skb);
}
2129#else
2130#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2131#endif
2132
2133#ifdef CONFIG_NET_CLS_ACT
2134/* TODO: Maybe we should just force sch_ingress to be compiled in
2135 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2136 * a compare and 2 stores extra right now if we dont have it on
2137 * but have CONFIG_NET_CLS_ACT
2138 * NOTE: This doesnt stop any functionality; if you dont have
2139 * the ingress scheduler, you just cant add policies on ingress.
2140 *
2141 */
2142static int ing_filter(struct sk_buff *skb)
2143{
2144	struct net_device *dev = skb->dev;
2145	u32 ttl = G_TC_RTTL(skb->tc_verd);
2146	struct netdev_queue *rxq;
2147	int result = TC_ACT_OK;
2148	struct Qdisc *q;
2149
2150	if (MAX_RED_LOOP < ttl++) {
2151		printk(KERN_WARNING
2152		       "Redir loop detected Dropping packet (%d->%d)\n",
2153		       skb->iif, dev->ifindex);
2154		return TC_ACT_SHOT;
2155	}
2156
2157	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2158	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2159
2160	rxq = &dev->rx_queue;
2161
2162	q = rxq->qdisc;
2163	if (q != &noop_qdisc) {
2164		spin_lock(qdisc_lock(q));
2165		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2166			result = qdisc_enqueue_root(skb, q);
2167		spin_unlock(qdisc_lock(q));
2168	}
2169
2170	return result;
2171}
2172
2173static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2174					 struct packet_type **pt_prev,
2175					 int *ret, struct net_device *orig_dev)
2176{
2177	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2178		goto out;
2179
2180	if (*pt_prev) {
2181		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2182		*pt_prev = NULL;
2183	} else {
2184		/* Huh? Why does turning on AF_PACKET affect this? */
2185		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2186	}
2187
2188	switch (ing_filter(skb)) {
2189	case TC_ACT_SHOT:
2190	case TC_ACT_STOLEN:
2191		kfree_skb(skb);
2192		return NULL;
2193	}
2194
2195out:
2196	skb->tc_verd = 0;
2197	return skb;
2198}
2199#endif
2200
2201/*
2202 * 	netif_nit_deliver - deliver received packets to network taps
2203 * 	@skb: buffer
2204 *
2205 * 	This function is used to deliver incoming packets to network
2206 * 	taps. It should be used when the normal netif_receive_skb path
2207 * 	is bypassed, for example because of VLAN acceleration.
2208 */
2209void netif_nit_deliver(struct sk_buff *skb)
2210{
2211	struct packet_type *ptype;
2212
2213	if (list_empty(&ptype_all))
2214		return;
2215
2216	skb_reset_network_header(skb);
2217	skb_reset_transport_header(skb);
2218	skb->mac_len = skb->network_header - skb->mac_header;
2219
2220	rcu_read_lock();
2221	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2222		if (!ptype->dev || ptype->dev == skb->dev)
2223			deliver_skb(skb, ptype, skb->dev);
2224	}
2225	rcu_read_unlock();
2226}
2227
2228/**
2229 *	netif_receive_skb - process receive buffer from network
2230 *	@skb: buffer to process
2231 *
2232 *	netif_receive_skb() is the main receive data processing function.
2233 *	It always succeeds. The buffer may be dropped during processing
2234 *	for congestion control or by the protocol layers.
2235 *
2236 *	This function may only be called from softirq context and interrupts
2237 *	should be enabled.
2238 *
2239 *	Return values (usually ignored):
2240 *	NET_RX_SUCCESS: no congestion
2241 *	NET_RX_DROP: packet was dropped
2242 */
2243int netif_receive_skb(struct sk_buff *skb)
2244{
2245	struct packet_type *ptype, *pt_prev;
2246	struct net_device *orig_dev;
2247	struct net_device *null_or_orig;
2248	int ret = NET_RX_DROP;
2249	__be16 type;
2250
2251	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2252		return NET_RX_SUCCESS;
2253
2254	/* if we've gotten here through NAPI, check netpoll */
2255	if (netpoll_receive_skb(skb))
2256		return NET_RX_DROP;
2257
2258	if (!skb->tstamp.tv64)
2259		net_timestamp(skb);
2260
2261	if (!skb->iif)
2262		skb->iif = skb->dev->ifindex;
2263
2264	null_or_orig = NULL;
2265	orig_dev = skb->dev;
2266	if (orig_dev->master) {
2267		if (skb_bond_should_drop(skb))
2268			null_or_orig = orig_dev; /* deliver only exact match */
2269		else
2270			skb->dev = orig_dev->master;
2271	}
2272
2273	__get_cpu_var(netdev_rx_stat).total++;
2274
2275	skb_reset_network_header(skb);
2276	skb_reset_transport_header(skb);
2277	skb->mac_len = skb->network_header - skb->mac_header;
2278
2279	pt_prev = NULL;
2280
2281	rcu_read_lock();
2282
2283#ifdef CONFIG_NET_CLS_ACT
2284	if (skb->tc_verd & TC_NCLS) {
2285		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2286		goto ncls;
2287	}
2288#endif
2289
2290	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2291		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2292		    ptype->dev == orig_dev) {
2293			if (pt_prev)
2294				ret = deliver_skb(skb, pt_prev, orig_dev);
2295			pt_prev = ptype;
2296		}
2297	}
2298
2299#ifdef CONFIG_NET_CLS_ACT
2300	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2301	if (!skb)
2302		goto out;
2303ncls:
2304#endif
2305
2306	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2307	if (!skb)
2308		goto out;
2309	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2310	if (!skb)
2311		goto out;
2312
2313	type = skb->protocol;
2314	list_for_each_entry_rcu(ptype,
2315			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2316		if (ptype->type == type &&
2317		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2318		     ptype->dev == orig_dev)) {
2319			if (pt_prev)
2320				ret = deliver_skb(skb, pt_prev, orig_dev);
2321			pt_prev = ptype;
2322		}
2323	}
2324
2325	if (pt_prev) {
2326		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2327	} else {
2328		kfree_skb(skb);
2329		/* Jamal, now you will not able to escape explaining
2330		 * me how you were going to use this. :-)
2331		 */
2332		ret = NET_RX_DROP;
2333	}
2334
2335out:
2336	rcu_read_unlock();
2337	return ret;
2338}
2339
2340/* Network device is going away, flush any packets still pending  */
2341static void flush_backlog(void *arg)
2342{
2343	struct net_device *dev = arg;
2344	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2345	struct sk_buff *skb, *tmp;
2346
2347	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2348		if (skb->dev == dev) {
2349			__skb_unlink(skb, &queue->input_pkt_queue);
2350			kfree_skb(skb);
2351		}
2352}
2353
2354static int napi_gro_complete(struct sk_buff *skb)
2355{
2356	struct packet_type *ptype;
2357	__be16 type = skb->protocol;
2358	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2359	int err = -ENOENT;
2360
2361	if (NAPI_GRO_CB(skb)->count == 1) {
2362		skb_shinfo(skb)->gso_size = 0;
2363		goto out;
2364	}
2365
2366	rcu_read_lock();
2367	list_for_each_entry_rcu(ptype, head, list) {
2368		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2369			continue;
2370
2371		err = ptype->gro_complete(skb);
2372		break;
2373	}
2374	rcu_read_unlock();
2375
2376	if (err) {
2377		WARN_ON(&ptype->list == head);
2378		kfree_skb(skb);
2379		return NET_RX_SUCCESS;
2380	}
2381
2382out:
2383	return netif_receive_skb(skb);
2384}
2385
2386void napi_gro_flush(struct napi_struct *napi)
2387{
2388	struct sk_buff *skb, *next;
2389
2390	for (skb = napi->gro_list; skb; skb = next) {
2391		next = skb->next;
2392		skb->next = NULL;
2393		napi_gro_complete(skb);
2394	}
2395
2396	napi->gro_count = 0;
2397	napi->gro_list = NULL;
2398}
2399EXPORT_SYMBOL(napi_gro_flush);
2400
2401int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2402{
2403	struct sk_buff **pp = NULL;
2404	struct packet_type *ptype;
2405	__be16 type = skb->protocol;
2406	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2407	int same_flow;
2408	int mac_len;
2409	int ret;
2410
2411	if (!(skb->dev->features & NETIF_F_GRO))
2412		goto normal;
2413
2414	if (skb_is_gso(skb) || skb_has_frags(skb))
2415		goto normal;
2416
2417	rcu_read_lock();
2418	list_for_each_entry_rcu(ptype, head, list) {
2419		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2420			continue;
2421
2422		skb_set_network_header(skb, skb_gro_offset(skb));
2423		mac_len = skb->network_header - skb->mac_header;
2424		skb->mac_len = mac_len;
2425		NAPI_GRO_CB(skb)->same_flow = 0;
2426		NAPI_GRO_CB(skb)->flush = 0;
2427		NAPI_GRO_CB(skb)->free = 0;
2428
2429		pp = ptype->gro_receive(&napi->gro_list, skb);
2430		break;
2431	}
2432	rcu_read_unlock();
2433
2434	if (&ptype->list == head)
2435		goto normal;
2436
2437	same_flow = NAPI_GRO_CB(skb)->same_flow;
2438	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2439
2440	if (pp) {
2441		struct sk_buff *nskb = *pp;
2442
2443		*pp = nskb->next;
2444		nskb->next = NULL;
2445		napi_gro_complete(nskb);
2446		napi->gro_count--;
2447	}
2448
2449	if (same_flow)
2450		goto ok;
2451
2452	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2453		goto normal;
2454
2455	napi->gro_count++;
2456	NAPI_GRO_CB(skb)->count = 1;
2457	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2458	skb->next = napi->gro_list;
2459	napi->gro_list = skb;
2460	ret = GRO_HELD;
2461
2462pull:
2463	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2464		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2465
2466		BUG_ON(skb->end - skb->tail < grow);
2467
2468		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2469
2470		skb->tail += grow;
2471		skb->data_len -= grow;
2472
2473		skb_shinfo(skb)->frags[0].page_offset += grow;
2474		skb_shinfo(skb)->frags[0].size -= grow;
2475
2476		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2477			put_page(skb_shinfo(skb)->frags[0].page);
2478			memmove(skb_shinfo(skb)->frags,
2479				skb_shinfo(skb)->frags + 1,
2480				--skb_shinfo(skb)->nr_frags);
2481		}
2482	}
2483
2484ok:
2485	return ret;
2486
2487normal:
2488	ret = GRO_NORMAL;
2489	goto pull;
2490}
2491EXPORT_SYMBOL(dev_gro_receive);
2492
2493static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2494{
2495	struct sk_buff *p;
2496
2497	if (netpoll_rx_on(skb))
2498		return GRO_NORMAL;
2499
2500	for (p = napi->gro_list; p; p = p->next) {
2501		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2502			&& !compare_ether_header(skb_mac_header(p),
2503						 skb_gro_mac_header(skb));
2504		NAPI_GRO_CB(p)->flush = 0;
2505	}
2506
2507	return dev_gro_receive(napi, skb);
2508}
2509
2510int napi_skb_finish(int ret, struct sk_buff *skb)
2511{
2512	int err = NET_RX_SUCCESS;
2513
2514	switch (ret) {
2515	case GRO_NORMAL:
2516		return netif_receive_skb(skb);
2517
2518	case GRO_DROP:
2519		err = NET_RX_DROP;
2520		/* fall through */
2521
2522	case GRO_MERGED_FREE:
2523		kfree_skb(skb);
2524		break;
2525	}
2526
2527	return err;
2528}
2529EXPORT_SYMBOL(napi_skb_finish);
2530
2531void skb_gro_reset_offset(struct sk_buff *skb)
2532{
2533	NAPI_GRO_CB(skb)->data_offset = 0;
2534	NAPI_GRO_CB(skb)->frag0 = NULL;
2535	NAPI_GRO_CB(skb)->frag0_len = 0;
2536
2537	if (skb->mac_header == skb->tail &&
2538	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2539		NAPI_GRO_CB(skb)->frag0 =
2540			page_address(skb_shinfo(skb)->frags[0].page) +
2541			skb_shinfo(skb)->frags[0].page_offset;
2542		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2543	}
2544}
2545EXPORT_SYMBOL(skb_gro_reset_offset);
2546
/*
 * GRO entry point for a complete skb: reset the GRO offsets, run the GRO
 * engine and act on its verdict.  Returns what napi_skb_finish() returns.
 */
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	int verdict;

	skb_gro_reset_offset(skb);
	verdict = __napi_gro_receive(napi, skb);

	return napi_skb_finish(verdict, skb);
}
2553EXPORT_SYMBOL(napi_gro_receive);
2554
2555void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2556{
2557	__skb_pull(skb, skb_headlen(skb));
2558	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2559
2560	napi->skb = skb;
2561}
2562EXPORT_SYMBOL(napi_reuse_skb);
2563
2564struct sk_buff *napi_get_frags(struct napi_struct *napi)
2565{
2566	struct net_device *dev = napi->dev;
2567	struct sk_buff *skb = napi->skb;
2568
2569	if (!skb) {
2570		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2571		if (!skb)
2572			goto out;
2573
2574		skb_reserve(skb, NET_IP_ALIGN);
2575
2576		napi->skb = skb;
2577	}
2578
2579out:
2580	return skb;
2581}
2582EXPORT_SYMBOL(napi_get_frags);
2583
2584int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2585{
2586	int err = NET_RX_SUCCESS;
2587
2588	switch (ret) {
2589	case GRO_NORMAL:
2590	case GRO_HELD:
2591		skb->protocol = eth_type_trans(skb, napi->dev);
2592
2593		if (ret == GRO_NORMAL)
2594			return netif_receive_skb(skb);
2595
2596		skb_gro_pull(skb, -ETH_HLEN);
2597		break;
2598
2599	case GRO_DROP:
2600		err = NET_RX_DROP;
2601		/* fall through */
2602
2603	case GRO_MERGED_FREE:
2604		napi_reuse_skb(napi, skb);
2605		break;
2606	}
2607
2608	return err;
2609}
2610EXPORT_SYMBOL(napi_frags_finish);
2611
2612struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2613{
2614	struct sk_buff *skb = napi->skb;
2615	struct ethhdr *eth;
2616	unsigned int hlen;
2617	unsigned int off;
2618
2619	napi->skb = NULL;
2620
2621	skb_reset_mac_header(skb);
2622	skb_gro_reset_offset(skb);
2623
2624	off = skb_gro_offset(skb);
2625	hlen = off + sizeof(*eth);
2626	eth = skb_gro_header_fast(skb, off);
2627	if (skb_gro_header_hard(skb, hlen)) {
2628		eth = skb_gro_header_slow(skb, hlen, off);
2629		if (unlikely(!eth)) {
2630			napi_reuse_skb(napi, skb);
2631			skb = NULL;
2632			goto out;
2633		}
2634	}
2635
2636	skb_gro_pull(skb, sizeof(*eth));
2637
2638	/*
2639	 * This works because the only protocols we care about don't require
2640	 * special handling.  We'll fix it up properly at the end.
2641	 */
2642	skb->protocol = eth->h_proto;
2643
2644out:
2645	return skb;
2646}
2647EXPORT_SYMBOL(napi_frags_skb);
2648
2649int napi_gro_frags(struct napi_struct *napi)
2650{
2651	struct sk_buff *skb = napi_frags_skb(napi);
2652
2653	if (!skb)
2654		return NET_RX_DROP;
2655
2656	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2657}
2658EXPORT_SYMBOL(napi_gro_frags);
2659
2660static int process_backlog(struct napi_struct *napi, int quota)
2661{
2662	int work = 0;
2663	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2664	unsigned long start_time = jiffies;
2665
2666	napi->weight = weight_p;
2667	do {
2668		struct sk_buff *skb;
2669
2670		local_irq_disable();
2671		skb = __skb_dequeue(&queue->input_pkt_queue);
2672		if (!skb) {
2673			__napi_complete(napi);
2674			local_irq_enable();
2675			break;
2676		}
2677		local_irq_enable();
2678
2679		netif_receive_skb(skb);
2680	} while (++work < quota && jiffies == start_time);
2681
2682	return work;
2683}
2684
2685/**
2686 * __napi_schedule - schedule for receive
2687 * @n: entry to schedule
2688 *
2689 * The entry's receive function will be scheduled to run
2690 */
2691void __napi_schedule(struct napi_struct *n)
2692{
2693	unsigned long flags;
2694
2695	local_irq_save(flags);
2696	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2697	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2698	local_irq_restore(flags);
2699}
2700EXPORT_SYMBOL(__napi_schedule);
2701
2702void __napi_complete(struct napi_struct *n)
2703{
2704	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2705	BUG_ON(n->gro_list);
2706
2707	list_del(&n->poll_list);
2708	smp_mb__before_clear_bit();
2709	clear_bit(NAPI_STATE_SCHED, &n->state);
2710}
2711EXPORT_SYMBOL(__napi_complete);
2712
2713void napi_complete(struct napi_struct *n)
2714{
2715	unsigned long flags;
2716
2717	/*
2718	 * don't let napi dequeue from the cpu poll list
2719	 * just in case its running on a different cpu
2720	 */
2721	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2722		return;
2723
2724	napi_gro_flush(n);
2725	local_irq_save(flags);
2726	__napi_complete(n);
2727	local_irq_restore(flags);
2728}
2729EXPORT_SYMBOL(napi_complete);
2730
2731void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2732		    int (*poll)(struct napi_struct *, int), int weight)
2733{
2734	INIT_LIST_HEAD(&napi->poll_list);
2735	napi->gro_count = 0;
2736	napi->gro_list = NULL;
2737	napi->skb = NULL;
2738	napi->poll = poll;
2739	napi->weight = weight;
2740	list_add(&napi->dev_list, &dev->napi_list);
2741	napi->dev = dev;
2742#ifdef CONFIG_NETPOLL
2743	spin_lock_init(&napi->poll_lock);
2744	napi->poll_owner = -1;
2745#endif
2746	set_bit(NAPI_STATE_SCHED, &napi->state);
2747}
2748EXPORT_SYMBOL(netif_napi_add);
2749
2750void netif_napi_del(struct napi_struct *napi)
2751{
2752	struct sk_buff *skb, *next;
2753
2754	list_del_init(&napi->dev_list);
2755	napi_free_frags(napi);
2756
2757	for (skb = napi->gro_list; skb; skb = next) {
2758		next = skb->next;
2759		skb->next = NULL;
2760		kfree_skb(skb);
2761	}
2762
2763	napi->gro_list = NULL;
2764	napi->gro_count = 0;
2765}
2766EXPORT_SYMBOL(netif_napi_del);
2767
2768
2769static void net_rx_action(struct softirq_action *h)
2770{
2771	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2772	unsigned long time_limit = jiffies + 2;
2773	int budget = netdev_budget;
2774	void *have;
2775
2776	local_irq_disable();
2777
2778	while (!list_empty(list)) {
2779		struct napi_struct *n;
2780		int work, weight;
2781
2782		/* If softirq window is exhuasted then punt.
2783		 * Allow this to run for 2 jiffies since which will allow
2784		 * an average latency of 1.5/HZ.
2785		 */
2786		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2787			goto softnet_break;
2788
2789		local_irq_enable();
2790
2791		/* Even though interrupts have been re-enabled, this
2792		 * access is safe because interrupts can only add new
2793		 * entries to the tail of this list, and only ->poll()
2794		 * calls can remove this head entry from the list.
2795		 */
2796		n = list_entry(list->next, struct napi_struct, poll_list);
2797
2798		have = netpoll_poll_lock(n);
2799
2800		weight = n->weight;
2801
2802		/* This NAPI_STATE_SCHED test is for avoiding a race
2803		 * with netpoll's poll_napi().  Only the entity which
2804		 * obtains the lock and sees NAPI_STATE_SCHED set will
2805		 * actually make the ->poll() call.  Therefore we avoid
2806		 * accidently calling ->poll() when NAPI is not scheduled.
2807		 */
2808		work = 0;
2809		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
2810			work = n->poll(n, weight);
2811			trace_napi_poll(n);
2812		}
2813
2814		WARN_ON_ONCE(work > weight);
2815
2816		budget -= work;
2817
2818		local_irq_disable();
2819
2820		/* Drivers must not modify the NAPI state if they
2821		 * consume the entire weight.  In such cases this code
2822		 * still "owns" the NAPI instance and therefore can
2823		 * move the instance around on the list at-will.
2824		 */
2825		if (unlikely(work == weight)) {
2826			if (unlikely(napi_disable_pending(n))) {
2827				local_irq_enable();
2828				napi_complete(n);
2829				local_irq_disable();
2830			} else
2831				list_move_tail(&n->poll_list, list);
2832		}
2833
2834		netpoll_poll_unlock(have);
2835	}
2836out:
2837	local_irq_enable();
2838
2839#ifdef CONFIG_NET_DMA
2840	/*
2841	 * There may not be any more sk_buffs coming right now, so push
2842	 * any pending DMA copies to hardware
2843	 */
2844	dma_issue_pending_all();
2845#endif
2846
2847	return;
2848
2849softnet_break:
2850	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2851	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2852	goto out;
2853}
2854
2855static gifconf_func_t * gifconf_list [NPROTO];
2856
2857/**
2858 *	register_gifconf	-	register a SIOCGIF handler
2859 *	@family: Address family
2860 *	@gifconf: Function handler
2861 *
2862 *	Register protocol dependent address dumping routines. The handler
2863 *	that is passed must not be freed or reused until it has been replaced
2864 *	by another handler.
2865 */
2866int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2867{
2868	if (family >= NPROTO)
2869		return -EINVAL;
2870	gifconf_list[family] = gifconf;
2871	return 0;
2872}
2873
2874
2875/*
2876 *	Map an interface index to its name (SIOCGIFNAME)
2877 */
2878
2879/*
2880 *	We need this ioctl for efficient implementation of the
2881 *	if_indextoname() function required by the IPv6 API.  Without
2882 *	it, we would have to search all the interfaces to find a
2883 *	match.  --pb
2884 */
2885
2886static int dev_ifname(struct net *net, struct ifreq __user *arg)
2887{
2888	struct net_device *dev;
2889	struct ifreq ifr;
2890
2891	/*
2892	 *	Fetch the caller's info block.
2893	 */
2894
2895	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2896		return -EFAULT;
2897
2898	read_lock(&dev_base_lock);
2899	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2900	if (!dev) {
2901		read_unlock(&dev_base_lock);
2902		return -ENODEV;
2903	}
2904
2905	strcpy(ifr.ifr_name, dev->name);
2906	read_unlock(&dev_base_lock);
2907
2908	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2909		return -EFAULT;
2910	return 0;
2911}
2912
2913/*
2914 *	Perform a SIOCGIFCONF call. This structure will change
2915 *	size eventually, and there is nothing I can do about it.
2916 *	Thus we will need a 'compatibility mode'.
2917 */
2918
2919static int dev_ifconf(struct net *net, char __user *arg)
2920{
2921	struct ifconf ifc;
2922	struct net_device *dev;
2923	char __user *pos;
2924	int len;
2925	int total;
2926	int i;
2927
2928	/*
2929	 *	Fetch the caller's info block.
2930	 */
2931
2932	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2933		return -EFAULT;
2934
2935	pos = ifc.ifc_buf;
2936	len = ifc.ifc_len;
2937
2938	/*
2939	 *	Loop over the interfaces, and write an info block for each.
2940	 */
2941
2942	total = 0;
2943	for_each_netdev(net, dev) {
2944		for (i = 0; i < NPROTO; i++) {
2945			if (gifconf_list[i]) {
2946				int done;
2947				if (!pos)
2948					done = gifconf_list[i](dev, NULL, 0);
2949				else
2950					done = gifconf_list[i](dev, pos + total,
2951							       len - total);
2952				if (done < 0)
2953					return -EFAULT;
2954				total += done;
2955			}
2956		}
2957	}
2958
2959	/*
2960	 *	All done.  Write the updated control block back to the caller.
2961	 */
2962	ifc.ifc_len = total;
2963
2964	/*
2965	 * 	Both BSD and Solaris return 0 here, so we do too.
2966	 */
2967	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2968}
2969
2970#ifdef CONFIG_PROC_FS
2971/*
2972 *	This is invoked by the /proc filesystem handler to display a device
2973 *	in detail.
2974 */
2975void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2976	__acquires(dev_base_lock)
2977{
2978	struct net *net = seq_file_net(seq);
2979	loff_t off;
2980	struct net_device *dev;
2981
2982	read_lock(&dev_base_lock);
2983	if (!*pos)
2984		return SEQ_START_TOKEN;
2985
2986	off = 1;
2987	for_each_netdev(net, dev)
2988		if (off++ == *pos)
2989			return dev;
2990
2991	return NULL;
2992}
2993
2994void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2995{
2996	struct net *net = seq_file_net(seq);
2997	++*pos;
2998	return v == SEQ_START_TOKEN ?
2999		first_net_device(net) : next_net_device((struct net_device *)v);
3000}
3001
3002void dev_seq_stop(struct seq_file *seq, void *v)
3003	__releases(dev_base_lock)
3004{
3005	read_unlock(&dev_base_lock);
3006}
3007
3008static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3009{
3010	const struct net_device_stats *stats = dev_get_stats(dev);
3011
3012	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3013		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3014		   dev->name, stats->rx_bytes, stats->rx_packets,
3015		   stats->rx_errors,
3016		   stats->rx_dropped + stats->rx_missed_errors,
3017		   stats->rx_fifo_errors,
3018		   stats->rx_length_errors + stats->rx_over_errors +
3019		    stats->rx_crc_errors + stats->rx_frame_errors,
3020		   stats->rx_compressed, stats->multicast,
3021		   stats->tx_bytes, stats->tx_packets,
3022		   stats->tx_errors, stats->tx_dropped,
3023		   stats->tx_fifo_errors, stats->collisions,
3024		   stats->tx_carrier_errors +
3025		    stats->tx_aborted_errors +
3026		    stats->tx_window_errors +
3027		    stats->tx_heartbeat_errors,
3028		   stats->tx_compressed);
3029}
3030
3031/*
3032 *	Called from the PROCfs module. This now uses the new arbitrary sized
3033 *	/proc/net interface to create /proc/net/dev
3034 */
3035static int dev_seq_show(struct seq_file *seq, void *v)
3036{
3037	if (v == SEQ_START_TOKEN)
3038		seq_puts(seq, "Inter-|   Receive                            "
3039			      "                    |  Transmit\n"
3040			      " face |bytes    packets errs drop fifo frame "
3041			      "compressed multicast|bytes    packets errs "
3042			      "drop fifo colls carrier compressed\n");
3043	else
3044		dev_seq_printf_stats(seq, v);
3045	return 0;
3046}
3047
3048static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3049{
3050	struct netif_rx_stats *rc = NULL;
3051
3052	while (*pos < nr_cpu_ids)
3053		if (cpu_online(*pos)) {
3054			rc = &per_cpu(netdev_rx_stat, *pos);
3055			break;
3056		} else
3057			++*pos;
3058	return rc;
3059}
3060
3061static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3062{
3063	return softnet_get_online(pos);
3064}
3065
3066static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3067{
3068	++*pos;
3069	return softnet_get_online(pos);
3070}
3071
3072static void softnet_seq_stop(struct seq_file *seq, void *v)
3073{
3074}
3075
3076static int softnet_seq_show(struct seq_file *seq, void *v)
3077{
3078	struct netif_rx_stats *s = v;
3079
3080	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3081		   s->total, s->dropped, s->time_squeeze, 0,
3082		   0, 0, 0, 0, /* was fastroute */
3083		   s->cpu_collision );
3084	return 0;
3085}
3086
3087static const struct seq_operations dev_seq_ops = {
3088	.start = dev_seq_start,
3089	.next  = dev_seq_next,
3090	.stop  = dev_seq_stop,
3091	.show  = dev_seq_show,
3092};
3093
3094static int dev_seq_open(struct inode *inode, struct file *file)
3095{
3096	return seq_open_net(inode, file, &dev_seq_ops,
3097			    sizeof(struct seq_net_private));
3098}
3099
3100static const struct file_operations dev_seq_fops = {
3101	.owner	 = THIS_MODULE,
3102	.open    = dev_seq_open,
3103	.read    = seq_read,
3104	.llseek  = seq_lseek,
3105	.release = seq_release_net,
3106};
3107
3108static const struct seq_operations softnet_seq_ops = {
3109	.start = softnet_seq_start,
3110	.next  = softnet_seq_next,
3111	.stop  = softnet_seq_stop,
3112	.show  = softnet_seq_show,
3113};
3114
3115static int softnet_seq_open(struct inode *inode, struct file *file)
3116{
3117	return seq_open(file, &softnet_seq_ops);
3118}
3119
3120static const struct file_operations softnet_seq_fops = {
3121	.owner	 = THIS_MODULE,
3122	.open    = softnet_seq_open,
3123	.read    = seq_read,
3124	.llseek  = seq_lseek,
3125	.release = seq_release,
3126};
3127
3128static void *ptype_get_idx(loff_t pos)
3129{
3130	struct packet_type *pt = NULL;
3131	loff_t i = 0;
3132	int t;
3133
3134	list_for_each_entry_rcu(pt, &ptype_all, list) {
3135		if (i == pos)
3136			return pt;
3137		++i;
3138	}
3139
3140	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3141		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3142			if (i == pos)
3143				return pt;
3144			++i;
3145		}
3146	}
3147	return NULL;
3148}
3149
3150static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3151	__acquires(RCU)
3152{
3153	rcu_read_lock();
3154	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3155}
3156
3157static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3158{
3159	struct packet_type *pt;
3160	struct list_head *nxt;
3161	int hash;
3162
3163	++*pos;
3164	if (v == SEQ_START_TOKEN)
3165		return ptype_get_idx(0);
3166
3167	pt = v;
3168	nxt = pt->list.next;
3169	if (pt->type == htons(ETH_P_ALL)) {
3170		if (nxt != &ptype_all)
3171			goto found;
3172		hash = 0;
3173		nxt = ptype_base[0].next;
3174	} else
3175		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3176
3177	while (nxt == &ptype_base[hash]) {
3178		if (++hash >= PTYPE_HASH_SIZE)
3179			return NULL;
3180		nxt = ptype_base[hash].next;
3181	}
3182found:
3183	return list_entry(nxt, struct packet_type, list);
3184}
3185
3186static void ptype_seq_stop(struct seq_file *seq, void *v)
3187	__releases(RCU)
3188{
3189	rcu_read_unlock();
3190}
3191
3192static int ptype_seq_show(struct seq_file *seq, void *v)
3193{
3194	struct packet_type *pt = v;
3195
3196	if (v == SEQ_START_TOKEN)
3197		seq_puts(seq, "Type Device      Function\n");
3198	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3199		if (pt->type == htons(ETH_P_ALL))
3200			seq_puts(seq, "ALL ");
3201		else
3202			seq_printf(seq, "%04x", ntohs(pt->type));
3203
3204		seq_printf(seq, " %-8s %pF\n",
3205			   pt->dev ? pt->dev->name : "", pt->func);
3206	}
3207
3208	return 0;
3209}
3210
3211static const struct seq_operations ptype_seq_ops = {
3212	.start = ptype_seq_start,
3213	.next  = ptype_seq_next,
3214	.stop  = ptype_seq_stop,
3215	.show  = ptype_seq_show,
3216};
3217
3218static int ptype_seq_open(struct inode *inode, struct file *file)
3219{
3220	return seq_open_net(inode, file, &ptype_seq_ops,
3221			sizeof(struct seq_net_private));
3222}
3223
3224static const struct file_operations ptype_seq_fops = {
3225	.owner	 = THIS_MODULE,
3226	.open    = ptype_seq_open,
3227	.read    = seq_read,
3228	.llseek  = seq_lseek,
3229	.release = seq_release_net,
3230};
3231
3232
3233static int __net_init dev_proc_net_init(struct net *net)
3234{
3235	int rc = -ENOMEM;
3236
3237	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3238		goto out;
3239	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3240		goto out_dev;
3241	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3242		goto out_softnet;
3243
3244	if (wext_proc_init(net))
3245		goto out_ptype;
3246	rc = 0;
3247out:
3248	return rc;
3249out_ptype:
3250	proc_net_remove(net, "ptype");
3251out_softnet:
3252	proc_net_remove(net, "softnet_stat");
3253out_dev:
3254	proc_net_remove(net, "dev");
3255	goto out;
3256}
3257
3258static void __net_exit dev_proc_net_exit(struct net *net)
3259{
3260	wext_proc_exit(net);
3261
3262	proc_net_remove(net, "ptype");
3263	proc_net_remove(net, "softnet_stat");
3264	proc_net_remove(net, "dev");
3265}
3266
3267static struct pernet_operations __net_initdata dev_proc_ops = {
3268	.init = dev_proc_net_init,
3269	.exit = dev_proc_net_exit,
3270};
3271
3272static int __init dev_proc_init(void)
3273{
3274	return register_pernet_subsys(&dev_proc_ops);
3275}
3276#else
3277#define dev_proc_init() 0
3278#endif	/* CONFIG_PROC_FS */
3279
3280
3281/**
3282 *	netdev_set_master	-	set up master/slave pair
3283 *	@slave: slave device
3284 *	@master: new master device
3285 *
3286 *	Changes the master device of the slave. Pass %NULL to break the
3287 *	bonding. The caller must hold the RTNL semaphore. On a failure
3288 *	a negative errno code is returned. On success the reference counts
3289 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3290 *	function returns zero.
3291 */
3292int netdev_set_master(struct net_device *slave, struct net_device *master)
3293{
3294	struct net_device *old = slave->master;
3295
3296	ASSERT_RTNL();
3297
3298	if (master) {
3299		if (old)
3300			return -EBUSY;
3301		dev_hold(master);
3302	}
3303
3304	slave->master = master;
3305
3306	synchronize_net();
3307
3308	if (old)
3309		dev_put(old);
3310
3311	if (master)
3312		slave->flags |= IFF_SLAVE;
3313	else
3314		slave->flags &= ~IFF_SLAVE;
3315
3316	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3317	return 0;
3318}
3319
3320static void dev_change_rx_flags(struct net_device *dev, int flags)
3321{
3322	const struct net_device_ops *ops = dev->netdev_ops;
3323
3324	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3325		ops->ndo_change_rx_flags(dev, flags);
3326}
3327
/*
 *	Adjust the promiscuity count of @dev by @inc and keep IFF_PROMISC in
 *	dev->flags in step with it. The caller must hold the RTNL.
 *
 *	Returns 0 on success. If the count ends up at zero after a
 *	non-negative @inc, this is treated as a counter wrap: the count is
 *	restored and -EOVERFLOW is returned.
 *
 *	When dev->flags actually changes, the transition is logged, an audit
 *	record is written if auditing is enabled, and the driver is told via
 *	dev_change_rx_flags(). Unlike dev_set_promiscuity(), this helper does
 *	not call dev_set_rx_mode().
 */
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	uid_t uid;
	gid_t gid;

	ASSERT_RTNL();

	/* Set the flag optimistically; it is cleared below if the count hits 0. */
	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			/*
			 * NOTE(review): only the counter is rolled back here;
			 * the IFF_PROMISC bit set above is left as it is.
			 */
			dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity touches roof, "
				"set promiscuity failed, promiscuity feature "
				"of device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
							       "left");
		if (audit_enabled) {
			/* Record who toggled promiscuous mode. */
			current_uid_gid(&uid, &gid);
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				audit_get_loginuid(current),
				uid, gid,
				audit_get_sessionid(current));
		}

		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}
3373
3374/**
3375 *	dev_set_promiscuity	- update promiscuity count on a device
3376 *	@dev: device
3377 *	@inc: modifier
3378 *
3379 *	Add or remove promiscuity from a device. While the count in the device
3380 *	remains above zero the interface remains promiscuous. Once it hits zero
3381 *	the device reverts back to normal filtering operation. A negative inc
3382 *	value is used to drop promiscuity on the device.
3383 *	Return 0 if successful or a negative errno code on error.
3384 */
3385int dev_set_promiscuity(struct net_device *dev, int inc)
3386{
3387	unsigned short old_flags = dev->flags;
3388	int err;
3389
3390	err = __dev_set_promiscuity(dev, inc);
3391	if (err < 0)
3392		return err;
3393	if (dev->flags != old_flags)
3394		dev_set_rx_mode(dev);
3395	return err;
3396}
3397
3398/**
3399 *	dev_set_allmulti	- update allmulti count on a device
3400 *	@dev: device
3401 *	@inc: modifier
3402 *
3403 *	Add or remove reception of all multicast frames to a device. While the
3404 *	count in the device remains above zero the interface remains listening
3405 *	to all interfaces. Once it hits zero the device reverts back to normal
3406 *	filtering operation. A negative @inc value is used to drop the counter
3407 *	when releasing a resource needing all multicasts.
3408 *	Return 0 if successful or a negative errno code on error.
3409 */
3410
3411int dev_set_allmulti(struct net_device *dev, int inc)
3412{
3413	unsigned short old_flags = dev->flags;
3414
3415	ASSERT_RTNL();
3416
3417	dev->flags |= IFF_ALLMULTI;
3418	dev->allmulti += inc;
3419	if (dev->allmulti == 0) {
3420		/*
3421		 * Avoid overflow.
3422		 * If inc causes overflow, untouch allmulti and return error.
3423		 */
3424		if (inc < 0)
3425			dev->flags &= ~IFF_ALLMULTI;
3426		else {
3427			dev->allmulti -= inc;
3428			printk(KERN_WARNING "%s: allmulti touches roof, "
3429				"set allmulti failed, allmulti feature of "
3430				"device might be broken.\n", dev->name);
3431			return -EOVERFLOW;
3432		}
3433	}
3434	if (dev->flags ^ old_flags) {
3435		dev_change_rx_flags(dev, IFF_ALLMULTI);
3436		dev_set_rx_mode(dev);
3437	}
3438	return 0;
3439}
3440
3441/*
3442 *	Upload unicast and multicast address lists to device and
3443 *	configure RX filtering. When the device doesn't support unicast
3444 *	filtering it is put in promiscuous mode while unicast addresses
3445 *	are present.
3446 */
3447void __dev_set_rx_mode(struct net_device *dev)
3448{
3449	const struct net_device_ops *ops = dev->netdev_ops;
3450
3451	/* dev_open will call this function so the list will stay sane. */
3452	if (!(dev->flags&IFF_UP))
3453		return;
3454
3455	if (!netif_device_present(dev))
3456		return;
3457
3458	if (ops->ndo_set_rx_mode)
3459		ops->ndo_set_rx_mode(dev);
3460	else {
3461		/* Unicast addresses changes may only happen under the rtnl,
3462		 * therefore calling __dev_set_promiscuity here is safe.
3463		 */
3464		if (dev->uc.count > 0 && !dev->uc_promisc) {
3465			__dev_set_promiscuity(dev, 1);
3466			dev->uc_promisc = 1;
3467		} else if (dev->uc.count == 0 && dev->uc_promisc) {
3468			__dev_set_promiscuity(dev, -1);
3469			dev->uc_promisc = 0;
3470		}
3471
3472		if (ops->ndo_set_multicast_list)
3473			ops->ndo_set_multicast_list(dev);
3474	}
3475}
3476
/*
 *	Locked variant of __dev_set_rx_mode(): reprogram the RX filter of
 *	@dev while holding its address lock (taken with netif_addr_lock_bh).
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}
3483
3484/* hw addresses list handling functions */
3485
3486static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3487			 int addr_len, unsigned char addr_type)
3488{
3489	struct netdev_hw_addr *ha;
3490	int alloc_size;
3491
3492	if (addr_len > MAX_ADDR_LEN)
3493		return -EINVAL;
3494
3495	list_for_each_entry(ha, &list->list, list) {
3496		if (!memcmp(ha->addr, addr, addr_len) &&
3497		    ha->type == addr_type) {
3498			ha->refcount++;
3499			return 0;
3500		}
3501	}
3502
3503
3504	alloc_size = sizeof(*ha);
3505	if (alloc_size < L1_CACHE_BYTES)
3506		alloc_size = L1_CACHE_BYTES;
3507	ha = kmalloc(alloc_size, GFP_ATOMIC);
3508	if (!ha)
3509		return -ENOMEM;
3510	memcpy(ha->addr, addr, addr_len);
3511	ha->type = addr_type;
3512	ha->refcount = 1;
3513	ha->synced = false;
3514	list_add_tail_rcu(&ha->list, &list->list);
3515	list->count++;
3516	return 0;
3517}
3518
3519static void ha_rcu_free(struct rcu_head *head)
3520{
3521	struct netdev_hw_addr *ha;
3522
3523	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3524	kfree(ha);
3525}
3526
3527static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3528			 int addr_len, unsigned char addr_type)
3529{
3530	struct netdev_hw_addr *ha;
3531
3532	list_for_each_entry(ha, &list->list, list) {
3533		if (!memcmp(ha->addr, addr, addr_len) &&
3534		    (ha->type == addr_type || !addr_type)) {
3535			if (--ha->refcount)
3536				return 0;
3537			list_del_rcu(&ha->list);
3538			call_rcu(&ha->rcu_head, ha_rcu_free);
3539			list->count--;
3540			return 0;
3541		}
3542	}
3543	return -ENOENT;
3544}
3545
3546static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3547				  struct netdev_hw_addr_list *from_list,
3548				  int addr_len,
3549				  unsigned char addr_type)
3550{
3551	int err;
3552	struct netdev_hw_addr *ha, *ha2;
3553	unsigned char type;
3554
3555	list_for_each_entry(ha, &from_list->list, list) {
3556		type = addr_type ? addr_type : ha->type;
3557		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3558		if (err)
3559			goto unroll;
3560	}
3561	return 0;
3562
3563unroll:
3564	list_for_each_entry(ha2, &from_list->list, list) {
3565		if (ha2 == ha)
3566			break;
3567		type = addr_type ? addr_type : ha2->type;
3568		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3569	}
3570	return err;
3571}
3572
3573static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3574				   struct netdev_hw_addr_list *from_list,
3575				   int addr_len,
3576				   unsigned char addr_type)
3577{
3578	struct netdev_hw_addr *ha;
3579	unsigned char type;
3580
3581	list_for_each_entry(ha, &from_list->list, list) {
3582		type = addr_type ? addr_type : ha->type;
3583		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
3584	}
3585}
3586
3587static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3588			  struct netdev_hw_addr_list *from_list,
3589			  int addr_len)
3590{
3591	int err = 0;
3592	struct netdev_hw_addr *ha, *tmp;
3593
3594	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3595		if (!ha->synced) {
3596			err = __hw_addr_add(to_list, ha->addr,
3597					    addr_len, ha->type);
3598			if (err)
3599				break;
3600			ha->synced = true;
3601			ha->refcount++;
3602		} else if (ha->refcount == 1) {
3603			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3604			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3605		}
3606	}
3607	return err;
3608}
3609
3610static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3611			     struct netdev_hw_addr_list *from_list,
3612			     int addr_len)
3613{
3614	struct netdev_hw_addr *ha, *tmp;
3615
3616	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3617		if (ha->synced) {
3618			__hw_addr_del(to_list, ha->addr,
3619				      addr_len, ha->type);
3620			ha->synced = false;
3621			__hw_addr_del(from_list, ha->addr,
3622				      addr_len, ha->type);
3623		}
3624	}
3625}
3626
3627static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3628{
3629	struct netdev_hw_addr *ha, *tmp;
3630
3631	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3632		list_del_rcu(&ha->list);
3633		call_rcu(&ha->rcu_head, ha_rcu_free);
3634	}
3635	list->count = 0;
3636}
3637
3638static void __hw_addr_init(struct netdev_hw_addr_list *list)
3639{
3640	INIT_LIST_HEAD(&list->list);
3641	list->count = 0;
3642}
3643
3644/* Device addresses handling functions */
3645
/*
 *	Remove all entries from the device address list of @dev and clear
 *	dev->dev_addr so it no longer refers to a flushed entry.
 */
static void dev_addr_flush(struct net_device *dev)
{
	/* rtnl_mutex must be held here */

	__hw_addr_flush(&dev->dev_addrs);
	dev->dev_addr = NULL;
}
3653
3654static int dev_addr_init(struct net_device *dev)
3655{
3656	unsigned char addr[MAX_ADDR_LEN];
3657	struct netdev_hw_addr *ha;
3658	int err;
3659
3660	/* rtnl_mutex must be held here */
3661
3662	__hw_addr_init(&dev->dev_addrs);
3663	memset(addr, 0, sizeof(addr));
3664	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3665			    NETDEV_HW_ADDR_T_LAN);
3666	if (!err) {
3667		/*
3668		 * Get the first (previously created) address from the list
3669		 * and set dev_addr pointer to this location.
3670		 */
3671		ha = list_first_entry(&dev->dev_addrs.list,
3672				      struct netdev_hw_addr, list);
3673		dev->dev_addr = ha->addr;
3674	}
3675	return err;
3676}
3677
3678/**
3679 *	dev_addr_add	- Add a device address
3680 *	@dev: device
3681 *	@addr: address to add
3682 *	@addr_type: address type
3683 *
3684 *	Add a device address to the device or increase the reference count if
3685 *	it already exists.
3686 *
3687 *	The caller must hold the rtnl_mutex.
3688 */
3689int dev_addr_add(struct net_device *dev, unsigned char *addr,
3690		 unsigned char addr_type)
3691{
3692	int err;
3693
3694	ASSERT_RTNL();
3695
3696	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3697	if (!err)
3698		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3699	return err;
3700}
3701EXPORT_SYMBOL(dev_addr_add);
3702
3703/**
3704 *	dev_addr_del	- Release a device address.
3705 *	@dev: device
3706 *	@addr: address to delete
3707 *	@addr_type: address type
3708 *
3709 *	Release reference to a device address and remove it from the device
3710 *	if the reference count drops to zero.
3711 *
3712 *	The caller must hold the rtnl_mutex.
3713 */
3714int dev_addr_del(struct net_device *dev, unsigned char *addr,
3715		 unsigned char addr_type)
3716{
3717	int err;
3718	struct netdev_hw_addr *ha;
3719
3720	ASSERT_RTNL();
3721
3722	/*
3723	 * We can not remove the first address from the list because
3724	 * dev->dev_addr points to that.
3725	 */
3726	ha = list_first_entry(&dev->dev_addrs.list,
3727			      struct netdev_hw_addr, list);
3728	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3729		return -ENOENT;
3730
3731	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3732			    addr_type);
3733	if (!err)
3734		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3735	return err;
3736}
3737EXPORT_SYMBOL(dev_addr_del);
3738
3739/**
3740 *	dev_addr_add_multiple	- Add device addresses from another device
3741 *	@to_dev: device to which addresses will be added
3742 *	@from_dev: device from which addresses will be added
3743 *	@addr_type: address type - 0 means type will be used from from_dev
3744 *
3745 *	Add device addresses of the one device to another.
3746 **
3747 *	The caller must hold the rtnl_mutex.
3748 */
3749int dev_addr_add_multiple(struct net_device *to_dev,
3750			  struct net_device *from_dev,
3751			  unsigned char addr_type)
3752{
3753	int err;
3754
3755	ASSERT_RTNL();
3756
3757	if (from_dev->addr_len != to_dev->addr_len)
3758		return -EINVAL;
3759	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3760				     to_dev->addr_len, addr_type);
3761	if (!err)
3762		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3763	return err;
3764}
3765EXPORT_SYMBOL(dev_addr_add_multiple);
3766
3767/**
3768 *	dev_addr_del_multiple	- Delete device addresses by another device
3769 *	@to_dev: device where the addresses will be deleted
3770 *	@from_dev: device by which addresses the addresses will be deleted
3771 *	@addr_type: address type - 0 means type will used from from_dev
3772 *
3773 *	Deletes addresses in to device by the list of addresses in from device.
3774 *
3775 *	The caller must hold the rtnl_mutex.
3776 */
3777int dev_addr_del_multiple(struct net_device *to_dev,
3778			  struct net_device *from_dev,
3779			  unsigned char addr_type)
3780{
3781	ASSERT_RTNL();
3782
3783	if (from_dev->addr_len != to_dev->addr_len)
3784		return -EINVAL;
3785	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3786			       to_dev->addr_len, addr_type);
3787	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3788	return 0;
3789}
3790EXPORT_SYMBOL(dev_addr_del_multiple);
3791
3792/* multicast addresses handling functions */
3793
3794int __dev_addr_delete(struct dev_addr_list **list, int *count,
3795		      void *addr, int alen, int glbl)
3796{
3797	struct dev_addr_list *da;
3798
3799	for (; (da = *list) != NULL; list = &da->next) {
3800		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3801		    alen == da->da_addrlen) {
3802			if (glbl) {
3803				int old_glbl = da->da_gusers;
3804				da->da_gusers = 0;
3805				if (old_glbl == 0)
3806					break;
3807			}
3808			if (--da->da_users)
3809				return 0;
3810
3811			*list = da->next;
3812			kfree(da);
3813			(*count)--;
3814			return 0;
3815		}
3816	}
3817	return -ENOENT;
3818}
3819
3820int __dev_addr_add(struct dev_addr_list **list, int *count,
3821		   void *addr, int alen, int glbl)
3822{
3823	struct dev_addr_list *da;
3824
3825	for (da = *list; da != NULL; da = da->next) {
3826		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3827		    da->da_addrlen == alen) {
3828			if (glbl) {
3829				int old_glbl = da->da_gusers;
3830				da->da_gusers = 1;
3831				if (old_glbl)
3832					return 0;
3833			}
3834			da->da_users++;
3835			return 0;
3836		}
3837	}
3838
3839	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3840	if (da == NULL)
3841		return -ENOMEM;
3842	memcpy(da->da_addr, addr, alen);
3843	da->da_addrlen = alen;
3844	da->da_users = 1;
3845	da->da_gusers = glbl ? 1 : 0;
3846	da->next = *list;
3847	*list = da;
3848	(*count)++;
3849	return 0;
3850}
3851
3852/**
3853 *	dev_unicast_delete	- Release secondary unicast address.
3854 *	@dev: device
3855 *	@addr: address to delete
3856 *
3857 *	Release reference to a secondary unicast address and remove it
3858 *	from the device if the reference count drops to zero.
3859 *
3860 * 	The caller must hold the rtnl_mutex.
3861 */
3862int dev_unicast_delete(struct net_device *dev, void *addr)
3863{
3864	int err;
3865
3866	ASSERT_RTNL();
3867
3868	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
3869			    NETDEV_HW_ADDR_T_UNICAST);
3870	if (!err)
3871		__dev_set_rx_mode(dev);
3872	return err;
3873}
3874EXPORT_SYMBOL(dev_unicast_delete);
3875
3876/**
3877 *	dev_unicast_add		- add a secondary unicast address
3878 *	@dev: device
3879 *	@addr: address to add
3880 *
3881 *	Add a secondary unicast address to the device or increase
3882 *	the reference count if it already exists.
3883 *
3884 *	The caller must hold the rtnl_mutex.
3885 */
3886int dev_unicast_add(struct net_device *dev, void *addr)
3887{
3888	int err;
3889
3890	ASSERT_RTNL();
3891
3892	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
3893			    NETDEV_HW_ADDR_T_UNICAST);
3894	if (!err)
3895		__dev_set_rx_mode(dev);
3896	return err;
3897}
3898EXPORT_SYMBOL(dev_unicast_add);
3899
3900int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3901		    struct dev_addr_list **from, int *from_count)
3902{
3903	struct dev_addr_list *da, *next;
3904	int err = 0;
3905
3906	da = *from;
3907	while (da != NULL) {
3908		next = da->next;
3909		if (!da->da_synced) {
3910			err = __dev_addr_add(to, to_count,
3911					     da->da_addr, da->da_addrlen, 0);
3912			if (err < 0)
3913				break;
3914			da->da_synced = 1;
3915			da->da_users++;
3916		} else if (da->da_users == 1) {
3917			__dev_addr_delete(to, to_count,
3918					  da->da_addr, da->da_addrlen, 0);
3919			__dev_addr_delete(from, from_count,
3920					  da->da_addr, da->da_addrlen, 0);
3921		}
3922		da = next;
3923	}
3924	return err;
3925}
3926
3927void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3928		       struct dev_addr_list **from, int *from_count)
3929{
3930	struct dev_addr_list *da, *next;
3931
3932	da = *from;
3933	while (da != NULL) {
3934		next = da->next;
3935		if (da->da_synced) {
3936			__dev_addr_delete(to, to_count,
3937					  da->da_addr, da->da_addrlen, 0);
3938			da->da_synced = 0;
3939			__dev_addr_delete(from, from_count,
3940					  da->da_addr, da->da_addrlen, 0);
3941		}
3942		da = next;
3943	}
3944}
3945
3946/**
3947 *	dev_unicast_sync - Synchronize device's unicast list to another device
3948 *	@to: destination device
3949 *	@from: source device
3950 *
3951 *	Add newly added addresses to the destination device and release
3952 *	addresses that have no users left.
3953 *
3954 *	This function is intended to be called from the dev->set_rx_mode
3955 *	function of layered software devices.
3956 */
3957int dev_unicast_sync(struct net_device *to, struct net_device *from)
3958{
3959	int err = 0;
3960
3961	ASSERT_RTNL();
3962
3963	if (to->addr_len != from->addr_len)
3964		return -EINVAL;
3965
3966	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
3967	if (!err)
3968		__dev_set_rx_mode(to);
3969	return err;
3970}
3971EXPORT_SYMBOL(dev_unicast_sync);
3972
3973/**
3974 *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3975 *	@to: destination device
3976 *	@from: source device
3977 *
3978 *	Remove all addresses that were added to the destination device by
3979 *	dev_unicast_sync(). This function is intended to be called from the
3980 *	dev->stop function of layered software devices.
3981 */
3982void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3983{
3984	ASSERT_RTNL();
3985
3986	if (to->addr_len != from->addr_len)
3987		return;
3988
3989	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
3990	__dev_set_rx_mode(to);
3991}
3992EXPORT_SYMBOL(dev_unicast_unsync);
3993
/* Remove every entry from the secondary unicast address list of @dev. */
static void dev_unicast_flush(struct net_device *dev)
{
	/* rtnl_mutex must be held here */

	__hw_addr_flush(&dev->uc);
}
4000
/* Set up the secondary unicast address list of @dev as empty. */
static void dev_unicast_init(struct net_device *dev)
{
	/* rtnl_mutex must be held here */

	__hw_addr_init(&dev->uc);
}
4007
4008
4009static void __dev_addr_discard(struct dev_addr_list **list)
4010{
4011	struct dev_addr_list *tmp;
4012
4013	while (*list != NULL) {
4014		tmp = *list;
4015		*list = tmp->next;
4016		if (tmp->da_users > tmp->da_gusers)
4017			printk("__dev_addr_discard: address leakage! "
4018			       "da_users=%d\n", tmp->da_users);
4019		kfree(tmp);
4020	}
4021}
4022
/*
 *	Throw away the whole multicast address list of @dev and reset its
 *	counter, holding the device's address lock while doing so.
 */
static void dev_addr_discard(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_addr_discard(&dev->mc_list);
	dev->mc_count = 0;

	netif_addr_unlock_bh(dev);
}
4032
4033/**
4034 *	dev_get_flags - get flags reported to userspace
4035 *	@dev: device
4036 *
4037 *	Get the combination of flag bits exported through APIs to userspace.
4038 */
4039unsigned dev_get_flags(const struct net_device *dev)
4040{
4041	unsigned flags;
4042
4043	flags = (dev->flags & ~(IFF_PROMISC |
4044				IFF_ALLMULTI |
4045				IFF_RUNNING |
4046				IFF_LOWER_UP |
4047				IFF_DORMANT)) |
4048		(dev->gflags & (IFF_PROMISC |
4049				IFF_ALLMULTI));
4050
4051	if (netif_running(dev)) {
4052		if (netif_oper_up(dev))
4053			flags |= IFF_RUNNING;
4054		if (netif_carrier_ok(dev))
4055			flags |= IFF_LOWER_UP;
4056		if (netif_dormant(dev))
4057			flags |= IFF_DORMANT;
4058	}
4059
4060	return flags;
4061}
4062
/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *
 *	Change settings on device based state flags. The flags are
 *	in the userspace exported format. The caller must hold the RTNL.
 *
 *	Returns 0, or the result of dev_open()/dev_close() when IFF_UP was
 *	toggled. The results of the promiscuity and allmulti updates are
 *	not propagated to the caller.
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret, changes;
	int old_flags = dev->flags;

	ASSERT_RTNL();

	/*
	 *	Set the flags on our device.
	 *	Only the bits in the first mask are taken from the caller;
	 *	the bits in the second mask keep their current value and are
	 *	handled separately further down.
	 */

	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	if ((old_flags ^ flags) & IFF_MULTICAST)
		dev_change_rx_flags(dev, IFF_MULTICAST);

	dev_set_rx_mode(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		/* Was up: close it. Was down: open it. */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_set_rx_mode(dev);
	}

	/* Notify about changes other than the ones excluded by the mask. */
	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		call_netdevice_notifiers(NETDEV_CHANGE, dev);

	/*
	 * dev->gflags tracks what the user asked for; a change in the
	 * requested bit adds or removes one user of the counted state.
	 */
	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Exclude state transition flags, already notified */
	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	return ret;
}
4139
4140/**
4141 *	dev_set_mtu - Change maximum transfer unit
4142 *	@dev: device
4143 *	@new_mtu: new transfer unit
4144 *
4145 *	Change the maximum transfer size of the network device.
4146 */
4147int dev_set_mtu(struct net_device *dev, int new_mtu)
4148{
4149	const struct net_device_ops *ops = dev->netdev_ops;
4150	int err;
4151
4152	if (new_mtu == dev->mtu)
4153		return 0;
4154
4155	/*	MTU must be positive.	 */
4156	if (new_mtu < 0)
4157		return -EINVAL;
4158
4159	if (!netif_device_present(dev))
4160		return -ENODEV;
4161
4162	err = 0;
4163	if (ops->ndo_change_mtu)
4164		err = ops->ndo_change_mtu(dev, new_mtu);
4165	else
4166		dev->mtu = new_mtu;
4167
4168	if (!err && dev->flags & IFF_UP)
4169		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4170	return err;
4171}
4172
4173/**
4174 *	dev_set_mac_address - Change Media Access Control Address
4175 *	@dev: device
4176 *	@sa: new address
4177 *
4178 *	Change the hardware (MAC) address of the device
4179 */
4180int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4181{
4182	const struct net_device_ops *ops = dev->netdev_ops;
4183	int err;
4184
4185	if (!ops->ndo_set_mac_address)
4186		return -EOPNOTSUPP;
4187	if (sa->sa_family != dev->type)
4188		return -EINVAL;
4189	if (!netif_device_present(dev))
4190		return -ENODEV;
4191	err = ops->ndo_set_mac_address(dev, sa);
4192	if (!err)
4193		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4194	return err;
4195}
4196
4197/*
4198 *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4199 */
4200static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4201{
4202	int err;
4203	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4204
4205	if (!dev)
4206		return -ENODEV;
4207
4208	switch (cmd) {
4209		case SIOCGIFFLAGS:	/* Get interface flags */
4210			ifr->ifr_flags = (short) dev_get_flags(dev);
4211			return 0;
4212
4213		case SIOCGIFMETRIC:	/* Get the metric on the interface
4214					   (currently unused) */
4215			ifr->ifr_metric = 0;
4216			return 0;
4217
4218		case SIOCGIFMTU:	/* Get the MTU of a device */
4219			ifr->ifr_mtu = dev->mtu;
4220			return 0;
4221
4222		case SIOCGIFHWADDR:
4223			if (!dev->addr_len)
4224				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4225			else
4226				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4227				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4228			ifr->ifr_hwaddr.sa_family = dev->type;
4229			return 0;
4230
4231		case SIOCGIFSLAVE:
4232			err = -EINVAL;
4233			break;
4234
4235		case SIOCGIFMAP:
4236			ifr->ifr_map.mem_start = dev->mem_start;
4237			ifr->ifr_map.mem_end   = dev->mem_end;
4238			ifr->ifr_map.base_addr = dev->base_addr;
4239			ifr->ifr_map.irq       = dev->irq;
4240			ifr->ifr_map.dma       = dev->dma;
4241			ifr->ifr_map.port      = dev->if_port;
4242			return 0;
4243
4244		case SIOCGIFINDEX:
4245			ifr->ifr_ifindex = dev->ifindex;
4246			return 0;
4247
4248		case SIOCGIFTXQLEN:
4249			ifr->ifr_qlen = dev->tx_queue_len;
4250			return 0;
4251
4252		default:
4253			/* dev_ioctl() should ensure this case
4254			 * is never reached
4255			 */
4256			WARN_ON(1);
4257			err = -EINVAL;
4258			break;
4259
4260	}
4261	return err;
4262}
4263
/*
 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
 *
 *	Handles the state-changing interface ioctls and the device-private
 *	ones. The device is looked up by ifr->ifr_name; -ENODEV is returned
 *	if it does not exist. Commands that are neither handled here nor on
 *	the pass-through list in the default branch yield -EINVAL.
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
	const struct net_device_ops *ops;

	if (!dev)
		return -ENODEV;

	ops = dev->netdev_ops;

	switch (cmd) {
		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCSIFMETRIC:	/* Set the metric on the interface
					   (currently unused) */
			return -EOPNOTSUPP;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			return dev_set_mtu(dev, ifr->ifr_mtu);

		case SIOCSIFHWADDR:
			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

		case SIOCSIFHWBROADCAST:
			/* Address family must match the device type. */
			if (ifr->ifr_hwaddr.sa_family != dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCSIFMAP:
			if (ops->ndo_set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return ops->ndo_set_config(dev, &ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			/*
			 * Requires a driver hook that can consume the list
			 * and an AF_UNSPEC address.
			 */
			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
					  dev->addr_len, 1);

		case SIOCDELMULTI:
			/* Same preconditions as SIOCADDMULTI. */
			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
					     dev->addr_len, 1);

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen < 0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			/* Force termination of the user-supplied name. */
			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
			return dev_change_name(dev, ifr->ifr_newname);

		/*
		 *	Unknown or private ioctl
		 */

		default:
			/*
			 * The commands listed here are handed to the driver's
			 * ndo_do_ioctl hook: -EOPNOTSUPP if there is no hook,
			 * -ENODEV if the device is not present.
			 */
			if ((cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCBRADDIF ||
			    cmd == SIOCBRDELIF ||
			    cmd == SIOCSHWTSTAMP ||
			    cmd == SIOCWANDEV) {
				err = -EOPNOTSUPP;
				if (ops->ndo_do_ioctl) {
					if (netif_device_present(dev))
						err = ops->ndo_do_ioctl(dev, ifr, cmd);
					else
						err = -ENODEV;
				}
			} else
				err = -EINVAL;

	}
	return err;
}
4369
4370/*
4371 *	This function handles all "interface"-type I/O control requests. The actual
4372 *	'doing' part of this is dev_ifsioc above.
4373 */
4374
/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 *
 *	Except for SIOCGIFCONF and SIOCGIFNAME, the request is copied in
 *	from @arg, the interface name is terminated and cut at the first
 *	':' and the command is dispatched according to the locking and
 *	privilege it needs. -EFAULT is returned when copying from or to
 *	user space fails, -EPERM when CAP_NET_ADMIN is required but
 *	missing.
 */

int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* Never trust user space to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/*
	 * Look the device up by the part of the name before any ':';
	 * the colon is put back before results are copied out below.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */
		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(net, ifr.ifr_name);
			read_lock(&dev_base_lock);
			ret = dev_ifsioc_locked(net, &ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		case SIOCETHTOOL:
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(net, &ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
		case SIOCSIFNAME:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */
		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDCHANGEACTIVE:
		case SIOCBRADDIF:
		case SIOCBRDELIF:
		case SIOCSHWTSTAMP:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			/* fall through */
		/* The two query commands below skip the capability check. */
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but
			 * currently do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space.
			 * Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */
		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(net, ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(net, &ifr, cmd);
				rtnl_unlock();
				/*
				 * NOTE(review): unlike the cases above, the
				 * ':' removed from ifr_name is not restored
				 * before the request is copied back.
				 */
				if (!ret && copy_to_user(arg, &ifr,
							 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
				return wext_handle_ioctl(net, &ifr, cmd, arg);
			return -EINVAL;
	}
}
4552
4553
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Advances a function-static counter, restarting it at 1 whenever it
 *	is no longer positive, until __dev_get_by_index() finds no device
 *	with that index in @net.  That value is returned.  The caller must
 *	hold the rtnl semaphore or the dev_base_lock to be sure it remains
 *	unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	do {
		/* Candidates must stay strictly positive. */
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
4572
4573/* Delayed registration/unregisteration */
4574static LIST_HEAD(net_todo_list);
4575
4576static void net_set_todo(struct net_device *dev)
4577{
4578	list_add_tail(&dev->todo_list, &net_todo_list);
4579}
4580
/*
 * rollback_registered - take a registered device back out of the stack
 * @dev: device to tear down
 *
 * Must be called with RTNL held (ASSERT_RTNL) and never during the boot
 * phase.  A device that is still NETREG_UNINITIALIZED only produces a
 * debug message plus a warning and is left alone; any other state than
 * NETREG_REGISTERED is a BUG.
 *
 * For a registered device the steps are, in order: close it, unlist it,
 * mark it NETREG_UNREGISTERING, shut down its qdiscs, broadcast
 * NETDEV_UNREGISTER, flush the address lists, call ndo_uninit when the
 * driver provides one, unregister the kobject and finally drop one
 * device reference.
 */
static void rollback_registered(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	dev_close(dev);

	/* And unlink it from device chain. */
	unlist_netdevice(dev);

	dev->reg_state = NETREG_UNREGISTERING;

	/* Wait (via synchronize_net) before the queues are shut down. */
	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_unicast_flush(dev);
	dev_addr_discard(dev);

	/* Give the driver its optional uninit hook. */
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	WARN_ON(dev->master);

	/* Remove entries from kobject tree */
	netdev_unregister_kobject(dev);

	synchronize_net();

	/*
	 * Drop one reference; presumably the one register_netdevice()
	 * took with dev_hold() -- confirm against the caller.
	 */
	dev_put(dev);
}
4635
4636static void __netdev_init_queue_locks_one(struct net_device *dev,
4637					  struct netdev_queue *dev_queue,
4638					  void *_unused)
4639{
4640	spin_lock_init(&dev_queue->_xmit_lock);
4641	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4642	dev_queue->xmit_lock_owner = -1;
4643}
4644
4645static void netdev_init_queue_locks(struct net_device *dev)
4646{
4647	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4648	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4649}
4650
4651unsigned long netdev_fix_features(unsigned long features, const char *name)
4652{
4653	/* Fix illegal SG+CSUM combinations. */
4654	if ((features & NETIF_F_SG) &&
4655	    !(features & NETIF_F_ALL_CSUM)) {
4656		if (name)
4657			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4658			       "checksum feature.\n", name);
4659		features &= ~NETIF_F_SG;
4660	}
4661
4662	/* TSO requires that SG is present as well. */
4663	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4664		if (name)
4665			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4666			       "SG feature.\n", name);
4667		features &= ~NETIF_F_TSO;
4668	}
4669
4670	if (features & NETIF_F_UFO) {
4671		if (!(features & NETIF_F_GEN_CSUM)) {
4672			if (name)
4673				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4674				       "since no NETIF_F_HW_CSUM feature.\n",
4675				       name);
4676			features &= ~NETIF_F_UFO;
4677		}
4678
4679		if (!(features & NETIF_F_SG)) {
4680			if (name)
4681				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4682				       "since no NETIF_F_SG feature.\n", name);
4683			features &= ~NETIF_F_UFO;
4684		}
4685	}
4686
4687	return features;
4688}
4689EXPORT_SYMBOL(netdev_fix_features);
4690
/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Failure modes visible in this function: a non-zero ndo_init()
 *	result (positive values are turned into -EIO), -EINVAL for an
 *	invalid name, -EEXIST for a name already present in the name hash,
 *	an error from netdev_register_kobject(), or an error reported by a
 *	%NETDEV_REGISTER notifier (in which case the registration is
 *	rolled back again).
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);
	netdev_init_queue_locks(dev);

	/* -1 means "not chosen yet"; ndo_init may set a real value. */
	dev->iflink = -1;

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			/* Normalise positive driver results to an errno. */
			if (ret > 0)
				ret = -EIO;
			/* ndo_init failed, so ndo_uninit is not called. */
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	/* Default iflink to the device's own ifindex if still unset. */
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}

	/* Drop SG/TSO/UFO bits whose prerequisites are missing. */
	dev->features = netdev_fix_features(dev->features, dev->name);

	/* Enable software GSO if SG is supported. */
	if (dev->features & NETIF_F_SG)
		dev->features |= NETIF_F_GSO;

	netdev_initialize_kobject(dev);
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	/* Take a reference before the device is added to the lists. */
	dev_hold(dev);
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		/* A notifier vetoed the device: undo everything done above. */
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

out:
	return ret;

err_uninit:
	/* Failure after ndo_init succeeded: let the driver undo it. */
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}
4814
4815/**
4816 *	init_dummy_netdev	- init a dummy network device for NAPI
4817 *	@dev: device to init
4818 *
4819 *	This takes a network device structure and initialize the minimum
4820 *	amount of fields so it can be used to schedule NAPI polls without
4821 *	registering a full blown interface. This is to be used by drivers
4822 *	that need to tie several hardware interfaces to a single NAPI
4823 *	poll scheduler due to HW limitations.
4824 */
4825int init_dummy_netdev(struct net_device *dev)
4826{
4827	/* Clear everything. Note we don't initialize spinlocks
4828	 * are they aren't supposed to be taken by any of the
4829	 * NAPI code and this dummy netdev is supposed to be
4830	 * only ever used for NAPI polls
4831	 */
4832	memset(dev, 0, sizeof(struct net_device));
4833
4834	/* make sure we BUG if trying to hit standard
4835	 * register/unregister code path
4836	 */
4837	dev->reg_state = NETREG_DUMMY;
4838
4839	/* initialize the ref count */
4840	atomic_set(&dev->refcnt, 1);
4841
4842	/* NAPI wants this */
4843	INIT_LIST_HEAD(&dev->napi_list);
4844
4845	/* a dummy interface is started by default */
4846	set_bit(__LINK_STATE_PRESENT, &dev->state);
4847	set_bit(__LINK_STATE_START, &dev->state);
4848
4849	return 0;
4850}
4851EXPORT_SYMBOL_GPL(init_dummy_netdev);
4852
4853
4854/**
4855 *	register_netdev	- register a network device
4856 *	@dev: device to register
4857 *
4858 *	Take a completed network device structure and add it to the kernel
4859 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4860 *	chain. 0 is returned on success. A negative errno code is returned
4861 *	on a failure to set up the device, or if the name is a duplicate.
4862 *
4863 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4864 *	and expands the device name if you passed a format string to
4865 *	alloc_netdev.
4866 */
4867int register_netdev(struct net_device *dev)
4868{
4869	int err;
4870
4871	rtnl_lock();
4872
4873	/*
4874	 * If the name is a format string the caller wants us to do a
4875	 * name allocation.
4876	 */
4877	if (strchr(dev->name, '%')) {
4878		err = dev_alloc_name(dev, dev->name);
4879		if (err < 0)
4880			goto out;
4881	}
4882
4883	err = register_netdevice(dev);
4884out:
4885	rtnl_unlock();
4886	return err;
4887}
4888EXPORT_SYMBOL(register_netdev);
4889
/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 *
 * The loop polls dev->refcnt, sleeping 250 ms per iteration.  Once more
 * than HZ jiffies have passed since the last broadcast it re-sends
 * NETDEV_UNREGISTER under RTNL, and once more than 10 * HZ jiffies have
 * passed since the last warning it logs the current usage count.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			/*
			 * Uses __rtnl_unlock() rather than rtnl_unlock();
			 * netdev_run_todo() is documented as being invoked
			 * by rtnl_unlock(), so this presumably avoids
			 * re-entering the todo processing -- confirm.
			 */
			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
4940
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *      ...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 *
 * Each queued device must be in NETREG_UNREGISTERING state; anything
 * else is logged with a stack dump and skipped.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	/* From here on RTNL is released; the snapshot is private to us. */
	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* Run flush_backlog for this device on every CPU and wait. */
		on_each_cpu(flush_backlog, dev, 1);

		/* Sleeps until dev->refcnt has dropped to zero. */
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		/* Optional per-device destructor hook. */
		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}
5005
5006/**
5007 *	dev_get_stats	- get network device statistics
5008 *	@dev: device to get statistics from
5009 *
5010 *	Get network statistics from device. The device driver may provide
5011 *	its own method by setting dev->netdev_ops->get_stats; otherwise
5012 *	the internal statistics structure is used.
5013 */
5014const struct net_device_stats *dev_get_stats(struct net_device *dev)
5015{
5016	const struct net_device_ops *ops = dev->netdev_ops;
5017
5018	if (ops->ndo_get_stats)
5019		return ops->ndo_get_stats(dev);
5020	else {
5021		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5022		struct net_device_stats *stats = &dev->stats;
5023		unsigned int i;
5024		struct netdev_queue *txq;
5025
5026		for (i = 0; i < dev->num_tx_queues; i++) {
5027			txq = netdev_get_tx_queue(dev, i);
5028			tx_bytes   += txq->tx_bytes;
5029			tx_packets += txq->tx_packets;
5030			tx_dropped += txq->tx_dropped;
5031		}
5032		if (tx_bytes || tx_packets || tx_dropped) {
5033			stats->tx_bytes   = tx_bytes;
5034			stats->tx_packets = tx_packets;
5035			stats->tx_dropped = tx_dropped;
5036		}
5037		return stats;
5038	}
5039}
5040EXPORT_SYMBOL(dev_get_stats);
5041
/*
 * Per-queue callback: record @dev as the owning device of @queue.
 * The third argument is not used.
 */
static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}
5048
5049static void netdev_init_queues(struct net_device *dev)
5050{
5051	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5052	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5053	spin_lock_init(&dev->tx_global_lock);
5054}
5055
5056/**
5057 *	alloc_netdev_mq - allocate network device
5058 *	@sizeof_priv:	size of private data to allocate space for
5059 *	@name:		device name format string
5060 *	@setup:		callback to initialize device
5061 *	@queue_count:	the number of subqueues to allocate
5062 *
5063 *	Allocates a struct net_device with private data area for driver use
5064 *	and performs basic initialization.  Also allocates subquue structs
5065 *	for each queue on the device at the end of the netdevice.
5066 */
5067struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5068		void (*setup)(struct net_device *), unsigned int queue_count)
5069{
5070	struct netdev_queue *tx;
5071	struct net_device *dev;
5072	size_t alloc_size;
5073	struct net_device *p;
5074
5075	BUG_ON(strlen(name) >= sizeof(dev->name));
5076
5077	alloc_size = sizeof(struct net_device);
5078	if (sizeof_priv) {
5079		/* ensure 32-byte alignment of private area */
5080		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5081		alloc_size += sizeof_priv;
5082	}
5083	/* ensure 32-byte alignment of whole construct */
5084	alloc_size += NETDEV_ALIGN - 1;
5085
5086	p = kzalloc(alloc_size, GFP_KERNEL);
5087	if (!p) {
5088		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5089		return NULL;
5090	}
5091
5092	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5093	if (!tx) {
5094		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5095		       "tx qdiscs.\n");
5096		goto free_p;
5097	}
5098
5099	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5100	dev->padded = (char *)dev - (char *)p;
5101
5102	if (dev_addr_init(dev))
5103		goto free_tx;
5104
5105	dev_unicast_init(dev);
5106
5107	dev_net_set(dev, &init_net);
5108
5109	dev->_tx = tx;
5110	dev->num_tx_queues = queue_count;
5111	dev->real_num_tx_queues = queue_count;
5112
5113	dev->gso_max_size = GSO_MAX_SIZE;
5114
5115	netdev_init_queues(dev);
5116
5117	INIT_LIST_HEAD(&dev->napi_list);
5118	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5119	setup(dev);
5120	strcpy(dev->name, name);
5121	return dev;
5122
5123free_tx:
5124	kfree(tx);
5125
5126free_p:
5127	kfree(p);
5128	return NULL;
5129}
5130EXPORT_SYMBOL(alloc_netdev_mq);
5131
5132/**
5133 *	free_netdev - free network device
5134 *	@dev: device
5135 *
5136 *	This function does the last stage of destroying an allocated device
5137 * 	interface. The reference to the device object is released.
5138 *	If this is the last reference then it will be freed.
5139 */
5140void free_netdev(struct net_device *dev)
5141{
5142	struct napi_struct *p, *n;
5143
5144	release_net(dev_net(dev));
5145
5146	kfree(dev->_tx);
5147
5148	/* Flush device addresses */
5149	dev_addr_flush(dev);
5150
5151	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5152		netif_napi_del(p);
5153
5154	/*  Compatibility with error handling in drivers */
5155	if (dev->reg_state == NETREG_UNINITIALIZED) {
5156		kfree((char *)dev - dev->padded);
5157		return;
5158	}
5159
5160	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5161	dev->reg_state = NETREG_RELEASED;
5162
5163	/* will free via device release */
5164	put_device(&dev->dev);
5165}
5166
/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 *
 *	Implemented as synchronize_rcu(); may sleep (see might_sleep()).
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
5178
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  The device is then queued on the todo
 *	list so that the remaining work happens in netdev_run_todo().
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}
5198
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
5218
/**
 *	dev_change_net_namespace - move device to different nethost namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	-EINVAL is returned for devices flagged NETIF_F_NETNS_LOCAL, for
 *	devices with a parent device when CONFIG_SYSFS is enabled, and for
 *	devices that are not in NETREG_REGISTERED state.  -EEXIST is
 *	returned when no usable name can be found in @net.  Moving a device
 *	to the namespace it already lives in succeeds without doing
 *	anything.
 *
 *	Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

#ifdef CONFIG_SYSFS
	/* Don't allow real devices to be moved when sysfs
	 * is enabled.
	 */
	err = -EINVAL;
	if (dev->dev.parent)
		goto out;
#endif

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing todo */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		/* A '%' makes @pat a pattern to expand into buf. */
		if (strchr(pat, '%')) {
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		/* The fallback name must be free as well. */
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	/* NOTE(review): this -ENODEV is overwritten before any exit path. */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_unicast_flush(dev);
	dev_addr_discard(dev);

	netdev_unregister_kobject(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		/* Remember whether iflink simply mirrored the old ifindex. */
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	/* A failure here only warns; the move continues regardless. */
	err = netdev_register_kobject(dev);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}
5345
/*
 * dev_cpu_callback - CPU notifier taking over a dead CPU's network work
 * @nfb: notifier block (unused here)
 * @action: notifier action; only CPU_DEAD and CPU_DEAD_FROZEN are handled
 * @ocpu: number of the CPU the event refers to, passed as a pointer value
 *
 * Appends the dead CPU's completion_queue and output_queue to those of
 * the CPU running this callback, raises NET_TX_SOFTIRQ, and re-injects
 * every skb still on the dead CPU's input_pkt_queue through netif_rx().
 * Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	/* Interrupts stay off while the local queues are modified. */
	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	/* Make sure the spliced-in work gets processed. */
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}
5389
5390
5391/**
5392 *	netdev_increment_features - increment feature set by one
5393 *	@all: current feature set
5394 *	@one: new feature set
5395 *	@mask: mask feature set
5396 *
5397 *	Computes a new feature set after adding a device with feature set
5398 *	@one to the master device with current feature set @all.  Will not
5399 *	enable anything that is off in @mask. Returns the new feature set.
5400 */
5401unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5402					unsigned long mask)
5403{
5404	/* If device needs checksumming, downgrade to it. */
5405        if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5406		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5407	else if (mask & NETIF_F_ALL_CSUM) {
5408		/* If one device supports v4/v6 checksumming, set for all. */
5409		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5410		    !(all & NETIF_F_GEN_CSUM)) {
5411			all &= ~NETIF_F_ALL_CSUM;
5412			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5413		}
5414
5415		/* If one device supports hw checksumming, set for all. */
5416		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5417			all &= ~NETIF_F_ALL_CSUM;
5418			all |= NETIF_F_HW_CSUM;
5419		}
5420	}
5421
5422	one |= NETIF_F_ALL_CSUM;
5423
5424	one |= all & NETIF_F_ONE_FOR_ALL;
5425	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5426	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5427
5428	return all;
5429}
5430EXPORT_SYMBOL(netdev_increment_features);
5431
5432static struct hlist_head *netdev_create_hash(void)
5433{
5434	int i;
5435	struct hlist_head *hash;
5436
5437	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5438	if (hash != NULL)
5439		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5440			INIT_HLIST_HEAD(&hash[i]);
5441
5442	return hash;
5443}
5444
5445/* Initialize per network namespace state */
5446static int __net_init netdev_init(struct net *net)
5447{
5448	INIT_LIST_HEAD(&net->dev_base_head);
5449
5450	net->dev_name_head = netdev_create_hash();
5451	if (net->dev_name_head == NULL)
5452		goto err_name;
5453
5454	net->dev_index_head = netdev_create_hash();
5455	if (net->dev_index_head == NULL)
5456		goto err_idx;
5457
5458	return 0;
5459
5460err_idx:
5461	kfree(net->dev_name_head);
5462err_name:
5463	return -ENOMEM;
5464}
5465
5466/**
5467 *	netdev_drivername - network driver for the device
5468 *	@dev: network device
5469 *	@buffer: buffer for resulting name
5470 *	@len: size of buffer
5471 *
5472 *	Determine network driver for device.
5473 */
5474char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5475{
5476	const struct device_driver *driver;
5477	const struct device *parent;
5478
5479	if (len <= 0 || !buffer)
5480		return buffer;
5481	buffer[0] = 0;
5482
5483	parent = dev->dev.parent;
5484
5485	if (!parent)
5486		return buffer;
5487
5488	driver = parent->driver;
5489	if (driver && driver->name)
5490		strlcpy(buffer, driver->name, len);
5491	return buffer;
5492}
5493
5494static void __net_exit netdev_exit(struct net *net)
5495{
5496	kfree(net->dev_name_head);
5497	kfree(net->dev_index_head);
5498}
5499
/*
 * Per-namespace operations that set up (netdev_init) and tear down
 * (netdev_exit) the device list and hash tables.
 */
static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};
5504
/*
 * default_device_exit - empty a network namespace that is going away
 * @net: namespace being torn down
 *
 * Under RTNL, walks the devices of @net.  Devices flagged
 * NETIF_F_NETNS_LOCAL are skipped, devices whose rtnl_link_ops provide
 * dellink are deleted through it, and everything else is moved to
 * init_net (falling back to the name "dev<ifindex>" if needed).  A
 * failed move is fatal (BUG).  The walk restarts from the beginning
 * after every deletion or move.
 */
static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev;
	/*
	 * Push all migratable of the network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
restart:
	for_each_netdev(net, dev) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			/* The list changed under us: start over. */
			goto restart;
		}

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
				__func__, dev->name, err);
			BUG();
		}
		/* The device left this namespace's list: start over. */
		goto restart;
	}
	rtnl_unlock();
}
5540
/* Per-namespace operations with only an exit hook: default_device_exit. */
static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};
5544
/*
 *	Initialize the DEV module.  Sets up the proc and kobject support,
 *	the packet type lists, the per-namespace device state, the per-CPU
 *	softnet_data queues, the loopback and default per-namespace device
 *	operations, the NET_TX/NET_RX softirqs and the CPU hotplug
 *	notifier, then initialises the dst and multicast code.
 *
 *	Returns 0 on success and -ENOMEM if any of the checked steps fails;
 *	steps already completed are not undone in that case.
 */

/*
 *       This is called single threaded during boot, so no need
 *       to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	/* Start with empty packet type lists. */
	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		/* Per-CPU backlog NAPI context, polled by process_backlog. */
		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
		queue->backlog.gro_list = NULL;
		queue->backlog.gro_count = 0;
	}

	/*
	 * Boot phase is over; the register/unregister paths BUG while
	 * dev_boot_phase is still set.
	 */
	dev_boot_phase = 0;

	/* The loopback device is special if any other network devices
	 * is present in a network namespace the loopback device must
	 * be present. Since we now dynamically allocate and free the
	 * loopback device ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the
	 * list of network devices.  Ensuring the loopback devices
	 * is the first device that appears and the last network device
	 * that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);
5622
/*
 * Fill skb_tx_hashrnd with random bytes.  Registered as a late initcall
 * (late_initcall_sync); always returns 0.
 */
static int __init initialize_hashrnd(void)
{
	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);
5630
/*
 * Symbols of this file exported to modules that are not exported next
 * to their definition.
 */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

EXPORT_SYMBOL(dev_load);

EXPORT_PER_CPU_SYMBOL(softnet_data);