net/core/dev.c at v2.6.24 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / net / core / dev.c
at v2.6.24 111 kB view raw
   1/*
   2 * 	NET3	Protocol independent device support routines.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 *	Derived from the non IP parts of dev.c 1.0.19
  10 * 		Authors:	Ross Biro
  11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *	Additional Authors:
  15 *		Florian la Roche <rzsfl@rz.uni-sb.de>
  16 *		Alan Cox <gw4pts@gw4pts.ampr.org>
  17 *		David Hinds <dahinds@users.sourceforge.net>
  18 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19 *		Adam Sulmicki <adam@cfar.umd.edu>
  20 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21 *
  22 *	Changes:
  23 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24 *              			to 2 if register_netdev gets called
  25 *              			before net_dev_init & also removed a
  26 *              			few lines of code in the process.
  27 *		Alan Cox	:	device private ioctl copies fields back.
  28 *		Alan Cox	:	Transmit queue code does relevant
  29 *					stunts to keep the queue safe.
  30 *		Alan Cox	:	Fixed double lock.
  31 *		Alan Cox	:	Fixed promisc NULL pointer trap
  32 *		????????	:	Support the full private ioctl range
  33 *		Alan Cox	:	Moved ioctl permission check into
  34 *					drivers
  35 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
  36 *		Alan Cox	:	100 backlog just doesn't cut it when
  37 *					you start doing multicast video 8)
  38 *		Alan Cox	:	Rewrote net_bh and list manager.
  39 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
  40 *		Alan Cox	:	Took out transmit every packet pass
  41 *					Saved a few bytes in the ioctl handler
  42 *		Alan Cox	:	Network driver sets packet type before
  43 *					calling netif_rx. Saves a function
  44 *					call a packet.
  45 *		Alan Cox	:	Hashed net_bh()
  46 *		Richard Kooijman:	Timestamp fixes.
  47 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
  48 *		Alan Cox	:	Device lock protection.
  49 *		Alan Cox	: 	Fixed nasty side effect of device close
  50 *					changes.
  51 *		Rudi Cilibrasi	:	Pass the right thing to
  52 *					set_mac_address()
  53 *		Dave Miller	:	32bit quantity for the device lock to
  54 *					make it work out on a Sparc.
  55 *		Bjorn Ekwall	:	Added KERNELD hack.
  56 *		Alan Cox	:	Cleaned up the backlog initialise.
  57 *		Craig Metz	:	SIOCGIFCONF fix if space for under
  58 *					1 device.
  59 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
  60 *					is no device open function.
  61 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
  62 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
  63 *		Cyrus Durgin	:	Cleaned for KMOD
  64 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
  65 *					A network device unload needs to purge
  66 *					the backlog queue.
  67 *	Paul Rusty Russell	:	SIOCSIFNAME
  68 *              Pekka Riikonen  :	Netdev boot-time settings code
  69 *              Andrew Morton   :       Make unregister_netdevice wait
  70 *              			indefinitely on dev->refcnt
  71 * 		J Hadi Salim	:	- Backlog queue sampling
  72 *				        - netif_rx() feedback
  73 */
  74
  75#include <asm/uaccess.h>
  76#include <asm/system.h>
  77#include <linux/bitops.h>
  78#include <linux/capability.h>
  79#include <linux/cpu.h>
  80#include <linux/types.h>
  81#include <linux/kernel.h>
  82#include <linux/sched.h>
  83#include <linux/mutex.h>
  84#include <linux/string.h>
  85#include <linux/mm.h>
  86#include <linux/socket.h>
  87#include <linux/sockios.h>
  88#include <linux/errno.h>
  89#include <linux/interrupt.h>
  90#include <linux/if_ether.h>
  91#include <linux/netdevice.h>
  92#include <linux/etherdevice.h>
  93#include <linux/notifier.h>
  94#include <linux/skbuff.h>
  95#include <net/net_namespace.h>
  96#include <net/sock.h>
  97#include <linux/rtnetlink.h>
  98#include <linux/proc_fs.h>
  99#include <linux/seq_file.h>
 100#include <linux/stat.h>
 101#include <linux/if_bridge.h>
 102#include <linux/if_macvlan.h>
 103#include <net/dst.h>
 104#include <net/pkt_sched.h>
 105#include <net/checksum.h>
 106#include <linux/highmem.h>
 107#include <linux/init.h>
 108#include <linux/kmod.h>
 109#include <linux/module.h>
 110#include <linux/kallsyms.h>
 111#include <linux/netpoll.h>
 112#include <linux/rcupdate.h>
 113#include <linux/delay.h>
 114#include <net/wext.h>
 115#include <net/iw_handler.h>
 116#include <asm/current.h>
 117#include <linux/audit.h>
 118#include <linux/dmaengine.h>
 119#include <linux/err.h>
 120#include <linux/ctype.h>
 121#include <linux/if_arp.h>
 122
 123#include "net-sysfs.h"
 124
 125/*
 126 *	The list of packet types we will receive (as opposed to discard)
 127 *	and the routines to invoke.
 128 *
 129 *	Why 16. Because with 16 the only overlap we get on a hash of the
 130 *	low nibble of the protocol value is RARP/SNAP/X.25.
 131 *
 132 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 133 *             sure which should go first, but I bet it won't make much
 134 *             difference if we are running VLANs.  The good news is that
 135 *             this protocol won't be in the list unless compiled in, so
 136 *             the average user (w/out VLANs) will not be adversely affected.
 137 *             --BLG
 138 *
 139 *		0800	IP
 140 *		8100    802.1Q VLAN
 141 *		0001	802.3
 142 *		0002	AX.25
 143 *		0004	802.2
 144 *		8035	RARP
 145 *		0005	SNAP
 146 *		0805	X.25
 147 *		0806	ARP
 148 *		8137	IPX
 149 *		0009	Localtalk
 150 *		86DD	IPv6
 151 */
 152
 153static DEFINE_SPINLOCK(ptype_lock);
 154static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
 155static struct list_head ptype_all __read_mostly;	/* Taps */
 156
 157#ifdef CONFIG_NET_DMA
 158struct net_dma {
 159	struct dma_client client;
 160	spinlock_t lock;
 161	cpumask_t channel_mask;
 162	struct dma_chan *channels[NR_CPUS];
 163};
 164
 165static enum dma_state_client
 166netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 167	enum dma_state state);
 168
 169static struct net_dma net_dma = {
 170	.client = {
 171		.event_callback = netdev_dma_event,
 172	},
 173};
 174#endif
 175
 176/*
 177 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 178 * semaphore.
 179 *
 180 * Pure readers hold dev_base_lock for reading.
 181 *
 182 * Writers must hold the rtnl semaphore while they loop through the
 183 * dev_base_head list, and hold dev_base_lock for writing when they do the
 184 * actual updates.  This allows pure readers to access the list even
 185 * while a writer is preparing to update it.
 186 *
 187 * To put it another way, dev_base_lock is held for writing only to
 188 * protect against pure readers; the rtnl semaphore provides the
 189 * protection against other writers.
 190 *
 191 * See, for example usages, register_netdevice() and
 192 * unregister_netdevice(), which must be called with the rtnl
 193 * semaphore held.
 194 */
 195DEFINE_RWLOCK(dev_base_lock);
 196
 197EXPORT_SYMBOL(dev_base_lock);
 198
 199#define NETDEV_HASHBITS	8
 200#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 201
 202static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 203{
 204	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 205	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 206}
 207
 208static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 209{
 210	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 211}
 212
 213/* Device list insertion */
 214static int list_netdevice(struct net_device *dev)
 215{
 216	struct net *net = dev->nd_net;
 217
 218	ASSERT_RTNL();
 219
 220	write_lock_bh(&dev_base_lock);
 221	list_add_tail(&dev->dev_list, &net->dev_base_head);
 222	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 223	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 224	write_unlock_bh(&dev_base_lock);
 225	return 0;
 226}
 227
 228/* Device list removal */
 229static void unlist_netdevice(struct net_device *dev)
 230{
 231	ASSERT_RTNL();
 232
 233	/* Unlink dev from the device chain */
 234	write_lock_bh(&dev_base_lock);
 235	list_del(&dev->dev_list);
 236	hlist_del(&dev->name_hlist);
 237	hlist_del(&dev->index_hlist);
 238	write_unlock_bh(&dev_base_lock);
 239}
 240
 241/*
 242 *	Our notifier list
 243 */
 244
 245static RAW_NOTIFIER_HEAD(netdev_chain);
 246
 247/*
 248 *	Device drivers call our routines to queue packets here. We empty the
 249 *	queue in the local softnet handler.
 250 */
 251
 252DEFINE_PER_CPU(struct softnet_data, softnet_data);
 253
 254#ifdef CONFIG_DEBUG_LOCK_ALLOC
 255/*
 256 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 257 * according to dev->type
 258 */
 259static const unsigned short netdev_lock_type[] =
 260	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 261	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 262	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 263	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 264	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 265	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 266	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 267	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 268	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 269	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 270	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 271	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 272	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 273	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
 274	 ARPHRD_NONE};
 275
 276static const char *netdev_lock_name[] =
 277	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 278	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 279	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 280	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 281	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 282	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 283	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 284	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 285	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 286	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 287	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 288	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 289	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 290	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 291	 "_xmit_NONE"};
 292
 293static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 294
 295static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 296{
 297	int i;
 298
 299	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 300		if (netdev_lock_type[i] == dev_type)
 301			return i;
 302	/* the last key is used by default */
 303	return ARRAY_SIZE(netdev_lock_type) - 1;
 304}
 305
 306static inline void netdev_set_lockdep_class(spinlock_t *lock,
 307					    unsigned short dev_type)
 308{
 309	int i;
 310
 311	i = netdev_lock_pos(dev_type);
 312	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 313				   netdev_lock_name[i]);
 314}
 315#else
 316static inline void netdev_set_lockdep_class(spinlock_t *lock,
 317					    unsigned short dev_type)
 318{
 319}
 320#endif
 321
 322/*******************************************************************************
 323
 324		Protocol management and registration routines
 325
 326*******************************************************************************/
 327
 328/*
 329 *	Add a protocol ID to the list. Now that the input handler is
 330 *	smarter we can dispense with all the messy stuff that used to be
 331 *	here.
 332 *
 333 *	BEWARE!!! Protocol handlers, mangling input packets,
 334 *	MUST BE last in hash buckets and checking protocol handlers
 335 *	MUST start from promiscuous ptype_all chain in net_bh.
 336 *	It is true now, do not change it.
 337 *	Explanation follows: if protocol handler, mangling packet, will
 338 *	be the first on list, it is not able to sense, that packet
 339 *	is cloned and should be copied-on-write, so that it will
 340 *	change it and subsequent readers will get broken packet.
 341 *							--ANK (980803)
 342 */
 343
 344/**
 345 *	dev_add_pack - add packet handler
 346 *	@pt: packet type declaration
 347 *
 348 *	Add a protocol handler to the networking stack. The passed &packet_type
 349 *	is linked into kernel lists and may not be freed until it has been
 350 *	removed from the kernel lists.
 351 *
 352 *	This call does not sleep therefore it can not
 353 *	guarantee all CPU's that are in middle of receiving packets
 354 *	will see the new packet type (until the next received packet).
 355 */
 356
 357void dev_add_pack(struct packet_type *pt)
 358{
 359	int hash;
 360
 361	spin_lock_bh(&ptype_lock);
 362	if (pt->type == htons(ETH_P_ALL))
 363		list_add_rcu(&pt->list, &ptype_all);
 364	else {
 365		hash = ntohs(pt->type) & 15;
 366		list_add_rcu(&pt->list, &ptype_base[hash]);
 367	}
 368	spin_unlock_bh(&ptype_lock);
 369}
 370
 371/**
 372 *	__dev_remove_pack	 - remove packet handler
 373 *	@pt: packet type declaration
 374 *
 375 *	Remove a protocol handler that was previously added to the kernel
 376 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 377 *	from the kernel lists and can be freed or reused once this function
 378 *	returns.
 379 *
 380 *      The packet type might still be in use by receivers
 381 *	and must not be freed until after all the CPU's have gone
 382 *	through a quiescent state.
 383 */
 384void __dev_remove_pack(struct packet_type *pt)
 385{
 386	struct list_head *head;
 387	struct packet_type *pt1;
 388
 389	spin_lock_bh(&ptype_lock);
 390
 391	if (pt->type == htons(ETH_P_ALL))
 392		head = &ptype_all;
 393	else
 394		head = &ptype_base[ntohs(pt->type) & 15];
 395
 396	list_for_each_entry(pt1, head, list) {
 397		if (pt == pt1) {
 398			list_del_rcu(&pt->list);
 399			goto out;
 400		}
 401	}
 402
 403	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 404out:
 405	spin_unlock_bh(&ptype_lock);
 406}
 407/**
 408 *	dev_remove_pack	 - remove packet handler
 409 *	@pt: packet type declaration
 410 *
 411 *	Remove a protocol handler that was previously added to the kernel
 412 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 413 *	from the kernel lists and can be freed or reused once this function
 414 *	returns.
 415 *
 416 *	This call sleeps to guarantee that no CPU is looking at the packet
 417 *	type after return.
 418 */
 419void dev_remove_pack(struct packet_type *pt)
 420{
 421	__dev_remove_pack(pt);
 422
 423	synchronize_net();
 424}
 425
 426/******************************************************************************
 427
 428		      Device Boot-time Settings Routines
 429
 430*******************************************************************************/
 431
 432/* Boot time configuration table */
 433static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 434
 435/**
 436 *	netdev_boot_setup_add	- add new setup entry
 437 *	@name: name of the device
 438 *	@map: configured settings for the device
 439 *
 440 *	Adds new setup entry to the dev_boot_setup list.  The function
 441 *	returns 0 on error and 1 on success.  This is a generic routine to
 442 *	all netdevices.
 443 */
 444static int netdev_boot_setup_add(char *name, struct ifmap *map)
 445{
 446	struct netdev_boot_setup *s;
 447	int i;
 448
 449	s = dev_boot_setup;
 450	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 451		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 452			memset(s[i].name, 0, sizeof(s[i].name));
 453			strcpy(s[i].name, name);
 454			memcpy(&s[i].map, map, sizeof(s[i].map));
 455			break;
 456		}
 457	}
 458
 459	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 460}
 461
 462/**
 463 *	netdev_boot_setup_check	- check boot time settings
 464 *	@dev: the netdevice
 465 *
 466 * 	Check boot time settings for the device.
 467 *	The found settings are set for the device to be used
 468 *	later in the device probing.
 469 *	Returns 0 if no settings found, 1 if they are.
 470 */
 471int netdev_boot_setup_check(struct net_device *dev)
 472{
 473	struct netdev_boot_setup *s = dev_boot_setup;
 474	int i;
 475
 476	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 477		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 478		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 479			dev->irq 	= s[i].map.irq;
 480			dev->base_addr 	= s[i].map.base_addr;
 481			dev->mem_start 	= s[i].map.mem_start;
 482			dev->mem_end 	= s[i].map.mem_end;
 483			return 1;
 484		}
 485	}
 486	return 0;
 487}
 488
 489
 490/**
 491 *	netdev_boot_base	- get address from boot time settings
 492 *	@prefix: prefix for network device
 493 *	@unit: id for network device
 494 *
 495 * 	Check boot time settings for the base address of device.
 496 *	The found settings are set for the device to be used
 497 *	later in the device probing.
 498 *	Returns 0 if no settings found.
 499 */
 500unsigned long netdev_boot_base(const char *prefix, int unit)
 501{
 502	const struct netdev_boot_setup *s = dev_boot_setup;
 503	char name[IFNAMSIZ];
 504	int i;
 505
 506	sprintf(name, "%s%d", prefix, unit);
 507
 508	/*
 509	 * If device already registered then return base of 1
 510	 * to indicate not to probe for this interface
 511	 */
 512	if (__dev_get_by_name(&init_net, name))
 513		return 1;
 514
 515	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 516		if (!strcmp(name, s[i].name))
 517			return s[i].map.base_addr;
 518	return 0;
 519}
 520
 521/*
 522 * Saves at boot time configured settings for any netdevice.
 523 */
 524int __init netdev_boot_setup(char *str)
 525{
 526	int ints[5];
 527	struct ifmap map;
 528
 529	str = get_options(str, ARRAY_SIZE(ints), ints);
 530	if (!str || !*str)
 531		return 0;
 532
 533	/* Save settings */
 534	memset(&map, 0, sizeof(map));
 535	if (ints[0] > 0)
 536		map.irq = ints[1];
 537	if (ints[0] > 1)
 538		map.base_addr = ints[2];
 539	if (ints[0] > 2)
 540		map.mem_start = ints[3];
 541	if (ints[0] > 3)
 542		map.mem_end = ints[4];
 543
 544	/* Add new entry to the list */
 545	return netdev_boot_setup_add(str, &map);
 546}
 547
 548__setup("netdev=", netdev_boot_setup);
 549
 550/*******************************************************************************
 551
 552			    Device Interface Subroutines
 553
 554*******************************************************************************/
 555
 556/**
 557 *	__dev_get_by_name	- find a device by its name
 558 *	@net: the applicable net namespace
 559 *	@name: name to find
 560 *
 561 *	Find an interface by name. Must be called under RTNL semaphore
 562 *	or @dev_base_lock. If the name is found a pointer to the device
 563 *	is returned. If the name is not found then %NULL is returned. The
 564 *	reference counters are not incremented so the caller must be
 565 *	careful with locks.
 566 */
 567
 568struct net_device *__dev_get_by_name(struct net *net, const char *name)
 569{
 570	struct hlist_node *p;
 571
 572	hlist_for_each(p, dev_name_hash(net, name)) {
 573		struct net_device *dev
 574			= hlist_entry(p, struct net_device, name_hlist);
 575		if (!strncmp(dev->name, name, IFNAMSIZ))
 576			return dev;
 577	}
 578	return NULL;
 579}
 580
 581/**
 582 *	dev_get_by_name		- find a device by its name
 583 *	@net: the applicable net namespace
 584 *	@name: name to find
 585 *
 586 *	Find an interface by name. This can be called from any
 587 *	context and does its own locking. The returned handle has
 588 *	the usage count incremented and the caller must use dev_put() to
 589 *	release it when it is no longer needed. %NULL is returned if no
 590 *	matching device is found.
 591 */
 592
 593struct net_device *dev_get_by_name(struct net *net, const char *name)
 594{
 595	struct net_device *dev;
 596
 597	read_lock(&dev_base_lock);
 598	dev = __dev_get_by_name(net, name);
 599	if (dev)
 600		dev_hold(dev);
 601	read_unlock(&dev_base_lock);
 602	return dev;
 603}
 604
 605/**
 606 *	__dev_get_by_index - find a device by its ifindex
 607 *	@net: the applicable net namespace
 608 *	@ifindex: index of device
 609 *
 610 *	Search for an interface by index. Returns %NULL if the device
 611 *	is not found or a pointer to the device. The device has not
 612 *	had its reference counter increased so the caller must be careful
 613 *	about locking. The caller must hold either the RTNL semaphore
 614 *	or @dev_base_lock.
 615 */
 616
 617struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 618{
 619	struct hlist_node *p;
 620
 621	hlist_for_each(p, dev_index_hash(net, ifindex)) {
 622		struct net_device *dev
 623			= hlist_entry(p, struct net_device, index_hlist);
 624		if (dev->ifindex == ifindex)
 625			return dev;
 626	}
 627	return NULL;
 628}
 629
 630
 631/**
 632 *	dev_get_by_index - find a device by its ifindex
 633 *	@net: the applicable net namespace
 634 *	@ifindex: index of device
 635 *
 636 *	Search for an interface by index. Returns NULL if the device
 637 *	is not found or a pointer to the device. The device returned has
 638 *	had a reference added and the pointer is safe until the user calls
 639 *	dev_put to indicate they have finished with it.
 640 */
 641
 642struct net_device *dev_get_by_index(struct net *net, int ifindex)
 643{
 644	struct net_device *dev;
 645
 646	read_lock(&dev_base_lock);
 647	dev = __dev_get_by_index(net, ifindex);
 648	if (dev)
 649		dev_hold(dev);
 650	read_unlock(&dev_base_lock);
 651	return dev;
 652}
 653
 654/**
 655 *	dev_getbyhwaddr - find a device by its hardware address
 656 *	@net: the applicable net namespace
 657 *	@type: media type of device
 658 *	@ha: hardware address
 659 *
 660 *	Search for an interface by MAC address. Returns NULL if the device
 661 *	is not found or a pointer to the device. The caller must hold the
 662 *	rtnl semaphore. The returned device has not had its ref count increased
 663 *	and the caller must therefore be careful about locking
 664 *
 665 *	BUGS:
 666 *	If the API was consistent this would be __dev_get_by_hwaddr
 667 */
 668
 669struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 670{
 671	struct net_device *dev;
 672
 673	ASSERT_RTNL();
 674
 675	for_each_netdev(&init_net, dev)
 676		if (dev->type == type &&
 677		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 678			return dev;
 679
 680	return NULL;
 681}
 682
 683EXPORT_SYMBOL(dev_getbyhwaddr);
 684
 685struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 686{
 687	struct net_device *dev;
 688
 689	ASSERT_RTNL();
 690	for_each_netdev(net, dev)
 691		if (dev->type == type)
 692			return dev;
 693
 694	return NULL;
 695}
 696
 697EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 698
 699struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 700{
 701	struct net_device *dev;
 702
 703	rtnl_lock();
 704	dev = __dev_getfirstbyhwtype(net, type);
 705	if (dev)
 706		dev_hold(dev);
 707	rtnl_unlock();
 708	return dev;
 709}
 710
 711EXPORT_SYMBOL(dev_getfirstbyhwtype);
 712
 713/**
 714 *	dev_get_by_flags - find any device with given flags
 715 *	@net: the applicable net namespace
 716 *	@if_flags: IFF_* values
 717 *	@mask: bitmask of bits in if_flags to check
 718 *
 719 *	Search for any interface with the given flags. Returns NULL if a device
 720 *	is not found or a pointer to the device. The device returned has
 721 *	had a reference added and the pointer is safe until the user calls
 722 *	dev_put to indicate they have finished with it.
 723 */
 724
 725struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 726{
 727	struct net_device *dev, *ret;
 728
 729	ret = NULL;
 730	read_lock(&dev_base_lock);
 731	for_each_netdev(net, dev) {
 732		if (((dev->flags ^ if_flags) & mask) == 0) {
 733			dev_hold(dev);
 734			ret = dev;
 735			break;
 736		}
 737	}
 738	read_unlock(&dev_base_lock);
 739	return ret;
 740}
 741
 742/**
 743 *	dev_valid_name - check if name is okay for network device
 744 *	@name: name string
 745 *
 746 *	Network device names need to be valid file names to
 747 *	to allow sysfs to work.  We also disallow any kind of
 748 *	whitespace.
 749 */
 750int dev_valid_name(const char *name)
 751{
 752	if (*name == '\0')
 753		return 0;
 754	if (strlen(name) >= IFNAMSIZ)
 755		return 0;
 756	if (!strcmp(name, ".") || !strcmp(name, ".."))
 757		return 0;
 758
 759	while (*name) {
 760		if (*name == '/' || isspace(*name))
 761			return 0;
 762		name++;
 763	}
 764	return 1;
 765}
 766
 767/**
 768 *	__dev_alloc_name - allocate a name for a device
 769 *	@net: network namespace to allocate the device name in
 770 *	@name: name format string
 771 *	@buf:  scratch buffer and result name string
 772 *
 773 *	Passed a format string - eg "lt%d" it will try and find a suitable
 774 *	id. It scans list of devices to build up a free map, then chooses
 775 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 776 *	while allocating the name and adding the device in order to avoid
 777 *	duplicates.
 778 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 779 *	Returns the number of the unit assigned or a negative errno code.
 780 */
 781
 782static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 783{
 784	int i = 0;
 785	const char *p;
 786	const int max_netdevices = 8*PAGE_SIZE;
 787	unsigned long *inuse;
 788	struct net_device *d;
 789
 790	p = strnchr(name, IFNAMSIZ-1, '%');
 791	if (p) {
 792		/*
 793		 * Verify the string as this thing may have come from
 794		 * the user.  There must be either one "%d" and no other "%"
 795		 * characters.
 796		 */
 797		if (p[1] != 'd' || strchr(p + 2, '%'))
 798			return -EINVAL;
 799
 800		/* Use one page as a bit array of possible slots */
 801		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
 802		if (!inuse)
 803			return -ENOMEM;
 804
 805		for_each_netdev(net, d) {
 806			if (!sscanf(d->name, name, &i))
 807				continue;
 808			if (i < 0 || i >= max_netdevices)
 809				continue;
 810
 811			/*  avoid cases where sscanf is not exact inverse of printf */
 812			snprintf(buf, IFNAMSIZ, name, i);
 813			if (!strncmp(buf, d->name, IFNAMSIZ))
 814				set_bit(i, inuse);
 815		}
 816
 817		i = find_first_zero_bit(inuse, max_netdevices);
 818		free_page((unsigned long) inuse);
 819	}
 820
 821	snprintf(buf, IFNAMSIZ, name, i);
 822	if (!__dev_get_by_name(net, buf))
 823		return i;
 824
 825	/* It is possible to run out of possible slots
 826	 * when the name is long and there isn't enough space left
 827	 * for the digits, or if all bits are used.
 828	 */
 829	return -ENFILE;
 830}
 831
 832/**
 833 *	dev_alloc_name - allocate a name for a device
 834 *	@dev: device
 835 *	@name: name format string
 836 *
 837 *	Passed a format string - eg "lt%d" it will try and find a suitable
 838 *	id. It scans list of devices to build up a free map, then chooses
 839 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 840 *	while allocating the name and adding the device in order to avoid
 841 *	duplicates.
 842 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 843 *	Returns the number of the unit assigned or a negative errno code.
 844 */
 845
 846int dev_alloc_name(struct net_device *dev, const char *name)
 847{
 848	char buf[IFNAMSIZ];
 849	struct net *net;
 850	int ret;
 851
 852	BUG_ON(!dev->nd_net);
 853	net = dev->nd_net;
 854	ret = __dev_alloc_name(net, name, buf);
 855	if (ret >= 0)
 856		strlcpy(dev->name, buf, IFNAMSIZ);
 857	return ret;
 858}
 859
 860
 861/**
 862 *	dev_change_name - change name of a device
 863 *	@dev: device
 864 *	@newname: name (or format string) must be at least IFNAMSIZ
 865 *
 866 *	Change name of a device, can pass format strings "eth%d".
 867 *	for wildcarding.
 868 */
 869int dev_change_name(struct net_device *dev, char *newname)
 870{
 871	char oldname[IFNAMSIZ];
 872	int err = 0;
 873	int ret;
 874	struct net *net;
 875
 876	ASSERT_RTNL();
 877	BUG_ON(!dev->nd_net);
 878
 879	net = dev->nd_net;
 880	if (dev->flags & IFF_UP)
 881		return -EBUSY;
 882
 883	if (!dev_valid_name(newname))
 884		return -EINVAL;
 885
 886	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 887		return 0;
 888
 889	memcpy(oldname, dev->name, IFNAMSIZ);
 890
 891	if (strchr(newname, '%')) {
 892		err = dev_alloc_name(dev, newname);
 893		if (err < 0)
 894			return err;
 895		strcpy(newname, dev->name);
 896	}
 897	else if (__dev_get_by_name(net, newname))
 898		return -EEXIST;
 899	else
 900		strlcpy(dev->name, newname, IFNAMSIZ);
 901
 902rollback:
 903	device_rename(&dev->dev, dev->name);
 904
 905	write_lock_bh(&dev_base_lock);
 906	hlist_del(&dev->name_hlist);
 907	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 908	write_unlock_bh(&dev_base_lock);
 909
 910	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 911	ret = notifier_to_errno(ret);
 912
 913	if (ret) {
 914		if (err) {
 915			printk(KERN_ERR
 916			       "%s: name change rollback failed: %d.\n",
 917			       dev->name, ret);
 918		} else {
 919			err = ret;
 920			memcpy(dev->name, oldname, IFNAMSIZ);
 921			goto rollback;
 922		}
 923	}
 924
 925	return err;
 926}
 927
 928/**
 929 *	netdev_features_change - device changes features
 930 *	@dev: device to cause notification
 931 *
 932 *	Called to indicate a device has changed features.
 933 */
 934void netdev_features_change(struct net_device *dev)
 935{
 936	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
 937}
 938EXPORT_SYMBOL(netdev_features_change);
 939
 940/**
 941 *	netdev_state_change - device changes state
 942 *	@dev: device to cause notification
 943 *
 944 *	Called to indicate a device has changed state. This function calls
 945 *	the notifier chains for netdev_chain and sends a NEWLINK message
 946 *	to the routing socket.
 947 */
 948void netdev_state_change(struct net_device *dev)
 949{
 950	if (dev->flags & IFF_UP) {
 951		call_netdevice_notifiers(NETDEV_CHANGE, dev);
 952		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 953	}
 954}
 955
 956/**
 957 *	dev_load 	- load a network module
 958 *	@net: the applicable net namespace
 959 *	@name: name of interface
 960 *
 961 *	If a network interface is not present and the process has suitable
 962 *	privileges this function loads the module. If module loading is not
 963 *	available in this kernel then it becomes a nop.
 964 */
 965
 966void dev_load(struct net *net, const char *name)
 967{
 968	struct net_device *dev;
 969
 970	read_lock(&dev_base_lock);
 971	dev = __dev_get_by_name(net, name);
 972	read_unlock(&dev_base_lock);
 973
 974	if (!dev && capable(CAP_SYS_MODULE))
 975		request_module("%s", name);
 976}
 977
 978/**
 979 *	dev_open	- prepare an interface for use.
 980 *	@dev:	device to open
 981 *
 982 *	Takes a device from down to up state. The device's private open
 983 *	function is invoked and then the multicast lists are loaded. Finally
 984 *	the device is moved into the up state and a %NETDEV_UP message is
 985 *	sent to the netdev notifier chain.
 986 *
 987 *	Calling this function on an active interface is a nop. On a failure
 988 *	a negative errno code is returned.
 989 */
 990int dev_open(struct net_device *dev)
 991{
 992	int ret = 0;
 993
 994	/*
 995	 *	Is it already up?
 996	 */
 997
 998	if (dev->flags & IFF_UP)
 999		return 0;
1000
1001	/*
1002	 *	Is it even present?
1003	 */
1004	if (!netif_device_present(dev))
1005		return -ENODEV;
1006
1007	/*
1008	 *	Call device private open method
1009	 */
1010	set_bit(__LINK_STATE_START, &dev->state);
1011
1012	if (dev->validate_addr)
1013		ret = dev->validate_addr(dev);
1014
1015	if (!ret && dev->open)
1016		ret = dev->open(dev);
1017
1018	/*
1019	 *	If it went open OK then:
1020	 */
1021
1022	if (ret)
1023		clear_bit(__LINK_STATE_START, &dev->state);
1024	else {
1025		/*
1026		 *	Set the flags.
1027		 */
1028		dev->flags |= IFF_UP;
1029
1030		/*
1031		 *	Initialize multicasting status
1032		 */
1033		dev_set_rx_mode(dev);
1034
1035		/*
1036		 *	Wakeup transmit queue engine
1037		 */
1038		dev_activate(dev);
1039
1040		/*
1041		 *	... and announce new interface.
1042		 */
1043		call_netdevice_notifiers(NETDEV_UP, dev);
1044	}
1045
1046	return ret;
1047}
1048
1049/**
1050 *	dev_close - shutdown an interface.
1051 *	@dev: device to shutdown
1052 *
1053 *	This function moves an active device into down state. A
1054 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1055 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1056 *	chain.
1057 */
1058int dev_close(struct net_device *dev)
1059{
1060	might_sleep();
1061
1062	if (!(dev->flags & IFF_UP))
1063		return 0;
1064
1065	/*
1066	 *	Tell people we are going down, so that they can
1067	 *	prepare to death, when device is still operating.
1068	 */
1069	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1070
1071	dev_deactivate(dev);
1072
1073	clear_bit(__LINK_STATE_START, &dev->state);
1074
1075	/* Synchronize to scheduled poll. We cannot touch poll list,
1076	 * it can be even on different cpu. So just clear netif_running().
1077	 *
1078	 * dev->stop() will invoke napi_disable() on all of it's
1079	 * napi_struct instances on this device.
1080	 */
1081	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1082
1083	/*
1084	 *	Call the device specific close. This cannot fail.
1085	 *	Only if device is UP
1086	 *
1087	 *	We allow it to be called even after a DETACH hot-plug
1088	 *	event.
1089	 */
1090	if (dev->stop)
1091		dev->stop(dev);
1092
1093	/*
1094	 *	Device is now down.
1095	 */
1096
1097	dev->flags &= ~IFF_UP;
1098
1099	/*
1100	 * Tell people we are down
1101	 */
1102	call_netdevice_notifiers(NETDEV_DOWN, dev);
1103
1104	return 0;
1105}
1106
1107
/*
 * Starts out non-zero. While it is set, register_netdevice_notifier()
 * adds the notifier to the chain but does not replay REGISTER/UP events
 * for existing devices.
 * NOTE(review): the place where this flag is cleared is outside this
 * part of the file - confirm before relying on its lifetime.
 */
static int dev_boot_phase = 1;
1109
1110/*
1111 *	Device change register/unregister. These are not inline or static
1112 *	as we export them to the world.
1113 */
1114
1115/**
1116 *	register_netdevice_notifier - register a network notifier block
1117 *	@nb: notifier
1118 *
1119 *	Register a notifier to be called when network device events occur.
1120 *	The notifier passed is linked into the kernel structures and must
1121 *	not be reused until it has been unregistered. A negative errno code
1122 *	is returned on a failure.
1123 *
1124 * 	When registered all registration and up events are replayed
1125 *	to the new notifier to allow device to have a race free
1126 *	view of the network device list.
1127 */
1128
1129int register_netdevice_notifier(struct notifier_block *nb)
1130{
1131	struct net_device *dev;
1132	struct net_device *last;
1133	struct net *net;
1134	int err;
1135
1136	rtnl_lock();
1137	err = raw_notifier_chain_register(&netdev_chain, nb);
1138	if (err)
1139		goto unlock;
1140	if (dev_boot_phase)
1141		goto unlock;
1142	for_each_net(net) {
1143		for_each_netdev(net, dev) {
1144			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1145			err = notifier_to_errno(err);
1146			if (err)
1147				goto rollback;
1148
1149			if (!(dev->flags & IFF_UP))
1150				continue;
1151
1152			nb->notifier_call(nb, NETDEV_UP, dev);
1153		}
1154	}
1155
1156unlock:
1157	rtnl_unlock();
1158	return err;
1159
1160rollback:
1161	last = dev;
1162	for_each_net(net) {
1163		for_each_netdev(net, dev) {
1164			if (dev == last)
1165				break;
1166
1167			if (dev->flags & IFF_UP) {
1168				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1169				nb->notifier_call(nb, NETDEV_DOWN, dev);
1170			}
1171			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1172		}
1173	}
1174
1175	raw_notifier_chain_unregister(&netdev_chain, nb);
1176	goto unlock;
1177}
1178
1179/**
1180 *	unregister_netdevice_notifier - unregister a network notifier block
1181 *	@nb: notifier
1182 *
1183 *	Unregister a notifier previously registered by
1184 *	register_netdevice_notifier(). The notifier is unlinked into the
1185 *	kernel structures and may then be reused. A negative errno code
1186 *	is returned on a failure.
1187 */
1188
1189int unregister_netdevice_notifier(struct notifier_block *nb)
1190{
1191	int err;
1192
1193	rtnl_lock();
1194	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1195	rtnl_unlock();
1196	return err;
1197}
1198
1199/**
1200 *	call_netdevice_notifiers - call all network notifier blocks
1201 *      @val: value passed unmodified to notifier function
1202 *      @dev: net_device pointer passed unmodified to notifier function
1203 *
1204 *	Call all network notifier blocks.  Parameters and return value
1205 *	are as for raw_notifier_call_chain().
1206 */
1207
1208int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1209{
1210	return raw_notifier_call_chain(&netdev_chain, val, dev);
1211}
1212
/*
 * Count of consumers of rx skb time stamps. Incremented by
 * net_enable_timestamp() and decremented by net_disable_timestamp();
 * net_timestamp() only stamps an skb while this is non-zero.
 */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
1215
1216void net_enable_timestamp(void)
1217{
1218	atomic_inc(&netstamp_needed);
1219}
1220
1221void net_disable_timestamp(void)
1222{
1223	atomic_dec(&netstamp_needed);
1224}
1225
1226static inline void net_timestamp(struct sk_buff *skb)
1227{
1228	if (atomic_read(&netstamp_needed))
1229		__net_timestamp(skb);
1230	else
1231		skb->tstamp.tv64 = 0;
1232}
1233
1234/*
1235 *	Support routine. Sends outgoing frames to any network
1236 *	taps currently in use.
1237 */
1238
1239static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1240{
1241	struct packet_type *ptype;
1242
1243	net_timestamp(skb);
1244
1245	rcu_read_lock();
1246	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1247		/* Never send packets back to the socket
1248		 * they originated from - MvS (miquels@drinkel.ow.org)
1249		 */
1250		if ((ptype->dev == dev || !ptype->dev) &&
1251		    (ptype->af_packet_priv == NULL ||
1252		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1253			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1254			if (!skb2)
1255				break;
1256
1257			/* skb->nh should be correctly
1258			   set by sender, so that the second statement is
1259			   just protection against buggy protocols.
1260			 */
1261			skb_reset_mac_header(skb2);
1262
1263			if (skb_network_header(skb2) < skb2->data ||
1264			    skb2->network_header > skb2->tail) {
1265				if (net_ratelimit())
1266					printk(KERN_CRIT "protocol %04x is "
1267					       "buggy, dev %s\n",
1268					       skb2->protocol, dev->name);
1269				skb_reset_network_header(skb2);
1270			}
1271
1272			skb2->transport_header = skb2->network_header;
1273			skb2->pkt_type = PACKET_OUTGOING;
1274			ptype->func(skb2, skb->dev, ptype, skb->dev);
1275		}
1276	}
1277	rcu_read_unlock();
1278}
1279
1280
1281void __netif_schedule(struct net_device *dev)
1282{
1283	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1284		unsigned long flags;
1285		struct softnet_data *sd;
1286
1287		local_irq_save(flags);
1288		sd = &__get_cpu_var(softnet_data);
1289		dev->next_sched = sd->output_queue;
1290		sd->output_queue = dev;
1291		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1292		local_irq_restore(flags);
1293	}
1294}
1295EXPORT_SYMBOL(__netif_schedule);
1296
1297void dev_kfree_skb_irq(struct sk_buff *skb)
1298{
1299	if (atomic_dec_and_test(&skb->users)) {
1300		struct softnet_data *sd;
1301		unsigned long flags;
1302
1303		local_irq_save(flags);
1304		sd = &__get_cpu_var(softnet_data);
1305		skb->next = sd->completion_queue;
1306		sd->completion_queue = skb;
1307		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1308		local_irq_restore(flags);
1309	}
1310}
1311EXPORT_SYMBOL(dev_kfree_skb_irq);
1312
/*
 * Free @skb from any context: dev_kfree_skb_irq() is used in hard
 * interrupt context or with interrupts disabled, dev_kfree_skb()
 * otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1321
1322
1323/**
1324 * netif_device_detach - mark device as removed
1325 * @dev: network device
1326 *
1327 * Mark device as removed from system and therefore no longer available.
1328 */
1329void netif_device_detach(struct net_device *dev)
1330{
1331	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1332	    netif_running(dev)) {
1333		netif_stop_queue(dev);
1334	}
1335}
1336EXPORT_SYMBOL(netif_device_detach);
1337
1338/**
1339 * netif_device_attach - mark device as attached
1340 * @dev: network device
1341 *
1342 * Mark device as attached from system and restart if needed.
1343 */
1344void netif_device_attach(struct net_device *dev)
1345{
1346	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1347	    netif_running(dev)) {
1348		netif_wake_queue(dev);
1349		__netdev_watchdog_up(dev);
1350	}
1351}
1352EXPORT_SYMBOL(netif_device_attach);
1353
1354
1355/*
1356 * Invalidate hardware checksum when packet is to be mangled, and
1357 * complete checksum manually on outgoing path.
1358 */
1359int skb_checksum_help(struct sk_buff *skb)
1360{
1361	__wsum csum;
1362	int ret = 0, offset;
1363
1364	if (skb->ip_summed == CHECKSUM_COMPLETE)
1365		goto out_set_summed;
1366
1367	if (unlikely(skb_shinfo(skb)->gso_size)) {
1368		/* Let GSO fix up the checksum. */
1369		goto out_set_summed;
1370	}
1371
1372	offset = skb->csum_start - skb_headroom(skb);
1373	BUG_ON(offset >= skb_headlen(skb));
1374	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1375
1376	offset += skb->csum_offset;
1377	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1378
1379	if (skb_cloned(skb) &&
1380	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1381		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1382		if (ret)
1383			goto out;
1384	}
1385
1386	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1387out_set_summed:
1388	skb->ip_summed = CHECKSUM_NONE;
1389out:
1390	return ret;
1391}
1392
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	An ERR_PTR() is returned on failure: -EPROTONOSUPPORT when no
 *	handler with a gso_segment method is registered for skb->protocol,
 *	otherwise the error produced by pskb_expand_head() or by the
 *	protocol handler.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	/* Record the link-layer header length and step over that header. */
	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	/* Not CHECKSUM_PARTIAL: warn, and unshare a cloned header. */
	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				/*
				 * Stop on error, or with segs == NULL when
				 * the output path can take the skb as is.
				 */
				if (err || skb_gso_ok(skb, features))
					break;
				/* Move skb->data back to the network header. */
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore skb->data to the start of the link-layer header. */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1445
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate-limited error naming the device and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	const char *ifname;

	if (!net_ratelimit())
		return;

	ifname = dev ? dev->name : "<unknown>";
	printk(KERN_ERR "%s: hw csum failure.\n", ifname);
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1458
/*
 * Tell whether @skb has a page fragment in high memory that @dev cannot
 * DMA from. Always 0 without CONFIG_HIGHMEM or when the device
 * advertises NETIF_F_HIGHDMA.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
		if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
			return 1;
	}

#endif
	return 0;
}
1479
/*
 * Private data kept in skb->cb of an skb that carries a list of GSO
 * segments: the destructor the skb had before dev_gso_segment()
 * replaced it with dev_gso_skb_destructor().
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1485
1486static void dev_gso_skb_destructor(struct sk_buff *skb)
1487{
1488	struct dev_gso_cb *cb;
1489
1490	do {
1491		struct sk_buff *nskb = skb->next;
1492
1493		skb->next = nskb->next;
1494		nskb->next = NULL;
1495		kfree_skb(nskb);
1496	} while (skb->next);
1497
1498	cb = DEV_GSO_CB(skb);
1499	if (cb->destructor)
1500		cb->destructor(skb);
1501}
1502
1503/**
1504 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1505 *	@skb: buffer to segment
1506 *
1507 *	This function segments the given skb and stores the list of segments
1508 *	in skb->next.
1509 */
1510static int dev_gso_segment(struct sk_buff *skb)
1511{
1512	struct net_device *dev = skb->dev;
1513	struct sk_buff *segs;
1514	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1515					 NETIF_F_SG : 0);
1516
1517	segs = skb_gso_segment(skb, features);
1518
1519	/* Verifying header integrity only. */
1520	if (!segs)
1521		return 0;
1522
1523	if (unlikely(IS_ERR(segs)))
1524		return PTR_ERR(segs);
1525
1526	skb->next = segs;
1527	DEV_GSO_CB(skb)->destructor = skb->destructor;
1528	skb->destructor = dev_gso_skb_destructor;
1529
1530	return 0;
1531}
1532
/*
 * Hand @skb to the hard_start_xmit method of @dev.
 *
 * An skb without a segment list is first shown to the taps on ptype_all
 * and, when netif_needs_gso() says so, segmented with dev_gso_segment().
 * An skb that carries segments on skb->next has them transmitted one by
 * one.
 *
 * Returns the value of the driver's hard_start_xmit for a plain skb or
 * for a failing segment, NETDEV_TX_BUSY when the queue stopped while
 * segments remain, and 0 once all segments were sent or when
 * segmentation failed (the skb is freed in both of those cases).
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			/* Segments were produced: send them individually. */
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		/* Detach the first pending segment and transmit it. */
		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Not accepted: put the segment back at the head. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely((netif_queue_stopped(dev) ||
			     netif_subqueue_stopped(dev, skb)) &&
			     skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* All segments are gone: restore the saved destructor. */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}
1574
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize a frag_list skb the device cannot take as it is. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));

		if (!(dev->features & NETIF_F_GEN_CSUM) &&
		    !((dev->features & NETIF_F_IP_CSUM) &&
		      skb->protocol == htons(ETH_P_IP)) &&
		    !((dev->features & NETIF_F_IPV6_CSUM) &&
		      skb->protocol == htons(ETH_P_IPV6)))
			if (skb_checksum_help(skb))
				goto out_kfree_skb;
	}

gso:
	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);
		/* Re-read the qdisc now that queue_lock is held. */
		q = dev->qdisc;
		if (q->enqueue) {
			/* reset queue_mapping to zero */
			skb_set_queue_mapping(skb, 0);
			rc = q->enqueue(skb, q);
			qdisc_run(dev);
			spin_unlock(&dev->queue_lock);

			/* NET_XMIT_BYPASS is reported to the caller as success. */
			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
			goto out;
		}
		spin_unlock(&dev->queue_lock);
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* This CPU owning the xmit lock already means recursion. */
		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev) &&
			    !netif_subqueue_stopped(dev, skb)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/*
	 * Reached when the device is down, when recursion was detected,
	 * or when the frame was not sent above: report -ENETDOWN and
	 * free the skb.
	 */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1732
1733
1734/*=======================================================================
1735			Receiver routines
1736  =======================================================================*/
1737
/* netif_rx() drops packets once the per-CPU input queue is longer than this. */
int netdev_max_backlog __read_mostly = 1000;
/* Initial budget of one net_rx_action() run. */
int netdev_budget __read_mostly = 300;
/* Copied into the backlog napi weight by process_backlog(). */
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-CPU receive counters; total and dropped are updated on the rx path. */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1743
1744
1745/**
1746 *	netif_rx	-	post buffer to the network code
1747 *	@skb: buffer to post
1748 *
1749 *	This function receives a packet from a device driver and queues it for
1750 *	the upper (protocol) levels to process.  It always succeeds. The buffer
1751 *	may be dropped during processing for congestion control or by the
1752 *	protocol layers.
1753 *
1754 *	return values:
1755 *	NET_RX_SUCCESS	(no congestion)
1756 *	NET_RX_DROP     (packet was dropped)
1757 *
1758 */
1759
1760int netif_rx(struct sk_buff *skb)
1761{
1762	struct softnet_data *queue;
1763	unsigned long flags;
1764
1765	/* if netpoll wants it, pretend we never saw it */
1766	if (netpoll_rx(skb))
1767		return NET_RX_DROP;
1768
1769	if (!skb->tstamp.tv64)
1770		net_timestamp(skb);
1771
1772	/*
1773	 * The code is rearranged so that the path is the most
1774	 * short when CPU is congested, but is still operating.
1775	 */
1776	local_irq_save(flags);
1777	queue = &__get_cpu_var(softnet_data);
1778
1779	__get_cpu_var(netdev_rx_stat).total++;
1780	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1781		if (queue->input_pkt_queue.qlen) {
1782enqueue:
1783			dev_hold(skb->dev);
1784			__skb_queue_tail(&queue->input_pkt_queue, skb);
1785			local_irq_restore(flags);
1786			return NET_RX_SUCCESS;
1787		}
1788
1789		napi_schedule(&queue->backlog);
1790		goto enqueue;
1791	}
1792
1793	__get_cpu_var(netdev_rx_stat).dropped++;
1794	local_irq_restore(flags);
1795
1796	kfree_skb(skb);
1797	return NET_RX_DROP;
1798}
1799
/*
 * Post @skb through netif_rx() with preemption disabled and run any
 * softirqs that are pending afterwards. Returns what netif_rx()
 * returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();

	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1814
1815static inline struct net_device *skb_bond(struct sk_buff *skb)
1816{
1817	struct net_device *dev = skb->dev;
1818
1819	if (dev->master) {
1820		if (skb_bond_should_drop(skb)) {
1821			kfree_skb(skb);
1822			return NULL;
1823		}
1824		skb->dev = dev->master;
1825	}
1826
1827	return dev;
1828}
1829
1830
1831static void net_tx_action(struct softirq_action *h)
1832{
1833	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1834
1835	if (sd->completion_queue) {
1836		struct sk_buff *clist;
1837
1838		local_irq_disable();
1839		clist = sd->completion_queue;
1840		sd->completion_queue = NULL;
1841		local_irq_enable();
1842
1843		while (clist) {
1844			struct sk_buff *skb = clist;
1845			clist = clist->next;
1846
1847			BUG_TRAP(!atomic_read(&skb->users));
1848			__kfree_skb(skb);
1849		}
1850	}
1851
1852	if (sd->output_queue) {
1853		struct net_device *head;
1854
1855		local_irq_disable();
1856		head = sd->output_queue;
1857		sd->output_queue = NULL;
1858		local_irq_enable();
1859
1860		while (head) {
1861			struct net_device *dev = head;
1862			head = head->next_sched;
1863
1864			smp_mb__before_clear_bit();
1865			clear_bit(__LINK_STATE_SCHED, &dev->state);
1866
1867			if (spin_trylock(&dev->queue_lock)) {
1868				qdisc_run(dev);
1869				spin_unlock(&dev->queue_lock);
1870			} else {
1871				netif_schedule(dev);
1872			}
1873		}
1874	}
1875}
1876
1877static inline int deliver_skb(struct sk_buff *skb,
1878			      struct packet_type *pt_prev,
1879			      struct net_device *orig_dev)
1880{
1881	atomic_inc(&skb->users);
1882	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1883}
1884
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/* Frame hook used by handle_bridge() below. */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Pass @skb to the bridging hook when its device is a bridge port.
 * A handler pending in *@pt_prev is served first. Returns NULL if the
 * packet was consumed by the hook.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (!port)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1918
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
/* Frame hook used by handle_macvlan() below. */
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Pass @skb to the macvlan hook when its device has a macvlan port.
 * A handler pending in *@pt_prev is served first. Returns what the
 * hook returns, or @skb itself when the device has no macvlan port.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (!skb->dev->macvlan_port)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1940
1941#ifdef CONFIG_NET_CLS_ACT
1942/* TODO: Maybe we should just force sch_ingress to be compiled in
1943 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1944 * a compare and 2 stores extra right now if we dont have it on
1945 * but have CONFIG_NET_CLS_ACT
1946 * NOTE: This doesnt stop any functionality; if you dont have
1947 * the ingress scheduler, you just cant add policies on ingress.
1948 *
1949 */
1950static int ing_filter(struct sk_buff *skb)
1951{
1952	struct Qdisc *q;
1953	struct net_device *dev = skb->dev;
1954	int result = TC_ACT_OK;
1955	u32 ttl = G_TC_RTTL(skb->tc_verd);
1956
1957	if (MAX_RED_LOOP < ttl++) {
1958		printk(KERN_WARNING
1959		       "Redir loop detected Dropping packet (%d->%d)\n",
1960		       skb->iif, dev->ifindex);
1961		return TC_ACT_SHOT;
1962	}
1963
1964	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1965	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1966
1967	spin_lock(&dev->ingress_lock);
1968	if ((q = dev->qdisc_ingress) != NULL)
1969		result = q->enqueue(skb, q);
1970	spin_unlock(&dev->ingress_lock);
1971
1972	return result;
1973}
1974
1975static inline struct sk_buff *handle_ing(struct sk_buff *skb,
1976					 struct packet_type **pt_prev,
1977					 int *ret, struct net_device *orig_dev)
1978{
1979	if (!skb->dev->qdisc_ingress)
1980		goto out;
1981
1982	if (*pt_prev) {
1983		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1984		*pt_prev = NULL;
1985	} else {
1986		/* Huh? Why does turning on AF_PACKET affect this? */
1987		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1988	}
1989
1990	switch (ing_filter(skb)) {
1991	case TC_ACT_SHOT:
1992	case TC_ACT_STOLEN:
1993		kfree_skb(skb);
1994		return NULL;
1995	}
1996
1997out:
1998	skb->tc_verd = 0;
1999	return skb;
2000}
2001#endif
2002
/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	__be16 type;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/* Record the receiving interface unless one is already set. */
	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	/* May redirect skb->dev to the master; NULL means the skb was freed. */
	orig_dev = skb_bond(skb);

	if (!orig_dev)
		return NET_RX_DROP;

	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	/*
	 * pt_prev holds the handler found last. It is only called once the
	 * next match is found, so the final handler can be given the skb
	 * without the extra reference deliver_skb() takes.
	 */
	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: clear it and skip the taps and ingress filtering. */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Handlers on ptype_all see every packet for their device. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	/* Each hook returns NULL when it has consumed the packet. */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Protocol handlers registered for this packet type. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* The last handler is called without an extra reference. */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
2103
2104static int process_backlog(struct napi_struct *napi, int quota)
2105{
2106	int work = 0;
2107	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2108	unsigned long start_time = jiffies;
2109
2110	napi->weight = weight_p;
2111	do {
2112		struct sk_buff *skb;
2113		struct net_device *dev;
2114
2115		local_irq_disable();
2116		skb = __skb_dequeue(&queue->input_pkt_queue);
2117		if (!skb) {
2118			__napi_complete(napi);
2119			local_irq_enable();
2120			break;
2121		}
2122
2123		local_irq_enable();
2124
2125		dev = skb->dev;
2126
2127		netif_receive_skb(skb);
2128
2129		dev_put(dev);
2130	} while (++work < quota && jiffies == start_time);
2131
2132	return work;
2133}
2134
2135/**
2136 * __napi_schedule - schedule for receive
2137 * @n: entry to schedule
2138 *
2139 * The entry's receive function will be scheduled to run
2140 */
2141void fastcall __napi_schedule(struct napi_struct *n)
2142{
2143	unsigned long flags;
2144
2145	local_irq_save(flags);
2146	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2147	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2148	local_irq_restore(flags);
2149}
2150EXPORT_SYMBOL(__napi_schedule);
2151
2152
2153static void net_rx_action(struct softirq_action *h)
2154{
2155	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2156	unsigned long start_time = jiffies;
2157	int budget = netdev_budget;
2158	void *have;
2159
2160	local_irq_disable();
2161
2162	while (!list_empty(list)) {
2163		struct napi_struct *n;
2164		int work, weight;
2165
2166		/* If softirq window is exhuasted then punt.
2167		 *
2168		 * Note that this is a slight policy change from the
2169		 * previous NAPI code, which would allow up to 2
2170		 * jiffies to pass before breaking out.  The test
2171		 * used to be "jiffies - start_time > 1".
2172		 */
2173		if (unlikely(budget <= 0 || jiffies != start_time))
2174			goto softnet_break;
2175
2176		local_irq_enable();
2177
2178		/* Even though interrupts have been re-enabled, this
2179		 * access is safe because interrupts can only add new
2180		 * entries to the tail of this list, and only ->poll()
2181		 * calls can remove this head entry from the list.
2182		 */
2183		n = list_entry(list->next, struct napi_struct, poll_list);
2184
2185		have = netpoll_poll_lock(n);
2186
2187		weight = n->weight;
2188
2189		/* This NAPI_STATE_SCHED test is for avoiding a race
2190		 * with netpoll's poll_napi().  Only the entity which
2191		 * obtains the lock and sees NAPI_STATE_SCHED set will
2192		 * actually make the ->poll() call.  Therefore we avoid
2193		 * accidently calling ->poll() when NAPI is not scheduled.
2194		 */
2195		work = 0;
2196		if (test_bit(NAPI_STATE_SCHED, &n->state))
2197			work = n->poll(n, weight);
2198
2199		WARN_ON_ONCE(work > weight);
2200
2201		budget -= work;
2202
2203		local_irq_disable();
2204
2205		/* Drivers must not modify the NAPI state if they
2206		 * consume the entire weight.  In such cases this code
2207		 * still "owns" the NAPI instance and therefore can
2208		 * move the instance around on the list at-will.
2209		 */
2210		if (unlikely(work == weight)) {
2211			if (unlikely(napi_disable_pending(n)))
2212				__napi_complete(n);
2213			else
2214				list_move_tail(&n->poll_list, list);
2215		}
2216
2217		netpoll_poll_unlock(have);
2218	}
2219out:
2220	local_irq_enable();
2221
2222#ifdef CONFIG_NET_DMA
2223	/*
2224	 * There may not be any more sk_buffs coming right now, so push
2225	 * any pending DMA copies to hardware
2226	 */
2227	if (!cpus_empty(net_dma.channel_mask)) {
2228		int chan_idx;
2229		for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2230			struct dma_chan *chan = net_dma.channels[chan_idx];
2231			if (chan)
2232				dma_async_memcpy_issue_pending(chan);
2233		}
2234	}
2235#endif
2236
2237	return;
2238
2239softnet_break:
2240	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2241	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2242	goto out;
2243}
2244
2245static gifconf_func_t * gifconf_list [NPROTO];
2246
2247/**
2248 *	register_gifconf	-	register a SIOCGIF handler
2249 *	@family: Address family
2250 *	@gifconf: Function handler
2251 *
2252 *	Register protocol dependent address dumping routines. The handler
2253 *	that is passed must not be freed or reused until it has been replaced
2254 *	by another handler.
2255 */
2256int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2257{
2258	if (family >= NPROTO)
2259		return -EINVAL;
2260	gifconf_list[family] = gifconf;
2261	return 0;
2262}
2263
2264
2265/*
2266 *	Map an interface index to its name (SIOCGIFNAME)
2267 */
2268
2269/*
2270 *	We need this ioctl for efficient implementation of the
2271 *	if_indextoname() function required by the IPv6 API.  Without
2272 *	it, we would have to search all the interfaces to find a
2273 *	match.  --pb
2274 */
2275
2276static int dev_ifname(struct net *net, struct ifreq __user *arg)
2277{
2278	struct net_device *dev;
2279	struct ifreq ifr;
2280
2281	/*
2282	 *	Fetch the caller's info block.
2283	 */
2284
2285	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2286		return -EFAULT;
2287
2288	read_lock(&dev_base_lock);
2289	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2290	if (!dev) {
2291		read_unlock(&dev_base_lock);
2292		return -ENODEV;
2293	}
2294
2295	strcpy(ifr.ifr_name, dev->name);
2296	read_unlock(&dev_base_lock);
2297
2298	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2299		return -EFAULT;
2300	return 0;
2301}
2302
2303/*
2304 *	Perform a SIOCGIFCONF call. This structure will change
2305 *	size eventually, and there is nothing I can do about it.
2306 *	Thus we will need a 'compatibility mode'.
2307 */
2308
2309static int dev_ifconf(struct net *net, char __user *arg)
2310{
2311	struct ifconf ifc;
2312	struct net_device *dev;
2313	char __user *pos;
2314	int len;
2315	int total;
2316	int i;
2317
2318	/*
2319	 *	Fetch the caller's info block.
2320	 */
2321
2322	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2323		return -EFAULT;
2324
2325	pos = ifc.ifc_buf;
2326	len = ifc.ifc_len;
2327
2328	/*
2329	 *	Loop over the interfaces, and write an info block for each.
2330	 */
2331
2332	total = 0;
2333	for_each_netdev(net, dev) {
2334		for (i = 0; i < NPROTO; i++) {
2335			if (gifconf_list[i]) {
2336				int done;
2337				if (!pos)
2338					done = gifconf_list[i](dev, NULL, 0);
2339				else
2340					done = gifconf_list[i](dev, pos + total,
2341							       len - total);
2342				if (done < 0)
2343					return -EFAULT;
2344				total += done;
2345			}
2346		}
2347	}
2348
2349	/*
2350	 *	All done.  Write the updated control block back to the caller.
2351	 */
2352	ifc.ifc_len = total;
2353
2354	/*
2355	 * 	Both BSD and Solaris return 0 here, so we do too.
2356	 */
2357	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2358}
2359
2360#ifdef CONFIG_PROC_FS
2361/*
2362 *	This is invoked by the /proc filesystem handler to display a device
2363 *	in detail.
2364 */
2365void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2366{
2367	struct net *net = seq->private;
2368	loff_t off;
2369	struct net_device *dev;
2370
2371	read_lock(&dev_base_lock);
2372	if (!*pos)
2373		return SEQ_START_TOKEN;
2374
2375	off = 1;
2376	for_each_netdev(net, dev)
2377		if (off++ == *pos)
2378			return dev;
2379
2380	return NULL;
2381}
2382
2383void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2384{
2385	struct net *net = seq->private;
2386	++*pos;
2387	return v == SEQ_START_TOKEN ?
2388		first_net_device(net) : next_net_device((struct net_device *)v);
2389}
2390
/* End of an iteration pass: release the lock taken in dev_seq_start(). */
void dev_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&dev_base_lock);
}
2395
2396static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2397{
2398	struct net_device_stats *stats = dev->get_stats(dev);
2399
2400	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2401		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2402		   dev->name, stats->rx_bytes, stats->rx_packets,
2403		   stats->rx_errors,
2404		   stats->rx_dropped + stats->rx_missed_errors,
2405		   stats->rx_fifo_errors,
2406		   stats->rx_length_errors + stats->rx_over_errors +
2407		    stats->rx_crc_errors + stats->rx_frame_errors,
2408		   stats->rx_compressed, stats->multicast,
2409		   stats->tx_bytes, stats->tx_packets,
2410		   stats->tx_errors, stats->tx_dropped,
2411		   stats->tx_fifo_errors, stats->collisions,
2412		   stats->tx_carrier_errors +
2413		    stats->tx_aborted_errors +
2414		    stats->tx_window_errors +
2415		    stats->tx_heartbeat_errors,
2416		   stats->tx_compressed);
2417}
2418
2419/*
2420 *	Called from the PROCfs module. This now uses the new arbitrary sized
2421 *	/proc/net interface to create /proc/net/dev
2422 */
2423static int dev_seq_show(struct seq_file *seq, void *v)
2424{
2425	if (v == SEQ_START_TOKEN)
2426		seq_puts(seq, "Inter-|   Receive                            "
2427			      "                    |  Transmit\n"
2428			      " face |bytes    packets errs drop fifo frame "
2429			      "compressed multicast|bytes    packets errs "
2430			      "drop fifo colls carrier compressed\n");
2431	else
2432		dev_seq_printf_stats(seq, v);
2433	return 0;
2434}
2435
2436static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2437{
2438	struct netif_rx_stats *rc = NULL;
2439
2440	while (*pos < NR_CPUS)
2441		if (cpu_online(*pos)) {
2442			rc = &per_cpu(netdev_rx_stat, *pos);
2443			break;
2444		} else
2445			++*pos;
2446	return rc;
2447}
2448
/* Begin iteration at the first online CPU at or after *pos. */
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}
2453
2454static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2455{
2456	++*pos;
2457	return softnet_get_online(pos);
2458}
2459
/* Nothing to undo: softnet_seq_start() takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2463
2464static int softnet_seq_show(struct seq_file *seq, void *v)
2465{
2466	struct netif_rx_stats *s = v;
2467
2468	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2469		   s->total, s->dropped, s->time_squeeze, 0,
2470		   0, 0, 0, 0, /* was fastroute */
2471		   s->cpu_collision );
2472	return 0;
2473}
2474
2475static const struct seq_operations dev_seq_ops = {
2476	.start = dev_seq_start,
2477	.next  = dev_seq_next,
2478	.stop  = dev_seq_stop,
2479	.show  = dev_seq_show,
2480};
2481
2482static int dev_seq_open(struct inode *inode, struct file *file)
2483{
2484	struct seq_file *seq;
2485	int res;
2486	res =  seq_open(file, &dev_seq_ops);
2487	if (!res) {
2488		seq = file->private_data;
2489		seq->private = get_proc_net(inode);
2490		if (!seq->private) {
2491			seq_release(inode, file);
2492			res = -ENXIO;
2493		}
2494	}
2495	return res;
2496}
2497
2498static int dev_seq_release(struct inode *inode, struct file *file)
2499{
2500	struct seq_file *seq = file->private_data;
2501	struct net *net = seq->private;
2502	put_net(net);
2503	return seq_release(inode, file);
2504}
2505
2506static const struct file_operations dev_seq_fops = {
2507	.owner	 = THIS_MODULE,
2508	.open    = dev_seq_open,
2509	.read    = seq_read,
2510	.llseek  = seq_lseek,
2511	.release = dev_seq_release,
2512};
2513
2514static const struct seq_operations softnet_seq_ops = {
2515	.start = softnet_seq_start,
2516	.next  = softnet_seq_next,
2517	.stop  = softnet_seq_stop,
2518	.show  = softnet_seq_show,
2519};
2520
/* Open handler: plain seq_file open with the softnet iterator. */
static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}
2525
2526static const struct file_operations softnet_seq_fops = {
2527	.owner	 = THIS_MODULE,
2528	.open    = softnet_seq_open,
2529	.read    = seq_read,
2530	.llseek  = seq_lseek,
2531	.release = seq_release,
2532};
2533
2534static void *ptype_get_idx(loff_t pos)
2535{
2536	struct packet_type *pt = NULL;
2537	loff_t i = 0;
2538	int t;
2539
2540	list_for_each_entry_rcu(pt, &ptype_all, list) {
2541		if (i == pos)
2542			return pt;
2543		++i;
2544	}
2545
2546	for (t = 0; t < 16; t++) {
2547		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2548			if (i == pos)
2549				return pt;
2550			++i;
2551		}
2552	}
2553	return NULL;
2554}
2555
2556static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2557{
2558	rcu_read_lock();
2559	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2560}
2561
2562static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563{
2564	struct packet_type *pt;
2565	struct list_head *nxt;
2566	int hash;
2567
2568	++*pos;
2569	if (v == SEQ_START_TOKEN)
2570		return ptype_get_idx(0);
2571
2572	pt = v;
2573	nxt = pt->list.next;
2574	if (pt->type == htons(ETH_P_ALL)) {
2575		if (nxt != &ptype_all)
2576			goto found;
2577		hash = 0;
2578		nxt = ptype_base[0].next;
2579	} else
2580		hash = ntohs(pt->type) & 15;
2581
2582	while (nxt == &ptype_base[hash]) {
2583		if (++hash >= 16)
2584			return NULL;
2585		nxt = ptype_base[hash].next;
2586	}
2587found:
2588	return list_entry(nxt, struct packet_type, list);
2589}
2590
/* End of an iteration pass: pairs with rcu_read_lock() in ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}
2595
/*
 * Print a handler address symbolically.  With CONFIG_KALLSYMS and a
 * successful lookup the output is "symbol+0xoff", prefixed by
 * ":module:" when a module name is reported.  Otherwise the raw pointer
 * is printed as "[%p]".
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long off = 0, size;
	char *module;
	char buf[128];
	const char *name;

	name = kallsyms_lookup((unsigned long)sym, &size, &off,
			       &module, buf);

	if (name) {
		char *sep = ":";

		/* No module: print neither a name nor the separators. */
		if (!module) {
			sep = "";
			module = sep;
		}
		seq_printf(seq, "%s%s%s%s+0x%lx", sep, module, sep,
			   name, off);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2620
2621static int ptype_seq_show(struct seq_file *seq, void *v)
2622{
2623	struct packet_type *pt = v;
2624
2625	if (v == SEQ_START_TOKEN)
2626		seq_puts(seq, "Type Device      Function\n");
2627	else {
2628		if (pt->type == htons(ETH_P_ALL))
2629			seq_puts(seq, "ALL ");
2630		else
2631			seq_printf(seq, "%04x", ntohs(pt->type));
2632
2633		seq_printf(seq, " %-8s ",
2634			   pt->dev ? pt->dev->name : "");
2635		ptype_seq_decode(seq,  pt->func);
2636		seq_putc(seq, '\n');
2637	}
2638
2639	return 0;
2640}
2641
2642static const struct seq_operations ptype_seq_ops = {
2643	.start = ptype_seq_start,
2644	.next  = ptype_seq_next,
2645	.stop  = ptype_seq_stop,
2646	.show  = ptype_seq_show,
2647};
2648
/* Open handler: plain seq_file open with the packet-type iterator. */
static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &ptype_seq_ops);
}
2653
2654static const struct file_operations ptype_seq_fops = {
2655	.owner	 = THIS_MODULE,
2656	.open    = ptype_seq_open,
2657	.read    = seq_read,
2658	.llseek  = seq_lseek,
2659	.release = seq_release,
2660};
2661
2662
2663static int __net_init dev_proc_net_init(struct net *net)
2664{
2665	int rc = -ENOMEM;
2666
2667	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2668		goto out;
2669	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2670		goto out_dev;
2671	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2672		goto out_softnet;
2673
2674	if (wext_proc_init(net))
2675		goto out_ptype;
2676	rc = 0;
2677out:
2678	return rc;
2679out_ptype:
2680	proc_net_remove(net, "ptype");
2681out_softnet:
2682	proc_net_remove(net, "softnet_stat");
2683out_dev:
2684	proc_net_remove(net, "dev");
2685	goto out;
2686}
2687
/*
 * Per-namespace teardown: undo dev_proc_net_init() in reverse order,
 * wext first and then the three proc entries.
 */
static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}
2696
2697static struct pernet_operations __net_initdata dev_proc_ops = {
2698	.init = dev_proc_net_init,
2699	.exit = dev_proc_net_exit,
2700};
2701
/* Register the per-namespace proc setup/teardown hooks. */
static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
2706#else
/* Without CONFIG_PROC_FS there is nothing to register; evaluate to 0. */
#define dev_proc_init() 0
2708#endif	/* CONFIG_PROC_FS */
2709
2710
2711/**
2712 *	netdev_set_master	-	set up master/slave pair
2713 *	@slave: slave device
2714 *	@master: new master device
2715 *
2716 *	Changes the master device of the slave. Pass %NULL to break the
2717 *	bonding. The caller must hold the RTNL semaphore. On a failure
2718 *	a negative errno code is returned. On success the reference counts
2719 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2720 *	function returns zero.
2721 */
2722int netdev_set_master(struct net_device *slave, struct net_device *master)
2723{
2724	struct net_device *old = slave->master;
2725
2726	ASSERT_RTNL();
2727
2728	if (master) {
2729		if (old)
2730			return -EBUSY;
2731		dev_hold(master);
2732	}
2733
2734	slave->master = master;
2735
2736	synchronize_net();
2737
2738	if (old)
2739		dev_put(old);
2740
2741	if (master)
2742		slave->flags |= IFF_SLAVE;
2743	else
2744		slave->flags &= ~IFF_SLAVE;
2745
2746	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2747	return 0;
2748}
2749
2750static void __dev_set_promiscuity(struct net_device *dev, int inc)
2751{
2752	unsigned short old_flags = dev->flags;
2753
2754	ASSERT_RTNL();
2755
2756	if ((dev->promiscuity += inc) == 0)
2757		dev->flags &= ~IFF_PROMISC;
2758	else
2759		dev->flags |= IFF_PROMISC;
2760	if (dev->flags != old_flags) {
2761		printk(KERN_INFO "device %s %s promiscuous mode\n",
2762		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2763							       "left");
2764		audit_log(current->audit_context, GFP_ATOMIC,
2765			AUDIT_ANOM_PROMISCUOUS,
2766			"dev=%s prom=%d old_prom=%d auid=%u",
2767			dev->name, (dev->flags & IFF_PROMISC),
2768			(old_flags & IFF_PROMISC),
2769			audit_get_loginuid(current->audit_context));
2770
2771		if (dev->change_rx_flags)
2772			dev->change_rx_flags(dev, IFF_PROMISC);
2773	}
2774}
2775
2776/**
2777 *	dev_set_promiscuity	- update promiscuity count on a device
2778 *	@dev: device
2779 *	@inc: modifier
2780 *
2781 *	Add or remove promiscuity from a device. While the count in the device
2782 *	remains above zero the interface remains promiscuous. Once it hits zero
2783 *	the device reverts back to normal filtering operation. A negative inc
2784 *	value is used to drop promiscuity on the device.
2785 */
2786void dev_set_promiscuity(struct net_device *dev, int inc)
2787{
2788	unsigned short old_flags = dev->flags;
2789
2790	__dev_set_promiscuity(dev, inc);
2791	if (dev->flags != old_flags)
2792		dev_set_rx_mode(dev);
2793}
2794
2795/**
2796 *	dev_set_allmulti	- update allmulti count on a device
2797 *	@dev: device
2798 *	@inc: modifier
2799 *
2800 *	Add or remove reception of all multicast frames to a device. While the
2801 *	count in the device remains above zero the interface remains listening
2802 *	to all interfaces. Once it hits zero the device reverts back to normal
2803 *	filtering operation. A negative @inc value is used to drop the counter
2804 *	when releasing a resource needing all multicasts.
2805 */
2806
2807void dev_set_allmulti(struct net_device *dev, int inc)
2808{
2809	unsigned short old_flags = dev->flags;
2810
2811	ASSERT_RTNL();
2812
2813	dev->flags |= IFF_ALLMULTI;
2814	if ((dev->allmulti += inc) == 0)
2815		dev->flags &= ~IFF_ALLMULTI;
2816	if (dev->flags ^ old_flags) {
2817		if (dev->change_rx_flags)
2818			dev->change_rx_flags(dev, IFF_ALLMULTI);
2819		dev_set_rx_mode(dev);
2820	}
2821}
2822
2823/*
2824 *	Upload unicast and multicast address lists to device and
2825 *	configure RX filtering. When the device doesn't support unicast
2826 *	filtering it is put in promiscuous mode while unicast addresses
2827 *	are present.
2828 */
2829void __dev_set_rx_mode(struct net_device *dev)
2830{
2831	/* dev_open will call this function so the list will stay sane. */
2832	if (!(dev->flags&IFF_UP))
2833		return;
2834
2835	if (!netif_device_present(dev))
2836		return;
2837
2838	if (dev->set_rx_mode)
2839		dev->set_rx_mode(dev);
2840	else {
2841		/* Unicast addresses changes may only happen under the rtnl,
2842		 * therefore calling __dev_set_promiscuity here is safe.
2843		 */
2844		if (dev->uc_count > 0 && !dev->uc_promisc) {
2845			__dev_set_promiscuity(dev, 1);
2846			dev->uc_promisc = 1;
2847		} else if (dev->uc_count == 0 && dev->uc_promisc) {
2848			__dev_set_promiscuity(dev, -1);
2849			dev->uc_promisc = 0;
2850		}
2851
2852		if (dev->set_multicast_list)
2853			dev->set_multicast_list(dev);
2854	}
2855}
2856
/* Locked wrapper: run __dev_set_rx_mode() under the device's tx lock. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_tx_unlock_bh(dev);
}
2863
2864int __dev_addr_delete(struct dev_addr_list **list, int *count,
2865		      void *addr, int alen, int glbl)
2866{
2867	struct dev_addr_list *da;
2868
2869	for (; (da = *list) != NULL; list = &da->next) {
2870		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2871		    alen == da->da_addrlen) {
2872			if (glbl) {
2873				int old_glbl = da->da_gusers;
2874				da->da_gusers = 0;
2875				if (old_glbl == 0)
2876					break;
2877			}
2878			if (--da->da_users)
2879				return 0;
2880
2881			*list = da->next;
2882			kfree(da);
2883			(*count)--;
2884			return 0;
2885		}
2886	}
2887	return -ENOENT;
2888}
2889
2890int __dev_addr_add(struct dev_addr_list **list, int *count,
2891		   void *addr, int alen, int glbl)
2892{
2893	struct dev_addr_list *da;
2894
2895	for (da = *list; da != NULL; da = da->next) {
2896		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2897		    da->da_addrlen == alen) {
2898			if (glbl) {
2899				int old_glbl = da->da_gusers;
2900				da->da_gusers = 1;
2901				if (old_glbl)
2902					return 0;
2903			}
2904			da->da_users++;
2905			return 0;
2906		}
2907	}
2908
2909	da = kmalloc(sizeof(*da), GFP_ATOMIC);
2910	if (da == NULL)
2911		return -ENOMEM;
2912	memcpy(da->da_addr, addr, alen);
2913	da->da_addrlen = alen;
2914	da->da_users = 1;
2915	da->da_gusers = glbl ? 1 : 0;
2916	da->next = *list;
2917	*list = da;
2918	(*count)++;
2919	return 0;
2920}
2921
2922/**
2923 *	dev_unicast_delete	- Release secondary unicast address.
2924 *	@dev: device
2925 *	@addr: address to delete
2926 *	@alen: length of @addr
2927 *
2928 *	Release reference to a secondary unicast address and remove it
2929 *	from the device if the reference count drops to zero.
2930 *
2931 * 	The caller must hold the rtnl_mutex.
2932 */
2933int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2934{
2935	int err;
2936
2937	ASSERT_RTNL();
2938
2939	netif_tx_lock_bh(dev);
2940	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2941	if (!err)
2942		__dev_set_rx_mode(dev);
2943	netif_tx_unlock_bh(dev);
2944	return err;
2945}
2946EXPORT_SYMBOL(dev_unicast_delete);
2947
2948/**
2949 *	dev_unicast_add		- add a secondary unicast address
2950 *	@dev: device
2951 *	@addr: address to delete
2952 *	@alen: length of @addr
2953 *
2954 *	Add a secondary unicast address to the device or increase
2955 *	the reference count if it already exists.
2956 *
2957 *	The caller must hold the rtnl_mutex.
2958 */
2959int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2960{
2961	int err;
2962
2963	ASSERT_RTNL();
2964
2965	netif_tx_lock_bh(dev);
2966	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2967	if (!err)
2968		__dev_set_rx_mode(dev);
2969	netif_tx_unlock_bh(dev);
2970	return err;
2971}
2972EXPORT_SYMBOL(dev_unicast_add);
2973
2974static void __dev_addr_discard(struct dev_addr_list **list)
2975{
2976	struct dev_addr_list *tmp;
2977
2978	while (*list != NULL) {
2979		tmp = *list;
2980		*list = tmp->next;
2981		if (tmp->da_users > tmp->da_gusers)
2982			printk("__dev_addr_discard: address leakage! "
2983			       "da_users=%d\n", tmp->da_users);
2984		kfree(tmp);
2985	}
2986}
2987
/*
 * Throw away the device's unicast and multicast address lists and reset
 * both counters, all under the device's tx lock.
 */
static void dev_addr_discard(struct net_device *dev)
{
	netif_tx_lock_bh(dev);

	__dev_addr_discard(&dev->uc_list);
	dev->uc_count = 0;

	__dev_addr_discard(&dev->mc_list);
	dev->mc_count = 0;

	netif_tx_unlock_bh(dev);
}
3000
3001unsigned dev_get_flags(const struct net_device *dev)
3002{
3003	unsigned flags;
3004
3005	flags = (dev->flags & ~(IFF_PROMISC |
3006				IFF_ALLMULTI |
3007				IFF_RUNNING |
3008				IFF_LOWER_UP |
3009				IFF_DORMANT)) |
3010		(dev->gflags & (IFF_PROMISC |
3011				IFF_ALLMULTI));
3012
3013	if (netif_running(dev)) {
3014		if (netif_oper_up(dev))
3015			flags |= IFF_RUNNING;
3016		if (netif_carrier_ok(dev))
3017			flags |= IFF_LOWER_UP;
3018		if (netif_dormant(dev))
3019			flags |= IFF_DORMANT;
3020	}
3021
3022	return flags;
3023}
3024
3025int dev_change_flags(struct net_device *dev, unsigned flags)
3026{
3027	int ret, changes;
3028	int old_flags = dev->flags;
3029
3030	ASSERT_RTNL();
3031
3032	/*
3033	 *	Set the flags on our device.
3034	 */
3035
3036	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3037			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3038			       IFF_AUTOMEDIA)) |
3039		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3040				    IFF_ALLMULTI));
3041
3042	/*
3043	 *	Load in the correct multicast list now the flags have changed.
3044	 */
3045
3046	if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3047		dev->change_rx_flags(dev, IFF_MULTICAST);
3048
3049	dev_set_rx_mode(dev);
3050
3051	/*
3052	 *	Have we downed the interface. We handle IFF_UP ourselves
3053	 *	according to user attempts to set it, rather than blindly
3054	 *	setting it.
3055	 */
3056
3057	ret = 0;
3058	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3059		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3060
3061		if (!ret)
3062			dev_set_rx_mode(dev);
3063	}
3064
3065	if (dev->flags & IFF_UP &&
3066	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3067					  IFF_VOLATILE)))
3068		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3069
3070	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3071		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3072		dev->gflags ^= IFF_PROMISC;
3073		dev_set_promiscuity(dev, inc);
3074	}
3075
3076	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3077	   is important. Some (broken) drivers set IFF_PROMISC, when
3078	   IFF_ALLMULTI is requested not asking us and not reporting.
3079	 */
3080	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3081		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3082		dev->gflags ^= IFF_ALLMULTI;
3083		dev_set_allmulti(dev, inc);
3084	}
3085
3086	/* Exclude state transition flags, already notified */
3087	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3088	if (changes)
3089		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3090
3091	return ret;
3092}
3093
3094int dev_set_mtu(struct net_device *dev, int new_mtu)
3095{
3096	int err;
3097
3098	if (new_mtu == dev->mtu)
3099		return 0;
3100
3101	/*	MTU must be positive.	 */
3102	if (new_mtu < 0)
3103		return -EINVAL;
3104
3105	if (!netif_device_present(dev))
3106		return -ENODEV;
3107
3108	err = 0;
3109	if (dev->change_mtu)
3110		err = dev->change_mtu(dev, new_mtu);
3111	else
3112		dev->mtu = new_mtu;
3113	if (!err && dev->flags & IFF_UP)
3114		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3115	return err;
3116}
3117
3118int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3119{
3120	int err;
3121
3122	if (!dev->set_mac_address)
3123		return -EOPNOTSUPP;
3124	if (sa->sa_family != dev->type)
3125		return -EINVAL;
3126	if (!netif_device_present(dev))
3127		return -ENODEV;
3128	err = dev->set_mac_address(dev, sa);
3129	if (!err)
3130		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3131	return err;
3132}
3133
3134/*
3135 *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3136 */
3137static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3138{
3139	int err;
3140	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3141
3142	if (!dev)
3143		return -ENODEV;
3144
3145	switch (cmd) {
3146		case SIOCGIFFLAGS:	/* Get interface flags */
3147			ifr->ifr_flags = dev_get_flags(dev);
3148			return 0;
3149
3150		case SIOCGIFMETRIC:	/* Get the metric on the interface
3151					   (currently unused) */
3152			ifr->ifr_metric = 0;
3153			return 0;
3154
3155		case SIOCGIFMTU:	/* Get the MTU of a device */
3156			ifr->ifr_mtu = dev->mtu;
3157			return 0;
3158
3159		case SIOCGIFHWADDR:
3160			if (!dev->addr_len)
3161				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3162			else
3163				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3164				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3165			ifr->ifr_hwaddr.sa_family = dev->type;
3166			return 0;
3167
3168		case SIOCGIFSLAVE:
3169			err = -EINVAL;
3170			break;
3171
3172		case SIOCGIFMAP:
3173			ifr->ifr_map.mem_start = dev->mem_start;
3174			ifr->ifr_map.mem_end   = dev->mem_end;
3175			ifr->ifr_map.base_addr = dev->base_addr;
3176			ifr->ifr_map.irq       = dev->irq;
3177			ifr->ifr_map.dma       = dev->dma;
3178			ifr->ifr_map.port      = dev->if_port;
3179			return 0;
3180
3181		case SIOCGIFINDEX:
3182			ifr->ifr_ifindex = dev->ifindex;
3183			return 0;
3184
3185		case SIOCGIFTXQLEN:
3186			ifr->ifr_qlen = dev->tx_queue_len;
3187			return 0;
3188
3189		default:
3190			/* dev_ioctl() should ensure this case
3191			 * is never reached
3192			 */
3193			WARN_ON(1);
3194			err = -EINVAL;
3195			break;
3196
3197	}
3198	return err;
3199}
3200
3201/*
3202 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3203 */
3204static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3205{
3206	int err;
3207	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3208
3209	if (!dev)
3210		return -ENODEV;
3211
3212	switch (cmd) {
3213		case SIOCSIFFLAGS:	/* Set interface flags */
3214			return dev_change_flags(dev, ifr->ifr_flags);
3215
3216		case SIOCSIFMETRIC:	/* Set the metric on the interface
3217					   (currently unused) */
3218			return -EOPNOTSUPP;
3219
3220		case SIOCSIFMTU:	/* Set the MTU of a device */
3221			return dev_set_mtu(dev, ifr->ifr_mtu);
3222
3223		case SIOCSIFHWADDR:
3224			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3225
3226		case SIOCSIFHWBROADCAST:
3227			if (ifr->ifr_hwaddr.sa_family != dev->type)
3228				return -EINVAL;
3229			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3230			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3231			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3232			return 0;
3233
3234		case SIOCSIFMAP:
3235			if (dev->set_config) {
3236				if (!netif_device_present(dev))
3237					return -ENODEV;
3238				return dev->set_config(dev, &ifr->ifr_map);
3239			}
3240			return -EOPNOTSUPP;
3241
3242		case SIOCADDMULTI:
3243			if (!dev->set_multicast_list ||
3244			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3245				return -EINVAL;
3246			if (!netif_device_present(dev))
3247				return -ENODEV;
3248			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3249					  dev->addr_len, 1);
3250
3251		case SIOCDELMULTI:
3252			if (!dev->set_multicast_list ||
3253			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3254				return -EINVAL;
3255			if (!netif_device_present(dev))
3256				return -ENODEV;
3257			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3258					     dev->addr_len, 1);
3259
3260		case SIOCSIFTXQLEN:
3261			if (ifr->ifr_qlen < 0)
3262				return -EINVAL;
3263			dev->tx_queue_len = ifr->ifr_qlen;
3264			return 0;
3265
3266		case SIOCSIFNAME:
3267			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3268			return dev_change_name(dev, ifr->ifr_newname);
3269
3270		/*
3271		 *	Unknown or private ioctl
3272		 */
3273
3274		default:
3275			if ((cmd >= SIOCDEVPRIVATE &&
3276			    cmd <= SIOCDEVPRIVATE + 15) ||
3277			    cmd == SIOCBONDENSLAVE ||
3278			    cmd == SIOCBONDRELEASE ||
3279			    cmd == SIOCBONDSETHWADDR ||
3280			    cmd == SIOCBONDSLAVEINFOQUERY ||
3281			    cmd == SIOCBONDINFOQUERY ||
3282			    cmd == SIOCBONDCHANGEACTIVE ||
3283			    cmd == SIOCGMIIPHY ||
3284			    cmd == SIOCGMIIREG ||
3285			    cmd == SIOCSMIIREG ||
3286			    cmd == SIOCBRADDIF ||
3287			    cmd == SIOCBRDELIF ||
3288			    cmd == SIOCWANDEV) {
3289				err = -EOPNOTSUPP;
3290				if (dev->do_ioctl) {
3291					if (netif_device_present(dev))
3292						err = dev->do_ioctl(dev, ifr,
3293								    cmd);
3294					else
3295						err = -ENODEV;
3296				}
3297			} else
3298				err = -EINVAL;
3299
3300	}
3301	return err;
3302}
3303
3304/*
3305 *	This function handles all "interface"-type I/O control requests. The actual
3306 *	'doing' part of this is dev_ifsioc above.
3307 */
3308
3309/**
3310 *	dev_ioctl	-	network device ioctl
3311 *	@net: the applicable net namespace
3312 *	@cmd: command to issue
3313 *	@arg: pointer to a struct ifreq in user space
3314 *
3315 *	Issue ioctl functions to devices. This is normally called by the
3316 *	user space syscall interfaces but can sometimes be useful for
3317 *	other purposes. The return value is the return from the syscall if
3318 *	positive or a negative errno code on error.
3319 */
3320
3321int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3322{
3323	struct ifreq ifr;
3324	int ret;
3325	char *colon;
3326
3327	/* One special case: SIOCGIFCONF takes ifconf argument
3328	   and requires shared lock, because it sleeps writing
3329	   to user space.
3330	 */
3331
3332	if (cmd == SIOCGIFCONF) {
3333		rtnl_lock();
3334		ret = dev_ifconf(net, (char __user *) arg);
3335		rtnl_unlock();
3336		return ret;
3337	}
3338	if (cmd == SIOCGIFNAME)
3339		return dev_ifname(net, (struct ifreq __user *)arg);
3340
3341	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3342		return -EFAULT;
3343
3344	ifr.ifr_name[IFNAMSIZ-1] = 0;
3345
3346	colon = strchr(ifr.ifr_name, ':');
3347	if (colon)
3348		*colon = 0;
3349
3350	/*
3351	 *	See which interface the caller is talking about.
3352	 */
3353
3354	switch (cmd) {
3355		/*
3356		 *	These ioctl calls:
3357		 *	- can be done by all.
3358		 *	- atomic and do not require locking.
3359		 *	- return a value
3360		 */
3361		case SIOCGIFFLAGS:
3362		case SIOCGIFMETRIC:
3363		case SIOCGIFMTU:
3364		case SIOCGIFHWADDR:
3365		case SIOCGIFSLAVE:
3366		case SIOCGIFMAP:
3367		case SIOCGIFINDEX:
3368		case SIOCGIFTXQLEN:
3369			dev_load(net, ifr.ifr_name);
3370			read_lock(&dev_base_lock);
3371			ret = dev_ifsioc_locked(net, &ifr, cmd);
3372			read_unlock(&dev_base_lock);
3373			if (!ret) {
3374				if (colon)
3375					*colon = ':';
3376				if (copy_to_user(arg, &ifr,
3377						 sizeof(struct ifreq)))
3378					ret = -EFAULT;
3379			}
3380			return ret;
3381
3382		case SIOCETHTOOL:
3383			dev_load(net, ifr.ifr_name);
3384			rtnl_lock();
3385			ret = dev_ethtool(net, &ifr);
3386			rtnl_unlock();
3387			if (!ret) {
3388				if (colon)
3389					*colon = ':';
3390				if (copy_to_user(arg, &ifr,
3391						 sizeof(struct ifreq)))
3392					ret = -EFAULT;
3393			}
3394			return ret;
3395
3396		/*
3397		 *	These ioctl calls:
3398		 *	- require superuser power.
3399		 *	- require strict serialization.
3400		 *	- return a value
3401		 */
3402		case SIOCGMIIPHY:
3403		case SIOCGMIIREG:
3404		case SIOCSIFNAME:
3405			if (!capable(CAP_NET_ADMIN))
3406				return -EPERM;
3407			dev_load(net, ifr.ifr_name);
3408			rtnl_lock();
3409			ret = dev_ifsioc(net, &ifr, cmd);
3410			rtnl_unlock();
3411			if (!ret) {
3412				if (colon)
3413					*colon = ':';
3414				if (copy_to_user(arg, &ifr,
3415						 sizeof(struct ifreq)))
3416					ret = -EFAULT;
3417			}
3418			return ret;
3419
3420		/*
3421		 *	These ioctl calls:
3422		 *	- require superuser power.
3423		 *	- require strict serialization.
3424		 *	- do not return a value
3425		 */
3426		case SIOCSIFFLAGS:
3427		case SIOCSIFMETRIC:
3428		case SIOCSIFMTU:
3429		case SIOCSIFMAP:
3430		case SIOCSIFHWADDR:
3431		case SIOCSIFSLAVE:
3432		case SIOCADDMULTI:
3433		case SIOCDELMULTI:
3434		case SIOCSIFHWBROADCAST:
3435		case SIOCSIFTXQLEN:
3436		case SIOCSMIIREG:
3437		case SIOCBONDENSLAVE:
3438		case SIOCBONDRELEASE:
3439		case SIOCBONDSETHWADDR:
3440		case SIOCBONDCHANGEACTIVE:
3441		case SIOCBRADDIF:
3442		case SIOCBRDELIF:
3443			if (!capable(CAP_NET_ADMIN))
3444				return -EPERM;
3445			/* fall through */
3446		case SIOCBONDSLAVEINFOQUERY:
3447		case SIOCBONDINFOQUERY:
3448			dev_load(net, ifr.ifr_name);
3449			rtnl_lock();
3450			ret = dev_ifsioc(net, &ifr, cmd);
3451			rtnl_unlock();
3452			return ret;
3453
3454		case SIOCGIFMEM:
3455			/* Get the per device memory space. We can add this but
3456			 * currently do not support it */
3457		case SIOCSIFMEM:
3458			/* Set the per device memory buffer space.
3459			 * Not applicable in our case */
3460		case SIOCSIFLINK:
3461			return -EINVAL;
3462
3463		/*
3464		 *	Unknown or private ioctl.
3465		 */
3466		default:
3467			if (cmd == SIOCWANDEV ||
3468			    (cmd >= SIOCDEVPRIVATE &&
3469			     cmd <= SIOCDEVPRIVATE + 15)) {
3470				dev_load(net, ifr.ifr_name);
3471				rtnl_lock();
3472				ret = dev_ifsioc(net, &ifr, cmd);
3473				rtnl_unlock();
3474				if (!ret && copy_to_user(arg, &ifr,
3475							 sizeof(struct ifreq)))
3476					ret = -EFAULT;
3477				return ret;
3478			}
3479			/* Take care of Wireless Extensions */
3480			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3481				return wext_handle_ioctl(net, &ifr, cmd, arg);
3482			return -EINVAL;
3483	}
3484}
3485
3486
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Advances a function-static counter (restarting at 1 whenever it
 *	becomes non-positive) until it reaches a value for which
 *	__dev_get_by_index() finds no device in @net, and returns that
 *	value.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	do {
		ifindex++;
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
3505
3506/* Delayed registration/unregisteration */
3507static DEFINE_SPINLOCK(net_todo_list_lock);
3508static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3509
3510static void net_set_todo(struct net_device *dev)
3511{
3512	spin_lock(&net_todo_list_lock);
3513	list_add_tail(&dev->todo_list, &net_todo_list);
3514	spin_unlock(&net_todo_list_lock);
3515}
3516
3517static void rollback_registered(struct net_device *dev)
3518{
3519	BUG_ON(dev_boot_phase);
3520	ASSERT_RTNL();
3521
3522	/* Some devices call without registering for initialization unwind. */
3523	if (dev->reg_state == NETREG_UNINITIALIZED) {
3524		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3525				  "was registered\n", dev->name, dev);
3526
3527		WARN_ON(1);
3528		return;
3529	}
3530
3531	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3532
3533	/* If device is running, close it first. */
3534	dev_close(dev);
3535
3536	/* And unlink it from device chain. */
3537	unlist_netdevice(dev);
3538
3539	dev->reg_state = NETREG_UNREGISTERING;
3540
3541	synchronize_net();
3542
3543	/* Shutdown queueing discipline. */
3544	dev_shutdown(dev);
3545
3546
3547	/* Notify protocols, that we are about to destroy
3548	   this device. They should clean all the things.
3549	*/
3550	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3551
3552	/*
3553	 *	Flush the unicast and multicast chains
3554	 */
3555	dev_addr_discard(dev);
3556
3557	if (dev->uninit)
3558		dev->uninit(dev);
3559
3560	/* Notifier chain MUST detach us from master device. */
3561	BUG_TRAP(!dev->master);
3562
3563	/* Remove entries from kobject tree */
3564	netdev_unregister_kobject(dev);
3565
3566	synchronize_net();
3567
3568	dev_put(dev);
3569}
3570
3571/**
3572 *	register_netdevice	- register a network device
3573 *	@dev: device to register
3574 *
3575 *	Take a completed network device structure and add it to the kernel
3576 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3577 *	chain. 0 is returned on success. A negative errno code is returned
3578 *	on a failure to set up the device, or if the name is a duplicate.
3579 *
3580 *	Callers must hold the rtnl semaphore. You may want
3581 *	register_netdev() instead of this.
3582 *
3583 *	BUGS:
3584 *	The locking appears insufficient to guarantee two parallel registers
3585 *	will not get the same name.
3586 */
3587
3588int register_netdevice(struct net_device *dev)
3589{
3590	struct hlist_head *head;
3591	struct hlist_node *p;
3592	int ret;
3593	struct net *net;
3594
3595	BUG_ON(dev_boot_phase);
3596	ASSERT_RTNL();
3597
3598	might_sleep();
3599
3600	/* When net_device's are persistent, this will be fatal. */
3601	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3602	BUG_ON(!dev->nd_net);
3603	net = dev->nd_net;
3604
3605	spin_lock_init(&dev->queue_lock);
3606	spin_lock_init(&dev->_xmit_lock);
3607	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3608	dev->xmit_lock_owner = -1;
3609	spin_lock_init(&dev->ingress_lock);
3610
3611	dev->iflink = -1;
3612
3613	/* Init, if this function is available */
3614	if (dev->init) {
3615		ret = dev->init(dev);
3616		if (ret) {
3617			if (ret > 0)
3618				ret = -EIO;
3619			goto out;
3620		}
3621	}
3622
3623	if (!dev_valid_name(dev->name)) {
3624		ret = -EINVAL;
3625		goto err_uninit;
3626	}
3627
3628	dev->ifindex = dev_new_index(net);
3629	if (dev->iflink == -1)
3630		dev->iflink = dev->ifindex;
3631
3632	/* Check for existence of name */
3633	head = dev_name_hash(net, dev->name);
3634	hlist_for_each(p, head) {
3635		struct net_device *d
3636			= hlist_entry(p, struct net_device, name_hlist);
3637		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3638			ret = -EEXIST;
3639			goto err_uninit;
3640		}
3641	}
3642
3643	/* Fix illegal checksum combinations */
3644	if ((dev->features & NETIF_F_HW_CSUM) &&
3645	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3646		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3647		       dev->name);
3648		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3649	}
3650
3651	if ((dev->features & NETIF_F_NO_CSUM) &&
3652	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3653		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3654		       dev->name);
3655		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3656	}
3657
3658
3659	/* Fix illegal SG+CSUM combinations. */
3660	if ((dev->features & NETIF_F_SG) &&
3661	    !(dev->features & NETIF_F_ALL_CSUM)) {
3662		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3663		       dev->name);
3664		dev->features &= ~NETIF_F_SG;
3665	}
3666
3667	/* TSO requires that SG is present as well. */
3668	if ((dev->features & NETIF_F_TSO) &&
3669	    !(dev->features & NETIF_F_SG)) {
3670		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3671		       dev->name);
3672		dev->features &= ~NETIF_F_TSO;
3673	}
3674	if (dev->features & NETIF_F_UFO) {
3675		if (!(dev->features & NETIF_F_HW_CSUM)) {
3676			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3677					"NETIF_F_HW_CSUM feature.\n",
3678							dev->name);
3679			dev->features &= ~NETIF_F_UFO;
3680		}
3681		if (!(dev->features & NETIF_F_SG)) {
3682			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3683					"NETIF_F_SG feature.\n",
3684					dev->name);
3685			dev->features &= ~NETIF_F_UFO;
3686		}
3687	}
3688
3689	ret = netdev_register_kobject(dev);
3690	if (ret)
3691		goto err_uninit;
3692	dev->reg_state = NETREG_REGISTERED;
3693
3694	/*
3695	 *	Default initial state at registry is that the
3696	 *	device is present.
3697	 */
3698
3699	set_bit(__LINK_STATE_PRESENT, &dev->state);
3700
3701	dev_init_scheduler(dev);
3702	dev_hold(dev);
3703	list_netdevice(dev);
3704
3705	/* Notify protocols, that a new device appeared. */
3706	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
3707	ret = notifier_to_errno(ret);
3708	if (ret) {
3709		rollback_registered(dev);
3710		dev->reg_state = NETREG_UNREGISTERED;
3711	}
3712
3713out:
3714	return ret;
3715
3716err_uninit:
3717	if (dev->uninit)
3718		dev->uninit(dev);
3719	goto out;
3720}
3721
3722/**
3723 *	register_netdev	- register a network device
3724 *	@dev: device to register
3725 *
3726 *	Take a completed network device structure and add it to the kernel
3727 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3728 *	chain. 0 is returned on success. A negative errno code is returned
3729 *	on a failure to set up the device, or if the name is a duplicate.
3730 *
3731 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3732 *	and expands the device name if you passed a format string to
3733 *	alloc_netdev.
3734 */
3735int register_netdev(struct net_device *dev)
3736{
3737	int err;
3738
3739	rtnl_lock();
3740
3741	/*
3742	 * If the name is a format string the caller wants us to do a
3743	 * name allocation.
3744	 */
3745	if (strchr(dev->name, '%')) {
3746		err = dev_alloc_name(dev, dev->name);
3747		if (err < 0)
3748			goto out;
3749	}
3750
3751	err = register_netdevice(dev);
3752out:
3753	rtnl_unlock();
3754	return err;
3755}
3756EXPORT_SYMBOL(register_netdev);
3757
3758/*
3759 * netdev_wait_allrefs - wait until all references are gone.
3760 *
3761 * This is called when unregistering network devices.
3762 *
3763 * Any protocol or device that holds a reference should register
3764 * for netdevice notification, and cleanup and put back the
3765 * reference if they receive an UNREGISTER event.
3766 * We can get stuck here if buggy protocols don't correctly
3767 * call dev_put.
3768 */
3769static void netdev_wait_allrefs(struct net_device *dev)
3770{
3771	unsigned long rebroadcast_time, warning_time;
3772
3773	rebroadcast_time = warning_time = jiffies;
3774	while (atomic_read(&dev->refcnt) != 0) {
3775		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3776			rtnl_lock();
3777
3778			/* Rebroadcast unregister notification */
3779			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3780
3781			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3782				     &dev->state)) {
3783				/* We must not have linkwatch events
3784				 * pending on unregister. If this
3785				 * happens, we simply run the queue
3786				 * unscheduled, resulting in a noop
3787				 * for this device.
3788				 */
3789				linkwatch_run_queue();
3790			}
3791
3792			__rtnl_unlock();
3793
3794			rebroadcast_time = jiffies;
3795		}
3796
3797		msleep(250);
3798
3799		if (time_after(jiffies, warning_time + 10 * HZ)) {
3800			printk(KERN_EMERG "unregister_netdevice: "
3801			       "waiting for %s to become free. Usage "
3802			       "count = %d\n",
3803			       dev->name, atomic_read(&dev->refcnt));
3804			warning_time = jiffies;
3805		}
3806	}
3807}
3808
3809/* The sequence is:
3810 *
3811 *	rtnl_lock();
3812 *	...
3813 *	register_netdevice(x1);
3814 *	register_netdevice(x2);
3815 *	...
3816 *	unregister_netdevice(y1);
3817 *	unregister_netdevice(y2);
3818 *      ...
3819 *	rtnl_unlock();
3820 *	free_netdev(y1);
3821 *	free_netdev(y2);
3822 *
3823 * We are invoked by rtnl_unlock() after it drops the semaphore.
3824 * This allows us to deal with problems:
3825 * 1) We can delete sysfs objects which invoke hotplug
3826 *    without deadlocking with linkwatch via keventd.
3827 * 2) Since we run with the RTNL semaphore not held, we can sleep
3828 *    safely in order to wait for the netdev refcnt to drop to zero.
3829 */
3830static DEFINE_MUTEX(net_todo_run_mutex);
3831void netdev_run_todo(void)
3832{
3833	struct list_head list;
3834
3835	/* Need to guard against multiple cpu's getting out of order. */
3836	mutex_lock(&net_todo_run_mutex);
3837
3838	/* Not safe to do outside the semaphore.  We must not return
3839	 * until all unregister events invoked by the local processor
3840	 * have been completed (either by this todo run, or one on
3841	 * another cpu).
3842	 */
3843	if (list_empty(&net_todo_list))
3844		goto out;
3845
3846	/* Snapshot list, allow later requests */
3847	spin_lock(&net_todo_list_lock);
3848	list_replace_init(&net_todo_list, &list);
3849	spin_unlock(&net_todo_list_lock);
3850
3851	while (!list_empty(&list)) {
3852		struct net_device *dev
3853			= list_entry(list.next, struct net_device, todo_list);
3854		list_del(&dev->todo_list);
3855
3856		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3857			printk(KERN_ERR "network todo '%s' but state %d\n",
3858			       dev->name, dev->reg_state);
3859			dump_stack();
3860			continue;
3861		}
3862
3863		dev->reg_state = NETREG_UNREGISTERED;
3864
3865		netdev_wait_allrefs(dev);
3866
3867		/* paranoia */
3868		BUG_ON(atomic_read(&dev->refcnt));
3869		BUG_TRAP(!dev->ip_ptr);
3870		BUG_TRAP(!dev->ip6_ptr);
3871		BUG_TRAP(!dev->dn_ptr);
3872
3873		if (dev->destructor)
3874			dev->destructor(dev);
3875
3876		/* Free network device */
3877		kobject_put(&dev->dev.kobj);
3878	}
3879
3880out:
3881	mutex_unlock(&net_todo_run_mutex);
3882}
3883
3884static struct net_device_stats *internal_stats(struct net_device *dev)
3885{
3886	return &dev->stats;
3887}
3888
3889/**
3890 *	alloc_netdev_mq - allocate network device
3891 *	@sizeof_priv:	size of private data to allocate space for
3892 *	@name:		device name format string
3893 *	@setup:		callback to initialize device
3894 *	@queue_count:	the number of subqueues to allocate
3895 *
3896 *	Allocates a struct net_device with private data area for driver use
3897 *	and performs basic initialization.  Also allocates subquue structs
3898 *	for each queue on the device at the end of the netdevice.
3899 */
3900struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3901		void (*setup)(struct net_device *), unsigned int queue_count)
3902{
3903	void *p;
3904	struct net_device *dev;
3905	int alloc_size;
3906
3907	BUG_ON(strlen(name) >= sizeof(dev->name));
3908
3909	/* ensure 32-byte alignment of both the device and private area */
3910	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
3911		     (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
3912		     ~NETDEV_ALIGN_CONST;
3913	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3914
3915	p = kzalloc(alloc_size, GFP_KERNEL);
3916	if (!p) {
3917		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3918		return NULL;
3919	}
3920
3921	dev = (struct net_device *)
3922		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3923	dev->padded = (char *)dev - (char *)p;
3924	dev->nd_net = &init_net;
3925
3926	if (sizeof_priv) {
3927		dev->priv = ((char *)dev +
3928			     ((sizeof(struct net_device) +
3929			       (sizeof(struct net_device_subqueue) *
3930				(queue_count - 1)) + NETDEV_ALIGN_CONST)
3931			      & ~NETDEV_ALIGN_CONST));
3932	}
3933
3934	dev->egress_subqueue_count = queue_count;
3935
3936	dev->get_stats = internal_stats;
3937	netpoll_netdev_init(dev);
3938	setup(dev);
3939	strcpy(dev->name, name);
3940	return dev;
3941}
3942EXPORT_SYMBOL(alloc_netdev_mq);
3943
3944/**
3945 *	free_netdev - free network device
3946 *	@dev: device
3947 *
3948 *	This function does the last stage of destroying an allocated device
3949 * 	interface. The reference to the device object is released.
3950 *	If this is the last reference then it will be freed.
3951 */
3952void free_netdev(struct net_device *dev)
3953{
3954	/*  Compatibility with error handling in drivers */
3955	if (dev->reg_state == NETREG_UNINITIALIZED) {
3956		kfree((char *)dev - dev->padded);
3957		return;
3958	}
3959
3960	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3961	dev->reg_state = NETREG_RELEASED;
3962
3963	/* will free via device release */
3964	put_device(&dev->dev);
3965}
3966
/*
 * Synchronize with packet receive processing by waiting for
 * synchronize_rcu() to return. May sleep.
 */
void synchronize_net(void)
{
	might_sleep();

	synchronize_rcu();
}
3973
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	Shuts down a device interface and removes it from the kernel
 *	tables, then queues the device so that the rest of the
 *	unregistration runs after the rtnl semaphore has been released.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	rollback_registered(dev);

	/* The remainder is processed after unlock. */
	net_set_todo(dev);
}
3991
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	Shuts down a device interface and removes it from the kernel
 *	tables.
 *
 *	Thin wrapper that calls unregister_netdevice() with the rtnl
 *	semaphore held.  In general this is the function to use, not
 *	unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();

	unregister_netdevice(dev);

	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
4011
4012/**
4013 *	dev_change_net_namespace - move device to different nethost namespace
4014 *	@dev: device
4015 *	@net: network namespace
4016 *	@pat: If not NULL name pattern to try if the current device name
4017 *	      is already taken in the destination network namespace.
4018 *
4019 *	This function shuts down a device interface and moves it
4020 *	to a new network namespace. On success 0 is returned, on
4021 *	a failure a netagive errno code is returned.
4022 *
4023 *	Callers must hold the rtnl semaphore.
4024 */
4025
4026int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4027{
4028	char buf[IFNAMSIZ];
4029	const char *destname;
4030	int err;
4031
4032	ASSERT_RTNL();
4033
4034	/* Don't allow namespace local devices to be moved. */
4035	err = -EINVAL;
4036	if (dev->features & NETIF_F_NETNS_LOCAL)
4037		goto out;
4038
4039	/* Ensure the device has been registrered */
4040	err = -EINVAL;
4041	if (dev->reg_state != NETREG_REGISTERED)
4042		goto out;
4043
4044	/* Get out if there is nothing todo */
4045	err = 0;
4046	if (dev->nd_net == net)
4047		goto out;
4048
4049	/* Pick the destination device name, and ensure
4050	 * we can use it in the destination network namespace.
4051	 */
4052	err = -EEXIST;
4053	destname = dev->name;
4054	if (__dev_get_by_name(net, destname)) {
4055		/* We get here if we can't use the current device name */
4056		if (!pat)
4057			goto out;
4058		if (!dev_valid_name(pat))
4059			goto out;
4060		if (strchr(pat, '%')) {
4061			if (__dev_alloc_name(net, pat, buf) < 0)
4062				goto out;
4063			destname = buf;
4064		} else
4065			destname = pat;
4066		if (__dev_get_by_name(net, destname))
4067			goto out;
4068	}
4069
4070	/*
4071	 * And now a mini version of register_netdevice unregister_netdevice.
4072	 */
4073
4074	/* If device is running close it first. */
4075	dev_close(dev);
4076
4077	/* And unlink it from device chain */
4078	err = -ENODEV;
4079	unlist_netdevice(dev);
4080
4081	synchronize_net();
4082
4083	/* Shutdown queueing discipline. */
4084	dev_shutdown(dev);
4085
4086	/* Notify protocols, that we are about to destroy
4087	   this device. They should clean all the things.
4088	*/
4089	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4090
4091	/*
4092	 *	Flush the unicast and multicast chains
4093	 */
4094	dev_addr_discard(dev);
4095
4096	/* Actually switch the network namespace */
4097	dev->nd_net = net;
4098
4099	/* Assign the new device name */
4100	if (destname != dev->name)
4101		strcpy(dev->name, destname);
4102
4103	/* If there is an ifindex conflict assign a new one */
4104	if (__dev_get_by_index(net, dev->ifindex)) {
4105		int iflink = (dev->iflink == dev->ifindex);
4106		dev->ifindex = dev_new_index(net);
4107		if (iflink)
4108			dev->iflink = dev->ifindex;
4109	}
4110
4111	/* Fixup kobjects */
4112	err = device_rename(&dev->dev, dev->name);
4113	WARN_ON(err);
4114
4115	/* Add the device back in the hashes */
4116	list_netdevice(dev);
4117
4118	/* Notify protocols, that a new device appeared. */
4119	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4120
4121	synchronize_net();
4122	err = 0;
4123out:
4124	return err;
4125}
4126
4127static int dev_cpu_callback(struct notifier_block *nfb,
4128			    unsigned long action,
4129			    void *ocpu)
4130{
4131	struct sk_buff **list_skb;
4132	struct net_device **list_net;
4133	struct sk_buff *skb;
4134	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4135	struct softnet_data *sd, *oldsd;
4136
4137	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4138		return NOTIFY_OK;
4139
4140	local_irq_disable();
4141	cpu = smp_processor_id();
4142	sd = &per_cpu(softnet_data, cpu);
4143	oldsd = &per_cpu(softnet_data, oldcpu);
4144
4145	/* Find end of our completion_queue. */
4146	list_skb = &sd->completion_queue;
4147	while (*list_skb)
4148		list_skb = &(*list_skb)->next;
4149	/* Append completion queue from offline CPU. */
4150	*list_skb = oldsd->completion_queue;
4151	oldsd->completion_queue = NULL;
4152
4153	/* Find end of our output_queue. */
4154	list_net = &sd->output_queue;
4155	while (*list_net)
4156		list_net = &(*list_net)->next_sched;
4157	/* Append output queue from offline CPU. */
4158	*list_net = oldsd->output_queue;
4159	oldsd->output_queue = NULL;
4160
4161	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4162	local_irq_enable();
4163
4164	/* Process offline CPU's input_pkt_queue */
4165	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4166		netif_rx(skb);
4167
4168	return NOTIFY_OK;
4169}
4170
4171#ifdef CONFIG_NET_DMA
4172/**
4173 * net_dma_rebalance - try to maintain one DMA channel per CPU
4174 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4175 *
4176 * This is called when the number of channels allocated to the net_dma client
4177 * changes.  The net_dma client tries to have one DMA channel per CPU.
4178 */
4179
4180static void net_dma_rebalance(struct net_dma *net_dma)
4181{
4182	unsigned int cpu, i, n, chan_idx;
4183	struct dma_chan *chan;
4184
4185	if (cpus_empty(net_dma->channel_mask)) {
4186		for_each_online_cpu(cpu)
4187			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4188		return;
4189	}
4190
4191	i = 0;
4192	cpu = first_cpu(cpu_online_map);
4193
4194	for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4195		chan = net_dma->channels[chan_idx];
4196
4197		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4198		   + (i < (num_online_cpus() %
4199			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4200
4201		while(n) {
4202			per_cpu(softnet_data, cpu).net_dma = chan;
4203			cpu = next_cpu(cpu, cpu_online_map);
4204			n--;
4205		}
4206		i++;
4207	}
4208}
4209
4210/**
4211 * netdev_dma_event - event callback for the net_dma_client
4212 * @client: should always be net_dma_client
4213 * @chan: DMA channel for the event
4214 * @state: DMA state to be handled
4215 */
4216static enum dma_state_client
4217netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4218	enum dma_state state)
4219{
4220	int i, found = 0, pos = -1;
4221	struct net_dma *net_dma =
4222		container_of(client, struct net_dma, client);
4223	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4224
4225	spin_lock(&net_dma->lock);
4226	switch (state) {
4227	case DMA_RESOURCE_AVAILABLE:
4228		for (i = 0; i < NR_CPUS; i++)
4229			if (net_dma->channels[i] == chan) {
4230				found = 1;
4231				break;
4232			} else if (net_dma->channels[i] == NULL && pos < 0)
4233				pos = i;
4234
4235		if (!found && pos >= 0) {
4236			ack = DMA_ACK;
4237			net_dma->channels[pos] = chan;
4238			cpu_set(pos, net_dma->channel_mask);
4239			net_dma_rebalance(net_dma);
4240		}
4241		break;
4242	case DMA_RESOURCE_REMOVED:
4243		for (i = 0; i < NR_CPUS; i++)
4244			if (net_dma->channels[i] == chan) {
4245				found = 1;
4246				pos = i;
4247				break;
4248			}
4249
4250		if (found) {
4251			ack = DMA_ACK;
4252			cpu_clear(pos, net_dma->channel_mask);
4253			net_dma->channels[i] = NULL;
4254			net_dma_rebalance(net_dma);
4255		}
4256		break;
4257	default:
4258		break;
4259	}
4260	spin_unlock(&net_dma->lock);
4261
4262	return ack;
4263}
4264
4265/**
4266 * netdev_dma_regiser - register the networking subsystem as a DMA client
4267 */
4268static int __init netdev_dma_register(void)
4269{
4270	spin_lock_init(&net_dma.lock);
4271	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4272	dma_async_client_register(&net_dma.client);
4273	dma_async_client_chan_request(&net_dma.client);
4274	return 0;
4275}
4276
4277#else
4278static int __init netdev_dma_register(void) { return -ENODEV; }
4279#endif /* CONFIG_NET_DMA */
4280
4281/**
4282 *	netdev_compute_feature - compute conjunction of two feature sets
4283 *	@all: first feature set
4284 *	@one: second feature set
4285 *
4286 *	Computes a new feature set after adding a device with feature set
4287 *	@one to the master device with current feature set @all.  Returns
4288 *	the new feature set.
4289 */
4290int netdev_compute_features(unsigned long all, unsigned long one)
4291{
4292	/* if device needs checksumming, downgrade to hw checksumming */
4293	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4294		all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4295
4296	/* if device can't do all checksum, downgrade to ipv4/ipv6 */
4297	if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4298		all ^= NETIF_F_HW_CSUM
4299			| NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4300
4301	if (one & NETIF_F_GSO)
4302		one |= NETIF_F_GSO_SOFTWARE;
4303	one |= NETIF_F_GSO;
4304
4305	/* If even one device supports robust GSO, enable it for all. */
4306	if (one & NETIF_F_GSO_ROBUST)
4307		all |= NETIF_F_GSO_ROBUST;
4308
4309	all &= one | NETIF_F_LLTX;
4310
4311	if (!(all & NETIF_F_ALL_CSUM))
4312		all &= ~NETIF_F_SG;
4313	if (!(all & NETIF_F_SG))
4314		all &= ~NETIF_F_GSO_MASK;
4315
4316	return all;
4317}
4318EXPORT_SYMBOL(netdev_compute_features);
4319
4320static struct hlist_head *netdev_create_hash(void)
4321{
4322	int i;
4323	struct hlist_head *hash;
4324
4325	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4326	if (hash != NULL)
4327		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4328			INIT_HLIST_HEAD(&hash[i]);
4329
4330	return hash;
4331}
4332
4333/* Initialize per network namespace state */
4334static int __net_init netdev_init(struct net *net)
4335{
4336	INIT_LIST_HEAD(&net->dev_base_head);
4337
4338	net->dev_name_head = netdev_create_hash();
4339	if (net->dev_name_head == NULL)
4340		goto err_name;
4341
4342	net->dev_index_head = netdev_create_hash();
4343	if (net->dev_index_head == NULL)
4344		goto err_idx;
4345
4346	return 0;
4347
4348err_idx:
4349	kfree(net->dev_name_head);
4350err_name:
4351	return -ENOMEM;
4352}
4353
4354static void __net_exit netdev_exit(struct net *net)
4355{
4356	kfree(net->dev_name_head);
4357	kfree(net->dev_index_head);
4358}
4359
4360static struct pernet_operations __net_initdata netdev_net_ops = {
4361	.init = netdev_init,
4362	.exit = netdev_exit,
4363};
4364
4365static void __net_exit default_device_exit(struct net *net)
4366{
4367	struct net_device *dev, *next;
4368	/*
4369	 * Push all migratable of the network devices back to the
4370	 * initial network namespace
4371	 */
4372	rtnl_lock();
4373	for_each_netdev_safe(net, dev, next) {
4374		int err;
4375
4376		/* Ignore unmoveable devices (i.e. loopback) */
4377		if (dev->features & NETIF_F_NETNS_LOCAL)
4378			continue;
4379
4380		/* Push remaing network devices to init_net */
4381		err = dev_change_net_namespace(dev, &init_net, "dev%d");
4382		if (err) {
4383			printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
4384				__func__, dev->name, err);
4385			unregister_netdevice(dev);
4386		}
4387	}
4388	rtnl_unlock();
4389}
4390
4391static struct pernet_operations __net_initdata default_device_ops = {
4392	.exit = default_device_exit,
4393};
4394
4395/*
4396 *	Initialize the DEV module. At boot time this walks the device list and
4397 *	unhooks any devices that fail to initialise (normally hardware not
4398 *	present) and leaves us with a valid list of present and active devices.
4399 *
4400 */
4401
4402/*
4403 *       This is called single threaded during boot, so no need
4404 *       to take the rtnl semaphore.
4405 */
4406static int __init net_dev_init(void)
4407{
4408	int i, rc = -ENOMEM;
4409
4410	BUG_ON(!dev_boot_phase);
4411
4412	if (dev_proc_init())
4413		goto out;
4414
4415	if (netdev_kobject_init())
4416		goto out;
4417
4418	INIT_LIST_HEAD(&ptype_all);
4419	for (i = 0; i < 16; i++)
4420		INIT_LIST_HEAD(&ptype_base[i]);
4421
4422	if (register_pernet_subsys(&netdev_net_ops))
4423		goto out;
4424
4425	if (register_pernet_device(&default_device_ops))
4426		goto out;
4427
4428	/*
4429	 *	Initialise the packet receive queues.
4430	 */
4431
4432	for_each_possible_cpu(i) {
4433		struct softnet_data *queue;
4434
4435		queue = &per_cpu(softnet_data, i);
4436		skb_queue_head_init(&queue->input_pkt_queue);
4437		queue->completion_queue = NULL;
4438		INIT_LIST_HEAD(&queue->poll_list);
4439
4440		queue->backlog.poll = process_backlog;
4441		queue->backlog.weight = weight_p;
4442	}
4443
4444	netdev_dma_register();
4445
4446	dev_boot_phase = 0;
4447
4448	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4449	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4450
4451	hotcpu_notifier(dev_cpu_callback, 0);
4452	dst_init();
4453	dev_mcast_init();
4454	rc = 0;
4455out:
4456	return rc;
4457}
4458
4459subsys_initcall(net_dev_init);
4460
/* Symbols exported to modules, in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks are only exported when bridging is configured. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);