net/core/dev.c at v2.6.12-rc3 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / net / core / dev.c
at v2.6.12-rc3 3359 lines 83 kB view raw
   1/*
   2 * 	NET3	Protocol independent device support routines.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 *	Derived from the non IP parts of dev.c 1.0.19
  10 * 		Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *	Additional Authors:
  15 *		Florian la Roche <rzsfl@rz.uni-sb.de>
  16 *		Alan Cox <gw4pts@gw4pts.ampr.org>
  17 *		David Hinds <dahinds@users.sourceforge.net>
  18 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19 *		Adam Sulmicki <adam@cfar.umd.edu>
  20 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21 *
  22 *	Changes:
  23 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24 *              			to 2 if register_netdev gets called
  25 *              			before net_dev_init & also removed a
  26 *              			few lines of code in the process.
  27 *		Alan Cox	:	device private ioctl copies fields back.
  28 *		Alan Cox	:	Transmit queue code does relevant
  29 *					stunts to keep the queue safe.
  30 *		Alan Cox	:	Fixed double lock.
  31 *		Alan Cox	:	Fixed promisc NULL pointer trap
  32 *		????????	:	Support the full private ioctl range
  33 *		Alan Cox	:	Moved ioctl permission check into
  34 *					drivers
  35 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
  36 *		Alan Cox	:	100 backlog just doesn't cut it when
  37 *					you start doing multicast video 8)
  38 *		Alan Cox	:	Rewrote net_bh and list manager.
  39 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
  40 *		Alan Cox	:	Took out transmit every packet pass
  41 *					Saved a few bytes in the ioctl handler
  42 *		Alan Cox	:	Network driver sets packet type before
  43 *					calling netif_rx. Saves a function
  44 *					call a packet.
  45 *		Alan Cox	:	Hashed net_bh()
  46 *		Richard Kooijman:	Timestamp fixes.
  47 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
  48 *		Alan Cox	:	Device lock protection.
  49 *		Alan Cox	: 	Fixed nasty side effect of device close
  50 *					changes.
  51 *		Rudi Cilibrasi	:	Pass the right thing to
  52 *					set_mac_address()
  53 *		Dave Miller	:	32bit quantity for the device lock to
  54 *					make it work out on a Sparc.
  55 *		Bjorn Ekwall	:	Added KERNELD hack.
  56 *		Alan Cox	:	Cleaned up the backlog initialise.
  57 *		Craig Metz	:	SIOCGIFCONF fix if space for under
  58 *					1 device.
  59 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
  60 *					is no device open function.
  61 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
  62 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
  63 *		Cyrus Durgin	:	Cleaned for KMOD
  64 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
  65 *					A network device unload needs to purge
  66 *					the backlog queue.
  67 *	Paul Rusty Russell	:	SIOCSIFNAME
  68 *              Pekka Riikonen  :	Netdev boot-time settings code
  69 *              Andrew Morton   :       Make unregister_netdevice wait
  70 *              			indefinitely on dev->refcnt
  71 * 		J Hadi Salim	:	- Backlog queue sampling
  72 *				        - netif_rx() feedback
  73 */
  74
  75#include <asm/uaccess.h>
  76#include <asm/system.h>
  77#include <linux/bitops.h>
  78#include <linux/config.h>
  79#include <linux/cpu.h>
  80#include <linux/types.h>
  81#include <linux/kernel.h>
  82#include <linux/sched.h>
  83#include <linux/string.h>
  84#include <linux/mm.h>
  85#include <linux/socket.h>
  86#include <linux/sockios.h>
  87#include <linux/errno.h>
  88#include <linux/interrupt.h>
  89#include <linux/if_ether.h>
  90#include <linux/netdevice.h>
  91#include <linux/etherdevice.h>
  92#include <linux/notifier.h>
  93#include <linux/skbuff.h>
  94#include <net/sock.h>
  95#include <linux/rtnetlink.h>
  96#include <linux/proc_fs.h>
  97#include <linux/seq_file.h>
  98#include <linux/stat.h>
  99#include <linux/if_bridge.h>
 100#include <linux/divert.h>
 101#include <net/dst.h>
 102#include <net/pkt_sched.h>
 103#include <net/checksum.h>
 104#include <linux/highmem.h>
 105#include <linux/init.h>
 106#include <linux/kmod.h>
 107#include <linux/module.h>
 108#include <linux/kallsyms.h>
 109#include <linux/netpoll.h>
 110#include <linux/rcupdate.h>
 111#include <linux/delay.h>
 112#ifdef CONFIG_NET_RADIO
 113#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
 114#include <net/iw_handler.h>
 115#endif	/* CONFIG_NET_RADIO */
 116#include <asm/current.h>
 117
 118/* This define, if set, will randomly drop a packet when congestion
 119 * is more than moderate.  It helps fairness in the multi-interface
 120 * case when one of them is a hog, but it kills performance for the
 121 * single interface case so it is off now by default.
 122 */
 123#undef RAND_LIE
 124
 125/* Setting this will sample the queue lengths and thus congestion
 126 * via a timer instead of as each packet is received.
 127 */
 128#undef OFFLINE_SAMPLE
 129
 130/*
 131 *	The list of packet types we will receive (as opposed to discard)
 132 *	and the routines to invoke.
 133 *
 134 *	Why 16. Because with 16 the only overlap we get on a hash of the
 135 *	low nibble of the protocol value is RARP/SNAP/X.25.
 136 *
 137 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 138 *             sure which should go first, but I bet it won't make much
 139 *             difference if we are running VLANs.  The good news is that
 140 *             this protocol won't be in the list unless compiled in, so
  141 *             the average user (w/out VLANs) will not be adversely affected.
 142 *             --BLG
 143 *
 144 *		0800	IP
 145 *		8100    802.1Q VLAN
 146 *		0001	802.3
 147 *		0002	AX.25
 148 *		0004	802.2
 149 *		8035	RARP
 150 *		0005	SNAP
 151 *		0805	X.25
 152 *		0806	ARP
 153 *		8137	IPX
 154 *		0009	Localtalk
 155 *		86DD	IPv6
 156 */
 157
 158static DEFINE_SPINLOCK(ptype_lock);
 159static struct list_head ptype_base[16];	/* 16 way hashed list */
 160static struct list_head ptype_all;		/* Taps */
 161
 162#ifdef OFFLINE_SAMPLE
 163static void sample_queue(unsigned long dummy);
 164static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
 165#endif
 166
 167/*
  168 * The @dev_base list is protected by @dev_base_lock and the rtnl
 169 * semaphore.
 170 *
 171 * Pure readers hold dev_base_lock for reading.
 172 *
 173 * Writers must hold the rtnl semaphore while they loop through the
 174 * dev_base list, and hold dev_base_lock for writing when they do the
 175 * actual updates.  This allows pure readers to access the list even
 176 * while a writer is preparing to update it.
 177 *
 178 * To put it another way, dev_base_lock is held for writing only to
 179 * protect against pure readers; the rtnl semaphore provides the
 180 * protection against other writers.
 181 *
 182 * See, for example usages, register_netdevice() and
 183 * unregister_netdevice(), which must be called with the rtnl
 184 * semaphore held.
 185 */
 186struct net_device *dev_base;
 187static struct net_device **dev_tail = &dev_base;
 188DEFINE_RWLOCK(dev_base_lock);
 189
 190EXPORT_SYMBOL(dev_base);
 191EXPORT_SYMBOL(dev_base_lock);
 192
 193#define NETDEV_HASHBITS	8
 194static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 195static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 196
 197static inline struct hlist_head *dev_name_hash(const char *name)
 198{
 199	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 200	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 201}
 202
 203static inline struct hlist_head *dev_index_hash(int ifindex)
 204{
 205	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 206}
 207
 208/*
 209 *	Our notifier list
 210 */
 211
 212static struct notifier_block *netdev_chain;
 213
 214/*
 215 *	Device drivers call our routines to queue packets here. We empty the
 216 *	queue in the local softnet handler.
 217 */
 218DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
 219
 220#ifdef CONFIG_SYSFS
 221extern int netdev_sysfs_init(void);
 222extern int netdev_register_sysfs(struct net_device *);
 223extern void netdev_unregister_sysfs(struct net_device *);
 224#else
 225#define netdev_sysfs_init()	 	(0)
 226#define netdev_register_sysfs(dev)	(0)
 227#define	netdev_unregister_sysfs(dev)	do { } while(0)
 228#endif
 229
 230
 231/*******************************************************************************
 232
 233		Protocol management and registration routines
 234
 235*******************************************************************************/
 236
 237/*
 238 *	For efficiency
 239 */
 240
 241int netdev_nit;
 242
 243/*
 244 *	Add a protocol ID to the list. Now that the input handler is
 245 *	smarter we can dispense with all the messy stuff that used to be
 246 *	here.
 247 *
 248 *	BEWARE!!! Protocol handlers, mangling input packets,
 249 *	MUST BE last in hash buckets and checking protocol handlers
 250 *	MUST start from promiscuous ptype_all chain in net_bh.
 251 *	It is true now, do not change it.
 252 *	Explanation follows: if protocol handler, mangling packet, will
 253 *	be the first on list, it is not able to sense, that packet
 254 *	is cloned and should be copied-on-write, so that it will
 255 *	change it and subsequent readers will get broken packet.
 256 *							--ANK (980803)
 257 */
 258
 259/**
 260 *	dev_add_pack - add packet handler
 261 *	@pt: packet type declaration
 262 *
 263 *	Add a protocol handler to the networking stack. The passed &packet_type
 264 *	is linked into kernel lists and may not be freed until it has been
 265 *	removed from the kernel lists.
 266 *
 267 *	This call does not sleep therefore it can not 
 268 *	guarantee all CPU's that are in middle of receiving packets
 269 *	will see the new packet type (until the next received packet).
 270 */
 271
 272void dev_add_pack(struct packet_type *pt)
 273{
 274	int hash;
 275
 276	spin_lock_bh(&ptype_lock);
 277	if (pt->type == htons(ETH_P_ALL)) {
 278		netdev_nit++;
 279		list_add_rcu(&pt->list, &ptype_all);
 280	} else {
 281		hash = ntohs(pt->type) & 15;
 282		list_add_rcu(&pt->list, &ptype_base[hash]);
 283	}
 284	spin_unlock_bh(&ptype_lock);
 285}
 286
 287extern void linkwatch_run_queue(void);
 288
 289
 290
 291/**
 292 *	__dev_remove_pack	 - remove packet handler
 293 *	@pt: packet type declaration
 294 *
 295 *	Remove a protocol handler that was previously added to the kernel
 296 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 297 *	from the kernel lists and can be freed or reused once this function
 298 *	returns. 
 299 *
 300 *      The packet type might still be in use by receivers
 301 *	and must not be freed until after all the CPU's have gone
 302 *	through a quiescent state.
 303 */
 304void __dev_remove_pack(struct packet_type *pt)
 305{
 306	struct list_head *head;
 307	struct packet_type *pt1;
 308
 309	spin_lock_bh(&ptype_lock);
 310
 311	if (pt->type == htons(ETH_P_ALL)) {
 312		netdev_nit--;
 313		head = &ptype_all;
 314	} else
 315		head = &ptype_base[ntohs(pt->type) & 15];
 316
 317	list_for_each_entry(pt1, head, list) {
 318		if (pt == pt1) {
 319			list_del_rcu(&pt->list);
 320			goto out;
 321		}
 322	}
 323
 324	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 325out:
 326	spin_unlock_bh(&ptype_lock);
 327}
 328/**
 329 *	dev_remove_pack	 - remove packet handler
 330 *	@pt: packet type declaration
 331 *
 332 *	Remove a protocol handler that was previously added to the kernel
 333 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 334 *	from the kernel lists and can be freed or reused once this function
 335 *	returns.
 336 *
 337 *	This call sleeps to guarantee that no CPU is looking at the packet
 338 *	type after return.
 339 */
 340void dev_remove_pack(struct packet_type *pt)
 341{
 342	__dev_remove_pack(pt);
 343	
 344	synchronize_net();
 345}
 346
 347/******************************************************************************
 348
 349		      Device Boot-time Settings Routines
 350
 351*******************************************************************************/
 352
 353/* Boot time configuration table */
 354static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 355
 356/**
 357 *	netdev_boot_setup_add	- add new setup entry
 358 *	@name: name of the device
 359 *	@map: configured settings for the device
 360 *
 361 *	Adds new setup entry to the dev_boot_setup list.  The function
 362 *	returns 0 on error and 1 on success.  This is a generic routine to
 363 *	all netdevices.
 364 */
 365static int netdev_boot_setup_add(char *name, struct ifmap *map)
 366{
 367	struct netdev_boot_setup *s;
 368	int i;
 369
 370	s = dev_boot_setup;
 371	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 372		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 373			memset(s[i].name, 0, sizeof(s[i].name));
 374			strcpy(s[i].name, name);
 375			memcpy(&s[i].map, map, sizeof(s[i].map));
 376			break;
 377		}
 378	}
 379
 380	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 381}
 382
 383/**
 384 *	netdev_boot_setup_check	- check boot time settings
 385 *	@dev: the netdevice
 386 *
 387 * 	Check boot time settings for the device.
 388 *	The found settings are set for the device to be used
 389 *	later in the device probing.
 390 *	Returns 0 if no settings found, 1 if they are.
 391 */
 392int netdev_boot_setup_check(struct net_device *dev)
 393{
 394	struct netdev_boot_setup *s = dev_boot_setup;
 395	int i;
 396
 397	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 398		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 399		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 400			dev->irq 	= s[i].map.irq;
 401			dev->base_addr 	= s[i].map.base_addr;
 402			dev->mem_start 	= s[i].map.mem_start;
 403			dev->mem_end 	= s[i].map.mem_end;
 404			return 1;
 405		}
 406	}
 407	return 0;
 408}
 409
 410
 411/**
 412 *	netdev_boot_base	- get address from boot time settings
 413 *	@prefix: prefix for network device
 414 *	@unit: id for network device
 415 *
 416 * 	Check boot time settings for the base address of device.
 417 *	The found settings are set for the device to be used
 418 *	later in the device probing.
 419 *	Returns 0 if no settings found.
 420 */
 421unsigned long netdev_boot_base(const char *prefix, int unit)
 422{
 423	const struct netdev_boot_setup *s = dev_boot_setup;
 424	char name[IFNAMSIZ];
 425	int i;
 426
 427	sprintf(name, "%s%d", prefix, unit);
 428
 429	/*
 430	 * If device already registered then return base of 1
 431	 * to indicate not to probe for this interface
 432	 */
 433	if (__dev_get_by_name(name))
 434		return 1;
 435
 436	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 437		if (!strcmp(name, s[i].name))
 438			return s[i].map.base_addr;
 439	return 0;
 440}
 441
 442/*
 443 * Saves at boot time configured settings for any netdevice.
 444 */
 445int __init netdev_boot_setup(char *str)
 446{
 447	int ints[5];
 448	struct ifmap map;
 449
 450	str = get_options(str, ARRAY_SIZE(ints), ints);
 451	if (!str || !*str)
 452		return 0;
 453
 454	/* Save settings */
 455	memset(&map, 0, sizeof(map));
 456	if (ints[0] > 0)
 457		map.irq = ints[1];
 458	if (ints[0] > 1)
 459		map.base_addr = ints[2];
 460	if (ints[0] > 2)
 461		map.mem_start = ints[3];
 462	if (ints[0] > 3)
 463		map.mem_end = ints[4];
 464
 465	/* Add new entry to the list */
 466	return netdev_boot_setup_add(str, &map);
 467}
 468
 469__setup("netdev=", netdev_boot_setup);
 470
 471/*******************************************************************************
 472
 473			    Device Interface Subroutines
 474
 475*******************************************************************************/
 476
 477/**
 478 *	__dev_get_by_name	- find a device by its name
 479 *	@name: name to find
 480 *
 481 *	Find an interface by name. Must be called under RTNL semaphore
 482 *	or @dev_base_lock. If the name is found a pointer to the device
 483 *	is returned. If the name is not found then %NULL is returned. The
 484 *	reference counters are not incremented so the caller must be
 485 *	careful with locks.
 486 */
 487
 488struct net_device *__dev_get_by_name(const char *name)
 489{
 490	struct hlist_node *p;
 491
 492	hlist_for_each(p, dev_name_hash(name)) {
 493		struct net_device *dev
 494			= hlist_entry(p, struct net_device, name_hlist);
 495		if (!strncmp(dev->name, name, IFNAMSIZ))
 496			return dev;
 497	}
 498	return NULL;
 499}
 500
 501/**
 502 *	dev_get_by_name		- find a device by its name
 503 *	@name: name to find
 504 *
 505 *	Find an interface by name. This can be called from any
 506 *	context and does its own locking. The returned handle has
 507 *	the usage count incremented and the caller must use dev_put() to
 508 *	release it when it is no longer needed. %NULL is returned if no
 509 *	matching device is found.
 510 */
 511
 512struct net_device *dev_get_by_name(const char *name)
 513{
 514	struct net_device *dev;
 515
 516	read_lock(&dev_base_lock);
 517	dev = __dev_get_by_name(name);
 518	if (dev)
 519		dev_hold(dev);
 520	read_unlock(&dev_base_lock);
 521	return dev;
 522}
 523
 524/**
 525 *	__dev_get_by_index - find a device by its ifindex
 526 *	@ifindex: index of device
 527 *
 528 *	Search for an interface by index. Returns %NULL if the device
 529 *	is not found or a pointer to the device. The device has not
 530 *	had its reference counter increased so the caller must be careful
 531 *	about locking. The caller must hold either the RTNL semaphore
 532 *	or @dev_base_lock.
 533 */
 534
 535struct net_device *__dev_get_by_index(int ifindex)
 536{
 537	struct hlist_node *p;
 538
 539	hlist_for_each(p, dev_index_hash(ifindex)) {
 540		struct net_device *dev
 541			= hlist_entry(p, struct net_device, index_hlist);
 542		if (dev->ifindex == ifindex)
 543			return dev;
 544	}
 545	return NULL;
 546}
 547
 548
 549/**
 550 *	dev_get_by_index - find a device by its ifindex
 551 *	@ifindex: index of device
 552 *
 553 *	Search for an interface by index. Returns NULL if the device
 554 *	is not found or a pointer to the device. The device returned has
 555 *	had a reference added and the pointer is safe until the user calls
 556 *	dev_put to indicate they have finished with it.
 557 */
 558
 559struct net_device *dev_get_by_index(int ifindex)
 560{
 561	struct net_device *dev;
 562
 563	read_lock(&dev_base_lock);
 564	dev = __dev_get_by_index(ifindex);
 565	if (dev)
 566		dev_hold(dev);
 567	read_unlock(&dev_base_lock);
 568	return dev;
 569}
 570
 571/**
 572 *	dev_getbyhwaddr - find a device by its hardware address
 573 *	@type: media type of device
 574 *	@ha: hardware address
 575 *
 576 *	Search for an interface by MAC address. Returns NULL if the device
 577 *	is not found or a pointer to the device. The caller must hold the
 578 *	rtnl semaphore. The returned device has not had its ref count increased
 579 *	and the caller must therefore be careful about locking
 580 *
 581 *	BUGS:
 582 *	If the API was consistent this would be __dev_get_by_hwaddr
 583 */
 584
 585struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 586{
 587	struct net_device *dev;
 588
 589	ASSERT_RTNL();
 590
 591	for (dev = dev_base; dev; dev = dev->next)
 592		if (dev->type == type &&
 593		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 594			break;
 595	return dev;
 596}
 597
 598struct net_device *dev_getfirstbyhwtype(unsigned short type)
 599{
 600	struct net_device *dev;
 601
 602	rtnl_lock();
 603	for (dev = dev_base; dev; dev = dev->next) {
 604		if (dev->type == type) {
 605			dev_hold(dev);
 606			break;
 607		}
 608	}
 609	rtnl_unlock();
 610	return dev;
 611}
 612
 613EXPORT_SYMBOL(dev_getfirstbyhwtype);
 614
 615/**
 616 *	dev_get_by_flags - find any device with given flags
 617 *	@if_flags: IFF_* values
 618 *	@mask: bitmask of bits in if_flags to check
 619 *
 620 *	Search for any interface with the given flags. Returns NULL if a device
 621 *	is not found or a pointer to the device. The device returned has 
 622 *	had a reference added and the pointer is safe until the user calls
 623 *	dev_put to indicate they have finished with it.
 624 */
 625
 626struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 627{
 628	struct net_device *dev;
 629
 630	read_lock(&dev_base_lock);
 631	for (dev = dev_base; dev != NULL; dev = dev->next) {
 632		if (((dev->flags ^ if_flags) & mask) == 0) {
 633			dev_hold(dev);
 634			break;
 635		}
 636	}
 637	read_unlock(&dev_base_lock);
 638	return dev;
 639}
 640
 641/**
 642 *	dev_valid_name - check if name is okay for network device
 643 *	@name: name string
 644 *
 645 *	Network device names need to be valid file names to
 646 *	to allow sysfs to work
 647 */
 648static int dev_valid_name(const char *name)
 649{
 650	return !(*name == '\0' 
 651		 || !strcmp(name, ".")
 652		 || !strcmp(name, "..")
 653		 || strchr(name, '/'));
 654}
 655
 656/**
 657 *	dev_alloc_name - allocate a name for a device
 658 *	@dev: device
 659 *	@name: name format string
 660 *
 661 *	Passed a format string - eg "lt%d" it will try and find a suitable
 662 *	id. Not efficient for many devices, not called a lot. The caller
 663 *	must hold the dev_base or rtnl lock while allocating the name and
 664 *	adding the device in order to avoid duplicates. Returns the number
 665 *	of the unit assigned or a negative errno code.
 666 */
 667
 668int dev_alloc_name(struct net_device *dev, const char *name)
 669{
 670	int i = 0;
 671	char buf[IFNAMSIZ];
 672	const char *p;
 673	const int max_netdevices = 8*PAGE_SIZE;
 674	long *inuse;
 675	struct net_device *d;
 676
 677	p = strnchr(name, IFNAMSIZ-1, '%');
 678	if (p) {
 679		/*
 680		 * Verify the string as this thing may have come from
 681		 * the user.  There must be either one "%d" and no other "%"
 682		 * characters.
 683		 */
 684		if (p[1] != 'd' || strchr(p + 2, '%'))
 685			return -EINVAL;
 686
 687		/* Use one page as a bit array of possible slots */
 688		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 689		if (!inuse)
 690			return -ENOMEM;
 691
 692		for (d = dev_base; d; d = d->next) {
 693			if (!sscanf(d->name, name, &i))
 694				continue;
 695			if (i < 0 || i >= max_netdevices)
 696				continue;
 697
 698			/*  avoid cases where sscanf is not exact inverse of printf */
 699			snprintf(buf, sizeof(buf), name, i);
 700			if (!strncmp(buf, d->name, IFNAMSIZ))
 701				set_bit(i, inuse);
 702		}
 703
 704		i = find_first_zero_bit(inuse, max_netdevices);
 705		free_page((unsigned long) inuse);
 706	}
 707
 708	snprintf(buf, sizeof(buf), name, i);
 709	if (!__dev_get_by_name(buf)) {
 710		strlcpy(dev->name, buf, IFNAMSIZ);
 711		return i;
 712	}
 713
 714	/* It is possible to run out of possible slots
 715	 * when the name is long and there isn't enough space left
 716	 * for the digits, or if all bits are used.
 717	 */
 718	return -ENFILE;
 719}
 720
 721
 722/**
 723 *	dev_change_name - change name of a device
 724 *	@dev: device
 725 *	@newname: name (or format string) must be at least IFNAMSIZ
 726 *
 727 *	Change name of a device, can pass format strings "eth%d".
 728 *	for wildcarding.
 729 */
 730int dev_change_name(struct net_device *dev, char *newname)
 731{
 732	int err = 0;
 733
 734	ASSERT_RTNL();
 735
 736	if (dev->flags & IFF_UP)
 737		return -EBUSY;
 738
 739	if (!dev_valid_name(newname))
 740		return -EINVAL;
 741
 742	if (strchr(newname, '%')) {
 743		err = dev_alloc_name(dev, newname);
 744		if (err < 0)
 745			return err;
 746		strcpy(newname, dev->name);
 747	}
 748	else if (__dev_get_by_name(newname))
 749		return -EEXIST;
 750	else
 751		strlcpy(dev->name, newname, IFNAMSIZ);
 752
 753	err = class_device_rename(&dev->class_dev, dev->name);
 754	if (!err) {
 755		hlist_del(&dev->name_hlist);
 756		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 757		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 758	}
 759
 760	return err;
 761}
 762
 763/**
 764 *	netdev_state_change - device changes state
 765 *	@dev: device to cause notification
 766 *
 767 *	Called to indicate a device has changed state. This function calls
 768 *	the notifier chains for netdev_chain and sends a NEWLINK message
 769 *	to the routing socket.
 770 */
 771void netdev_state_change(struct net_device *dev)
 772{
 773	if (dev->flags & IFF_UP) {
 774		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
 775		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 776	}
 777}
 778
 779/**
 780 *	dev_load 	- load a network module
 781 *	@name: name of interface
 782 *
 783 *	If a network interface is not present and the process has suitable
 784 *	privileges this function loads the module. If module loading is not
 785 *	available in this kernel then it becomes a nop.
 786 */
 787
 788void dev_load(const char *name)
 789{
 790	struct net_device *dev;  
 791
 792	read_lock(&dev_base_lock);
 793	dev = __dev_get_by_name(name);
 794	read_unlock(&dev_base_lock);
 795
 796	if (!dev && capable(CAP_SYS_MODULE))
 797		request_module("%s", name);
 798}
 799
 800static int default_rebuild_header(struct sk_buff *skb)
 801{
 802	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 803	       skb->dev ? skb->dev->name : "NULL!!!");
 804	kfree_skb(skb);
 805	return 1;
 806}
 807
 808
 809/**
 810 *	dev_open	- prepare an interface for use.
 811 *	@dev:	device to open
 812 *
 813 *	Takes a device from down to up state. The device's private open
 814 *	function is invoked and then the multicast lists are loaded. Finally
 815 *	the device is moved into the up state and a %NETDEV_UP message is
 816 *	sent to the netdev notifier chain.
 817 *
 818 *	Calling this function on an active interface is a nop. On a failure
 819 *	a negative errno code is returned.
 820 */
 821int dev_open(struct net_device *dev)
 822{
 823	int ret = 0;
 824
 825	/*
 826	 *	Is it already up?
 827	 */
 828
 829	if (dev->flags & IFF_UP)
 830		return 0;
 831
 832	/*
 833	 *	Is it even present?
 834	 */
 835	if (!netif_device_present(dev))
 836		return -ENODEV;
 837
 838	/*
 839	 *	Call device private open method
 840	 */
 841	set_bit(__LINK_STATE_START, &dev->state);
 842	if (dev->open) {
 843		ret = dev->open(dev);
 844		if (ret)
 845			clear_bit(__LINK_STATE_START, &dev->state);
 846	}
 847
 848 	/*
 849	 *	If it went open OK then:
 850	 */
 851
 852	if (!ret) {
 853		/*
 854		 *	Set the flags.
 855		 */
 856		dev->flags |= IFF_UP;
 857
 858		/*
 859		 *	Initialize multicasting status
 860		 */
 861		dev_mc_upload(dev);
 862
 863		/*
 864		 *	Wakeup transmit queue engine
 865		 */
 866		dev_activate(dev);
 867
 868		/*
 869		 *	... and announce new interface.
 870		 */
 871		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 872	}
 873	return ret;
 874}
 875
 876/**
 877 *	dev_close - shutdown an interface.
 878 *	@dev: device to shutdown
 879 *
 880 *	This function moves an active device into down state. A
 881 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 882 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 883 *	chain.
 884 */
 885int dev_close(struct net_device *dev)
 886{
 887	if (!(dev->flags & IFF_UP))
 888		return 0;
 889
 890	/*
 891	 *	Tell people we are going down, so that they can
 892	 *	prepare to death, when device is still operating.
 893	 */
 894	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 895
 896	dev_deactivate(dev);
 897
 898	clear_bit(__LINK_STATE_START, &dev->state);
 899
 900	/* Synchronize to scheduled poll. We cannot touch poll list,
 901	 * it can be even on different cpu. So just clear netif_running(),
 902	 * and wait when poll really will happen. Actually, the best place
 903	 * for this is inside dev->stop() after device stopped its irq
 904	 * engine, but this requires more changes in devices. */
 905
 906	smp_mb__after_clear_bit(); /* Commit netif_running(). */
 907	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
 908		/* No hurry. */
 909		current->state = TASK_INTERRUPTIBLE;
 910		schedule_timeout(1);
 911	}
 912
 913	/*
 914	 *	Call the device specific close. This cannot fail.
 915	 *	Only if device is UP
 916	 *
 917	 *	We allow it to be called even after a DETACH hot-plug
 918	 *	event.
 919	 */
 920	if (dev->stop)
 921		dev->stop(dev);
 922
 923	/*
 924	 *	Device is now down.
 925	 */
 926
 927	dev->flags &= ~IFF_UP;
 928
 929	/*
 930	 * Tell people we are down
 931	 */
 932	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 933
 934	return 0;
 935}
 936
 937
 938/*
 939 *	Device change register/unregister. These are not inline or static
 940 *	as we export them to the world.
 941 */
 942
 943/**
 944 *	register_netdevice_notifier - register a network notifier block
 945 *	@nb: notifier
 946 *
 947 *	Register a notifier to be called when network device events occur.
 948 *	The notifier passed is linked into the kernel structures and must
 949 *	not be reused until it has been unregistered. A negative errno code
 950 *	is returned on a failure.
 951 *
 952 * 	When registered all registration and up events are replayed
 953 *	to the new notifier to allow device to have a race free 
 954 *	view of the network device list.
 955 */
 956
 957int register_netdevice_notifier(struct notifier_block *nb)
 958{
 959	struct net_device *dev;
 960	int err;
 961
 962	rtnl_lock();
 963	err = notifier_chain_register(&netdev_chain, nb);
 964	if (!err) {
 965		for (dev = dev_base; dev; dev = dev->next) {
 966			nb->notifier_call(nb, NETDEV_REGISTER, dev);
 967
 968			if (dev->flags & IFF_UP) 
 969				nb->notifier_call(nb, NETDEV_UP, dev);
 970		}
 971	}
 972	rtnl_unlock();
 973	return err;
 974}
 975
 976/**
 977 *	unregister_netdevice_notifier - unregister a network notifier block
 978 *	@nb: notifier
 979 *
 980 *	Unregister a notifier previously registered by
 981 *	register_netdevice_notifier(). The notifier is unlinked into the
 982 *	kernel structures and may then be reused. A negative errno code
 983 *	is returned on a failure.
 984 */
 985
 986int unregister_netdevice_notifier(struct notifier_block *nb)
 987{
 988	return notifier_chain_unregister(&netdev_chain, nb);
 989}
 990
 991/**
 992 *	call_netdevice_notifiers - call all network notifier blocks
 993 *      @val: value passed unmodified to notifier function
 994 *      @v:   pointer passed unmodified to notifier function
 995 *
 996 *	Call all network notifier blocks.  Parameters and return value
 997 *	are as for notifier_call_chain().
 998 */
 999
1000int call_netdevice_notifiers(unsigned long val, void *v)
1001{
1002	return notifier_call_chain(&netdev_chain, val, v);
1003}
1004
1005/* When > 0 there are consumers of rx skb time stamps */
1006static atomic_t netstamp_needed = ATOMIC_INIT(0);
1007
1008void net_enable_timestamp(void)
1009{
1010	atomic_inc(&netstamp_needed);
1011}
1012
1013void net_disable_timestamp(void)
1014{
1015	atomic_dec(&netstamp_needed);
1016}
1017
1018static inline void net_timestamp(struct timeval *stamp)
1019{
1020	if (atomic_read(&netstamp_needed))
1021		do_gettimeofday(stamp);
1022	else {
1023		stamp->tv_sec = 0;
1024		stamp->tv_usec = 0;
1025	}
1026}
1027
1028/*
1029 *	Support routine. Sends outgoing frames to any network
1030 *	taps currently in use.
1031 */
1032
1033void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1034{
1035	struct packet_type *ptype;
1036	net_timestamp(&skb->stamp);
1037
1038	rcu_read_lock();
1039	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1040		/* Never send packets back to the socket
1041		 * they originated from - MvS (miquels@drinkel.ow.org)
1042		 */
1043		if ((ptype->dev == dev || !ptype->dev) &&
1044		    (ptype->af_packet_priv == NULL ||
1045		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1046			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1047			if (!skb2)
1048				break;
1049
1050			/* skb->nh should be correctly
1051			   set by sender, so that the second statement is
1052			   just protection against buggy protocols.
1053			 */
1054			skb2->mac.raw = skb2->data;
1055
1056			if (skb2->nh.raw < skb2->data ||
1057			    skb2->nh.raw > skb2->tail) {
1058				if (net_ratelimit())
1059					printk(KERN_CRIT "protocol %04x is "
1060					       "buggy, dev %s\n",
1061					       skb2->protocol, dev->name);
1062				skb2->nh.raw = skb2->data;
1063			}
1064
1065			skb2->h.raw = skb2->nh.raw;
1066			skb2->pkt_type = PACKET_OUTGOING;
1067			ptype->func(skb2, skb->dev, ptype);
1068		}
1069	}
1070	rcu_read_unlock();
1071}
1072
1073/*
1074 * Invalidate hardware checksum when packet is to be mangled, and
1075 * complete checksum manually on outgoing path.
1076 */
1077int skb_checksum_help(struct sk_buff *skb, int inward)
1078{
1079	unsigned int csum;
1080	int ret = 0, offset = skb->h.raw - skb->data;
1081
1082	if (inward) {
1083		skb->ip_summed = CHECKSUM_NONE;
1084		goto out;
1085	}
1086
1087	if (skb_cloned(skb)) {
1088		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1089		if (ret)
1090			goto out;
1091	}
1092
1093	if (offset > (int)skb->len)
1094		BUG();
1095	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1096
1097	offset = skb->tail - skb->h.raw;
1098	if (offset <= 0)
1099		BUG();
1100	if (skb->csum + 2 > offset)
1101		BUG();
1102
1103	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1104	skb->ip_summed = CHECKSUM_NONE;
1105out:	
1106	return ret;
1107}
1108
1109#ifdef CONFIG_HIGHMEM
1110/* Actually, we should eliminate this check as soon as we know, that:
1111 * 1. IOMMU is present and allows to map all the memory.
1112 * 2. No high memory really exists on this machine.
1113 */
1114
1115static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1116{
1117	int i;
1118
1119	if (dev->features & NETIF_F_HIGHDMA)
1120		return 0;
1121
1122	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1123		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1124			return 1;
1125
1126	return 0;
1127}
1128#else
1129#define illegal_highdma(dev, skb)	(0)
1130#endif
1131
1132extern void skb_release_data(struct sk_buff *);
1133
1134/* Keep head the same: replace data */
/*
 * Replace the skb's data buffer with one freshly allocated linear buffer
 * that holds the headroom plus all skb->len bytes of packet data.
 *
 * @skb:      buffer to linearize; BUG()s if it is shared
 * @gfp_mask: allocation flags passed to kmalloc()
 *
 * Returns 0 on success or -ENOMEM when the new buffer cannot be
 * allocated (the skb is left untouched in that case).
 */
 1135int __skb_linearize(struct sk_buff *skb, int gfp_mask)
 1136{
 1137	unsigned int size;
 1138	u8 *data;
 1139	long offset;
 1140	struct skb_shared_info *ninfo;
 1141	int headerlen = skb->data - skb->head;
	/* Bytes by which tail plus the non-linear data overshoot the buffer end */
 1142	int expand = (skb->tail + skb->data_len) - skb->end;
 1143
 1144	if (skb_shared(skb))
 1145		BUG();
 1146
 1147	if (expand <= 0)
 1148		expand = 0;
 1149
	/* New buffer: old head..end span plus the overshoot, then shared info */
 1150	size = skb->end - skb->head + expand;
 1151	size = SKB_DATA_ALIGN(size);
 1152	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 1153	if (!data)
 1154		return -ENOMEM;
 1155
 1156	/* Copy entire thing */
	/* A negative offset starts the copy at skb->head so the headroom comes along */
 1157	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
 1158		BUG();
 1159
 1160	/* Set up shinfo */
 1161	ninfo = (struct skb_shared_info*)(data + size);
 1162	atomic_set(&ninfo->dataref, 1);
 1163	ninfo->tso_size = skb_shinfo(skb)->tso_size;
 1164	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
 1165	ninfo->nr_frags = 0;
 1166	ninfo->frag_list = NULL;
 1167
 1168	/* Offset between the two in bytes */
 1169	offset = data - skb->head;
 1170
 1171	/* Free old data. */
 1172	skb_release_data(skb);
 1173
 1174	skb->head = data;
 1175	skb->end  = data + size;
 1176
 1177	/* Set up new pointers */
 1178	skb->h.raw   += offset;
 1179	skb->nh.raw  += offset;
 1180	skb->mac.raw += offset;
 1181	skb->tail    += offset;
 1182	skb->data    += offset;
 1183
 1184	/* We are no longer a clone, even if we were. */
 1185	skb->cloned    = 0;
 1186
	/* Everything is linear now: extend tail over the former non-linear bytes */
 1187	skb->tail     += skb->data_len;
 1188	skb->data_len  = 0;
 1189	return 0;
 1190}
1191
/*
 * Take dev->xmit_lock and record @cpu as its owner, unless the driver
 * advertises NETIF_F_LLTX.  Wrapped in do { } while (0) so the macro
 * behaves as a single statement (safe in an unbraced if/else), with
 * arguments parenthesized so any pointer expression may be passed.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)
1198
/*
 * Clear the recorded owner and release dev->xmit_lock, unless the driver
 * advertises NETIF_F_LLTX.  Wrapped in do { } while (0) so the macro
 * behaves as a single statement (safe in an unbraced if/else), with
 * the argument parenthesized so any pointer expression may be passed.
 */
#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1205
1206/**
1207 *	dev_queue_xmit - transmit a buffer
1208 *	@skb: buffer to transmit
1209 *
1210 *	Queue a buffer for transmission to a network device. The caller must
1211 *	have set the device and priority and built the buffer before calling
1212 *	this function. The function can be called from an interrupt.
1213 *
1214 *	A negative errno code is returned on a failure. A success does not
1215 *	guarantee the frame will be transmitted as it may be dropped due
1216 *	to congestion or traffic shaping.
1217 */
1218
 1219int dev_queue_xmit(struct sk_buff *skb)
 1220{
 1221	struct net_device *dev = skb->dev;
 1222	struct Qdisc *q;
	/* Error reported if any of the preparation steps below fails */
 1223	int rc = -ENOMEM;
 1224
	/* Device cannot take a frag_list: collapse the skb into one buffer */
 1225	if (skb_shinfo(skb)->frag_list &&
 1226	    !(dev->features & NETIF_F_FRAGLIST) &&
 1227	    __skb_linearize(skb, GFP_ATOMIC))
 1228		goto out_kfree_skb;
 1229
 1230	/* Fragmented skb is linearized if device does not support SG,
 1231	 * or if at least one of fragments is in highmem and device
 1232	 * does not support DMA from it.
 1233	 */
 1234	if (skb_shinfo(skb)->nr_frags &&
 1235	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
 1236	    __skb_linearize(skb, GFP_ATOMIC))
 1237		goto out_kfree_skb;
 1238
 1239	/* If packet is not checksummed and device does not support
 1240	 * checksumming for this protocol, complete checksumming here.
 1241	 */
 1242	if (skb->ip_summed == CHECKSUM_HW &&
 1243	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
 1244	     (!(dev->features & NETIF_F_IP_CSUM) ||
 1245	      skb->protocol != htons(ETH_P_IP))))
 1246	      	if (skb_checksum_help(skb, 0))
 1247	      		goto out_kfree_skb;
 1248
 1249	/* Disable soft irqs for various locks below. Also 
 1250	 * stops preemption for RCU. 
 1251	 */
 1252	local_bh_disable(); 
 1253
 1254	/* Updates of qdisc are serialized by queue_lock. 
 1255	 * The struct Qdisc which is pointed to by qdisc is now a 
 1256	 * rcu structure - it may be accessed without acquiring 
 1257	 * a lock (but the structure may be stale.) The freeing of the
 1258	 * qdisc will be deferred until it's known that there are no 
 1259	 * more references to it.
 1260	 * 
 1261	 * If the qdisc has an enqueue function, we still need to 
 1262	 * hold the queue_lock before calling it, since queue_lock
 1263	 * also serializes access to the device queue.
 1264	 */
 1265
 1266	q = rcu_dereference(dev->qdisc);
 1267#ifdef CONFIG_NET_CLS_ACT
 1268	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 1269#endif
 1270	if (q->enqueue) {
 1271		/* Grab device queue */
 1272		spin_lock(&dev->queue_lock);
 1273
 1274		rc = q->enqueue(skb, q);
 1275
 1276		qdisc_run(dev);
 1277
 1278		spin_unlock(&dev->queue_lock);
		/* NET_XMIT_BYPASS is reported to the caller as success */
 1279		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
 1280		goto out;
 1281	}
 1282
 1283	/* The device has no queue. Common case for software devices:
 1284	   loopback, all the sorts of tunnels...
 1285
 1286	   Really, it is unlikely that xmit_lock protection is necessary here.
 1287	   (f.e. loopback and IP tunnels are clean ignoring statistics
 1288	   counters.)
 1289	   However, it is possible, that they rely on protection
 1290	   made by us here.
 1291
 1292	   Check this and shot the lock. It is not prone from deadlocks.
 1293	   Either shot noqueue qdisc, it is even simpler 8)
 1294	 */
 1295	if (dev->flags & IFF_UP) {
 1296		int cpu = smp_processor_id(); /* ok because BHs are off */
 1297
		/* Owner == this CPU means we re-entered while holding the lock */
 1298		if (dev->xmit_lock_owner != cpu) {
 1299
 1300			HARD_TX_LOCK(dev, cpu);
 1301
 1302			if (!netif_queue_stopped(dev)) {
 1303				if (netdev_nit)
 1304					dev_queue_xmit_nit(skb, dev);
 1305
 1306				rc = 0;
				/* Zero from hard_start_xmit: return without freeing skb here */
 1307				if (!dev->hard_start_xmit(skb, dev)) {
 1308					HARD_TX_UNLOCK(dev);
 1309					goto out;
 1310				}
 1311			}
 1312			HARD_TX_UNLOCK(dev);
 1313			if (net_ratelimit())
 1314				printk(KERN_CRIT "Virtual device %s asks to "
 1315				       "queue packet!\n", dev->name);
 1316		} else {
 1317			/* Recursion is detected! It is possible,
 1318			 * unfortunately */
 1319			if (net_ratelimit())
 1320				printk(KERN_CRIT "Dead loop on virtual device "
 1321				       "%s, fix it urgently!\n", dev->name);
 1322		}
 1323	}
 1324
	/* Device down, queue stopped, xmit refused or recursion: drop the packet */
 1325	rc = -ENETDOWN;
 1326	local_bh_enable();
 1327
	/* Entered directly (BHs never disabled) from the preparation failures above */
 1328out_kfree_skb:
 1329	kfree_skb(skb);
 1330	return rc;
 1331out:
 1332	local_bh_enable();
 1333	return rc;
 1334}
1335
1336
1337/*=======================================================================
1338			Receiver routines
1339  =======================================================================*/
1340
1341int netdev_max_backlog = 300;
1342int weight_p = 64;            /* old backlog weight */
 1343/* These numbers are selected based on intuition and some
 1344 * experimentation; if you have a more scientific way of doing this,
 1345 * please go ahead and fix things.
 1346 */
1347int no_cong_thresh = 10;
1348int no_cong = 20;
1349int lo_cong = 100;
1350int mod_cong = 290;
1351
1352DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1353
1354
1355static void get_sample_stats(int cpu)
1356{
1357#ifdef RAND_LIE
1358	unsigned long rd;
1359	int rq;
1360#endif
1361	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1362	int blog = sd->input_pkt_queue.qlen;
1363	int avg_blog = sd->avg_blog;
1364
1365	avg_blog = (avg_blog >> 1) + (blog >> 1);
1366
1367	if (avg_blog > mod_cong) {
1368		/* Above moderate congestion levels. */
1369		sd->cng_level = NET_RX_CN_HIGH;
1370#ifdef RAND_LIE
1371		rd = net_random();
1372		rq = rd % netdev_max_backlog;
1373		if (rq < avg_blog) /* unlucky bastard */
1374			sd->cng_level = NET_RX_DROP;
1375#endif
1376	} else if (avg_blog > lo_cong) {
1377		sd->cng_level = NET_RX_CN_MOD;
1378#ifdef RAND_LIE
1379		rd = net_random();
1380		rq = rd % netdev_max_backlog;
1381			if (rq < avg_blog) /* unlucky bastard */
1382				sd->cng_level = NET_RX_CN_HIGH;
1383#endif
1384	} else if (avg_blog > no_cong)
1385		sd->cng_level = NET_RX_CN_LOW;
1386	else  /* no congestion */
1387		sd->cng_level = NET_RX_SUCCESS;
1388
1389	sd->avg_blog = avg_blog;
1390}
1391
1392#ifdef OFFLINE_SAMPLE
1393static void sample_queue(unsigned long dummy)
1394{
1395/* 10 ms 0r 1ms -- i don't care -- JHS */
1396	int next_tick = 1;
1397	int cpu = smp_processor_id();
1398
1399	get_sample_stats(cpu);
1400	next_tick += jiffies;
1401	mod_timer(&samp_timer, next_tick);
1402}
1403#endif
1404
1405
1406/**
1407 *	netif_rx	-	post buffer to the network code
1408 *	@skb: buffer to post
1409 *
1410 *	This function receives a packet from a device driver and queues it for
1411 *	the upper (protocol) levels to process.  It always succeeds. The buffer
1412 *	may be dropped during processing for congestion control or by the
1413 *	protocol layers.
1414 *
1415 *	return values:
1416 *	NET_RX_SUCCESS	(no congestion)
1417 *	NET_RX_CN_LOW   (low congestion)
1418 *	NET_RX_CN_MOD   (moderate congestion)
1419 *	NET_RX_CN_HIGH  (high congestion)
1420 *	NET_RX_DROP     (packet was dropped)
1421 *
1422 */
1423
 1424int netif_rx(struct sk_buff *skb)
 1425{
 1426	int this_cpu;
 1427	struct softnet_data *queue;
 1428	unsigned long flags;
 1429
 1430	/* if netpoll wants it, pretend we never saw it */
 1431	if (netpoll_rx(skb))
 1432		return NET_RX_DROP;
 1433
	/* Stamp only if the skb does not carry a timestamp yet */
 1434	if (!skb->stamp.tv_sec)
 1435		net_timestamp(&skb->stamp);
 1436
 1437	/*
 1438	 * The code is rearranged so that the path is the most
 1439	 * short when CPU is congested, but is still operating.
 1440	 */
 1441	local_irq_save(flags);
 1442	this_cpu = smp_processor_id();
 1443	queue = &__get_cpu_var(softnet_data);
 1444
 1445	__get_cpu_var(netdev_rx_stat).total++;
 1446	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		/* Non-empty queue: just append, unless we are throttled */
 1447		if (queue->input_pkt_queue.qlen) {
 1448			if (queue->throttle)
 1449				goto drop;
 1450
 1451enqueue:
			/* Device reference is dropped again by process_backlog() */
 1452			dev_hold(skb->dev);
 1453			__skb_queue_tail(&queue->input_pkt_queue, skb);
 1454#ifndef OFFLINE_SAMPLE
 1455			get_sample_stats(this_cpu);
 1456#endif
 1457			local_irq_restore(flags);
 1458			return queue->cng_level;
 1459		}
 1460
		/* Queue was empty: leave throttle mode and schedule the backlog device */
 1461		if (queue->throttle)
 1462			queue->throttle = 0;
 1463
 1464		netif_rx_schedule(&queue->backlog_dev);
 1465		goto enqueue;
 1466	}
 1467
	/* Backlog limit exceeded: enter throttle mode (counted once) and drop */
 1468	if (!queue->throttle) {
 1469		queue->throttle = 1;
 1470		__get_cpu_var(netdev_rx_stat).throttled++;
 1471	}
 1472
 1473drop:
 1474	__get_cpu_var(netdev_rx_stat).dropped++;
 1475	local_irq_restore(flags);
 1476
 1477	kfree_skb(skb);
 1478	return NET_RX_DROP;
 1479}
1480
/*
 * Variant of netif_rx() that, with preemption disabled, also runs any
 * softirqs that are pending once the packet has been posted.  Returns
 * the value netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int verdict;

	preempt_disable();

	verdict = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return verdict;
}

EXPORT_SYMBOL(netif_rx_ni);
1495
1496static __inline__ void skb_bond(struct sk_buff *skb)
1497{
1498	struct net_device *dev = skb->dev;
1499
1500	if (dev->master) {
1501		skb->real_dev = skb->dev;
1502		skb->dev = dev->master;
1503	}
1504}
1505
1506static void net_tx_action(struct softirq_action *h)
1507{
1508	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1509
1510	if (sd->completion_queue) {
1511		struct sk_buff *clist;
1512
1513		local_irq_disable();
1514		clist = sd->completion_queue;
1515		sd->completion_queue = NULL;
1516		local_irq_enable();
1517
1518		while (clist) {
1519			struct sk_buff *skb = clist;
1520			clist = clist->next;
1521
1522			BUG_TRAP(!atomic_read(&skb->users));
1523			__kfree_skb(skb);
1524		}
1525	}
1526
1527	if (sd->output_queue) {
1528		struct net_device *head;
1529
1530		local_irq_disable();
1531		head = sd->output_queue;
1532		sd->output_queue = NULL;
1533		local_irq_enable();
1534
1535		while (head) {
1536			struct net_device *dev = head;
1537			head = head->next_sched;
1538
1539			smp_mb__before_clear_bit();
1540			clear_bit(__LINK_STATE_SCHED, &dev->state);
1541
1542			if (spin_trylock(&dev->queue_lock)) {
1543				qdisc_run(dev);
1544				spin_unlock(&dev->queue_lock);
1545			} else {
1546				netif_schedule(dev);
1547			}
1548		}
1549	}
1550}
1551
/*
 * Hand @skb to the handler @pt_prev after taking an extra reference on
 * it (skb->users), and return the handler's result.
 */
 1552static __inline__ int deliver_skb(struct sk_buff *skb,
 1553				  struct packet_type *pt_prev)
 1554{
 1555	atomic_inc(&skb->users);
 1556	return pt_prev->func(skb, skb->dev, pt_prev);
 1557}
1558
1559#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1560int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1561struct net_bridge;
1562struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1563						unsigned char *addr);
1564void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1565
1566static __inline__ int handle_bridge(struct sk_buff **pskb,
1567				    struct packet_type **pt_prev, int *ret)
1568{
1569	struct net_bridge_port *port;
1570
1571	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1572	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1573		return 0;
1574
1575	if (*pt_prev) {
1576		*ret = deliver_skb(*pskb, *pt_prev);
1577		*pt_prev = NULL;
1578	} 
1579	
1580	return br_handle_frame_hook(port, pskb);
1581}
1582#else
1583#define handle_bridge(skb, pt_prev, ret)	(0)
1584#endif
1585
1586#ifdef CONFIG_NET_CLS_ACT
1587/* TODO: Maybe we should just force sch_ingress to be compiled in
1588 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1589 * a compare and 2 stores extra right now if we dont have it on
1590 * but have CONFIG_NET_CLS_ACT
1591 * NOTE: This doesnt stop any functionality; if you dont have 
1592 * the ingress scheduler, you just cant add policies on ingress.
1593 *
1594 */
1595static int ing_filter(struct sk_buff *skb) 
1596{
1597	struct Qdisc *q;
1598	struct net_device *dev = skb->dev;
1599	int result = TC_ACT_OK;
1600	
1601	if (dev->qdisc_ingress) {
1602		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1603		if (MAX_RED_LOOP < ttl++) {
1604			printk("Redir loop detected Dropping packet (%s->%s)\n",
1605				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
1606			return TC_ACT_SHOT;
1607		}
1608
1609		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1610
1611		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1612		if (NULL == skb->input_dev) {
1613			skb->input_dev = skb->dev;
1614			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
1615		}
1616		spin_lock(&dev->ingress_lock);
1617		if ((q = dev->qdisc_ingress) != NULL)
1618			result = q->enqueue(skb, q);
1619		spin_unlock(&dev->ingress_lock);
1620
1621	}
1622
1623	return result;
1624}
1625#endif
1626
/*
 * Deliver a received packet to the protocol handlers: first every
 * matching ptype_all handler, then (with CONFIG_NET_CLS_ACT) the ingress
 * filter, the diverter and bridge hooks, and finally the handlers hashed
 * under skb->protocol in ptype_base.  Returns the last handler's result,
 * or NET_RX_DROP when netpoll consumed the packet or nobody took it.
 */
 1627int netif_receive_skb(struct sk_buff *skb)
 1628{
 1629	struct packet_type *ptype, *pt_prev;
 1630	int ret = NET_RX_DROP;
 1631	unsigned short type;
 1632
 1633	/* if we've gotten here through NAPI, check netpoll */
 1634	if (skb->dev->poll && netpoll_rx(skb))
 1635		return NET_RX_DROP;
 1636
 1637	if (!skb->stamp.tv_sec)
 1638		net_timestamp(&skb->stamp);
 1639
 1640	skb_bond(skb);
 1641
 1642	__get_cpu_var(netdev_rx_stat).total++;
 1643
 1644	skb->h.raw = skb->nh.raw = skb->data;
 1645	skb->mac_len = skb->nh.raw - skb->mac.raw;
 1646
	/*
	 * pt_prev holds the previously matched handler; it is only called
	 * once the next match is found, so the final match can be handed
	 * the skb without an extra reference (see the end of the function).
	 */
 1647	pt_prev = NULL;
 1648
 1649	rcu_read_lock();
 1650
 1651#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: clear it and skip the taps and the ingress filter */
 1652	if (skb->tc_verd & TC_NCLS) {
 1653		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
 1654		goto ncls;
 1655	}
 1656#endif
 1657
 1658	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 1659		if (!ptype->dev || ptype->dev == skb->dev) {
 1660			if (pt_prev) 
 1661				ret = deliver_skb(skb, pt_prev);
 1662			pt_prev = ptype;
 1663		}
 1664	}
 1665
 1666#ifdef CONFIG_NET_CLS_ACT
 1667	if (pt_prev) {
 1668		ret = deliver_skb(skb, pt_prev);
 1669		pt_prev = NULL; /* noone else should process this after*/
 1670	} else {
 1671		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
 1672	}
 1673
 1674	ret = ing_filter(skb);
 1675
	/* Shot or stolen by the ingress filter: free the skb and stop here */
 1676	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
 1677		kfree_skb(skb);
 1678		goto out;
 1679	}
 1680
 1681	skb->tc_verd = 0;
 1682ncls:
 1683#endif
 1684
 1685	handle_diverter(skb);
 1686
	/* Non-zero from handle_bridge(): stop processing the packet here */
 1687	if (handle_bridge(&skb, &pt_prev, &ret))
 1688		goto out;
 1689
 1690	type = skb->protocol;
	/* ptype_base is hashed on the low four bits of the protocol number */
 1691	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
 1692		if (ptype->type == type &&
 1693		    (!ptype->dev || ptype->dev == skb->dev)) {
 1694			if (pt_prev) 
 1695				ret = deliver_skb(skb, pt_prev);
 1696			pt_prev = ptype;
 1697		}
 1698	}
 1699
	/* Last handler is called directly, without the extra skb reference */
 1700	if (pt_prev) {
 1701		ret = pt_prev->func(skb, skb->dev, pt_prev);
 1702	} else {
 1703		kfree_skb(skb);
 1704		/* Jamal, now you will not able to escape explaining
 1705		 * me how you were going to use this. :-)
 1706		 */
 1707		ret = NET_RX_DROP;
 1708	}
 1709
 1710out:
 1711	rcu_read_unlock();
 1712	return ret;
 1713}
1714
/*
 * Poll routine of the per-CPU backlog device: feed packets queued by
 * netif_rx() on this CPU to netif_receive_skb().
 *
 * Processes at most min(backlog_dev->quota, *budget) packets and stops
 * early once more than one jiffy has passed.  Both counters are reduced
 * by the number of packets handled.  Returns -1 when it stopped with the
 * quota or time used up, 0 when the input queue was found empty (the
 * device is then taken off the poll list and polling re-enabled).
 */
 1715static int process_backlog(struct net_device *backlog_dev, int *budget)
 1716{
 1717	int work = 0;
 1718	int quota = min(backlog_dev->quota, *budget);
 1719	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 1720	unsigned long start_time = jiffies;
 1721
 1722	for (;;) {
 1723		struct sk_buff *skb;
 1724		struct net_device *dev;
 1725
 1726		local_irq_disable();
 1727		skb = __skb_dequeue(&queue->input_pkt_queue);
		/* Note: job_done is entered with interrupts still disabled */
 1728		if (!skb)
 1729			goto job_done;
 1730		local_irq_enable();
 1731
		/* Remember the device now; skb must not be touched after delivery */
 1732		dev = skb->dev;
 1733
 1734		netif_receive_skb(skb);
 1735
		/* Pairs with the dev_hold() done when netif_rx() queued the skb */
 1736		dev_put(dev);
 1737
 1738		work++;
 1739
 1740		if (work >= quota || jiffies - start_time > 1)
 1741			break;
 1742
 1743	}
 1744
 1745	backlog_dev->quota -= work;
 1746	*budget -= work;
 1747	return -1;
 1748
 1749job_done:
 1750	backlog_dev->quota -= work;
 1751	*budget -= work;
 1752
 1753	list_del(&backlog_dev->poll_list);
 1754	smp_mb__before_clear_bit();
 1755	netif_poll_enable(backlog_dev);
 1756
 1757	if (queue->throttle)
 1758		queue->throttle = 0;
 1759	local_irq_enable();
 1760	return 0;
 1761}
1762
/*
 * RX softirq handler: poll the devices on this CPU's poll list until the
 * list is empty, the budget (netdev_max_backlog packets) is used up, or
 * more than one jiffy has elapsed.  In the latter two cases the softirq
 * is raised again and the time_squeeze statistic is incremented.
 */
 1763static void net_rx_action(struct softirq_action *h)
 1764{
 1765	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 1766	unsigned long start_time = jiffies;
 1767	int budget = netdev_max_backlog;
 1768
 1769	
 1770	local_irq_disable();
 1771
	/* The list is examined with interrupts off; polling runs with them on */
 1772	while (!list_empty(&queue->poll_list)) {
 1773		struct net_device *dev;
 1774
 1775		if (budget <= 0 || jiffies - start_time > 1)
 1776			goto softnet_break;
 1777
 1778		local_irq_enable();
 1779
 1780		dev = list_entry(queue->poll_list.next,
 1781				 struct net_device, poll_list);
 1782		netpoll_poll_lock(dev);
 1783
		/*
		 * Quota used up or poll() returned non-zero: move the device
		 * to the tail of the list and replenish its quota from its
		 * weight (a negative quota is carried over as debt).
		 */
 1784		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
 1785			netpoll_poll_unlock(dev);
 1786			local_irq_disable();
 1787			list_del(&dev->poll_list);
 1788			list_add_tail(&dev->poll_list, &queue->poll_list);
 1789			if (dev->quota < 0)
 1790				dev->quota += dev->weight;
 1791			else
 1792				dev->quota = dev->weight;
 1793		} else {
			/*
			 * poll() returned zero: drop a device reference.
			 * NOTE(review): presumably the one taken when the
			 * device was scheduled for polling - confirm.
			 */
 1794			netpoll_poll_unlock(dev);
 1795			dev_put(dev);
 1796			local_irq_disable();
 1797		}
 1798	}
 1799out:
 1800	local_irq_enable();
 1801	return;
 1802
 1803softnet_break:
 1804	__get_cpu_var(netdev_rx_stat).time_squeeze++;
 1805	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 1806	goto out;
 1807}
1808
1809static gifconf_func_t * gifconf_list [NPROTO];
1810
1811/**
1812 *	register_gifconf	-	register a SIOCGIF handler
1813 *	@family: Address family
1814 *	@gifconf: Function handler
1815 *
1816 *	Register protocol dependent address dumping routines. The handler
1817 *	that is passed must not be freed or reused until it has been replaced
1818 *	by another handler.
1819 */
1820int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1821{
1822	if (family >= NPROTO)
1823		return -EINVAL;
1824	gifconf_list[family] = gifconf;
1825	return 0;
1826}
1827
1828
1829/*
1830 *	Map an interface index to its name (SIOCGIFNAME)
1831 */
1832
1833/*
1834 *	We need this ioctl for efficient implementation of the
1835 *	if_indextoname() function required by the IPv6 API.  Without
1836 *	it, we would have to search all the interfaces to find a
1837 *	match.  --pb
1838 */
1839
1840static int dev_ifname(struct ifreq __user *arg)
1841{
1842	struct net_device *dev;
1843	struct ifreq ifr;
1844
1845	/*
1846	 *	Fetch the caller's info block.
1847	 */
1848
1849	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1850		return -EFAULT;
1851
1852	read_lock(&dev_base_lock);
1853	dev = __dev_get_by_index(ifr.ifr_ifindex);
1854	if (!dev) {
1855		read_unlock(&dev_base_lock);
1856		return -ENODEV;
1857	}
1858
1859	strcpy(ifr.ifr_name, dev->name);
1860	read_unlock(&dev_base_lock);
1861
1862	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1863		return -EFAULT;
1864	return 0;
1865}
1866
1867/*
1868 *	Perform a SIOCGIFCONF call. This structure will change
1869 *	size eventually, and there is nothing I can do about it.
1870 *	Thus we will need a 'compatibility mode'.
1871 */
1872
1873static int dev_ifconf(char __user *arg)
1874{
1875	struct ifconf ifc;
1876	struct net_device *dev;
1877	char __user *pos;
1878	int len;
1879	int total;
1880	int i;
1881
1882	/*
1883	 *	Fetch the caller's info block.
1884	 */
1885
1886	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1887		return -EFAULT;
1888
1889	pos = ifc.ifc_buf;
1890	len = ifc.ifc_len;
1891
1892	/*
1893	 *	Loop over the interfaces, and write an info block for each.
1894	 */
1895
1896	total = 0;
1897	for (dev = dev_base; dev; dev = dev->next) {
1898		for (i = 0; i < NPROTO; i++) {
1899			if (gifconf_list[i]) {
1900				int done;
1901				if (!pos)
1902					done = gifconf_list[i](dev, NULL, 0);
1903				else
1904					done = gifconf_list[i](dev, pos + total,
1905							       len - total);
1906				if (done < 0)
1907					return -EFAULT;
1908				total += done;
1909			}
1910		}
1911  	}
1912
1913	/*
1914	 *	All done.  Write the updated control block back to the caller.
1915	 */
1916	ifc.ifc_len = total;
1917
1918	/*
1919	 * 	Both BSD and Solaris return 0 here, so we do too.
1920	 */
1921	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1922}
1923
1924#ifdef CONFIG_PROC_FS
1925/*
1926 *	This is invoked by the /proc filesystem handler to display a device
1927 *	in detail.
1928 */
1929static __inline__ struct net_device *dev_get_idx(loff_t pos)
1930{
1931	struct net_device *dev;
1932	loff_t i;
1933
1934	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1935
1936	return i == pos ? dev : NULL;
1937}
1938
1939void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1940{
1941	read_lock(&dev_base_lock);
1942	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1943}
1944
1945void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1946{
1947	++*pos;
1948	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1949}
1950
/* seq_file stop hook: release the read lock taken in dev_seq_start(). */
 1951void dev_seq_stop(struct seq_file *seq, void *v)
 1952{
 1953	read_unlock(&dev_base_lock);
 1954}
1955
1956static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1957{
1958	if (dev->get_stats) {
1959		struct net_device_stats *stats = dev->get_stats(dev);
1960
1961		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1962				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1963			   dev->name, stats->rx_bytes, stats->rx_packets,
1964			   stats->rx_errors,
1965			   stats->rx_dropped + stats->rx_missed_errors,
1966			   stats->rx_fifo_errors,
1967			   stats->rx_length_errors + stats->rx_over_errors +
1968			     stats->rx_crc_errors + stats->rx_frame_errors,
1969			   stats->rx_compressed, stats->multicast,
1970			   stats->tx_bytes, stats->tx_packets,
1971			   stats->tx_errors, stats->tx_dropped,
1972			   stats->tx_fifo_errors, stats->collisions,
1973			   stats->tx_carrier_errors +
1974			     stats->tx_aborted_errors +
1975			     stats->tx_window_errors +
1976			     stats->tx_heartbeat_errors,
1977			   stats->tx_compressed);
1978	} else
1979		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1980}
1981
1982/*
1983 *	Called from the PROCfs module. This now uses the new arbitrary sized
1984 *	/proc/net interface to create /proc/net/dev
1985 */
1986static int dev_seq_show(struct seq_file *seq, void *v)
1987{
1988	if (v == SEQ_START_TOKEN)
1989		seq_puts(seq, "Inter-|   Receive                            "
1990			      "                    |  Transmit\n"
1991			      " face |bytes    packets errs drop fifo frame "
1992			      "compressed multicast|bytes    packets errs "
1993			      "drop fifo colls carrier compressed\n");
1994	else
1995		dev_seq_printf_stats(seq, v);
1996	return 0;
1997}
1998
1999static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2000{
2001	struct netif_rx_stats *rc = NULL;
2002
2003	while (*pos < NR_CPUS)
2004	       	if (cpu_online(*pos)) {
2005			rc = &per_cpu(netdev_rx_stat, *pos);
2006			break;
2007		} else
2008			++*pos;
2009	return rc;
2010}
2011
/* seq_file start hook: stats of the first online CPU at or after *pos. */
 2012static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
 2013{
 2014	return softnet_get_online(pos);
 2015}
2016
/* seq_file next hook: step past the current CPU, then find the next online one. */
 2017static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 2018{
 2019	++*pos;
 2020	return softnet_get_online(pos);
 2021}
2022
/* seq_file stop hook: nothing to release, the softnet iterator takes no lock. */
 2023static void softnet_seq_stop(struct seq_file *seq, void *v)
 2024{
 2025}
2026
/*
 * seq_file show hook: print one CPU's receive statistics as nine
 * space-separated 8-digit hex fields.  The last field is cpu_collision;
 * the fastroute latency counter it replaced is compiled out.
 */
 2027static int softnet_seq_show(struct seq_file *seq, void *v)
 2028{
 2029	struct netif_rx_stats *s = v;
 2030
 2031	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 2032		   s->total, s->dropped, s->time_squeeze, s->throttled,
 2033		   s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
 2034		   s->fastroute_deferred_out,
 2035#if 0
 2036		   s->fastroute_latency_reduction
 2037#else
 2038		   s->cpu_collision
 2039#endif
 2040		  );
 2041	return 0;
 2042}
2043
/* Sequence iterator for the device list, used by dev_seq_open(). */
 2044static struct seq_operations dev_seq_ops = {
 2045	.start = dev_seq_start,
 2046	.next  = dev_seq_next,
 2047	.stop  = dev_seq_stop,
 2048	.show  = dev_seq_show,
 2049};
2050
/* open() handler: attach the device-list sequence iterator to @file. */
 2051static int dev_seq_open(struct inode *inode, struct file *file)
 2052{
 2053	return seq_open(file, &dev_seq_ops);
 2054}
2055
/* File operations registered for the "dev" proc entry in dev_proc_init(). */
 2056static struct file_operations dev_seq_fops = {
 2057	.owner	 = THIS_MODULE,
 2058	.open    = dev_seq_open,
 2059	.read    = seq_read,
 2060	.llseek  = seq_lseek,
 2061	.release = seq_release,
 2062};
2063
/* Sequence iterator over the per-CPU receive statistics, used by softnet_seq_open(). */
 2064static struct seq_operations softnet_seq_ops = {
 2065	.start = softnet_seq_start,
 2066	.next  = softnet_seq_next,
 2067	.stop  = softnet_seq_stop,
 2068	.show  = softnet_seq_show,
 2069};
2070
/* open() handler: attach the per-CPU statistics iterator to @file. */
 2071static int softnet_seq_open(struct inode *inode, struct file *file)
 2072{
 2073	return seq_open(file, &softnet_seq_ops);
 2074}
2075
/* File operations registered for the "softnet_stat" proc entry in dev_proc_init(). */
 2076static struct file_operations softnet_seq_fops = {
 2077	.owner	 = THIS_MODULE,
 2078	.open    = softnet_seq_open,
 2079	.read    = seq_read,
 2080	.llseek  = seq_lseek,
 2081	.release = seq_release,
 2082};
2083
2084#ifdef WIRELESS_EXT
2085extern int wireless_proc_init(void);
2086#else
2087#define wireless_proc_init() 0
2088#endif
2089
2090static int __init dev_proc_init(void)
2091{
2092	int rc = -ENOMEM;
2093
2094	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2095		goto out;
2096	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2097		goto out_dev;
2098	if (wireless_proc_init())
2099		goto out_softnet;
2100	rc = 0;
2101out:
2102	return rc;
2103out_softnet:
2104	proc_net_remove("softnet_stat");
2105out_dev:
2106	proc_net_remove("dev");
2107	goto out;
2108}
2109#else
2110#define dev_proc_init() 0
2111#endif	/* CONFIG_PROC_FS */
2112
2113
2114/**
2115 *	netdev_set_master	-	set up master/slave pair
2116 *	@slave: slave device
2117 *	@master: new master device
2118 *
2119 *	Changes the master device of the slave. Pass %NULL to break the
2120 *	bonding. The caller must hold the RTNL semaphore. On a failure
2121 *	a negative errno code is returned. On success the reference counts
2122 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2123 *	function returns zero.
2124 */
2125int netdev_set_master(struct net_device *slave, struct net_device *master)
2126{
2127	struct net_device *old = slave->master;
2128
2129	ASSERT_RTNL();
2130
2131	if (master) {
2132		if (old)
2133			return -EBUSY;
2134		dev_hold(master);
2135	}
2136
2137	slave->master = master;
2138	
2139	synchronize_net();
2140
2141	if (old)
2142		dev_put(old);
2143
2144	if (master)
2145		slave->flags |= IFF_SLAVE;
2146	else
2147		slave->flags &= ~IFF_SLAVE;
2148
2149	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2150	return 0;
2151}
2152
2153/**
2154 *	dev_set_promiscuity	- update promiscuity count on a device
2155 *	@dev: device
2156 *	@inc: modifier
2157 *
2158 *	Add or remove promsicuity from a device. While the count in the device
2159 *	remains above zero the interface remains promiscuous. Once it hits zero
2160 *	the device reverts back to normal filtering operation. A negative inc
2161 *	value is used to drop promiscuity on the device.
2162 */
2163void dev_set_promiscuity(struct net_device *dev, int inc)
2164{
2165	unsigned short old_flags = dev->flags;
2166
2167	dev->flags |= IFF_PROMISC;
2168	if ((dev->promiscuity += inc) == 0)
2169		dev->flags &= ~IFF_PROMISC;
2170	if (dev->flags ^ old_flags) {
2171		dev_mc_upload(dev);
2172		printk(KERN_INFO "device %s %s promiscuous mode\n",
2173		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2174		       					       "left");
2175	}
2176}
2177
2178/**
2179 *	dev_set_allmulti	- update allmulti count on a device
2180 *	@dev: device
2181 *	@inc: modifier
2182 *
2183 *	Add or remove reception of all multicast frames to a device. While the
2184 *	count in the device remains above zero the interface remains listening
2185 *	to all interfaces. Once it hits zero the device reverts back to normal
2186 *	filtering operation. A negative @inc value is used to drop the counter
2187 *	when releasing a resource needing all multicasts.
2188 */
2189
2190void dev_set_allmulti(struct net_device *dev, int inc)
2191{
2192	unsigned short old_flags = dev->flags;
2193
2194	dev->flags |= IFF_ALLMULTI;
2195	if ((dev->allmulti += inc) == 0)
2196		dev->flags &= ~IFF_ALLMULTI;
2197	if (dev->flags ^ old_flags)
2198		dev_mc_upload(dev);
2199}
2200
2201unsigned dev_get_flags(const struct net_device *dev)
2202{
2203	unsigned flags;
2204
2205	flags = (dev->flags & ~(IFF_PROMISC |
2206				IFF_ALLMULTI |
2207				IFF_RUNNING)) | 
2208		(dev->gflags & (IFF_PROMISC |
2209				IFF_ALLMULTI));
2210
2211	if (netif_running(dev) && netif_carrier_ok(dev))
2212		flags |= IFF_RUNNING;
2213
2214	return flags;
2215}
2216
2217int dev_change_flags(struct net_device *dev, unsigned flags)
2218{
2219	int ret;
2220	int old_flags = dev->flags;
2221
2222	/*
2223	 *	Set the flags on our device.
2224	 */
2225
2226	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2227			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2228			       IFF_AUTOMEDIA)) |
2229		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2230				    IFF_ALLMULTI));
2231
2232	/*
2233	 *	Load in the correct multicast list now the flags have changed.
2234	 */
2235
2236	dev_mc_upload(dev);
2237
2238	/*
2239	 *	Have we downed the interface. We handle IFF_UP ourselves
2240	 *	according to user attempts to set it, rather than blindly
2241	 *	setting it.
2242	 */
2243
2244	ret = 0;
2245	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2246		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2247
2248		if (!ret)
2249			dev_mc_upload(dev);
2250	}
2251
2252	if (dev->flags & IFF_UP &&
2253	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2254					  IFF_VOLATILE)))
2255		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2256
2257	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2258		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2259		dev->gflags ^= IFF_PROMISC;
2260		dev_set_promiscuity(dev, inc);
2261	}
2262
2263	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2264	   is important. Some (broken) drivers set IFF_PROMISC, when
2265	   IFF_ALLMULTI is requested not asking us and not reporting.
2266	 */
2267	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2268		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2269		dev->gflags ^= IFF_ALLMULTI;
2270		dev_set_allmulti(dev, inc);
2271	}
2272
2273	if (old_flags ^ dev->flags)
2274		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2275
2276	return ret;
2277}
2278
2279int dev_set_mtu(struct net_device *dev, int new_mtu)
2280{
2281	int err;
2282
2283	if (new_mtu == dev->mtu)
2284		return 0;
2285
2286	/*	MTU must be positive.	 */
2287	if (new_mtu < 0)
2288		return -EINVAL;
2289
2290	if (!netif_device_present(dev))
2291		return -ENODEV;
2292
2293	err = 0;
2294	if (dev->change_mtu)
2295		err = dev->change_mtu(dev, new_mtu);
2296	else
2297		dev->mtu = new_mtu;
2298	if (!err && dev->flags & IFF_UP)
2299		notifier_call_chain(&netdev_chain,
2300				    NETDEV_CHANGEMTU, dev);
2301	return err;
2302}
2303
2304int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2305{
2306	int err;
2307
2308	if (!dev->set_mac_address)
2309		return -EOPNOTSUPP;
2310	if (sa->sa_family != dev->type)
2311		return -EINVAL;
2312	if (!netif_device_present(dev))
2313		return -ENODEV;
2314	err = dev->set_mac_address(dev, sa);
2315	if (!err)
2316		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2317	return err;
2318}
2319
2320/*
2321 *	Perform the SIOCxIFxxx calls.
2322 */
2323static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2324{
2325	int err;
2326	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2327
2328	if (!dev)
2329		return -ENODEV;
2330
2331	switch (cmd) {
2332		case SIOCGIFFLAGS:	/* Get interface flags */
2333			ifr->ifr_flags = dev_get_flags(dev);
2334			return 0;
2335
2336		case SIOCSIFFLAGS:	/* Set interface flags */
2337			return dev_change_flags(dev, ifr->ifr_flags);
2338
2339		case SIOCGIFMETRIC:	/* Get the metric on the interface
2340					   (currently unused) */
2341			ifr->ifr_metric = 0;
2342			return 0;
2343
2344		case SIOCSIFMETRIC:	/* Set the metric on the interface
2345					   (currently unused) */
2346			return -EOPNOTSUPP;
2347
2348		case SIOCGIFMTU:	/* Get the MTU of a device */
2349			ifr->ifr_mtu = dev->mtu;
2350			return 0;
2351
2352		case SIOCSIFMTU:	/* Set the MTU of a device */
2353			return dev_set_mtu(dev, ifr->ifr_mtu);
2354
2355		case SIOCGIFHWADDR:
2356			if (!dev->addr_len)
2357				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2358			else
2359				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2360				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2361			ifr->ifr_hwaddr.sa_family = dev->type;
2362			return 0;
2363
2364		case SIOCSIFHWADDR:
2365			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2366
2367		case SIOCSIFHWBROADCAST:
2368			if (ifr->ifr_hwaddr.sa_family != dev->type)
2369				return -EINVAL;
2370			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2371			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2372			notifier_call_chain(&netdev_chain,
2373					    NETDEV_CHANGEADDR, dev);
2374			return 0;
2375
2376		case SIOCGIFMAP:
2377			ifr->ifr_map.mem_start = dev->mem_start;
2378			ifr->ifr_map.mem_end   = dev->mem_end;
2379			ifr->ifr_map.base_addr = dev->base_addr;
2380			ifr->ifr_map.irq       = dev->irq;
2381			ifr->ifr_map.dma       = dev->dma;
2382			ifr->ifr_map.port      = dev->if_port;
2383			return 0;
2384
2385		case SIOCSIFMAP:
2386			if (dev->set_config) {
2387				if (!netif_device_present(dev))
2388					return -ENODEV;
2389				return dev->set_config(dev, &ifr->ifr_map);
2390			}
2391			return -EOPNOTSUPP;
2392
2393		case SIOCADDMULTI:
2394			if (!dev->set_multicast_list ||
2395			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2396				return -EINVAL;
2397			if (!netif_device_present(dev))
2398				return -ENODEV;
2399			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2400					  dev->addr_len, 1);
2401
2402		case SIOCDELMULTI:
2403			if (!dev->set_multicast_list ||
2404			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2405				return -EINVAL;
2406			if (!netif_device_present(dev))
2407				return -ENODEV;
2408			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2409					     dev->addr_len, 1);
2410
2411		case SIOCGIFINDEX:
2412			ifr->ifr_ifindex = dev->ifindex;
2413			return 0;
2414
2415		case SIOCGIFTXQLEN:
2416			ifr->ifr_qlen = dev->tx_queue_len;
2417			return 0;
2418
2419		case SIOCSIFTXQLEN:
2420			if (ifr->ifr_qlen < 0)
2421				return -EINVAL;
2422			dev->tx_queue_len = ifr->ifr_qlen;
2423			return 0;
2424
2425		case SIOCSIFNAME:
2426			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2427			return dev_change_name(dev, ifr->ifr_newname);
2428
2429		/*
2430		 *	Unknown or private ioctl
2431		 */
2432
2433		default:
2434			if ((cmd >= SIOCDEVPRIVATE &&
2435			    cmd <= SIOCDEVPRIVATE + 15) ||
2436			    cmd == SIOCBONDENSLAVE ||
2437			    cmd == SIOCBONDRELEASE ||
2438			    cmd == SIOCBONDSETHWADDR ||
2439			    cmd == SIOCBONDSLAVEINFOQUERY ||
2440			    cmd == SIOCBONDINFOQUERY ||
2441			    cmd == SIOCBONDCHANGEACTIVE ||
2442			    cmd == SIOCGMIIPHY ||
2443			    cmd == SIOCGMIIREG ||
2444			    cmd == SIOCSMIIREG ||
2445			    cmd == SIOCBRADDIF ||
2446			    cmd == SIOCBRDELIF ||
2447			    cmd == SIOCWANDEV) {
2448				err = -EOPNOTSUPP;
2449				if (dev->do_ioctl) {
2450					if (netif_device_present(dev))
2451						err = dev->do_ioctl(dev, ifr,
2452								    cmd);
2453					else
2454						err = -ENODEV;
2455				}
2456			} else
2457				err = -EINVAL;
2458
2459	}
2460	return err;
2461}
2462
2463/*
2464 *	This function handles all "interface"-type I/O control requests. The actual
2465 *	'doing' part of this is dev_ifsioc above.
2466 */
2467
2468/**
2469 *	dev_ioctl	-	network device ioctl
2470 *	@cmd: command to issue
2471 *	@arg: pointer to a struct ifreq in user space
2472 *
2473 *	Issue ioctl functions to devices. This is normally called by the
2474 *	user space syscall interfaces but can sometimes be useful for
2475 *	other purposes. The return value is the return from the syscall if
2476 *	positive or a negative errno code on error.
2477 */
2478
2479int dev_ioctl(unsigned int cmd, void __user *arg)
2480{
2481	struct ifreq ifr;
2482	int ret;
2483	char *colon;
2484
2485	/* One special case: SIOCGIFCONF takes ifconf argument
2486	   and requires shared lock, because it sleeps writing
2487	   to user space.
2488	 */
2489
2490	if (cmd == SIOCGIFCONF) {
2491		rtnl_shlock();
2492		ret = dev_ifconf((char __user *) arg);
2493		rtnl_shunlock();
2494		return ret;
2495	}
2496	if (cmd == SIOCGIFNAME)
2497		return dev_ifname((struct ifreq __user *)arg);
2498
2499	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2500		return -EFAULT;
2501
2502	ifr.ifr_name[IFNAMSIZ-1] = 0;
2503
2504	colon = strchr(ifr.ifr_name, ':');
2505	if (colon)
2506		*colon = 0;
2507
2508	/*
2509	 *	See which interface the caller is talking about.
2510	 */
2511
2512	switch (cmd) {
2513		/*
2514		 *	These ioctl calls:
2515		 *	- can be done by all.
2516		 *	- atomic and do not require locking.
2517		 *	- return a value
2518		 */
2519		case SIOCGIFFLAGS:
2520		case SIOCGIFMETRIC:
2521		case SIOCGIFMTU:
2522		case SIOCGIFHWADDR:
2523		case SIOCGIFSLAVE:
2524		case SIOCGIFMAP:
2525		case SIOCGIFINDEX:
2526		case SIOCGIFTXQLEN:
2527			dev_load(ifr.ifr_name);
2528			read_lock(&dev_base_lock);
2529			ret = dev_ifsioc(&ifr, cmd);
2530			read_unlock(&dev_base_lock);
2531			if (!ret) {
2532				if (colon)
2533					*colon = ':';
2534				if (copy_to_user(arg, &ifr,
2535						 sizeof(struct ifreq)))
2536					ret = -EFAULT;
2537			}
2538			return ret;
2539
2540		case SIOCETHTOOL:
2541			dev_load(ifr.ifr_name);
2542			rtnl_lock();
2543			ret = dev_ethtool(&ifr);
2544			rtnl_unlock();
2545			if (!ret) {
2546				if (colon)
2547					*colon = ':';
2548				if (copy_to_user(arg, &ifr,
2549						 sizeof(struct ifreq)))
2550					ret = -EFAULT;
2551			}
2552			return ret;
2553
2554		/*
2555		 *	These ioctl calls:
2556		 *	- require superuser power.
2557		 *	- require strict serialization.
2558		 *	- return a value
2559		 */
2560		case SIOCGMIIPHY:
2561		case SIOCGMIIREG:
2562		case SIOCSIFNAME:
2563			if (!capable(CAP_NET_ADMIN))
2564				return -EPERM;
2565			dev_load(ifr.ifr_name);
2566			rtnl_lock();
2567			ret = dev_ifsioc(&ifr, cmd);
2568			rtnl_unlock();
2569			if (!ret) {
2570				if (colon)
2571					*colon = ':';
2572				if (copy_to_user(arg, &ifr,
2573						 sizeof(struct ifreq)))
2574					ret = -EFAULT;
2575			}
2576			return ret;
2577
2578		/*
2579		 *	These ioctl calls:
2580		 *	- require superuser power.
2581		 *	- require strict serialization.
2582		 *	- do not return a value
2583		 */
2584		case SIOCSIFFLAGS:
2585		case SIOCSIFMETRIC:
2586		case SIOCSIFMTU:
2587		case SIOCSIFMAP:
2588		case SIOCSIFHWADDR:
2589		case SIOCSIFSLAVE:
2590		case SIOCADDMULTI:
2591		case SIOCDELMULTI:
2592		case SIOCSIFHWBROADCAST:
2593		case SIOCSIFTXQLEN:
2594		case SIOCSMIIREG:
2595		case SIOCBONDENSLAVE:
2596		case SIOCBONDRELEASE:
2597		case SIOCBONDSETHWADDR:
2598		case SIOCBONDSLAVEINFOQUERY:
2599		case SIOCBONDINFOQUERY:
2600		case SIOCBONDCHANGEACTIVE:
2601		case SIOCBRADDIF:
2602		case SIOCBRDELIF:
2603			if (!capable(CAP_NET_ADMIN))
2604				return -EPERM;
2605			dev_load(ifr.ifr_name);
2606			rtnl_lock();
2607			ret = dev_ifsioc(&ifr, cmd);
2608			rtnl_unlock();
2609			return ret;
2610
2611		case SIOCGIFMEM:
2612			/* Get the per device memory space. We can add this but
2613			 * currently do not support it */
2614		case SIOCSIFMEM:
2615			/* Set the per device memory buffer space.
2616			 * Not applicable in our case */
2617		case SIOCSIFLINK:
2618			return -EINVAL;
2619
2620		/*
2621		 *	Unknown or private ioctl.
2622		 */
2623		default:
2624			if (cmd == SIOCWANDEV ||
2625			    (cmd >= SIOCDEVPRIVATE &&
2626			     cmd <= SIOCDEVPRIVATE + 15)) {
2627				dev_load(ifr.ifr_name);
2628				rtnl_lock();
2629				ret = dev_ifsioc(&ifr, cmd);
2630				rtnl_unlock();
2631				if (!ret && copy_to_user(arg, &ifr,
2632							 sizeof(struct ifreq)))
2633					ret = -EFAULT;
2634				return ret;
2635			}
2636#ifdef WIRELESS_EXT
2637			/* Take care of Wireless Extensions */
2638			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2639				/* If command is `set a parameter', or
2640				 * `get the encoding parameters', check if
2641				 * the user has the right to do it */
2642				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2643					if (!capable(CAP_NET_ADMIN))
2644						return -EPERM;
2645				}
2646				dev_load(ifr.ifr_name);
2647				rtnl_lock();
2648				/* Follow me in net/core/wireless.c */
2649				ret = wireless_process_ioctl(&ifr, cmd);
2650				rtnl_unlock();
2651				if (IW_IS_GET(cmd) &&
2652				    copy_to_user(arg, &ifr,
2653					    	 sizeof(struct ifreq)))
2654					ret = -EFAULT;
2655				return ret;
2656			}
2657#endif	/* WIRELESS_EXT */
2658			return -EINVAL;
2659	}
2660}
2661
2662
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	Candidates come from a static counter that is advanced on every
 *	try and reset to 1 whenever it is not positive; the first
 *	candidate that __dev_get_by_index() does not find is returned.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
2680
2681static int dev_boot_phase = 1;
2682
2683/* Delayed registration/unregisteration */
2684static DEFINE_SPINLOCK(net_todo_list_lock);
2685static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2686
2687static inline void net_set_todo(struct net_device *dev)
2688{
2689	spin_lock(&net_todo_list_lock);
2690	list_add_tail(&dev->todo_list, &net_todo_list);
2691	spin_unlock(&net_todo_list_lock);
2692}
2693
2694/**
2695 *	register_netdevice	- register a network device
2696 *	@dev: device to register
2697 *
2698 *	Take a completed network device structure and add it to the kernel
2699 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2700 *	chain. 0 is returned on success. A negative errno code is returned
2701 *	on a failure to set up the device, or if the name is a duplicate.
2702 *
2703 *	Callers must hold the rtnl semaphore. You may want
2704 *	register_netdev() instead of this.
2705 *
2706 *	BUGS:
2707 *	The locking appears insufficient to guarantee two parallel registers
2708 *	will not get the same name.
2709 */
2710
2711int register_netdevice(struct net_device *dev)
2712{
2713	struct hlist_head *head;
2714	struct hlist_node *p;
2715	int ret;
2716
2717	BUG_ON(dev_boot_phase);
2718	ASSERT_RTNL();
2719
2720	/* When net_device's are persistent, this will be fatal. */
2721	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2722
2723	spin_lock_init(&dev->queue_lock);
2724	spin_lock_init(&dev->xmit_lock);
2725	dev->xmit_lock_owner = -1;
2726#ifdef CONFIG_NET_CLS_ACT
2727	spin_lock_init(&dev->ingress_lock);
2728#endif
2729
2730	ret = alloc_divert_blk(dev);
2731	if (ret)
2732		goto out;
2733
2734	dev->iflink = -1;
2735
2736	/* Init, if this function is available */
2737	if (dev->init) {
2738		ret = dev->init(dev);
2739		if (ret) {
2740			if (ret > 0)
2741				ret = -EIO;
2742			goto out_err;
2743		}
2744	}
2745 
2746	if (!dev_valid_name(dev->name)) {
2747		ret = -EINVAL;
2748		goto out_err;
2749	}
2750
2751	dev->ifindex = dev_new_index();
2752	if (dev->iflink == -1)
2753		dev->iflink = dev->ifindex;
2754
2755	/* Check for existence of name */
2756	head = dev_name_hash(dev->name);
2757	hlist_for_each(p, head) {
2758		struct net_device *d
2759			= hlist_entry(p, struct net_device, name_hlist);
2760		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2761			ret = -EEXIST;
2762 			goto out_err;
2763		}
2764 	}
2765
2766	/* Fix illegal SG+CSUM combinations. */
2767	if ((dev->features & NETIF_F_SG) &&
2768	    !(dev->features & (NETIF_F_IP_CSUM |
2769			       NETIF_F_NO_CSUM |
2770			       NETIF_F_HW_CSUM))) {
2771		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2772		       dev->name);
2773		dev->features &= ~NETIF_F_SG;
2774	}
2775
2776	/* TSO requires that SG is present as well. */
2777	if ((dev->features & NETIF_F_TSO) &&
2778	    !(dev->features & NETIF_F_SG)) {
2779		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2780		       dev->name);
2781		dev->features &= ~NETIF_F_TSO;
2782	}
2783
2784	/*
2785	 *	nil rebuild_header routine,
2786	 *	that should be never called and used as just bug trap.
2787	 */
2788
2789	if (!dev->rebuild_header)
2790		dev->rebuild_header = default_rebuild_header;
2791
2792	/*
2793	 *	Default initial state at registry is that the
2794	 *	device is present.
2795	 */
2796
2797	set_bit(__LINK_STATE_PRESENT, &dev->state);
2798
2799	dev->next = NULL;
2800	dev_init_scheduler(dev);
2801	write_lock_bh(&dev_base_lock);
2802	*dev_tail = dev;
2803	dev_tail = &dev->next;
2804	hlist_add_head(&dev->name_hlist, head);
2805	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2806	dev_hold(dev);
2807	dev->reg_state = NETREG_REGISTERING;
2808	write_unlock_bh(&dev_base_lock);
2809
2810	/* Notify protocols, that a new device appeared. */
2811	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2812
2813	/* Finish registration after unlock */
2814	net_set_todo(dev);
2815	ret = 0;
2816
2817out:
2818	return ret;
2819out_err:
2820	free_divert_blk(dev);
2821	goto out;
2822}
2823
2824/**
2825 *	register_netdev	- register a network device
2826 *	@dev: device to register
2827 *
2828 *	Take a completed network device structure and add it to the kernel
2829 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2830 *	chain. 0 is returned on success. A negative errno code is returned
2831 *	on a failure to set up the device, or if the name is a duplicate.
2832 *
2833 *	This is a wrapper around register_netdev that takes the rtnl semaphore
2834 *	and expands the device name if you passed a format string to
2835 *	alloc_netdev.
2836 */
2837int register_netdev(struct net_device *dev)
2838{
2839	int err;
2840
2841	rtnl_lock();
2842
2843	/*
2844	 * If the name is a format string the caller wants us to do a
2845	 * name allocation.
2846	 */
2847	if (strchr(dev->name, '%')) {
2848		err = dev_alloc_name(dev, dev->name);
2849		if (err < 0)
2850			goto out;
2851	}
2852	
2853	/*
2854	 * Back compatibility hook. Kill this one in 2.5
2855	 */
2856	if (dev->name[0] == 0 || dev->name[0] == ' ') {
2857		err = dev_alloc_name(dev, "eth%d");
2858		if (err < 0)
2859			goto out;
2860	}
2861
2862	err = register_netdevice(dev);
2863out:
2864	rtnl_unlock();
2865	return err;
2866}
2867EXPORT_SYMBOL(register_netdev);
2868
2869/*
2870 * netdev_wait_allrefs - wait until all references are gone.
2871 *
2872 * This is called when unregistering network devices.
2873 *
2874 * Any protocol or device that holds a reference should register
2875 * for netdevice notification, and cleanup and put back the
2876 * reference if they receive an UNREGISTER event.
2877 * We can get stuck here if buggy protocols don't correctly
2878 * call dev_put. 
2879 */
2880static void netdev_wait_allrefs(struct net_device *dev)
2881{
2882	unsigned long rebroadcast_time, warning_time;
2883
2884	rebroadcast_time = warning_time = jiffies;
2885	while (atomic_read(&dev->refcnt) != 0) {
2886		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2887			rtnl_shlock();
2888
2889			/* Rebroadcast unregister notification */
2890			notifier_call_chain(&netdev_chain,
2891					    NETDEV_UNREGISTER, dev);
2892
2893			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2894				     &dev->state)) {
2895				/* We must not have linkwatch events
2896				 * pending on unregister. If this
2897				 * happens, we simply run the queue
2898				 * unscheduled, resulting in a noop
2899				 * for this device.
2900				 */
2901				linkwatch_run_queue();
2902			}
2903
2904			rtnl_shunlock();
2905
2906			rebroadcast_time = jiffies;
2907		}
2908
2909		msleep(250);
2910
2911		if (time_after(jiffies, warning_time + 10 * HZ)) {
2912			printk(KERN_EMERG "unregister_netdevice: "
2913			       "waiting for %s to become free. Usage "
2914			       "count = %d\n",
2915			       dev->name, atomic_read(&dev->refcnt));
2916			warning_time = jiffies;
2917		}
2918	}
2919}
2920
2921/* The sequence is:
2922 *
2923 *	rtnl_lock();
2924 *	...
2925 *	register_netdevice(x1);
2926 *	register_netdevice(x2);
2927 *	...
2928 *	unregister_netdevice(y1);
2929 *	unregister_netdevice(y2);
2930 *      ...
2931 *	rtnl_unlock();
2932 *	free_netdev(y1);
2933 *	free_netdev(y2);
2934 *
2935 * We are invoked by rtnl_unlock() after it drops the semaphore.
2936 * This allows us to deal with problems:
2937 * 1) We can create/delete sysfs objects which invoke hotplug
2938 *    without deadlocking with linkwatch via keventd.
2939 * 2) Since we run with the RTNL semaphore not held, we can sleep
2940 *    safely in order to wait for the netdev refcnt to drop to zero.
2941 */
2942static DECLARE_MUTEX(net_todo_run_mutex);
2943void netdev_run_todo(void)
2944{
2945	struct list_head list = LIST_HEAD_INIT(list);
2946	int err;
2947
2948
2949	/* Need to guard against multiple cpu's getting out of order. */
2950	down(&net_todo_run_mutex);
2951
2952	/* Not safe to do outside the semaphore.  We must not return
2953	 * until all unregister events invoked by the local processor
2954	 * have been completed (either by this todo run, or one on
2955	 * another cpu).
2956	 */
2957	if (list_empty(&net_todo_list))
2958		goto out;
2959
2960	/* Snapshot list, allow later requests */
2961	spin_lock(&net_todo_list_lock);
2962	list_splice_init(&net_todo_list, &list);
2963	spin_unlock(&net_todo_list_lock);
2964		
2965	while (!list_empty(&list)) {
2966		struct net_device *dev
2967			= list_entry(list.next, struct net_device, todo_list);
2968		list_del(&dev->todo_list);
2969
2970		switch(dev->reg_state) {
2971		case NETREG_REGISTERING:
2972			err = netdev_register_sysfs(dev);
2973			if (err)
2974				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2975				       dev->name, err);
2976			dev->reg_state = NETREG_REGISTERED;
2977			break;
2978
2979		case NETREG_UNREGISTERING:
2980			netdev_unregister_sysfs(dev);
2981			dev->reg_state = NETREG_UNREGISTERED;
2982
2983			netdev_wait_allrefs(dev);
2984
2985			/* paranoia */
2986			BUG_ON(atomic_read(&dev->refcnt));
2987			BUG_TRAP(!dev->ip_ptr);
2988			BUG_TRAP(!dev->ip6_ptr);
2989			BUG_TRAP(!dev->dn_ptr);
2990
2991
2992			/* It must be the very last action, 
2993			 * after this 'dev' may point to freed up memory.
2994			 */
2995			if (dev->destructor)
2996				dev->destructor(dev);
2997			break;
2998
2999		default:
3000			printk(KERN_ERR "network todo '%s' but state %d\n",
3001			       dev->name, dev->reg_state);
3002			break;
3003		}
3004	}
3005
3006out:
3007	up(&net_todo_run_mutex);
3008}
3009
3010/**
3011 *	alloc_netdev - allocate network device
3012 *	@sizeof_priv:	size of private data to allocate space for
3013 *	@name:		device name format string
3014 *	@setup:		callback to initialize device
3015 *
3016 *	Allocates a struct net_device with private data area for driver use
3017 *	and performs basic initialization.
3018 */
3019struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3020		void (*setup)(struct net_device *))
3021{
3022	void *p;
3023	struct net_device *dev;
3024	int alloc_size;
3025
3026	/* ensure 32-byte alignment of both the device and private area */
3027	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3028	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3029
3030	p = kmalloc(alloc_size, GFP_KERNEL);
3031	if (!p) {
3032		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3033		return NULL;
3034	}
3035	memset(p, 0, alloc_size);
3036
3037	dev = (struct net_device *)
3038		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3039	dev->padded = (char *)dev - (char *)p;
3040
3041	if (sizeof_priv)
3042		dev->priv = netdev_priv(dev);
3043
3044	setup(dev);
3045	strcpy(dev->name, name);
3046	return dev;
3047}
3048EXPORT_SYMBOL(alloc_netdev);
3049
3050/**
3051 *	free_netdev - free network device
3052 *	@dev: device
3053 *
3054 *	This function does the last stage of destroying an allocated device 
3055 * 	interface. The reference to the device object is released.  
3056 *	If this is the last reference then it will be freed.
3057 */
3058void free_netdev(struct net_device *dev)
3059{
3060#ifdef CONFIG_SYSFS
3061	/*  Compatiablity with error handling in drivers */
3062	if (dev->reg_state == NETREG_UNINITIALIZED) {
3063		kfree((char *)dev - dev->padded);
3064		return;
3065	}
3066
3067	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3068	dev->reg_state = NETREG_RELEASED;
3069
3070	/* will free via class release */
3071	class_device_put(&dev->class_dev);
3072#else
3073	kfree((char *)dev - dev->padded);
3074#endif
3075}
3076 
/*
 * Synchronize with packet receive processing.
 *
 * Annotated with might_sleep(); the wait itself is done by
 * synchronize_kernel().
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_kernel();
}
3083
3084/**
3085 *	unregister_netdevice - remove device from the kernel
3086 *	@dev: device
3087 *
3088 *	This function shuts down a device interface and removes it
3089 *	from the kernel tables. On success 0 is returned, on a failure
3090 *	a negative errno code is returned.
3091 *
3092 *	Callers must hold the rtnl semaphore.  You may want
3093 *	unregister_netdev() instead of this.
3094 */
3095
3096int unregister_netdevice(struct net_device *dev)
3097{
3098	struct net_device *d, **dp;
3099
3100	BUG_ON(dev_boot_phase);
3101	ASSERT_RTNL();
3102
3103	/* Some devices call without registering for initialization unwind. */
3104	if (dev->reg_state == NETREG_UNINITIALIZED) {
3105		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3106				  "was registered\n", dev->name, dev);
3107		return -ENODEV;
3108	}
3109
3110	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3111
3112	/* If device is running, close it first. */
3113	if (dev->flags & IFF_UP)
3114		dev_close(dev);
3115
3116	/* And unlink it from device chain. */
3117	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3118		if (d == dev) {
3119			write_lock_bh(&dev_base_lock);
3120			hlist_del(&dev->name_hlist);
3121			hlist_del(&dev->index_hlist);
3122			if (dev_tail == &dev->next)
3123				dev_tail = dp;
3124			*dp = d->next;
3125			write_unlock_bh(&dev_base_lock);
3126			break;
3127		}
3128	}
3129	if (!d) {
3130		printk(KERN_ERR "unregister net_device: '%s' not found\n",
3131		       dev->name);
3132		return -ENODEV;
3133	}
3134
3135	dev->reg_state = NETREG_UNREGISTERING;
3136
3137	synchronize_net();
3138
3139	/* Shutdown queueing discipline. */
3140	dev_shutdown(dev);
3141
3142	
3143	/* Notify protocols, that we are about to destroy
3144	   this device. They should clean all the things.
3145	*/
3146	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3147	
3148	/*
3149	 *	Flush the multicast chain
3150	 */
3151	dev_mc_discard(dev);
3152
3153	if (dev->uninit)
3154		dev->uninit(dev);
3155
3156	/* Notifier chain MUST detach us from master device. */
3157	BUG_TRAP(!dev->master);
3158
3159	free_divert_blk(dev);
3160
3161	/* Finish processing unregister after unlock */
3162	net_set_todo(dev);
3163
3164	synchronize_net();
3165
3166	dev_put(dev);
3167	return 0;
3168}
3169
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. Nothing is returned; the result of
 *	unregister_netdevice() is discarded.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3190
#ifdef CONFIG_HOTPLUG_CPU
/*
 *	CPU hotplug notifier. On CPU_DEAD, the dead CPU's completion and
 *	output queues are appended to the current CPU's queues, the TX
 *	softirq is raised, and the dead CPU's input packet queue is fed
 *	back through netif_rx(). Other actions are ignored.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **skb_link;
	struct net_device **out_link;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Walk to the end of our completion_queue ... */
	for (skb_link = &sd->completion_queue; *skb_link;
	     skb_link = &(*skb_link)->next)
		;
	/* ... and hang the offline CPU's completion queue on it. */
	*skb_link = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Same for the output_queue, linked through next_sched. */
	for (out_link = &sd->output_queue; *out_link;
	     out_link = &(*out_link)->next_sched)
		;
	*out_link = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	for (;;) {
		skb = __skb_dequeue(&oldsd->input_pkt_queue);
		if (!skb)
			break;
		netif_rx(skb);
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
3236
3237
3238/*
3239 *	Initialize the DEV module. At boot time this walks the device list and
3240 *	unhooks any devices that fail to initialise (normally hardware not
3241 *	present) and leaves us with a valid list of present and active devices.
3242 *
3243 */
3244
3245/*
3246 *       This is called single threaded during boot, so no need
3247 *       to take the rtnl semaphore.
3248 */
3249static int __init net_dev_init(void)
3250{
3251	int i, rc = -ENOMEM;
3252
3253	BUG_ON(!dev_boot_phase);
3254
3255	net_random_init();
3256
3257	if (dev_proc_init())
3258		goto out;
3259
3260	if (netdev_sysfs_init())
3261		goto out;
3262
3263	INIT_LIST_HEAD(&ptype_all);
3264	for (i = 0; i < 16; i++) 
3265		INIT_LIST_HEAD(&ptype_base[i]);
3266
3267	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3268		INIT_HLIST_HEAD(&dev_name_head[i]);
3269
3270	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3271		INIT_HLIST_HEAD(&dev_index_head[i]);
3272
3273	/*
3274	 *	Initialise the packet receive queues.
3275	 */
3276
3277	for (i = 0; i < NR_CPUS; i++) {
3278		struct softnet_data *queue;
3279
3280		queue = &per_cpu(softnet_data, i);
3281		skb_queue_head_init(&queue->input_pkt_queue);
3282		queue->throttle = 0;
3283		queue->cng_level = 0;
3284		queue->avg_blog = 10; /* arbitrary non-zero */
3285		queue->completion_queue = NULL;
3286		INIT_LIST_HEAD(&queue->poll_list);
3287		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3288		queue->backlog_dev.weight = weight_p;
3289		queue->backlog_dev.poll = process_backlog;
3290		atomic_set(&queue->backlog_dev.refcnt, 1);
3291	}
3292
3293#ifdef OFFLINE_SAMPLE
3294	samp_timer.expires = jiffies + (10 * HZ);
3295	add_timer(&samp_timer);
3296#endif
3297
3298	dev_boot_phase = 0;
3299
3300	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3301	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3302
3303	hotcpu_notifier(dev_cpu_callback, 0);
3304	dst_init();
3305	dev_mcast_init();
3306	rc = 0;
3307out:
3308	return rc;
3309}
3310
3311subsys_initcall(net_dev_init);
3312
3313EXPORT_SYMBOL(__dev_get_by_index);
3314EXPORT_SYMBOL(__dev_get_by_name);
3315EXPORT_SYMBOL(__dev_remove_pack);
3316EXPORT_SYMBOL(__skb_linearize);
3317EXPORT_SYMBOL(dev_add_pack);
3318EXPORT_SYMBOL(dev_alloc_name);
3319EXPORT_SYMBOL(dev_close);
3320EXPORT_SYMBOL(dev_get_by_flags);
3321EXPORT_SYMBOL(dev_get_by_index);
3322EXPORT_SYMBOL(dev_get_by_name);
3323EXPORT_SYMBOL(dev_ioctl);
3324EXPORT_SYMBOL(dev_open);
3325EXPORT_SYMBOL(dev_queue_xmit);
3326EXPORT_SYMBOL(dev_remove_pack);
3327EXPORT_SYMBOL(dev_set_allmulti);
3328EXPORT_SYMBOL(dev_set_promiscuity);
3329EXPORT_SYMBOL(dev_change_flags);
3330EXPORT_SYMBOL(dev_set_mtu);
3331EXPORT_SYMBOL(dev_set_mac_address);
3332EXPORT_SYMBOL(free_netdev);
3333EXPORT_SYMBOL(netdev_boot_setup_check);
3334EXPORT_SYMBOL(netdev_set_master);
3335EXPORT_SYMBOL(netdev_state_change);
3336EXPORT_SYMBOL(netif_receive_skb);
3337EXPORT_SYMBOL(netif_rx);
3338EXPORT_SYMBOL(register_gifconf);
3339EXPORT_SYMBOL(register_netdevice);
3340EXPORT_SYMBOL(register_netdevice_notifier);
3341EXPORT_SYMBOL(skb_checksum_help);
3342EXPORT_SYMBOL(synchronize_net);
3343EXPORT_SYMBOL(unregister_netdevice);
3344EXPORT_SYMBOL(unregister_netdevice_notifier);
3345EXPORT_SYMBOL(net_enable_timestamp);
3346EXPORT_SYMBOL(net_disable_timestamp);
3347EXPORT_SYMBOL(dev_get_flags);
3348
3349#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3350EXPORT_SYMBOL(br_handle_frame_hook);
3351EXPORT_SYMBOL(br_fdb_get_hook);
3352EXPORT_SYMBOL(br_fdb_put_hook);
3353#endif
3354
3355#ifdef CONFIG_KMOD
3356EXPORT_SYMBOL(dev_load);
3357#endif
3358
3359EXPORT_PER_CPU_SYMBOL(softnet_data);