2 * NET3 Protocol independent device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set to 2
24 * if register_netdev gets called before
25 * net_dev_init & also removed a few lines
26 * of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant stunts to
29 * keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into drivers
34 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
35 * Alan Cox : 100 backlog just doesn't cut it when
36 * you start doing multicast video 8)
37 * Alan Cox : Rewrote net_bh and list manager.
38 * Alan Cox : Fix ETH_P_ALL echoback lengths.
39 * Alan Cox : Took out transmit every packet pass
40 * Saved a few bytes in the ioctl handler
41 * Alan Cox : Network driver sets packet type before calling netif_rx. Saves
42 * a function call a packet.
43 * Alan Cox : Hashed net_bh()
44 * Richard Kooijman: Timestamp fixes.
45 * Alan Cox : Wrong field in SIOCGIFDSTADDR
46 * Alan Cox : Device lock protection.
47 * Alan Cox : Fixed nasty side effect of device close changes.
48 * Rudi Cilibrasi : Pass the right thing to set_mac_address()
49 * Dave Miller : 32bit quantity for the device lock to make it work out
51 * Bjorn Ekwall : Added KERNELD hack.
52 * Alan Cox : Cleaned up the backlog initialise.
53 * Craig Metz : SIOCGIFCONF fix if space for under
55 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
56 * is no device open function.
57 * Andi Kleen : Fix error reporting for SIOCGIFCONF
58 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
59 * Cyrus Durgin : Cleaned for KMOD
60 * Adam Sulmicki : Bug Fix : Network Device Unload
61 * A network device unload needs to purge
63 * Paul Rusty Russell : SIOCSIFNAME
64 * Pekka Riikonen : Netdev boot-time settings code
65 * Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt
66 * J Hadi Salim : - Backlog queue sampling
67 * - netif_rx() feedback
70 #include <asm/uaccess.h>
71 #include <asm/system.h>
72 #include <asm/bitops.h>
73 #include <linux/config.h>
74 #include <linux/types.h>
75 #include <linux/kernel.h>
76 #include <linux/sched.h>
77 #include <linux/string.h>
79 #include <linux/socket.h>
80 #include <linux/sockios.h>
81 #include <linux/errno.h>
82 #include <linux/interrupt.h>
83 #include <linux/if_ether.h>
84 #include <linux/netdevice.h>
85 #include <linux/etherdevice.h>
86 #include <linux/notifier.h>
87 #include <linux/skbuff.h>
88 #include <linux/brlock.h>
90 #include <linux/rtnetlink.h>
91 #include <linux/proc_fs.h>
92 #include <linux/stat.h>
93 #include <linux/if_bridge.h>
94 #include <linux/divert.h>
96 #include <net/pkt_sched.h>
97 #include <net/profile.h>
98 #include <net/checksum.h>
99 #include <linux/highmem.h>
100 #include <linux/init.h>
101 #include <linux/kmod.h>
102 #include <linux/module.h>
103 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
104 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
105 #include <net/iw_handler.h>
106 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
108 extern int plip_init(void);
112 /* This define, if set, will randomly drop a packet when congestion
113 * is more than moderate. It helps fairness in the multi-interface
114 * case when one of them is a hog, but it kills performance for the
115 * single interface case so it is off now by default.
119 /* Setting this will sample the queue lengths and thus congestion
120 * via a timer instead of as each packet is received.
122 #undef OFFLINE_SAMPLE
124 NET_PROFILE_DEFINE(dev_queue_xmit)
125 NET_PROFILE_DEFINE(softnet_process)
127 const char *if_port_text[] = {
138 * The list of packet types we will receive (as opposed to discard)
139 * and the routines to invoke.
141 * Why 16. Because with 16 the only overlap we get on a hash of the
142 * low nibble of the protocol value is RARP/SNAP/X.25.
144 * NOTE: That is no longer true with the addition of VLAN tags. Not
145 * sure which should go first, but I bet it won't make much
146 * difference if we are running VLANs. The good news is that
147 * this protocol won't be in the list unless compiled in, so
148 * the average user (w/out VLANs) will not be adversely affected.
165 static struct packet_type *ptype_base[16]; /* 16 way hashed list */
166 static struct packet_type *ptype_all = NULL; /* Taps */
168 #ifdef OFFLINE_SAMPLE
169 static void sample_queue(unsigned long dummy);
170 static struct timer_list samp_timer = { function: sample_queue };
173 #ifdef CONFIG_HOTPLUG
174 static int net_run_sbin_hotplug(struct net_device *dev, char *action);
176 #define net_run_sbin_hotplug(dev, action) ({ 0; })
183 static struct notifier_block *netdev_chain=NULL;
186 * Device drivers call our routines to queue packets here. We empty the
187 * queue in the local softnet handler.
189 struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
191 #ifdef CONFIG_NET_FASTROUTE
192 int netdev_fastroute;
193 int netdev_fastroute_obstacles;
197 /******************************************************************************************
199 Protocol management and registration routines
201 *******************************************************************************************/
210 * Add a protocol ID to the list. Now that the input handler is
211 * smarter we can dispense with all the messy stuff that used to be
214 * BEWARE!!! Protocol handlers, mangling input packets,
215 * MUST BE last in hash buckets and checking protocol handlers
216 * MUST start from promiscuous ptype_all chain in net_bh.
217 * It is true now, do not change it.
218 * Explanation follows: if protocol handler, mangling packet, will
219 * be the first on list, it is not able to sense, that packet
220 * is cloned and should be copied-on-write, so that it will
221 * change it and subsequent readers will get broken packet.
226 * dev_add_pack - add packet handler
227 * @pt: packet type declaration
229 * Add a protocol handler to the networking stack. The passed &packet_type
230 * is linked into kernel lists and may not be freed until it has been
231 * removed from the kernel lists.
/*
 * dev_add_pack: link @pt into the receive handler lists.
 * The visible update runs between br_write_lock_bh() and
 * br_write_unlock_bh() on BR_NETPROTO_LOCK.
 */
234 void dev_add_pack(struct packet_type *pt)
238 br_write_lock_bh(BR_NETPROTO_LOCK);
240 #ifdef CONFIG_NET_FASTROUTE
241 /* Hack to detect packet socket */
/* A handler whose ->data is neither NULL nor 1 is counted as a fastroute obstacle. */
242 if ((pt->data) && ((int)(pt->data)!=1)) {
243 netdev_fastroute_obstacles++;
244 dev_clear_fastroute(pt->dev);
/*
 * ETH_P_ALL is special-cased. The branch body is not part of this
 * excerpt; presumably it links @pt on the ptype_all tap list (confirm).
 */
247 if (pt->type == htons(ETH_P_ALL)) {
/* Bucket index is the low 4 bits of the host-order protocol; @pt is pushed at the bucket head. */
252 hash=ntohs(pt->type)&15;
253 pt->next = ptype_base[hash];
254 ptype_base[hash] = pt;
256 br_write_unlock_bh(BR_NETPROTO_LOCK);
261 * dev_remove_pack - remove packet handler
262 * @pt: packet type declaration
264 * Remove a protocol handler that was previously added to the kernel
265 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
266 * from the kernel lists and can be freed or reused once this function
/*
 * dev_remove_pack: find @pt on its handler list and take it off,
 * all under the BR_NETPROTO_LOCK write lock.
 */
270 void dev_remove_pack(struct packet_type *pt)
/* pt1 holds the address of the link being examined (a list head or a ->next field). */
272 struct packet_type **pt1;
274 br_write_lock_bh(BR_NETPROTO_LOCK);
/* ETH_P_ALL is special-cased; the branch body is not visible in this excerpt. */
276 if (pt->type == htons(ETH_P_ALL)) {
/* Same bucket computation as dev_add_pack(): low 4 bits of the host-order type. */
280 pt1=&ptype_base[ntohs(pt->type)&15];
283 for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
286 #ifdef CONFIG_NET_FASTROUTE
/* Reverse of the obstacle accounting in dev_add_pack(); the guarding test is on an elided line. */
288 netdev_fastroute_obstacles--;
290 br_write_unlock_bh(BR_NETPROTO_LOCK);
/* Second unlock belongs to the not-found path, as the warning below indicates. */
294 br_write_unlock_bh(BR_NETPROTO_LOCK);
295 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
298 /******************************************************************************
300 Device Boot-time Settings Routines
302 *******************************************************************************/
304 /* Boot time configuration table */
305 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
308 * netdev_boot_setup_add - add new setup entry
309 * @name: name of the device
310 * @map: configured settings for the device
312 * Adds new setup entry to the dev_boot_setup list. The function
313 * returns 0 on error and 1 on success. This is a generic routine to
/*
 * netdev_boot_setup_add: store (@name, @map) in the first free slot of
 * the boot setup table. The table pointer s is assigned on a line that
 * is not part of this excerpt.
 */
316 int netdev_boot_setup_add(char *name, struct ifmap *map)
318 struct netdev_boot_setup *s;
322 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
/* A slot is treated as free when its name is empty or begins with a space. */
323 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
324 memset(s[i].name, 0, sizeof(s[i].name));
/*
 * NOTE(review): strcpy() is unbounded and no length check on @name is
 * visible here; a name longer than s[i].name would overrun it.
 */
325 strcpy(s[i].name, name);
326 memcpy(&s[i].map, map, sizeof(s[i].map));
/* i reaching NETDEV_BOOT_SETUP_MAX means the loop ran to completion without breaking out. */
331 if (i >= NETDEV_BOOT_SETUP_MAX)
338 * netdev_boot_setup_check - check boot time settings
339 * @dev: the netdevice
341 * Check boot time settings for the device.
342 * The found settings are set for the device to be used
343 * later in the device probing.
344 * Returns 0 if no settings found, 1 if they are.
/*
 * netdev_boot_setup_check: copy stored boot settings (irq, base_addr,
 * mem_start, mem_end) into @dev when a table entry matches its name.
 */
346 int netdev_boot_setup_check(struct net_device *dev)
348 struct netdev_boot_setup *s;
352 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
/*
 * Only used slots are considered. The comparison length is
 * strlen(s[i].name), so this is a prefix match: an entry matches any
 * device whose name starts with the stored name.
 */
353 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
354 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
355 dev->irq = s[i].map.irq;
356 dev->base_addr = s[i].map.base_addr;
357 dev->mem_start = s[i].map.mem_start;
358 dev->mem_end = s[i].map.mem_end;
366 * Saves at boot time configured settings for any netdevice.
/*
 * netdev_boot_setup: handler for the "netdev=" boot option. Builds a
 * struct ifmap from the parsed integers and records it with
 * netdev_boot_setup_add().
 */
368 int __init netdev_boot_setup(char *str)
/* str is replaced by the return value of get_options(); that value is later used as the entry name. */
373 str = get_options(str, ARRAY_SIZE(ints), ints);
/* Start from an all-zero map; only some fields are filled in below. */
378 memset(&map, 0, sizeof(map));
/* ints[2..4] feed base_addr, mem_start and mem_end; the guards on each assignment are elided. */
382 map.base_addr = ints[2];
384 map.mem_start = ints[3];
386 map.mem_end = ints[4];
388 /* Add new entry to the list */
389 return netdev_boot_setup_add(str, &map);
392 __setup("netdev=", netdev_boot_setup);
394 /*****************************************************************************************
396 Device Interface Subroutines
398 ******************************************************************************************/
401 * __dev_get_by_name - find a device by its name
402 * @name: name to find
404 * Find an interface by name. Must be called under RTNL semaphore
405 * or @dev_base_lock. If the name is found a pointer to the device
406 * is returned. If the name is not found then %NULL is returned. The
407 * reference counters are not incremented so the caller must be
408 * careful with locks.
/*
 * __dev_get_by_name: linear walk of the dev_base list comparing names.
 * No lock is taken and no reference is acquired in the visible lines.
 */
412 struct net_device *__dev_get_by_name(const char *name)
414 struct net_device *dev;
416 for (dev = dev_base; dev != NULL; dev = dev->next) {
/* At most IFNAMSIZ characters of the two names are compared. */
417 if (strncmp(dev->name, name, IFNAMSIZ) == 0)
424 * dev_get_by_name - find a device by its name
425 * @name: name to find
427 * Find an interface by name. This can be called from any
428 * context and does its own locking. The returned handle has
429 * the usage count incremented and the caller must use dev_put() to
430 * release it when it is no longer needed. %NULL is returned if no
431 * matching device is found.
/*
 * dev_get_by_name: locked wrapper around __dev_get_by_name().
 * The lookup runs with dev_base_lock read-held. The lines between the
 * lookup and the unlock are not part of this excerpt.
 */
434 struct net_device *dev_get_by_name(const char *name)
436 struct net_device *dev;
438 read_lock(&dev_base_lock);
439 dev = __dev_get_by_name(name);
442 read_unlock(&dev_base_lock);
447 Return value is changed to int to prevent illegal usage in future.
448 It is still legal to use to check for device existence.
450 User should understand, that the result returned by this function
451 is meaningless, if it was not issued under rtnl semaphore.
455 * dev_get - test if a device exists
456 * @name: name to test for
458 * Test if a name exists. Returns true if the name is found. In order
459 * to be sure the name is not allocated or removed during the test the
460 * caller must hold the rtnl semaphore.
462 * This function primarily exists for back compatibility with older
/*
 * dev_get: existence test by name. The lookup is done under the
 * dev_base_lock read lock, which is released immediately afterwards, so
 * the answer is only a snapshot unless the caller provides its own
 * serialisation.
 */
466 int dev_get(const char *name)
468 struct net_device *dev;
470 read_lock(&dev_base_lock);
471 dev = __dev_get_by_name(name);
472 read_unlock(&dev_base_lock);
477 * __dev_get_by_index - find a device by its ifindex
478 * @ifindex: index of device
480 * Search for an interface by index. Returns %NULL if the device
481 * is not found or a pointer to the device. The device has not
482 * had its reference counter increased so the caller must be careful
483 * about locking. The caller must hold either the RTNL semaphore
/*
 * __dev_get_by_index: linear walk of the dev_base list looking for a
 * device whose ifindex equals @ifindex. No lock is taken and no
 * reference is acquired in the visible lines.
 */
487 struct net_device * __dev_get_by_index(int ifindex)
489 struct net_device *dev;
491 for (dev = dev_base; dev != NULL; dev = dev->next) {
492 if (dev->ifindex == ifindex)
500 * dev_get_by_index - find a device by its ifindex
501 * @ifindex: index of device
503 * Search for an interface by index. Returns NULL if the device
504 * is not found or a pointer to the device. The device returned has
505 * had a reference added and the pointer is safe until the user calls
506 * dev_put to indicate they have finished with it.
/*
 * dev_get_by_index: locked wrapper around __dev_get_by_index().
 * The lookup runs with dev_base_lock read-held. The lines between the
 * lookup and the unlock are not part of this excerpt.
 */
509 struct net_device * dev_get_by_index(int ifindex)
511 struct net_device *dev;
513 read_lock(&dev_base_lock);
514 dev = __dev_get_by_index(ifindex);
517 read_unlock(&dev_base_lock);
522 * dev_getbyhwaddr - find a device by its hardware address
523 * @type: media type of device
524 * @ha: hardware address
526 * Search for an interface by MAC address. Returns NULL if the device
527 * is not found or a pointer to the device. The caller must hold the
528 * rtnl semaphore. The returned device has not had its ref count increased
529 * and the caller must therefore be careful about locking
532 * If the API was consistent this would be __dev_get_by_hwaddr
/*
 * dev_getbyhwaddr: linear walk of dev_base for a device of media type
 * @type whose hardware address equals @ha. No lock is taken here.
 */
535 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
537 struct net_device *dev;
541 for (dev = dev_base; dev != NULL; dev = dev->next) {
/*
 * The compare length is the candidate device's own addr_len, so @ha
 * must supply at least that many bytes for every device of this type.
 */
542 if (dev->type == type &&
543 memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
550 * dev_alloc_name - allocate a name for a device
552 * @name: name format string
554 * Passed a format string - eg "lt%d" it will try and find a suitable
555 * id. Not efficient for many devices, not called a lot. The caller
556 * must hold the dev_base or rtnl lock while allocating the name and
557 * adding the device in order to avoid duplicates. Returns the number
558 * of the unit assigned or a negative errno code.
/*
 * dev_alloc_name: expand the format @name with unit numbers 0..99 and
 * give @dev the first expansion that no existing device uses.
 */
561 int dev_alloc_name(struct net_device *dev, const char *name)
568 * Verify the string as this thing may have come from
569 * the user. There must be either one "%d" and no other "%"
570 * characters, or no "%" characters at all.
572 p = strchr(name, '%');
/* Reject when the first '%' is not followed by 'd', or when another '%' follows it. */
573 if (p && (p[1] != 'd' || strchr(p+2, '%')))
577 * If you need over 100 please also fix the algorithm...
579 for (i = 0; i < 100; i++) {
/* @name is used directly as the format string; the check above is what constrains it. */
580 snprintf(buf,sizeof(buf),name,i);
581 if (__dev_get_by_name(buf) == NULL) {
/*
 * NOTE(review): unbounded copy. The declaration of buf is not part of
 * this excerpt; confirm that it cannot be larger than dev->name.
 */
582 strcpy(dev->name, buf);
586 return -ENFILE; /* Over 100 of the things .. bail out! */
590 * dev_alloc - allocate a network device and name
591 * @name: name format string
592 * @err: error return pointer
594 * Passed a format string, eg. "lt%d", it will allocate a network device
595 * and space for the name. %NULL is returned if no memory is available.
596 * If the allocation succeeds then the name is assigned and the
597 * device pointer returned. %NULL is returned if the name allocation
598 * failed. The cause of an error is returned as a negative errno code
599 * in the variable @err points to.
601 * The caller must hold the @dev_base or RTNL locks when doing this in
602 * order to avoid duplicate name allocations.
/*
 * dev_alloc: allocate a struct net_device, zero it, and name it from
 * the format @name. The result of dev_alloc_name() is stored through
 * @err. The allocation-failure and name-failure handling are on lines
 * that are not part of this excerpt.
 */
605 struct net_device *dev_alloc(const char *name, int *err)
607 struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
/* The structure is cleared before any field is set. */
612 memset(dev, 0, sizeof(struct net_device));
613 *err = dev_alloc_name(dev, name);
622 * netdev_state_change - device changes state
623 * @dev: device to cause notification
625 * Called to indicate a device has changed state. This function calls
626 * the notifier chains for netdev_chain and sends a NEWLINK message
627 * to the routing socket.
/*
 * netdev_state_change: announce a state change of @dev. Both the
 * NETDEV_CHANGE notifier call and the RTM_NEWLINK message sit under the
 * IFF_UP test.
 */
630 void netdev_state_change(struct net_device *dev)
632 if (dev->flags&IFF_UP) {
633 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
634 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
642 * dev_load - load a network module
643 * @name: name of interface
645 * If a network interface is not present and the process has suitable
646 * privileges this function loads the module. If module loading is not
647 * available in this kernel then it becomes a nop.
/*
 * dev_load: request a module named @name when no device of that name
 * exists and the caller has CAP_SYS_MODULE. The device test comes first,
 * so the capability is only checked for missing devices.
 */
650 void dev_load(const char *name)
652 if (!dev_get(name) && capable(CAP_SYS_MODULE))
653 request_module(name);
/*
 * Alternative empty definition; the preprocessor conditional selecting
 * between the two variants is on lines not shown in this excerpt.
 */
658 extern inline void dev_load(const char *unused){;}
/*
 * default_rebuild_header: logs a KERN_DEBUG message naming the device
 * (or "NULL!!!" when skb->dev is unset). The message itself labels any
 * call as a bug. The return statement is not part of this excerpt.
 */
662 static int default_rebuild_header(struct sk_buff *skb)
664 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
670 * dev_open - prepare an interface for use.
671 * @dev: device to open
673 * Takes a device from down to up state. The device's private open
674 * function is invoked and then the multicast lists are loaded. Finally
675 * the device is moved into the up state and a %NETDEV_UP message is
676 * sent to the netdev notifier chain.
678 * Calling this function on an active interface is a nop. On a failure
679 * a negative errno code is returned.
/*
 * dev_open: bring @dev up. Visible steps, in order: bail out if the
 * device is already up or not present, take a module reference, set
 * __LINK_STATE_START and call the driver open method, then set IFF_UP
 * and send NETDEV_UP on netdev_chain.
 */
682 int dev_open(struct net_device *dev)
690 if (dev->flags&IFF_UP)
694 * Is it even present?
696 if (!netif_device_present(dev))
700 * Call device private open method
/* The module reference is taken first; the start bit is set before ->open() runs. */
702 if (try_inc_mod_count(dev->owner)) {
703 set_bit(__LINK_STATE_START, &dev->state);
705 ret = dev->open(dev);
/*
 * Undo path: start bit cleared and module count dropped. The condition
 * guarding these two lines is elided; presumably a non-zero ret (confirm).
 */
707 clear_bit(__LINK_STATE_START, &dev->state);
709 __MOD_DEC_USE_COUNT(dev->owner);
717 * If it went open OK then:
725 dev->flags |= IFF_UP;
728 * Initialize multicasting status
733 * Wakeup transmit queue engine
738 * ... and announce new interface.
740 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
745 #ifdef CONFIG_NET_FASTROUTE
/*
 * dev_do_clear_fastroute: empty the fastpath table of @dev. Does
 * nothing for a device without an accept_fastpath method.
 */
747 static void dev_do_clear_fastroute(struct net_device *dev)
749 if (dev->accept_fastpath) {
/* Slots 0..NETDEV_FASTROUTE_HMASK inclusive are visited. */
752 for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
753 struct dst_entry *dst;
/*
 * The lock is taken and dropped once per slot: the entry is detached
 * under fastpath_lock, and whatever is done with dst afterwards (not
 * visible here) happens after the unlock.
 */
755 write_lock_irq(&dev->fastpath_lock);
756 dst = dev->fastpath[i];
757 dev->fastpath[i] = NULL;
758 write_unlock_irq(&dev->fastpath_lock);
/*
 * dev_clear_fastroute: clear fastroute state either for one device or
 * for every device on dev_base. The test choosing between the two paths
 * is on an elided line; presumably it is whether @dev is non-NULL
 * (confirm).
 */
765 void dev_clear_fastroute(struct net_device *dev)
768 dev_do_clear_fastroute(dev);
/* All-devices path: the parameter itself is reused as the list cursor, under dev_base_lock. */
770 read_lock(&dev_base_lock);
771 for (dev = dev_base; dev; dev = dev->next)
772 dev_do_clear_fastroute(dev);
773 read_unlock(&dev_base_lock);
779 * dev_close - shutdown an interface.
780 * @dev: device to shutdown
782 * This function moves an active device into down state. A
783 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
784 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
/*
 * dev_close: take @dev down. Visible order of operations: skip devices
 * that are not up, send NETDEV_GOING_DOWN, clear __LINK_STATE_START,
 * wait for __LINK_STATE_RX_SCHED to clear, clear IFF_UP, send
 * NETDEV_DOWN, drop the module use count.
 */
788 int dev_close(struct net_device *dev)
790 if (!(dev->flags&IFF_UP))
794 * Tell people we are going down, so that they can
795 * prepare to death, when device is still operating.
797 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
801 clear_bit(__LINK_STATE_START, &dev->state);
803 /* Synchronize to scheduled poll. We cannot touch poll list,
804 * it can be even on different cpu. So just clear netif_running(),
805 * and wait when poll really will happen. Actually, the best place
806 * for this is inside dev->stop() after device stopped its irq
807 * engine, but this requires more changes in devices. */
809 smp_mb__after_clear_bit(); /* Commit netif_running(). */
/* Loop until the RX_SCHED bit is seen clear; the call that actually sleeps is on an elided line. */
810 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
812 current->state = TASK_INTERRUPTIBLE;
817 * Call the device specific close. This cannot fail.
818 * Only if device is UP
820 * We allow it to be called even after a DETACH hot-plug
828 * Device is now down.
831 dev->flags &= ~IFF_UP;
832 #ifdef CONFIG_NET_FASTROUTE
833 dev_clear_fastroute(dev);
837 * Tell people we are down
839 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
842 * Drop the module refcount
/* Counterpart of the try_inc_mod_count(dev->owner) done in dev_open(). */
845 __MOD_DEC_USE_COUNT(dev->owner);
852 * Device change register/unregister. These are not inline or static
853 * as we export them to the world.
857 * register_netdevice_notifier - register a network notifier block
860 * Register a notifier to be called when network device events occur.
861 * The notifier passed is linked into the kernel structures and must
862 * not be reused until it has been unregistered. A negative errno code
863 * is returned on a failure.
/* Thin wrapper: adds @nb to netdev_chain and passes back the result of notifier_chain_register(). */
866 int register_netdevice_notifier(struct notifier_block *nb)
868 return notifier_chain_register(&netdev_chain, nb);
872 * unregister_netdevice_notifier - unregister a network notifier block
875 * Unregister a notifier previously registered by
876 * register_netdevice_notifier(). The notifier is unlinked from the
877 * kernel structures and may then be reused. A negative errno code
878 * is returned on a failure.
/* Thin wrapper: removes @nb from netdev_chain and passes back the result of notifier_chain_unregister(). */
881 int unregister_netdevice_notifier(struct notifier_block *nb)
883 return notifier_chain_unregister(&netdev_chain,nb);
887 * Support routine. Sends outgoing frames to any network
888 * taps currently in use.
/*
 * dev_queue_xmit_nit: hand a copy of an outgoing frame to the taps on
 * the ptype_all list.
 *
 * The skb is timestamped, then ptype_all is walked with the
 * BR_NETPROTO_LOCK read lock held. A tap is served when it is bound to
 * @dev or to no device, and its ->data is not the socket the skb came
 * from (skb->sk). Each served tap gets its own skb_clone() made with
 * GFP_ATOMIC. On the clone, mac.raw is pointed at data, nh.raw is reset
 * to data if it lies outside [data, tail] (with a KERN_CRIT message),
 * h.raw is set to nh.raw and pkt_type to PACKET_OUTGOING.
 *
 * The handler is called with skb->dev, not the @dev argument, as its
 * device parameter. What happens when skb_clone() fails is on an elided
 * line.
 */
891 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
893 struct packet_type *ptype;
894 do_gettimeofday(&skb->stamp);
896 br_read_lock(BR_NETPROTO_LOCK);
897 for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
899 /* Never send packets back to the socket
900 * they originated from - MvS (miquels@drinkel.ow.org)
902 if ((ptype->dev == dev || !ptype->dev) &&
903 ((struct sock *)ptype->data != skb->sk))
905 struct sk_buff *skb2;
906 if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
909 /* skb->nh should be correctly
910 set by sender, so that the second statement is
911 just protection against buggy protocols.
913 skb2->mac.raw = skb2->data;
915 if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) {
917 printk(KERN_CRIT "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
918 skb2->nh.raw = skb2->data;
921 skb2->h.raw = skb2->nh.raw;
922 skb2->pkt_type = PACKET_OUTGOING;
923 ptype->func(skb2, skb->dev, ptype);
926 br_read_unlock(BR_NETPROTO_LOCK);
929 /* Calculate csum in the case, when packet is misrouted.
930 * If it failed by some reason, ignore and send skb with wrong
/*
 * skb_checksum_help: compute the checksum in software and store it in
 * the packet, then mark the skb CHECKSUM_NONE. The bodies of the two
 * range checks and the return statement are on elided lines.
 */
933 struct sk_buff * skb_checksum_help(struct sk_buff *skb)
/* Offset of the transport header (h.raw) from the start of the data. */
938 offset = skb->h.raw - skb->data;
939 if (offset > (int)skb->len)
/* Checksum everything from the transport header to the end of the packet. */
941 csum = skb_checksum(skb, offset, skb->len-offset, 0);
/* Reused: now the number of bytes between h.raw and tail. */
943 offset = skb->tail - skb->h.raw;
/* skb->csum is used as the byte offset of the 16-bit checksum field; it must fit before tail. */
946 if (skb->csum+2 > offset)
949 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
950 skb->ip_summed = CHECKSUM_NONE;
954 #ifdef CONFIG_HIGHMEM
955 /* Actually, we should eliminate this check as soon as we know, that:
956 * 1. IOMMU is present and allows to map all the memory.
957 * 2. No high memory really exists on this machine.
/*
 * illegal_highdma: scan the page fragments of @skb for pages at or
 * above highmem_start_page. The NETIF_F_HIGHDMA feature test comes
 * first. The return type and the return statements are on lines not
 * part of this excerpt.
 */
961 illegal_highdma(struct net_device *dev, struct sk_buff *skb)
965 if (dev->features&NETIF_F_HIGHDMA)
968 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
969 if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
/* Fallback variant: a constant 0 (the selecting preprocessor lines are elided). */
975 #define illegal_highdma(dev, skb) (0)
979 * dev_queue_xmit - transmit a buffer
980 * @skb: buffer to transmit
982 * Queue a buffer for transmission to a network device. The caller must
983 * have set the device and priority and built the buffer before calling this
984 * function. The function can be called from an interrupt.
986 * A negative errno code is returned on a failure. A success does not
987 * guarantee the frame will be transmitted as it may be dropped due
988 * to congestion or traffic shaping.
/*
 * dev_queue_xmit: queue @skb for transmission on skb->dev.
 *
 * Preparation, each step only when needed:
 *  - a frag_list skb is linearized if the device lacks NETIF_F_FRAGLIST;
 *  - a paged skb is linearized if the device lacks NETIF_F_SG or
 *    illegal_highdma() objects;
 *  - a CHECKSUM_HW skb gets skb_checksum_help() unless the device has
 *    HW_CSUM/NO_CSUM, or has IP_CSUM and the protocol is ETH_P_IP.
 *
 * Queueing: dev->queue_lock is taken with spin_lock_bh(). On the
 * enqueue path, the result of q->enqueue() is returned with
 * NET_XMIT_BYPASS mapped to NET_XMIT_SUCCESS.
 *
 * Direct path (device is IFF_UP): when this CPU does not already own
 * xmit_lock, queue_lock is dropped with plain spin_unlock() and
 * xmit_lock taken with plain spin_lock(); bottom halves therefore stay
 * disabled from the earlier spin_lock_bh() until the later
 * spin_unlock_bh(&dev->xmit_lock). The owner CPU is recorded in
 * xmit_lock_owner and reset to -1 before unlocking. If this CPU already
 * owns xmit_lock, a rate-limited "Dead loop" message is printed instead.
 *
 * Error returns and skb freeing are on lines not part of this excerpt.
 */
991 int dev_queue_xmit(struct sk_buff *skb)
993 struct net_device *dev = skb->dev;
996 if (skb_shinfo(skb)->frag_list &&
997 !(dev->features&NETIF_F_FRAGLIST) &&
998 skb_linearize(skb, GFP_ATOMIC) != 0) {
1003 /* Fragmented skb is linearized if device does not support SG,
1004 * or if at least one of fragments is in highmem and device
1005 * does not support DMA from it.
1007 if (skb_shinfo(skb)->nr_frags &&
1008 (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1009 skb_linearize(skb, GFP_ATOMIC) != 0) {
1014 /* If packet is not checksummed and device does not support
1015 * checksumming for this protocol, complete checksumming here.
1017 if (skb->ip_summed == CHECKSUM_HW &&
1018 (!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) &&
1019 (!(dev->features&NETIF_F_IP_CSUM) ||
1020 skb->protocol != htons(ETH_P_IP)))) {
1021 if ((skb = skb_checksum_help(skb)) == NULL)
1025 /* Grab device queue */
1026 spin_lock_bh(&dev->queue_lock);
1029 int ret = q->enqueue(skb, q);
1033 spin_unlock_bh(&dev->queue_lock);
1034 return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
1037 /* The device has no queue. Common case for software devices:
1038 loopback, all the sorts of tunnels...
1040 Really, it is unlikely that xmit_lock protection is necessary here.
1041 (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
1042 However, it is possible, that they rely on protection
1045 Check this and shot the lock. It is not prone from deadlocks.
1046 Either shot noqueue qdisc, it is even simpler 8)
1048 if (dev->flags&IFF_UP) {
1049 int cpu = smp_processor_id();
1051 if (dev->xmit_lock_owner != cpu) {
1052 spin_unlock(&dev->queue_lock);
1053 spin_lock(&dev->xmit_lock);
1054 dev->xmit_lock_owner = cpu;
1056 if (!netif_queue_stopped(dev)) {
1058 dev_queue_xmit_nit(skb,dev);
1060 if (dev->hard_start_xmit(skb, dev) == 0) {
1061 dev->xmit_lock_owner = -1;
1062 spin_unlock_bh(&dev->xmit_lock);
1066 dev->xmit_lock_owner = -1;
1067 spin_unlock_bh(&dev->xmit_lock);
1068 if (net_ratelimit())
1069 printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", dev->name);
1073 /* Recursion is detected! It is possible, unfortunately */
1074 if (net_ratelimit())
1075 printk(KERN_CRIT "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
1078 spin_unlock_bh(&dev->queue_lock);
1085 /*=======================================================================
1087 =======================================================================*/
1089 int netdev_max_backlog = 300;
1090 int weight_p = 64; /* old backlog weight */
1091 /* These numbers are selected based on intuition and some
1092 * experimentation, if you have more scientific way of doing this
1093 * please go ahead and fix things.
1095 int no_cong_thresh = 10;
1100 struct netif_rx_stats netdev_rx_stat[NR_CPUS];
1103 #ifdef CONFIG_NET_HW_FLOWCONTROL
1104 atomic_t netdev_dropping = ATOMIC_INIT(0);
1105 static unsigned long netdev_fc_mask = 1;
1106 unsigned long netdev_fc_xoff = 0;
1107 spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
1111 void (*stimul)(struct net_device *);
1112 struct net_device *dev;
1113 } netdev_fc_slots[BITS_PER_LONG];
/*
 * netdev_register_fc: claim a free flow-control slot for @dev and store
 * its callback @stimul there. The slot table and both bitmaps are
 * updated with netdev_fc_lock held and interrupts saved.
 */
1115 int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
1118 unsigned long flags;
1120 spin_lock_irqsave(&netdev_fc_lock, flags);
/* netdev_fc_mask has one bit per slot; all ones means no slot is free. */
1121 if (netdev_fc_mask != ~0UL) {
/*
 * First zero bit of the mask. Since netdev_fc_mask is initialised to 1,
 * bit 0 is already taken and is never returned here.
 */
1122 bit = ffz(netdev_fc_mask);
1123 netdev_fc_slots[bit].stimul = stimul;
1124 netdev_fc_slots[bit].dev = dev;
/* Mark the slot used and start it with its xoff bit clear. */
1125 set_bit(bit, &netdev_fc_mask);
1126 clear_bit(bit, &netdev_fc_xoff);
1128 spin_unlock_irqrestore(&netdev_fc_lock, flags);
/*
 * netdev_unregister_fc: release flow-control slot @bit. Clears the slot
 * callback and device, then clears the slot bit in both netdev_fc_mask
 * and netdev_fc_xoff, all under netdev_fc_lock.
 * NOTE(review): any validation of @bit is on a line not part of this
 * excerpt; confirm that bit 0 and out-of-range values are rejected.
 */
1132 void netdev_unregister_fc(int bit)
1134 unsigned long flags;
1136 spin_lock_irqsave(&netdev_fc_lock, flags);
1138 netdev_fc_slots[bit].stimul = NULL;
1139 netdev_fc_slots[bit].dev = NULL;
1140 clear_bit(bit, &netdev_fc_mask);
1141 clear_bit(bit, &netdev_fc_xoff);
1143 spin_unlock_irqrestore(&netdev_fc_lock, flags);
/*
 * netdev_wakeup: take a copy of netdev_fc_xoff under netdev_fc_lock and
 * call the stored stimul callback of slots with that slot's device. The
 * loop that derives slot index i from xoff is on elided lines.
 * The callbacks run with netdev_fc_lock still held.
 */
1146 static void netdev_wakeup(void)
1150 spin_lock(&netdev_fc_lock);
1151 xoff = netdev_fc_xoff;
1156 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1158 spin_unlock(&netdev_fc_lock);
/*
 * get_sample_stats: refresh the congestion level of softnet_data[cpu]
 * from a running average of its input queue length.
 */
1162 static void get_sample_stats(int cpu)
1168 int blog = softnet_data[cpu].input_pkt_queue.qlen;
1169 int avg_blog = softnet_data[cpu].avg_blog;
/* New average = half the old average plus half the current length (each halved by shift first). */
1171 avg_blog = (avg_blog >> 1)+ (blog >> 1);
/* Thresholds are tested from highest level to lowest; mod_cong and lo_cong are defined outside this excerpt. */
1173 if (avg_blog > mod_cong) {
1174 /* Above moderate congestion levels. */
1175 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
/*
 * Escalation one level up when rd reduced modulo netdev_max_backlog is
 * below the average. The source of rd and the conditional compilation
 * around these lines are elided.
 */
1178 rq = rd % netdev_max_backlog;
1179 if (rq < avg_blog) /* unlucky bastard */
1180 softnet_data[cpu].cng_level = NET_RX_DROP;
1182 } else if (avg_blog > lo_cong) {
1183 softnet_data[cpu].cng_level = NET_RX_CN_MOD;
1186 rq = rd % netdev_max_backlog;
1187 if (rq < avg_blog) /* unlucky bastard */
1188 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1190 } else if (avg_blog > no_cong)
1191 softnet_data[cpu].cng_level = NET_RX_CN_LOW;
1192 else /* no congestion */
1193 softnet_data[cpu].cng_level = NET_RX_SUCCESS;
/* Store the new average for the next sample. */
1195 softnet_data[cpu].avg_blog = avg_blog;
1198 #ifdef OFFLINE_SAMPLE
/*
 * sample_queue: timer callback used when OFFLINE_SAMPLE is defined.
 * Samples the queue of the CPU it runs on and re-arms samp_timer.
 */
1199 static void sample_queue(unsigned long dummy)
1201 /* 10 ms or 1ms -- i dont care -- JHS */
1203 int cpu = smp_processor_id();
1205 get_sample_stats(cpu);
/* next_tick (declared on an elided line) becomes an absolute expiry by adding jiffies. */
1206 next_tick += jiffies;
1207 mod_timer(&samp_timer, next_tick);
1213 * netif_rx - post buffer to the network code
1214 * @skb: buffer to post
1216 * This function receives a packet from a device driver and queues it for
1217 * the upper (protocol) levels to process. It always succeeds. The buffer
1218 * may be dropped during processing for congestion control or by the
1222 * NET_RX_SUCCESS (no congestion)
1223 * NET_RX_CN_LOW (low congestion)
1224 * NET_RX_CN_MOD (moderate congestion)
1225 * NET_RX_CN_HIGH (high congestion)
1226 * NET_RX_DROP (packet was dropped)
/*
 * netif_rx: queue a received @skb on the input queue of the current CPU.
 *
 * The skb is timestamped only when its stamp.tv_sec is still 0. Queue
 * state is touched with local interrupts saved and disabled, and the
 * per-CPU "total" counter is bumped for every call.
 *
 * While the queue length is at most netdev_max_backlog, the skb is
 * appended to input_pkt_queue and the current cng_level of the queue is
 * returned. Without OFFLINE_SAMPLE, get_sample_stats() runs on that
 * path after interrupts are restored. An empty queue additionally
 * clears a set throttle flag and schedules blog_dev through
 * netif_rx_schedule().
 *
 * Otherwise the throttle flag is set (counted once in "throttled") and
 * the packet is counted in "dropped". The jumps between these sections
 * and the final free and return are on lines not part of this excerpt.
 */
1231 int netif_rx(struct sk_buff *skb)
1233 int this_cpu = smp_processor_id();
1234 struct softnet_data *queue;
1235 unsigned long flags;
1237 if (skb->stamp.tv_sec == 0)
1238 do_gettimeofday(&skb->stamp);
1240 /* The code is rearranged so that the path is the most
1241 short when CPU is congested, but is still operating.
1243 queue = &softnet_data[this_cpu];
1245 local_irq_save(flags);
1247 netdev_rx_stat[this_cpu].total++;
1248 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1249 if (queue->input_pkt_queue.qlen) {
1250 if (queue->throttle)
1255 __skb_queue_tail(&queue->input_pkt_queue,skb);
1256 local_irq_restore(flags);
1257 #ifndef OFFLINE_SAMPLE
1258 get_sample_stats(this_cpu);
1260 return queue->cng_level;
1263 if (queue->throttle) {
1264 queue->throttle = 0;
1265 #ifdef CONFIG_NET_HW_FLOWCONTROL
1266 if (atomic_dec_and_test(&netdev_dropping))
1271 netif_rx_schedule(&queue->blog_dev);
1275 if (queue->throttle == 0) {
1276 queue->throttle = 1;
1277 netdev_rx_stat[this_cpu].throttled++;
1278 #ifdef CONFIG_NET_HW_FLOWCONTROL
1279 atomic_inc(&netdev_dropping);
1284 netdev_rx_stat[this_cpu].dropped++;
1285 local_irq_restore(flags);
1291 /* Deliver skb to an old protocol, which is not threaded well
1292 or which do not understand shared skbs.
/*
 * deliver_to_old_ones: call the handler pt->func for @skb in a
 * serialised context.
 *
 * The handler runs with the function-static spinlock net_bh_lock held
 * and with the TIMER_BH tasklet disabled, so only one handler call goes
 * through this path at a time. Before the call the skb may be replaced
 * by a clone, and a non-linear skb is linearized. The test deciding
 * whether to clone (presumably on @last) and the failure handling are
 * on elided lines. ret starts as NET_RX_DROP and is overwritten by the
 * handler result.
 */
1294 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
1296 static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
1297 int ret = NET_RX_DROP;
1301 skb = skb_clone(skb, GFP_ATOMIC);
1305 if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
1310 /* The assumption (correct one) is that old protocols
1311 did not depened on BHs different of NET_BH and TIMER_BH.
1314 /* Emulate NET_BH with special spinlock */
1315 spin_lock(&net_bh_lock);
1317 /* Disable timers and wait for all timers completion */
1318 tasklet_disable(bh_task_vec+TIMER_BH);
1320 ret = pt->func(skb, skb->dev, pt);
1322 tasklet_hi_enable(bh_task_vec+TIMER_BH);
1323 spin_unlock(&net_bh_lock);
/*
 * skb_bond: redirect skb->dev to the master of the receiving device.
 * The condition guarding the assignment is on an elided line;
 * presumably it is that dev->master is non-NULL (confirm).
 */
1327 static __inline__ void skb_bond(struct sk_buff *skb)
1329 struct net_device *dev = skb->dev;
1332 skb->dev = dev->master;
/*
 * net_tx_action: transmit softirq handler. Works on two lists of the
 * current CPU in softnet_data: completion_queue (skbs) and
 * output_queue (devices).
 */
1335 static void net_tx_action(struct softirq_action *h)
1337 int cpu = smp_processor_id();
1339 if (softnet_data[cpu].completion_queue) {
1340 struct sk_buff *clist;
/*
 * Detach the whole list while local interrupts are disabled, then walk
 * the private copy. The matching interrupt enable is on an elided line.
 */
1342 local_irq_disable();
1343 clist = softnet_data[cpu].completion_queue;
1344 softnet_data[cpu].completion_queue = NULL;
1347 while (clist != NULL) {
1348 struct sk_buff *skb = clist;
1349 clist = clist->next;
/* Every skb on this list is expected to have a zero user count. */
1351 BUG_TRAP(atomic_read(&skb->users) == 0);
1356 if (softnet_data[cpu].output_queue) {
1357 struct net_device *head;
/* Same detach pattern for the list of scheduled devices. */
1359 local_irq_disable();
1360 head = softnet_data[cpu].output_queue;
1361 softnet_data[cpu].output_queue = NULL;
1364 while (head != NULL) {
1365 struct net_device *dev = head;
/* Advance before touching dev: the link used is next_sched. */
1366 head = head->next_sched;
/* Barrier first, then the device is marked as no longer scheduled. */
1368 smp_mb__before_clear_bit();
1369 clear_bit(__LINK_STATE_SCHED, &dev->state);
/*
 * The queue lock is only tried, never waited for. The work done while
 * holding it is on an elided line.
 */
1371 if (spin_trylock(&dev->queue_lock)) {
1373 spin_unlock(&dev->queue_lock);
/* Presumably the trylock-failed branch (the else is elided): put the device back on the schedule. */
1375 netif_schedule(dev);
1382 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1383 int (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
1386 static __inline__ int handle_bridge(struct sk_buff *skb,
1387 struct packet_type *pt_prev)
1389 int ret = NET_RX_DROP;
1393 ret = deliver_to_old_ones(pt_prev, skb, 0);
1395 atomic_inc(&skb->users);
1396 ret = pt_prev->func(skb, skb->dev, pt_prev);
1404 #ifdef CONFIG_NET_DIVERT
1405 static inline int handle_diverter(struct sk_buff *skb)
1407 /* if diversion is supported on device, then divert */
1408 if (skb->dev->divert && skb->dev->divert->divert)
1412 #endif /* CONFIG_NET_DIVERT */
1414 int netif_receive_skb(struct sk_buff *skb)
1416 struct packet_type *ptype, *pt_prev;
1417 int ret = NET_RX_DROP;
1418 unsigned short type = skb->protocol;
1420 if (skb->stamp.tv_sec == 0)
1421 do_gettimeofday(&skb->stamp);
1425 netdev_rx_stat[smp_processor_id()].total++;
1427 #ifdef CONFIG_NET_FASTROUTE
1428 if (skb->pkt_type == PACKET_FASTROUTE) {
1429 netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
1430 return dev_queue_xmit(skb);
1434 skb->h.raw = skb->nh.raw = skb->data;
1437 for (ptype = ptype_all; ptype; ptype = ptype->next) {
1438 if (!ptype->dev || ptype->dev == skb->dev) {
1440 if (!pt_prev->data) {
1441 ret = deliver_to_old_ones(pt_prev, skb, 0);
1443 atomic_inc(&skb->users);
1444 ret = pt_prev->func(skb, skb->dev, pt_prev);
1451 #ifdef CONFIG_NET_DIVERT
1452 if (skb->dev->divert && skb->dev->divert->divert)
1453 ret = handle_diverter(skb);
1454 #endif /* CONFIG_NET_DIVERT */
1456 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1457 if (skb->dev->br_port != NULL &&
1458 br_handle_frame_hook != NULL) {
1461 ret = handle_bridge(skb, pt_prev);
1462 if (br_handle_frame_hook(skb) == 0)
1468 for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
1469 if (ptype->type == type &&
1470 (!ptype->dev || ptype->dev == skb->dev)) {
1472 if (!pt_prev->data) {
1473 ret = deliver_to_old_ones(pt_prev, skb, 0);
1475 atomic_inc(&skb->users);
1476 ret = pt_prev->func(skb, skb->dev, pt_prev);
1484 if (!pt_prev->data) {
1485 ret = deliver_to_old_ones(pt_prev, skb, 1);
1487 ret = pt_prev->func(skb, skb->dev, pt_prev);
1491 /* Jamal, now you will not be able to escape explaining
1492 * me how you were going to use this. :-)
1500 static int process_backlog(struct net_device *blog_dev, int *budget)
1503 int quota = min(blog_dev->quota, *budget);
1504 int this_cpu = smp_processor_id();
1505 struct softnet_data *queue = &softnet_data[this_cpu];
1506 unsigned long start_time = jiffies;
1509 struct sk_buff *skb;
1510 struct net_device *dev;
1512 local_irq_disable();
1513 skb = __skb_dequeue(&queue->input_pkt_queue);
1520 netif_receive_skb(skb);
1526 if (work >= quota || jiffies - start_time > 1)
1529 #ifdef CONFIG_NET_HW_FLOWCONTROL
1530 if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
1531 if (atomic_dec_and_test(&netdev_dropping)) {
1532 queue->throttle = 0;
1540 blog_dev->quota -= work;
1545 blog_dev->quota -= work;
1548 list_del(&blog_dev->poll_list);
1549 clear_bit(__LINK_STATE_RX_SCHED, &blog_dev->state);
1551 if (queue->throttle) {
1552 queue->throttle = 0;
1553 #ifdef CONFIG_NET_HW_FLOWCONTROL
1554 if (atomic_dec_and_test(&netdev_dropping))
1562 static void net_rx_action(struct softirq_action *h)
1564 int this_cpu = smp_processor_id();
1565 struct softnet_data *queue = &softnet_data[this_cpu];
1566 unsigned long start_time = jiffies;
1567 int budget = netdev_max_backlog;
1569 br_read_lock(BR_NETPROTO_LOCK);
1570 local_irq_disable();
1572 while (!list_empty(&queue->poll_list)) {
1573 struct net_device *dev;
1575 if (budget <= 0 || jiffies - start_time > 1)
1580 dev = list_entry(queue->poll_list.next, struct net_device, poll_list);
1582 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1583 local_irq_disable();
1584 list_del(&dev->poll_list);
1585 list_add_tail(&dev->poll_list, &queue->poll_list);
1587 dev->quota += dev->weight;
1589 dev->quota = dev->weight;
1592 local_irq_disable();
1597 br_read_unlock(BR_NETPROTO_LOCK);
1601 netdev_rx_stat[this_cpu].time_squeeze++;
1602 __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
1605 br_read_unlock(BR_NETPROTO_LOCK);
1608 static gifconf_func_t * gifconf_list [NPROTO];
1611 * register_gifconf - register a SIOCGIF handler
1612 * @family: Address family
1613 * @gifconf: Function handler
1615 * Register protocol dependent address dumping routines. The handler
1616 * that is passed must not be freed or reused until it has been replaced
1617 * by another handler.
1620 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1624 gifconf_list[family] = gifconf;
1630 * Map an interface index to its name (SIOCGIFNAME)
1634 * We need this ioctl for efficient implementation of the
1635 * if_indextoname() function required by the IPv6 API. Without
1636 * it, we would have to search all the interfaces to find a
1640 static int dev_ifname(struct ifreq *arg)
1642 struct net_device *dev;
1646 * Fetch the caller's info block.
1649 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1652 read_lock(&dev_base_lock);
1653 dev = __dev_get_by_index(ifr.ifr_ifindex);
1655 read_unlock(&dev_base_lock);
1659 strcpy(ifr.ifr_name, dev->name);
1660 read_unlock(&dev_base_lock);
1662 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1668 * Perform a SIOCGIFCONF call. This structure will change
1669 * size eventually, and there is nothing I can do about it.
1670 * Thus we will need a 'compatibility mode'.
1673 static int dev_ifconf(char *arg)
1676 struct net_device *dev;
1683 * Fetch the caller's info block.
1686 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1693 * Loop over the interfaces, and write an info block for each.
1697 for (dev = dev_base; dev != NULL; dev = dev->next) {
1698 for (i=0; i<NPROTO; i++) {
1699 if (gifconf_list[i]) {
1702 done = gifconf_list[i](dev, NULL, 0);
1704 done = gifconf_list[i](dev, pos+total, len-total);
1715 * All done. Write the updated control block back to the caller.
1717 ifc.ifc_len = total;
1719 if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
1723 * Both BSD and Solaris return 0 here, so we do too.
1729 * This is invoked by the /proc filesystem handler to display a device
1733 #ifdef CONFIG_PROC_FS
1735 static int sprintf_stats(char *buffer, struct net_device *dev)
1737 struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
1741 size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1744 stats->rx_packets, stats->rx_errors,
1745 stats->rx_dropped + stats->rx_missed_errors,
1746 stats->rx_fifo_errors,
1747 stats->rx_length_errors + stats->rx_over_errors
1748 + stats->rx_crc_errors + stats->rx_frame_errors,
1749 stats->rx_compressed, stats->multicast,
1751 stats->tx_packets, stats->tx_errors, stats->tx_dropped,
1752 stats->tx_fifo_errors, stats->collisions,
1753 stats->tx_carrier_errors + stats->tx_aborted_errors
1754 + stats->tx_window_errors + stats->tx_heartbeat_errors,
1755 stats->tx_compressed);
1757 size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
1763 * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
1764 * to create /proc/net/dev
1767 static int dev_get_info(char *buffer, char **start, off_t offset, int length)
1773 struct net_device *dev;
1776 size = sprintf(buffer,
1777 "Inter-| Receive | Transmit\n"
1778 " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
1784 read_lock(&dev_base_lock);
1785 for (dev = dev_base; dev != NULL; dev = dev->next) {
1786 size = sprintf_stats(buffer+len, dev);
1794 if (pos > offset + length)
1797 read_unlock(&dev_base_lock);
1799 *start = buffer + (offset - begin); /* Start of wanted data */
1800 len -= (offset - begin); /* Start slop */
1802 len = length; /* Ending slop */
1808 static int dev_proc_stats(char *buffer, char **start, off_t offset,
1809 int length, int *eof, void *data)
1814 for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
1815 i = cpu_logical_map(lcpu);
1816 len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1817 netdev_rx_stat[i].total,
1818 netdev_rx_stat[i].dropped,
1819 netdev_rx_stat[i].time_squeeze,
1820 netdev_rx_stat[i].throttled,
1821 netdev_rx_stat[i].fastroute_hit,
1822 netdev_rx_stat[i].fastroute_success,
1823 netdev_rx_stat[i].fastroute_defer,
1824 netdev_rx_stat[i].fastroute_deferred_out,
1826 netdev_rx_stat[i].fastroute_latency_reduction
1828 netdev_rx_stat[i].cpu_collision
1840 *start = buffer + offset;
1846 #endif /* CONFIG_PROC_FS */
1850 * netdev_set_master - set up master/slave pair
1851 * @slave: slave device
1852 * @master: new master device
1854 * Changes the master device of the slave. Pass %NULL to break the
1855 * bonding. The caller must hold the RTNL semaphore. On a failure
1856 * a negative errno code is returned. On success the reference counts
1857 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
1858 * function returns zero.
1861 int netdev_set_master(struct net_device *slave, struct net_device *master)
1863 struct net_device *old = slave->master;
1873 br_write_lock_bh(BR_NETPROTO_LOCK);
1874 slave->master = master;
1875 br_write_unlock_bh(BR_NETPROTO_LOCK);
1881 slave->flags |= IFF_SLAVE;
1883 slave->flags &= ~IFF_SLAVE;
1885 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
1890 * dev_set_promiscuity - update promiscuity count on a device
1894 * Add or remove promiscuity from a device. While the count in the device
1895 * remains above zero the interface remains promiscuous. Once it hits zero
1896 * the device reverts back to normal filtering operation. A negative inc
1897 * value is used to drop promiscuity on the device.
1900 void dev_set_promiscuity(struct net_device *dev, int inc)
1902 unsigned short old_flags = dev->flags;
1904 dev->flags |= IFF_PROMISC;
1905 if ((dev->promiscuity += inc) == 0)
1906 dev->flags &= ~IFF_PROMISC;
1907 if (dev->flags^old_flags) {
1908 #ifdef CONFIG_NET_FASTROUTE
1909 if (dev->flags&IFF_PROMISC) {
1910 netdev_fastroute_obstacles++;
1911 dev_clear_fastroute(dev);
1913 netdev_fastroute_obstacles--;
1916 printk(KERN_INFO "device %s %s promiscuous mode\n",
1917 dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
1922 * dev_set_allmulti - update allmulti count on a device
1926 * Add or remove reception of all multicast frames to a device. While the
1927 * count in the device remains above zero the interface remains listening
1928 * to all multicast frames. Once it hits zero the device reverts back to normal
1929 * filtering operation. A negative @inc value is used to drop the counter
1930 * when releasing a resource needing all multicasts.
1933 void dev_set_allmulti(struct net_device *dev, int inc)
1935 unsigned short old_flags = dev->flags;
1937 dev->flags |= IFF_ALLMULTI;
1938 if ((dev->allmulti += inc) == 0)
1939 dev->flags &= ~IFF_ALLMULTI;
1940 if (dev->flags^old_flags)
1944 int dev_change_flags(struct net_device *dev, unsigned flags)
1947 int old_flags = dev->flags;
1950 * Set the flags on our device.
1953 dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
1954 IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
1955 (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
1958 * Load in the correct multicast list now the flags have changed.
1964 * Have we downed the interface. We handle IFF_UP ourselves
1965 * according to user attempts to set it, rather than blindly
1970 if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
1972 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
1978 if (dev->flags&IFF_UP &&
1979 ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
1980 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
1982 if ((flags^dev->gflags)&IFF_PROMISC) {
1983 int inc = (flags&IFF_PROMISC) ? +1 : -1;
1984 dev->gflags ^= IFF_PROMISC;
1985 dev_set_promiscuity(dev, inc);
1988 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
1989 is important. Some (broken) drivers set IFF_PROMISC, when
1990 IFF_ALLMULTI is requested, without asking us and without reporting it.
1992 if ((flags^dev->gflags)&IFF_ALLMULTI) {
1993 int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
1994 dev->gflags ^= IFF_ALLMULTI;
1995 dev_set_allmulti(dev, inc);
1998 if (old_flags^dev->flags)
1999 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
2005 * Perform the SIOCxIFxxx calls.
2008 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2010 struct net_device *dev;
2013 if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
2018 case SIOCGIFFLAGS: /* Get interface flags */
2019 ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
2020 |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
2021 if (netif_running(dev) && netif_carrier_ok(dev))
2022 ifr->ifr_flags |= IFF_RUNNING;
2025 case SIOCSIFFLAGS: /* Set interface flags */
2026 return dev_change_flags(dev, ifr->ifr_flags);
2028 case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
2029 ifr->ifr_metric = 0;
2032 case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
2035 case SIOCGIFMTU: /* Get the MTU of a device */
2036 ifr->ifr_mtu = dev->mtu;
2039 case SIOCSIFMTU: /* Set the MTU of a device */
2040 if (ifr->ifr_mtu == dev->mtu)
2044 * MTU must be positive.
2050 if (!netif_device_present(dev))
2053 if (dev->change_mtu)
2054 err = dev->change_mtu(dev, ifr->ifr_mtu);
2056 dev->mtu = ifr->ifr_mtu;
2059 if (!err && dev->flags&IFF_UP)
2060 notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
2064 memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
2065 ifr->ifr_hwaddr.sa_family=dev->type;
2069 if (dev->set_mac_address == NULL)
2071 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2073 if (!netif_device_present(dev))
2075 err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2077 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2080 case SIOCSIFHWBROADCAST:
2081 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2083 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
2084 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2088 ifr->ifr_map.mem_start=dev->mem_start;
2089 ifr->ifr_map.mem_end=dev->mem_end;
2090 ifr->ifr_map.base_addr=dev->base_addr;
2091 ifr->ifr_map.irq=dev->irq;
2092 ifr->ifr_map.dma=dev->dma;
2093 ifr->ifr_map.port=dev->if_port;
2097 if (dev->set_config) {
2098 if (!netif_device_present(dev))
2100 return dev->set_config(dev,&ifr->ifr_map);
2105 if (dev->set_multicast_list == NULL ||
2106 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2108 if (!netif_device_present(dev))
2110 dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
2114 if (dev->set_multicast_list == NULL ||
2115 ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
2117 if (!netif_device_present(dev))
2119 dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
2123 ifr->ifr_ifindex = dev->ifindex;
2127 ifr->ifr_qlen = dev->tx_queue_len;
2131 if (ifr->ifr_qlen<0)
2133 dev->tx_queue_len = ifr->ifr_qlen;
2137 if (dev->flags&IFF_UP)
2139 if (__dev_get_by_name(ifr->ifr_newname))
2141 memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
2142 dev->name[IFNAMSIZ-1] = 0;
2143 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
2147 * Unknown or private ioctl
2151 if ((cmd >= SIOCDEVPRIVATE &&
2152 cmd <= SIOCDEVPRIVATE + 15) ||
2153 cmd == SIOCBONDENSLAVE ||
2154 cmd == SIOCBONDRELEASE ||
2155 cmd == SIOCBONDSETHWADDR ||
2156 cmd == SIOCBONDSLAVEINFOQUERY ||
2157 cmd == SIOCBONDINFOQUERY ||
2158 cmd == SIOCBONDCHANGEACTIVE ||
2159 cmd == SIOCETHTOOL ||
2160 cmd == SIOCGMIIPHY ||
2161 cmd == SIOCGMIIREG ||
2162 cmd == SIOCSMIIREG ||
2163 cmd == SIOCWANDEV) {
2164 if (dev->do_ioctl) {
2165 if (!netif_device_present(dev))
2167 return dev->do_ioctl(dev, ifr, cmd);
2177 * This function handles all "interface"-type I/O control requests. The actual
2178 * 'doing' part of this is dev_ifsioc above.
2182 * dev_ioctl - network device ioctl
2183 * @cmd: command to issue
2184 * @arg: pointer to a struct ifreq in user space
2186 * Issue ioctl functions to devices. This is normally called by the
2187 * user space syscall interfaces but can sometimes be useful for
2188 * other purposes. The return value is the return from the syscall if
2189 * positive or a negative errno code on error.
2192 int dev_ioctl(unsigned int cmd, void *arg)
2198 /* One special case: SIOCGIFCONF takes ifconf argument
2199 and requires shared lock, because it sleeps writing
2203 if (cmd == SIOCGIFCONF) {
2205 ret = dev_ifconf((char *) arg);
2209 if (cmd == SIOCGIFNAME) {
2210 return dev_ifname((struct ifreq *)arg);
2213 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2216 ifr.ifr_name[IFNAMSIZ-1] = 0;
2218 colon = strchr(ifr.ifr_name, ':');
2223 * See which interface the caller is talking about.
2229 * These ioctl calls:
2230 * - can be done by all.
2231 * - atomic and do not require locking.
2243 dev_load(ifr.ifr_name);
2244 read_lock(&dev_base_lock);
2245 ret = dev_ifsioc(&ifr, cmd);
2246 read_unlock(&dev_base_lock);
2250 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2256 * These ioctl calls:
2257 * - require superuser power.
2258 * - require strict serialization.
2265 if (!capable(CAP_NET_ADMIN))
2267 dev_load(ifr.ifr_name);
2270 ret = dev_ifsioc(&ifr, cmd);
2276 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2282 * These ioctl calls:
2283 * - require superuser power.
2284 * - require strict serialization.
2285 * - do not return a value
2296 case SIOCSIFHWBROADCAST:
2300 case SIOCBONDENSLAVE:
2301 case SIOCBONDRELEASE:
2302 case SIOCBONDSETHWADDR:
2303 case SIOCBONDSLAVEINFOQUERY:
2304 case SIOCBONDINFOQUERY:
2305 case SIOCBONDCHANGEACTIVE:
2306 if (!capable(CAP_NET_ADMIN))
2308 dev_load(ifr.ifr_name);
2311 ret = dev_ifsioc(&ifr, cmd);
2317 /* Get the per device memory space. We can add this but currently
2318 do not support it */
2320 /* Set the per device memory buffer space. Not applicable in our case */
2325 * Unknown or private ioctl.
2329 if (cmd == SIOCWANDEV ||
2330 (cmd >= SIOCDEVPRIVATE &&
2331 cmd <= SIOCDEVPRIVATE + 15)) {
2332 dev_load(ifr.ifr_name);
2335 ret = dev_ifsioc(&ifr, cmd);
2338 if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2343 /* Take care of Wireless Extensions */
2344 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2345 /* If command is `set a parameter', or
2346 * `get the encoding parameters', check if
2347 * the user has the right to do it */
2348 if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
2349 if(!capable(CAP_NET_ADMIN))
2352 dev_load(ifr.ifr_name);
2354 /* Follow me in net/core/wireless.c */
2355 ret = wireless_process_ioctl(&ifr, cmd);
2357 if (!ret && IW_IS_GET(cmd) &&
2358 copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2362 #endif /* WIRELESS_EXT */
2369 * dev_new_index - allocate an ifindex
2371 * Returns a suitable unique value for a new device interface
2372 * number. The caller must hold the rtnl semaphore or the
2373 * dev_base_lock to be sure it remains unique.
2376 int dev_new_index(void)
2382 if (__dev_get_by_index(ifindex) == NULL)
2387 static int dev_boot_phase = 1;
2390 * register_netdevice - register a network device
2391 * @dev: device to register
2393 * Take a completed network device structure and add it to the kernel
2394 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2395 * chain. 0 is returned on success. A negative errno code is returned
2396 * on a failure to set up the device, or if the name is a duplicate.
2398 * Callers must hold the rtnl semaphore. See the comment at the
2399 * end of Space.c for details about the locking. You may want
2400 * register_netdev() instead of this.
2403 * The locking appears insufficient to guarantee two parallel registers
2404 * will not get the same name.
2407 int net_dev_init(void);
2409 int register_netdevice(struct net_device *dev)
2411 struct net_device *d, **dp;
2412 #ifdef CONFIG_NET_DIVERT
2416 spin_lock_init(&dev->queue_lock);
2417 spin_lock_init(&dev->xmit_lock);
2418 dev->xmit_lock_owner = -1;
2419 #ifdef CONFIG_NET_FASTROUTE
2420 dev->fastpath_lock=RW_LOCK_UNLOCKED;
2426 #ifdef CONFIG_NET_DIVERT
2427 ret = alloc_divert_blk(dev);
2430 #endif /* CONFIG_NET_DIVERT */
2434 /* Init, if this function is available */
2435 if (dev->init && dev->init(dev) != 0) {
2436 #ifdef CONFIG_NET_DIVERT
2437 free_divert_blk(dev);
2442 dev->ifindex = dev_new_index();
2443 if (dev->iflink == -1)
2444 dev->iflink = dev->ifindex;
2446 /* Check for existence, and append to tail of chain */
2447 for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2448 if (d == dev || strcmp(d->name, dev->name) == 0) {
2449 #ifdef CONFIG_NET_DIVERT
2450 free_divert_blk(dev);
2456 * nil rebuild_header routine,
2457 * that should never be called and is used just as a bug trap.
2460 if (dev->rebuild_header == NULL)
2461 dev->rebuild_header = default_rebuild_header;
2464 * Default initial state at registry is that the
2465 * device is present.
2468 set_bit(__LINK_STATE_PRESENT, &dev->state);
2471 dev_init_scheduler(dev);
2472 write_lock_bh(&dev_base_lock);
2476 write_unlock_bh(&dev_base_lock);
2478 /* Notify protocols, that a new device appeared. */
2479 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2481 net_run_sbin_hotplug(dev, "register");
2487 * netdev_finish_unregister - complete unregistration
2490 * Destroy and free a dead device. A value of zero is returned on
2494 int netdev_finish_unregister(struct net_device *dev)
2496 BUG_TRAP(dev->ip_ptr==NULL);
2497 BUG_TRAP(dev->ip6_ptr==NULL);
2498 BUG_TRAP(dev->dn_ptr==NULL);
2500 if (!dev->deadbeaf) {
2501 printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
2504 #ifdef NET_REFCNT_DEBUG
2505 printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
2506 (dev->features & NETIF_F_DYNALLOC)?"":", old style");
2508 if (dev->destructor)
2509 dev->destructor(dev);
2510 if (dev->features & NETIF_F_DYNALLOC)
2516 * unregister_netdevice - remove device from the kernel
2519 * This function shuts down a device interface and removes it
2520 * from the kernel tables. On success 0 is returned, on a failure
2521 * a negative errno code is returned.
2523 * Callers must hold the rtnl semaphore. See the comment at the
2524 * end of Space.c for details about the locking. You may want
2525 * unregister_netdev() instead of this.
2528 int unregister_netdevice(struct net_device *dev)
2530 unsigned long now, warning_time;
2531 struct net_device *d, **dp;
2533 /* If device is running, close it first. */
2534 if (dev->flags & IFF_UP)
2537 BUG_TRAP(dev->deadbeaf==0);
2540 /* And unlink it from device chain. */
2541 for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
2543 write_lock_bh(&dev_base_lock);
2545 write_unlock_bh(&dev_base_lock);
2550 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
2554 /* Synchronize to net_rx_action. */
2555 br_write_lock_bh(BR_NETPROTO_LOCK);
2556 br_write_unlock_bh(BR_NETPROTO_LOCK);
2558 if (dev_boot_phase == 0) {
2559 #ifdef CONFIG_NET_FASTROUTE
2560 dev_clear_fastroute(dev);
2563 /* Shutdown queueing discipline. */
2566 net_run_sbin_hotplug(dev, "unregister");
2568 /* Notify protocols, that we are about to destroy
2569 this device. They should clean all the things.
2571 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2574 * Flush the multicast chain
2576 dev_mc_discard(dev);
2582 /* Notifier chain MUST detach us from master device. */
2583 BUG_TRAP(dev->master==NULL);
2585 #ifdef CONFIG_NET_DIVERT
2586 free_divert_blk(dev);
2589 if (dev->features & NETIF_F_DYNALLOC) {
2590 #ifdef NET_REFCNT_DEBUG
2591 if (atomic_read(&dev->refcnt) != 1)
2592 printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
2598 /* Last reference is our one */
2599 if (atomic_read(&dev->refcnt) == 1) {
2604 #ifdef NET_REFCNT_DEBUG
2605 printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
2608 /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
2609 it means that someone in the kernel still has a reference
2610 to this device and we cannot release it.
2612 "New style" devices have destructors, hence we can return from this
2613 function and destructor will do all the work later. As of kernel 2.4.0
2614 there are very few "New Style" devices.
2616 "Old style" devices expect that the device is free of any references
2617 upon exit from this function.
2618 We cannot return from this function until all such references have
2619 fallen away. This is because the caller of this function will probably
2620 immediately kfree(*dev) and then be unloaded via sys_delete_module.
2622 So, we linger until all references fall away. The duration of the
2623 linger is basically unbounded! It is driven by, for example, the
2624 current setting of sysctl_ipfrag_time.
2626 After 1 second, we start to rebroadcast unregister notifications
2627 in hope that careless clients will release the device.
2631 now = warning_time = jiffies;
2632 while (atomic_read(&dev->refcnt) != 1) {
2633 if ((jiffies - now) > 1*HZ) {
2634 /* Rebroadcast unregister notification */
2635 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2637 current->state = TASK_INTERRUPTIBLE;
2638 schedule_timeout(HZ/4);
2639 current->state = TASK_RUNNING;
2640 if ((jiffies - warning_time) > 10*HZ) {
2641 printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
2642 "become free. Usage count = %d\n",
2643 dev->name, atomic_read(&dev->refcnt));
2644 warning_time = jiffies;
2653 * Initialize the DEV module. At boot time this walks the device list and
2654 * unhooks any devices that fail to initialise (normally hardware not
2655 * present) and leaves us with a valid list of present and active devices.
2659 extern void net_device_init(void);
2660 extern void ip_auto_config(void);
2661 struct proc_dir_entry *proc_net_drivers;
2662 #ifdef CONFIG_NET_DIVERT
2663 extern void dv_init(void);
2664 #endif /* CONFIG_NET_DIVERT */
2668 * Callers must hold the rtnl semaphore. See the comment at the
2669 * end of Space.c for details about the locking.
2671 int __init net_dev_init(void)
2673 struct net_device *dev, **dp;
2676 if (!dev_boot_phase)
2680 #ifdef CONFIG_NET_DIVERT
2682 #endif /* CONFIG_NET_DIVERT */
2685 * Initialise the packet receive queues.
2688 for (i = 0; i < NR_CPUS; i++) {
2689 struct softnet_data *queue;
2691 queue = &softnet_data[i];
2692 skb_queue_head_init(&queue->input_pkt_queue);
2693 queue->throttle = 0;
2694 queue->cng_level = 0;
2695 queue->avg_blog = 10; /* arbitrary non-zero */
2696 queue->completion_queue = NULL;
2697 INIT_LIST_HEAD(&queue->poll_list);
2698 set_bit(__LINK_STATE_START, &queue->blog_dev.state);
2699 queue->blog_dev.weight = weight_p;
2700 queue->blog_dev.poll = process_backlog;
2701 atomic_set(&queue->blog_dev.refcnt, 1);
2704 #ifdef CONFIG_NET_PROFILE
2706 NET_PROFILE_REGISTER(dev_queue_xmit);
2707 NET_PROFILE_REGISTER(softnet_process);
2710 #ifdef OFFLINE_SAMPLE
2711 samp_timer.expires = jiffies + (10 * HZ);
2712 add_timer(&samp_timer);
2717 * If the call to dev->init fails, the dev is removed
2718 * from the chain disconnecting the device until the
2721 * NB At boot phase networking is dead. No locking is required.
2722 * But we still preserve dev_base_lock for sanity.
2726 while ((dev = *dp) != NULL) {
2727 spin_lock_init(&dev->queue_lock);
2728 spin_lock_init(&dev->xmit_lock);
2729 #ifdef CONFIG_NET_FASTROUTE
2730 dev->fastpath_lock = RW_LOCK_UNLOCKED;
2732 dev->xmit_lock_owner = -1;
2737 * Allocate name. If the init() fails
2738 * the name will be reissued correctly.
2740 if (strchr(dev->name, '%'))
2741 dev_alloc_name(dev, dev->name);
2744 * Check boot time settings for the device.
2746 netdev_boot_setup_check(dev);
2748 if (dev->init && dev->init(dev)) {
2750 * It failed to come up. It will be unhooked later.
2751 * dev_alloc_name can now advance to next suitable
2752 * name that is checked next.
2758 dev->ifindex = dev_new_index();
2759 if (dev->iflink == -1)
2760 dev->iflink = dev->ifindex;
2761 if (dev->rebuild_header == NULL)
2762 dev->rebuild_header = default_rebuild_header;
2763 dev_init_scheduler(dev);
2764 set_bit(__LINK_STATE_PRESENT, &dev->state);
2769 * Unhook devices that failed to come up
2772 while ((dev = *dp) != NULL) {
2773 if (dev->deadbeaf) {
2774 write_lock_bh(&dev_base_lock);
2776 write_unlock_bh(&dev_base_lock);
2783 #ifdef CONFIG_PROC_FS
2784 proc_net_create("dev", 0, dev_get_info);
2785 create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
2786 proc_net_drivers = proc_mkdir("net/drivers", 0);
2788 /* Available in net/core/wireless.c */
2789 proc_net_create("wireless", 0, dev_get_wireless_info);
2790 #endif /* WIRELESS_EXT */
2791 #endif /* CONFIG_PROC_FS */
2795 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
2796 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
2801 #ifdef CONFIG_NET_SCHED
2805 * Initialise network devices
2813 #ifdef CONFIG_HOTPLUG
2815 /* Notify userspace when a netdevice event occurs,
2816 * by running '/sbin/hotplug net' with certain
2817 * environment variables set.
2820 static int net_run_sbin_hotplug(struct net_device *dev, char *action)
2822 char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
2825 sprintf(ifname, "INTERFACE=%s", dev->name);
2826 sprintf(action_str, "ACTION=%s", action);
2829 argv[i++] = hotplug_path;
2834 /* minimal command environment */
2835 envp [i++] = "HOME=/";
2836 envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
2837 envp [i++] = ifname;
2838 envp [i++] = action_str;
2841 return call_usermodehelper(argv [0], argv, envp);