2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/socket.h>
21 #include <linux/sched.h>
22 #include <linux/netdevice.h>
24 #include <linux/sysctl.h>
26 #include <net/neighbour.h>
29 #include <linux/rtnetlink.h>
33 #define NEIGH_PRINTK(x...) printk(x)
34 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
35 #define NEIGH_PRINTK0 NEIGH_PRINTK
36 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
37 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
41 #define NEIGH_PRINTK1 NEIGH_PRINTK
45 #define NEIGH_PRINTK2 NEIGH_PRINTK
48 static void neigh_timer_handler(unsigned long arg);
50 static void neigh_app_notify(struct neighbour *n);
52 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
54 static int neigh_glbl_allocs;
55 static struct neigh_table *neigh_tables;
58 Neighbour hash table buckets are protected with rwlock tbl->lock.
60 - All the scans/updates to hash buckets MUST be made under this lock.
61 - NOTHING clever should be made under this lock: no callbacks
62 to protocol backends, no attempts to send something to network.
63 It will result in deadlocks, if backend/driver wants to use neighbour
65 - If the entry requires some non-trivial actions, increase
66 its reference count and release table lock.
68 Neighbour entries are protected:
69 - with reference count.
70 - with rwlock neigh->lock
72 Reference count prevents destruction.
74 neigh->lock mainly serializes ll address data and its validity state.
75 However, the same lock is used to protect another entry fields:
79 Again, nothing clever shall be made under neigh->lock,
80 the most complicated procedure, which we allow is dev->hard_header.
81 It is supposed, that dev->hard_header is simplistic and does
82 not make callbacks to neighbour tables.
84 The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
85 list of neighbour tables. This list is used only in process context,
88 static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;
/* Sink output handler installed on dead/stray neighbour entries (see
 * neigh_ifdown/neigh_alloc, which assign it to n->output).  Body is not
 * visible in this sampled listing -- presumably it frees the skb and
 * returns an error; TODO confirm against the full source. */
90 static int neigh_blackhole(struct sk_buff *skb)
97 * It is random distribution in the interval (1/2)*base...(3/2)*base.
98 * It corresponds to default IPv6 settings and is not overridable,
99 * because it is really a reasonable choice.
/* Return a pseudo-random reachable time drawn uniformly from the
 * interval [base/2, 3*base/2), matching the default IPv6 NUD behaviour
 * described in the comment above. */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	return (base >> 1) + (net_random() % base);
}
/*
 * neigh_forced_gc() - emergency sweep of every hash bucket, run when the
 * table is over its gc thresholds (see neigh_alloc).  Under tbl->lock it
 * frees entries that are unreferenced (refcnt == 1), not PERMANENT, and
 * past the INCOMPLETE retransmit grace period, then stamps tbl->last_flush.
 * NOTE(review): listing is sampled -- the unlink/release statements and the
 * return value (count of freed entries, presumably) are not visible.
 */
108 static int neigh_forced_gc(struct neigh_table *tbl)
113 for (i=0; i<=NEIGH_HASHMASK; i++) {
114 struct neighbour *n, **np;
116 np = &tbl->hash_buckets[i];
117 write_lock_bh(&tbl->lock);
118 while ((n = *np) != NULL) {
119 /* Neighbour record may be discarded if:
120 - nobody refers to it.
121 - it is not permanent
122 - (NEW and probably wrong)
123 INCOMPLETE entries are kept at least for
124 n->parms->retrans_time, otherwise we could
125 flood network with resolution requests.
126 It is not clear, what is better table overflow
129 write_lock(&n->lock);
130 if (atomic_read(&n->refcnt) == 1 &&
131 !(n->nud_state&NUD_PERMANENT) &&
132 (n->nud_state != NUD_INCOMPLETE ||
133 jiffies - n->used > n->parms->retrans_time)) {
/* entry qualifies for destruction (unlink not visible in listing) */
137 write_unlock(&n->lock);
141 write_unlock(&n->lock);
144 write_unlock_bh(&tbl->lock);
147 tbl->last_flush = jiffies;
/* Cancel a neighbour's pending state-machine timer, if one is armed
 * (NUD_IN_TIMER).  NOTE(review): sampled listing -- the reference drop
 * for a successfully deleted timer and the return value are not visible. */
151 static int neigh_del_timer(struct neighbour *n)
153 if (n->nud_state & NUD_IN_TIMER) {
154 if (del_timer(&n->timer)) {
/* Drain every skb queued on a proxy queue.  The per-skb cleanup
 * (presumably dst_release + kfree_skb) is not visible in this listing. */
162 static void pneigh_queue_purge(struct sk_buff_head *list)
166 while ((skb = skb_dequeue(list)) != NULL) {
/*
 * neigh_ifdown() - purge or neutralize all neighbour state attached to a
 * device going down (dev == NULL means "all devices", used by
 * neigh_table_clear).  Entries still referenced elsewhere cannot be freed,
 * so they are made harmless instead: timers killed, queue purged, output
 * redirected to neigh_blackhole, parms detached back to the table default.
 * Also tears down the proxy side (pneigh_ifdown, proxy timer and queue).
 */
172 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
176 write_lock_bh(&tbl->lock);
178 for (i=0; i<=NEIGH_HASHMASK; i++) {
179 struct neighbour *n, **np;
181 np = &tbl->hash_buckets[i];
182 while ((n = *np) != NULL) {
/* skip entries that belong to some other device */
183 if (dev && n->dev != dev) {
188 write_lock(&n->lock);
192 if (atomic_read(&n->refcnt) != 1) {
193 /* The most unpleasant situation.
194 We must destroy neighbour entry,
195 but someone still uses it.
197 The destroy will be delayed until
198 the last user releases us, but
199 we must kill timers etc. and move
/* detach from per-device parms; they may be freed after us */
202 n->parms = &tbl->parms;
203 skb_queue_purge(&n->arp_queue);
204 n->output = neigh_blackhole;
205 if (n->nud_state&NUD_VALID)
206 n->nud_state = NUD_NOARP;
208 n->nud_state = NUD_NONE;
209 NEIGH_PRINTK2("neigh %p is stray.\n", n);
211 write_unlock(&n->lock);
216 pneigh_ifdown(tbl, dev);
217 write_unlock_bh(&tbl->lock);
219 del_timer_sync(&tbl->proxy_timer);
220 pneigh_queue_purge(&tbl->proxy_queue);
/*
 * neigh_alloc() - allocate and zero-init a neighbour entry from the
 * table's slab cache.  If the table is over gc_thresh3, or over gc_thresh2
 * with no flush in the last 5 seconds, a forced GC runs first; allocation
 * is refused (NULL, presumably -- not visible here) if still over thresh3.
 * New entries start in NUD_NONE with blackhole output and refcnt 1.
 */
224 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
227 unsigned long now = jiffies;
229 if (tbl->entries > tbl->gc_thresh3 ||
230 (tbl->entries > tbl->gc_thresh2 &&
231 now - tbl->last_flush > 5*HZ)) {
232 if (neigh_forced_gc(tbl) == 0 &&
233 tbl->entries > tbl->gc_thresh3)
237 n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
241 memset(n, 0, tbl->entry_size);
243 skb_queue_head_init(&n->arp_queue);
244 n->lock = RW_LOCK_UNLOCKED;
245 n->updated = n->used = now;
246 n->nud_state = NUD_NONE;
247 n->output = neigh_blackhole;
248 n->parms = &tbl->parms;
249 init_timer(&n->timer);
250 n->timer.function = neigh_timer_handler;
251 n->timer.data = (unsigned long)n;
256 atomic_set(&n->refcnt, 1);
/*
 * neigh_lookup() - find an entry by (key, device) in the hash table.
 * Takes tbl->lock for reading; the matched branch (presumably neigh_hold
 * + break) is not visible in this sampled listing.
 */
261 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
262 struct net_device *dev)
266 int key_len = tbl->key_len;
268 hash_val = tbl->hash(pkey, dev);
270 read_lock_bh(&tbl->lock);
271 for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
/* match requires same device AND same primary key (device check sampled out) */
273 memcmp(n->primary_key, pkey, key_len) == 0) {
278 read_unlock_bh(&tbl->lock);
/*
 * neigh_create() - allocate and insert a new neighbour for (pkey, dev).
 * Runs protocol (tbl->constructor) and device (parms->neigh_setup) hooks,
 * then inserts under tbl->lock.  If a racing insert already added an
 * equivalent entry, that one wins (the release of the fresh entry and the
 * return of n1 are in lines sampled out of this listing).  Errors are
 * returned as ERR_PTR values.
 */
282 struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey,
283 struct net_device *dev)
285 struct neighbour *n, *n1;
287 int key_len = tbl->key_len;
290 n = neigh_alloc(tbl);
292 return ERR_PTR(-ENOBUFS);
294 memcpy(n->primary_key, pkey, key_len);
298 /* Protocol specific setup. */
299 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
301 return ERR_PTR(error);
304 /* Device specific setup. */
305 if (n->parms->neigh_setup &&
306 (error = n->parms->neigh_setup(n)) < 0) {
308 return ERR_PTR(error);
/* back-date confirmation so the entry starts out unconfirmed */
311 n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
313 hash_val = tbl->hash(pkey, dev);
315 write_lock_bh(&tbl->lock);
/* re-check for a concurrent insert of the same (key, dev) */
316 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
317 if (dev == n1->dev &&
318 memcmp(n1->primary_key, pkey, key_len) == 0) {
320 write_unlock_bh(&tbl->lock);
326 n->next = tbl->hash_buckets[hash_val];
327 tbl->hash_buckets[hash_val] = n;
330 write_unlock_bh(&tbl->lock);
331 NEIGH_PRINTK2("neigh %p is created.\n", n);
/*
 * pneigh_lookup() - find (or, with creat != 0, create) a proxy-neighbour
 * entry for pkey.  Uses a simple byte-folding hash over the last 4 key
 * bytes.  Creation allocates with GFP_KERNEL, runs the optional
 * tbl->pconstructor hook, and links the entry under tbl->lock.
 * NOTE(review): the "creat == 0 -> return NULL" path and the pconstructor
 * failure cleanup are in lines sampled out of this listing.
 */
335 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
336 struct net_device *dev, int creat)
338 struct pneigh_entry *n;
340 int key_len = tbl->key_len;
342 hash_val = *(u32*)(pkey + key_len - 4);
343 hash_val ^= (hash_val>>16);
344 hash_val ^= hash_val>>8;
345 hash_val ^= hash_val>>4;
346 hash_val &= PNEIGH_HASHMASK;
348 read_lock_bh(&tbl->lock);
350 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
/* a wildcard entry (n->dev == NULL) matches any device */
351 if (memcmp(n->key, pkey, key_len) == 0 &&
352 (n->dev == dev || !n->dev)) {
353 read_unlock_bh(&tbl->lock);
357 read_unlock_bh(&tbl->lock);
361 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
365 memcpy(n->key, pkey, key_len);
368 if (tbl->pconstructor && tbl->pconstructor(n)) {
373 write_lock_bh(&tbl->lock);
374 n->next = tbl->phash_buckets[hash_val];
375 tbl->phash_buckets[hash_val] = n;
376 write_unlock_bh(&tbl->lock);
/*
 * pneigh_delete() - remove the proxy entry matching (pkey, dev) exactly.
 * Same folding hash as pneigh_lookup.  The unlink statement between the
 * lock/unlock pair and the kfree after pdestructor are sampled out.
 */
381 int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
383 struct pneigh_entry *n, **np;
385 int key_len = tbl->key_len;
387 hash_val = *(u32*)(pkey + key_len - 4);
388 hash_val ^= (hash_val>>16);
389 hash_val ^= hash_val>>8;
390 hash_val ^= hash_val>>4;
391 hash_val &= PNEIGH_HASHMASK;
393 for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
394 if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
395 write_lock_bh(&tbl->lock);
397 write_unlock_bh(&tbl->lock);
398 if (tbl->pdestructor)
/*
 * pneigh_ifdown() - drop every proxy entry bound to dev (or all entries
 * when dev == NULL).  Caller holds tbl->lock (see neigh_ifdown).  The
 * unlink/pdestructor-call/kfree sequence is sampled out of this listing.
 */
407 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
409 struct pneigh_entry *n, **np;
412 for (h=0; h<=PNEIGH_HASHMASK; h++) {
413 np = &tbl->phash_buckets[h];
414 while ((n=*np) != NULL) {
415 if (n->dev == dev || dev == NULL) {
417 if (tbl->pdestructor)
430 * neighbour must already be out of the table;
/*
 * neigh_destroy() - final teardown of a neighbour entry whose refcount
 * has dropped; the entry must already be unlinked from the table (see the
 * comment above).  Kills the timer, detaches and releases each cached
 * hh_cache entry (redirecting its output to neigh_blackhole first), runs
 * the ops destructor, purges queued skbs, and frees the slab object.
 */
433 void neigh_destroy(struct neighbour *neigh)
/* debug check: destroying an entry still in some live state is a bug */
438 printk("Destroying alive neighbour %p from %08lx\n", neigh,
439 *(((unsigned long*)&neigh)-1));
443 if (neigh_del_timer(neigh))
444 printk("Impossible event.\n");
446 while ((hh = neigh->hh) != NULL) {
447 neigh->hh = hh->hh_next;
449 write_lock_bh(&hh->hh_lock);
450 hh->hh_output = neigh_blackhole;
451 write_unlock_bh(&hh->hh_lock);
452 if (atomic_dec_and_test(&hh->hh_refcnt))
456 if (neigh->ops && neigh->ops->destructor)
457 (neigh->ops->destructor)(neigh);
459 skb_queue_purge(&neigh->arp_queue);
463 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
466 neigh->tbl->entries--;
467 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
470 /* Neighbour state is suspicious;
473 Called with write_locked neigh.
475 static void neigh_suspect(struct neighbour *neigh)
479 NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh);
481 neigh->output = neigh->ops->output;
483 for (hh = neigh->hh; hh; hh = hh->hh_next)
484 hh->hh_output = neigh->ops->output;
487 /* Neighbour state is OK;
490 Called with write_locked neigh.
492 static void neigh_connect(struct neighbour *neigh)
496 NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
498 neigh->output = neigh->ops->connected_output;
500 for (hh = neigh->hh; hh; hh = hh->hh_next)
501 hh->hh_output = neigh->ops->hh_output;
505 Transitions NUD_STALE <-> NUD_REACHABLE do not occur
506 when fast path is built: we have no timers associated with
507 these states, we do not have time to check state when sending.
508 neigh_periodic_timer periodically checks neigh->confirmed
509 time and moves NUD_REACHABLE -> NUD_STALE.
511 If a routine wants to know TRUE entry state, it calls
512 neigh_sync before checking state.
514 Called with write_locked neigh.
/*
 * neigh_sync() - reconcile the entry's NUD state with wall-clock
 * confirmation age (see the comment above): REACHABLE entries whose
 * confirmation has expired become STALE, and other VALID entries with a
 * fresh confirmation become REACHABLE.  NOARP/PERMANENT entries are left
 * alone.  Called with the neighbour write-locked.
 * NOTE(review): the neigh_suspect/neigh_connect calls that accompany the
 * state flips are in lines sampled out of this listing.
 */
517 static void neigh_sync(struct neighbour *n)
519 unsigned long now = jiffies;
520 u8 state = n->nud_state;
522 if (state&(NUD_NOARP|NUD_PERMANENT))
524 if (state&NUD_REACHABLE) {
525 if (now - n->confirmed > n->parms->reachable_time) {
526 n->nud_state = NUD_STALE;
529 } else if (state&NUD_VALID) {
530 if (now - n->confirmed < n->parms->reachable_time) {
532 n->nud_state = NUD_REACHABLE;
/*
 * Periodic garbage-collection sweep (tasklet body on SMP; see the
 * trampoline below).  Under tbl->lock it:
 *  - re-randomizes reachable_time for every parms block every 300s,
 *  - frees unreferenced entries that FAILED or exceeded gc_staletime,
 *  - demotes stale REACHABLE entries to NUD_STALE,
 * then re-arms the gc timer.  PERMANENT and in-timer entries are skipped.
 * NOTE(review): the unlink/release path after the "qualifies for gc" test
 * is in lines sampled out of this listing.
 */
538 static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg)
540 struct neigh_table *tbl = (struct neigh_table*)arg;
541 unsigned long now = jiffies;
545 write_lock(&tbl->lock);
548 * periodically recompute ReachableTime from random function
551 if (now - tbl->last_rand > 300*HZ) {
552 struct neigh_parms *p;
553 tbl->last_rand = now;
554 for (p=&tbl->parms; p; p = p->next)
555 p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
558 for (i=0; i <= NEIGH_HASHMASK; i++) {
559 struct neighbour *n, **np;
561 np = &tbl->hash_buckets[i];
562 while ((n = *np) != NULL) {
565 write_lock(&n->lock);
567 state = n->nud_state;
568 if (state&(NUD_PERMANENT|NUD_IN_TIMER)) {
569 write_unlock(&n->lock);
/* used must never lag behind confirmed */
573 if ((long)(n->used - n->confirmed) < 0)
574 n->used = n->confirmed;
576 if (atomic_read(&n->refcnt) == 1 &&
577 (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
580 write_unlock(&n->lock);
585 if (n->nud_state&NUD_REACHABLE &&
586 now - n->confirmed > n->parms->reachable_time) {
587 n->nud_state = NUD_STALE;
590 write_unlock(&n->lock);
597 mod_timer(&tbl->gc_timer, now + tbl->gc_interval);
598 write_unlock(&tbl->lock);
602 static void neigh_periodic_timer(unsigned long arg)
604 struct neigh_table *tbl = (struct neigh_table*)arg;
606 tasklet_schedule(&tbl->gc_task);
610 static __inline__ int neigh_max_probes(struct neighbour *n)
612 struct neigh_parms *p = n->parms;
613 return p->ucast_probes + p->app_probes + p->mcast_probes;
617 /* Called when a timer expires for a neighbour entry. */
/*
 * neigh_timer_handler() - per-entry NUD state-machine timer.  Under the
 * neighbour write-lock it either:
 *  - confirms a VALID entry whose confirmation is still fresh (-> REACHABLE),
 *  - promotes DELAY -> PROBE and resets the probe counter,
 *  - fails the entry once the probe budget (neigh_max_probes) is exhausted,
 *    reporting errors for every queued skb (dropping the lock around the
 *    complicated error_report callback to avoid re-entrancy deadlock), or
 *  - re-arms the timer and sends another solicitation.
 * Finally notifies userspace (ARPD) if requested, and drops the reference
 * the timer held.  Interior branch boundaries are sampled out of this view.
 */
619 static void neigh_timer_handler(unsigned long arg)
621 unsigned long now = jiffies;
622 struct neighbour *neigh = (struct neighbour*)arg;
626 write_lock(&neigh->lock);
628 state = neigh->nud_state;
/* timer fired on an entry with no timer state: driver/stack bug */
630 if (!(state&NUD_IN_TIMER)) {
632 printk("neigh: timer & !nud_in_timer\n");
637 if ((state&NUD_VALID) &&
638 now - neigh->confirmed < neigh->parms->reachable_time) {
639 neigh->nud_state = NUD_REACHABLE;
640 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
641 neigh_connect(neigh);
644 if (state == NUD_DELAY) {
645 NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
646 neigh->nud_state = NUD_PROBE;
647 atomic_set(&neigh->probes, 0);
650 if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
653 neigh->nud_state = NUD_FAILED;
655 neigh->tbl->stats.res_failed++;
656 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
658 /* It is very thin place. report_unreachable is very complicated
659 routine. Particularly, it can hit the same neighbour entry!
661 So that, we try to be accurate and avoid dead loop. --ANK
663 while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
664 write_unlock(&neigh->lock);
665 neigh->ops->error_report(neigh, skb);
666 write_lock(&neigh->lock);
668 skb_queue_purge(&neigh->arp_queue);
/* not failed yet: schedule next retransmit and solicit again */
672 neigh->timer.expires = now + neigh->parms->retrans_time;
673 add_timer(&neigh->timer);
674 write_unlock(&neigh->lock);
676 neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
677 atomic_inc(&neigh->probes);
681 write_unlock(&neigh->lock);
683 if (notify && neigh->parms->app_probes)
684 neigh_app_notify(neigh);
/* drop the reference held by the (now expired) timer */
686 neigh_release(neigh);
/*
 * __neigh_event_send() - slow path of neigh_event_send(): start address
 * resolution for an entry that is not CONNECTED/DELAY/PROBE.  From
 * NUD_NONE it either begins INCOMPLETE probing (timer + first solicit,
 * with the lock dropped around the solicit callback) or fails immediately
 * when no probing is configured.  While INCOMPLETE, the skb is queued
 * (oldest dropped at queue_len); STALE entries move to DELAY.
 * Return statements are sampled out -- presumably nonzero means "skb was
 * taken/queued or resolution failed"; TODO confirm against full source.
 */
689 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
691 write_lock_bh(&neigh->lock);
692 if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
693 if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
694 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
/* start with the unicast budget already consumed */
695 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
696 neigh->nud_state = NUD_INCOMPLETE;
698 neigh->timer.expires = jiffies + neigh->parms->retrans_time;
699 add_timer(&neigh->timer);
700 write_unlock_bh(&neigh->lock);
701 neigh->ops->solicit(neigh, skb);
702 atomic_inc(&neigh->probes);
703 write_lock_bh(&neigh->lock);
/* no probing configured: fail immediately */
705 neigh->nud_state = NUD_FAILED;
706 write_unlock_bh(&neigh->lock);
713 if (neigh->nud_state == NUD_INCOMPLETE) {
/* bound the pending queue; drop the oldest skb when full */
715 if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
716 struct sk_buff *buff;
717 buff = neigh->arp_queue.next;
718 __skb_unlink(buff, &neigh->arp_queue);
721 __skb_queue_tail(&neigh->arp_queue, skb);
723 write_unlock_bh(&neigh->lock);
726 if (neigh->nud_state == NUD_STALE) {
727 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
729 neigh->nud_state = NUD_DELAY;
730 neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
731 add_timer(&neigh->timer);
734 write_unlock_bh(&neigh->lock);
/*
 * neigh_update_hhs() - refresh every cached hardware header after the
 * neighbour's link-layer address (neigh->ha) changed, via the device's
 * header_cache_update hook.  NOTE(review): the NULL check on `update`
 * before the loop is in a line sampled out of this listing.
 */
738 static __inline__ void neigh_update_hhs(struct neighbour *neigh)
741 void (*update)(struct hh_cache*, struct net_device*, unsigned char*) =
742 neigh->dev->header_cache_update;
745 for (hh=neigh->hh; hh; hh=hh->hh_next) {
746 write_lock_bh(&hh->hh_lock);
747 update(hh, neigh->dev, neigh->ha);
748 write_unlock_bh(&hh->hh_lock);
755 /* Generic update routine.
756 -- lladdr is new lladdr or NULL, if it is not supplied.
758 -- override==1 allows to override existing lladdr, if it is different.
759 -- arp==0 means that the change is administrative.
761 Caller MUST hold reference count on the entry.
/*
 * neigh_update() - generic state/address update (see contract above:
 * lladdr may be NULL, override permits changing a differing cached
 * address, arp==0 marks an administrative change which may touch even
 * NOARP/PERMANENT entries).  Validates/normalizes lladdr against the
 * cached one, applies the new NUD state, propagates an address change to
 * the hh caches, switches between connected/suspect output, flushes the
 * pending arp_queue through the (possibly different) skb->dst->neighbour,
 * and finally notifies userspace if requested.
 * NOTE(review): several branch endings, the `goto out` labels and the
 * return value are sampled out of this listing.
 */
764 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override, int arp)
769 struct net_device *dev = neigh->dev;
771 write_lock_bh(&neigh->lock);
772 old = neigh->nud_state;
/* ARP-originated updates may not touch NOARP/PERMANENT entries */
775 if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
778 if (!(new&NUD_VALID)) {
779 neigh_del_timer(neigh);
780 if (old&NUD_CONNECTED)
781 neigh_suspect(neigh);
782 neigh->nud_state = new;
784 notify = old&NUD_VALID;
788 /* Compare new lladdr with cached one */
789 if (dev->addr_len == 0) {
790 /* First case: device needs no address. */
793 /* The second case: if something is already cached
794 and a new address is proposed:
796 - if they are different, check override flag
799 if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
805 /* No address is supplied; if we know something,
806 use it, otherwise discard the request.
809 if (!(old&NUD_VALID))
815 old = neigh->nud_state;
816 if (new&NUD_CONNECTED)
817 neigh->confirmed = jiffies;
818 neigh->updated = jiffies;
820 /* If entry was valid and address is not changed,
821 do not change entry state, if new one is STALE.
825 if (lladdr == neigh->ha)
826 if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
829 neigh_del_timer(neigh);
830 neigh->nud_state = new;
831 if (lladdr != neigh->ha) {
832 memcpy(&neigh->ha, lladdr, dev->addr_len);
833 neigh_update_hhs(neigh);
/* address changed while not connected: back-date confirmation */
834 if (!(new&NUD_CONNECTED))
835 neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
842 if (new&NUD_CONNECTED)
843 neigh_connect(neigh);
845 neigh_suspect(neigh);
846 if (!(old&NUD_VALID)) {
849 /* Again: avoid dead loop if something went wrong */
851 while (neigh->nud_state&NUD_VALID &&
852 (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
853 struct neighbour *n1 = neigh;
854 write_unlock_bh(&neigh->lock);
855 /* On shaper/eql skb->dst->neighbour != neigh :( */
856 if (skb->dst && skb->dst->neighbour)
857 n1 = skb->dst->neighbour;
859 write_lock_bh(&neigh->lock);
861 skb_queue_purge(&neigh->arp_queue);
864 write_unlock_bh(&neigh->lock);
866 if (notify && neigh->parms->app_probes)
867 neigh_app_notify(neigh);
/*
 * neigh_event_ns() - handle a received neighbour solicitation: look up
 * (creating when lladdr is given or the device is addressless) and mark
 * the sender STALE with override.  The return of the looked-up entry is
 * in a line sampled out of this listing.
 */
872 struct neighbour * neigh_event_ns(struct neigh_table *tbl,
873 u8 *lladdr, void *saddr,
874 struct net_device *dev)
876 struct neighbour *neigh;
878 neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
880 neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
/*
 * neigh_hh_init() - attach a cached hardware header for `protocol` to the
 * neighbour (reusing an existing hh of the same type if present) and to
 * the dst entry.  A fresh hh is filled by dev->hard_header_cache and its
 * output handler chosen from the neighbour's connection state.
 * NOTE(review): the linking of hh into n->hh / dst->hh and the kfree on
 * hard_header_cache failure are in lines sampled out of this listing.
 */
884 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
886 struct hh_cache *hh = NULL;
887 struct net_device *dev = dst->dev;
889 for (hh=n->hh; hh; hh = hh->hh_next)
890 if (hh->hh_type == protocol)
893 if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
894 memset(hh, 0, sizeof(struct hh_cache));
895 hh->hh_lock = RW_LOCK_UNLOCKED;
896 hh->hh_type = protocol;
897 atomic_set(&hh->hh_refcnt, 0);
899 if (dev->hard_header_cache(n, hh)) {
/* reference for the neighbour's hh list */
903 atomic_inc(&hh->hh_refcnt);
906 if (n->nud_state&NUD_CONNECTED)
907 hh->hh_output = n->ops->hh_output;
909 hh->hh_output = n->ops->output;
/* reference for dst->hh */
913 atomic_inc(&hh->hh_refcnt);
918 /* This function can be used in contexts, where only old dev_queue_xmit
919 worked, f.e. if you want to override normal output path (eql, shaper),
920 but resolution is not made yet.
/*
 * neigh_compat_output() - legacy output path (see comment above): build
 * the link-layer header directly via dev->hard_header, falling back to
 * rebuild_header, then hand the skb to dev_queue_xmit.  The failure
 * branch body between the condition and the final xmit is sampled out.
 */
923 int neigh_compat_output(struct sk_buff *skb)
925 struct net_device *dev = skb->dev;
/* rewind to the network header before prepending the MAC header */
927 __skb_pull(skb, skb->nh.raw - skb->data);
929 if (dev->hard_header &&
930 dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
931 dev->rebuild_header(skb))
934 return dev_queue_xmit(skb);
937 /* Slow and careful. */
/*
 * neigh_resolve_output() - slow, careful output path used while an entry
 * is not (yet) connected.  Triggers resolution via neigh_event_send; once
 * the address is usable it builds the link header under the neighbour
 * lock (initializing the dst's hh cache first when the device supports
 * it) and transmits through ops->queue_xmit.
 * NOTE(review): the error/discard paths after the printk are sampled out.
 */
939 int neigh_resolve_output(struct sk_buff *skb)
941 struct dst_entry *dst = skb->dst;
942 struct neighbour *neigh;
944 if (!dst || !(neigh = dst->neighbour))
947 __skb_pull(skb, skb->nh.raw - skb->data);
949 if (neigh_event_send(neigh, skb) == 0) {
951 struct net_device *dev = neigh->dev;
/* first packet on this dst with a caching device: set up dst->hh */
952 if (dev->hard_header_cache && dst->hh == NULL) {
953 write_lock_bh(&neigh->lock);
955 neigh_hh_init(neigh, dst, dst->ops->protocol);
956 err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
957 write_unlock_bh(&neigh->lock);
959 read_lock_bh(&neigh->lock);
960 err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
961 read_unlock_bh(&neigh->lock);
964 return neigh->ops->queue_xmit(skb);
971 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
976 /* As fast as possible without hh cache */
/*
 * neigh_connected_output() - fast output path for connected neighbours on
 * devices without an hh cache: read-lock the neighbour only long enough
 * to copy neigh->ha into the link header, then transmit.
 * The hard_header error branch is sampled out of this listing.
 */
978 int neigh_connected_output(struct sk_buff *skb)
981 struct dst_entry *dst = skb->dst;
982 struct neighbour *neigh = dst->neighbour;
983 struct net_device *dev = neigh->dev;
985 __skb_pull(skb, skb->nh.raw - skb->data);
987 read_lock_bh(&neigh->lock);
988 err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
989 read_unlock_bh(&neigh->lock);
991 return neigh->ops->queue_xmit(skb);
/*
 * neigh_proxy_process() - proxy-delay timer: walk the proxy queue and
 * replay (tbl->proxy_redo) every skb whose deadline -- stored by
 * pneigh_enqueue in skb->stamp.tv_usec -- has passed, dropping entries
 * whose device went down.  Skbs still pending determine sched_next, used
 * to re-arm the timer.  NOTE(review): the tdif<=0 test and the dropped-skb
 * free path are in lines sampled out of this listing.
 */
996 static void neigh_proxy_process(unsigned long arg)
998 struct neigh_table *tbl = (struct neigh_table *)arg;
1000 unsigned long now = jiffies;
1001 struct sk_buff *skb;
1003 spin_lock(&tbl->proxy_queue.lock);
1005 skb = tbl->proxy_queue.next;
1007 while (skb != (struct sk_buff*)&tbl->proxy_queue) {
1008 struct sk_buff *back = skb;
/* stamp.tv_usec holds the jiffies deadline set by pneigh_enqueue */
1009 long tdif = back->stamp.tv_usec - now;
1013 struct net_device *dev = back->dev;
1014 __skb_unlink(back, &tbl->proxy_queue);
1015 if (tbl->proxy_redo && netif_running(dev))
1016 tbl->proxy_redo(back);
1021 } else if (!sched_next || tdif < sched_next)
1024 del_timer(&tbl->proxy_timer);
1026 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1027 spin_unlock(&tbl->proxy_queue.lock);
/*
 * pneigh_enqueue() - defer a proxied request by a random delay up to
 * p->proxy_delay.  The deadline is smuggled in skb->stamp.tv_usec
 * (tv_sec zeroed as a marker) for neigh_proxy_process to read; the queue
 * is capped at p->proxy_qlen (overflow drop path sampled out).  Releases
 * the skb's dst before queueing and (re)arms the proxy timer so it fires
 * no later than the earliest pending deadline.
 */
1030 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1031 struct sk_buff *skb)
1033 unsigned long now = jiffies;
1034 long sched_next = net_random()%p->proxy_delay;
1036 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1040 skb->stamp.tv_sec = 0;
1041 skb->stamp.tv_usec = now + sched_next;
1043 spin_lock(&tbl->proxy_queue.lock);
/* keep the earlier of the existing and the new expiry */
1044 if (del_timer(&tbl->proxy_timer)) {
1045 long tval = tbl->proxy_timer.expires - now;
1046 if (tval < sched_next)
1049 dst_release(skb->dst);
1052 __skb_queue_tail(&tbl->proxy_queue, skb);
1053 mod_timer(&tbl->proxy_timer, now + sched_next);
1054 spin_unlock(&tbl->proxy_queue.lock);
/*
 * neigh_parms_alloc() - clone the table's default parameter block for a
 * device, re-randomize reachable_time, give the device's neigh_setup hook
 * a chance to veto/adjust, and link the block onto tbl->parms.next under
 * tbl->lock.  NOTE(review): the kmalloc-failure and neigh_setup-failure
 * cleanups, and the return statement, are sampled out of this listing.
 */
1058 struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl)
1060 struct neigh_parms *p;
1061 p = kmalloc(sizeof(*p), GFP_KERNEL);
1063 memcpy(p, &tbl->parms, sizeof(*p));
1065 p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
1066 if (dev && dev->neigh_setup) {
1067 if (dev->neigh_setup(dev, p)) {
1072 write_lock_bh(&tbl->lock);
1073 p->next = tbl->parms.next;
1074 tbl->parms.next = p;
1075 write_unlock_bh(&tbl->lock);
/*
 * neigh_parms_release() - unlink and free a per-device parms block.  The
 * table's built-in default (&tbl->parms) and NULL are ignored.  Walks the
 * parms list under tbl->lock; the match test, unlink and kfree inside the
 * loop are sampled out of this listing.  Falls through to a diagnostic
 * when the block was not on the list.
 */
1080 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1082 struct neigh_parms **p;
1084 if (parms == NULL || parms == &tbl->parms)
1086 write_lock_bh(&tbl->lock);
1087 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1090 write_unlock_bh(&tbl->lock);
1091 #ifdef CONFIG_SYSCTL
1092 neigh_sysctl_unregister(parms);
1098 write_unlock_bh(&tbl->lock);
1099 NEIGH_PRINTK1("neigh_parms_release: not found\n");
/*
 * neigh_table_init() - one-time setup for a protocol's neighbour table:
 * create the slab cache (entry size rounded up to 16 bytes), init the
 * gc tasklet/timer, the proxy timer and queue, seed the random
 * reachable_time, and register the table on the global neigh_tables list
 * under neigh_tbl_lock.
 * NOTE(review): last_rand is seeded in the FUTURE (now + 20*reachable_time)
 * which delays the first re-randomization -- looks intentional but worth
 * confirming against the full source.
 */
1103 void neigh_table_init(struct neigh_table *tbl)
1105 unsigned long now = jiffies;
1107 tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);
1109 if (tbl->kmem_cachep == NULL)
1110 tbl->kmem_cachep = kmem_cache_create(tbl->id,
1111 (tbl->entry_size+15)&~15,
1112 0, SLAB_HWCACHE_ALIGN,
1116 tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl);
1118 init_timer(&tbl->gc_timer);
1119 tbl->lock = RW_LOCK_UNLOCKED;
1120 tbl->gc_timer.data = (unsigned long)tbl;
1121 tbl->gc_timer.function = neigh_periodic_timer;
1122 tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
1123 add_timer(&tbl->gc_timer);
1125 init_timer(&tbl->proxy_timer);
1126 tbl->proxy_timer.data = (unsigned long)tbl;
1127 tbl->proxy_timer.function = neigh_proxy_process;
1128 skb_queue_head_init(&tbl->proxy_queue);
1130 tbl->last_flush = now;
1131 tbl->last_rand = now + tbl->parms.reachable_time*20;
1132 write_lock(&neigh_tbl_lock);
1133 tbl->next = neigh_tables;
1135 write_unlock(&neigh_tbl_lock);
/*
 * neigh_table_clear() - inverse of neigh_table_init for module unload:
 * stop gc timer/tasklet and proxy timer, purge the proxy queue, drop all
 * entries via neigh_ifdown(tbl, NULL), warn about leaked entries, unlink
 * the table from neigh_tables, and unregister its sysctl block.  The
 * unlink statement inside the loop and the return value are sampled out.
 */
1138 int neigh_table_clear(struct neigh_table *tbl)
1140 struct neigh_table **tp;
1142 /* It is not clean... Fix it to unload IPv6 module safely */
1143 del_timer_sync(&tbl->gc_timer);
1144 tasklet_kill(&tbl->gc_task);
1145 del_timer_sync(&tbl->proxy_timer);
1146 pneigh_queue_purge(&tbl->proxy_queue);
1147 neigh_ifdown(tbl, NULL);
1149 printk(KERN_CRIT "neighbour leakage\n");
1150 write_lock(&neigh_tbl_lock);
1151 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1157 write_unlock(&neigh_tbl_lock);
1158 #ifdef CONFIG_SYSCTL
1159 neigh_sysctl_unregister(&tbl->parms);
1164 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1166 struct ndmsg *ndm = NLMSG_DATA(nlh);
1167 struct rtattr **nda = arg;
1168 struct neigh_table *tbl;
1169 struct net_device *dev = NULL;
1172 if (ndm->ndm_ifindex) {
1173 if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
1177 read_lock(&neigh_tbl_lock);
1178 for (tbl=neigh_tables; tbl; tbl = tbl->next) {
1179 struct neighbour *n;
1181 if (tbl->family != ndm->ndm_family)
1183 read_unlock(&neigh_tbl_lock);
1186 if (nda[NDA_DST-1] == NULL ||
1187 nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
1190 if (ndm->ndm_flags&NTF_PROXY) {
1191 err = pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
1198 n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
1200 err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
1208 read_unlock(&neigh_tbl_lock);
1213 return -EADDRNOTAVAIL;
/*
 * neigh_add() - rtnetlink RTM_NEWNEIGH handler.  Mirrors neigh_delete's
 * table/device resolution and NDA_DST validation, then: for NTF_PROXY
 * creates a proxy entry via pneigh_lookup(..., 1); otherwise validates
 * the optional NDA_LLADDR, honours NLM_F_EXCL / NLM_F_REPLACE /
 * NLM_F_CREATE flags against an existing entry, creating one with
 * __neigh_lookup_errno when allowed, and applies the address/state via
 * neigh_update (administrative, arp=0).
 * NOTE(review): error returns, neigh_release and dev_put calls are in
 * lines sampled out of this listing.
 */
1216 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1218 struct ndmsg *ndm = NLMSG_DATA(nlh);
1219 struct rtattr **nda = arg;
1220 struct neigh_table *tbl;
1221 struct net_device *dev = NULL;
1223 if (ndm->ndm_ifindex) {
1224 if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
1228 read_lock(&neigh_tbl_lock);
1229 for (tbl=neigh_tables; tbl; tbl = tbl->next) {
1232 struct neighbour *n;
1234 if (tbl->family != ndm->ndm_family)
1236 read_unlock(&neigh_tbl_lock);
1239 if (nda[NDA_DST-1] == NULL ||
1240 nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
1242 if (ndm->ndm_flags&NTF_PROXY) {
1244 if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
1251 if (nda[NDA_LLADDR-1] != NULL &&
1252 nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
1255 n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
1257 if (nlh->nlmsg_flags&NLM_F_EXCL)
1259 override = nlh->nlmsg_flags&NLM_F_REPLACE;
1260 } else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
1263 n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
1270 err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
1281 read_unlock(&neigh_tbl_lock);
/* no table matched the requested family */
1285 return -EADDRNOTAVAIL;
/*
 * neigh_fill_info() - serialize one neighbour into a netlink message:
 * ndmsg header, NDA_DST key, NDA_LLADDR (only when VALID, read under
 * n->lock), and an nda_cacheinfo with ages relative to `now`.  On skb
 * overflow the NLMSG_PUT/RTA_PUT macros jump to the failure labels, which
 * trim the partial message and (in the locked case) drop n->lock.
 */
1289 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
1290 u32 pid, u32 seq, int event)
1292 unsigned long now = jiffies;
1294 struct nlmsghdr *nlh;
1295 unsigned char *b = skb->tail;
1296 struct nda_cacheinfo ci;
1299 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
1300 ndm = NLMSG_DATA(nlh);
1301 ndm->ndm_family = n->ops->family;
1302 ndm->ndm_flags = n->flags;
1303 ndm->ndm_type = n->type;
1304 ndm->ndm_ifindex = n->dev->ifindex;
1305 RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
1306 read_lock_bh(&n->lock);
1308 ndm->ndm_state = n->nud_state;
1309 if (n->nud_state&NUD_VALID)
1310 RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
1311 ci.ndm_used = now - n->used;
1312 ci.ndm_confirmed = now - n->confirmed;
1313 ci.ndm_updated = now - n->updated;
/* report refcnt excluding the reference the caller itself holds */
1314 ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
1315 read_unlock_bh(&n->lock);
1317 RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
1318 nlh->nlmsg_len = skb->tail - b;
/* rtattr_failure path taken while n->lock was held */
1324 read_unlock_bh(&n->lock);
1325 skb_trim(skb, b - skb->data);
/*
 * neigh_dump_table() - dump one table's entries for a netlink dump,
 * resuming from cb->args (s_h/s_idx bucket and index cursors).  Each
 * bucket is walked under tbl->lock; a full skb aborts mid-bucket with the
 * cursor saved for the next continuation.  The idx<s_idx skip test and
 * the cursor save/return lines are sampled out of this listing.
 */
1330 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
1332 struct neighbour *n;
1337 s_idx = idx = cb->args[2];
1338 for (h=0; h <= NEIGH_HASHMASK; h++) {
1339 if (h < s_h) continue;
1342 read_lock_bh(&tbl->lock);
1343 for (n = tbl->hash_buckets[h], idx = 0; n;
1344 n = n->next, idx++) {
1347 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
1348 cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
1349 read_unlock_bh(&tbl->lock);
1355 read_unlock_bh(&tbl->lock);
/*
 * neigh_dump_info() - RTM_GETNEIGH dump entry point: iterate all tables
 * (optionally filtered by the request's address family), resetting the
 * per-table cursor args before each fresh table and delegating to
 * neigh_dump_table.  Resume cursor handling (s_t save) and the return of
 * skb->len are sampled out of this listing.
 */
1363 int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1367 struct neigh_table *tbl;
1368 int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;
1372 read_lock(&neigh_tbl_lock);
1373 for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
1374 if (t < s_t) continue;
1375 if (family && tbl->family != family)
/* starting a new table: clear all cursor args except the table index */
1378 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1379 if (neigh_dump_table(tbl, skb, cb) < 0)
1382 read_unlock(&neigh_tbl_lock);
/*
 * neigh_app_ns() - ask user space (ARPD) to solicit a neighbour: build an
 * RTM_GETNEIGH message for the entry, mark it NLM_F_REQUEST, and
 * broadcast it to the RTMGRP_NEIGH netlink group.  Allocation-failure and
 * fill-failure cleanup lines are sampled out of this listing.
 */
1390 void neigh_app_ns(struct neighbour *n)
1392 struct sk_buff *skb;
1393 struct nlmsghdr *nlh;
1394 int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
1396 skb = alloc_skb(size, GFP_ATOMIC);
1400 if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
1404 nlh = (struct nlmsghdr*)skb->data;
1405 nlh->nlmsg_flags = NLM_F_REQUEST;
1406 NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
1407 netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
/*
 * neigh_app_notify() - inform user space of a neighbour state change:
 * like neigh_app_ns but sends RTM_NEWNEIGH without the request flag.
 * Failure cleanup lines are sampled out of this listing.
 */
1410 static void neigh_app_notify(struct neighbour *n)
1412 struct sk_buff *skb;
1413 struct nlmsghdr *nlh;
1414 int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
1416 skb = alloc_skb(size, GFP_ATOMIC);
1420 if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
1424 nlh = (struct nlmsghdr*)skb->data;
1425 NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
1426 netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
1429 #endif /* CONFIG_ARPD */
1431 #ifdef CONFIG_SYSCTL
/*
 * Template for the per-parms sysctl tree (net/<proto>/neigh/<dev>/...).
 * neigh_sysctl_register() copies this struct, points each neigh_vars[i]
 * .data slot at the matching neigh_parms field (indices 0..11; 12..15 are
 * protocol-private ints stored just past the parms struct), fills in the
 * dynamic names/ids, and wires the four directory levels together.
 * Entries without an explicit handler are sampled out here; time-valued
 * entries use proc_dointvec_jiffies.
 */
1433 struct neigh_sysctl_table
1435 struct ctl_table_header *sysctl_header;
1436 ctl_table neigh_vars[17];
1437 ctl_table neigh_dev[2];
1438 ctl_table neigh_neigh_dir[2];
1439 ctl_table neigh_proto_dir[2];
1440 ctl_table neigh_root_dir[2];
1441 } neigh_sysctl_template = {
1443 {{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
1444 NULL, sizeof(int), 0644, NULL,
1446 {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
1447 NULL, sizeof(int), 0644, NULL,
1449 {NET_NEIGH_APP_SOLICIT, "app_solicit",
1450 NULL, sizeof(int), 0644, NULL,
1452 {NET_NEIGH_RETRANS_TIME, "retrans_time",
1453 NULL, sizeof(int), 0644, NULL,
1455 {NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
1456 NULL, sizeof(int), 0644, NULL,
1457 &proc_dointvec_jiffies},
1458 {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
1459 NULL, sizeof(int), 0644, NULL,
1460 &proc_dointvec_jiffies},
1461 {NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
1462 NULL, sizeof(int), 0644, NULL,
1463 &proc_dointvec_jiffies},
1464 {NET_NEIGH_UNRES_QLEN, "unres_qlen",
1465 NULL, sizeof(int), 0644, NULL,
1467 {NET_NEIGH_PROXY_QLEN, "proxy_qlen",
1468 NULL, sizeof(int), 0644, NULL,
1470 {NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
1471 NULL, sizeof(int), 0644, NULL,
1473 {NET_NEIGH_PROXY_DELAY, "proxy_delay",
1474 NULL, sizeof(int), 0644, NULL,
1476 {NET_NEIGH_LOCKTIME, "locktime",
1477 NULL, sizeof(int), 0644, NULL,
1479 {NET_NEIGH_GC_INTERVAL, "gc_interval",
1480 NULL, sizeof(int), 0644, NULL,
1481 &proc_dointvec_jiffies},
1482 {NET_NEIGH_GC_THRESH1, "gc_thresh1",
1483 NULL, sizeof(int), 0644, NULL,
1485 {NET_NEIGH_GC_THRESH2, "gc_thresh2",
1486 NULL, sizeof(int), 0644, NULL,
1488 {NET_NEIGH_GC_THRESH3, "gc_thresh3",
1489 NULL, sizeof(int), 0644, NULL,
1493 {{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
1494 {{0, "neigh", NULL, 0, 0555, NULL},{0}},
1495 {{0, NULL, NULL, 0, 0555, NULL},{0}},
1496 {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
/*
 * neigh_sysctl_register() - instantiate the sysctl template for one parms
 * block.  Copies neigh_sysctl_template, binds vars[0..11] to the parms
 * fields, and either (with a device) names the dev directory after it, or
 * (default parms, presumably -- the branch header is sampled out) zeroes
 * vars[12] and binds the four table-wide ints stored just past the parms
 * struct.  Then links the directory chain and registers the tree; the
 * kfree on registration failure and the return values are sampled out.
 */
1499 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
1500 int p_id, int pdev_id, char *p_name)
1502 struct neigh_sysctl_table *t;
1504 t = kmalloc(sizeof(*t), GFP_KERNEL);
1507 memcpy(t, &neigh_sysctl_template, sizeof(*t));
1508 t->neigh_vars[0].data = &p->mcast_probes;
1509 t->neigh_vars[1].data = &p->ucast_probes;
1510 t->neigh_vars[2].data = &p->app_probes;
1511 t->neigh_vars[3].data = &p->retrans_time;
1512 t->neigh_vars[4].data = &p->base_reachable_time;
1513 t->neigh_vars[5].data = &p->delay_probe_time;
1514 t->neigh_vars[6].data = &p->gc_staletime;
1515 t->neigh_vars[7].data = &p->queue_len;
1516 t->neigh_vars[8].data = &p->proxy_qlen;
1517 t->neigh_vars[9].data = &p->anycast_delay;
1518 t->neigh_vars[10].data = &p->proxy_delay;
1519 t->neigh_vars[11].data = &p->locktime;
1521 t->neigh_dev[0].procname = dev->name;
1522 t->neigh_dev[0].ctl_name = dev->ifindex;
/* per-device tree: terminate the list after the per-parms entries */
1523 memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
/* default tree: the gc knobs live immediately after the parms struct */
1525 t->neigh_vars[12].data = (int*)(p+1);
1526 t->neigh_vars[13].data = (int*)(p+1) + 1;
1527 t->neigh_vars[14].data = (int*)(p+1) + 2;
1528 t->neigh_vars[15].data = (int*)(p+1) + 3;
1530 t->neigh_neigh_dir[0].ctl_name = pdev_id;
1532 t->neigh_proto_dir[0].procname = p_name;
1533 t->neigh_proto_dir[0].ctl_name = p_id;
1535 t->neigh_dev[0].child = t->neigh_vars;
1536 t->neigh_neigh_dir[0].child = t->neigh_dev;
1537 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
1538 t->neigh_root_dir[0].child = t->neigh_proto_dir;
1540 t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
1541 if (t->sysctl_header == NULL) {
1545 p->sysctl_table = t;
/*
 * neigh_sysctl_unregister() - tear down the sysctl tree installed by
 * neigh_sysctl_register; clears p->sysctl_table first so the teardown is
 * idempotent.  The kfree of the table struct is sampled out of this view.
 */
1549 void neigh_sysctl_unregister(struct neigh_parms *p)
1551 if (p->sysctl_table) {
1552 struct neigh_sysctl_table *t = p->sysctl_table;
1553 p->sysctl_table = NULL;
1554 unregister_sysctl_table(t->sysctl_header);
1559 #endif /* CONFIG_SYSCTL */