# BRCM_VERSION=3
[bcm963xx.git] / kernel / linux / net / ipv4 / fib_hash.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 FIB: lookup engine and maintenance routines.
7  *
8  * Version:     $Id: fib_hash.c,v 1.1.1.1 2005/04/29 01:44:08 echo Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
/*
 * FTprint() is a trace hook used at the top of the insert/delete paths.
 * It currently expands to nothing; to enable tracing define it as
 *
 *      printk(KERN_DEBUG a)
 */
#define FTprint(a...)
50
51 static kmem_cache_t * fn_hash_kmem;
52
53 /*
54    These bizarre types are just to force strict type checking.
55    When I reversed order of bytes and changed to natural mask lengths,
56    I forgot to make fixes in several places. Now I am lazy to return
57    it back.
58  */
59
60 typedef struct {
61         u32     datum;
62 } fn_key_t;
63
64 typedef struct {
65         u32     datum;
66 } fn_hash_idx_t;
67
68 struct fib_node
69 {
70         struct fib_node         *fn_next;
71         struct fib_info         *fn_info;
72 #define FIB_INFO(f)     ((f)->fn_info)
73         fn_key_t                fn_key;
74         u8                      fn_tos;
75         u8                      fn_type;
76         u8                      fn_scope;
77         u8                      fn_state;
78 };
79
/* Flag bits kept in fib_node.fn_state. */
#define FN_S_ZOMBIE     1       /* deleted, but still linked until the next flush */
#define FN_S_ACCESSED   2       /* touched by a lookup since it was inserted/cleared */

/* Zombies created since the last fn_hash_flush(); a flush is forced above 128. */
static int fib_hash_zombies;
84
85 struct fn_zone
86 {
87         struct fn_zone  *fz_next;       /* Next not empty zone  */
88         struct fib_node **fz_hash;      /* Hash table pointer   */
89         int             fz_nent;        /* Number of entries    */
90
91         int             fz_divisor;     /* Hash divisor         */
92         u32             fz_hashmask;    /* (fz_divisor - 1)     */
93 #define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
94
95         int             fz_order;       /* Zone order           */
96         u32             fz_mask;
97 #define FZ_MASK(fz)     ((fz)->fz_mask)
98 };
99
100 /* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
101    can be cheaper than memory lookup, so that FZ_* macros are used.
102  */
103
/* Private data of one routing table (stored in fib_table.tb_data). */
struct fn_hash
{
        struct fn_zone  *fn_zones[33];  /* zone per prefix length 0..32, NULL if absent */
        struct fn_zone  *fn_zone_list;  /* existing zones chained through fz_next       */
};
109
110 static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
111 {
112         u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
113         h ^= (h>>20);
114         h ^= (h>>10);
115         h ^= (h>>5);
116         h &= FZ_HASHMASK(fz);
117         return *(fn_hash_idx_t*)&h;
118 }
119
/* Reset a key to the all-zero prefix. */
#define fz_key_0(key)           ((key).datum = 0)
/* Raw prefix value of a key; the zone argument is accepted but not used. */
#define fz_prefix(key,fz)       ((key).datum)
122
123 static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz)
124 {
125         fn_key_t k;
126         k.datum = dst & FZ_MASK(fz);
127         return k;
128 }
129
130 static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz)
131 {
132         return &fz->fz_hash[fn_hash(key, fz).datum];
133 }
134
135 static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz)
136 {
137         return fz->fz_hash[fn_hash(key, fz).datum];
138 }
139
140 static __inline__ int fn_key_eq(fn_key_t a, fn_key_t b)
141 {
142         return a.datum == b.datum;
143 }
144
145 static __inline__ int fn_key_leq(fn_key_t a, fn_key_t b)
146 {
147         return a.datum <= b.datum;
148 }
149
150 static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;
151
152 #define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct fib_node *))
153
154 static struct fib_node **fz_hash_alloc(int divisor)
155 {
156         unsigned long size = divisor * sizeof(struct fib_node *);
157
158         if (divisor <= 1024) {
159                 return kmalloc(size, GFP_KERNEL);
160         } else {
161                 return (struct fib_node **)
162                         __get_free_pages(GFP_KERNEL, get_order(size));
163         }
164 }
165
166 /* The fib hash lock must be held when this is called. */
167 static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
168                                        struct fib_node **old_ht,
169                                        int old_divisor)
170 {
171         int i;
172         struct fib_node *f, **fp, *next;
173
174         for (i=0; i<old_divisor; i++) {
175                 for (f=old_ht[i]; f; f=next) {
176                         next = f->fn_next;
177                         for (fp = fz_chain_p(f->fn_key, fz);
178                              *fp && fn_key_leq((*fp)->fn_key, f->fn_key);
179                              fp = &(*fp)->fn_next)
180                                 /* NONE */;
181                         f->fn_next = *fp;
182                         *fp = f;
183                 }
184         }
185 }
186
/*
 * Release a bucket array obtained from fz_hash_alloc().  @divisor must be
 * the bucket count it was allocated with, since it selects the allocator.
 */
static void fz_hash_free(struct fib_node **hash, int divisor)
{
        if (divisor > 1024) {
                free_pages((unsigned long) hash,
                           get_order(divisor * sizeof(struct fib_node *)));
                return;
        }

        kfree(hash);
}
195
196 static void fn_rehash_zone(struct fn_zone *fz)
197 {
198         struct fib_node **ht, **old_ht;
199         int old_divisor, new_divisor;
200         u32 new_hashmask;
201                 
202         old_divisor = fz->fz_divisor;
203
204         switch (old_divisor) {
205         case 16:
206                 new_divisor = 256;
207                 break;
208         case 256:
209                 new_divisor = 1024;
210                 break;
211         default:
212                 if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
213                         printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
214                         return;
215                 }
216                 new_divisor = (old_divisor << 1);
217                 break;
218         }
219
220         new_hashmask = (new_divisor - 1);
221
222 #if RT_CACHE_DEBUG >= 2
223         printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
224 #endif
225
226         ht = fz_hash_alloc(new_divisor);
227
228         if (ht) {
229                 memset(ht, 0, new_divisor*sizeof(struct fib_node*));
230
231                 write_lock_bh(&fib_hash_lock);
232                 old_ht = fz->fz_hash;
233                 fz->fz_hash = ht;
234                 fz->fz_hashmask = new_hashmask;
235                 fz->fz_divisor = new_divisor;
236                 fn_rebuild_zone(fz, old_ht, old_divisor);
237                 write_unlock_bh(&fib_hash_lock);
238
239                 fz_hash_free(old_ht, old_divisor);
240         }
241 }
242
243 static void fn_free_node(struct fib_node * f)
244 {
245         fib_release_info(FIB_INFO(f));
246         kmem_cache_free(fn_hash_kmem, f);
247 }
248
249
250 static struct fn_zone *
251 fn_new_zone(struct fn_hash *table, int z)
252 {
253         int i;
254         struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
255
256         if (!fz)
257                 return NULL;
258
259         memset(fz, 0, sizeof(struct fn_zone));
260         if (z) {
261                 fz->fz_divisor = 16;
262         } else {
263                 fz->fz_divisor = 1;
264         }
265         fz->fz_hashmask = (fz->fz_divisor - 1);
266         fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
267         if (!fz->fz_hash) {
268                 kfree(fz);
269                 return NULL;
270         }
271         memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
272         fz->fz_order = z;
273         fz->fz_mask = inet_make_mask(z);
274
275         /* Find the first not empty zone with more specific mask */
276         for (i=z+1; i<=32; i++)
277                 if (table->fn_zones[i])
278                         break;
279
280         write_lock_bh(&fib_hash_lock);
281         if (i>32) {
282                 /* No more specific masks, we are the first. */
283                 fz->fz_next = table->fn_zone_list;
284                 table->fn_zone_list = fz;
285         } else {
286                 fz->fz_next = table->fn_zones[i]->fz_next;
287                 table->fn_zones[i]->fz_next = fz;
288         }
289         table->fn_zones[z] = fz;
290         write_unlock_bh(&fib_hash_lock);
291         return fz;
292 }
293
294 static int
295 fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
296 {
297         int err;
298         struct fn_zone *fz;
299         struct fn_hash *t = (struct fn_hash*)tb->tb_data;
300
301         read_lock(&fib_hash_lock);
302         for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
303                 struct fib_node *f;
304                 fn_key_t k = fz_key(flp->fl4_dst, fz);
305
306                 for (f = fz_chain(k, fz); f; f = f->fn_next) {
307                         if (!fn_key_eq(k, f->fn_key)) {
308                                 if (fn_key_leq(k, f->fn_key))
309                                         break;
310                                 else
311                                         continue;
312                         }
313 #ifdef CONFIG_IP_ROUTE_TOS
314                         if (f->fn_tos && f->fn_tos != flp->fl4_tos)
315                                 continue;
316 #endif
317                         f->fn_state |= FN_S_ACCESSED;
318
319                         if (f->fn_state&FN_S_ZOMBIE)
320                                 continue;
321                         if (f->fn_scope < flp->fl4_scope)
322                                 continue;
323
324                         err = fib_semantic_match(f->fn_type, FIB_INFO(f), flp, res);
325                         if (err == 0) {
326                                 res->type = f->fn_type;
327                                 res->scope = f->fn_scope;
328                                 res->prefixlen = fz->fz_order;
329                                 goto out;
330                         }
331                         if (err < 0)
332                                 goto out;
333                 }
334         }
335         err = 1;
336 out:
337         read_unlock(&fib_hash_lock);
338         return err;
339 }
340
/*
 * Position ("order") of the default route picked by the most recent
 * fn_hash_select_default() run, or -1 when none was picked.
 */
static int fn_hash_last_dflt=-1;
342
343 static int fib_detect_death(struct fib_info *fi, int order,
344                             struct fib_info **last_resort, int *last_idx)
345 {
346         struct neighbour *n;
347         int state = NUD_NONE;
348
349         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
350         if (n) {
351                 state = n->nud_state;
352                 neigh_release(n);
353         }
354         if (state==NUD_REACHABLE)
355                 return 0;
356         if ((state&NUD_VALID) && order != fn_hash_last_dflt)
357                 return 0;
358         if ((state&NUD_VALID) ||
359             (*last_idx<0 && order > fn_hash_last_dflt)) {
360                 *last_resort = fi;
361                 *last_idx = order;
362         }
363         return 1;
364 }
365
/*
 * Possibly replace the default route held in @res->fi by another default
 * route of the same table.
 *
 * Only zone 0 (prefix length 0) is examined.  Candidates are unicast,
 * non-zombie nodes with the scope of @res, a priority not above that of
 * res->fi, and a first nexthop that has a gateway with link scope.
 * fib_detect_death() decides whether a candidate is usable.  When the
 * result changes, the reference on the old res->fi is dropped with
 * fib_info_put() and fib_clntref of the new one is incremented.
 * fn_hash_last_dflt records the position of the route that was chosen.
 *
 * @flp is not used by this implementation.
 */
366 static void
367 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
368 {
369         int order, last_idx;
370         struct fib_node *f;
371         struct fib_info *fi = NULL;
372         struct fib_info *last_resort;
373         struct fn_hash *t = (struct fn_hash*)tb->tb_data;
374         struct fn_zone *fz = t->fn_zones[0];
375
        /* No default routes in this table at all. */
376         if (fz == NULL)
377                 return;
378
379         last_idx = -1;
380         last_resort = NULL;
381         order = -1;
382
383         read_lock(&fib_hash_lock);
        /* Zone 0 has a single bucket, so bucket 0 holds every default route. */
384         for (f = fz->fz_hash[0]; f; f = f->fn_next) {
385                 struct fib_info *next_fi = FIB_INFO(f);
386
387                 if ((f->fn_state&FN_S_ZOMBIE) ||
388                     f->fn_scope != res->scope ||
389                     f->fn_type != RTN_UNICAST)
390                         continue;
391
                /* Routes with a larger priority value than the current one are not considered. */
392                 if (next_fi->fib_priority > res->fi->fib_priority)
393                         break;
394                 if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
395                         continue;
396                 f->fn_state |= FN_S_ACCESSED;
397
398                 if (fi == NULL) {
                        /* The first candidate must be the route the lookup returned. */
399                         if (next_fi != res->fi)
400                                 break;
401                 } else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
                        /* The previous candidate (fi) is usable: select it. */
402                         if (res->fi)
403                                 fib_info_put(res->fi);
404                         res->fi = fi;
405                         atomic_inc(&fi->fib_clntref);
406                         fn_hash_last_dflt = order;
407                         goto out;
408                 }
                /* Evaluation of this candidate is deferred to the next iteration. */
409                 fi = next_fi;
410                 order++;
411         }
412
        /* Fewer than two candidates were seen: nothing to choose between. */
413         if (order<=0 || fi==NULL) {
414                 fn_hash_last_dflt = -1;
415                 goto out;
416         }
417
        /* The last candidate has not been judged inside the loop yet. */
418         if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
419                 if (res->fi)
420                         fib_info_put(res->fi);
421                 res->fi = fi;
422                 atomic_inc(&fi->fib_clntref);
423                 fn_hash_last_dflt = order;
424                 goto out;
425         }
426
        /* No candidate passed: fall back to the recorded last resort, if any. */
427         if (last_idx >= 0) {
428                 if (res->fi)
429                         fib_info_put(res->fi);
430                 res->fi = last_resort;
431                 if (last_resort)
432                         atomic_inc(&last_resort->fib_clntref);
433         }
434         fn_hash_last_dflt = last_idx;
435 out:
436         read_unlock(&fib_hash_lock);
437 }
438
/*
 * Chain walking helpers.  All of them expand to a for-loop header without
 * an init clause: @fp must already point at a link (a bucket slot or some
 * node's fn_next) and is advanced from link to link, while @f is loaded
 * with the node that link points at.  When the loop ends without a break,
 * @f is NULL or the first node that failed the loop condition, and @fp is
 * the link referring to it.
 */

/* Walk to the end of the chain. */
#define FIB_SCAN(f, fp) \
        for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)

/* Walk while the nodes carry the given key. */
#define FIB_SCAN_KEY(f, fp, key) \
        for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)

/* Walk while the nodes carry the given key and (if TOS routing is built in) tos. */
#ifdef CONFIG_IP_ROUTE_TOS
#define FIB_SCAN_TOS(f, fp, key, tos) \
        for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)) && \
             (f)->fn_tos == (tos) ; (fp) = &(f)->fn_next)
#else
#define FIB_SCAN_TOS(f, fp, key, tos) FIB_SCAN_KEY(f, fp, key)
#endif
452
453
/* Forward declaration: route change notifier, defined further down. */
static void rtmsg_fib(int event, struct fib_node *f, int z, int tb_id,
                      struct nlmsghdr *n, struct netlink_skb_parms *req);
457
/*
 * Add a route described by @r / @rta to table @tb, or replace an
 * existing one, depending on the NLM_F_* flags in @n.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  prefix length above 32, or destination has bits outside the mask
 *   -ENOBUFS zone or node could not be allocated
 *   -EEXIST  an equivalent route exists (NLM_F_EXCL, or identical entry)
 *   -ENOENT  a new node would be needed but NLM_F_CREATE is not set
 *   or the error reported by fib_create_info().
 *
 * On every failure after fib_create_info() succeeded the fib_info is
 * released again through the "out" label.
 */
458 static int
459 fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
460                 struct nlmsghdr *n, struct netlink_skb_parms *req)
461 {
462         struct fn_hash *table = (struct fn_hash*)tb->tb_data;
463         struct fib_node *new_f, *f, **fp, **del_fp;
464         struct fn_zone *fz;
465         struct fib_info *fi;
466
467         int z = r->rtm_dst_len;
468         int type = r->rtm_type;
469 #ifdef CONFIG_IP_ROUTE_TOS
470         u8 tos = r->rtm_tos;
471 #endif
472         fn_key_t key;
473         int err;
474
475 FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
476 *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1,
477 rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
478
479         if (z > 32)
480                 return -EINVAL;
        /* Use the zone of this prefix length, creating it on first use. */
481         fz = table->fn_zones[z];
482         if (!fz && !(fz = fn_new_zone(table, z)))
483                 return -ENOBUFS;
484
485         fz_key_0(key);
486         if (rta->rta_dst) {
487                 u32 dst;
488                 memcpy(&dst, rta->rta_dst, 4);
                /* The destination must not have bits set beyond the prefix. */
489                 if (dst & ~FZ_MASK(fz))
490                         return -EINVAL;
491                 key = fz_key(dst, fz);
492         }
493
494         if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
495                 return err;
496
        /*
         * Grow the hash table when the zone holds more than two entries
         * per bucket, the table may still grow, and the prefix length
         * allows more distinct keys than there are buckets.
         */
497         if (fz->fz_nent > (fz->fz_divisor<<1) &&
498             fz->fz_divisor < FZ_MAX_DIVISOR &&
499             (z==32 || (1<<z) > fz->fz_divisor))
500                 fn_rehash_zone(fz);
501
502         fp = fz_chain_p(key, fz);
503
504
505         /*
506          * Scan list to find the first route with the same destination
507          */
508         FIB_SCAN(f, fp) {
509                 if (fn_key_leq(key,f->fn_key))
510                         break;
511         }
512
513 #ifdef CONFIG_IP_ROUTE_TOS
514         /*
515          * Find route with the same destination and tos.
516          */
517         FIB_SCAN_KEY(f, fp, key) {
518                 if (f->fn_tos <= tos)
519                         break;
520         }
521 #endif
522
523         del_fp = NULL;
524
        /*
         * A zombie with our key (and tos) sits here: schedule it for
         * removal and insert the new node directly behind it.  This path
         * still requires NLM_F_CREATE.
         */
525         if (f && (f->fn_state&FN_S_ZOMBIE) &&
526 #ifdef CONFIG_IP_ROUTE_TOS
527             f->fn_tos == tos &&
528 #endif
529             fn_key_eq(f->fn_key, key)) {
530                 del_fp = fp;
531                 fp = &f->fn_next;
532                 f = *fp;
533                 goto create;
534         }
535
        /* Among nodes with our key/tos, stop at the first priority >= ours. */
536         FIB_SCAN_TOS(f, fp, key, tos) {
537                 if (fi->fib_priority <= FIB_INFO(f)->fib_priority)
538                         break;
539         }
540
541         /* Now f==*fp points to the first node with the same
542            keys [prefix,tos,priority], if such key already
543            exists or to the node, before which we will insert new one.
544          */
545
546         if (f && 
547 #ifdef CONFIG_IP_ROUTE_TOS
548             f->fn_tos == tos &&
549 #endif
550             fn_key_eq(f->fn_key, key) &&
551             fi->fib_priority == FIB_INFO(f)->fib_priority) {
552                 struct fib_node **ins_fp;
553
554                 err = -EEXIST;
555                 if (n->nlmsg_flags&NLM_F_EXCL)
556                         goto out;
557
                /*
                 * Replace: the matched node is scheduled for removal and the
                 * new one goes right behind it; NLM_F_CREATE is not required.
                 */
558                 if (n->nlmsg_flags&NLM_F_REPLACE) {
559                         del_fp = fp;
560                         fp = &f->fn_next;
561                         f = *fp;
562                         goto replace;
563                 }
564
565                 ins_fp = fp;
566                 err = -EEXIST;
567
                /*
                 * Refuse an exact duplicate (same type, scope and fib_info)
                 * among the nodes of equal priority.  The scan leaves fp
                 * behind the last such node.
                 */
568                 FIB_SCAN_TOS(f, fp, key, tos) {
569                         if (fi->fib_priority != FIB_INFO(f)->fib_priority)
570                                 break;
571                         if (f->fn_type == type && f->fn_scope == r->rtm_scope
572                             && FIB_INFO(f) == fi)
573                                 goto out;
574                 }
575
                /* Without NLM_F_APPEND insert in front of the equal-priority nodes. */
576                 if (!(n->nlmsg_flags&NLM_F_APPEND)) {
577                         fp = ins_fp;
578                         f = *fp;
579                 }
580         }
581
582 create:
583         err = -ENOENT;
584         if (!(n->nlmsg_flags&NLM_F_CREATE))
585                 goto out;
586
587 replace:
588         err = -ENOBUFS;
589         new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
590         if (new_f == NULL)
591                 goto out;
592
593         memset(new_f, 0, sizeof(struct fib_node));
594
595         new_f->fn_key = key;
596 #ifdef CONFIG_IP_ROUTE_TOS
597         new_f->fn_tos = tos;
598 #endif
599         new_f->fn_type = type;
600         new_f->fn_scope = r->rtm_scope;
601         FIB_INFO(new_f) = fi;
602
603         /*
604          * Insert new entry to the list.
605          */
606
        /* Only the store that publishes the node is done under the write lock. */
607         new_f->fn_next = f;
608         write_lock_bh(&fib_hash_lock);
609         *fp = new_f;
610         write_unlock_bh(&fib_hash_lock);
611         fz->fz_nent++;
612
613         if (del_fp) {
614                 f = *del_fp;
615                 /* Unlink replaced node */
616                 write_lock_bh(&fib_hash_lock);
617                 *del_fp = f->fn_next;
618                 write_unlock_bh(&fib_hash_lock);
619
                /* The removal of a zombie is not announced here. */
620                 if (!(f->fn_state&FN_S_ZOMBIE))
621                         rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
                /* The routing cache is flushed only if the old node was ever used. */
622                 if (f->fn_state&FN_S_ACCESSED)
623                         rt_cache_flush(-1);
624                 fn_free_node(f);
625                 fz->fz_nent--;
626         } else {
627                 rt_cache_flush(-1);
628         }
629         rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
630         return 0;
631
632 out:
633         fib_release_info(fi);
634         return err;
635 }
636
637
/*
 * Delete the first route in table @tb that matches the request @r / @rta.
 *
 * Returns 0 on success, -EINVAL for a prefix length above 32 or a
 * destination with bits outside the mask, and -ESRCH when no matching
 * route exists (this includes hitting a zombie for the key).
 *
 * When the deleted node is the only one for its key (and tos) it is not
 * unlinked but turned into a zombie; zombies are reclaimed by a flush,
 * which is triggered once more than 128 have accumulated.
 */
638 static int
639 fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
640                 struct nlmsghdr *n, struct netlink_skb_parms *req)
641 {
642         struct fn_hash *table = (struct fn_hash*)tb->tb_data;
643         struct fib_node **fp, **del_fp, *f;
644         int z = r->rtm_dst_len;
645         struct fn_zone *fz;
646         fn_key_t key;
647         int matched;
648 #ifdef CONFIG_IP_ROUTE_TOS
649         u8 tos = r->rtm_tos;
650 #endif
651
652 FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
653        *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1);
654
655         if (z > 32)
656                 return -EINVAL;
657         if ((fz  = table->fn_zones[z]) == NULL)
658                 return -ESRCH;
659
660         fz_key_0(key);
661         if (rta->rta_dst) {
662                 u32 dst;
663                 memcpy(&dst, rta->rta_dst, 4);
664                 if (dst & ~FZ_MASK(fz))
665                         return -EINVAL;
666                 key = fz_key(dst, fz);
667         }
668
669         fp = fz_chain_p(key, fz);
670
671
        /*
         * Advance to the first node with our key.  Running past the key
         * means it is absent; if the chain simply ends, f is NULL, the
         * scans below do nothing and -ESRCH is returned at the bottom.
         */
672         FIB_SCAN(f, fp) {
673                 if (fn_key_eq(f->fn_key, key))
674                         break;
675                 if (fn_key_leq(key, f->fn_key)) {
676                         return -ESRCH;
677                 }
678         }
679 #ifdef CONFIG_IP_ROUTE_TOS
        /* Skip nodes of the same key until one with the requested tos. */
680         FIB_SCAN_KEY(f, fp, key) {
681                 if (f->fn_tos == tos)
682                         break;
683         }
684 #endif
685
686         matched = 0;
687         del_fp = NULL;
        /*
         * Count all nodes with our key/tos and remember the first one that
         * passes the optional type, scope and protocol filters and whose
         * nexthops match according to fib_nh_match().
         */
688         FIB_SCAN_TOS(f, fp, key, tos) {
689                 struct fib_info * fi = FIB_INFO(f);
690
691                 if (f->fn_state&FN_S_ZOMBIE) {
692                         return -ESRCH;
693                 }
694                 matched++;
695
696                 if (del_fp == NULL &&
697                     (!r->rtm_type || f->fn_type == r->rtm_type) &&
698                     (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
699                     (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) &&
700                     fib_nh_match(r, n, rta, fi) == 0)
701                         del_fp = fp;
702         }
703
704         if (del_fp) {
705                 f = *del_fp;
706                 rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
707
708                 if (matched != 1) {
                        /* Other nodes share the key/tos: unlink and free right away. */
709                         write_lock_bh(&fib_hash_lock);
710                         *del_fp = f->fn_next;
711                         write_unlock_bh(&fib_hash_lock);
712
713                         if (f->fn_state&FN_S_ACCESSED)
714                                 rt_cache_flush(-1);
715                         fn_free_node(f);
716                         fz->fz_nent--;
717                 } else {
                        /* Sole node for this key/tos: leave it linked as a zombie. */
718                         f->fn_state |= FN_S_ZOMBIE;
719                         if (f->fn_state&FN_S_ACCESSED) {
720                                 f->fn_state &= ~FN_S_ACCESSED;
721                                 rt_cache_flush(-1);
722                         }
723                         if (++fib_hash_zombies > 128)
724                                 fib_flush();
725                 }
726
727                 return 0;
728         }
729         return -ESRCH;
730 }
731
732 static __inline__ int
733 fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table)
734 {
735         int found = 0;
736         struct fib_node *f;
737
738         while ((f = *fp) != NULL) {
739                 struct fib_info *fi = FIB_INFO(f);
740
741                 if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
742                         write_lock_bh(&fib_hash_lock);
743                         *fp = f->fn_next;
744                         write_unlock_bh(&fib_hash_lock);
745
746                         fn_free_node(f);
747                         found++;
748                         continue;
749                 }
750                 fp = &f->fn_next;
751         }
752         return found;
753 }
754
755 static int fn_hash_flush(struct fib_table *tb)
756 {
757         struct fn_hash *table = (struct fn_hash*)tb->tb_data;
758         struct fn_zone *fz;
759         int found = 0;
760
761         fib_hash_zombies = 0;
762         for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
763                 int i;
764                 int tmp = 0;
765                 for (i=fz->fz_divisor-1; i>=0; i--)
766                         tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table);
767                 fz->fz_nent -= tmp;
768                 found += tmp;
769         }
770         return found;
771 }
772
773
774 static __inline__ int
775 fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
776                      struct fib_table *tb,
777                      struct fn_zone *fz,
778                      struct fib_node *f)
779 {
780         int i, s_i;
781
782         s_i = cb->args[3];
783         for (i=0; f; i++, f=f->fn_next) {
784                 if (i < s_i) continue;
785                 if (f->fn_state&FN_S_ZOMBIE) continue;
786                 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
787                                   RTM_NEWROUTE,
788                                   tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope,
789                                   &f->fn_key, fz->fz_order, f->fn_tos,
790                                   f->fn_info) < 0) {
791                         cb->args[3] = i;
792                         return -1;
793                 }
794         }
795         cb->args[3] = i;
796         return skb->len;
797 }
798
799 static __inline__ int
800 fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
801                    struct fib_table *tb,
802                    struct fn_zone *fz)
803 {
804         int h, s_h;
805
806         s_h = cb->args[2];
807         for (h=0; h < fz->fz_divisor; h++) {
808                 if (h < s_h) continue;
809                 if (h > s_h)
810                         memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
811                 if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL)
812                         continue;
813                 if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) {
814                         cb->args[2] = h;
815                         return -1;
816                 }
817         }
818         cb->args[2] = h;
819         return skb->len;
820 }
821
822 static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
823 {
824         int m, s_m;
825         struct fn_zone *fz;
826         struct fn_hash *table = (struct fn_hash*)tb->tb_data;
827
828         s_m = cb->args[1];
829         read_lock(&fib_hash_lock);
830         for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
831                 if (m < s_m) continue;
832                 if (m > s_m)
833                         memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
834                 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
835                         cb->args[1] = m;
836                         read_unlock(&fib_hash_lock);
837                         return -1;
838                 }
839         }
840         read_unlock(&fib_hash_lock);
841         cb->args[1] = m;
842         return skb->len;
843 }
844
845 static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
846                       struct nlmsghdr *n, struct netlink_skb_parms *req)
847 {
848         struct sk_buff *skb;
849         u32 pid = req ? req->pid : 0;
850         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
851
852         skb = alloc_skb(size, GFP_KERNEL);
853         if (!skb)
854                 return;
855
856         if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
857                           f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
858                           FIB_INFO(f)) < 0) {
859                 kfree_skb(skb);
860                 return;
861         }
862         NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
863         if (n->nlmsg_flags&NLM_F_ECHO)
864                 atomic_inc(&skb->users);
865         netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
866         if (n->nlmsg_flags&NLM_F_ECHO)
867                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
868 }
869
870 #ifdef CONFIG_IP_MULTIPLE_TABLES
871 struct fib_table * fib_hash_init(int id)
872 #else
873 struct fib_table * __init fib_hash_init(int id)
874 #endif
875 {
876         struct fib_table *tb;
877
878         if (fn_hash_kmem == NULL)
879                 fn_hash_kmem = kmem_cache_create("ip_fib_hash",
880                                                  sizeof(struct fib_node),
881                                                  0, SLAB_HWCACHE_ALIGN,
882                                                  NULL, NULL);
883
884         tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL);
885         if (tb == NULL)
886                 return NULL;
887
888         tb->tb_id = id;
889         tb->tb_lookup = fn_hash_lookup;
890         tb->tb_insert = fn_hash_insert;
891         tb->tb_delete = fn_hash_delete;
892         tb->tb_flush = fn_hash_flush;
893         tb->tb_select_default = fn_hash_select_default;
894         tb->tb_dump = fn_hash_dump;
895         memset(tb->tb_data, 0, sizeof(struct fn_hash));
896         return tb;
897 }
898
899 /* ------------------------------------------------------------------------ */
900 #ifdef CONFIG_PROC_FS
901
/* Cursor of the seq_file walk over the main table (kept in seq->private). */
struct fib_iter_state {
        struct fn_zone  *zone;          /* zone being walked, NULL when exhausted */
        int             bucket;         /* index of the current bucket            */
        struct fib_node **hash;         /* slot of the current bucket             */
        struct fib_node *node;          /* current node, NULL if none             */
};
908
909 static __inline__ struct fib_node *fib_get_first(struct seq_file *seq)
910 {
911         struct fib_iter_state* iter = seq->private;
912         struct fn_hash *table = (struct fn_hash *)ip_fib_main_table->tb_data;
913
914         iter->bucket = 0;
915         iter->hash   = NULL;
916         iter->node   = NULL;
917
918         for (iter->zone = table->fn_zone_list; iter->zone;
919              iter->zone = iter->zone->fz_next) {
920                 int maxslot;
921 #if 0 //BRCM_BEGIN
922                 if (!iter->zone->fz_next)
923                         continue;
924 #endif //BRCM_END
925
926                 iter->hash = iter->zone->fz_hash;
927                 maxslot = iter->zone->fz_divisor;
928
929                 for (iter->bucket = 0; iter->bucket < maxslot;
930                      ++iter->bucket, ++iter->hash) {
931                         iter->node = *iter->hash;
932                         if (iter->node)
933                                 goto out;
934                 }
935         }
936 out:
937         return iter->node;
938 }
939
940 static __inline__ struct fib_node *fib_get_next(struct seq_file *seq)
941 {
942         struct fib_iter_state* iter = seq->private;
943
944         if (iter->node)
945                 iter->node = iter->node->fn_next;
946
947         if (iter->node)
948                 goto out;
949
950         if (!iter->zone)
951                 goto out;
952
953         for (;;) {
954                 int maxslot;
955
956                 maxslot = iter->zone->fz_divisor;
957
958                 while (++iter->bucket < maxslot) {
959                         iter->node = *++iter->hash;
960
961                         if (iter->node)
962                                 goto out;
963                 }
964
965                 iter->zone = iter->zone->fz_next;
966
967                 if (!iter->zone)
968                         goto out;
969                 
970                 iter->hash = iter->zone->fz_hash;
971                 iter->bucket = 0;
972                 iter->node = *iter->hash;
973                 if (iter->node)
974                         break;
975         }
976 out:
977         return iter->node;
978 }
979
980 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
981 {
982         void *v = NULL;
983
984         read_lock(&fib_hash_lock);
985         if (ip_fib_main_table)
986                 v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN;
987         return v;
988 }
989
990 static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
991 {
992         ++*pos;
993         return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
994 }
995
/*
 * seq_file ->stop handler: releases the read lock on fib_hash_lock that
 * fib_seq_start() acquired.  Neither argument is used.
 */
996 static void fib_seq_stop(struct seq_file *seq, void *v)
997 {
998         read_unlock(&fib_hash_lock);
999 }
1000
1001 static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
1002 {
1003         static unsigned type2flags[RTN_MAX + 1] = {
1004                 [7] = RTF_REJECT, [8] = RTF_REJECT,
1005         };
1006         unsigned flags = type2flags[type];
1007
1008         if (fi && fi->fib_nh->nh_gw)
1009                 flags |= RTF_GATEWAY;
1010         if (mask == 0xFFFFFFFF)
1011                 flags |= RTF_HOST;
1012         if (!dead)
1013                 flags |= RTF_UP;
1014         return flags;
1015 }
1016
1017 /* 
1018  *      This outputs /proc/net/route.
1019  *
1020  *      It always works in backward compatibility mode.
1021  *      The format of the file is not supposed to be changed.
1022  */
1023 static int fib_seq_show(struct seq_file *seq, void *v)
1024 {
1025         struct fib_iter_state* iter;
1026         char bf[128];
1027         u32 prefix, mask;
1028         unsigned flags;
1029         struct fib_node *f;
1030         struct fib_info *fi;
1031
1032         if (v == SEQ_START_TOKEN) {
1033                 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
1034                            "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1035                            "\tWindow\tIRTT");
1036                 goto out;
1037         }
1038
1039         f       = v;
1040         fi      = FIB_INFO(f);
1041         iter    = seq->private;
1042         prefix  = fz_prefix(f->fn_key, iter->zone);
1043         mask    = FZ_MASK(iter->zone);
1044         flags   = fib_flag_trans(f->fn_type, f->fn_state & FN_S_ZOMBIE,
1045                                  mask, fi);
1046         if (fi)
1047                 snprintf(bf, sizeof(bf),
1048                          "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1049                          fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1050                          fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1051                          mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
1052                          fi->fib_window,
1053                          fi->fib_rtt >> 3);
1054         else
1055                 snprintf(bf, sizeof(bf),
1056                          "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1057                          prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0);
1058         seq_printf(seq, "%-127s\n", bf);
1059 out:
1060         return 0;
1061 }
1062
/*
 * Iterator callbacks for /proc/net/route; passed to seq_open() by
 * fib_seq_open().
 */
1063 static struct seq_operations fib_seq_ops = {
1064         .start  = fib_seq_start,
1065         .next   = fib_seq_next,
1066         .stop   = fib_seq_stop,
1067         .show   = fib_seq_show,
1068 };
1069
1070 static int fib_seq_open(struct inode *inode, struct file *file)
1071 {
1072         struct seq_file *seq;
1073         int rc = -ENOMEM;
1074         struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
1075        
1076         if (!s)
1077                 goto out;
1078
1079         rc = seq_open(file, &fib_seq_ops);
1080         if (rc)
1081                 goto out_kfree;
1082
1083         seq          = file->private_data;
1084         seq->private = s;
1085         memset(s, 0, sizeof(*s));
1086 out:
1087         return rc;
1088 out_kfree:
1089         kfree(s);
1090         goto out;
1091 }
1092
/*
 * File operations for the "route" proc entry created in fib_proc_init().
 * Reading and seeking go through the generic seq_file helpers.
 * NOTE(review): seq_release_private presumably also frees the iterator
 * state that fib_seq_open() stored in seq->private -- confirm against the
 * seq_file implementation.
 */
1093 static struct file_operations fib_seq_fops = {
1094         .owner          = THIS_MODULE,
1095         .open           = fib_seq_open,
1096         .read           = seq_read,
1097         .llseek         = seq_lseek,
1098         .release        = seq_release_private,
1099 };
1100
1101 int __init fib_proc_init(void)
1102 {
1103         if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
1104                 return -ENOMEM;
1105         return 0;
1106 }
1107
/*
 * Remove the "route" proc entry that fib_proc_init() created.
 */
1108 void __init fib_proc_exit(void)
1109 {
1110         proc_net_remove("route");
1111 }
1112 #endif /* CONFIG_PROC_FS */