port more changes to make PCI work
[linux-2.4.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <pedro_m@yahoo.com>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/route.h>
34 #include <linux/netdevice.h>
35 #include <linux/in6.h>
36 #include <linux/init.h>
37 #include <linux/netlink.h>
38 #include <linux/if_arp.h>
39
40 #ifdef  CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #endif
43
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52
53 #include <asm/uaccess.h>
54
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58
/* Policy routing support is forced off for this file. */
#undef CONFIG_RT6_POLICY

/*
 * Debug verbosity.  At level 3 or above RDBG() and RT6_TRACE() expand to
 * printk() calls; below that they expand to nothing / an empty statement.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG takes a fully parenthesized printk argument list: RDBG(("fmt", a)); */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif
71
72
73 int ip6_rt_max_size = 4096;
74 int ip6_rt_gc_min_interval = HZ / 2;
75 int ip6_rt_gc_timeout = 60*HZ;
76 int ip6_rt_gc_interval = 30*HZ;
77 int ip6_rt_gc_elasticity = 9;
78 int ip6_rt_mtu_expires = 10*60*HZ;
79 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
80
81 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
82 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
84                                          struct sk_buff *skb);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int               ip6_dst_gc(void);
87
88 static int              ip6_pkt_discard(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90
91 struct dst_ops ip6_dst_ops = {
92         AF_INET6,
93         __constant_htons(ETH_P_IPV6),
94         1024,
95
96         ip6_dst_gc,
97         ip6_dst_check,
98         ip6_dst_reroute,
99         NULL,
100         ip6_negative_advice,
101         ip6_link_failure,
102         sizeof(struct rt6_info),
103 };
104
105 struct rt6_info ip6_null_entry = {
106         {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
107           -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108           -ENETUNREACH, NULL, NULL,
109           ip6_pkt_discard, ip6_pkt_discard,
110 #ifdef CONFIG_NET_CLS_ROUTE
111           0,
112 #endif
113           &ip6_dst_ops}},
114         NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
115         255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
116 };
117
118 struct fib6_node ip6_routing_table = {
119         NULL, NULL, NULL, NULL,
120         &ip6_null_entry,
121         0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
122 };
123
/*
 * Policy (flow) routing hooks.  CONFIG_RT6_POLICY is #undef'ed at the top
 * of this file, so normally only the #else branch is compiled and
 * ip6_rt_policy is the constant 0.
 */
#ifdef CONFIG_RT6_POLICY
int ip6_rt_policy = 0;

struct pol_chain *rt6_pol_list = NULL;

static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);

static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
					struct in6_addr *daddr,
					struct in6_addr *saddr,
					struct fl_acc_args *args);

#else
#define ip6_rt_policy (0)
#endif
141
142 /* Protects all the ip6 fib */
143
144 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
145
146
147 /*
148  *      Route lookup. Any rt6_lock is implied.
149  */
150
151 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
152                                                     int oif,
153                                                     int strict)
154 {
155         struct rt6_info *local = NULL;
156         struct rt6_info *sprt;
157
158         if (oif) {
159                 for (sprt = rt; sprt; sprt = sprt->u.next) {
160                         struct net_device *dev = sprt->rt6i_dev;
161                         if (dev->ifindex == oif)
162                                 return sprt;
163                         if (dev->flags&IFF_LOOPBACK)
164                                 local = sprt;
165                 }
166
167                 if (local)
168                         return local;
169
170                 if (strict)
171                         return &ip6_null_entry;
172         }
173         return rt;
174 }
175
176 /*
177  *      pointer to the last default router chosen. BH is disabled locally.
178  */
179 static struct rt6_info *rt6_dflt_pointer = NULL;
180 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
181
182 /* Default Router Selection (RFC 2461 6.3.6) */
183 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
184 {
185         struct rt6_info *match = NULL;
186         struct rt6_info *sprt;
187         int mpri = 0;
188
189         for (sprt = rt; sprt; sprt = sprt->u.next) {
190                 struct neighbour *neigh;
191                 int m = 0;
192
193                 if (!oif ||
194                     (sprt->rt6i_dev &&
195                      sprt->rt6i_dev->ifindex == oif))
196                         m += 8;
197
198                 if (sprt == rt6_dflt_pointer)
199                         m += 4;
200
201                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
202                         read_lock_bh(&neigh->lock);
203                         switch (neigh->nud_state) {
204                         case NUD_REACHABLE:
205                                 m += 3;
206                                 break;
207
208                         case NUD_STALE:
209                         case NUD_DELAY:
210                         case NUD_PROBE:
211                                 m += 2;
212                                 break;
213
214                         case NUD_NOARP:
215                         case NUD_PERMANENT:
216                                 m += 1;
217                                 break;
218
219                         case NUD_INCOMPLETE:
220                         default:
221                                 read_unlock_bh(&neigh->lock);
222                                 continue;
223                         }
224                         read_unlock_bh(&neigh->lock);
225                 } else {
226                         continue;
227                 }
228
229                 if (m > mpri || m >= 12) {
230                         match = sprt;
231                         mpri = m;
232                         if (m >= 12) {
233                                 /* we choose the lastest default router if it
234                                  * is in (probably) reachable state.
235                                  * If route changed, we should do pmtu
236                                  * discovery. --yoshfuji
237                                  */
238                                 break;
239                         }
240                 }
241         }
242
243         spin_lock(&rt6_dflt_lock);
244         if (!match) {
245                 /*
246                  *      No default routers are known to be reachable.
247                  *      SHOULD round robin
248                  */
249                 if (rt6_dflt_pointer) {
250                         for (sprt = rt6_dflt_pointer->u.next;
251                              sprt; sprt = sprt->u.next) {
252                                 if (sprt->u.dst.obsolete <= 0 &&
253                                     sprt->u.dst.error == 0) {
254                                         match = sprt;
255                                         break;
256                                 }
257                         }
258                         for (sprt = rt;
259                              !match && sprt;
260                              sprt = sprt->u.next) {
261                                 if (sprt->u.dst.obsolete <= 0 &&
262                                     sprt->u.dst.error == 0) {
263                                         match = sprt;
264                                         break;
265                                 }
266                                 if (sprt == rt6_dflt_pointer)
267                                         break;
268                         }
269                 }
270         }
271
272         if (match)
273                 rt6_dflt_pointer = match;
274
275         spin_unlock(&rt6_dflt_lock);
276
277         if (!match) {
278                 /*
279                  * Last Resort: if no default routers found, 
280                  * use addrconf default route.
281                  * We don't record this route.
282                  */
283                 for (sprt = ip6_routing_table.leaf;
284                      sprt; sprt = sprt->u.next) {
285                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
286                             (!oif ||
287                              (sprt->rt6i_dev &&
288                               sprt->rt6i_dev->ifindex == oif))) {
289                                 match = sprt;
290                                 break;
291                         }
292                 }
293                 if (!match) {
294                         /* no default route.  give up. */
295                         match = &ip6_null_entry;
296                 }
297         }
298
299         return match;
300 }
301
302 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
303                             int oif, int strict)
304 {
305         struct fib6_node *fn;
306         struct rt6_info *rt;
307
308         read_lock_bh(&rt6_lock);
309         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
310         rt = rt6_device_match(fn->leaf, oif, strict);
311         dst_hold(&rt->u.dst);
312         rt->u.dst.__use++;
313         read_unlock_bh(&rt6_lock);
314
315         rt->u.dst.lastuse = jiffies;
316         if (rt->u.dst.error == 0)
317                 return rt;
318         dst_release(&rt->u.dst);
319         return NULL;
320 }
321
322 /* rt6_ins is called with FREE rt6_lock.
323    It takes new route entry, the addition fails by any reason the
324    route is freed. In any case, if caller does not hold it, it may
325    be destroyed.
326  */
327
328 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
329 {
330         int err;
331
332         write_lock_bh(&rt6_lock);
333         err = fib6_add(&ip6_routing_table, rt, nlh, req);
334         write_unlock_bh(&rt6_lock);
335
336         return err;
337 }
338
339 /* No rt6_lock! If COW failed, the function returns dead route entry
340    with dst->error set to errno value.
341  */
342
343 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
344                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
345 {
346         int err;
347         struct rt6_info *rt;
348
349         /*
350          *      Clone the route.
351          */
352
353         rt = ip6_rt_copy(ort);
354
355         if (rt) {
356                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
357
358                 if (!(rt->rt6i_flags&RTF_GATEWAY))
359                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
360
361                 rt->rt6i_dst.plen = 128;
362                 rt->rt6i_flags |= RTF_CACHE;
363                 rt->u.dst.flags |= DST_HOST;
364
365 #ifdef CONFIG_IPV6_SUBTREES
366                 if (rt->rt6i_src.plen && saddr) {
367                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
368                         rt->rt6i_src.plen = 128;
369                 }
370 #endif
371
372                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
373
374                 dst_hold(&rt->u.dst);
375
376                 err = rt6_ins(rt, NULL, req);
377                 if (err == 0)
378                         return rt;
379
380                 rt->u.dst.error = err;
381
382                 return rt;
383         }
384         dst_hold(&ip6_null_entry.u.dst);
385         return &ip6_null_entry;
386 }
387
#ifdef CONFIG_RT6_POLICY
/*
 * Flow lookup for a received packet: builds FL_ARG_FORWARD arguments around
 * @skb and calls rt6_flow_lookup() with the addresses from its IPv6 header.
 */
static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
						      struct sk_buff *skb)
{
	struct fl_acc_args arg;
	struct in6_addr *src, *dst;

	arg.type = FL_ARG_FORWARD;
	arg.fl_u.skb = skb;

	src = &skb->nh.ipv6h->saddr;
	dst = &skb->nh.ipv6h->daddr;

	return rt6_flow_lookup(rt, dst, src, &arg);
}

/*
 * Flow lookup for locally originated traffic: builds FL_ARG_ORIGIN
 * arguments from @sk and @fl and calls rt6_flow_lookup() with the flow's
 * addresses.
 */
static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
						       struct sock *sk,
						       struct flowi *fl)
{
	struct fl_acc_args arg;

	arg.type = FL_ARG_ORIGIN;
	arg.fl_u.fl_o.sk = sk;
	arg.fl_u.fl_o.flow = fl;

	return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
			       &arg);
}

#endif
419
/*
 * BACKTRACK() - retry a strict lookup higher up the FIB tree.
 *
 * Only for use inside ip6_route_input()/ip6_route_output(): it relies on
 * the caller's locals `rt', `fn' and `strict' and on the labels `restart'
 * and `out'.
 *
 * If the match came back as &ip6_null_entry and the lookup is strict, walk
 * towards the root via fn->parent:
 *   - on reaching a node flagged RTN_ROOT, take a reference on rt and jump
 *     to `out';
 *   - on reaching a node flagged RTN_RTINFO, jump to `restart' to retry the
 *     match on that node.
 * Otherwise execution falls through to the statement after the macro.
 *
 * Wrapped in do { } while (0) so that `BACKTRACK();' is a single statement
 * and cannot capture a following `else'.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
431
432
433 void ip6_route_input(struct sk_buff *skb)
434 {
435         struct fib6_node *fn;
436         struct rt6_info *rt;
437         int strict;
438         int attempts = 3;
439
440         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
441
442 relookup:
443         read_lock_bh(&rt6_lock);
444
445         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
446                          &skb->nh.ipv6h->saddr);
447
448 restart:
449         rt = fn->leaf;
450
451         if ((rt->rt6i_flags & RTF_CACHE)) {
452                 if (ip6_rt_policy == 0) {
453                         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
454                         BACKTRACK();
455                         dst_hold(&rt->u.dst);
456                         goto out;
457                 }
458
459 #ifdef CONFIG_RT6_POLICY
460                 if ((rt->rt6i_flags & RTF_FLOW)) {
461                         struct rt6_info *sprt;
462
463                         for (sprt = rt; sprt; sprt = sprt->u.next) {
464                                 if (rt6_flow_match_in(sprt, skb)) {
465                                         rt = sprt;
466                                         dst_hold(&rt->u.dst);
467                                         goto out;
468                                 }
469                         }
470                 }
471 #endif
472         }
473
474         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
475         BACKTRACK();
476
477         if (ip6_rt_policy == 0) {
478                 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479                         read_unlock_bh(&rt6_lock);
480
481                         rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
482                                      &skb->nh.ipv6h->saddr,
483                                      &NETLINK_CB(skb));
484                         
485                         if (rt->u.dst.error != -EEXIST || --attempts <= 0)
486                                 goto out2;
487                         /* Race condition! In the gap, when rt6_lock was
488                            released someone could insert this route.  Relookup.
489                          */
490                         goto relookup;
491                 }
492                 dst_hold(&rt->u.dst);
493         } else {
494 #ifdef CONFIG_RT6_POLICY
495                 rt = rt6_flow_lookup_in(rt, skb);
496 #else
497                 /* NEVER REACHED */
498 #endif
499         }
500
501 out:
502         read_unlock_bh(&rt6_lock);
503 out2:
504         rt->u.dst.lastuse = jiffies;
505         rt->u.dst.__use++;
506         skb->dst = (struct dst_entry *) rt;
507 }
508
509 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
510 {
511         struct fib6_node *fn;
512         struct rt6_info *rt;
513         int strict;
514         int attempts = 3;
515
516         strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
517
518 relookup:
519         read_lock_bh(&rt6_lock);
520
521         fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
522                          fl->nl_u.ip6_u.saddr);
523
524 restart:
525         rt = fn->leaf;
526
527         if ((rt->rt6i_flags & RTF_CACHE)) {
528                 if (ip6_rt_policy == 0) {
529                         rt = rt6_device_match(rt, fl->oif, strict);
530                         BACKTRACK();
531                         dst_hold(&rt->u.dst);
532                         goto out;
533                 }
534
535 #ifdef CONFIG_RT6_POLICY
536                 if ((rt->rt6i_flags & RTF_FLOW)) {
537                         struct rt6_info *sprt;
538
539                         for (sprt = rt; sprt; sprt = sprt->u.next) {
540                                 if (rt6_flow_match_out(sprt, sk)) {
541                                         rt = sprt;
542                                         dst_hold(&rt->u.dst);
543                                         goto out;
544                                 }
545                         }
546                 }
547 #endif
548         }
549         if (rt->rt6i_flags & RTF_DEFAULT) {
550                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
551                         rt = rt6_best_dflt(rt, fl->oif);
552         } else {
553                 rt = rt6_device_match(rt, fl->oif, strict);
554                 BACKTRACK();
555         }
556
557         if (ip6_rt_policy == 0) {
558                 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
559                         read_unlock_bh(&rt6_lock);
560
561                         rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
562                                      fl->nl_u.ip6_u.saddr, NULL);
563                         
564                         if (rt->u.dst.error != -EEXIST || --attempts <= 0)
565                                 goto out2;
566
567                         /* Race condition! In the gap, when rt6_lock was
568                            released someone could insert this route.  Relookup.
569                          */
570                         goto relookup;
571                 }
572                 dst_hold(&rt->u.dst);
573         } else {
574 #ifdef CONFIG_RT6_POLICY
575                 rt = rt6_flow_lookup_out(rt, sk, fl);
576 #else
577                 /* NEVER REACHED */
578 #endif
579         }
580
581 out:
582         read_unlock_bh(&rt6_lock);
583 out2:
584         rt->u.dst.lastuse = jiffies;
585         rt->u.dst.__use++;
586         return &rt->u.dst;
587 }
588
589
590 /*
591  *      Destination cache support functions
592  */
593
594 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
595 {
596         struct rt6_info *rt;
597
598         rt = (struct rt6_info *) dst;
599
600         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
601                 return dst;
602
603         dst_release(dst);
604         return NULL;
605 }
606
607 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
608 {
609         /*
610          *      FIXME
611          */
612         RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
613               __builtin_return_address(0)));
614         return NULL;
615 }
616
617 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
618 {
619         struct rt6_info *rt = (struct rt6_info *) dst;
620
621         if (rt) {
622                 if (rt->rt6i_flags & RTF_CACHE)
623                         ip6_del_rt(rt, NULL, NULL);
624                 else
625                         dst_release(dst);
626         }
627         return NULL;
628 }
629
630 static void ip6_link_failure(struct sk_buff *skb)
631 {
632         struct rt6_info *rt;
633
634         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
635
636         rt = (struct rt6_info *) skb->dst;
637         if (rt) {
638                 if (rt->rt6i_flags&RTF_CACHE) {
639                         dst_set_expires(&rt->u.dst, 0);
640                         rt->rt6i_flags |= RTF_EXPIRES;
641                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
642                         rt->rt6i_node->fn_sernum = -1;
643         }
644 }
645
646 static int ip6_dst_gc()
647 {
648         static unsigned expire = 30*HZ;
649         static unsigned long last_gc;
650         unsigned long now = jiffies;
651
652         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
653             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
654                 goto out;
655
656         expire++;
657         fib6_run_gc(expire);
658         last_gc = now;
659         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
660                 expire = ip6_rt_gc_timeout>>1;
661
662 out:
663         expire -= expire>>ip6_rt_gc_elasticity;
664         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
665 }
666
/* Clear the host part of a prefix.  The radix tree does not need this,
   but it results in cleaner routing tables.

   Do not remove it until everything else is known to work!
 */
672
/*
 * Store in @pfx the first @plen bits of @addr, with all remaining bits
 * cleared.  @plen is expected to be in 0..128 (callers validate it).
 */
static void ipv6_addr_prefix(struct in6_addr *pfx,
			     const struct in6_addr *addr, int plen)
{
	int whole_bytes = plen >> 3;	/* bytes copied verbatim */
	int extra_bits = plen & 0x7;	/* leading bits kept in the next byte */

	memcpy(pfx->s6_addr, addr, whole_bytes);

	if (whole_bytes < 16)
		memset(pfx->s6_addr + whole_bytes, 0, 16 - whole_bytes);

	/* 0xff00 >> n has the top n bits of the low byte set. */
	if (extra_bits != 0)
		pfx->s6_addr[whole_bytes] =
			addr->s6_addr[whole_bytes] & (0xff00 >> extra_bits);
}
685
686 static int ipv6_get_mtu(struct net_device *dev)
687 {
688         int mtu = IPV6_MIN_MTU;
689         struct inet6_dev *idev;
690
691         idev = in6_dev_get(dev);
692         if (idev) {
693                 mtu = idev->cnf.mtu6;
694                 in6_dev_put(idev);
695         }
696         return mtu;
697 }
698
699 static int ipv6_get_hoplimit(struct net_device *dev)
700 {
701         int hoplimit = ipv6_devconf.hop_limit;
702         struct inet6_dev *idev;
703
704         idev = in6_dev_get(dev);
705         if (idev) {
706                 hoplimit = idev->cnf.hop_limit;
707                 in6_dev_put(idev);
708         }
709         return hoplimit;
710 }
711
712 /*
713  *
714  */
715
716 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
717 {
718         int err;
719         struct rtmsg *r;
720         struct rt6_info *rt;
721         struct net_device *dev = NULL;
722         int addr_type;
723
724         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
725                 return -EINVAL;
726 #ifndef CONFIG_IPV6_SUBTREES
727         if (rtmsg->rtmsg_src_len)
728                 return -EINVAL;
729 #endif
730         if (rtmsg->rtmsg_metric == 0)
731                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
732
733         rt = dst_alloc(&ip6_dst_ops);
734
735         if (rt == NULL)
736                 return -ENOMEM;
737
738         rt->u.dst.obsolete = -1;
739         rt->rt6i_expires = rtmsg->rtmsg_info;
740         if (nlh && (r = NLMSG_DATA(nlh))) {
741                 rt->rt6i_protocol = r->rtm_protocol;
742         } else {
743                 rt->rt6i_protocol = RTPROT_BOOT;
744         }
745
746         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
747
748         if (addr_type & IPV6_ADDR_MULTICAST)
749                 rt->u.dst.input = ip6_mc_input;
750         else
751                 rt->u.dst.input = ip6_forward;
752
753         rt->u.dst.output = ip6_output;
754
755         if (rtmsg->rtmsg_ifindex) {
756                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
757                 err = -ENODEV;
758                 if (dev == NULL)
759                         goto out;
760         }
761
762         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
763                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
764         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
765         if (rt->rt6i_dst.plen == 128)
766                rt->u.dst.flags = DST_HOST;
767
768 #ifdef CONFIG_IPV6_SUBTREES
769         ipv6_addr_prefix(&rt->rt6i_src.addr, 
770                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
771         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
772 #endif
773
774         rt->rt6i_metric = rtmsg->rtmsg_metric;
775
776         /* We cannot add true routes via loopback here,
777            they would result in kernel looping; promote them to reject routes
778          */
779         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
780             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
781                 if (dev)
782                         dev_put(dev);
783                 dev = &loopback_dev;
784                 dev_hold(dev);
785                 rt->u.dst.output = ip6_pkt_discard;
786                 rt->u.dst.input = ip6_pkt_discard;
787                 rt->u.dst.error = -ENETUNREACH;
788                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
789                 goto install_route;
790         }
791
792         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
793                 struct in6_addr *gw_addr;
794                 int gwa_type;
795
796                 gw_addr = &rtmsg->rtmsg_gateway;
797                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
798                 gwa_type = ipv6_addr_type(gw_addr);
799
800                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
801                         struct rt6_info *grt;
802
803                         /* IPv6 strictly inhibits using not link-local
804                            addresses as nexthop address.
805                            Otherwise, router will not able to send redirects.
806                            It is very good, but in some (rare!) curcumstances
807                            (SIT, PtP, NBMA NOARP links) it is handy to allow
808                            some exceptions. --ANK
809                          */
810                         err = -EINVAL;
811                         if (!(gwa_type&IPV6_ADDR_UNICAST))
812                                 goto out;
813
814                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
815
816                         err = -EHOSTUNREACH;
817                         if (grt == NULL)
818                                 goto out;
819                         if (dev) {
820                                 if (dev != grt->rt6i_dev) {
821                                         dst_release(&grt->u.dst);
822                                         goto out;
823                                 }
824                         } else {
825                                 dev = grt->rt6i_dev;
826                                 dev_hold(dev);
827                         }
828                         if (!(grt->rt6i_flags&RTF_GATEWAY))
829                                 err = 0;
830                         dst_release(&grt->u.dst);
831
832                         if (err)
833                                 goto out;
834                 }
835                 err = -EINVAL;
836                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
837                         goto out;
838         }
839
840         err = -ENODEV;
841         if (dev == NULL)
842                 goto out;
843
844         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
845                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
846                 if (IS_ERR(rt->rt6i_nexthop)) {
847                         err = PTR_ERR(rt->rt6i_nexthop);
848                         rt->rt6i_nexthop = NULL;
849                         goto out;
850                 }
851         }
852
853         if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
854                 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
855         else
856                 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
857         rt->rt6i_flags = rtmsg->rtmsg_flags;
858
859 install_route:
860         rt->u.dst.pmtu = ipv6_get_mtu(dev);
861         rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
862         /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
863            MSS is 65535 - tcp_header_size. 65535 is also valid and
864            means: "any MSS, rely only on pmtu discovery"
865          */
866         if (rt->u.dst.advmss > 65535-20)
867                 rt->u.dst.advmss = 65535;
868         rt->u.dst.dev = dev;
869         return rt6_ins(rt, nlh, req);
870
871 out:
872         if (dev)
873                 dev_put(dev);
874         dst_free((struct dst_entry *) rt);
875         return err;
876 }
877
878 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
879 {
880         int err;
881
882         write_lock_bh(&rt6_lock);
883
884         spin_lock_bh(&rt6_dflt_lock);
885         rt6_dflt_pointer = NULL;
886         spin_unlock_bh(&rt6_dflt_lock);
887
888         dst_release(&rt->u.dst);
889
890         err = fib6_del(rt, nlh, req);
891         write_unlock_bh(&rt6_lock);
892
893         return err;
894 }
895
896 int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
897 {
898         struct fib6_node *fn;
899         struct rt6_info *rt;
900         int err = -ESRCH;
901
902         read_lock_bh(&rt6_lock);
903
904         fn = fib6_locate(&ip6_routing_table,
905                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
906                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
907         
908         if (fn) {
909                 for (rt = fn->leaf; rt; rt = rt->u.next) {
910                         if (rtmsg->rtmsg_ifindex &&
911                             (rt->rt6i_dev == NULL ||
912                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
913                                 continue;
914                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
915                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
916                                 continue;
917                         if (rtmsg->rtmsg_metric &&
918                             rtmsg->rtmsg_metric != rt->rt6i_metric)
919                                 continue;
920                         dst_hold(&rt->u.dst);
921                         read_unlock_bh(&rt6_lock);
922
923                         return ip6_del_rt(rt, nlh, req);
924                 }
925         }
926         read_unlock_bh(&rt6_lock);
927
928         return err;
929 }
930
931 /*
932  *      Handle redirects
933  */
934 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
935                   struct neighbour *neigh, int on_link)
936 {
937         struct rt6_info *rt, *nrt;
938
939         /* Locate old route to this destination. */
940         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
941
942         if (rt == NULL)
943                 return;
944
945         if (neigh->dev != rt->rt6i_dev)
946                 goto out;
947
948         /* Redirect received -> path was valid.
949            Look, redirects are sent only in response to data packets,
950            so that this nexthop apparently is reachable. --ANK
951          */
952         dst_confirm(&rt->u.dst);
953
954         /* Duplicate redirect: silently ignore. */
955         if (neigh == rt->u.dst.neighbour)
956                 goto out;
957
958         /* Current route is on-link; redirect is always invalid.
959            
960            Seems, previous statement is not true. It could
961            be node, which looks for us as on-link (f.e. proxy ndisc)
962            But then router serving it might decide, that we should
963            know truth 8)8) --ANK (980726).
964          */
965         if (!(rt->rt6i_flags&RTF_GATEWAY))
966                 goto out;
967
968         /*
969          *      RFC 1970 specifies that redirects should only be
970          *      accepted if they come from the nexthop to the target.
971          *      Due to the way default routers are chosen, this notion
972          *      is a bit fuzzy and one might need to check all default
973          *      routers.
974          */
975
976         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
977                 if (rt->rt6i_flags & RTF_DEFAULT) {
978                         struct rt6_info *rt1;
979
980                         read_lock(&rt6_lock);
981                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
982                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
983                                         dst_hold(&rt1->u.dst);
984                                         dst_release(&rt->u.dst);
985                                         read_unlock(&rt6_lock);
986                                         rt = rt1;
987                                         goto source_ok;
988                                 }
989                         }
990                         read_unlock(&rt6_lock);
991                 }
992                 if (net_ratelimit())
993                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
994                                "for redirect target\n");
995                 goto out;
996         }
997
998 source_ok:
999
1000         /*
1001          *      We have finally decided to accept it.
1002          */
1003
1004         nrt = ip6_rt_copy(rt);
1005         if (nrt == NULL)
1006                 goto out;
1007
1008         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1009         if (on_link)
1010                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1011
1012         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1013         nrt->rt6i_dst.plen = 128;
1014         nrt->u.dst.flags |= DST_HOST;
1015
1016         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1017         nrt->rt6i_nexthop = neigh_clone(neigh);
1018         /* Reset pmtu, it may be better */
1019         nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
1020         nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1021         if (rt->u.dst.advmss > 65535-20)
1022                 rt->u.dst.advmss = 65535;
1023         nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
1024
1025         if (rt6_ins(nrt, NULL, NULL))
1026                 goto out;
1027
1028         if (rt->rt6i_flags&RTF_CACHE) {
1029                 ip6_del_rt(rt, NULL, NULL);
1030                 return;
1031         }
1032
1033 out:
1034         dst_release(&rt->u.dst);
1035         return;
1036 }
1037
1038 /*
1039  *      Handle ICMP "packet too big" messages
1040  *      i.e. Path MTU discovery
1041  */
1042
1043 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1044                         struct net_device *dev, u32 pmtu)
1045 {
1046         struct rt6_info *rt, *nrt;
1047
1048         if (pmtu < IPV6_MIN_MTU) {
1049                 if (net_ratelimit())
1050                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1051                                pmtu);
1052                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1053                    link MTU if the node receives a Packet Too Big message
1054                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1055                  */     
1056                 pmtu = IPV6_MIN_MTU;
1057         }
1058
1059         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1060
1061         if (rt == NULL)
1062                 return;
1063
1064         if (pmtu >= rt->u.dst.pmtu)
1065                 goto out;
1066
1067         /* New mtu received -> path was valid.
1068            They are sent only in response to data packets,
1069            so that this nexthop apparently is reachable. --ANK
1070          */
1071         dst_confirm(&rt->u.dst);
1072
1073         /* Host route. If it is static, it would be better
1074            not to override it, but add new one, so that
1075            when cache entry will expire old pmtu
1076            would return automatically.
1077          */
1078         if (rt->rt6i_flags & RTF_CACHE) {
1079                 rt->u.dst.pmtu = pmtu;
1080                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1081                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1082                 goto out;
1083         }
1084
1085         /* Network route.
1086            Two cases are possible:
1087            1. It is connected route. Action: COW
1088            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1089          */
1090         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1091                 nrt = rt6_cow(rt, daddr, saddr, NULL);
1092                 if (!nrt->u.dst.error) {
1093                         nrt->u.dst.pmtu = pmtu;
1094                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1095                            happened within 5 mins, the recommended timer is 10 mins.
1096                            Here this route expiration time is set to ip6_rt_mtu_expires 
1097                            which is 10 mins. After 10 mins the decreased pmtu is expired
1098                            and detecting PMTU increase will be automatically happened.
1099                          */
1100                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1101                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1102                         dst_release(&nrt->u.dst);
1103                 }
1104         } else {
1105                 nrt = ip6_rt_copy(rt);
1106                 if (nrt == NULL)
1107                         goto out;
1108                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1109                 nrt->rt6i_dst.plen = 128;
1110                 nrt->u.dst.flags |= DST_HOST;
1111                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1112                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1113                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1114                 nrt->u.dst.pmtu = pmtu;
1115                 rt6_ins(nrt, NULL, NULL);
1116         }
1117
1118 out:
1119         dst_release(&rt->u.dst);
1120 }
1121
1122 /*
1123  *      Misc support functions
1124  */
1125
1126 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1127 {
1128         struct rt6_info *rt;
1129
1130         rt = dst_alloc(&ip6_dst_ops);
1131
1132         if (rt) {
1133                 rt->u.dst.input = ort->u.dst.input;
1134                 rt->u.dst.output = ort->u.dst.output;
1135
1136                 memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1137                 rt->u.dst.dev = ort->u.dst.dev;
1138                 if (rt->u.dst.dev)
1139                         dev_hold(rt->u.dst.dev);
1140                 rt->u.dst.lastuse = jiffies;
1141                 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1142                 rt->rt6i_expires = 0;
1143
1144                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1145                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1146                 rt->rt6i_metric = 0;
1147
1148                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1149 #ifdef CONFIG_IPV6_SUBTREES
1150                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1151 #endif
1152         }
1153         return rt;
1154 }
1155
1156 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1157 {       
1158         struct rt6_info *rt;
1159         struct fib6_node *fn;
1160
1161         fn = &ip6_routing_table;
1162
1163         write_lock_bh(&rt6_lock);
1164         for (rt = fn->leaf; rt; rt=rt->u.next) {
1165                 if (dev == rt->rt6i_dev &&
1166                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1167                         break;
1168         }
1169         if (rt)
1170                 dst_hold(&rt->u.dst);
1171         write_unlock_bh(&rt6_lock);
1172         return rt;
1173 }
1174
1175 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1176                                      struct net_device *dev)
1177 {
1178         struct in6_rtmsg rtmsg;
1179
1180         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1181         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1182         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1183         rtmsg.rtmsg_metric = 1024;
1184         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1185
1186         rtmsg.rtmsg_ifindex = dev->ifindex;
1187
1188         ip6_route_add(&rtmsg, NULL, NULL);
1189         return rt6_get_dflt_router(gwaddr, dev);
1190 }
1191
1192 void rt6_purge_dflt_routers(int last_resort)
1193 {
1194         struct rt6_info *rt;
1195         u32 flags;
1196
1197         if (last_resort)
1198                 flags = RTF_ALLONLINK;
1199         else
1200                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1201
1202 restart:
1203         read_lock_bh(&rt6_lock);
1204         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1205                 if (rt->rt6i_flags & flags) {
1206                         dst_hold(&rt->u.dst);
1207
1208                         spin_lock_bh(&rt6_dflt_lock);
1209                         rt6_dflt_pointer = NULL;
1210                         spin_unlock_bh(&rt6_dflt_lock);
1211
1212                         read_unlock_bh(&rt6_lock);
1213
1214                         ip6_del_rt(rt, NULL, NULL);
1215
1216                         goto restart;
1217                 }
1218         }
1219         read_unlock_bh(&rt6_lock);
1220 }
1221
1222 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1223 {
1224         struct in6_rtmsg rtmsg;
1225         int err;
1226
1227         switch(cmd) {
1228         case SIOCADDRT:         /* Add a route */
1229         case SIOCDELRT:         /* Delete a route */
1230                 if (!capable(CAP_NET_ADMIN))
1231                         return -EPERM;
1232                 err = copy_from_user(&rtmsg, arg,
1233                                      sizeof(struct in6_rtmsg));
1234                 if (err)
1235                         return -EFAULT;
1236                         
1237                 rtnl_lock();
1238                 switch (cmd) {
1239                 case SIOCADDRT:
1240                         err = ip6_route_add(&rtmsg, NULL, NULL);
1241                         break;
1242                 case SIOCDELRT:
1243                         err = ip6_route_del(&rtmsg, NULL, NULL);
1244                         break;
1245                 default:
1246                         err = -EINVAL;
1247                 }
1248                 rtnl_unlock();
1249
1250                 return err;
1251         };
1252
1253         return -EINVAL;
1254 }
1255
1256 /*
1257  *      Drop the packet on the floor
1258  */
1259
1260 int ip6_pkt_discard(struct sk_buff *skb)
1261 {
1262         IP6_INC_STATS(Ip6OutNoRoutes);
1263         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1264         kfree_skb(skb);
1265         return 0;
1266 }
1267
1268 /*
1269  *      Add address
1270  */
1271
1272 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1273 {
1274         struct rt6_info *rt;
1275
1276         rt = dst_alloc(&ip6_dst_ops);
1277         if (rt == NULL)
1278                 return -ENOMEM;
1279
1280         rt->u.dst.flags = DST_HOST;
1281         rt->u.dst.input = ip6_input;
1282         rt->u.dst.output = ip6_output;
1283         rt->rt6i_dev = dev_get_by_name("lo");
1284         rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1285         rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1286         if (rt->u.dst.advmss > 65535-20)
1287                 rt->u.dst.advmss = 65535;
1288         rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1289         rt->u.dst.obsolete = -1;
1290
1291         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1292         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1293         if (rt->rt6i_nexthop == NULL) {
1294                 dst_free((struct dst_entry *) rt);
1295                 return -ENOMEM;
1296         }
1297
1298         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1299         rt->rt6i_dst.plen = 128;
1300         rt6_ins(rt, NULL, NULL);
1301
1302         return 0;
1303 }
1304
1305 /* Delete address. Warning: you should check that this address
1306    disappeared before calling this function.
1307  */
1308
1309 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1310 {
1311         struct rt6_info *rt;
1312         int err = -ENOENT;
1313
1314         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1315         if (rt) {
1316                 if (rt->rt6i_dst.plen == 128)
1317                         err = ip6_del_rt(rt, NULL, NULL);
1318                 else
1319                         dst_release(&rt->u.dst);
1320         }
1321
1322         return err;
1323 }
1324
1325 #ifdef CONFIG_RT6_POLICY
1326
1327 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1328 {
1329         struct flow_filter *frule;
1330         struct pkt_filter *filter;
1331         int res = 1;
1332
1333         if ((frule = rt->rt6i_filter) == NULL)
1334                 goto out;
1335
1336         if (frule->type != FLR_INPUT) {
1337                 res = 0;
1338                 goto out;
1339         }
1340
1341         for (filter = frule->u.filter; filter; filter = filter->next) {
1342                 __u32 *word;
1343
1344                 word = (__u32 *) skb->h.raw;
1345                 word += filter->offset;
1346
1347                 if ((*word ^ filter->value) & filter->mask) {
1348                         res = 0;
1349                         break;
1350                 }
1351         }
1352
1353 out:
1354         return res;
1355 }
1356
1357 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1358 {
1359         struct flow_filter *frule;
1360         int res = 1;
1361
1362         if ((frule = rt->rt6i_filter) == NULL)
1363                 goto out;
1364
1365         if (frule->type != FLR_INPUT) {
1366                 res = 0;
1367                 goto out;
1368         }
1369
1370         if (frule->u.sk != sk)
1371                 res = 0;
1372 out:
1373         return res;
1374 }
1375
1376 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1377                                         struct in6_addr *daddr,
1378                                         struct in6_addr *saddr,
1379                                         struct fl_acc_args *args)
1380 {
1381         struct flow_rule *frule;
1382         struct rt6_info *nrt = NULL;
1383         struct pol_chain *pol;
1384
1385         for (pol = rt6_pol_list; pol; pol = pol->next) {
1386                 struct fib6_node *fn;
1387                 struct rt6_info *sprt;
1388
1389                 fn = fib6_lookup(pol->rules, daddr, saddr);
1390
1391                 do {
1392                         for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1393                                 int res;
1394
1395                                 frule = sprt->rt6i_flowr;
1396 #if RT6_DEBUG >= 2
1397                                 if (frule == NULL) {
1398                                         printk(KERN_DEBUG "NULL flowr\n");
1399                                         goto error;
1400                                 }
1401 #endif
1402                                 res = frule->ops->accept(rt, sprt, args, &nrt);
1403
1404                                 switch (res) {
1405                                 case FLOWR_SELECT:
1406                                         goto found;
1407                                 case FLOWR_CLEAR:
1408                                         goto next_policy;
1409                                 case FLOWR_NODECISION:
1410                                         break;
1411                                 default:
1412                                         goto error;
1413                                 };
1414                         }
1415
1416                         fn = fn->parent;
1417
1418                 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1419
1420         next_policy:
1421         }
1422
1423 error:
1424         dst_hold(&ip6_null_entry.u.dst);
1425         return &ip6_null_entry;
1426
1427 found:
1428         if (nrt == NULL)
1429                 goto error;
1430
1431         nrt->rt6i_flags |= RTF_CACHE;
1432         dst_hold(&nrt->u.dst);
1433         err = rt6_ins(nrt, NULL, NULL);
1434         if (err)
1435                 nrt->u.dst.error = err;
1436         return nrt;
1437 }
1438 #endif
1439
1440 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1441 {
1442         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1443             rt != &ip6_null_entry) {
1444                 RT6_TRACE("deleted by ifdown %p\n", rt);
1445                 return -1;
1446         }
1447         return 0;
1448 }
1449
1450 void rt6_ifdown(struct net_device *dev)
1451 {
1452         write_lock_bh(&rt6_lock);
1453         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1454         write_unlock_bh(&rt6_lock);
1455 }
1456
/* Argument block passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
1462
1463 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1464 {
1465         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1466         struct inet6_dev *idev;
1467         /* In IPv6 pmtu discovery is not optional,
1468            so that RTAX_MTU lock cannot disable it.
1469            We still use this lock to block changes
1470            caused by addrconf/ndisc.
1471         */
1472         idev = __in6_dev_get(arg->dev);
1473         if (idev == NULL)
1474                 return 0;
1475
1476         /* For administrative MTU increase, there is no way to discover 
1477            IPv6 PMTU increase, so PMTU increase should be updated here.
1478            Since RFC 1981 doesn't include administrative MTU increase
1479            update PMTU increase is a MUST. (i.e. jumbo frame)
1480          */
1481         /*
1482            If new MTU is less than route PMTU, this new MTU will be the 
1483            lowest MTU in the path, update the route PMTU to refect PMTU 
1484            decreases; if new MTU is greater than route PMTU, and the 
1485            old MTU is the lowest MTU in the path, update the route PMTU 
1486            to refect the increase. In this case if the other nodes' MTU
1487            also have the lowest MTU, TOO BIG MESSAGE will be lead to 
1488            PMTU discouvery. 
1489          */
1490         if (rt->rt6i_dev == arg->dev &&
1491             !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
1492               (rt->u.dst.pmtu > arg->mtu ||
1493                (rt->u.dst.pmtu < arg->mtu &&
1494                 rt->u.dst.pmtu == idev->cnf.mtu6)))
1495                 rt->u.dst.pmtu = arg->mtu;
1496         rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
1497         if (rt->u.dst.advmss > 65535-20)
1498                 rt->u.dst.advmss = 65535;
1499         return 0;
1500 }
1501
1502 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1503 {
1504         struct rt6_mtu_change_arg arg;
1505
1506         arg.dev = dev;
1507         arg.mtu = mtu;
1508         read_lock_bh(&rt6_lock);
1509         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1510         read_unlock_bh(&rt6_lock);
1511 }
1512
1513 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1514                               struct in6_rtmsg *rtmsg)
1515 {
1516         memset(rtmsg, 0, sizeof(*rtmsg));
1517
1518         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1519         rtmsg->rtmsg_src_len = r->rtm_src_len;
1520         rtmsg->rtmsg_flags = RTF_UP;
1521         if (r->rtm_type == RTN_UNREACHABLE)
1522                 rtmsg->rtmsg_flags |= RTF_REJECT;
1523
1524         if (rta[RTA_GATEWAY-1]) {
1525                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1526                         return -EINVAL;
1527                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1528                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1529         }
1530         if (rta[RTA_DST-1]) {
1531                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1532                         return -EINVAL;
1533                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1534         }
1535         if (rta[RTA_SRC-1]) {
1536                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1537                         return -EINVAL;
1538                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1539         }
1540         if (rta[RTA_OIF-1]) {
1541                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1542                         return -EINVAL;
1543                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1544         }
1545         if (rta[RTA_PRIORITY-1]) {
1546                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1547                         return -EINVAL;
1548                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1549         }
1550         return 0;
1551 }
1552
1553 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1554 {
1555         struct rtmsg *r = NLMSG_DATA(nlh);
1556         struct in6_rtmsg rtmsg;
1557
1558         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1559                 return -EINVAL;
1560         return ip6_route_del(&rtmsg, nlh, &NETLINK_CB(skb));
1561 }
1562
1563 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1564 {
1565         struct rtmsg *r = NLMSG_DATA(nlh);
1566         struct in6_rtmsg rtmsg;
1567
1568         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1569                 return -EINVAL;
1570         return ip6_route_add(&rtmsg, nlh, &NETLINK_CB(skb));
1571 }
1572
/* Per-call state handed to the dump walker by inet6_dump_fib(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback of the request */
};
1578
1579 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1580                          struct in6_addr *dst, struct in6_addr *src,
1581                          int iif, int type, u32 pid, u32 seq, int prefix)
1582 {
1583         struct rtmsg *rtm;
1584         struct nlmsghdr  *nlh;
1585         unsigned char    *b = skb->tail;
1586         struct rta_cacheinfo ci;
1587
1588         if (prefix) {   /* user wants prefix routes only */
1589                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1590                         /* success since this is not a prefix route */
1591                         return 1;
1592                 }
1593         }
1594
1595         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1596         rtm = NLMSG_DATA(nlh);
1597         rtm->rtm_family = AF_INET6;
1598         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1599         rtm->rtm_src_len = rt->rt6i_src.plen;
1600         rtm->rtm_tos = 0;
1601         rtm->rtm_table = RT_TABLE_MAIN;
1602         if (rt->rt6i_flags&RTF_REJECT)
1603                 rtm->rtm_type = RTN_UNREACHABLE;
1604         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1605                 rtm->rtm_type = RTN_LOCAL;
1606         else
1607                 rtm->rtm_type = RTN_UNICAST;
1608         rtm->rtm_flags = 0;
1609         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1610         rtm->rtm_protocol = rt->rt6i_protocol;
1611         if (rt->rt6i_flags&RTF_DYNAMIC)
1612                 rtm->rtm_protocol = RTPROT_REDIRECT;
1613         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1614                 rtm->rtm_protocol = RTPROT_KERNEL;
1615         else if (rt->rt6i_flags&RTF_DEFAULT)
1616                 rtm->rtm_protocol = RTPROT_RA;
1617
1618         if (rt->rt6i_flags&RTF_CACHE)
1619                 rtm->rtm_flags |= RTM_F_CLONED;
1620
1621         if (dst) {
1622                 RTA_PUT(skb, RTA_DST, 16, dst);
1623                 rtm->rtm_dst_len = 128;
1624         } else if (rtm->rtm_dst_len)
1625                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1626 #ifdef CONFIG_IPV6_SUBTREES
1627         if (src) {
1628                 RTA_PUT(skb, RTA_SRC, 16, src);
1629                 rtm->rtm_src_len = 128;
1630         } else if (rtm->rtm_src_len)
1631                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1632 #endif
1633         if (iif)
1634                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1635         else if (dst) {
1636                 struct in6_addr saddr_buf;
1637                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1638                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1639         }
1640         if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1641                 goto rtattr_failure;
1642         if (rt->u.dst.neighbour)
1643                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1644         if (rt->u.dst.dev)
1645                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1646         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1647         ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1648         if (rt->rt6i_expires)
1649                 ci.rta_expires = rt->rt6i_expires - jiffies;
1650         else
1651                 ci.rta_expires = 0;
1652         ci.rta_used = rt->u.dst.__use;
1653         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1654         ci.rta_error = rt->u.dst.error;
1655         ci.rta_id = 0;
1656         ci.rta_ts = 0;
1657         ci.rta_tsage = 0;
1658         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1659         nlh->nlmsg_len = skb->tail - b;
1660         return skb->len;
1661
1662 nlmsg_failure:
1663 rtattr_failure:
1664         skb_trim(skb, b - skb->data);
1665         return -1;
1666 }
1667
1668 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1669 {
1670         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1671         int prefix;
1672
1673         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1674                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1675                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1676         } else
1677                 prefix = 0;
1678
1679         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1680                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1681                      prefix);
1682 }
1683
1684 static int fib6_dump_node(struct fib6_walker_t *w)
1685 {
1686         int res;
1687         struct rt6_info *rt;
1688
1689         for (rt = w->leaf; rt; rt = rt->u.next) {
1690                 res = rt6_dump_route(rt, w->args);
1691                 if (res < 0) {
1692                         /* Frame is full, suspend walking */
1693                         w->leaf = rt;
1694                         return 1;
1695                 }
1696                 BUG_TRAP(res!=0);
1697         }
1698         w->leaf = NULL;
1699         return 0;
1700 }
1701
1702 static void fib6_dump_end(struct netlink_callback *cb)
1703 {
1704         struct fib6_walker_t *w = (void*)cb->args[0];
1705
1706         if (w) {
1707                 cb->args[0] = 0;
1708                 fib6_walker_unlink(w);
1709                 kfree(w);
1710         }
1711         if (cb->args[1]) {
1712                 cb->done = (void*)cb->args[1];
1713                 cb->args[1] = 0;
1714         }
1715 }
1716
1717 static int fib6_dump_done(struct netlink_callback *cb)
1718 {
1719         fib6_dump_end(cb);
1720         return cb->done(cb);
1721 }
1722
1723 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1724 {
1725         struct rt6_rtnl_dump_arg arg;
1726         struct fib6_walker_t *w;
1727         int res;
1728
1729         arg.skb = skb;
1730         arg.cb = cb;
1731
1732         w = (void*)cb->args[0];
1733         if (w == NULL) {
1734                 /* New dump:
1735                  * 
1736                  * 1. hook callback destructor.
1737                  */
1738                 cb->args[1] = (long)cb->done;
1739                 cb->done = fib6_dump_done;
1740
1741                 /*
1742                  * 2. allocate and initialize walker.
1743                  */
1744                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1745                 if (w == NULL)
1746                         return -ENOMEM;
1747                 RT6_TRACE("dump<%p", w);
1748                 memset(w, 0, sizeof(*w));
1749                 w->root = &ip6_routing_table;
1750                 w->func = fib6_dump_node;
1751                 w->args = &arg;
1752                 cb->args[0] = (long)w;
1753                 read_lock_bh(&rt6_lock);
1754                 res = fib6_walk(w);
1755                 read_unlock_bh(&rt6_lock);
1756         } else {
1757                 w->args = &arg;
1758                 read_lock_bh(&rt6_lock);
1759                 res = fib6_walk_continue(w);
1760                 read_unlock_bh(&rt6_lock);
1761         }
1762 #if RT6_DEBUG >= 3
1763         if (res <= 0 && skb->len == 0)
1764                 RT6_TRACE("%p>dump end\n", w);
1765 #endif
1766         res = res < 0 ? res : skb->len;
1767         /* res < 0 is an error. (really, impossible)
1768            res == 0 means that dump is complete, but skb still can contain data.
1769            res > 0 dump is not complete, but frame is full.
1770          */
1771         /* Destroy walker, if dump of this table is complete. */
1772         if (res <= 0)
1773                 fib6_dump_end(cb);
1774         return res;
1775 }
1776
1777 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1778 {
1779         struct rtattr **rta = arg;
1780         int iif = 0;
1781         int err = -ENOBUFS;
1782         struct sk_buff *skb;
1783         struct flowi fl;
1784         struct rt6_info *rt;
1785
1786         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1787         if (skb == NULL)
1788                 goto out;
1789
1790         /* Reserve room for dummy headers, this skb can pass
1791            through good chunk of routing engine.
1792          */
1793         skb->mac.raw = skb->data;
1794         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1795
1796         fl.proto = 0;
1797         fl.nl_u.ip6_u.daddr = NULL;
1798         fl.nl_u.ip6_u.saddr = NULL;
1799         fl.uli_u.icmpt.type = 0;
1800         fl.uli_u.icmpt.code = 0;
1801         if (rta[RTA_SRC-1])
1802                 fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1803         if (rta[RTA_DST-1])
1804                 fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
1805
1806         if (rta[RTA_IIF-1])
1807                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1808
1809         if (iif) {
1810                 struct net_device *dev;
1811                 dev = __dev_get_by_index(iif);
1812                 if (!dev) {
1813                         err = -ENODEV;
1814                         goto out_free;
1815                 }
1816         }
1817
1818         fl.oif = 0;
1819         if (rta[RTA_OIF-1])
1820                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1821
1822         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1823
1824         skb->dst = &rt->u.dst;
1825
1826         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1827         err = rt6_fill_node(skb, rt, 
1828                             fl.nl_u.ip6_u.daddr,
1829                             fl.nl_u.ip6_u.saddr,
1830                             iif,
1831                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1832                             nlh->nlmsg_seq, 0);
1833         if (err < 0) {
1834                 err = -EMSGSIZE;
1835                 goto out_free;
1836         }
1837
1838         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1839         if (err > 0)
1840                 err = 0;
1841 out:
1842         return err;
1843 out_free:
1844         kfree_skb(skb);
1845         goto out;       
1846 }
1847
1848 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1849                       struct netlink_skb_parms *req)
1850 {
1851         struct sk_buff *skb;
1852         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1853         u32 pid = current->pid;
1854         u32 seq = 0;
1855
1856         if (req)
1857                 pid = req->pid;
1858         if (nlh)
1859                 seq = nlh->nlmsg_seq;
1860
1861         skb = alloc_skb(size, gfp_any());
1862         if (!skb) {
1863                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1864                 return;
1865         }
1866         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0) < 0) {
1867                 kfree_skb(skb);
1868                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1869                 return;
1870         }
1871         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1872         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1873 }
1874
1875 /*
1876  *      /proc
1877  */
1878
1879 #ifdef CONFIG_PROC_FS
1880
/*
 * Size in bytes (150) of one text record written by rt6_info_route().
 * rt6_info_route() and rt6_proc_info() use it to turn a byte offset into
 * a count of whole records plus a remainder.  The record only has this
 * size while the device name fits the 8-character "%8s" field.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1882
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
        char *buffer;   /* output buffer supplied by the reader */
        int offset;     /* byte offset the reader asked to start from */
        int length;     /* number of bytes the reader asked for */
        int skip;       /* whole records skipped so far */
        int len;        /* bytes written into buffer so far */
};
1891
1892 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1893 {
1894         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1895         int i;
1896
1897         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1898                 arg->skip++;
1899                 return 0;
1900         }
1901
1902         if (arg->len >= arg->length)
1903                 return 0;
1904
1905         for (i=0; i<16; i++) {
1906                 sprintf(arg->buffer + arg->len, "%02x",
1907                         rt->rt6i_dst.addr.s6_addr[i]);
1908                 arg->len += 2;
1909         }
1910         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1911                             rt->rt6i_dst.plen);
1912
1913 #ifdef CONFIG_IPV6_SUBTREES
1914         for (i=0; i<16; i++) {
1915                 sprintf(arg->buffer + arg->len, "%02x",
1916                         rt->rt6i_src.addr.s6_addr[i]);
1917                 arg->len += 2;
1918         }
1919         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1920                             rt->rt6i_src.plen);
1921 #else
1922         sprintf(arg->buffer + arg->len,
1923                 "00000000000000000000000000000000 00 ");
1924         arg->len += 36;
1925 #endif
1926
1927         if (rt->rt6i_nexthop) {
1928                 for (i=0; i<16; i++) {
1929                         sprintf(arg->buffer + arg->len, "%02x",
1930                                 rt->rt6i_nexthop->primary_key[i]);
1931                         arg->len += 2;
1932                 }
1933         } else {
1934                 sprintf(arg->buffer + arg->len,
1935                         "00000000000000000000000000000000");
1936                 arg->len += 32;
1937         }
1938         arg->len += sprintf(arg->buffer + arg->len,
1939                             " %08x %08x %08x %08x %8s\n",
1940                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1941                             rt->u.dst.__use, rt->rt6i_flags, 
1942                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1943         return 0;
1944 }
1945
1946 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1947 {
1948         struct rt6_proc_arg arg;
1949         arg.buffer = buffer;
1950         arg.offset = offset;
1951         arg.length = length;
1952         arg.skip = 0;
1953         arg.len = 0;
1954
1955         read_lock_bh(&rt6_lock);
1956         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1957         read_unlock_bh(&rt6_lock);
1958
1959         *start = buffer;
1960         if (offset)
1961                 *start += offset % RT6_INFO_LEN;
1962
1963         arg.len -= offset % RT6_INFO_LEN;
1964
1965         if (arg.len > length)
1966                 arg.len = length;
1967         if (arg.len < 0)
1968                 arg.len = 0;
1969
1970         return arg.len;
1971 }
1972
/* Routing statistics counters, defined elsewhere; printed by rt6_proc_stats(). */
extern struct rt6_statistics rt6_stats;
1974
1975 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1976 {
1977         int len;
1978
1979         len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1980                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1981                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1982                       rt6_stats.fib_rt_cache,
1983                       atomic_read(&ip6_dst_ops.entries));
1984
1985         len -= offset;
1986
1987         if (len > length)
1988                 len = length;
1989         if(len < 0)
1990                 len = 0;
1991
1992         *start = buffer + offset;
1993
1994         return len;
1995 }
1996 #endif  /* CONFIG_PROC_FS */
1997
1998 #ifdef CONFIG_SYSCTL
1999
/*
 * Backing store for the "flush" sysctl entry; the value written there is
 * handed to fib6_run_gc() by ipv6_sysctl_rtcache_flush().
 */
static int flush_delay;
2001
2002 static
2003 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2004                               void *buffer, size_t *lenp)
2005 {
2006         if (write) {
2007                 proc_dointvec(ctl, write, filp, buffer, lenp);
2008                 if (flush_delay < 0)
2009                         flush_delay = 0;
2010                 fib6_run_gc((unsigned long)flush_delay);
2011                 return 0;
2012         } else
2013                 return -EINVAL;
2014 }
2015
2016 ctl_table ipv6_route_table[] = {
2017         {NET_IPV6_ROUTE_FLUSH, "flush",
2018          &flush_delay, sizeof(int), 0644, NULL,
2019          &ipv6_sysctl_rtcache_flush},
2020         {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
2021          &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
2022          &proc_dointvec},
2023         {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
2024          &ip6_rt_max_size, sizeof(int), 0644, NULL,
2025          &proc_dointvec},
2026         {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
2027          &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
2028          &proc_dointvec_jiffies, &sysctl_jiffies},
2029         {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
2030          &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
2031          &proc_dointvec_jiffies, &sysctl_jiffies},
2032         {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
2033          &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
2034          &proc_dointvec_jiffies, &sysctl_jiffies},
2035         {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
2036          &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
2037          &proc_dointvec_jiffies, &sysctl_jiffies},
2038         {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
2039          &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
2040          &proc_dointvec_jiffies, &sysctl_jiffies},
2041         {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
2042          &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
2043          &proc_dointvec_jiffies, &sysctl_jiffies},
2044          {0}
2045 };
2046
2047 #endif
2048
2049
2050 void __init ip6_route_init(void)
2051 {
2052         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053                                                      sizeof(struct rt6_info),
2054                                                      0, SLAB_HWCACHE_ALIGN,
2055                                                      NULL, NULL);
2056         fib6_init();
2057 #ifdef  CONFIG_PROC_FS
2058         proc_net_create("ipv6_route", 0, rt6_proc_info);
2059         proc_net_create("rt6_stats", 0, rt6_proc_stats);
2060 #endif
2061 }
2062
2063 #ifdef MODULE
2064 void ip6_route_cleanup(void)
2065 {
2066 #ifdef CONFIG_PROC_FS
2067         proc_net_remove("ipv6_route");
2068         proc_net_remove("rt6_stats");
2069 #endif
2070
2071         rt6_ifdown(NULL);
2072         fib6_gc_cleanup();
2073 }
2074 #endif  /* MODULE */