2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.1.1.1 2005/04/11 02:51:13 jack Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/config.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/net.h>
22 #include <linux/route.h>
23 #include <linux/netdevice.h>
24 #include <linux/in6.h>
25 #include <linux/init.h>
26 #include <linux/netlink.h>
27 #include <linux/if_arp.h>
30 #include <linux/proc_fs.h>
35 #include <net/ip6_fib.h>
36 #include <net/ip6_route.h>
37 #include <net/ndisc.h>
38 #include <net/addrconf.h>
40 #include <linux/rtnetlink.h>
42 #include <asm/uaccess.h>
45 #include <linux/sysctl.h>
/* Debug macros: RDBG/RT6_TRACE expand to printk when tracing is compiled in;
 * the do{;}while(0) form is the disabled fallback.  (Both arms appear here
 * because the surrounding #if lines were elided from this listing.) */
48 #undef CONFIG_RT6_POLICY
50 /* Set to 3 to get tracing. */
54 #define RDBG(x) printk x
55 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
58 #define RT6_TRACE(x...) do { ; } while (0)
/* Routing-cache garbage collection and PMTU tunables (sysctl-visible in the
 * full file; time values are in jiffies). */
62 int ip6_rt_max_size = 4096;
63 int ip6_rt_gc_min_interval = 5*HZ;
64 int ip6_rt_gc_timeout = 60*HZ;
65 int ip6_rt_gc_interval = 30*HZ;
66 int ip6_rt_gc_elasticity = 9;
/* Expiry for PMTU-learned cache entries: 10 minutes, per RFC 1981 guidance. */
67 int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: minimum IPv6 MTU less TCP (20) and IPv6 (40) headers. */
68 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
70 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
71 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
72 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
74 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
75 static int ip6_dst_gc(void);
77 static int ip6_pkt_discard(struct sk_buff *skb);
78 static void ip6_link_failure(struct sk_buff *skb);
/* dst_ops vtable for IPv6 routes: protocol family ETH_P_IPV6 and entry size
 * sizeof(struct rt6_info).  NOTE(review): the remaining initializer fields
 * (gc, check, reroute, ...) were elided from this listing. */
80 struct dst_ops ip6_dst_ops = {
82 __constant_htons(ETH_P_IPV6),
91 sizeof(struct rt6_info),
/* The permanent "no route" entry: a reject route bound to loopback_dev with
 * error -ENETUNREACH and both input/output set to ip6_pkt_discard.  Lookups
 * that fail return a clone-reference to this entry instead of NULL. */
94 struct rt6_info ip6_null_entry = {
95 {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
96 -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 -ENETUNREACH, NULL, NULL,
98 ip6_pkt_discard, ip6_pkt_discard,
99 #ifdef CONFIG_NET_CLS_ROUTE
103 NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
104 255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
/* Root node of the IPv6 FIB radix tree; flagged as top-level root holding
 * route info (RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO). */
107 struct fib6_node ip6_routing_table = {
108 NULL, NULL, NULL, NULL,
110 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
/* Optional flow-policy routing support.  When CONFIG_RT6_POLICY is off
 * (it is #undef'd above), ip6_rt_policy becomes the constant 0 so the
 * policy branches compile away. */
113 #ifdef CONFIG_RT6_POLICY
114 int ip6_rt_policy = 0;
116 struct pol_chain *rt6_pol_list = NULL;
119 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
120 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
122 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
123 struct in6_addr *daddr,
124 struct in6_addr *saddr,
125 struct fl_acc_args *args);
128 #define ip6_rt_policy (0)
/* Single reader/writer lock serializing all access to the IPv6 FIB. */
131 /* Protects all the ip6 fib */
133 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
/* Walk the leaf chain looking for a route whose device matches oif;
 * loopback devices are tracked separately (as a fallback, presumably —
 * the selection logic between `local` and the oif match is elided here).
 * Returns &ip6_null_entry when a strict match is required but none exists.
 * Caller must hold rt6_lock. */
137 * Route lookup. Any rt6_lock is implied.
140 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
144 struct rt6_info *local = NULL;
145 struct rt6_info *sprt;
148 for (sprt = rt; sprt; sprt = sprt->u.next) {
149 struct net_device *dev = sprt->rt6i_dev;
150 if (dev->ifindex == oif)
152 if (dev->flags&IFF_LOOPBACK)
160 return &ip6_null_entry;
/* rt6_dflt_pointer remembers the last default router chosen so successive
 * selections round-robin among equally-good routers; it is protected by
 * rt6_dflt_lock (BH already disabled by callers). */
166 * pointer to the last default router chosen. BH is disabled locally.
168 static struct rt6_info *rt6_dflt_pointer = NULL;
169 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
/* Pick the best default router from the chain `rt`, preferring neighbours
 * with a favourable NUD state and, when oif is set, routes out of that
 * interface.  The NUD-state scoring inside the switch is elided here. */
171 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
173 struct rt6_info *match = NULL;
174 struct rt6_info *sprt;
177 for (sprt = rt; sprt; sprt = sprt->u.next) {
178 struct neighbour *neigh;
180 if ((neigh = sprt->rt6i_nexthop) != NULL) {
183 switch (neigh->nud_state) {
185 if (sprt != rt6_dflt_pointer) {
201 if (oif && sprt->rt6i_dev->ifindex == oif) {
/* No reachable router found: advance from the remembered pointer to the
 * next live entry (not obsolete, no pending error) to spread load. */
216 * No default routers are known to be reachable.
219 spin_lock(&rt6_dflt_lock);
220 if (rt6_dflt_pointer) {
221 struct rt6_info *next;
223 if ((next = rt6_dflt_pointer->u.next) != NULL &&
224 next->u.dst.obsolete <= 0 &&
225 next->u.dst.error == 0)
228 spin_unlock(&rt6_dflt_lock);
/* Remember the final choice for the next selection round. */
232 spin_lock(&rt6_dflt_lock);
233 rt6_dflt_pointer = rt;
234 spin_unlock(&rt6_dflt_lock);
/* Public FIB lookup: find the node for (daddr, saddr), match the device
 * against oif, and return the route with a reference held (dst_hold under
 * the read lock).  On error (rt->u.dst.error != 0) the reference is dropped
 * and — presumably, per the elided tail — NULL is returned. */
238 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
241 struct fib6_node *fn;
244 read_lock_bh(&rt6_lock);
245 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
246 rt = rt6_device_match(fn->leaf, oif, strict);
247 dst_hold(&rt->u.dst);
249 read_unlock_bh(&rt6_lock);
251 rt->u.dst.lastuse = jiffies;
252 if (rt->u.dst.error == 0)
254 dst_release(&rt->u.dst);
/* Insert a new route into the FIB under the write lock.  Per the contract
 * below, the entry is freed if insertion fails for any reason. */
258 /* rt6_ins is called with FREE rt6_lock.
259 It takes new route entry, the addition fails by any reason the
260 route is freed. In any case, if caller does not hold it, it may
264 static int rt6_ins(struct rt6_info *rt)
268 write_lock_bh(&rt6_lock);
269 err = fib6_add(&ip6_routing_table, rt);
270 write_unlock_bh(&rt6_lock);
/* Copy-on-write: clone `ort` into a host (/128) RTF_CACHE entry for the
 * specific destination.  For non-gateway routes the destination itself is
 * the next hop.  On failure, a referenced ip6_null_entry carrying the errno
 * in dst->error is returned instead of NULL (see comment at 275). */
275 /* No rt6_lock! If COW faild, the function returns dead route entry
276 with dst->error set to errno value.
279 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
280 struct in6_addr *saddr)
289 rt = ip6_rt_copy(ort);
292 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
294 if (!(rt->rt6i_flags&RTF_GATEWAY))
295 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
297 rt->rt6i_dst.plen = 128;
298 rt->rt6i_flags |= RTF_CACHE;
299 rt->u.dst.flags |= DST_HOST;
/* With source-routing subtrees, also pin the source to a /128. */
301 #ifdef CONFIG_IPV6_SUBTREES
302 if (rt->rt6i_src.plen && saddr) {
303 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
304 rt->rt6i_src.plen = 128;
/* Resolve the neighbour entry for the chosen gateway before insertion. */
308 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
310 dst_clone(&rt->u.dst);
/* Error path: hand back the shared null entry with the errno attached. */
316 rt->u.dst.error = err;
320 dst_clone(&ip6_null_entry.u.dst);
321 return &ip6_null_entry;
324 #ifdef CONFIG_RT6_POLICY
/* Policy-routing wrapper for the input path: build FL_ARG_FORWARD args
 * from the packet's IPv6 header and delegate to rt6_flow_lookup(). */
325 static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
328 struct in6_addr *daddr, *saddr;
329 struct fl_acc_args arg;
331 arg.type = FL_ARG_FORWARD;
334 saddr = &skb->nh.ipv6h->saddr;
335 daddr = &skb->nh.ipv6h->daddr;
337 return rt6_flow_lookup(rt, daddr, saddr, &arg);
/* Policy-routing wrapper for the output path: FL_ARG_ORIGIN args carry the
 * originating socket and flow; addresses come from the flowi. */
340 static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
344 struct fl_acc_args arg;
346 arg.type = FL_ARG_ORIGIN;
347 arg.fl_u.fl_o.sk = sk;
348 arg.fl_u.fl_o.flow = fl;
350 return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
/* Shared by ip6_route_input/ip6_route_output: when a strict lookup landed on
 * ip6_null_entry, climb toward the tree root retrying any ancestor that
 * carries route info (RTN_RTINFO); at the root, give up with the null entry. */
356 #define BACKTRACK() \
357 if (rt == &ip6_null_entry && strict) { \
358 while ((fn = fn->parent) != NULL) { \
359 if (fn->fn_flags & RTN_ROOT) { \
360 dst_clone(&rt->u.dst); \
363 if (fn->fn_flags & RTN_RTINFO) \
/* Route an incoming packet and attach the result to skb->dst.
 * `strict` device matching is required for multicast/link-local
 * destinations.  Mirrors ip6_route_output() below for the input side. */
369 void ip6_route_input(struct sk_buff *skb)
371 struct fib6_node *fn;
376 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
379 read_lock_bh(&rt6_lock);
381 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
382 &skb->nh.ipv6h->saddr);
/* Fast path: an RTF_CACHE clone already exists for this destination. */
387 if ((rt->rt6i_flags & RTF_CACHE)) {
388 if (ip6_rt_policy == 0) {
389 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
391 dst_clone(&rt->u.dst);
395 #ifdef CONFIG_RT6_POLICY
396 if ((rt->rt6i_flags & RTF_FLOW)) {
397 struct rt6_info *sprt;
399 for (sprt = rt; sprt; sprt = sprt->u.next) {
400 if (rt6_flow_match_in(sprt, skb)) {
402 dst_clone(&rt->u.dst);
410 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
/* Slow path: connected route without a resolved nexthop — COW it into a
 * host cache entry.  The lock is dropped across rt6_cow, so an -EEXIST
 * from a racing inserter triggers a bounded relookup (see comment). */
413 if (ip6_rt_policy == 0) {
414 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
415 read_unlock_bh(&rt6_lock);
417 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
418 &skb->nh.ipv6h->saddr);
420 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
422 /* Race condition! In the gap, when rt6_lock was
423 released someone could insert this route. Relookup.
427 dst_clone(&rt->u.dst);
429 #ifdef CONFIG_RT6_POLICY
430 rt = rt6_flow_lookup_in(rt, skb);
437 read_unlock_bh(&rt6_lock);
439 rt->u.dst.lastuse = jiffies;
441 skb->dst = (struct dst_entry *) rt;
/* Route an outgoing flow; returns a referenced dst_entry.  Structure
 * parallels ip6_route_input(): cache fast path, default-router selection,
 * then COW slow path with the same -EEXIST relookup loop. */
444 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
446 struct fib6_node *fn;
451 strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
454 read_lock_bh(&rt6_lock);
456 fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
457 fl->nl_u.ip6_u.saddr);
/* Fast path: existing RTF_CACHE clone. */
462 if ((rt->rt6i_flags & RTF_CACHE)) {
463 if (ip6_rt_policy == 0) {
464 rt = rt6_device_match(rt, fl->oif, strict);
466 dst_clone(&rt->u.dst);
470 #ifdef CONFIG_RT6_POLICY
471 if ((rt->rt6i_flags & RTF_FLOW)) {
472 struct rt6_info *sprt;
474 for (sprt = rt; sprt; sprt = sprt->u.next) {
475 if (rt6_flow_match_out(sprt, sk)) {
477 dst_clone(&rt->u.dst);
/* Default route: addrconf-priority defaults go through the round-robin
 * best-router selection; others through plain device matching. */
484 if (rt->rt6i_flags & RTF_DEFAULT) {
485 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
486 rt = rt6_best_dflt(rt, fl->oif);
488 rt = rt6_device_match(rt, fl->oif, strict);
/* Slow path: COW into a host cache entry; retry on -EEXIST race. */
492 if (ip6_rt_policy == 0) {
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
494 read_unlock_bh(&rt6_lock);
496 rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
497 fl->nl_u.ip6_u.saddr);
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
507 dst_clone(&rt->u.dst);
509 #ifdef CONFIG_RT6_POLICY
510 rt = rt6_flow_lookup_out(rt, sk, fl);
517 read_unlock_bh(&rt6_lock);
519 rt->u.dst.lastuse = jiffies;
526 * Destination cache support functions
/* dst_ops.check: a cached dst stays valid only while the FIB node's serial
 * number still matches the cookie recorded at lookup time. */
529 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
533 rt = (struct rt6_info *) dst;
535 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.reroute stub — not expected to be reached; just logs via RDBG
 * ("AIEEE") for diagnosis. */
542 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
547 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
548 __builtin_return_address(0)));
/* dst_ops.negative_advice: on negative feedback, cached (RTF_CACHE) clones
 * are candidates for removal (action elided in this listing). */
552 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
554 struct rt6_info *rt = (struct rt6_info *) dst;
557 if (rt->rt6i_flags & RTF_CACHE)
/* dst_ops.link_failure: report unreachability to the sender, then expire
 * the cached clone immediately, or invalidate the FIB node's serial number
 * for default routes so dependent dsts fail their next ip6_dst_check. */
565 static void ip6_link_failure(struct sk_buff *skb)
569 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
571 rt = (struct rt6_info *) skb->dst;
573 if (rt->rt6i_flags&RTF_CACHE) {
574 dst_set_expires(&rt->u.dst, 0);
575 rt->rt6i_flags |= RTF_EXPIRES;
576 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
577 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.gc: rate-limited garbage collection of the route cache.  Skips
 * work when invoked again within ip6_rt_gc_min_interval and the cache is
 * under ip6_rt_max_size; otherwise adapts `expire` (halved when under the
 * threshold, shrunk by the elasticity factor when over).  Returns nonzero
 * while the cache is still over ip6_rt_max_size. */
581 static int ip6_dst_gc()
583 static unsigned expire = 30*HZ;
584 static unsigned long last_gc;
585 unsigned long now = jiffies;
587 if ((long)(now - last_gc) < ip6_rt_gc_min_interval &&
588 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
594 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
595 expire = ip6_rt_gc_timeout>>1;
598 expire -= expire>>ip6_rt_gc_elasticity;
599 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
/* Zero the host bits of `pfx` beyond `plen`: clear whole bytes after the
 * prefix, then mask the partial byte.  NOTE(review): the `int b = plen & 7`
 * declaration and the guarding ifs were elided from this listing. */
602 /* Clean host part of a prefix. Not necessary in radix tree,
603 but results in cleaner routing tables.
605 Remove it only when all the things will work!
608 static void ipv6_wash_prefix(struct in6_addr *pfx, int plen)
611 int o = (plen + 7)>>3;
614 memset(pfx->s6_addr + o, 0, 16 - o);
616 pfx->s6_addr[plen>>3] &= (0xFF<<(8-b));
/* Per-device IPv6 MTU from the inet6_dev config, defaulting to IPV6_MIN_MTU
 * when the device has no inet6_dev.  (The in6_dev_put pairing for the
 * reference from in6_dev_get is elided here.) */
619 static int ipv6_get_mtu(struct net_device *dev)
621 int mtu = IPV6_MIN_MTU;
622 struct inet6_dev *idev;
624 idev = in6_dev_get(dev);
626 mtu = idev->cnf.mtu6;
/* Per-device hop limit from inet6_dev config, defaulting to the global
 * ipv6_devconf.hop_limit. */
632 static int ipv6_get_hoplimit(struct net_device *dev)
634 int hoplimit = ipv6_devconf.hop_limit;
635 struct inet6_dev *idev;
637 idev = in6_dev_get(dev);
639 hoplimit = idev->cnf.hop_limit;
/* Add a route described by an in6_rtmsg (ioctl/netlink path).  Validates
 * prefix lengths, allocates the rt6_info, fills addresses/flags/metrics,
 * verifies any gateway, and inserts via rt6_ins (call site elided).
 * Returns 0 or a negative errno; the entry is dst_free'd on failure. */
649 int ip6_route_add(struct in6_rtmsg *rtmsg)
653 struct net_device *dev = NULL;
656 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
/* Source-routing prefixes are only valid with CONFIG_IPV6_SUBTREES. */
658 #ifndef CONFIG_IPV6_SUBTREES
659 if (rtmsg->rtmsg_src_len)
662 if (rtmsg->rtmsg_metric == 0)
663 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
665 rt = dst_alloc(&ip6_dst_ops);
670 rt->u.dst.obsolete = -1;
671 rt->rt6i_expires = rtmsg->rtmsg_info;
673 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
/* Choose the input handler by destination type; output is ip6_output. */
675 if (addr_type & IPV6_ADDR_MULTICAST)
676 rt->u.dst.input = ip6_mc_input;
678 rt->u.dst.input = ip6_forward;
680 rt->u.dst.output = ip6_output;
682 if (rtmsg->rtmsg_ifindex) {
683 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
689 ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst);
690 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
691 if (rt->rt6i_dst.plen == 128)
692 rt->u.dst.flags = DST_HOST;
693 ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen);
695 #ifdef CONFIG_IPV6_SUBTREES
696 ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src);
697 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
698 ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen);
701 rt->rt6i_metric = rtmsg->rtmsg_metric;
/* Loopback-device routes (to non-loopback addresses) would loop packets
 * in the kernel, so they are demoted to reject routes. */
703 /* We cannot add true routes via loopback here,
704 they would result in kernel looping; promote them to reject routes
706 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
707 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
712 rt->u.dst.output = ip6_pkt_discard;
713 rt->u.dst.input = ip6_pkt_discard;
714 rt->u.dst.error = -ENETUNREACH;
715 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
719 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
720 struct in6_addr *gw_addr;
723 gw_addr = &rtmsg->rtmsg_gateway;
724 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
725 gwa_type = ipv6_addr_type(gw_addr);
/* Non-link-local gateways are only allowed as a documented exception
 * (SIT/PtP/NBMA); they must be unicast and resolvable to an existing
 * gatewayless route out of the same device. */
727 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
728 struct rt6_info *grt;
730 /* IPv6 strictly inhibits using not link-local
731 addresses as nexthop address.
732 Otherwise, router will not able to send redirects.
733 It is very good, but in some (rare!) curcumstances
734 (SIT, PtP, NBMA NOARP links) it is handy to allow
735 some exceptions. --ANK
738 if (!(gwa_type&IPV6_ADDR_UNICAST))
741 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
747 if (dev != grt->rt6i_dev) {
748 dst_release(&grt->u.dst);
755 if (!(grt->rt6i_flags&RTF_GATEWAY))
757 dst_release(&grt->u.dst);
763 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
/* Gateway/NONEXTHOP routes need a resolved neighbour entry up front. */
771 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
772 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
773 if (IS_ERR(rt->rt6i_nexthop)) {
774 err = PTR_ERR(rt->rt6i_nexthop);
775 rt->rt6i_nexthop = NULL;
780 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
781 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
783 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
784 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Derive PMTU and advertised MSS from the device; clamp advmss so that
 * 65535 keeps its "rely on PMTU discovery" meaning. */
787 rt->u.dst.pmtu = ipv6_get_mtu(dev);
788 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
789 /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
790 MSS is 65535 - tcp_header_size. 65535 is also valid and
791 means: "any MSS, rely only on pmtu discovery"
793 if (rt->u.dst.advmss > 65535-20)
794 rt->u.dst.advmss = 65535;
801 dst_free((struct dst_entry *) rt);
/* Delete a specific route under the write lock.  The cached default-router
 * pointer is cleared first so it can never reference the dying route; the
 * caller's reference is dropped with dst_release. */
805 int ip6_del_rt(struct rt6_info *rt)
809 write_lock_bh(&rt6_lock);
811 spin_lock_bh(&rt6_dflt_lock);
812 rt6_dflt_pointer = NULL;
813 spin_unlock_bh(&rt6_dflt_lock);
815 dst_release(&rt->u.dst);
818 write_unlock_bh(&rt6_lock);
/* Delete the route matching an in6_rtmsg: locate the exact FIB node, then
 * scan its leaf chain for a route matching the optional ifindex, gateway,
 * and metric filters.  On a match, take a reference, drop the read lock,
 * and delegate to ip6_del_rt(). */
823 int ip6_route_del(struct in6_rtmsg *rtmsg)
825 struct fib6_node *fn;
829 read_lock_bh(&rt6_lock);
831 fn = fib6_locate(&ip6_routing_table,
832 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
833 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
836 for (rt = fn->leaf; rt; rt = rt->u.next) {
837 if (rtmsg->rtmsg_ifindex &&
838 (rt->rt6i_dev == NULL ||
839 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
841 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
842 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
844 if (rtmsg->rtmsg_metric &&
845 rtmsg->rtmsg_metric != rt->rt6i_metric)
847 dst_clone(&rt->u.dst);
848 read_unlock_bh(&rt6_lock);
850 return ip6_del_rt(rt);
853 read_unlock_bh(&rt6_lock);
/* Process an NDISC redirect for `dest` from router `saddr`: validate the
 * redirect against the current route, then install a host RTF_CACHE clone
 * pointing at the new neighbour. */
861 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
862 struct neighbour *neigh, int on_link)
864 struct rt6_info *rt, *nrt;
866 /* Locate old route to this destination. */
867 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
872 if (neigh->dev != rt->rt6i_dev)
875 /* Redirect received -> path was valid.
876 Look, redirects are sent only in response to data packets,
877 so that this nexthop apparently is reachable. --ANK
879 dst_confirm(&rt->u.dst);
881 /* Duplicate redirect: silently ignore. */
882 if (neigh == rt->u.dst.neighbour)
885 /* Current route is on-link; redirect is always invalid.
887 Seems, previous statement is not true. It could
888 be node, which looks for us as on-link (f.e. proxy ndisc)
889 But then router serving it might decide, that we should
890 know truth 8)8) --ANK (980726).
892 if (!(rt->rt6i_flags&RTF_GATEWAY))
/* RFC 1970 check: the redirect must come from our current nexthop; for
 * default routes, any configured default router's address is accepted. */
896 * RFC 1970 specifies that redirects should only be
897 * accepted if they come from the nexthop to the target.
898 * Due to the way default routers are chosen, this notion
899 * is a bit fuzzy and one might need to check all default
903 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
904 if (rt->rt6i_flags & RTF_DEFAULT) {
905 struct rt6_info *rt1;
907 read_lock(&rt6_lock);
908 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
909 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
910 dst_clone(&rt1->u.dst);
911 dst_release(&rt->u.dst);
912 read_unlock(&rt6_lock);
917 read_unlock(&rt6_lock);
920 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
921 "for redirect target\n");
/* Build the replacement: a /128 dynamic cache entry through `neigh`
 * (RTF_GATEWAY cleared again for on-link redirects). */
928 * We have finally decided to accept it.
931 nrt = ip6_rt_copy(rt);
935 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
937 nrt->rt6i_flags &= ~RTF_GATEWAY;
939 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
940 nrt->rt6i_dst.plen = 128;
941 nrt->u.dst.flags |= DST_HOST;
943 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
944 nrt->rt6i_nexthop = neigh_clone(neigh);
945 /* Reset pmtu, it may be better */
946 nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
947 nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
/* NOTE(review): the clamp below tests and writes rt->u.dst.advmss while the
 * surrounding lines configure nrt — looks like it should be nrt (cf. the
 * identical clamp in ip6_route_add).  Verify against upstream history. */
948 if (rt->u.dst.advmss > 65535-20)
949 rt->u.dst.advmss = 65535;
950 nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
/* If the old route was itself a cache entry, it is replaced (deletion
 * path elided in this listing); finally drop the lookup reference. */
955 if (rt->rt6i_flags&RTF_CACHE) {
961 dst_release(&rt->u.dst);
/* Handle an ICMPv6 Packet Too Big: record the reduced PMTU on the route to
 * `daddr`, creating an expiring host cache entry when needed. */
966 * Handle ICMP "packet too big" messages
967 * i.e. Path MTU discovery
970 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
971 struct net_device *dev, u32 pmtu)
973 struct rt6_info *rt, *nrt;
/* RFC 1981: a reported next-hop MTU below the IPv6 minimum clamps the
 * PMTU to IPV6_MIN_MTU rather than being honoured verbatim. */
975 if (pmtu < IPV6_MIN_MTU) {
977 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
979 /* According to RFC1981, the PMTU is set to the IPv6 minimum
980 link MTU if the node receives a Packet Too Big message
981 reporting next-hop MTU that is less than the IPv6 minimum MTU.
986 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* Only PMTU decreases are acted upon here. */
991 if (pmtu >= rt->u.dst.pmtu)
994 /* New mtu received -> path was valid.
995 They are sent only in response to data packets,
996 so that this nexthop apparently is reachable. --ANK
998 dst_confirm(&rt->u.dst);
/* Existing cache entry: update in place with a 10-minute expiry so the
 * pre-decrease PMTU returns automatically. */
1000 /* Host route. If it is static, it would be better
1001 not to override it, but add new one, so that
1002 when cache entry will expire old pmtu
1003 would return automatically.
1005 if (rt->rt6i_flags & RTF_CACHE) {
1006 rt->u.dst.pmtu = pmtu;
1007 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1008 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
/* Otherwise create a host entry: COW for connected routes, plain clone
 * for gatewayed/NONEXTHOP routes. */
1013 Two cases are possible:
1014 1. It is connected route. Action: COW
1015 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1017 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1018 nrt = rt6_cow(rt, daddr, saddr);
1019 if (!nrt->u.dst.error) {
1020 nrt->u.dst.pmtu = pmtu;
1021 /* According to RFC 1981, detecting PMTU increase shouldn't be
1022 happened within 5 mins, the recommended timer is 10 mins.
1023 Here this route expiration time is set to ip6_rt_mtu_expires
1024 which is 10 mins. After 10 mins the decreased pmtu is expired
1025 and detecting PMTU increase will be automatically happened.
1027 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1028 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1029 dst_release(&nrt->u.dst);
1032 nrt = ip6_rt_copy(rt);
1035 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1036 nrt->rt6i_dst.plen = 128;
1037 nrt->u.dst.flags |= DST_HOST;
1038 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1039 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1040 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1041 nrt->u.dst.pmtu = pmtu;
1046 dst_release(&rt->u.dst);
1050 * Misc support functions
/* Allocate a new rt6_info initialized from `ort`: handlers, metrics, device
 * (with a reference), gateway and keys are copied; expiry and metric are
 * reset and RTF_EXPIRES is stripped so the clone starts fresh. */
1053 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1055 struct rt6_info *rt;
1057 rt = dst_alloc(&ip6_dst_ops);
1060 rt->u.dst.input = ort->u.dst.input;
1061 rt->u.dst.output = ort->u.dst.output;
1063 memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1064 rt->u.dst.dev = ort->u.dst.dev;
1066 dev_hold(rt->u.dst.dev);
1067 rt->u.dst.lastuse = jiffies;
1068 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1069 rt->rt6i_expires = 0;
1071 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1072 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1073 rt->rt6i_metric = 0;
1075 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1076 #ifdef CONFIG_IPV6_SUBTREES
1077 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key))
/* Find the default route through gateway `addr` on `dev` by scanning the
 * root node's leaf chain; returns it with a reference, or NULL. */
1083 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1085 struct rt6_info *rt;
1086 struct fib6_node *fn;
1088 fn = &ip6_routing_table;
1090 write_lock_bh(&rt6_lock);
1091 for (rt = fn->leaf; rt; rt=rt->u.next) {
1092 if (dev == rt->rt6i_dev &&
1093 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1097 dst_clone(&rt->u.dst);
1098 write_unlock_bh(&rt6_lock);
/* Install an addrconf-learned default route via `gwaddr` on `dev` (metric
 * 1024), then return the freshly inserted entry via rt6_get_dflt_router. */
1102 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1103 struct net_device *dev)
1105 struct in6_rtmsg rtmsg;
1107 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1108 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1109 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1110 rtmsg.rtmsg_metric = 1024;
1111 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1113 rtmsg.rtmsg_ifindex = dev->ifindex;
1115 ip6_route_add(&rtmsg);
1116 return rt6_get_dflt_router(gwaddr, dev);
/* Remove all default routers (RTF_DEFAULT|RTF_ADDRCONF), or only
 * RTF_ALLONLINK ones when `last_resort` is set.  For each match the scan
 * takes a reference, clears the cached default pointer, drops the read
 * lock (deletion and loop restart are elided), and rescans. */
1119 void rt6_purge_dflt_routers(int last_resort)
1121 struct rt6_info *rt;
1125 flags = RTF_ALLONLINK;
1127 flags = RTF_DEFAULT | RTF_ADDRCONF;
1130 read_lock_bh(&rt6_lock);
1131 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1132 if (rt->rt6i_flags & flags) {
1133 dst_hold(&rt->u.dst);
1135 spin_lock_bh(&rt6_dflt_lock);
1136 rt6_dflt_pointer = NULL;
1137 spin_unlock_bh(&rt6_dflt_lock);
1139 read_unlock_bh(&rt6_lock);
1146 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies the
 * in6_rtmsg from userspace, and dispatches to ip6_route_add/del. */
1149 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1151 struct in6_rtmsg rtmsg;
1155 case SIOCADDRT: /* Add a route */
1156 case SIOCDELRT: /* Delete a route */
1157 if (!capable(CAP_NET_ADMIN))
1159 err = copy_from_user(&rtmsg, arg,
1160 sizeof(struct in6_rtmsg));
1167 err = ip6_route_add(&rtmsg);
1170 err = ip6_route_del(&rtmsg);
/* Input/output handler for reject routes: count the no-route drop and
 * send ICMPv6 destination-unreachable back to the sender. */
1184 * Drop the packet on the floor
1187 int ip6_pkt_discard(struct sk_buff *skb)
1189 IP6_INC_STATS(Ip6OutNoRoutes);
1190 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
/* Install the local host route for address `addr`: a /128 through the
 * loopback device ("lo") delivering via ip6_input, with a resolved
 * neighbour entry (NONEXTHOP).  The dst is freed if neighbour resolution
 * fails.  Note the rt6_ins call site is elided from this listing. */
1199 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1201 struct rt6_info *rt;
1203 rt = dst_alloc(&ip6_dst_ops);
1207 rt->u.dst.flags = DST_HOST;
1208 rt->u.dst.input = ip6_input;
1209 rt->u.dst.output = ip6_output;
1210 rt->rt6i_dev = dev_get_by_name("lo");
1211 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1212 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1213 if (rt->u.dst.advmss > 65535-20)
1214 rt->u.dst.advmss = 65535;
1215 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1216 rt->u.dst.obsolete = -1;
1218 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1219 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1220 if (rt->rt6i_nexthop == NULL) {
1221 dst_free((struct dst_entry *) rt);
1225 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1226 rt->rt6i_dst.plen = 128;
/* Remove the local /128 host route for `addr` (installed by
 * ip6_rt_addr_add): look it up via the loopback ifindex and delete it if
 * it is indeed a host route, then drop the lookup reference. */
1232 /* Delete address. Warning: you should check that this address
1233 disappeared before calling this function.
1236 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1238 struct rt6_info *rt;
1241 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1243 if (rt->rt6i_dst.plen == 128)
1244 err = ip6_del_rt(rt);
1246 dst_release(&rt->u.dst);
1252 #ifdef CONFIG_RT6_POLICY
/* Match an input packet against a route's flow filter list: each filter
 * compares one 32-bit word at `offset` from the transport header against
 * `value` under `mask`. */
1254 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1256 struct flow_filter *frule;
1257 struct pkt_filter *filter;
1260 if ((frule = rt->rt6i_filter) == NULL)
1263 if (frule->type != FLR_INPUT) {
1268 for (filter = frule->u.filter; filter; filter = filter->next) {
1271 word = (__u32 *) skb->h.raw;
1272 word += filter->offset;
1274 if ((*word ^ filter->value) & filter->mask) {
/* Match an originating socket against a route's flow filter: the rule must
 * reference this exact socket.  NOTE(review): the type check here tests
 * FLR_INPUT just like the input-side matcher — possibly intentional reuse,
 * possibly a copy/paste slip; verify against the policy-routing design. */
1284 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1286 struct flow_filter *frule;
1289 if ((frule = rt->rt6i_filter) == NULL)
1292 if (frule->type != FLR_INPUT) {
1297 if (frule->u.sk != sk)
/* Walk the policy chain: for each policy, look up (daddr, saddr) in its
 * rule tree and run every rule's accept() callback until one yields a
 * result route `nrt`.  Falls back to a referenced ip6_null_entry when no
 * policy decides; the chosen route is marked RTF_CACHE and referenced. */
1303 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1304 struct in6_addr *daddr,
1305 struct in6_addr *saddr,
1306 struct fl_acc_args *args)
1308 struct flow_rule *frule;
1309 struct rt6_info *nrt = NULL;
1310 struct pol_chain *pol;
1312 for (pol = rt6_pol_list; pol; pol = pol->next) {
1313 struct fib6_node *fn;
1314 struct rt6_info *sprt;
1316 fn = fib6_lookup(pol->rules, daddr, saddr);
1319 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1322 frule = sprt->rt6i_flowr;
/* A policy route without a flow rule is a configuration bug. */
1324 if (frule == NULL) {
1325 printk(KERN_DEBUG "NULL flowr\n");
1329 res = frule->ops->accept(rt, sprt, args, &nrt);
1336 case FLOWR_NODECISION:
1345 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1351 dst_clone(&ip6_null_entry.u.dst);
1352 return &ip6_null_entry;
1358 nrt->rt6i_flags |= RTF_CACHE;
1359 dst_clone(&nrt->u.dst);
1362 nrt->u.dst.error = err;
/* fib6_clean_tree callback: select for deletion every route on the device
 * passed via `arg` (or every route when arg is NULL), sparing the shared
 * null entry. */
1367 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1369 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1370 rt != &ip6_null_entry) {
1371 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Device-down notification: purge all routes through `dev` by walking the
 * whole FIB under the write lock with the fib6_ifdown callback. */
1377 void rt6_ifdown(struct net_device *dev)
1379 write_lock_bh(&rt6_lock);
1380 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1381 write_unlock_bh(&rt6_lock);
/* Argument bundle for the MTU-change tree walk: the device whose MTU
 * changed (and, per the elided field, the new MTU). */
1384 struct rt6_mtu_change_arg
1386 struct net_device *dev;
/* fib6_clean_tree callback: propagate a device MTU change into route PMTUs.
 * Updates the PMTU when it is not administratively locked and either
 * exceeds the new MTU (decrease) or equalled the old device MTU (increase —
 * see RFC 1981 discussion below).  advmss is recomputed from the new MTU. */
1390 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1392 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1393 struct inet6_dev *idev;
1394 /* In IPv6 pmtu discovery is not optional,
1395 so that RTAX_MTU lock cannot disable it.
1396 We still use this lock to block changes
1397 caused by addrconf/ndisc.
1399 idev = __in6_dev_get(arg->dev);
1403 /* For administrative MTU increase, there is no way to discover
1404 IPv6 PMTU increase, so PMTU increase should be updated here.
1405 Since RFC 1981 doesn't include administrative MTU increase
1406 update PMTU increase is a MUST. (i.e. jumbo frame)
1409 If new MTU is less than route PMTU, this new MTU will be the
1410 lowest MTU in the path, update the route PMTU to refect PMTU
1411 decreases; if new MTU is greater than route PMTU, and the
1412 old MTU is the lowest MTU in the path, update the route PMTU
1413 to refect the increase. In this case if the other nodes' MTU
1414 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1417 if (rt->rt6i_dev == arg->dev &&
1418 !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
1419 (rt->u.dst.pmtu > arg->mtu ||
1420 (rt->u.dst.pmtu < arg->mtu &&
1421 rt->u.dst.pmtu == idev->cnf.mtu6)))
1422 rt->u.dst.pmtu = arg->mtu;
/* advmss tracks the new device MTU unconditionally, clamped as elsewhere. */
1423 rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
1424 if (rt->u.dst.advmss > 65535-20)
1425 rt->u.dst.advmss = 65535;
/* Entry point for device MTU changes: walk the FIB under the read lock,
 * applying rt6_mtu_change_route to every route. */
1429 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1431 struct rt6_mtu_change_arg arg;
1435 read_lock_bh(&rt6_lock);
1436 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1437 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink (rtmsg + rtattr[]) route request into the legacy
 * in6_rtmsg used by ip6_route_add/del.  Attribute lengths are validated;
 * RTN_UNREACHABLE maps to RTF_REJECT and a gateway attribute implies
 * RTF_GATEWAY.  Returns 0 on success (error returns elided). */
1440 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1441 struct in6_rtmsg *rtmsg)
1443 memset(rtmsg, 0, sizeof(*rtmsg));
1445 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1446 rtmsg->rtmsg_src_len = r->rtm_src_len;
1447 rtmsg->rtmsg_flags = RTF_UP;
1448 if (r->rtm_type == RTN_UNREACHABLE)
1449 rtmsg->rtmsg_flags |= RTF_REJECT;
1451 if (rta[RTA_GATEWAY-1]) {
1452 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1454 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1455 rtmsg->rtmsg_flags |= RTF_GATEWAY;
/* Destination/source prefixes are copied only up to their prefix length
 * rounded to whole bytes. */
1457 if (rta[RTA_DST-1]) {
1458 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1460 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1462 if (rta[RTA_SRC-1]) {
1463 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1465 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1467 if (rta[RTA_OIF-1]) {
1468 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1470 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1472 if (rta[RTA_PRIORITY-1]) {
1473 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1475 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* RTM_DELROUTE handler: convert the netlink message and delegate to
 * ip6_route_del. */
1480 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1482 struct rtmsg *r = NLMSG_DATA(nlh);
1483 struct in6_rtmsg rtmsg;
1485 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1487 return ip6_route_del(&rtmsg);
/* RTM_NEWROUTE handler: convert the netlink message and delegate to
 * ip6_route_add. */
1490 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1492 struct rtmsg *r = NLMSG_DATA(nlh);
1493 struct in6_rtmsg rtmsg;
1495 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1497 return ip6_route_add(&rtmsg);
/* State threaded through the netlink route-dump walk: the skb being filled
 * and the dump callback (for pid/seq and resume state). */
1500 struct rt6_rtnl_dump_arg
1502 struct sk_buff *skb;
1503 struct netlink_callback *cb;
/* Serialize one rt6_info into an RTM_* netlink message on `skb`.  When
 * `dst`/`src` are given (getroute replies) they are emitted as /128s in
 * place of the route's own prefixes.  Returns via NLMSG_PUT/RTA_PUT goto
 * labels; on overflow the partial message is trimmed off. */
1506 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1507 struct in6_addr *dst,
1508 struct in6_addr *src,
1510 int type, u32 pid, u32 seq)
1513 struct nlmsghdr *nlh;
1514 unsigned char *b = skb->tail;
1515 struct rta_cacheinfo ci;
1517 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1518 rtm = NLMSG_DATA(nlh);
1519 rtm->rtm_family = AF_INET6;
1520 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1521 rtm->rtm_src_len = rt->rt6i_src.plen;
1523 rtm->rtm_table = RT_TABLE_MAIN;
/* Map route flags to rtnetlink route type and protocol. */
1524 if (rt->rt6i_flags&RTF_REJECT)
1525 rtm->rtm_type = RTN_UNREACHABLE;
1526 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1527 rtm->rtm_type = RTN_LOCAL;
1529 rtm->rtm_type = RTN_UNICAST;
1531 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1532 rtm->rtm_protocol = RTPROT_BOOT;
1533 if (rt->rt6i_flags&RTF_DYNAMIC)
1534 rtm->rtm_protocol = RTPROT_REDIRECT;
1535 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1536 rtm->rtm_protocol = RTPROT_KERNEL;
1537 else if (rt->rt6i_flags&RTF_DEFAULT)
1538 rtm->rtm_protocol = RTPROT_RA;
1540 if (rt->rt6i_flags&RTF_CACHE)
1541 rtm->rtm_flags |= RTM_F_CLONED;
1544 RTA_PUT(skb, RTA_DST, 16, dst);
1545 rtm->rtm_dst_len = 128;
1546 } else if (rtm->rtm_dst_len)
1547 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1548 #ifdef CONFIG_IPV6_SUBTREES
1550 RTA_PUT(skb, RTA_SRC, 16, src);
1551 rtm->rtm_src_len = 128;
1552 } else if (rtm->rtm_src_len)
1553 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1556 RTA_PUT(skb, RTA_IIF, 4, &iif);
1558 struct in6_addr saddr_buf;
1559 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1560 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1562 if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1563 goto rtattr_failure;
1564 if (rt->u.dst.neighbour)
1565 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key)
1567 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1568 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache statistics: age, expiry, use counts, refcount and pending error. */
1569 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1570 if (rt->rt6i_expires)
1571 ci.rta_expires = rt->rt6i_expires - jiffies;
1574 ci.rta_used = rt->u.dst.__use;
1575 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1576 ci.rta_error = rt->u.dst.error;
1580 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1581 nlh->nlmsg_len = skb->tail - b;
/* Overflow path: undo the partial message. */
1586 skb_trim(skb, b - skb->data);
/* Per-route dump callback: emit the route as RTM_NEWROUTE addressed to the
 * requesting socket (pid/seq from the netlink callback). */
1590 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1592 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1594 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1595 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq);
/* fib6 walker node callback: dump every route on the leaf chain; a failed
 * fill means the skb is full, so the walk is suspended for resumption. */
1598 static int fib6_dump_node(struct fib6_walker_t *w)
1601 struct rt6_info *rt;
1603 for (rt = w->leaf; rt; rt = rt->u.next) {
1604 res = rt6_dump_route(rt, w->args);
1606 /* Frame is full, suspend walking */
/*
 * Release the dump state attached to a netlink callback: unlink the
 * walker saved in cb->args[0] from the walker list and restore the
 * original ->done destructor stashed in cb->args[1] by
 * inet6_dump_fib().
 */
1616 static void fib6_dump_end(struct netlink_callback *cb)
1618 struct fib6_walker_t *w = (void*)cb->args[0];
1622 fib6_walker_unlink(w);
/* Put back the destructor that inet6_dump_fib() replaced. */
1626 cb->done = (void*)cb->args[1];
/*
 * ->done hook installed by inet6_dump_fib(): tear down the dump state
 * (cleanup call elided in this excerpt) and then chain to the original
 * netlink destructor, which fib6_dump_end() has restored into
 * cb->done.
 */
1631 static int fib6_dump_done(struct netlink_callback *cb)
1634 return cb->done(cb);
/*
 * rtnetlink dump handler for IPv6 routes (RTM_GETROUTE with
 * NLM_F_DUMP).  Uses a fib6 tree walker so the dump can be suspended
 * when the skb fills up and resumed on the next call with the same
 * netlink_callback.  Returns skb->len while more data is pending.
 */
1637 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1639 struct rt6_rtnl_dump_arg arg;
1640 struct fib6_walker_t *w;
/* The walker pointer survives between calls in cb->args[0]. */
1646 w = (void*)cb->args[0];
1650 * 1. hook callback destructor.
/* Save the original destructor so fib6_dump_end() can restore it. */
1652 cb->args[1] = (long)cb->done;
1653 cb->done = fib6_dump_done;
1656 * 2. allocate and initialize walker.
/* GFP_ATOMIC: must not sleep here. */
1658 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1661 RT6_TRACE("dump<%p", w);
1662 memset(w, 0, sizeof(*w));
1663 w->root = &ip6_routing_table;
1664 w->func = fib6_dump_node;
1666 cb->args[0] = (long)w;
/* First pass: start a fresh walk under the routing table lock. */
1667 read_lock_bh(&rt6_lock);
1669 read_unlock_bh(&rt6_lock);
/* Later passes: resume the walk that was suspended mid-tree. */
1672 read_lock_bh(&rt6_lock);
1673 res = fib6_walk_continue(w);
1674 read_unlock_bh(&rt6_lock);
1677 if (res <= 0 && skb->len == 0)
1678 RT6_TRACE("%p>dump end\n", w);
1680 res = res < 0 ? res : skb->len;
1681 /* res < 0 is an error. (really, impossible)
1682 res == 0 means that dump is complete, but skb still can contain data.
1683 res > 0 dump is not complete, but frame is full.
1685 /* Destroy walker, if dump of this table is complete. */
/*
 * RTM_GETROUTE handler for a single-route query: resolve the
 * destination/source carried in the request attributes through the
 * IPv6 routing engine and unicast one RTM_NEWROUTE answer back to the
 * requesting socket.
 */
1691 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1693 struct rtattr **rta = arg;
1696 struct sk_buff *skb;
1698 struct rt6_info *rt;
1700 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1704 /* Reserve room for dummy headers, this skb can pass
1705 through good chunk of routing engine.
1707 skb->mac.raw = skb->data;
1708 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* Build a flow description from the request's rtattrs. */
1711 fl.nl_u.ip6_u.daddr = NULL;
1712 fl.nl_u.ip6_u.saddr = NULL;
1713 fl.uli_u.icmpt.type = 0;
1714 fl.uli_u.icmpt.code = 0;
1716 fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1718 fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
/* Optional input/output interface hints from the request. */
1721 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1724 struct net_device *dev;
1725 dev = __dev_get_by_index(iif);
1732 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
/* Route lookup; the resulting dst is attached to the answer skb. */
1734 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1736 skb->dst = &rt->u.dst;
1738 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1739 err = rt6_fill_node(skb, rt,
1740 fl.nl_u.ip6_u.daddr,
1741 fl.nl_u.ip6_u.saddr,
1743 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq);
/* Send the answer straight back to the requester. */
1747 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * Broadcast a routing-table change (@event, e.g. RTM_NEWROUTE or
 * RTM_DELROUTE) to all RTMGRP_IPV6_ROUTE netlink listeners.  On
 * allocation or fill failure the error is pushed to the whole group
 * via netlink_set_err().
 */
1753 void inet6_rt_notify(int event, struct rt6_info *rt)
1755 struct sk_buff *skb;
/* Room for the rtmsg header plus a generous 256 bytes of rtattrs. */
1756 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
/* gfp_any() selects an allocation mode fitting the current context. */
1758 skb = alloc_skb(size, gfp_any());
1760 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1763 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0) < 0) {
1765 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1768 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1769 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1776 #ifdef CONFIG_PROC_FS
/*
 * Fixed width of one /proc/net/ipv6_route record; the terms match the
 * fields emitted by rt6_info_route(): two 32-hex-digit addresses each
 * followed by a 4-char prefix-length field, a 32-digit gateway, the
 * " %08x %08x %08x %08x %8s" metrics block (40) plus device name
 * padding (5) and the trailing newline.
 */
1778 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * fib6_clean_tree() callback that formats one route as a fixed-width
 * /proc/net/ipv6_route line into arg->buffer.  Records that precede
 * the requested file offset are skipped, and output stops once
 * arg->length bytes have been produced.
 */
1789 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1791 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole fixed-size records before the read offset. */
1794 if (arg->skip < arg->offset / RT6_INFO_LEN) {
/* Stop once the caller's buffer window is full. */
1799 if (arg->len >= arg->length)
/* Destination: 32 hex digits, then its prefix length. */
1802 for (i=0; i<16; i++) {
1803 sprintf(arg->buffer + arg->len, "%02x",
1804 rt->rt6i_dst.addr.s6_addr[i]);
1807 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* A real source prefix is only kept with subtree support;
   otherwise an all-zero placeholder is emitted. */
1810 #ifdef CONFIG_IPV6_SUBTREES
1811 for (i=0; i<16; i++) {
1812 sprintf(arg->buffer + arg->len, "%02x",
1813 rt->rt6i_src.addr.s6_addr[i]);
1816 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1819 sprintf(arg->buffer + arg->len,
1820 "00000000000000000000000000000000 00 ");
/* Next hop: neighbour key when known, else all zeros. */
1824 if (rt->rt6i_nexthop) {
1825 for (i=0; i<16; i++) {
1826 sprintf(arg->buffer + arg->len, "%02x",
1827 rt->rt6i_nexthop->primary_key[i]);
1831 sprintf(arg->buffer + arg->len,
1832 "00000000000000000000000000000000");
/* Metric, refcount, use count, flags, device name. */
1835 arg->len += sprintf(arg->buffer + arg->len,
1836 " %08x %08x %08x %08x %8s\n",
1837 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1838 rt->u.dst.__use, rt->rt6i_flags,
1839 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * /proc read handler for /proc/net/ipv6_route: walk the whole routing
 * tree under rt6_lock, letting rt6_info_route() format each entry,
 * then adjust *start and the returned length for the requested
 * offset window.
 */
1843 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1845 struct rt6_proc_arg arg;
1846 arg.buffer = buffer;
1847 arg.offset = offset;
1848 arg.length = length;
1852 read_lock_bh(&rt6_lock);
1853 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1854 read_unlock_bh(&rt6_lock);
/* Records are fixed-size, so the sub-record remainder of the offset
   is resolved with simple modular arithmetic. */
1858 *start += offset % RT6_INFO_LEN;
1860 arg.len -= offset % RT6_INFO_LEN;
1862 if (arg.len > length)
/* Global fib6 counters defined elsewhere (extern). */
1870 extern struct rt6_statistics rt6_stats;
/*
 * /proc read handler for /proc/net/rt6_stats: a single line of six
 * hex counters describing fib tree node and route-entry usage plus
 * the live dst-entry count.
 */
1872 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1876 len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1877 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1878 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1879 rt6_stats.fib_rt_cache,
1880 atomic_read(&ip6_dst_ops.entries));
/* Standard get_info offset handling for short reads. */
1889 *start = buffer + offset;
1893 #endif /* CONFIG_PROC_FS */
1895 #ifdef CONFIG_SYSCTL
/* Scratch value written through the net.ipv6.route.flush sysctl. */
1897 static int flush_delay;
/*
 * Handler for net.ipv6.route.flush: parse the written integer with
 * proc_dointvec() and trigger a garbage-collection pass over the
 * routing tree (negative-value handling elided in this excerpt).
 */
1900 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1901 void *buffer, size_t *lenp)
1904 proc_dointvec(ctl, write, filp, buffer, lenp);
1905 if (flush_delay < 0)
1907 fib6_run_gc((unsigned long)flush_delay);
/*
 * sysctl table for net.ipv6.route.*.  The interval/timeout knobs are
 * stored in jiffies and so use proc_dointvec_jiffies with the
 * sysctl_jiffies strategy.
 * NOTE(review): gc_elasticity and min_adv_mss are plain counts, not
 * times, yet they also go through proc_dointvec_jiffies (HZ-scaled);
 * this looks wrong — later mainline kernels switched these two
 * entries to plain proc_dointvec.  Confirm before changing.
 */
1913 ctl_table ipv6_route_table[] = {
1914 {NET_IPV6_ROUTE_FLUSH, "flush",
1915 &flush_delay, sizeof(int), 0644, NULL,
1916 &ipv6_sysctl_rtcache_flush},
1917 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
1918 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
1920 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
1921 &ip6_rt_max_size, sizeof(int), 0644, NULL,
1923 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
1924 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
1925 &proc_dointvec_jiffies, &sysctl_jiffies},
1926 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
1927 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
1928 &proc_dointvec_jiffies, &sysctl_jiffies},
1929 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
1930 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
1931 &proc_dointvec_jiffies, &sysctl_jiffies},
1932 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
1933 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
1934 &proc_dointvec_jiffies, &sysctl_jiffies},
1935 {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
1936 &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
1937 &proc_dointvec_jiffies, &sysctl_jiffies},
1938 {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
1939 &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
1940 &proc_dointvec_jiffies, &sysctl_jiffies},
/*
 * Boot-time initialization for the IPv6 routing subsystem: create the
 * slab cache backing rt6_info/dst allocations and register the
 * /proc/net entries.  (Further setup calls are elided in this
 * excerpt.)
 */
1947 void __init ip6_route_init(void)
/* Cache-line-aligned slab for route/dst objects. */
1949 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1950 sizeof(struct rt6_info),
1951 0, SLAB_HWCACHE_ALIGN,
1954 #ifdef CONFIG_PROC_FS
1955 proc_net_create("ipv6_route", 0, rt6_proc_info);
1956 proc_net_create("rt6_stats", 0, rt6_proc_stats);
1961 void ip6_route_cleanup(void)
1963 #ifdef CONFIG_PROC_FS
1964 proc_net_remove("ipv6_route");
1965 proc_net_remove("rt6_stats");