2 * Linux INET6 implementation
6 * Pedro Roque <pedro_m@yahoo.com>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/route.h>
34 #include <linux/netdevice.h>
35 #include <linux/in6.h>
36 #include <linux/init.h>
37 #include <linux/netlink.h>
38 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
51 #include <linux/rtnetlink.h>
53 #include <asm/uaccess.h>
56 #include <linux/sysctl.h>
/* Force the experimental flow-policy code off regardless of .config. */
59 #undef CONFIG_RT6_POLICY
61 /* Set to 3 to get tracing. */
/* Debug tracing macros; the #if/#else selecting between the printk and
 * no-op RT6_TRACE variants is not shown in this excerpt. */
65 #define RDBG(x) printk x
66 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #define RT6_TRACE(x...) do { ; } while (0)
/* Garbage-collection / cache tunables, exported (presumably via sysctl
 * elsewhere -- the sysctl table is not visible here). */
73 int ip6_rt_max_size = 4096;
74 int ip6_rt_gc_min_interval = HZ / 2;
75 int ip6_rt_gc_timeout = 60*HZ;
76 int ip6_rt_gc_interval = 30*HZ;
77 int ip6_rt_gc_elasticity = 9;
/* How long a PMTU-learned cache route stays before re-probing (10 min). */
78 int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: minimum IPv6 MTU minus TCP (20) and IPv6 (40)
 * header sizes. */
79 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and helpers defined below. */
81 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
82 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int ip6_dst_gc(void);
88 static int ip6_pkt_discard(struct sk_buff *skb);
89 static void ip6_link_failure(struct sk_buff *skb);
/* dst_ops instance for IPv6 routes; only some initializer fields are
 * visible in this excerpt (family, entry size -- the callback slots are
 * among the omitted lines). */
91 struct dst_ops ip6_dst_ops = {
93 __constant_htons(ETH_P_IPV6),
102 sizeof(struct rt6_info),
/* The "null" route returned when no route matches: a reject route on the
 * loopback device whose input/output handlers discard the packet with
 * -ENETUNREACH.  Positional initializer; several fields are omitted here. */
105 struct rt6_info ip6_null_entry = {
106 {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
107 -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108 -ENETUNREACH, NULL, NULL,
109 ip6_pkt_discard, ip6_pkt_discard,
110 #ifdef CONFIG_NET_CLS_ROUTE
114 NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
115 255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
/* Root node of the IPv6 FIB radix tree. */
118 struct fib6_node ip6_routing_table = {
119 NULL, NULL, NULL, NULL,
121 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
/* Flow-policy routing support (compiled out: CONFIG_RT6_POLICY is
 * #undef'd above, so the #else branch defining ip6_rt_policy as 0 wins). */
124 #ifdef CONFIG_RT6_POLICY
125 int ip6_rt_policy = 0;
127 struct pol_chain *rt6_pol_list = NULL;
130 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
131 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
133 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
134 struct in6_addr *daddr,
135 struct in6_addr *saddr,
136 struct fl_acc_args *args);
139 #define ip6_rt_policy (0)
142 /* Protects all the ip6 fib */
/* Single reader/writer lock serializing all access to the IPv6 FIB tree. */
144 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
148 * Route lookup. Any rt6_lock is implied.
/* Walk the leaf chain starting at @rt and pick the entry whose device
 * matches @oif; loopback devices are remembered as a fallback ("local").
 * Falls back to &ip6_null_entry when nothing matches in strict mode.
 * NOTE(review): the signature's remaining parameters (oif, strict) and
 * several branch bodies are omitted from this excerpt. */
151 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
155 struct rt6_info *local = NULL;
156 struct rt6_info *sprt;
159 for (sprt = rt; sprt; sprt = sprt->u.next) {
160 struct net_device *dev = sprt->rt6i_dev;
161 if (dev->ifindex == oif)
163 if (dev->flags&IFF_LOOPBACK)
171 return &ip6_null_entry;
177 * pointer to the last default router chosen. BH is disabled locally.
/* Sticky pointer to the last default router handed out, protected by
 * rt6_dflt_lock so selection is stable across calls. */
179 static struct rt6_info *rt6_dflt_pointer = NULL;
180 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
182 /* Default Router Selection (RFC 2461 6.3.6) */
/* Score each default router (neighbour reachability state, oif match,
 * stickiness to rt6_dflt_pointer) and return the best; round-robin among
 * routers when none is known reachable.  Many scoring lines are omitted
 * from this excerpt. */
183 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
185 struct rt6_info *match = NULL;
186 struct rt6_info *sprt;
189 for (sprt = rt; sprt; sprt = sprt->u.next) {
190 struct neighbour *neigh;
195 sprt->rt6i_dev->ifindex == oif))
198 if (sprt == rt6_dflt_pointer)
/* Consult the neighbour cache entry's NUD state under its lock. */
201 if ((neigh = sprt->rt6i_nexthop) != NULL) {
202 read_lock_bh(&neigh->lock);
203 switch (neigh->nud_state) {
221 read_unlock_bh(&neigh->lock);
224 read_unlock_bh(&neigh->lock);
229 if (m > mpri || m >= 12) {
233 /* we choose the lastest default router if it
234 * is in (probably) reachable state.
235 * If route changed, we should do pmtu
236 * discovery. --yoshfuji
243 spin_lock(&rt6_dflt_lock);
246 * No default routers are known to be reachable.
/* Round-robin: continue from the entry after the last one chosen,
 * skipping dead/obsolete entries. */
249 if (rt6_dflt_pointer) {
250 for (sprt = rt6_dflt_pointer->u.next;
251 sprt; sprt = sprt->u.next) {
252 if (sprt->u.dst.obsolete <= 0 &&
253 sprt->u.dst.error == 0) {
260 sprt = sprt->u.next) {
261 if (sprt->u.dst.obsolete <= 0 &&
262 sprt->u.dst.error == 0) {
266 if (sprt == rt6_dflt_pointer)
273 rt6_dflt_pointer = match;
275 spin_unlock(&rt6_dflt_lock);
279 * Last Resort: if no default routers found,
280 * use addrconf default route.
281 * We don't record this route.
283 for (sprt = ip6_routing_table.leaf;
284 sprt; sprt = sprt->u.next) {
285 if ((sprt->rt6i_flags & RTF_DEFAULT) &&
288 sprt->rt6i_dev->ifindex == oif))) {
294 /* no default route. give up. */
295 match = &ip6_null_entry;
/* Public route lookup: walk the FIB under the read lock, match on the
 * outgoing interface, and return the route with a reference held
 * (lastuse refreshed).  On error the reference is dropped; the NULL
 * return path is among the lines omitted from this excerpt. */
302 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
305 struct fib6_node *fn;
308 read_lock_bh(&rt6_lock);
309 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
310 rt = rt6_device_match(fn->leaf, oif, strict);
311 dst_hold(&rt->u.dst);
313 read_unlock_bh(&rt6_lock);
315 rt->u.dst.lastuse = jiffies;
316 if (rt->u.dst.error == 0)
318 dst_release(&rt->u.dst);
322 /* rt6_ins is called with FREE rt6_lock.
323 It takes new route entry, the addition fails by any reason the
324 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into the FIB under the write lock; fib6_add owns the entry's
 * fate on failure (see comment above). */
328 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
332 write_lock_bh(&rt6_lock);
333 err = fib6_add(&ip6_routing_table, rt, nlh, req);
334 write_unlock_bh(&rt6_lock);
339 /* No rt6_lock! If COW failed, the function returns dead route entry
340 with dst->error set to errno value.
/* Clone a non-gateway route into a /128 RTF_CACHE host route for @daddr
 * (copy-on-write), resolve its neighbour, and insert it.  On allocation
 * failure returns &ip6_null_entry with a hold; on insert failure returns
 * the dead entry with dst.error set. */
343 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
344 struct in6_addr *saddr, struct netlink_skb_parms *req)
353 rt = ip6_rt_copy(ort);
356 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* On-link destination: the gateway IS the destination itself. */
358 if (!(rt->rt6i_flags&RTF_GATEWAY))
359 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
361 rt->rt6i_dst.plen = 128;
362 rt->rt6i_flags |= RTF_CACHE;
363 rt->u.dst.flags |= DST_HOST;
365 #ifdef CONFIG_IPV6_SUBTREES
366 if (rt->rt6i_src.plen && saddr) {
367 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
368 rt->rt6i_src.plen = 128;
372 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/* Hold a reference for the caller before handing ownership to rt6_ins. */
374 dst_hold(&rt->u.dst);
376 err = rt6_ins(rt, NULL, req);
380 rt->u.dst.error = err;
384 dst_hold(&ip6_null_entry.u.dst);
385 return &ip6_null_entry;
388 #ifdef CONFIG_RT6_POLICY
/* Policy-routing wrappers (compiled out): build fl_acc_args for the
 * forward/origin direction and delegate to rt6_flow_lookup(). */
389 static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
392 struct in6_addr *daddr, *saddr;
393 struct fl_acc_args arg;
395 arg.type = FL_ARG_FORWARD;
/* Addresses come straight from the received IPv6 header. */
398 saddr = &skb->nh.ipv6h->saddr;
399 daddr = &skb->nh.ipv6h->daddr;
401 return rt6_flow_lookup(rt, daddr, saddr, &arg);
404 static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
408 struct fl_acc_args arg;
410 arg.type = FL_ARG_ORIGIN;
411 arg.fl_u.fl_o.sk = sk;
412 arg.fl_u.fl_o.flow = fl;
414 return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
/* Strict-mode backtracking used by ip6_route_input/ip6_route_output:
 * when the match is the null entry, climb toward the tree root looking
 * for an ancestor carrying route info (RTN_RTINFO); give up at RTN_ROOT.
 * Several lines of the macro body are omitted in this excerpt. */
420 #define BACKTRACK() \
421 if (rt == &ip6_null_entry && strict) { \
422 while ((fn = fn->parent) != NULL) { \
423 if (fn->fn_flags & RTN_ROOT) { \
424 dst_hold(&rt->u.dst); \
427 if (fn->fn_flags & RTN_RTINFO) \
/* Route an incoming packet: look up the FIB by dst/src from the IPv6
 * header, prefer RTF_CACHE entries, COW a host route when needed, and
 * attach the result to skb->dst.  Strict matching is used for
 * multicast/link-local destinations. */
433 void ip6_route_input(struct sk_buff *skb)
435 struct fib6_node *fn;
440 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
443 read_lock_bh(&rt6_lock);
445 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
446 &skb->nh.ipv6h->saddr);
/* Fast path: an already-cloned cache entry just needs a device match. */
451 if ((rt->rt6i_flags & RTF_CACHE)) {
452 if (ip6_rt_policy == 0) {
453 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
455 dst_hold(&rt->u.dst);
459 #ifdef CONFIG_RT6_POLICY
460 if ((rt->rt6i_flags & RTF_FLOW)) {
461 struct rt6_info *sprt;
463 for (sprt = rt; sprt; sprt = sprt->u.next) {
464 if (rt6_flow_match_in(sprt, skb)) {
466 dst_hold(&rt->u.dst);
474 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
477 if (ip6_rt_policy == 0) {
/* Connected route without a resolved nexthop: clone it per-destination. */
478 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479 read_unlock_bh(&rt6_lock);
481 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
482 &skb->nh.ipv6h->saddr,
/* -EEXIST means somebody inserted the same clone while the lock was
 * dropped; retry the lookup a bounded number of times. */
485 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
487 /* Race condition! In the gap, when rt6_lock was
488 released someone could insert this route. Relookup.
492 dst_hold(&rt->u.dst);
494 #ifdef CONFIG_RT6_POLICY
495 rt = rt6_flow_lookup_in(rt, skb);
502 read_unlock_bh(&rt6_lock);
504 rt->u.dst.lastuse = jiffies;
506 skb->dst = (struct dst_entry *) rt;
/* Route an outgoing flow: same structure as ip6_route_input but keyed by
 * the flowi, with extra handling for default routes (RFC 2461 router
 * selection via rt6_best_dflt).  Returns the held dst entry. */
509 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
511 struct fib6_node *fn;
516 strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
519 read_lock_bh(&rt6_lock);
521 fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
522 fl->nl_u.ip6_u.saddr);
/* Fast path: cached per-destination clone. */
527 if ((rt->rt6i_flags & RTF_CACHE)) {
528 if (ip6_rt_policy == 0) {
529 rt = rt6_device_match(rt, fl->oif, strict);
531 dst_hold(&rt->u.dst);
535 #ifdef CONFIG_RT6_POLICY
536 if ((rt->rt6i_flags & RTF_FLOW)) {
537 struct rt6_info *sprt;
539 for (sprt = rt; sprt; sprt = sprt->u.next) {
540 if (rt6_flow_match_out(sprt, sk)) {
542 dst_hold(&rt->u.dst);
/* Default route at addrconf priority: run best-router selection. */
549 if (rt->rt6i_flags & RTF_DEFAULT) {
550 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
551 rt = rt6_best_dflt(rt, fl->oif);
553 rt = rt6_device_match(rt, fl->oif, strict);
557 if (ip6_rt_policy == 0) {
558 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
559 read_unlock_bh(&rt6_lock);
561 rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
562 fl->nl_u.ip6_u.saddr, NULL);
/* Retry on the same insert race as ip6_route_input. */
564 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
567 /* Race condition! In the gap, when rt6_lock was
568 released someone could insert this route. Relookup.
572 dst_hold(&rt->u.dst);
574 #ifdef CONFIG_RT6_POLICY
575 rt = rt6_flow_lookup_out(rt, sk, fl);
582 read_unlock_bh(&rt6_lock);
584 rt->u.dst.lastuse = jiffies;
591 * Destination cache support functions
/* dst_ops->check: a cached route stays valid only while its FIB node's
 * serial number matches the cookie captured at lookup time. */
594 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
598 rt = (struct rt6_info *) dst;
600 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops->reroute: not expected to be called; logs and (per the omitted
 * lines) bails out. */
607 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
612 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
613 __builtin_return_address(0)));
/* dst_ops->negative_advice: drop a cached clone that has gone bad. */
617 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
619 struct rt6_info *rt = (struct rt6_info *) dst;
622 if (rt->rt6i_flags & RTF_CACHE)
623 ip6_del_rt(rt, NULL, NULL);
/* dst_ops->link_failure: tell the sender the address is unreachable, then
 * expire the cache entry (or invalidate the default route's subtree by
 * bumping fn_sernum to -1 so ip6_dst_check fails). */
630 static void ip6_link_failure(struct sk_buff *skb)
634 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
636 rt = (struct rt6_info *) skb->dst;
638 if (rt->rt6i_flags&RTF_CACHE) {
639 dst_set_expires(&rt->u.dst, 0);
640 rt->rt6i_flags |= RTF_EXPIRES;
641 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
642 rt->rt6i_node->fn_sernum = -1;
/* dst_ops->gc: adaptive garbage collection.  The expire window shrinks
 * geometrically (by 1/2^elasticity) while the table stays over the
 * threshold and resets when it drops below; rate-limited by
 * ip6_rt_gc_min_interval.  The actual fib6_run_gc() call is among the
 * omitted lines. */
646 static int ip6_dst_gc()
648 static unsigned expire = 30*HZ;
649 static unsigned long last_gc;
650 unsigned long now = jiffies;
652 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
653 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
659 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
660 expire = ip6_rt_gc_timeout>>1;
663 expire -= expire>>ip6_rt_gc_elasticity;
664 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
667 /* Clean host part of a prefix. Not necessary in radix tree,
668 but results in cleaner routing tables.
670 Remove it only when all the things will work!
/* Copy the first @plen bits of @addr into @pfx and zero the rest:
 * whole bytes via memcpy/memset, then mask the partial byte (b = plen%8,
 * computed on an omitted line). */
673 static void ipv6_addr_prefix(struct in6_addr *pfx,
674 const struct in6_addr *addr, int plen)
679 memcpy(pfx->s6_addr, addr, o);
681 memset(pfx->s6_addr + o, 0, 16 - o);
683 pfx->s6_addr[o] = addr->s6_addr[o]&(0xff00 >> b);
/* Per-device IPv6 MTU: read idev->cnf.mtu6 when the device has an
 * inet6_dev, else fall back to IPV6_MIN_MTU.  The in6_dev_put/return
 * lines are omitted from this excerpt. */
686 static int ipv6_get_mtu(struct net_device *dev)
688 int mtu = IPV6_MIN_MTU;
689 struct inet6_dev *idev;
691 idev = in6_dev_get(dev);
693 mtu = idev->cnf.mtu6;
/* Per-device hop limit, defaulting to the global ipv6_devconf value. */
699 static int ipv6_get_hoplimit(struct net_device *dev)
701 int hoplimit = ipv6_devconf.hop_limit;
702 struct inet6_dev *idev;
704 idev = in6_dev_get(dev);
706 hoplimit = idev->cnf.hop_limit;
/* Create and insert a route described by @rtmsg (ioctl or rtnetlink
 * path).  Validates prefix lengths, allocates the rt6_info, resolves the
 * device/gateway, fills metrics, and inserts via rt6_ins.  Error-exit
 * labels (dev_put/dst_free) appear at the bottom; many intermediate
 * lines are omitted from this excerpt. */
716 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
721 struct net_device *dev = NULL;
724 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
/* Source routing (subtrees) only valid when compiled in. */
726 #ifndef CONFIG_IPV6_SUBTREES
727 if (rtmsg->rtmsg_src_len)
730 if (rtmsg->rtmsg_metric == 0)
731 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
733 rt = dst_alloc(&ip6_dst_ops);
/* obsolete = -1 marks a permanent (non-cache) entry. */
738 rt->u.dst.obsolete = -1;
739 rt->rt6i_expires = rtmsg->rtmsg_info;
740 if (nlh && (r = NLMSG_DATA(nlh))) {
741 rt->rt6i_protocol = r->rtm_protocol;
743 rt->rt6i_protocol = RTPROT_BOOT;
746 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
748 if (addr_type & IPV6_ADDR_MULTICAST)
749 rt->u.dst.input = ip6_mc_input;
751 rt->u.dst.input = ip6_forward;
753 rt->u.dst.output = ip6_output;
755 if (rtmsg->rtmsg_ifindex) {
756 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
762 ipv6_addr_prefix(&rt->rt6i_dst.addr,
763 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
764 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
765 if (rt->rt6i_dst.plen == 128)
766 rt->u.dst.flags = DST_HOST;
768 #ifdef CONFIG_IPV6_SUBTREES
769 ipv6_addr_prefix(&rt->rt6i_src.addr,
770 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
771 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
774 rt->rt6i_metric = rtmsg->rtmsg_metric;
776 /* We cannot add true routes via loopback here,
777 they would result in kernel looping; promote them to reject routes
779 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
780 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
785 rt->u.dst.output = ip6_pkt_discard;
786 rt->u.dst.input = ip6_pkt_discard;
787 rt->u.dst.error = -ENETUNREACH;
788 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
792 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
793 struct in6_addr *gw_addr;
796 gw_addr = &rtmsg->rtmsg_gateway;
797 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
798 gwa_type = ipv6_addr_type(gw_addr);
/* Non-link-local gateway: resolve it recursively through the table
 * (one level only -- the nexthop found must itself be non-gatewayed). */
800 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
801 struct rt6_info *grt;
803 /* IPv6 strictly inhibits using not link-local
804 addresses as nexthop address.
805 Otherwise, router will not able to send redirects.
806 It is very good, but in some (rare!) curcumstances
807 (SIT, PtP, NBMA NOARP links) it is handy to allow
808 some exceptions. --ANK
811 if (!(gwa_type&IPV6_ADDR_UNICAST))
814 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
820 if (dev != grt->rt6i_dev) {
821 dst_release(&grt->u.dst);
828 if (!(grt->rt6i_flags&RTF_GATEWAY))
830 dst_release(&grt->u.dst);
836 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
/* Gatewayed/NONEXTHOP routes need a neighbour cache entry up front. */
844 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
845 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
846 if (IS_ERR(rt->rt6i_nexthop)) {
847 err = PTR_ERR(rt->rt6i_nexthop);
848 rt->rt6i_nexthop = NULL;
853 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
854 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
856 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
857 rt->rt6i_flags = rtmsg->rtmsg_flags;
860 rt->u.dst.pmtu = ipv6_get_mtu(dev);
861 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
862 /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
863 MSS is 65535 - tcp_header_size. 65535 is also valid and
864 means: "any MSS, rely only on pmtu discovery"
866 if (rt->u.dst.advmss > 65535-20)
867 rt->u.dst.advmss = 65535;
869 return rt6_ins(rt, nlh, req);
874 dst_free((struct dst_entry *) rt);
/* Delete @rt from the FIB under the write lock.  The sticky default-
 * router pointer is cleared first so a stale reference cannot survive
 * the deletion; the caller's reference is dropped here. */
878 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
882 write_lock_bh(&rt6_lock);
884 spin_lock_bh(&rt6_dflt_lock);
885 rt6_dflt_pointer = NULL;
886 spin_unlock_bh(&rt6_dflt_lock);
888 dst_release(&rt->u.dst);
890 err = fib6_del(rt, nlh, req);
891 write_unlock_bh(&rt6_lock);
/* Delete the route matching @rtmsg: locate the exact prefix node, then
 * scan its leaf chain for an entry matching the optional ifindex,
 * gateway, and metric selectors; hand the match (with a hold) to
 * ip6_del_rt.  Falls through to -ESRCH (on omitted lines) when nothing
 * matches. */
896 int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
898 struct fib6_node *fn;
902 read_lock_bh(&rt6_lock);
904 fn = fib6_locate(&ip6_routing_table,
905 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
906 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
909 for (rt = fn->leaf; rt; rt = rt->u.next) {
910 if (rtmsg->rtmsg_ifindex &&
911 (rt->rt6i_dev == NULL ||
912 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
914 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
915 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
917 if (rtmsg->rtmsg_metric &&
918 rtmsg->rtmsg_metric != rt->rt6i_metric)
920 dst_hold(&rt->u.dst);
921 read_unlock_bh(&rt6_lock);
923 return ip6_del_rt(rt, nlh, req);
926 read_unlock_bh(&rt6_lock);
/* Handle an NDISC redirect: validate it against the current route to
 * @dest (device, gateway, duplicate checks), then install a /128
 * RTF_CACHE clone pointing at the new @neigh and drop the old cache
 * entry. */
934 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
935 struct neighbour *neigh, int on_link)
937 struct rt6_info *rt, *nrt;
939 /* Locate old route to this destination. */
940 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
945 if (neigh->dev != rt->rt6i_dev)
948 /* Redirect received -> path was valid.
949 Look, redirects are sent only in response to data packets,
950 so that this nexthop apparently is reachable. --ANK
952 dst_confirm(&rt->u.dst);
954 /* Duplicate redirect: silently ignore. */
955 if (neigh == rt->u.dst.neighbour)
958 /* Current route is on-link; redirect is always invalid.
960 Seems, previous statement is not true. It could
961 be node, which looks for us as on-link (f.e. proxy ndisc)
962 But then router serving it might decide, that we should
963 know truth 8)8) --ANK (980726).
965 if (!(rt->rt6i_flags&RTF_GATEWAY))
969 * RFC 1970 specifies that redirects should only be
970 * accepted if they come from the nexthop to the target.
971 * Due to the way default routers are chosen, this notion
972 * is a bit fuzzy and one might need to check all default
/* Redirect sender must be our current nexthop; for default routes also
 * accept any configured default router with that gateway address. */
976 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
977 if (rt->rt6i_flags & RTF_DEFAULT) {
978 struct rt6_info *rt1;
980 read_lock(&rt6_lock);
981 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
982 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
983 dst_hold(&rt1->u.dst);
984 dst_release(&rt->u.dst);
985 read_unlock(&rt6_lock);
990 read_unlock(&rt6_lock);
993 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
994 "for redirect target\n");
1001 * We have finally decided to accept it.
1004 nrt = ip6_rt_copy(rt);
1008 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1010 nrt->rt6i_flags &= ~RTF_GATEWAY;
1012 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1013 nrt->rt6i_dst.plen = 128;
1014 nrt->u.dst.flags |= DST_HOST;
1016 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1017 nrt->rt6i_nexthop = neigh_clone(neigh);
1018 /* Reset pmtu, it may be better */
1019 nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
1020 nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
/* NOTE(review): the two lines below clamp rt->u.dst.advmss, while the
 * surrounding code computes nrt's advmss -- looks like they were meant
 * to operate on nrt.  Confirm against the upstream file before changing. */
1021 if (rt->u.dst.advmss > 65535-20)
1022 rt->u.dst.advmss = 65535;
1023 nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
1025 if (rt6_ins(nrt, NULL, NULL))
/* The old cached route is superseded; remove it. */
1028 if (rt->rt6i_flags&RTF_CACHE) {
1029 ip6_del_rt(rt, NULL, NULL);
1034 dst_release(&rt->u.dst);
1039 * Handle ICMP "packet too big" messages
1040 * i.e. Path MTU discovery
/* Record a smaller path MTU for @daddr learned from an ICMPv6 Packet Too
 * Big message.  Cache entries are updated in place; otherwise a /128
 * clone (COW or copy, depending on nexthop type) is created with the new
 * pmtu and an expiry of ip6_rt_mtu_expires. */
1043 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1044 struct net_device *dev, u32 pmtu)
1046 struct rt6_info *rt, *nrt;
/* Per RFC 1981, never go below the IPv6 minimum link MTU. */
1048 if (pmtu < IPV6_MIN_MTU) {
1049 if (net_ratelimit())
1050 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1052 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1053 link MTU if the node receives a Packet Too Big message
1054 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1056 pmtu = IPV6_MIN_MTU;
1059 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* Only ever shrink the recorded PMTU. */
1064 if (pmtu >= rt->u.dst.pmtu)
1067 /* New mtu received -> path was valid.
1068 They are sent only in response to data packets,
1069 so that this nexthop apparently is reachable. --ANK
1071 dst_confirm(&rt->u.dst);
1073 /* Host route. If it is static, it would be better
1074 not to override it, but add new one, so that
1075 when cache entry will expire old pmtu
1076 would return automatically.
1078 if (rt->rt6i_flags & RTF_CACHE) {
1079 rt->u.dst.pmtu = pmtu;
1080 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1081 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1086 Two cases are possible:
1087 1. It is connected route. Action: COW
1088 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1090 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1091 nrt = rt6_cow(rt, daddr, saddr, NULL);
1092 if (!nrt->u.dst.error) {
1093 nrt->u.dst.pmtu = pmtu;
1094 /* According to RFC 1981, detecting PMTU increase shouldn't be
1095 happened within 5 mins, the recommended timer is 10 mins.
1096 Here this route expiration time is set to ip6_rt_mtu_expires
1097 which is 10 mins. After 10 mins the decreased pmtu is expired
1098 and detecting PMTU increase will be automatically happened.
1100 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1101 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1102 dst_release(&nrt->u.dst);
/* Gatewayed/NONEXTHOP: plain clone reusing the existing neighbour. */
1105 nrt = ip6_rt_copy(rt);
1108 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1109 nrt->rt6i_dst.plen = 128;
1110 nrt->u.dst.flags |= DST_HOST;
1111 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1112 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1113 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1114 nrt->u.dst.pmtu = pmtu;
1115 rt6_ins(nrt, NULL, NULL);
1119 dst_release(&rt->u.dst);
1123 * Misc support functions
/* Allocate a new rt6_info and copy the routing-relevant fields of @ort
 * into it: handlers, metrics, device (with a hold), gateway and keys.
 * Expiry is cleared and metric zeroed -- the copy is meant to become a
 * cache clone.  Returns NULL on allocation failure (check on an omitted
 * line). */
1126 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1128 struct rt6_info *rt;
1130 rt = dst_alloc(&ip6_dst_ops);
1133 rt->u.dst.input = ort->u.dst.input;
1134 rt->u.dst.output = ort->u.dst.output;
/* Copies the whole metrics array starting at mxlock. */
1136 memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1137 rt->u.dst.dev = ort->u.dst.dev;
1139 dev_hold(rt->u.dst.dev);
1140 rt->u.dst.lastuse = jiffies;
1141 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1142 rt->rt6i_expires = 0;
1144 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1145 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1146 rt->rt6i_metric = 0;
1148 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1149 #ifdef CONFIG_IPV6_SUBTREES
1150 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the default-router entry for gateway @addr on @dev by scanning
 * the root node's leaf chain under the write lock; returns it with a
 * hold, or NULL (via the omitted miss path). */
1156 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1158 struct rt6_info *rt;
1159 struct fib6_node *fn;
1161 fn = &ip6_routing_table;
1163 write_lock_bh(&rt6_lock);
1164 for (rt = fn->leaf; rt; rt=rt->u.next) {
1165 if (dev == rt->rt6i_dev &&
1166 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1170 dst_hold(&rt->u.dst);
1171 write_unlock_bh(&rt6_lock);
/* Install a default route via @gwaddr on @dev (used by router-
 * advertisement processing): build an in6_rtmsg for ::/0 with metric
 * 1024 and the ADDRCONF/DEFAULT flags, add it, and return the inserted
 * entry via rt6_get_dflt_router. */
1175 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1176 struct net_device *dev)
1178 struct in6_rtmsg rtmsg;
1180 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1181 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1182 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1183 rtmsg.rtmsg_metric = 1024;
1184 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1186 rtmsg.rtmsg_ifindex = dev->ifindex;
1188 ip6_route_add(&rtmsg, NULL, NULL);
1189 return rt6_get_dflt_router(gwaddr, dev);
/* Remove all autoconfigured default routes (or, when @last_resort, the
 * RTF_ALLONLINK fallbacks).  Each match is deleted with the read lock
 * dropped -- the scan restarts afterwards (restart label is among the
 * omitted lines) -- and the sticky default pointer is invalidated. */
1192 void rt6_purge_dflt_routers(int last_resort)
1194 struct rt6_info *rt;
1198 flags = RTF_ALLONLINK;
1200 flags = RTF_DEFAULT | RTF_ADDRCONF;
1203 read_lock_bh(&rt6_lock);
1204 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1205 if (rt->rt6i_flags & flags) {
1206 dst_hold(&rt->u.dst);
1208 spin_lock_bh(&rt6_dflt_lock);
1209 rt6_dflt_pointer = NULL;
1210 spin_unlock_bh(&rt6_dflt_lock);
1212 read_unlock_bh(&rt6_lock);
1214 ip6_del_rt(rt, NULL, NULL);
1219 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies
 * the in6_rtmsg from userspace, and dispatches to ip6_route_add/del.
 * Error handling and the -EINVAL default are on omitted lines. */
1222 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1224 struct in6_rtmsg rtmsg;
1228 case SIOCADDRT: /* Add a route */
1229 case SIOCDELRT: /* Delete a route */
1230 if (!capable(CAP_NET_ADMIN))
1232 err = copy_from_user(&rtmsg, arg,
1233 sizeof(struct in6_rtmsg));
1240 err = ip6_route_add(&rtmsg, NULL, NULL);
1243 err = ip6_route_del(&rtmsg, NULL, NULL);
1257 * Drop the packet on the floor
/* input/output handler for reject routes: count it, send an ICMPv6
 * "no route" unreachable back, and (on omitted lines) free the skb. */
1260 int ip6_pkt_discard(struct sk_buff *skb)
1262 IP6_INC_STATS(Ip6OutNoRoutes);
1263 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Install the local /128 route for an address assigned to @dev.  The
 * route's device is the loopback interface ("lo") -- locally-destined
 * traffic loops back -- with input = ip6_input. */
1272 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1274 struct rt6_info *rt;
1276 rt = dst_alloc(&ip6_dst_ops);
1280 rt->u.dst.flags = DST_HOST;
1281 rt->u.dst.input = ip6_input;
1282 rt->u.dst.output = ip6_output;
1283 rt->rt6i_dev = dev_get_by_name("lo");
1284 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1285 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1286 if (rt->u.dst.advmss > 65535-20)
1287 rt->u.dst.advmss = 65535;
1288 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1289 rt->u.dst.obsolete = -1;
1291 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
/* Neighbour entry is still needed even for NONEXTHOP local routes. */
1292 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1293 if (rt->rt6i_nexthop == NULL) {
1294 dst_free((struct dst_entry *) rt);
1298 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1299 rt->rt6i_dst.plen = 128;
1300 rt6_ins(rt, NULL, NULL);
1305 /* Delete address. Warning: you should check that this address
1306 disappeared before calling this function.
/* Remove the /128 local route for @addr: strict lookup on the loopback
 * device, delete only if it is really a host route, then drop the
 * lookup reference. */
1309 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1311 struct rt6_info *rt;
1314 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1316 if (rt->rt6i_dst.plen == 128)
1317 err = ip6_del_rt(rt, NULL, NULL);
1319 dst_release(&rt->u.dst);
1325 #ifdef CONFIG_RT6_POLICY
/* (Compiled out.)  Match an incoming packet against a route's flow
 * filter chain: each pkt_filter compares a masked 32-bit word at a
 * fixed offset from the transport header. */
1327 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1329 struct flow_filter *frule;
1330 struct pkt_filter *filter;
1333 if ((frule = rt->rt6i_filter) == NULL)
1336 if (frule->type != FLR_INPUT) {
1341 for (filter = frule->u.filter; filter; filter = filter->next) {
1344 word = (__u32 *) skb->h.raw;
1345 word += filter->offset;
1347 if ((*word ^ filter->value) & filter->mask) {
/* (Compiled out.)  Outbound variant: match on the originating socket.
 * NOTE(review): this also tests frule->type != FLR_INPUT, same as the
 * inbound matcher -- looks suspicious, but intervening lines are missing. */
1357 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1359 struct flow_filter *frule;
1362 if ((frule = rt->rt6i_filter) == NULL)
1365 if (frule->type != FLR_INPUT) {
1370 if (frule->u.sk != sk)
/* (Compiled out.)  Walk the policy chain, look each policy's rule tree
 * up by dst/src, and run its accept() hook until one yields a route;
 * the chosen route is marked RTF_CACHE and inserted.  Falls back to
 * &ip6_null_entry when no rule decides. */
1376 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1377 struct in6_addr *daddr,
1378 struct in6_addr *saddr,
1379 struct fl_acc_args *args)
1381 struct flow_rule *frule;
1382 struct rt6_info *nrt = NULL;
1383 struct pol_chain *pol;
1385 for (pol = rt6_pol_list; pol; pol = pol->next) {
1386 struct fib6_node *fn;
1387 struct rt6_info *sprt;
1389 fn = fib6_lookup(pol->rules, daddr, saddr);
1392 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1395 frule = sprt->rt6i_flowr;
1397 if (frule == NULL) {
1398 printk(KERN_DEBUG "NULL flowr\n");
1402 res = frule->ops->accept(rt, sprt, args, &nrt);
1409 case FLOWR_NODECISION:
/* Climb toward the rule-tree root until RTN_TL_ROOT is reached. */
1418 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1424 dst_hold(&ip6_null_entry.u.dst);
1425 return &ip6_null_entry;
1431 nrt->rt6i_flags |= RTF_CACHE;
1432 dst_hold(&nrt->u.dst);
1433 err = rt6_ins(nrt, NULL, NULL);
1435 nrt->u.dst.error = err;
/* fib6_clean_tree callback: select for deletion every route on the given
 * device (or every route when arg is NULL), never the null entry. */
1440 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1442 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1443 rt != &ip6_null_entry) {
1444 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes referencing @dev when it goes down. */
1450 void rt6_ifdown(struct net_device *dev)
1452 write_lock_bh(&rt6_lock);
1453 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1454 write_unlock_bh(&rt6_lock);
/* Argument bundle for the MTU-change tree walk (device + new MTU; the
 * mtu field's declaration is among the omitted lines). */
1457 struct rt6_mtu_change_arg
1459 struct net_device *dev;
/* fib6_clean_tree callback applying an administrative device-MTU change
 * to each route on that device, per the long rationale below. */
1463 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1465 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1466 struct inet6_dev *idev;
1467 /* In IPv6 pmtu discovery is not optional,
1468 so that RTAX_MTU lock cannot disable it.
1469 We still use this lock to block changes
1470 caused by addrconf/ndisc.
1472 idev = __in6_dev_get(arg->dev);
1476 /* For administrative MTU increase, there is no way to discover
1477 IPv6 PMTU increase, so PMTU increase should be updated here.
1478 Since RFC 1981 doesn't include administrative MTU increase
1479 update PMTU increase is a MUST. (i.e. jumbo frame)
1482 If new MTU is less than route PMTU, this new MTU will be the
1483 lowest MTU in the path, update the route PMTU to refect PMTU
1484 decreases; if new MTU is greater than route PMTU, and the
1485 old MTU is the lowest MTU in the path, update the route PMTU
1486 to refect the increase. In this case if the other nodes' MTU
1487 also have the lowest MTU, TOO BIG MESSAGE will be lead to
/* Update pmtu unless RTAX_MTU is locked: on decrease always, on
 * increase only when the old pmtu equaled the device MTU. */
1490 if (rt->rt6i_dev == arg->dev &&
1491 !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
1492 (rt->u.dst.pmtu > arg->mtu ||
1493 (rt->u.dst.pmtu < arg->mtu &&
1494 rt->u.dst.pmtu == idev->cnf.mtu6)))
1495 rt->u.dst.pmtu = arg->mtu;
1496 rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
1497 if (rt->u.dst.advmss > 65535-20)
1498 rt->u.dst.advmss = 65535;
/* Propagate a device MTU change to every route in the table via the
 * rt6_mtu_change_route walker (arg setup lines are omitted here). */
1502 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1504 struct rt6_mtu_change_arg arg;
1508 read_lock_bh(&rt6_lock);
1509 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1510 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add/del, validating each attribute's
 * length.  Attribute indices are rtattr-style (RTA_x - 1). */
1513 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1514 struct in6_rtmsg *rtmsg)
1516 memset(rtmsg, 0, sizeof(*rtmsg));
1518 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1519 rtmsg->rtmsg_src_len = r->rtm_src_len;
1520 rtmsg->rtmsg_flags = RTF_UP;
1521 if (r->rtm_type == RTN_UNREACHABLE)
1522 rtmsg->rtmsg_flags |= RTF_REJECT;
1524 if (rta[RTA_GATEWAY-1]) {
1525 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1527 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1528 rtmsg->rtmsg_flags |= RTF_GATEWAY;
/* Prefix payloads may be truncated to the prefix length, rounded up
 * to whole bytes. */
1530 if (rta[RTA_DST-1]) {
1531 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1533 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1535 if (rta[RTA_SRC-1]) {
1536 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1538 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1540 if (rta[RTA_OIF-1]) {
1541 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1543 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1545 if (rta[RTA_PRIORITY-1]) {
1546 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1548 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert the message and delegate to
 * ip6_route_del. */
1553 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1555 struct rtmsg *r = NLMSG_DATA(nlh);
1556 struct in6_rtmsg rtmsg;
1558 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1560 return ip6_route_del(&rtmsg, nlh, &NETLINK_CB(skb));
/* rtnetlink RTM_NEWROUTE handler: convert and delegate to ip6_route_add. */
1563 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1565 struct rtmsg *r = NLMSG_DATA(nlh);
1566 struct in6_rtmsg rtmsg;
1568 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1570 return ip6_route_add(&rtmsg, nlh, &NETLINK_CB(skb));
/* State carried through a route-table dump (target skb + netlink
 * callback context). */
1573 struct rt6_rtnl_dump_arg
1575 struct sk_buff *skb;
1576 struct netlink_callback *cb;
/* Serialize one route into an RTM_NEWROUTE netlink message on @skb.
 * @dst/@src, when given, override the route's own keys (used for
 * per-destination queries).  On buffer overflow the message is trimmed
 * back to @b and an error returned (labels on omitted lines). */
1579 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1580 struct in6_addr *dst, struct in6_addr *src,
1581 int iif, int type, u32 pid, u32 seq, int prefix)
1584 struct nlmsghdr *nlh;
1585 unsigned char *b = skb->tail;
1586 struct rta_cacheinfo ci;
1588 if (prefix) { /* user wants prefix routes only */
1589 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1590 /* success since this is not a prefix route */
1595 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1596 rtm = NLMSG_DATA(nlh);
1597 rtm->rtm_family = AF_INET6;
1598 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1599 rtm->rtm_src_len = rt->rt6i_src.plen;
1601 rtm->rtm_table = RT_TABLE_MAIN;
/* Map route flags onto rtnetlink route type / protocol. */
1602 if (rt->rt6i_flags&RTF_REJECT)
1603 rtm->rtm_type = RTN_UNREACHABLE;
1604 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1605 rtm->rtm_type = RTN_LOCAL;
1607 rtm->rtm_type = RTN_UNICAST;
1609 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1610 rtm->rtm_protocol = rt->rt6i_protocol;
1611 if (rt->rt6i_flags&RTF_DYNAMIC)
1612 rtm->rtm_protocol = RTPROT_REDIRECT;
1613 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1614 rtm->rtm_protocol = RTPROT_KERNEL;
1615 else if (rt->rt6i_flags&RTF_DEFAULT)
1616 rtm->rtm_protocol = RTPROT_RA;
1618 if (rt->rt6i_flags&RTF_CACHE)
1619 rtm->rtm_flags |= RTM_F_CLONED;
1622 RTA_PUT(skb, RTA_DST, 16, dst);
1623 rtm->rtm_dst_len = 128;
1624 } else if (rtm->rtm_dst_len)
1625 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1626 #ifdef CONFIG_IPV6_SUBTREES
1628 RTA_PUT(skb, RTA_SRC, 16, src);
1629 rtm->rtm_src_len = 128;
1630 } else if (rtm->rtm_src_len)
1631 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1634 RTA_PUT(skb, RTA_IIF, 4, &iif);
/* Preferred source address is derived per-destination. */
1636 struct in6_addr saddr_buf;
1637 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1638 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1640 if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1641 goto rtattr_failure;
1642 if (rt->u.dst.neighbour)
1643 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1645 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1646 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1647 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1648 if (rt->rt6i_expires)
1649 ci.rta_expires = rt->rt6i_expires - jiffies;
1652 ci.rta_used = rt->u.dst.__use;
1653 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1654 ci.rta_error = rt->u.dst.error;
1658 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
/* Fix up the final message length now that all attributes are in. */
1659 nlh->nlmsg_len = skb->tail - b;
1664 skb_trim(skb, b - skb->data);
1668 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1670 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1673 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1674 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1675 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1679 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1680 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1684 static int fib6_dump_node(struct fib6_walker_t *w)
1687 struct rt6_info *rt;
1689 for (rt = w->leaf; rt; rt = rt->u.next) {
1690 res = rt6_dump_route(rt, w->args);
1692 /* Frame is full, suspend walking */
1702 static void fib6_dump_end(struct netlink_callback *cb)
1704 struct fib6_walker_t *w = (void*)cb->args[0];
1708 fib6_walker_unlink(w);
1712 cb->done = (void*)cb->args[1];
1717 static int fib6_dump_done(struct netlink_callback *cb)
1720 return cb->done(cb);
1723 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1725 struct rt6_rtnl_dump_arg arg;
1726 struct fib6_walker_t *w;
1732 w = (void*)cb->args[0];
1736 * 1. hook callback destructor.
1738 cb->args[1] = (long)cb->done;
1739 cb->done = fib6_dump_done;
1742 * 2. allocate and initialize walker.
1744 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1747 RT6_TRACE("dump<%p", w);
1748 memset(w, 0, sizeof(*w));
1749 w->root = &ip6_routing_table;
1750 w->func = fib6_dump_node;
1752 cb->args[0] = (long)w;
1753 read_lock_bh(&rt6_lock);
1755 read_unlock_bh(&rt6_lock);
1758 read_lock_bh(&rt6_lock);
1759 res = fib6_walk_continue(w);
1760 read_unlock_bh(&rt6_lock);
1763 if (res <= 0 && skb->len == 0)
1764 RT6_TRACE("%p>dump end\n", w);
1766 res = res < 0 ? res : skb->len;
1767 /* res < 0 is an error. (really, impossible)
1768 res == 0 means that dump is complete, but skb still can contain data.
1769 res > 0 dump is not complete, but frame is full.
1771 /* Destroy walker, if dump of this table is complete. */
1777 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1779 struct rtattr **rta = arg;
1782 struct sk_buff *skb;
1784 struct rt6_info *rt;
1786 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1790 /* Reserve room for dummy headers, this skb can pass
1791 through good chunk of routing engine.
1793 skb->mac.raw = skb->data;
1794 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1797 fl.nl_u.ip6_u.daddr = NULL;
1798 fl.nl_u.ip6_u.saddr = NULL;
1799 fl.uli_u.icmpt.type = 0;
1800 fl.uli_u.icmpt.code = 0;
1802 fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1804 fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
1807 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1810 struct net_device *dev;
1811 dev = __dev_get_by_index(iif);
1820 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1822 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1824 skb->dst = &rt->u.dst;
1826 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1827 err = rt6_fill_node(skb, rt,
1828 fl.nl_u.ip6_u.daddr,
1829 fl.nl_u.ip6_u.saddr,
1831 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1838 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1848 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1849 struct netlink_skb_parms *req)
1851 struct sk_buff *skb;
1852 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1853 u32 pid = current->pid;
1859 seq = nlh->nlmsg_seq;
1861 skb = alloc_skb(size, gfp_any());
1863 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1866 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0) < 0) {
1868 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1871 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1872 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1879 #ifdef CONFIG_PROC_FS
1881 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1892 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1894 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1897 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1902 if (arg->len >= arg->length)
1905 for (i=0; i<16; i++) {
1906 sprintf(arg->buffer + arg->len, "%02x",
1907 rt->rt6i_dst.addr.s6_addr[i]);
1910 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1913 #ifdef CONFIG_IPV6_SUBTREES
1914 for (i=0; i<16; i++) {
1915 sprintf(arg->buffer + arg->len, "%02x",
1916 rt->rt6i_src.addr.s6_addr[i]);
1919 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1922 sprintf(arg->buffer + arg->len,
1923 "00000000000000000000000000000000 00 ");
1927 if (rt->rt6i_nexthop) {
1928 for (i=0; i<16; i++) {
1929 sprintf(arg->buffer + arg->len, "%02x",
1930 rt->rt6i_nexthop->primary_key[i]);
1934 sprintf(arg->buffer + arg->len,
1935 "00000000000000000000000000000000");
1938 arg->len += sprintf(arg->buffer + arg->len,
1939 " %08x %08x %08x %08x %8s\n",
1940 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1941 rt->u.dst.__use, rt->rt6i_flags,
1942 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1946 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1948 struct rt6_proc_arg arg;
1949 arg.buffer = buffer;
1950 arg.offset = offset;
1951 arg.length = length;
1955 read_lock_bh(&rt6_lock);
1956 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1957 read_unlock_bh(&rt6_lock);
1961 *start += offset % RT6_INFO_LEN;
1963 arg.len -= offset % RT6_INFO_LEN;
1965 if (arg.len > length)
1973 extern struct rt6_statistics rt6_stats;
1975 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1979 len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1980 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1981 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1982 rt6_stats.fib_rt_cache,
1983 atomic_read(&ip6_dst_ops.entries));
1992 *start = buffer + offset;
1996 #endif /* CONFIG_PROC_FS */
1998 #ifdef CONFIG_SYSCTL
2000 static int flush_delay;
2003 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2004 void *buffer, size_t *lenp)
2007 proc_dointvec(ctl, write, filp, buffer, lenp);
2008 if (flush_delay < 0)
2010 fib6_run_gc((unsigned long)flush_delay);
2016 ctl_table ipv6_route_table[] = {
2017 {NET_IPV6_ROUTE_FLUSH, "flush",
2018 &flush_delay, sizeof(int), 0644, NULL,
2019 &ipv6_sysctl_rtcache_flush},
2020 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
2021 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
2023 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
2024 &ip6_rt_max_size, sizeof(int), 0644, NULL,
2026 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
2027 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
2028 &proc_dointvec_jiffies, &sysctl_jiffies},
2029 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
2030 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
2031 &proc_dointvec_jiffies, &sysctl_jiffies},
2032 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
2033 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
2034 &proc_dointvec_jiffies, &sysctl_jiffies},
2035 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
2036 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
2037 &proc_dointvec_jiffies, &sysctl_jiffies},
2038 {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
2039 &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
2040 &proc_dointvec_jiffies, &sysctl_jiffies},
2041 {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
2042 &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
2043 &proc_dointvec_jiffies, &sysctl_jiffies},
2050 void __init ip6_route_init(void)
2052 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053 sizeof(struct rt6_info),
2054 0, SLAB_HWCACHE_ALIGN,
2057 #ifdef CONFIG_PROC_FS
2058 proc_net_create("ipv6_route", 0, rt6_proc_info);
2059 proc_net_create("rt6_stats", 0, rt6_proc_stats);
2064 void ip6_route_cleanup(void)
2066 #ifdef CONFIG_PROC_FS
2067 proc_net_remove("ipv6_route");
2068 proc_net_remove("rt6_stats");