[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts
[powerpc.git] / net / ipv6 / route.c
index d9baca0..024c8e2 100644 (file)
@@ -140,15 +140,49 @@ struct rt6_info ip6_null_entry = {
        .rt6i_ref       = ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-       .leaf           = &ip6_null_entry,
-       .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
-};
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
-/* Protects all the ip6 fib */
+struct rt6_info ip6_prohibit_entry = { /* statically initialised RTF_REJECT route entry whose dst.error is -EACCES */
+       .u = {
+               .dst = {
+                       .__refcnt       = ATOMIC_INIT(1), /* starts out with one reference */
+                       .__use          = 1,
+                       .dev            = &loopback_dev,
+                       .obsolete       = -1,
+                       .error          = -EACCES, /* the only field that differs from ip6_blk_hole_entry besides .path */
+                       .metrics        = { [RTAX_HOPLIMIT - 1] = 255, }, /* hop-limit metric slot preset to 255 */
+                       .input          = ip6_pkt_discard, /* same input/output handlers as ip6_blk_hole_entry */
+                       .output         = ip6_pkt_discard_out,
+                       .ops            = &ip6_dst_ops,
+                       .path           = (struct dst_entry*)&ip6_prohibit_entry, /* path refers back to this same entry */
+               }
+       },
+       .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
+       .rt6i_metric    = ~(u32) 0, /* all bits set, i.e. the largest u32 value */
+       .rt6i_ref       = ATOMIC_INIT(1),
+};
 
-DEFINE_RWLOCK(rt6_lock);
+struct rt6_info ip6_blk_hole_entry = { /* statically initialised RTF_REJECT route entry whose dst.error is -EINVAL */
+       .u = {
+               .dst = {
+                       .__refcnt       = ATOMIC_INIT(1), /* starts out with one reference */
+                       .__use          = 1,
+                       .dev            = &loopback_dev,
+                       .obsolete       = -1,
+                       .error          = -EINVAL, /* the only field that differs from ip6_prohibit_entry besides .path */
+                       .metrics        = { [RTAX_HOPLIMIT - 1] = 255, }, /* hop-limit metric slot preset to 255 */
+                       .input          = ip6_pkt_discard, /* same input/output handlers as ip6_prohibit_entry */
+                       .output         = ip6_pkt_discard_out,
+                       .ops            = &ip6_dst_ops,
+                       .path           = (struct dst_entry*)&ip6_blk_hole_entry, /* path refers back to this same entry */
+               }
+       },
+       .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
+       .rt6i_metric    = ~(u32) 0, /* all bits set, i.e. the largest u32 value */
+       .rt6i_ref       = ATOMIC_INIT(1),
+};
 
+#endif
 
 /* allocate dst with ip6_dst_ops */
 static __inline__ struct rt6_info *ip6_dst_alloc(void)
@@ -188,8 +222,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
                time_after(jiffies, rt->rt6i_expires));
 }
 
+static inline int rt6_need_strict(struct in6_addr *daddr) /* non-zero when daddr's address type has the multicast or link-local bit set */
+{
+       return (ipv6_addr_type(daddr) &
+               (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); /* returns the masked type bits, not a normalised 0/1 */
+}
+
 /*
- *     Route lookup. Any rt6_lock is implied.
+ *     Route lookup. Any table->tb6_lock is implied.
  */
 
 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@@ -441,27 +481,66 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 }
 #endif
 
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
-                           int oif, int strict)
+#define BACKTRACK() /* uses fn, rt, flags and the labels restart/out of the function it is expanded in */ \
+if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { /* only when nothing matched and RT6_F_STRICT is set */ \
+       while ((fn = fn->parent) != NULL) { /* climb one parent link per iteration */ \
+               if (fn->fn_flags & RTN_TL_ROOT) { /* node flagged RTN_TL_ROOT: stop climbing */ \
+                       dst_hold(&rt->u.dst); /* NOTE(review): an out: path that calls dst_hold() itself would take a second reference - confirm */ \
+                       goto out; \
+               } \
+               if (fn->fn_flags & RTN_RTINFO) /* node flagged RTN_RTINFO: retry the selection from it */ \
+                       goto restart; \
+       } \
+}
+
+static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, /* per-table lookup for flow fl; returns the chosen entry with a reference held */
+                                            struct flowi *fl, int flags) /* flags: only RT6_F_STRICT is examined here */
 {
        struct fib6_node *fn;
        struct rt6_info *rt;
 
-       read_lock_bh(&rt6_lock);
-       fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
-       rt = rt6_device_match(fn->leaf, oif, strict);
+       read_lock_bh(&table->tb6_lock); /* the table's own lock replaces the former global rt6_lock */
+       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); /* search this table's tree with the flow's dst/src addresses */
+restart: /* BACKTRACK() jumps back here after moving fn to a parent node */
+       rt = fn->leaf;
+       rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); /* match against fl->oif; strictness comes from RT6_F_STRICT */
+       BACKTRACK(); /* may jump to restart or to out */
        dst_hold(&rt->u.dst);
-       rt->u.dst.__use++;
-       read_unlock_bh(&rt6_lock);
+out: /* BACKTRACK() arrives here having already taken its own reference */
+       read_unlock_bh(&table->tb6_lock);
 
        rt->u.dst.lastuse = jiffies;
-       if (rt->u.dst.error == 0)
-               return rt;
-       dst_release(&rt->u.dst);
+       rt->u.dst.__use++; /* use counter is now bumped after the lock has been dropped */
+
+       return rt; /* returned even when rt->u.dst.error is set; no error check is done in this function */
+
+}
+
+struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, /* looks up a route for daddr; returns it with a reference, or NULL */
+                           int oif, int strict) /* strict != 0 is translated to RT6_F_STRICT */
+{
+       struct flowi fl = { /* flow key carrying only oif and daddr; saddr is not copied in (see TODO) */
+               .oif = oif,
+               .nl_u = {
+                       .ip6_u = {
+                               .daddr = *daddr,
+                               /* TODO: saddr */
+                       },
+               },
+       };
+       struct dst_entry *dst;
+       int flags = strict ? RT6_F_STRICT : 0;
+
+       dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); /* ip6_pol_route_lookup is handed over as the lookup callback */
+       if (dst->error == 0) /* hand the entry back only when it carries no error */
+               return (struct rt6_info *) dst;
+
+       dst_release(dst); /* entry carries an error: drop the reference and fall through to NULL */
+
        return NULL;
 }
 
-/* ip6_ins_rt is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
    be destroyed.
@@ -471,10 +550,12 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
                void *_rtattr, struct netlink_skb_parms *req)
 {
        int err;
+       struct fib6_table *table;
 
-       write_lock_bh(&rt6_lock);
-       err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
-       write_unlock_bh(&rt6_lock);
+       table = rt->rt6i_table;
+       write_lock_bh(&table->tb6_lock);
+       err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
+       write_unlock_bh(&table->tb6_lock);
 
        return err;
 }
@@ -532,51 +613,40 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
        return rt;
 }
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry) { \
-       while ((fn = fn->parent) != NULL) { \
-               if (fn->fn_flags & RTN_ROOT) { \
-                       goto out; \
-               } \
-               if (fn->fn_flags & RTN_RTINFO) \
-                       goto restart; \
-       } \
-}
-
-
-void ip6_route_input(struct sk_buff *skb)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+                                           struct flowi *fl, int flags)
 {
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
-       int strict;
+       int strict = 0;
        int attempts = 3;
        int err;
        int reachable = RT6_SELECT_F_REACHABLE;
 
-       strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+       if (flags & RT6_F_STRICT)
+               strict = RT6_SELECT_F_IFACE;
 
 relookup:
-       read_lock_bh(&rt6_lock);
+       read_lock_bh(&table->tb6_lock);
 
 restart_2:
-       fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
-                        &skb->nh.ipv6h->saddr);
+       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-       rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
+       rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
        BACKTRACK();
        if (rt == &ip6_null_entry ||
            rt->rt6i_flags & RTF_CACHE)
                goto out;
 
        dst_hold(&rt->u.dst);
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock);
 
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-               nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
+               nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
        else {
 #if CLONE_OFFLINK_ROUTE
-               nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
+               nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 #else
                goto out2;
 #endif
@@ -587,7 +657,7 @@ restart:
 
        dst_hold(&rt->u.dst);
        if (nrt) {
-               err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
+               err = ip6_ins_rt(nrt, NULL, NULL, NULL);
                if (!err)
                        goto out2;
        }
@@ -596,7 +666,7 @@ restart:
                goto out2;
 
        /*
-        * Race condition! In the gap, when rt6_lock was
+        * Race condition! In the gap, when table->tb6_lock was
         * released someone could insert this route.  Relookup.
         */
        dst_release(&rt->u.dst);
@@ -608,30 +678,54 @@ out:
                goto restart_2;
        }
        dst_hold(&rt->u.dst);
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock);
 out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
-       skb->dst = (struct dst_entry *) rt;
-       return;
+
+       return rt;
 }
 
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+void ip6_route_input(struct sk_buff *skb) /* builds a flow key from the packet's IPv6 header and stores the lookup result in skb->dst */
+{
+       struct ipv6hdr *iph = skb->nh.ipv6h;
+       struct flowi fl = {
+               .iif = skb->dev->ifindex, /* index of the device recorded in skb->dev */
+               .nl_u = {
+                       .ip6_u = {
+                               .daddr = iph->daddr,
+                               .saddr = iph->saddr,
+                               .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, /* first 32 bits of the header masked with IPV6_FLOWINFO_MASK */
+                       },
+               },
+               .proto = iph->nexthdr,
+       };
+       int flags = 0;
+
+       if (rt6_need_strict(&iph->daddr)) /* strictness is decided from the destination address type */
+               flags |= RT6_F_STRICT;
+
+       skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); /* ip6_pol_route_input is handed over as the lookup callback */
+}
+
+static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+                                            struct flowi *fl, int flags)
 {
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
-       int strict;
+       int strict = 0;
        int attempts = 3;
        int err;
        int reachable = RT6_SELECT_F_REACHABLE;
 
-       strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+       if (flags & RT6_F_STRICT)
+               strict = RT6_SELECT_F_IFACE;
 
 relookup:
-       read_lock_bh(&rt6_lock);
+       read_lock_bh(&table->tb6_lock);
 
 restart_2:
-       fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
        rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
@@ -641,7 +735,7 @@ restart:
                goto out;
 
        dst_hold(&rt->u.dst);
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock);
 
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
                nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@@ -667,7 +761,7 @@ restart:
                goto out2;
 
        /*
-        * Race condition! In the gap, when rt6_lock was
+        * Race condition! In the gap, when table->tb6_lock was
         * released someone could insert this route.  Relookup.
         */
        dst_release(&rt->u.dst);
@@ -679,11 +773,21 @@ out:
                goto restart_2;
        }
        dst_hold(&rt->u.dst);
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock);
 out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
-       return &rt->u.dst;
+       return rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) /* output-path lookup for flow fl; sk is not referenced in this body */
+{
+       int flags = 0;
+
+       if (rt6_need_strict(&fl->fl6_dst)) /* strictness is decided from the flow's destination address type */
+               flags |= RT6_F_STRICT;
+
+       return fib6_rule_lookup(fl, flags, ip6_pol_route_output); /* ip6_pol_route_output is handed over as the lookup callback */
 }
 
 
@@ -747,8 +851,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
        }
 }
 
-/* Protected by rt6_lock.  */
-static struct dst_entry *ndisc_dst_gc_list;
 static int ipv6_get_mtu(struct net_device *dev);
 
 static inline unsigned int ipv6_advmss(unsigned int mtu)
@@ -769,6 +871,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
        return mtu;
 }
 
+static struct dst_entry *ndisc_dst_gc_list;
+static DEFINE_SPINLOCK(ndisc_lock);
+
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
                                  struct neighbour *neigh,
                                  struct in6_addr *addr,
@@ -809,10 +914,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
        rt->rt6i_dst.plen = 128;
 #endif
 
-       write_lock_bh(&rt6_lock);
+       spin_lock_bh(&ndisc_lock);
        rt->u.dst.next = ndisc_dst_gc_list;
        ndisc_dst_gc_list = &rt->u.dst;
-       write_unlock_bh(&rt6_lock);
+       spin_unlock_bh(&ndisc_lock);
 
        fib6_force_start_gc();
 
@@ -826,8 +931,11 @@ int ndisc_dst_gc(int *more)
        int freed;
 
        next = NULL;
+       freed = 0;
+
+       spin_lock_bh(&ndisc_lock);
        pprev = &ndisc_dst_gc_list;
-       freed = 0;
+
        while ((dst = *pprev) != NULL) {
                if (!atomic_read(&dst->__refcnt)) {
                        *pprev = dst->next;
@@ -839,6 +947,8 @@ int ndisc_dst_gc(int *more)
                }
        }
 
+       spin_unlock_bh(&ndisc_lock);
+
        return freed;
 }
 
@@ -900,7 +1010,8 @@ int ipv6_get_hoplimit(struct net_device *dev)
  */
 
 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
-               void *_rtattr, struct netlink_skb_parms *req)
+                 void *_rtattr, struct netlink_skb_parms *req,
+                 u32 table_id)
 {
        int err;
        struct rtmsg *r;
@@ -908,6 +1019,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
        struct rt6_info *rt = NULL;
        struct net_device *dev = NULL;
        struct inet6_dev *idev = NULL;
+       struct fib6_table *table;
        int addr_type;
 
        rta = (struct rtattr **) _rtattr;
@@ -931,6 +1043,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
        if (rtmsg->rtmsg_metric == 0)
                rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
 
+       table = fib6_new_table(table_id);
+       if (table == NULL) {
+               err = -ENOBUFS;
+               goto out;
+       }
+
        rt = ip6_dst_alloc();
 
        if (rt == NULL) {
@@ -1087,6 +1205,7 @@ install_route:
                rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
        rt->u.dst.dev = dev;
        rt->rt6i_idev = idev;
+       rt->rt6i_table = table;
        return ip6_ins_rt(rt, nlh, _rtattr, req);
 
 out:
@@ -1102,26 +1221,38 @@ out:
 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
        int err;
+       struct fib6_table *table;
+
+       if (rt == &ip6_null_entry) /* ip6_null_entry is rejected before any table is touched */
+               return -ENOENT; /* NOTE(review): returns without the dst_release() done on the normal path - confirm the caller's reference is not leaked */
 
-       write_lock_bh(&rt6_lock);
+       table = rt->rt6i_table; /* the table this route records as its owner */
+       write_lock_bh(&table->tb6_lock); /* write-lock only that table */
 
        err = fib6_del(rt, nlh, _rtattr, req);
        dst_release(&rt->u.dst);
 
-       write_unlock_bh(&rt6_lock);
+       write_unlock_bh(&table->tb6_lock);
 
        return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
+                        void *_rtattr, struct netlink_skb_parms *req,
+                        u32 table_id)
 {
+       struct fib6_table *table;
        struct fib6_node *fn;
        struct rt6_info *rt;
        int err = -ESRCH;
 
-       read_lock_bh(&rt6_lock);
+       table = fib6_get_table(table_id);
+       if (table == NULL)
+               return err;
 
-       fn = fib6_locate(&ip6_routing_table,
+       read_lock_bh(&table->tb6_lock);
+
+       fn = fib6_locate(&table->tb6_root,
                         &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
                         &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
        
@@ -1138,12 +1269,12 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
                            rtmsg->rtmsg_metric != rt->rt6i_metric)
                                continue;
                        dst_hold(&rt->u.dst);
-                       read_unlock_bh(&rt6_lock);
+                       read_unlock_bh(&table->tb6_lock);
 
                        return ip6_del_rt(rt, nlh, _rtattr, req);
                }
        }
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock);
 
        return err;
 }
@@ -1155,10 +1286,15 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
                  struct neighbour *neigh, u8 *lladdr, int on_link)
 {
        struct rt6_info *rt, *nrt = NULL;
-       int strict;
        struct fib6_node *fn;
+       struct fib6_table *table;
        struct netevent_redirect netevent;
 
+       /* TODO: Very lazy, might need to check all tables */
+       table = fib6_get_table(RT6_TABLE_MAIN);
+       if (table == NULL)
+               return;
+
        /*
         * Get the "current" route for this destination and
         * check if the redirect has come from approriate router.
@@ -1169,10 +1305,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
         * is a bit fuzzy and one might need to check all possible
         * routes.
         */
-       strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-       read_lock_bh(&rt6_lock);
-       fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+       read_lock_bh(&table->tb6_lock);
+       fn = fib6_lookup(&table->tb6_root, dest, NULL);
 restart:
        for (rt = fn->leaf; rt; rt = rt->u.next) {
                /*
@@ -1195,7 +1330,7 @@ restart:
        }
        if (rt)
                dst_hold(&rt->u.dst);
-       else if (strict) {
+       else if (rt6_need_strict(dest)) {
                while ((fn = fn->parent) != NULL) {
                        if (fn->fn_flags & RTN_ROOT)
                                break;
@@ -1203,7 +1338,7 @@ restart:
                                goto restart;
                }
        }
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock);
 
        if (!rt) {
                if (net_ratelimit())
@@ -1378,6 +1513,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 #ifdef CONFIG_IPV6_SUBTREES
                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 #endif
+               rt->rt6i_table = ort->rt6i_table;
        }
        return rt;
 }
@@ -1388,9 +1524,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
 {
        struct fib6_node *fn;
        struct rt6_info *rt = NULL;
+       struct fib6_table *table;
+
+       table = fib6_get_table(RT6_TABLE_INFO);
+       if (table == NULL)
+               return NULL;
 
-       write_lock_bh(&rt6_lock);
-       fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+       write_lock_bh(&table->tb6_lock);
+       fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
        if (!fn)
                goto out;
 
@@ -1405,7 +1546,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
                break;
        }
 out:
-       write_unlock_bh(&rt6_lock);
+       write_unlock_bh(&table->tb6_lock);
        return rt;
 }
 
@@ -1427,7 +1568,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
                rtmsg.rtmsg_flags |= RTF_DEFAULT;
        rtmsg.rtmsg_ifindex = ifindex;
 
-       ip6_route_add(&rtmsg, NULL, NULL, NULL);
+       ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
 
        return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 }
@@ -1436,12 +1577,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {      
        struct rt6_info *rt;
-       struct fib6_node *fn;
+       struct fib6_table *table;
 
-       fn = &ip6_routing_table;
+       table = fib6_get_table(RT6_TABLE_DFLT);
+       if (table == NULL)
+               return NULL;
 
-       write_lock_bh(&rt6_lock);
-       for (rt = fn->leaf; rt; rt=rt->u.next) {
+       write_lock_bh(&table->tb6_lock);
+       for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
                if (dev == rt->rt6i_dev &&
                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -1449,7 +1592,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
        }
        if (rt)
                dst_hold(&rt->u.dst);
-       write_unlock_bh(&rt6_lock);
+       write_unlock_bh(&table->tb6_lock);
        return rt;
 }
 
@@ -1468,28 +1611,31 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 
        rtmsg.rtmsg_ifindex = dev->ifindex;
 
-       ip6_route_add(&rtmsg, NULL, NULL, NULL);
+       ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
        return rt6_get_dflt_router(gwaddr, dev);
 }
 
 void rt6_purge_dflt_routers(void)
 {
        struct rt6_info *rt;
+       struct fib6_table *table;
+
+       /* NOTE: Keep consistent with rt6_get_dflt_router */
+       table = fib6_get_table(RT6_TABLE_DFLT); /* only the RT6_TABLE_DFLT table is scanned */
+       if (table == NULL) /* no such table: nothing to purge */
+               return;
 
 restart:
-       read_lock_bh(&rt6_lock);
-       for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+       read_lock_bh(&table->tb6_lock);
+       for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { /* walks only the entries chained off the root node's leaf */
                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
                        dst_hold(&rt->u.dst);
-
-                       read_unlock_bh(&rt6_lock);
-
+                       read_unlock_bh(&table->tb6_lock); /* read lock is dropped before ip6_del_rt(), which takes the write lock */
                        ip6_del_rt(rt, NULL, NULL, NULL);
-
                        goto restart;
                }
        }
-       read_unlock_bh(&rt6_lock);
+       read_unlock_bh(&table->tb6_lock); /* reached once a full pass finds no matching entry */
 }
 
 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
@@ -1510,10 +1656,12 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
                rtnl_lock();
                switch (cmd) {
                case SIOCADDRT:
-                       err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+                       err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
+                                           RT6_TABLE_MAIN);
                        break;
                case SIOCDELRT:
-                       err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+                       err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
+                                           RT6_TABLE_MAIN);
                        break;
                default:
                        err = -EINVAL;
@@ -1587,6 +1735,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 
        ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
        rt->rt6i_dst.plen = 128;
+       rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
 
        atomic_set(&rt->u.dst.__refcnt, 1);
 
@@ -1605,9 +1754,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
 
 void rt6_ifdown(struct net_device *dev)
 {
-       write_lock_bh(&rt6_lock);
-       fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
-       write_unlock_bh(&rt6_lock);
+       fib6_clean_all(fib6_ifdown, 0, dev); /* no lock is taken here any more; presumably fib6_clean_all() locks each table itself - TODO confirm */
 }
 
 struct rt6_mtu_change_arg
@@ -1657,13 +1804,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 {
-       struct rt6_mtu_change_arg arg;
+       struct rt6_mtu_change_arg arg = { /* argument bundle passed to rt6_mtu_change_route via fib6_clean_all() */
+               .dev = dev,
+               .mtu = mtu,
+       };
 
-       arg.dev = dev;
-       arg.mtu = mtu;
-       read_lock_bh(&rt6_lock);
-       fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
-       read_unlock_bh(&rt6_lock);
+       fib6_clean_all(rt6_mtu_change_route, 0, &arg); /* no lock is taken here any more; presumably fib6_clean_all() locks each table itself - TODO confirm */
 }
 
 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
@@ -1713,7 +1859,8 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
        if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
                return -EINVAL;
-       return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+       return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
+                            rtm_get_table(arg, r->rtm_table));
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1723,15 +1870,10 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
        if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
                return -EINVAL;
-       return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+       return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
+                            rtm_get_table(arg, r->rtm_table));
 }
 
-struct rt6_rtnl_dump_arg
-{
-       struct sk_buff *skb;
-       struct netlink_callback *cb;
-};
-
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
@@ -1741,6 +1883,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;
        struct rta_cacheinfo ci;
+       u32 table;
 
        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1755,7 +1898,12 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
-       rtm->rtm_table = RT_TABLE_MAIN;
+       if (rt->rt6i_table)
+               table = rt->rt6i_table->tb6_id;
+       else
+               table = RT6_TABLE_UNSPEC;
+       rtm->rtm_table = table;
+       RTA_PUT_U32(skb, RTA_TABLE, table);
        if (rt->rt6i_flags&RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
@@ -1822,7 +1970,7 @@ rtattr_failure:
        return -1;
 }
 
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
        int prefix;
@@ -1838,96 +1986,6 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
                     prefix, NLM_F_MULTI);
 }
 
-static int fib6_dump_node(struct fib6_walker_t *w)
-{
-       int res;
-       struct rt6_info *rt;
-
-       for (rt = w->leaf; rt; rt = rt->u.next) {
-               res = rt6_dump_route(rt, w->args);
-               if (res < 0) {
-                       /* Frame is full, suspend walking */
-                       w->leaf = rt;
-                       return 1;
-               }
-               BUG_TRAP(res!=0);
-       }
-       w->leaf = NULL;
-       return 0;
-}
-
-static void fib6_dump_end(struct netlink_callback *cb)
-{
-       struct fib6_walker_t *w = (void*)cb->args[0];
-
-       if (w) {
-               cb->args[0] = 0;
-               fib6_walker_unlink(w);
-               kfree(w);
-       }
-       cb->done = (void*)cb->args[1];
-       cb->args[1] = 0;
-}
-
-static int fib6_dump_done(struct netlink_callback *cb)
-{
-       fib6_dump_end(cb);
-       return cb->done ? cb->done(cb) : 0;
-}
-
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
-       struct rt6_rtnl_dump_arg arg;
-       struct fib6_walker_t *w;
-       int res;
-
-       arg.skb = skb;
-       arg.cb = cb;
-
-       w = (void*)cb->args[0];
-       if (w == NULL) {
-               /* New dump:
-                * 
-                * 1. hook callback destructor.
-                */
-               cb->args[1] = (long)cb->done;
-               cb->done = fib6_dump_done;
-
-               /*
-                * 2. allocate and initialize walker.
-                */
-               w = kzalloc(sizeof(*w), GFP_ATOMIC);
-               if (w == NULL)
-                       return -ENOMEM;
-               RT6_TRACE("dump<%p", w);
-               w->root = &ip6_routing_table;
-               w->func = fib6_dump_node;
-               w->args = &arg;
-               cb->args[0] = (long)w;
-               read_lock_bh(&rt6_lock);
-               res = fib6_walk(w);
-               read_unlock_bh(&rt6_lock);
-       } else {
-               w->args = &arg;
-               read_lock_bh(&rt6_lock);
-               res = fib6_walk_continue(w);
-               read_unlock_bh(&rt6_lock);
-       }
-#if RT6_DEBUG >= 3
-       if (res <= 0 && skb->len == 0)
-               RT6_TRACE("%p>dump end\n", w);
-#endif
-       res = res < 0 ? res : skb->len;
-       /* res < 0 is an error. (really, impossible)
-          res == 0 means that dump is complete, but skb still can contain data.
-          res > 0 dump is not complete, but frame is full.
-        */
-       /* Destroy walker, if dump of this table is complete. */
-       if (res <= 0)
-               fib6_dump_end(cb);
-       return res;
-}
-
 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
        struct rtattr **rta = arg;
@@ -1986,9 +2044,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
                goto out_free;
        }
 
-       err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-       if (err > 0)
-               err = 0;
+       err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 out:
        return err;
 out_free:
@@ -2096,16 +2152,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 
 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
 {
-       struct rt6_proc_arg arg;
-       arg.buffer = buffer;
-       arg.offset = offset;
-       arg.length = length;
-       arg.skip = 0;
-       arg.len = 0;
+       struct rt6_proc_arg arg = {
+               .buffer = buffer,
+               .offset = offset,
+               .length = length,
+       };
 
-       read_lock_bh(&rt6_lock);
-       fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
-       read_unlock_bh(&rt6_lock);
+       fib6_clean_all(rt6_info_route, 0, &arg);
 
        *start = buffer;
        if (offset)
@@ -2278,10 +2331,16 @@ void __init ip6_route_init(void)
 #ifdef CONFIG_XFRM
        xfrm6_init();
 #endif
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+       fib6_rules_init();
+#endif
 }
 
 void ip6_route_cleanup(void)
 {
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+       fib6_rules_cleanup();
+#endif
 #ifdef CONFIG_PROC_FS
        proc_net_remove("ipv6_route");
        proc_net_remove("rt6_stats");