2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.18.2.2 2002/01/12 07:54:15 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
/* FSprintk: FIB-semantics debug trace; compiled out (expands to nothing). */
46 #define FSprintk(a...)
/* Head of the global doubly-linked list (fib_next/fib_prev) of shared
 * fib_info records, guarded by fib_info_lock (see fib_release_info and
 * ip_fib_check_default). */
48 static struct fib_info *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
/* Walk every fib_info on the global list; must be closed with
 * endfor_fib_info().  Locking is the caller's responsibility. */
52 #define for_fib_info() { struct fib_info *fi; \
53 for (fi = fib_info_list; fi; fi = fi->fib_next)
55 #define endfor_fib_info() }
57 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Serializes updates to multipath selection state (fib_power/nh_power). */
59 static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;
/* Iterate over all fib_nhs nexthops of a fib_info with a read-only
 * cursor `nh` and index `nhsel`; close with endfor_nexthops(). */
61 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
62 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
/* Same walk, but with a writable nexthop cursor. */
64 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
65 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
67 #else /* CONFIG_IP_ROUTE_MULTIPATH */
69 /* Hope, that gcc will optimize it to get rid of dummy loop */
/* Single-nexthop build: exactly one trip through the "loop". */
71 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
72 for (nhsel=0; nhsel < 1; nhsel++)
74 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
75 for (nhsel=0; nhsel < 1; nhsel++)
77 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
/* Closes a for_nexthops()/change_nexthops() block. */
79 #define endfor_nexthops(fi) }
/* Per route-type properties: the error code associated with the type
 * (0 = deliverable) and the narrowest scope the type permits.
 * NOTE(review): the struct declaration opening this initializer, and
 * the #else/#endif around the RTN_NAT entries, lie outside this view. */
86 } fib_props[RTN_MAX+1] = {
87 { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */
88 { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */
89 { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */
90 { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */
91 { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */
92 { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */
93 { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */
94 { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
95 { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */
96 { -EAGAIN, RT_SCOPE_UNIVERSE}, /* RTN_THROW */
97 #ifdef CONFIG_IP_ROUTE_NAT
98 { 0, RT_SCOPE_HOST}, /* RTN_NAT */
100 { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */
102 { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */
106 /* Release a nexthop info record */
/* Frees a fib_info; warns if it is still marked alive (fib_dead unset).
 * NOTE(review): interior lines (braces, per-nexthop device release,
 * the final kfree) are missing from this view. */
108 void free_fib_info(struct fib_info *fi)
110 if (fi->fib_dead == 0) {
111 printk("Freeing alive fib_info %p\n", fi);
114 change_nexthops(fi) {
118 } endfor_nexthops(fi);
/* Drop one tree reference on fi; when the last one goes, unlink fi from
 * the global fib_info list under fib_info_lock.
 * NOTE(review): lines between the unlink and the unlock (presumably
 * marking the record dead / releasing it) are missing from this view. */
123 void fib_release_info(struct fib_info *fi)
125 write_lock(&fib_info_lock);
126 if (fi && --fi->fib_treeref == 0) {
128 fi->fib_next->fib_prev = fi->fib_prev;
130 fi->fib_prev->fib_next = fi->fib_next;
131 if (fi == fib_info_list)
132 fib_info_list = fi->fib_next;
136 write_unlock(&fib_info_lock);
/* Compare the nexthop arrays of two fib_info records field by field
 * (oif, gw, scope, and — config permitting — weight and tclassid),
 * ignoring only the RTNH_F_DEAD flag bit.  Returns 0 when equivalent;
 * helper for fib_find_info(). */
139 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
141 const struct fib_nh *onh = ofi->fib_nh;
144 if (nh->nh_oif != onh->nh_oif ||
145 nh->nh_gw != onh->nh_gw ||
146 nh->nh_scope != onh->nh_scope ||
147 #ifdef CONFIG_IP_ROUTE_MULTIPATH
148 nh->nh_weight != onh->nh_weight ||
150 #ifdef CONFIG_NET_CLS_ROUTE
151 nh->nh_tclassid != onh->nh_tclassid ||
153 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
156 } endfor_nexthops(fi);
/* Search the global list for an existing fib_info equivalent to nfi:
 * same protocol, prefsrc, priority, metrics, flags (modulo RTNH_F_DEAD)
 * and nexthop set.  Used by fib_create_info() to share records rather
 * than duplicate them. */
160 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
163 if (fi->fib_nhs != nfi->fib_nhs)
165 if (nfi->fib_protocol == fi->fib_protocol &&
166 nfi->fib_prefsrc == fi->fib_prefsrc &&
167 nfi->fib_priority == fi->fib_priority &&
168 memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
169 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
170 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
/* Scan all live fib_info records for a link-scope, non-dead nexthop
 * via (dev, gw); holds fib_info_lock for reading throughout.
 * NOTE(review): the return statements are outside this view. */
176 /* Check, that the gateway is already configured.
177 Used only by redirect accept routine.
180 int ip_fib_check_default(u32 gw, struct net_device *dev)
182 read_lock(&fib_info_lock);
184 if (fi->fib_flags & RTNH_F_DEAD)
187 if (nh->nh_dev == dev && nh->nh_gw == gw &&
188 nh->nh_scope == RT_SCOPE_LINK &&
189 !(nh->nh_flags&RTNH_F_DEAD)) {
190 read_unlock(&fib_info_lock);
193 } endfor_nexthops(fi);
195 read_unlock(&fib_info_lock);
199 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Scan a run of nested rtattrs for `type` and return its first 32-bit
 * payload word; presumably returns 0 when absent (the tail of the
 * function is outside this view — confirm). */
201 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
203 while (RTA_OK(attr,attrlen)) {
204 if (attr->rta_type == type)
205 return *(u32*)RTA_DATA(attr);
206 attr = RTA_NEXT(attr, attrlen);
/* Count the rtnexthop entries packed inside an RTA_MULTIPATH payload,
 * bailing out if an entry's rtnh_len overruns the remaining space. */
212 fib_count_nexthops(struct rtattr *rta)
215 struct rtnexthop *nhp = RTA_DATA(rta);
216 int nhlen = RTA_PAYLOAD(rta);
218 while (nhlen >= (int)sizeof(struct rtnexthop)) {
219 if ((nhlen -= nhp->rtnh_len) < 0)
222 nhp = RTNH_NEXT(nhp);
228 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
230 struct rtnexthop *nhp = RTA_DATA(rta);
231 int nhlen = RTA_PAYLOAD(rta);
233 change_nexthops(fi) {
234 int attrlen = nhlen - sizeof(struct rtnexthop);
235 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
237 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
238 nh->nh_oif = nhp->rtnh_ifindex;
239 nh->nh_weight = nhp->rtnh_hops + 1;
241 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
242 #ifdef CONFIG_NET_CLS_ROUTE
243 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
246 nhp = RTNH_NEXT(nhp);
247 } endfor_nexthops(fi);
/* Test whether a route request (rta) matches fib_info fi: first the
 * priority, then — for single-nexthop requests — oif/gw, and finally
 * the full multipath nexthop list.  NOTE(review): the remainder of the
 * parameter list and the return statements are outside this view. */
253 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
256 #ifdef CONFIG_IP_ROUTE_MULTIPATH
257 struct rtnexthop *nhp;
261 if (rta->rta_priority &&
262 *rta->rta_priority != fi->fib_priority)
265 if (rta->rta_oif || rta->rta_gw) {
266 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
267 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
272 #ifdef CONFIG_IP_ROUTE_MULTIPATH
273 if (rta->rta_mp == NULL)
275 nhp = RTA_DATA(rta->rta_mp);
276 nhlen = RTA_PAYLOAD(rta->rta_mp);
279 int attrlen = nhlen - sizeof(struct rtnexthop);
282 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
284 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
/* `gw` is reused for both the RTA_GATEWAY and RTA_FLOW payloads. */
287 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
288 if (gw && gw != nh->nh_gw)
290 #ifdef CONFIG_NET_CLS_ROUTE
291 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
292 if (gw && gw != nh->nh_tclassid)
296 nhp = RTNH_NEXT(nhp);
297 } endfor_nexthops(fi);
307 The semantics of nexthops are very messy for historical reasons.
308 We have to take into account, that:
309 a) gateway can be actually local interface address,
310 so that gatewayed route is direct.
311 b) gateway must be on-link address, possibly
312 described not by an ifaddr, but also by a direct route.
313 c) If both gateway and interface are specified, they should not
315 d) If we use tunnel routes, gateway could be not on-link.
317 Attempt to reconcile all of these (alas, self-contradictory) conditions
318 results in pretty ugly and hairy code with obscure logic.
320 I chose to generalize it instead, so that the size
321 of code does not increase practically, but it becomes
323 Every prefix is assigned a "scope" value: "host" is local address,
324 "link" is direct route,
325 [ ... "site" ... "interior" ... ]
326 and "universe" is true gateway route with global meaning.
328 Every prefix refers to a set of "nexthop"s (gw, oif),
329 where gw must have narrower scope. This recursion stops
330 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
331 which means that gw is forced to be on link.
333 Code is still hairy, but now it is apparently logically
334 consistent and very flexible. F.e. as by-product it allows
335 to co-exist in peace independent exterior and interior
338 Normally it looks as following.
340 {universe prefix} -> (gw, oif) [scope link]
342 |-> {link prefix} -> (gw, oif) [scope local]
344 |-> {local prefix} (terminal node)
/* Validate and resolve one nexthop of a route being created:
 * - ONLINK nexthops need only a unicast gateway and an up device;
 * - gatewayed nexthops are resolved via fib_lookup() at a narrower
 *   scope and inherit device/oif/scope from the result;
 * - plain device nexthops get RT_SCOPE_HOST on their in_device.
 * NOTE(review): many error-path and brace lines are outside this view. */
347 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
353 struct fib_result res;
355 #ifdef CONFIG_IP_ROUTE_PERVASIVE
356 if (nh->nh_flags&RTNH_F_PERVASIVE)
359 if (nh->nh_flags&RTNH_F_ONLINK) {
360 struct net_device *dev;
362 if (r->rtm_scope >= RT_SCOPE_LINK)
364 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
366 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
368 if (!(dev->flags&IFF_UP))
372 nh->nh_scope = RT_SCOPE_LINK;
375 memset(&key, 0, sizeof(key));
377 key.oif = nh->nh_oif;
/* Gateway must resolve at a strictly narrower scope than the route. */
378 key.scope = r->rtm_scope + 1;
380 /* It is not necessary, but requires a bit of thinking */
381 if (key.scope < RT_SCOPE_LINK)
382 key.scope = RT_SCOPE_LINK;
383 if ((err = fib_lookup(&key, &res)) != 0)
386 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
388 nh->nh_scope = res.scope;
389 nh->nh_oif = FIB_RES_OIF(res);
390 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
392 dev_hold(nh->nh_dev);
394 if (!(nh->nh_dev->flags & IFF_UP))
401 struct in_device *in_dev;
403 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
406 in_dev = inetdev_by_index(nh->nh_oif);
409 if (!(in_dev->dev->flags&IFF_UP)) {
413 nh->nh_dev = in_dev->dev;
414 dev_hold(nh->nh_dev);
415 nh->nh_scope = RT_SCOPE_HOST;
/* Build a fib_info from a netlink route request (rtmsg + parsed rta),
 * validate its nexthops via fib_check_nh(), and either reuse an
 * equivalent existing record (fib_find_info) or link the new one at the
 * head of the global list.  NOTE(review): the return type, many error
 * labels, and brace lines are outside this view. */
422 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
423 const struct nlmsghdr *nlh, int *errp)
426 struct fib_info *fi = NULL;
427 struct fib_info *ofi;
428 #ifdef CONFIG_IP_ROUTE_MULTIPATH
434 if (r->rtm_type > RTN_MAX)
437 /* Fast check to catch the most weird cases */
438 if (fib_props[r->rtm_type].scope > r->rtm_scope)
441 #ifdef CONFIG_IP_ROUTE_MULTIPATH
443 nhs = fib_count_nexthops(rta->rta_mp);
/* fib_info and its nexthop array are one allocation, zero-filled. */
449 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
454 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
456 fi->fib_protocol = r->rtm_protocol;
458 fi->fib_flags = r->rtm_flags;
459 if (rta->rta_priority)
460 fi->fib_priority = *rta->rta_priority;
/* Copy RTA_METRICS sub-attributes into fib_metrics[] (types are 1-based). */
462 int attrlen = RTA_PAYLOAD(rta->rta_mx);
463 struct rtattr *attr = RTA_DATA(rta->rta_mx);
465 while (RTA_OK(attr, attrlen)) {
466 unsigned flavor = attr->rta_type;
468 if (flavor > RTAX_MAX)
470 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
472 attr = RTA_NEXT(attr, attrlen);
475 if (rta->rta_prefsrc)
476 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
/* Multipath: parse the nexthop list, then cross-check it against any
 * flat oif/gw/flow attributes also present in the request. */
479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
480 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
482 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
484 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
486 #ifdef CONFIG_NET_CLS_ROUTE
487 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
494 struct fib_nh *nh = fi->fib_nh;
496 nh->nh_oif = *rta->rta_oif;
498 memcpy(&nh->nh_gw, rta->rta_gw, 4);
499 #ifdef CONFIG_NET_CLS_ROUTE
501 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
503 nh->nh_flags = r->rtm_flags;
504 #ifdef CONFIG_IP_ROUTE_MULTIPATH
509 #ifdef CONFIG_IP_ROUTE_NAT
/* RTN_NAT: requires a gateway attribute, exactly one nexthop, no oif. */
510 if (r->rtm_type == RTN_NAT) {
511 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
513 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
/* Error-type routes (blackhole etc.) may not carry nexthop attributes. */
518 if (fib_props[r->rtm_type].error) {
519 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
524 if (r->rtm_scope > RT_SCOPE_HOST)
527 if (r->rtm_scope == RT_SCOPE_HOST) {
528 struct fib_nh *nh = fi->fib_nh;
530 /* Local address is added. */
531 if (nhs != 1 || nh->nh_gw)
533 nh->nh_scope = RT_SCOPE_NOWHERE;
534 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
536 if (nh->nh_dev == NULL)
539 change_nexthops(fi) {
540 if ((err = fib_check_nh(r, fi, nh)) != 0)
542 } endfor_nexthops(fi)
/* An explicit prefsrc must be a local address (or the route's own
 * RTN_LOCAL destination). */
545 if (fi->fib_prefsrc) {
546 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
547 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
548 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
/* Prefer sharing an equivalent existing record over inserting a new one. */
553 if ((ofi = fib_find_info(fi)) != NULL) {
561 atomic_inc(&fi->fib_clntref);
/* Link the new record at the head of the global list. */
562 write_lock(&fib_info_lock);
563 fi->fib_next = fib_info_list;
566 fib_info_list->fib_prev = fi;
568 write_unlock(&fib_info_lock);
/* Decide whether fib_info fi satisfies a lookup of route type `type`
 * (dead entries fail, per-type errors propagate, nexthops are filtered
 * by key->oif); takes a client reference on success.
 * NOTE(review): the res-filling and return lines are largely outside
 * this view. */
584 fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
586 int err = fib_props[type].error;
589 if (fi->fib_flags&RTNH_F_DEAD)
595 #ifdef CONFIG_IP_ROUTE_NAT
598 atomic_inc(&fi->fib_clntref);
607 if (nh->nh_flags&RTNH_F_DEAD)
609 if (!key->oif || key->oif == nh->nh_oif)
612 #ifdef CONFIG_IP_ROUTE_MULTIPATH
613 if (nhsel < fi->fib_nhs) {
615 atomic_inc(&fi->fib_clntref);
620 atomic_inc(&fi->fib_clntref);
629 printk(KERN_DEBUG "impossible 102\n");
636 /* Find appropriate source address to this destination */
/* Picks a source address on the result's output device, constrained by
 * the gateway and the route's scope. */
638 u32 __fib_res_prefsrc(struct fib_result *res)
640 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
/* Serialize one route into skb as a netlink rtmsg plus attributes
 * (RTA_DST, RTA_PRIORITY, RTA_FLOW, metrics, RTA_PREFSRC and either a
 * flat nexthop or an RTA_MULTIPATH list).  NLMSG_PUT/RTA_PUT jump to
 * the failure labels on overflow; the trim path restores skb to `b`.
 * NOTE(review): the rest of the parameter list and the failure labels
 * are partly outside this view. */
644 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
645 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
649 struct nlmsghdr *nlh;
650 unsigned char *b = skb->tail;
652 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
653 rtm = NLMSG_DATA(nlh);
654 rtm->rtm_family = AF_INET;
655 rtm->rtm_dst_len = dst_len;
656 rtm->rtm_src_len = 0;
658 rtm->rtm_table = tb_id;
659 rtm->rtm_type = type;
660 rtm->rtm_flags = fi->fib_flags;
661 rtm->rtm_scope = scope;
662 if (rtm->rtm_dst_len)
663 RTA_PUT(skb, RTA_DST, 4, dst);
664 rtm->rtm_protocol = fi->fib_protocol;
665 if (fi->fib_priority)
666 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
667 #ifdef CONFIG_NET_CLS_ROUTE
668 if (fi->fib_nh[0].nh_tclassid)
669 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
671 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
674 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
/* Single nexthop: flat RTA_GATEWAY / RTA_OIF attributes. */
675 if (fi->fib_nhs == 1) {
676 if (fi->fib_nh->nh_gw)
677 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
678 if (fi->fib_nh->nh_oif)
679 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
681 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Multipath: each nexthop becomes an rtnexthop inside RTA_MULTIPATH;
 * lengths are patched after the payload is written. */
682 if (fi->fib_nhs > 1) {
683 struct rtnexthop *nhp;
684 struct rtattr *mp_head;
685 if (skb_tailroom(skb) <= RTA_SPACE(0))
687 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
690 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
692 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
693 nhp->rtnh_flags = nh->nh_flags & 0xFF;
694 nhp->rtnh_hops = nh->nh_weight-1;
695 nhp->rtnh_ifindex = nh->nh_oif;
697 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
698 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
699 } endfor_nexthops(fi);
700 mp_head->rta_type = RTA_MULTIPATH;
701 mp_head->rta_len = skb->tail - (u8*)mp_head;
704 nlh->nlmsg_len = skb->tail - b;
709 skb_trim(skb, b - skb->data);
713 #ifndef CONFIG_IP_NOSIOCRT
/* Convert a legacy SIOCADDRT/SIOCDELRT struct rtentry into the netlink
 * rtmsg + kern_rta form the FIB code consumes internally.
 * NOTE(review): the return type, many closing braces and error paths
 * are outside this view. */
716 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
717 struct kern_rta *rta, struct rtentry *r)
722 memset(rtm, 0, sizeof(*rtm));
723 memset(rta, 0, sizeof(*rta));
725 if (r->rt_dst.sa_family != AF_INET)
726 return -EAFNOSUPPORT;
728 /* Check mask for validity:
729 a) it must be contiguous.
730 b) destination must have all host bits clear.
731 c) if application forgot to set correct family (AF_INET),
732 reject request unless it is absolutely clear i.e.
733 both family and mask are zero.
736 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
737 if (!(r->rt_flags&RTF_HOST)) {
738 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
739 if (r->rt_genmask.sa_family != AF_INET) {
740 if (mask || r->rt_genmask.sa_family)
741 return -EAFNOSUPPORT;
743 if (bad_mask(mask, *ptr))
745 plen = inet_mask_len(mask);
748 nl->nlmsg_flags = NLM_F_REQUEST;
751 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
752 if (cmd == SIOCDELRT) {
753 nl->nlmsg_type = RTM_DELROUTE;
756 nl->nlmsg_type = RTM_NEWROUTE;
757 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
758 rtm->rtm_protocol = RTPROT_BOOT;
761 rtm->rtm_dst_len = plen;
/* Legacy metric is 1-based; the 0-based priority is stashed in rt_pad3
 * so rta_priority can point at stable storage. */
765 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
766 rta->rta_priority = (u32*)&r->rt_pad3;
768 if (r->rt_flags&RTF_REJECT) {
769 rtm->rtm_scope = RT_SCOPE_HOST;
770 rtm->rtm_type = RTN_UNREACHABLE;
773 rtm->rtm_scope = RT_SCOPE_NOWHERE;
774 rtm->rtm_type = RTN_UNICAST;
778 struct net_device *dev;
779 char devname[IFNAMSIZ];
781 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
783 devname[IFNAMSIZ-1] = 0;
/* An "ethX:Y" alias name selects a preferred source address below. */
784 colon = strchr(devname, ':');
787 dev = __dev_get_by_name(devname);
790 rta->rta_oif = &dev->ifindex;
792 struct in_ifaddr *ifa;
793 struct in_device *in_dev = __in_dev_get(dev);
797 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
798 if (strcmp(ifa->ifa_label, devname) == 0)
802 rta->rta_prefsrc = &ifa->ifa_local;
806 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
807 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
809 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
810 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
813 if (cmd == SIOCDELRT)
816 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
819 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
820 rtm->rtm_scope = RT_SCOPE_LINK;
/* Translate RTF_MTU/RTF_WINDOW/RTF_IRTT into an RTA_METRICS attribute. */
822 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
824 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
828 mx->rta_type = RTA_METRICS;
829 mx->rta_len = RTA_LENGTH(0);
830 if (r->rt_flags&RTF_MTU) {
831 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
/* The legacy MTU is exported as advertised MSS: mtu minus 40 bytes. */
832 rec->rta_type = RTAX_ADVMSS;
833 rec->rta_len = RTA_LENGTH(4);
834 mx->rta_len += RTA_LENGTH(4);
835 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
837 if (r->rt_flags&RTF_WINDOW) {
838 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
839 rec->rta_type = RTAX_WINDOW;
840 rec->rta_len = RTA_LENGTH(4);
841 mx->rta_len += RTA_LENGTH(4);
842 *(u32*)RTA_DATA(rec) = r->rt_window;
844 if (r->rt_flags&RTF_IRTT) {
845 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
846 rec->rta_type = RTAX_RTT;
847 rec->rta_len = RTA_LENGTH(4);
848 mx->rta_len += RTA_LENGTH(4);
/* irtt is stored shifted left by 3, matching RTAX_RTT scaling. */
849 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
859 - local address disappeared -> we must delete all the entries
861 - device went down -> we must shutdown all nexthops going via it.
/* React to a lost local address or a downed device: kill whole entries
 * whose prefsrc was the vanished address, and mark nexthops through the
 * dead device RTNH_F_DEAD; when every nexthop of an entry is dead, the
 * entry itself is marked dead.  NOTE(review): the scope adjustment for
 * `force`, the `force > 1` branch body, and the return value are
 * outside this view. */
864 int fib_sync_down(u32 local, struct net_device *dev, int force)
867 int scope = RT_SCOPE_NOWHERE;
873 if (local && fi->fib_prefsrc == local) {
874 fi->fib_flags |= RTNH_F_DEAD;
876 } else if (dev && fi->fib_nhs) {
879 change_nexthops(fi) {
880 if (nh->nh_flags&RTNH_F_DEAD)
882 else if (nh->nh_dev == dev &&
883 nh->nh_scope != scope) {
884 nh->nh_flags |= RTNH_F_DEAD;
885 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Retire this nexthop's share of the multipath power budget. */
886 spin_lock_bh(&fib_multipath_lock);
887 fi->fib_power -= nh->nh_power;
889 spin_unlock_bh(&fib_multipath_lock);
893 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894 if (force > 1 && nh->nh_dev == dev) {
899 } endfor_nexthops(fi)
/* All nexthops dead -> the whole fib_info is dead. */
900 if (dead == fi->fib_nhs) {
901 fi->fib_flags |= RTNH_F_DEAD;
909 #ifdef CONFIG_IP_ROUTE_MULTIPATH
912 Dead device goes up. We wake up dead nexthops.
913 It makes sense only on multipath routes.
/* Revive dead nexthops that go through `dev` after it comes back up;
 * entries with a revived nexthop are unmarked RTNH_F_DEAD.
 * NOTE(review): the return value and power-reset lines inside the lock
 * are partly outside this view. */
916 int fib_sync_up(struct net_device *dev)
920 if (!(dev->flags&IFF_UP))
926 change_nexthops(fi) {
927 if (!(nh->nh_flags&RTNH_F_DEAD)) {
931 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
933 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
936 spin_lock_bh(&fib_multipath_lock);
938 nh->nh_flags &= ~RTNH_F_DEAD;
939 spin_unlock_bh(&fib_multipath_lock);
940 } endfor_nexthops(fi)
943 fi->fib_flags &= ~RTNH_F_DEAD;
951 The algorithm is suboptimal, but it provides really
952 fair weighted route distribution.
/* Weighted round-robin nexthop selection: when the total fib_power is
 * exhausted, recharge every live nexthop's nh_power from its weight;
 * then spend w = jiffies % fib_power down the list and pick the nexthop
 * where it runs out.  All state changes happen under fib_multipath_lock.
 * NOTE(review): the res-assignment lines on selection are outside this
 * view. */
955 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
957 struct fib_info *fi = res->fi;
960 spin_lock_bh(&fib_multipath_lock);
961 if (fi->fib_power <= 0) {
963 change_nexthops(fi) {
964 if (!(nh->nh_flags&RTNH_F_DEAD)) {
965 power += nh->nh_weight;
966 nh->nh_power = nh->nh_weight;
968 } endfor_nexthops(fi);
969 fi->fib_power = power;
971 spin_unlock_bh(&fib_multipath_lock);
972 /* Race condition: route has just become dead. */
979 /* w should be random number [0..fi->fib_power-1],
980 it is pretty bad approximation.
983 w = jiffies % fi->fib_power;
985 change_nexthops(fi) {
986 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
987 if ((w -= nh->nh_power) <= 0) {
991 spin_unlock_bh(&fib_multipath_lock);
995 } endfor_nexthops(fi);
997 /* Race condition: route has just become dead. */
999 spin_unlock_bh(&fib_multipath_lock);
1004 #ifdef CONFIG_PROC_FS
/* Map a route type (plus gateway/mask state) to the legacy RTF_* flag
 * bits shown in /proc/net/route.  NOTE(review): the tail of the
 * function (host-mask handling, dead flag, return) is outside this
 * view. */
1006 static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
1008 static unsigned type2flags[RTN_MAX+1] = {
1009 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
1011 unsigned flags = type2flags[type];
1013 if (fi && fi->fib_nh->nh_gw)
1014 flags |= RTF_GATEWAY;
1015 if (mask == 0xFFFFFFFF)
/* Format one route as a fixed-width /proc/net/route text record; the
 * second sprintf handles the fi == NULL case, and the record is padded
 * with spaces (127 bytes wide per the final memset). */
1022 void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
1025 unsigned flags = fib_flag_trans(type, dead, mask, fi);
1028 len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1029 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1030 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1031 mask, (fi->fib_advmss ? fi->fib_advmss+40 : 0),
1032 fi->fib_window, fi->fib_rtt>>3);
1034 len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1039 memset(buffer+len, ' ', 127-len);