ef8f4072dc3129b02c543d2d016f062634b1676f
[linux-2.4.git] / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.18.2.2 2002/01/12 07:54:15 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
46 #define FSprintk(a...)
47
48 static struct fib_info  *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
50 int fib_info_cnt;
51
52 #define for_fib_info() { struct fib_info *fi; \
53         for (fi = fib_info_list; fi; fi = fi->fib_next)
54
55 #define endfor_fib_info() }
56
57 #ifdef CONFIG_IP_ROUTE_MULTIPATH
58
59 static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;
60
61 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
62 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
63
64 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
65 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
66
67 #else /* CONFIG_IP_ROUTE_MULTIPATH */
68
69 /* Hopefully gcc will optimize away the dummy single-iteration loop. */
70
71 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
72 for (nhsel=0; nhsel < 1; nhsel++)
73
74 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
75 for (nhsel=0; nhsel < 1; nhsel++)
76
77 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
78
79 #define endfor_nexthops(fi) }
80
81
82 static struct 
83 {
84         int     error;
85         u8      scope;
86 } fib_props[RTN_MAX+1] = {
87         { 0, RT_SCOPE_NOWHERE},         /* RTN_UNSPEC */
88         { 0, RT_SCOPE_UNIVERSE},        /* RTN_UNICAST */
89         { 0, RT_SCOPE_HOST},            /* RTN_LOCAL */
90         { 0, RT_SCOPE_LINK},            /* RTN_BROADCAST */
91         { 0, RT_SCOPE_LINK},            /* RTN_ANYCAST */
92         { 0, RT_SCOPE_UNIVERSE},        /* RTN_MULTICAST */
93         { -EINVAL, RT_SCOPE_UNIVERSE},  /* RTN_BLACKHOLE */
94         { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
95         { -EACCES, RT_SCOPE_UNIVERSE},  /* RTN_PROHIBIT */
96         { -EAGAIN, RT_SCOPE_UNIVERSE},  /* RTN_THROW */
97 #ifdef CONFIG_IP_ROUTE_NAT
98         { 0, RT_SCOPE_HOST},            /* RTN_NAT */
99 #else
100         { -EINVAL, RT_SCOPE_NOWHERE},   /* RTN_NAT */
101 #endif
102         { -EINVAL, RT_SCOPE_NOWHERE}    /* RTN_XRESOLVE */
103 };
104
105
106 /* Release a nexthop info record */
107
108 void free_fib_info(struct fib_info *fi)
109 {
110         if (fi->fib_dead == 0) {
111                 printk("Freeing alive fib_info %p\n", fi);
112                 return;
113         }
114         change_nexthops(fi) {
115                 if (nh->nh_dev)
116                         dev_put(nh->nh_dev);
117                 nh->nh_dev = NULL;
118         } endfor_nexthops(fi);
119         fib_info_cnt--;
120         kfree(fi);
121 }
122
123 void fib_release_info(struct fib_info *fi)
124 {
125         write_lock(&fib_info_lock);
126         if (fi && --fi->fib_treeref == 0) {
127                 if (fi->fib_next)
128                         fi->fib_next->fib_prev = fi->fib_prev;
129                 if (fi->fib_prev)
130                         fi->fib_prev->fib_next = fi->fib_next;
131                 if (fi == fib_info_list)
132                         fib_info_list = fi->fib_next;
133                 fi->fib_dead = 1;
134                 fib_info_put(fi);
135         }
136         write_unlock(&fib_info_lock);
137 }
138
139 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
140 {
141         const struct fib_nh *onh = ofi->fib_nh;
142
143         for_nexthops(fi) {
144                 if (nh->nh_oif != onh->nh_oif ||
145                     nh->nh_gw  != onh->nh_gw ||
146                     nh->nh_scope != onh->nh_scope ||
147 #ifdef CONFIG_IP_ROUTE_MULTIPATH
148                     nh->nh_weight != onh->nh_weight ||
149 #endif
150 #ifdef CONFIG_NET_CLS_ROUTE
151                     nh->nh_tclassid != onh->nh_tclassid ||
152 #endif
153                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
154                         return -1;
155                 onh++;
156         } endfor_nexthops(fi);
157         return 0;
158 }
159
160 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
161 {
162         for_fib_info() {
163                 if (fi->fib_nhs != nfi->fib_nhs)
164                         continue;
165                 if (nfi->fib_protocol == fi->fib_protocol &&
166                     nfi->fib_prefsrc == fi->fib_prefsrc &&
167                     nfi->fib_priority == fi->fib_priority &&
168                     memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
169                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
170                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
171                         return fi;
172         } endfor_fib_info();
173         return NULL;
174 }
175
176 /* Check, that the gateway is already configured.
177    Used only by redirect accept routine.
178  */
179
180 int ip_fib_check_default(u32 gw, struct net_device *dev)
181 {
182         read_lock(&fib_info_lock);
183         for_fib_info() {
184                 if (fi->fib_flags & RTNH_F_DEAD)
185                         continue;
186                 for_nexthops(fi) {
187                         if (nh->nh_dev == dev && nh->nh_gw == gw &&
188                             nh->nh_scope == RT_SCOPE_LINK &&
189                             !(nh->nh_flags&RTNH_F_DEAD)) {
190                                 read_unlock(&fib_info_lock);
191                                 return 0;
192                         }
193                 } endfor_nexthops(fi);
194         } endfor_fib_info();
195         read_unlock(&fib_info_lock);
196         return -1;
197 }
198
199 #ifdef CONFIG_IP_ROUTE_MULTIPATH
200
201 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
202 {
203         while (RTA_OK(attr,attrlen)) {
204                 if (attr->rta_type == type)
205                         return *(u32*)RTA_DATA(attr);
206                 attr = RTA_NEXT(attr, attrlen);
207         }
208         return 0;
209 }
210
211 static int
212 fib_count_nexthops(struct rtattr *rta)
213 {
214         int nhs = 0;
215         struct rtnexthop *nhp = RTA_DATA(rta);
216         int nhlen = RTA_PAYLOAD(rta);
217
218         while (nhlen >= (int)sizeof(struct rtnexthop)) {
219                 if ((nhlen -= nhp->rtnh_len) < 0)
220                         return 0;
221                 nhs++;
222                 nhp = RTNH_NEXT(nhp);
223         };
224         return nhs;
225 }
226
227 static int
228 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
229 {
230         struct rtnexthop *nhp = RTA_DATA(rta);
231         int nhlen = RTA_PAYLOAD(rta);
232
233         change_nexthops(fi) {
234                 int attrlen = nhlen - sizeof(struct rtnexthop);
235                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
236                         return -EINVAL;
237                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
238                 nh->nh_oif = nhp->rtnh_ifindex;
239                 nh->nh_weight = nhp->rtnh_hops + 1;
240                 if (attrlen) {
241                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
242 #ifdef CONFIG_NET_CLS_ROUTE
243                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
244 #endif
245                 }
246                 nhp = RTNH_NEXT(nhp);
247         } endfor_nexthops(fi);
248         return 0;
249 }
250
251 #endif
252
253 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
254                  struct fib_info *fi)
255 {
256 #ifdef CONFIG_IP_ROUTE_MULTIPATH
257         struct rtnexthop *nhp;
258         int nhlen;
259 #endif
260
261         if (rta->rta_priority &&
262             *rta->rta_priority != fi->fib_priority)
263                 return 1;
264
265         if (rta->rta_oif || rta->rta_gw) {
266                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
267                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
268                         return 0;
269                 return 1;
270         }
271
272 #ifdef CONFIG_IP_ROUTE_MULTIPATH
273         if (rta->rta_mp == NULL)
274                 return 0;
275         nhp = RTA_DATA(rta->rta_mp);
276         nhlen = RTA_PAYLOAD(rta->rta_mp);
277         
278         for_nexthops(fi) {
279                 int attrlen = nhlen - sizeof(struct rtnexthop);
280                 u32 gw;
281
282                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
283                         return -EINVAL;
284                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
285                         return 1;
286                 if (attrlen) {
287                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
288                         if (gw && gw != nh->nh_gw)
289                                 return 1;
290 #ifdef CONFIG_NET_CLS_ROUTE
291                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
292                         if (gw && gw != nh->nh_tclassid)
293                                 return 1;
294 #endif
295                 }
296                 nhp = RTNH_NEXT(nhp);
297         } endfor_nexthops(fi);
298 #endif
299         return 0;
300 }
301
302
303 /*
304    Picture
305    -------
306
307    The semantics of a nexthop are very messy for historical reasons.
308    We have to take into account, that:
309    a) gateway can be actually local interface address,
310       so that gatewayed route is direct.
311    b) gateway must be on-link address, possibly
312       described not by an ifaddr, but also by a direct route.
313    c) If both gateway and interface are specified, they should not
314       contradict.
315    d) If we use tunnel routes, gateway could be not on-link.
316
317    Attempt to reconcile all of these (alas, self-contradictory) conditions
318    results in pretty ugly and hairy code with obscure logic.
319
320    I chose to generalize it instead, so that the size
321    of the code practically does not increase, but it becomes
322    much more general.
323    Every prefix is assigned a "scope" value: "host" is local address,
324    "link" is direct route,
325    [ ... "site" ... "interior" ... ]
326    and "universe" is true gateway route with global meaning.
327
328    Every prefix refers to a set of "nexthop"s (gw, oif),
329    where gw must have narrower scope. This recursion stops
330    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
331    which means that gw is forced to be on link.
332
333    Code is still hairy, but now it is apparently logically
334    consistent and very flexible. E.g. as a by-product it allows
335    independent exterior and interior routing processes
336    to co-exist in peace.
337
338    Normally it looks as following.
339
340    {universe prefix}  -> (gw, oif) [scope link]
341                           |
342                           |-> {link prefix} -> (gw, oif) [scope local]
343                                                 |
344                                                 |-> {local prefix} (terminal node)
345  */
346
347 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
348 {
349         int err;
350
351         if (nh->nh_gw) {
352                 struct rt_key key;
353                 struct fib_result res;
354
355 #ifdef CONFIG_IP_ROUTE_PERVASIVE
356                 if (nh->nh_flags&RTNH_F_PERVASIVE)
357                         return 0;
358 #endif
359                 if (nh->nh_flags&RTNH_F_ONLINK) {
360                         struct net_device *dev;
361
362                         if (r->rtm_scope >= RT_SCOPE_LINK)
363                                 return -EINVAL;
364                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
365                                 return -EINVAL;
366                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
367                                 return -ENODEV;
368                         if (!(dev->flags&IFF_UP))
369                                 return -ENETDOWN;
370                         nh->nh_dev = dev;
371                         dev_hold(dev);
372                         nh->nh_scope = RT_SCOPE_LINK;
373                         return 0;
374                 }
375                 memset(&key, 0, sizeof(key));
376                 key.dst = nh->nh_gw;
377                 key.oif = nh->nh_oif;
378                 key.scope = r->rtm_scope + 1;
379
380                 /* It is not necessary, but requires a bit of thinking */
381                 if (key.scope < RT_SCOPE_LINK)
382                         key.scope = RT_SCOPE_LINK;
383                 if ((err = fib_lookup(&key, &res)) != 0)
384                         return err;
385                 err = -EINVAL;
386                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
387                         goto out;
388                 nh->nh_scope = res.scope;
389                 nh->nh_oif = FIB_RES_OIF(res);
390                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
391                         goto out;
392                 dev_hold(nh->nh_dev);
393                 err = -ENETDOWN;
394                 if (!(nh->nh_dev->flags & IFF_UP))
395                         goto out;
396                 err = 0;
397 out:
398                 fib_res_put(&res);
399                 return err;
400         } else {
401                 struct in_device *in_dev;
402
403                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
404                         return -EINVAL;
405
406                 in_dev = inetdev_by_index(nh->nh_oif);
407                 if (in_dev == NULL)
408                         return -ENODEV;
409                 if (!(in_dev->dev->flags&IFF_UP)) {
410                         in_dev_put(in_dev);
411                         return -ENETDOWN;
412                 }
413                 nh->nh_dev = in_dev->dev;
414                 dev_hold(nh->nh_dev);
415                 nh->nh_scope = RT_SCOPE_HOST;
416                 in_dev_put(in_dev);
417         }
418         return 0;
419 }
420
421 struct fib_info *
422 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
423                 const struct nlmsghdr *nlh, int *errp)
424 {
425         int err;
426         struct fib_info *fi = NULL;
427         struct fib_info *ofi;
428 #ifdef CONFIG_IP_ROUTE_MULTIPATH
429         int nhs = 1;
430 #else
431         const int nhs = 1;
432 #endif
433
434         if (r->rtm_type > RTN_MAX)
435                 goto err_inval;
436
437         /* Fast check to catch the most weird cases */
438         if (fib_props[r->rtm_type].scope > r->rtm_scope)
439                 goto err_inval;
440
441 #ifdef CONFIG_IP_ROUTE_MULTIPATH
442         if (rta->rta_mp) {
443                 nhs = fib_count_nexthops(rta->rta_mp);
444                 if (nhs == 0)
445                         goto err_inval;
446         }
447 #endif
448
449         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
450         err = -ENOBUFS;
451         if (fi == NULL)
452                 goto failure;
453         fib_info_cnt++;
454         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
455
456         fi->fib_protocol = r->rtm_protocol;
457         fi->fib_nhs = nhs;
458         fi->fib_flags = r->rtm_flags;
459         if (rta->rta_priority)
460                 fi->fib_priority = *rta->rta_priority;
461         if (rta->rta_mx) {
462                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
463                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
464
465                 while (RTA_OK(attr, attrlen)) {
466                         unsigned flavor = attr->rta_type;
467                         if (flavor) {
468                                 if (flavor > RTAX_MAX)
469                                         goto err_inval;
470                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
471                         }
472                         attr = RTA_NEXT(attr, attrlen);
473                 }
474         }
475         if (rta->rta_prefsrc)
476                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
477
478         if (rta->rta_mp) {
479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
480                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
481                         goto failure;
482                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
483                         goto err_inval;
484                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
485                         goto err_inval;
486 #ifdef CONFIG_NET_CLS_ROUTE
487                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
488                         goto err_inval;
489 #endif
490 #else
491                 goto err_inval;
492 #endif
493         } else {
494                 struct fib_nh *nh = fi->fib_nh;
495                 if (rta->rta_oif)
496                         nh->nh_oif = *rta->rta_oif;
497                 if (rta->rta_gw)
498                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
499 #ifdef CONFIG_NET_CLS_ROUTE
500                 if (rta->rta_flow)
501                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
502 #endif
503                 nh->nh_flags = r->rtm_flags;
504 #ifdef CONFIG_IP_ROUTE_MULTIPATH
505                 nh->nh_weight = 1;
506 #endif
507         }
508
509 #ifdef CONFIG_IP_ROUTE_NAT
510         if (r->rtm_type == RTN_NAT) {
511                 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
512                         goto err_inval;
513                 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
514                 goto link_it;
515         }
516 #endif
517
518         if (fib_props[r->rtm_type].error) {
519                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
520                         goto err_inval;
521                 goto link_it;
522         }
523
524         if (r->rtm_scope > RT_SCOPE_HOST)
525                 goto err_inval;
526
527         if (r->rtm_scope == RT_SCOPE_HOST) {
528                 struct fib_nh *nh = fi->fib_nh;
529
530                 /* Local address is added. */
531                 if (nhs != 1 || nh->nh_gw)
532                         goto err_inval;
533                 nh->nh_scope = RT_SCOPE_NOWHERE;
534                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
535                 err = -ENODEV;
536                 if (nh->nh_dev == NULL)
537                         goto failure;
538         } else {
539                 change_nexthops(fi) {
540                         if ((err = fib_check_nh(r, fi, nh)) != 0)
541                                 goto failure;
542                 } endfor_nexthops(fi)
543         }
544
545         if (fi->fib_prefsrc) {
546                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
547                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
548                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
549                                 goto err_inval;
550         }
551
552 link_it:
553         if ((ofi = fib_find_info(fi)) != NULL) {
554                 fi->fib_dead = 1;
555                 free_fib_info(fi);
556                 ofi->fib_treeref++;
557                 return ofi;
558         }
559
560         fi->fib_treeref++;
561         atomic_inc(&fi->fib_clntref);
562         write_lock(&fib_info_lock);
563         fi->fib_next = fib_info_list;
564         fi->fib_prev = NULL;
565         if (fib_info_list)
566                 fib_info_list->fib_prev = fi;
567         fib_info_list = fi;
568         write_unlock(&fib_info_lock);
569         return fi;
570
571 err_inval:
572         err = -EINVAL;
573
574 failure:
575         *errp = err;
576         if (fi) {
577                 fi->fib_dead = 1;
578                 free_fib_info(fi);
579         }
580         return NULL;
581 }
582
583 int 
584 fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
585 {
586         int err = fib_props[type].error;
587
588         if (err == 0) {
589                 if (fi->fib_flags&RTNH_F_DEAD)
590                         return 1;
591
592                 res->fi = fi;
593
594                 switch (type) {
595 #ifdef CONFIG_IP_ROUTE_NAT
596                 case RTN_NAT:
597                         FIB_RES_RESET(*res);
598                         atomic_inc(&fi->fib_clntref);
599                         return 0;
600 #endif
601                 case RTN_UNICAST:
602                 case RTN_LOCAL:
603                 case RTN_BROADCAST:
604                 case RTN_ANYCAST:
605                 case RTN_MULTICAST:
606                         for_nexthops(fi) {
607                                 if (nh->nh_flags&RTNH_F_DEAD)
608                                         continue;
609                                 if (!key->oif || key->oif == nh->nh_oif)
610                                         break;
611                         }
612 #ifdef CONFIG_IP_ROUTE_MULTIPATH
613                         if (nhsel < fi->fib_nhs) {
614                                 res->nh_sel = nhsel;
615                                 atomic_inc(&fi->fib_clntref);
616                                 return 0;
617                         }
618 #else
619                         if (nhsel < 1) {
620                                 atomic_inc(&fi->fib_clntref);
621                                 return 0;
622                         }
623 #endif
624                         endfor_nexthops(fi);
625                         res->fi = NULL;
626                         return 1;
627                 default:
628                         res->fi = NULL;
629                         printk(KERN_DEBUG "impossible 102\n");
630                         return -EINVAL;
631                 }
632         }
633         return err;
634 }
635
636 /* Find appropriate source address to this destination */
637
638 u32 __fib_res_prefsrc(struct fib_result *res)
639 {
640         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
641 }
642
643 int
644 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
645               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
646               struct fib_info *fi)
647 {
648         struct rtmsg *rtm;
649         struct nlmsghdr  *nlh;
650         unsigned char    *b = skb->tail;
651
652         nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
653         rtm = NLMSG_DATA(nlh);
654         rtm->rtm_family = AF_INET;
655         rtm->rtm_dst_len = dst_len;
656         rtm->rtm_src_len = 0;
657         rtm->rtm_tos = tos;
658         rtm->rtm_table = tb_id;
659         rtm->rtm_type = type;
660         rtm->rtm_flags = fi->fib_flags;
661         rtm->rtm_scope = scope;
662         if (rtm->rtm_dst_len)
663                 RTA_PUT(skb, RTA_DST, 4, dst);
664         rtm->rtm_protocol = fi->fib_protocol;
665         if (fi->fib_priority)
666                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
667 #ifdef CONFIG_NET_CLS_ROUTE
668         if (fi->fib_nh[0].nh_tclassid)
669                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
670 #endif
671         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
672                 goto rtattr_failure;
673         if (fi->fib_prefsrc)
674                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
675         if (fi->fib_nhs == 1) {
676                 if (fi->fib_nh->nh_gw)
677                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
678                 if (fi->fib_nh->nh_oif)
679                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
680         }
681 #ifdef CONFIG_IP_ROUTE_MULTIPATH
682         if (fi->fib_nhs > 1) {
683                 struct rtnexthop *nhp;
684                 struct rtattr *mp_head;
685                 if (skb_tailroom(skb) <= RTA_SPACE(0))
686                         goto rtattr_failure;
687                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
688
689                 for_nexthops(fi) {
690                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
691                                 goto rtattr_failure;
692                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
693                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
694                         nhp->rtnh_hops = nh->nh_weight-1;
695                         nhp->rtnh_ifindex = nh->nh_oif;
696                         if (nh->nh_gw)
697                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
698                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
699                 } endfor_nexthops(fi);
700                 mp_head->rta_type = RTA_MULTIPATH;
701                 mp_head->rta_len = skb->tail - (u8*)mp_head;
702         }
703 #endif
704         nlh->nlmsg_len = skb->tail - b;
705         return skb->len;
706
707 nlmsg_failure:
708 rtattr_failure:
709         skb_trim(skb, b - skb->data);
710         return -1;
711 }
712
713 #ifndef CONFIG_IP_NOSIOCRT
714
715 int
716 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
717                     struct kern_rta *rta, struct rtentry *r)
718 {
719         int    plen;
720         u32    *ptr;
721
722         memset(rtm, 0, sizeof(*rtm));
723         memset(rta, 0, sizeof(*rta));
724
725         if (r->rt_dst.sa_family != AF_INET)
726                 return -EAFNOSUPPORT;
727
728         /* Check mask for validity:
729            a) it must be contiguous.
730            b) destination must have all host bits clear.
731            c) if application forgot to set correct family (AF_INET),
732               reject request unless it is absolutely clear i.e.
733               both family and mask are zero.
734          */
735         plen = 32;
736         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
737         if (!(r->rt_flags&RTF_HOST)) {
738                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
739                 if (r->rt_genmask.sa_family != AF_INET) {
740                         if (mask || r->rt_genmask.sa_family)
741                                 return -EAFNOSUPPORT;
742                 }
743                 if (bad_mask(mask, *ptr))
744                         return -EINVAL;
745                 plen = inet_mask_len(mask);
746         }
747
748         nl->nlmsg_flags = NLM_F_REQUEST;
749         nl->nlmsg_pid = 0;
750         nl->nlmsg_seq = 0;
751         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
752         if (cmd == SIOCDELRT) {
753                 nl->nlmsg_type = RTM_DELROUTE;
754                 nl->nlmsg_flags = 0;
755         } else {
756                 nl->nlmsg_type = RTM_NEWROUTE;
757                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
758                 rtm->rtm_protocol = RTPROT_BOOT;
759         }
760
761         rtm->rtm_dst_len = plen;
762         rta->rta_dst = ptr;
763
764         if (r->rt_metric) {
765                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
766                 rta->rta_priority = (u32*)&r->rt_pad3;
767         }
768         if (r->rt_flags&RTF_REJECT) {
769                 rtm->rtm_scope = RT_SCOPE_HOST;
770                 rtm->rtm_type = RTN_UNREACHABLE;
771                 return 0;
772         }
773         rtm->rtm_scope = RT_SCOPE_NOWHERE;
774         rtm->rtm_type = RTN_UNICAST;
775
776         if (r->rt_dev) {
777                 char *colon;
778                 struct net_device *dev;
779                 char   devname[IFNAMSIZ];
780
781                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
782                         return -EFAULT;
783                 devname[IFNAMSIZ-1] = 0;
784                 colon = strchr(devname, ':');
785                 if (colon)
786                         *colon = 0;
787                 dev = __dev_get_by_name(devname);
788                 if (!dev)
789                         return -ENODEV;
790                 rta->rta_oif = &dev->ifindex;
791                 if (colon) {
792                         struct in_ifaddr *ifa;
793                         struct in_device *in_dev = __in_dev_get(dev);
794                         if (!in_dev)
795                                 return -ENODEV;
796                         *colon = ':';
797                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
798                                 if (strcmp(ifa->ifa_label, devname) == 0)
799                                         break;
800                         if (ifa == NULL)
801                                 return -ENODEV;
802                         rta->rta_prefsrc = &ifa->ifa_local;
803                 }
804         }
805
806         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
807         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
808                 rta->rta_gw = ptr;
809                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
810                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
811         }
812
813         if (cmd == SIOCDELRT)
814                 return 0;
815
816         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
817                 return -EINVAL;
818
819         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
820                 rtm->rtm_scope = RT_SCOPE_LINK;
821
822         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
823                 struct rtattr *rec;
824                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
825                 if (mx == NULL)
826                         return -ENOMEM;
827                 rta->rta_mx = mx;
828                 mx->rta_type = RTA_METRICS;
829                 mx->rta_len  = RTA_LENGTH(0);
830                 if (r->rt_flags&RTF_MTU) {
831                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
832                         rec->rta_type = RTAX_ADVMSS;
833                         rec->rta_len = RTA_LENGTH(4);
834                         mx->rta_len += RTA_LENGTH(4);
835                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
836                 }
837                 if (r->rt_flags&RTF_WINDOW) {
838                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
839                         rec->rta_type = RTAX_WINDOW;
840                         rec->rta_len = RTA_LENGTH(4);
841                         mx->rta_len += RTA_LENGTH(4);
842                         *(u32*)RTA_DATA(rec) = r->rt_window;
843                 }
844                 if (r->rt_flags&RTF_IRTT) {
845                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
846                         rec->rta_type = RTAX_RTT;
847                         rec->rta_len = RTA_LENGTH(4);
848                         mx->rta_len += RTA_LENGTH(4);
849                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
850                 }
851         }
852         return 0;
853 }
854
855 #endif
856
857 /*
858    Update FIB if:
859    - local address disappeared -> we must delete all the entries
860      referring to it.
861    - device went down -> we must shutdown all nexthops going via it.
862  */
863
864 int fib_sync_down(u32 local, struct net_device *dev, int force)
865 {
866         int ret = 0;
867         int scope = RT_SCOPE_NOWHERE;
868         
869         if (force)
870                 scope = -1;
871
872         for_fib_info() {
873                 if (local && fi->fib_prefsrc == local) {
874                         fi->fib_flags |= RTNH_F_DEAD;
875                         ret++;
876                 } else if (dev && fi->fib_nhs) {
877                         int dead = 0;
878
879                         change_nexthops(fi) {
880                                 if (nh->nh_flags&RTNH_F_DEAD)
881                                         dead++;
882                                 else if (nh->nh_dev == dev &&
883                                          nh->nh_scope != scope) {
884                                         nh->nh_flags |= RTNH_F_DEAD;
885 #ifdef CONFIG_IP_ROUTE_MULTIPATH
886                                         spin_lock_bh(&fib_multipath_lock);
887                                         fi->fib_power -= nh->nh_power;
888                                         nh->nh_power = 0;
889                                         spin_unlock_bh(&fib_multipath_lock);
890 #endif
891                                         dead++;
892                                 }
893 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894                                 if (force > 1 && nh->nh_dev == dev) {
895                                         dead = fi->fib_nhs;
896                                         break;
897                                 }
898 #endif
899                         } endfor_nexthops(fi)
900                         if (dead == fi->fib_nhs) {
901                                 fi->fib_flags |= RTNH_F_DEAD;
902                                 ret++;
903                         }
904                 }
905         } endfor_fib_info();
906         return ret;
907 }
908
909 #ifdef CONFIG_IP_ROUTE_MULTIPATH
910
911 /*
912    Dead device goes up. We wake up dead nexthops.
913    It takes sense only on multipath routes.
914  */
915
916 int fib_sync_up(struct net_device *dev)
917 {
918         int ret = 0;
919
920         if (!(dev->flags&IFF_UP))
921                 return 0;
922
923         for_fib_info() {
924                 int alive = 0;
925
926                 change_nexthops(fi) {
927                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
928                                 alive++;
929                                 continue;
930                         }
931                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
932                                 continue;
933                         if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
934                                 continue;
935                         alive++;
936                         spin_lock_bh(&fib_multipath_lock);
937                         nh->nh_power = 0;
938                         nh->nh_flags &= ~RTNH_F_DEAD;
939                         spin_unlock_bh(&fib_multipath_lock);
940                 } endfor_nexthops(fi)
941
942                 if (alive > 0) {
943                         fi->fib_flags &= ~RTNH_F_DEAD;
944                         ret++;
945                 }
946         } endfor_fib_info();
947         return ret;
948 }
949
950 /*
951    The algorithm is suboptimal, but it provides really
952    fair weighted route distribution.
953  */
954
955 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
956 {
957         struct fib_info *fi = res->fi;
958         int w;
959
960         spin_lock_bh(&fib_multipath_lock);
961         if (fi->fib_power <= 0) {
962                 int power = 0;
963                 change_nexthops(fi) {
964                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
965                                 power += nh->nh_weight;
966                                 nh->nh_power = nh->nh_weight;
967                         }
968                 } endfor_nexthops(fi);
969                 fi->fib_power = power;
970                 if (power <= 0) {
971                         spin_unlock_bh(&fib_multipath_lock);
972                         /* Race condition: route has just become dead. */
973                         res->nh_sel = 0;
974                         return;
975                 }
976         }
977
978
979         /* w should be random number [0..fi->fib_power-1],
980            it is pretty bad approximation.
981          */
982
983         w = jiffies % fi->fib_power;
984
985         change_nexthops(fi) {
986                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
987                         if ((w -= nh->nh_power) <= 0) {
988                                 nh->nh_power--;
989                                 fi->fib_power--;
990                                 res->nh_sel = nhsel;
991                                 spin_unlock_bh(&fib_multipath_lock);
992                                 return;
993                         }
994                 }
995         } endfor_nexthops(fi);
996
997         /* Race condition: route has just become dead. */
998         res->nh_sel = 0;
999         spin_unlock_bh(&fib_multipath_lock);
1000 }
1001 #endif
1002
1003
1004 #ifdef CONFIG_PROC_FS
1005
1006 static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
1007 {
1008         static unsigned type2flags[RTN_MAX+1] = {
1009                 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
1010         };
1011         unsigned flags = type2flags[type];
1012
1013         if (fi && fi->fib_nh->nh_gw)
1014                 flags |= RTF_GATEWAY;
1015         if (mask == 0xFFFFFFFF)
1016                 flags |= RTF_HOST;
1017         if (!dead)
1018                 flags |= RTF_UP;
1019         return flags;
1020 }
1021
1022 void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
1023 {
1024         int len;
1025         unsigned flags = fib_flag_trans(type, dead, mask, fi);
1026
1027         if (fi) {
1028                 len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1029                               fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1030                               fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1031                               mask, (fi->fib_advmss ? fi->fib_advmss+40 : 0),
1032                               fi->fib_window, fi->fib_rtt>>3);
1033         } else {
1034                 len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1035                               prefix, 0,
1036                               flags, 0, 0, 0,
1037                               mask, 0, 0, 0);
1038         }
1039         memset(buffer+len, ' ', 127-len);
1040         buffer[127] = '\n';
1041 }
1042
1043 #endif