/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@redhat.com>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *      Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];            /* Devices              */
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

int mroute_do_assert;                                   /* Set in PIM assert    */
int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
atomic_t cache_resolve_queue_len;                       /* Size of unresolved   */

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

kmem_cache_t *mrt_cachep;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

extern struct inet_protocol pim_protocol;

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name("tunl0");

        if (dev) {
                int err;
                struct ifreq ifr;
                mm_segment_t    oldfs;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (void*)&p;

                oldfs = get_fs(); set_fs(KERNEL_DS);
                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                set_fs(oldfs);

                dev = NULL;

                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get(dev);
                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
                                goto failure;
                        in_dev->cnf.rp_filter = 0;

                        if (dev_open(dev))
                                goto failure;
                }
        }
        return dev;

failure:
        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

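/* Transmit hook for the synthetic "pimreg" device: account the frame in
 * the device statistics, bounce the whole packet up to the PIM daemon as
 * an IGMPMSG_WHOLEPKT report, then drop it.
 */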
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        read_lock(&mrt_lock);
        ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
        ((struct net_device_stats*)dev->priv)->tx_packets++;
        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
        return (struct net_device_stats*)dev->priv;
}

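/* Create and register the "pimreg" pseudo-device backing a PIM
 * VIFF_REGISTER vif. Its net_device_stats block is carved out of the
 * same allocation, and NETIF_F_DYNALLOC lets the core free the device
 * on unregister.
 */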
static
struct net_device *ipmr_reg_vif(struct vifctl *v)
{
        struct net_device  *dev;
        struct in_device *in_dev;
        int size;

        size = sizeof(*dev) + sizeof(struct net_device_stats);
        dev = kmalloc(size, GFP_KERNEL);
        if (!dev)
                return NULL;

        memset(dev, 0, size);

        dev->priv = dev + 1;

        strcpy(dev->name, "pimreg");

        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->hard_start_xmit    = reg_vif_xmit;
        dev->get_stats          = reg_vif_get_stats;
        dev->features           |= NETIF_F_DYNALLOC;

        if (register_netdevice(dev)) {
                kfree(dev);
                return NULL;
        }
        dev->iflink = 0;

        if ((in_dev = inetdev_init(dev)) == NULL)
                goto failure;

        in_dev->cnf.rp_filter = 0;

        if (dev_open(dev))
                goto failure;

        return dev;

failure:
        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 */

static int vif_delete(int vifi)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= maxvif)
                return -EADDRNOTAVAIL;

        v = &vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == reg_vif_num)
                reg_vif_num = -1;
#endif

        if (vifi+1 == maxvif) {
                int tmp;
                for (tmp=vifi-1; tmp>=0; tmp--) {
                        if (VIF_EXISTS(tmp))
                                break;
                }
                maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get(dev)) != NULL) {
                in_dev->cnf.mc_forwarding--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                unregister_netdevice(dev);

        dev_put(dev);
        return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
        struct sk_buff *skb;

        atomic_dec(&cache_resolve_queue_len);

        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (skb->nh.iph->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
                        netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
                } else
                        kfree_skb(skb);
        }

        kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

void ipmr_expire_process(unsigned long dummy)
{
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
                return;
        }

        if (atomic_read(&cache_resolve_queue_len) == 0)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &mfc_unres_queue;

        while ((c=*cp) != NULL) {
                long interval = c->mfc_un.unres.expires - now;

                if (interval > 0) {
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(c);
        }

        if (atomic_read(&cache_resolve_queue_len))
                mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill the oifs (TTL threshold) list. Called with mrt_lock write-locked. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi=0; vifi<maxvif; vifi++) {
                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}

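/* Add a virtual interface: create or look up the underlying device
 * according to vifc_flags, enable mc_forwarding and allmulti on it,
 * then publish the vif_table slot under mrt_lock.
 */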
static int vif_add(struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;

        /* Is vif busy ? */
        if (VIF_EXISTS(vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(vifc);
                if (!dev)
                        return -ENOBUFS;
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(vifc);
                if (!dev)
                        return -ENOBUFS;
                break;
        case 0:
                dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
                if (!dev)
                        return -EADDRNOTAVAIL;
                __dev_put(dev);
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get(dev)) == NULL)
                return -EADDRNOTAVAIL;
        in_dev->cnf.mc_forwarding++;
        dev_set_allmulti(dev, +1);
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit=vifc->vifc_rate_limit;
        v->local=vifc->vifc_lcl_addr.s_addr;
        v->remote=vifc->vifc_rmt_addr.s_addr;
        v->flags=vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold=vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        dev_hold(dev);
        v->dev=dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags&VIFF_REGISTER)
                reg_vif_num = vifi;
#endif
        if (vifi+1 > maxvif)
                maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

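/* Hash lookup of a resolved (origin, group) cache entry.
 * Must be called with mrt_lock held.
 */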
static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
        int line=MFC_HASH(mcastgrp,origin);
        struct mfc_cache *c;

        for (c=mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
                        break;
        }
        return c;
}

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
        if(c==NULL)
                return NULL;
        memset(c, 0, sizeof(*c));
        c->mfc_un.res.minvif = MAXVIFS;
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
        if(c==NULL)
                return NULL;
        memset(c, 0, sizeof(*c));
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        return c;
}

/*
 *      A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;

        /*
         *      Play the pending entries through our router
         */

        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (skb->nh.iph->version == 0) {
                        int err;
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
                        }
                        err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
                } else
                        ip_mr_forward(skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        int ihl = pkt->nh.iph->ihl<<2;
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if(!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
                skb->nh.raw = skb->h.raw = (u8*)msg;
                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = reg_vif_num;
                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
        memcpy(skb->data,pkt->data,ihl);
        skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg*)skb->nh.iph;
        msg->im_vif = vifi;
        skb->dst = dst_clone(pkt->dst);

        /*
         *      Add our header
         */

        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
        skb->h.raw = skb->nh.raw;
        }

        if (mroute_socket == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}

/*
 *      Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;

        spin_lock_bh(&mfc_unres_lock);
        for (c=mfc_unres_queue; c; c=c->next) {
                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
                    c->mfc_origin == skb->nh.iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&cache_resolve_queue_len)>=10 ||
                    (c=ipmr_cache_alloc_unres())==NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent=-1;
                c->mfc_origin=skb->nh.iph->saddr;
                c->mfc_mcastgrp=skb->nh.iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        kmem_cache_free(mrt_cachep, c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&cache_resolve_queue_len);
                c->next = mfc_unres_queue;
                mfc_unres_queue = c;

                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen>3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

int ipmr_mfc_delete(struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        kmem_cache_free(mrt_cachep, c);
                        return 0;
                }
        }
        return -ENOENT;
}

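/* Add or update a resolved cache entry. If an unresolved entry for the
 * same (origin, group) pair is queued, take it off the queue and replay
 * its pending packets through the new route.
 */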
int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c=ipmr_cache_alloc();
        if (c==NULL)
                return -ENOMEM;

        c->mfc_origin=mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent=mfc->mfcc_parent;
        ipmr_update_thresholds(c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = mfc_cache_array[line];
        mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
             cp = &uc->next) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        if (atomic_dec_and_test(&cache_resolve_queue_len))
                                del_timer(&ipmr_expire_timer);
                        break;
                }
        }
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(uc, c);
                kmem_cache_free(mrt_cachep, uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
        int i;

        /*
         *      Shut down all active vif entries
         */
        for(i=0; i<maxvif; i++) {
                if (!(vif_table[i].flags&VIFF_STATIC))
                        vif_delete(i);
        }

        /*
         *      Wipe the cache
         */
        for (i=0;i<MFC_LINES;i++) {
                struct mfc_cache *c, **cp;

                cp = &mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags&MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        kmem_cache_free(mrt_cachep, c);
                }
        }

        if (atomic_read(&cache_resolve_queue_len) != 0) {
                struct mfc_cache *c;

                spin_lock_bh(&mfc_unres_lock);
                while (mfc_unres_queue != NULL) {
                        c = mfc_unres_queue;
                        mfc_unres_queue = c->next;
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_destroy_unres(c);

                        spin_lock_bh(&mfc_unres_lock);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

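/* Destructor for the mroute control socket: disable multicast
 * forwarding and flush all non-static VIFs and cache entries.
 */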
static void mrtsock_destruct(struct sock *sk)
{
        rtnl_lock();
        if (sk == mroute_socket) {
                ipv4_devconf.mc_forwarding--;

                write_lock_bh(&mrt_lock);
                mroute_socket=NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(sk);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;

        if(optname!=MRT_INIT)
        {
                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch(optname)
        {
                case MRT_INIT:
                        if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
                                return -EOPNOTSUPP;
                        if(optlen!=sizeof(int))
                                return -ENOPROTOOPT;

                        rtnl_lock();
                        if (mroute_socket) {
                                rtnl_unlock();
                                return -EADDRINUSE;
                        }

                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
                        if (ret == 0) {
                                write_lock_bh(&mrt_lock);
                                mroute_socket=sk;
                                write_unlock_bh(&mrt_lock);

                                ipv4_devconf.mc_forwarding++;
                        }
                        rtnl_unlock();
                        return ret;
                case MRT_DONE:
                        if (sk!=mroute_socket)
                                return -EACCES;
                        return ip_ra_control(sk, 0, NULL);
                case MRT_ADD_VIF:
                case MRT_DEL_VIF:
                        if(optlen!=sizeof(vif))
                                return -EINVAL;
                        if (copy_from_user(&vif,optval,sizeof(vif)))
                                return -EFAULT;
                        if(vif.vifc_vifi >= MAXVIFS)
                                return -ENFILE;
                        rtnl_lock();
                        if (optname==MRT_ADD_VIF) {
                                ret = vif_add(&vif, sk==mroute_socket);
                        } else {
                                ret = vif_delete(vif.vifc_vifi);
                        }
                        rtnl_unlock();
                        return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
                case MRT_ADD_MFC:
                case MRT_DEL_MFC:
                        if(optlen!=sizeof(mfc))
                                return -EINVAL;
                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
                                return -EFAULT;
                        rtnl_lock();
                        if (optname==MRT_DEL_MFC)
                                ret = ipmr_mfc_delete(&mfc);
                        else
                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
                        rtnl_unlock();
                        return ret;
                /*
                 *      Control PIM assert.
                 */
                case MRT_ASSERT:
                {
                        int v;
                        if(get_user(v,(int *)optval))
                                return -EFAULT;
                        mroute_do_assert=(v)?1:0;
                        return 0;
                }
#ifdef CONFIG_IP_PIMSM
                case MRT_PIM:
                {
                        int v;
                        if(get_user(v,(int *)optval))
                                return -EFAULT;
                        v = (v)?1:0;
                        rtnl_lock();
                        if (v != mroute_do_pim) {
                                mroute_do_pim = v;
                                mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
                                if (mroute_do_pim)
                                        inet_add_protocol(&pim_protocol);
                                else
                                        inet_del_protocol(&pim_protocol);
#endif
                        }
                        rtnl_unlock();
                        return 0;
                }
#endif
                /*
                 *      Spurious command, or MRT_VERSION which you cannot
                 *      set.
                 */
                default:
                        return -ENOPROTOOPT;
        }
}

/*
 *      Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
{
        int olr;
        int val;

        if(optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
           optname!=MRT_PIM &&
#endif
           optname!=MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        olr = min_t(unsigned int, olr, sizeof(int));
        if (olr < 0)
                return -EINVAL;

        if(put_user(olr,optlen))
                return -EFAULT;
        if(optname==MRT_VERSION)
                val=0x0305;
#ifdef CONFIG_IP_PIMSM
        else if(optname==MRT_PIM)
                val=mroute_do_pim;
#endif
        else
                val=mroute_do_assert;
        if(copy_to_user(optval,&val,olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;

        switch(cmd)
        {
                case SIOCGETVIFCNT:
                        if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
                                return -EFAULT;
                        if(vr.vifi>=maxvif)
                                return -EINVAL;
                        read_lock(&mrt_lock);
                        vif=&vif_table[vr.vifi];
                        if(VIF_EXISTS(vr.vifi)) {
                                vr.icount=vif->pkt_in;
                                vr.ocount=vif->pkt_out;
                                vr.ibytes=vif->bytes_in;
                                vr.obytes=vif->bytes_out;
                                read_unlock(&mrt_lock);

                                if (copy_to_user((void *)arg,&vr,sizeof(vr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                case SIOCGETSGCNT:
                        if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
                                return -EFAULT;

                        read_lock(&mrt_lock);
                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
                        if (c) {
                                sr.pktcnt = c->mfc_un.res.pkt;
                                sr.bytecnt = c->mfc_un.res.bytes;
                                sr.wrong_if = c->mfc_un.res.wrong_if;
                                read_unlock(&mrt_lock);

                                if (copy_to_user((void *)arg,&sr,sizeof(sr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                default:
                        return -ENOIOCTLCMD;
        }
}


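/* netdevice notifier: delete any VIF bound to a device that is going away. */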
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct vif_device *v;
        int ct;
        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v=&vif_table[0];
        for(ct=0;ct<maxvif;ct++,v++) {
                if (v->dev==ptr)
                        vif_delete(ct);
        }
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier={
        ipmr_device_event,
        NULL,
        0
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
        struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));

        iph->version    =       4;
        iph->tos        =       skb->nh.iph->tos;
        iph->ttl        =       skb->nh.iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb->dst, NULL);
        ip_send_check(iph);

        skb->h.ipiph = skb->nh.iph;
        skb->nh.iph = iph;
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}

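/* Final output step for a forwarded packet: handle IP options and
 * fragment if the frame does not fit the path MTU.
 */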
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);
        struct dst_entry *dst = skb->dst;

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        if (skb->len <= dst->pmtu)
                return dst->output(skb);
        else
                return ip_fragment(skb, dst->output);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
                           int vifi, int last)
{
        struct iphdr *iph = skb->nh.iph;
        struct vif_device *vif = &vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;
        struct sk_buff *skb2;

        if (vif->dev == NULL)
                return;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out+=skb->len;
                ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
                ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
                return;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
                        return;
                encap = sizeof(struct iphdr);
        } else {
                if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
                        return;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   let us send ICMP here, so such packets will
                   disappear into a black hole.
                 */

                IP_INC_STATS_BH(IpFragFails);
                ip_rt_put(rt);
                return;
        }

        encap += dev->hard_header_len;

        if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
                skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
        else if (atomic_read(&skb->users) != 1)
                skb2 = skb_clone(skb, GFP_ATOMIC);
        else {
                atomic_inc(&skb->users);
                skb2 = skb;
        }

        if (skb2 == NULL) {
                ip_rt_put(rt);
                return;
        }

        vif->pkt_out++;
        vif->bytes_out+=skb->len;

        dst_release(skb2->dst);
        skb2->dst = &rt->u.dst;
        iph = skb2->nh.iph;
        ip_decrease_ttl(iph);

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb2, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
        }

        IPCB(skb2)->flags |= IPSKB_FORWARDED;

        /*
         * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces as well. Clearly, if the mrouter runs a multicast
         * application, it should receive packets regardless of which
         * interface the application joined on. If we did not do this, the
         * application would have to join on all interfaces. On the other
         * hand, a multihomed host (or router, but not mrouter) cannot join
         * on more than one interface, since that would result in receiving
         * multiple copies of each packet.
         */
        NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
                ipmr_forward_finish);
}

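/* Map a device back to its VIF index; returns -1 if the device is not a
 * VIF. Called under mrt_lock.
 */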
int ipmr_find_vif(struct net_device *dev)
{
        int ct;
        for (ct=maxvif-1; ct>=0; ct--) {
                if (vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (((struct rtable*)skb->dst)->key.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons are
                           fixed is not to redistribute a packet if it was
                           sent through the wrong interface. It means that
                           multicast applications WILL NOT work for (S,G)
                           entries whose default multicast route points to
                           a wrong oif. In any case, it is not a good idea
                           to use multicast applications on a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && mroute_do_assert &&
                    /* pimsm uses asserts when switching from RPT to SPT,
                       so we cannot check that the packet arrived on an oif.
                       It is bad, but otherwise we would need to move a
                       pretty large chunk of pimd into the kernel. Ough... --ANK
                     */
                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        vif_table[vif].pkt_in++;
        vif_table[vif].bytes_in+=skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1)
                                ipmr_queue_xmit(skb, cache, psend, 0);
                        psend=ct;
                }
        }
        if (psend != -1)
                ipmr_queue_xmit(skb, cache, psend, !local);

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded second time, but still can be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations such as
                               Cisco IOS <= 11.2(8)) do not put the router alert
                               option into IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (mroute_socket) {
                                    raw_rcv(mroute_socket, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

        /*
         *      No usable cache entry
         */
        if (cache==NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb)
{
        struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (skb_is_nonlinear(skb)) {
                if (skb_linearize(skb, GFP_ATOMIC) != 0) {
                        kfree_skb(skb);
                        return -ENOMEM;
                }
                pim = (struct igmphdr*)skb->h.raw;
        }

        if (!mroute_do_pim ||
            skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
                kfree_skb(skb);
                return -EINVAL;
        }

        encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
        /*
           Check that:
           a. packet is really destined to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
        if (!MULTICAST(encap->daddr) ||
            ntohs(encap->tot_len) == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
                kfree_skb(skb);
                return -EINVAL;
        }

        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        skb->mac.raw = skb->nh.raw;
        skb_pull(skb, (u8*)encap - skb->data);
        skb->nh.iph = (struct iphdr *)skb->data;
        skb->dev = reg_dev;
        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        skb->dst = NULL;
        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
int pim_rcv(struct sk_buff * skb)
{
        struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (skb_is_nonlinear(skb)) {
                if (skb_linearize(skb, GFP_ATOMIC) != 0) {
                        kfree_skb(skb);
                        return -ENOMEM;
                }
                pim = (struct pimreghdr*)skb->h.raw;
        }

        if (skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
            (pim->flags&PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             ip_compute_csum((void *)pim, skb->len))) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /* check if the inner packet is destined to mcast group */
        encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
        if (!MULTICAST(encap->daddr) ||
            ntohs(encap->tot_len) == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
                kfree_skb(skb);
                return -EINVAL;
        }

        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        skb->mac.raw = skb->nh.raw;
        skb_pull(skb, (u8*)encap - skb->data);
        skb->nh.iph = (struct iphdr *)skb->data;
        skb->dev = reg_dev;
        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
        skb->dst = NULL;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
}
#endif

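/* Encode a resolved cache entry into rtnetlink attributes: RTA_IIF for
 * the parent interface plus an RTA_MULTIPATH nexthop list with one entry
 * per output VIF whose TTL threshold is set.
 */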
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
        int ct;
        struct rtnexthop *nhp;
        struct net_device *dev = vif_table[c->mfc_parent].dev;
        u8 *b = skb->tail;
        struct rtattr *mp_head;

        if (dev)
                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
                if (c->mfc_un.res.ttls[ct] < 255) {
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                                goto rtattr_failure;
                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
                        nhp->rtnh_flags = 0;
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
                        nhp->rtnh_len = sizeof(*nhp);
                }
        }
        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len = skb->tail - (u8*)mp_head;
        rtm->rtm_type = RTN_MULTICAST;
        return 1;

rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -EMSGSIZE;
}

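/* rtnetlink route lookup. If no cache entry exists yet, a skeleton skb
 * (marked by iph->version == 0) is queued on the unresolved list so the
 * reply can be completed once mrouted resolves the entry.
 */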
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
        int err;
        struct mfc_cache *cache;
        struct rtable *rt = (struct rtable*)skb->dst;

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

        if (cache==NULL) {
                struct net_device *dev;
                int vif;

                if (nowait) {
                        read_unlock(&mrt_lock);
                        return -EAGAIN;
                }

                dev = skb->dev;
                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
                        read_unlock(&mrt_lock);
                        return -ENODEV;
                }
                skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
                skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
                skb->nh.iph->saddr = rt->rt_src;
                skb->nh.iph->daddr = rt->rt_dst;
                skb->nh.iph->version = 0;
                err = ipmr_cache_unresolved(vif, skb);
                read_unlock(&mrt_lock);
                return err;
        }

        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = ipmr_fill_mroute(skb, cache, rtm);
        read_unlock(&mrt_lock);
        return err;
}

#ifdef CONFIG_PROC_FS
/*
 *      The /proc interfaces to multicast routing: /proc/net/ip_mr_vif and /proc/net/ip_mr_cache
 */

static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
{
        struct vif_device *vif;
        int len=0;
        off_t pos=0;
        off_t begin=0;
        int size;
        int ct;

        len += sprintf(buffer,
                 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        pos=len;

        read_lock(&mrt_lock);
        for (ct=0;ct<maxvif;ct++)
        {
                char *name = "none";
                vif=&vif_table[ct];
                if(!VIF_EXISTS(ct))
                        continue;
                if (vif->dev)
                        name = vif->dev->name;
                size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
                        ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
                        vif->flags, vif->local, vif->remote);
                len+=size;
                pos+=size;
                if(pos<offset)
                {
                        len=0;
                        begin=pos;
                }
                if(pos>offset+length)
                        break;
        }
        read_unlock(&mrt_lock);

        *start=buffer+(offset-begin);
        len-=(offset-begin);
        if(len>length)
                len=length;
        if (len<0)
                len = 0;
        return len;
}

static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
{
        struct mfc_cache *mfc;
        int len=0;
        off_t pos=0;
        off_t begin=0;
        int size;
        int ct;

        len += sprintf(buffer,
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        pos=len;

        read_lock(&mrt_lock);
        for (ct=0;ct<MFC_LINES;ct++)
        {
                for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
                {
                        int n;

                        /*
                         *      Interface forwarding map
                         */
                        size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
                                (unsigned long)mfc->mfc_mcastgrp,
                                (unsigned long)mfc->mfc_origin,
                                mfc->mfc_parent,
                                mfc->mfc_un.res.pkt,
                                mfc->mfc_un.res.bytes,
                                mfc->mfc_un.res.wrong_if);
                        for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
                        {
                                if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
                                        size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
                        }
                        size += sprintf(buffer+len+size, "\n");
                        len+=size;
                        pos+=size;
                        if(pos<offset)
                        {
                                len=0;
                                begin=pos;
                        }
                        if(pos>offset+length)
                                goto done;
                }
        }

        spin_lock_bh(&mfc_unres_lock);
        for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
                size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
                               (unsigned long)mfc->mfc_mcastgrp,
                               (unsigned long)mfc->mfc_origin,
                               -1,
                                (long)mfc->mfc_un.unres.unresolved.qlen,
                                0L, 0L);
                len+=size;
                pos+=size;
                if(pos<offset)
                {
                        len=0;
                        begin=pos;
                }
                if(pos>offset+length)
                        break;
        }
        spin_unlock_bh(&mfc_unres_lock);

done:
        read_unlock(&mrt_lock);
        *start=buffer+(offset-begin);
        len-=(offset-begin);
        if(len>length)
                len=length;
        if (len < 0) {
                len = 0;
        }
        return len;
}

#endif

#ifdef CONFIG_IP_PIMSM_V2
struct inet_protocol pim_protocol =
{
        pim_rcv,                /* PIM handler          */
        NULL,                   /* PIM error control    */
        NULL,                   /* next                 */
        IPPROTO_PIM,            /* protocol ID          */
        0,                      /* copy                 */
        NULL,                   /* data                 */
        "PIM"                   /* name                 */
};
#endif


/*
 *      Setup for IP multicast routing
 */

void __init ip_mr_init(void)
{
        printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
                                       0, SLAB_HWCACHE_ALIGN,
                                       NULL, NULL);
        init_timer(&ipmr_expire_timer);
        ipmr_expire_timer.function=ipmr_expire_process;
        register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
        proc_net_create("ip_mr_vif",0,ipmr_vif_info);
        proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
#endif
}