/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@redhat.com>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *      Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];            /* Devices              */
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;                            /* Set in PIM assert    */
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;                /* Size of unresolved   */

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
/* We have returned to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected with the
   weak lock mrt_lock. The queue of unresolved entries is protected with
   the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
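
/* Illustrative sketch of the locking pattern described above (added for
   clarity; not part of the original source).  The data path only ever
   takes mrt_lock for reading, process-context updates take it for
   writing, and the unresolved queue always needs its spinlock:

        read_lock(&mrt_lock);             look up vif_table / mfc_cache_array
        ...                               forward the skb
        read_unlock(&mrt_lock);

        write_lock_bh(&mrt_lock);         process context: modify the tables
        ...
        write_unlock_bh(&mrt_lock);

        spin_lock_bh(&mfc_unres_lock);    unresolved queue, any context
        ...
        spin_unlock_bh(&mfc_unres_lock);
 */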

static kmem_cache_t *mrt_cachep;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name("tunl0");

        if (dev) {
                int err;
                struct ifreq ifr;
                mm_segment_t    oldfs;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (void*)&p;

                oldfs = get_fs(); set_fs(KERNEL_DS);
                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                set_fs(oldfs);

                dev = NULL;

                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get(dev);
                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
                                goto failure;
                        in_dev->cnf.rp_filter = 0;

                        if (dev_open(dev))
                                goto failure;
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        read_lock(&mrt_lock);
        ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
        ((struct net_device_stats*)dev->priv)->tx_packets++;
        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
        return (struct net_device_stats*)dev->priv;
}

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->hard_start_xmit    = reg_vif_xmit;
        dev->get_stats          = reg_vif_get_stats;
        dev->destructor         = free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
                           reg_vif_setup);

        if (dev == NULL)
                return NULL;

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        if ((in_dev = inetdev_init(dev)) == NULL)
                goto failure;

        in_dev->cnf.rp_filter = 0;

        if (dev_open(dev))
                goto failure;

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 */

static int vif_delete(int vifi)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= maxvif)
                return -EADDRNOTAVAIL;

        v = &vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == reg_vif_num)
                reg_vif_num = -1;
#endif

        if (vifi+1 == maxvif) {
                int tmp;
                for (tmp=vifi-1; tmp>=0; tmp--) {
                        if (VIF_EXISTS(tmp))
                                break;
                }
                maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get(dev)) != NULL) {
                in_dev->cnf.mc_forwarding--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                unregister_netdevice(dev);

        dev_put(dev);
        return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
        struct sk_buff *skb;

        atomic_dec(&cache_resolve_queue_len);

        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (skb->nh.iph->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
                        netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
                } else
                        kfree_skb(skb);
        }

        kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
                return;
        }

        if (atomic_read(&cache_resolve_queue_len) == 0)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &mfc_unres_queue;

        while ((c=*cp) != NULL) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(c);
        }

        if (atomic_read(&cache_resolve_queue_len))
                mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}
/* Fill the oif list. Called under write-locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi=0; vifi<maxvif; vifi++) {
                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}
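
/* Illustrative example (assumed values, not from the original source):
   given ttls[] = {1, 0, 3, 255, ...} with vifs 0-3 existing, only vifs 0
   and 2 are kept (a ttl of 0 or 255 means "do not forward"), so the entry
   forwards on vif 0 for packets with TTL > 1 and on vif 2 for TTL > 3,
   and minvif/maxvif become 0 and 3, bounding the loop in ip_mr_forward(). */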

static int vif_add(struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;

        /* Is vif busy ? */
        if (VIF_EXISTS(vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif();
                if (!dev)
                        return -ENOBUFS;
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(vifc);
                if (!dev)
                        return -ENOBUFS;
                break;
        case 0:
                dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
                if (!dev)
                        return -EADDRNOTAVAIL;
                __dev_put(dev);
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get(dev)) == NULL)
                return -EADDRNOTAVAIL;
        in_dev->cnf.mc_forwarding++;
        dev_set_allmulti(dev, +1);
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit=vifc->vifc_rate_limit;
        v->local=vifc->vifc_lcl_addr.s_addr;
        v->remote=vifc->vifc_rmt_addr.s_addr;
        v->flags=vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold=vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        dev_hold(dev);
        v->dev=dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags&VIFF_REGISTER)
                reg_vif_num = vifi;
#endif
        if (vifi+1 > maxvif)
                maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

#if defined(CONFIG_MIPS_BRCM)
static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp,unsigned int ifindex)
{
        int line=MFC_HASH(mcastgrp,htonl(0x00000000),ifindex);
        struct mfc_cache *c;

        for (c=mfc_cache_array[line]; c; c = c->next) {
                if ( c->mfc_mcastgrp==mcastgrp)
                        break;
        }
        return c;
}
#else
static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
        int line=MFC_HASH(mcastgrp,origin);
        struct mfc_cache *c;

        for (c=mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
                        break;
        }
        return c;
}
#endif
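
/* Note on the split above (an observation added for clarity): the
   CONFIG_MIPS_BRCM variant hashes on the group, a zero origin and the
   incoming ifindex, and its match loop compares the group only - i.e. it
   effectively does (*,G) matching per hash chain - while the stock
   variant matches exact (S,G) pairs. */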

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
        if(c==NULL)
                return NULL;
        memset(c, 0, sizeof(*c));
        c->mfc_un.res.minvif = MAXVIFS;
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
        if(c==NULL)
                return NULL;
        memset(c, 0, sizeof(*c));
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        return c;
}

/*
 *      A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;

        /*
         *      Play the pending entries through our router
         */

        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (skb->nh.iph->version == 0) {
                        int err;
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
                        }
                        err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
                } else
                        ip_mr_forward(skb, c, 0);
        }
}
/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */
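
/* Sketch of the upcall skb handed to mrouted, as read from the code below
   (a summary added for clarity, not an authoritative layout):

        IGMPMSG_NOCACHE / IGMPMSG_WRONGVIF:
                [ copy of the IP header, reused as struct igmpmsg
                  (protocol forced to 0, im_vif/im_msgtype filled in) ]
                [ struct igmphdr: type = im_msgtype = assert, code = 0 ]

        IGMPMSG_WHOLEPKT (PIM register path):
                [ duplicate IP header, ihl/tot_len fixed up ][ original packet ]

   mrouted reads these messages from the raw IGMP socket it registered
   with MRT_INIT. */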

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        int ihl = pkt->nh.iph->ihl<<2;
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if(!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
                skb->nh.raw = skb->h.raw = (u8*)msg;
                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = reg_vif_num;
                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
        memcpy(skb->data,pkt->data,ihl);
        skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg*)skb->nh.iph;
        msg->im_vif = vifi;
        skb->dst = dst_clone(pkt->dst);

        /*
         *      Add our header
         */

        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
        skb->h.raw = skb->nh.raw;
        }

        if (mroute_socket == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}
/*
 *      Queue a packet for resolution; it is appended to a (possibly newly
 *      created) unresolved cache entry under mfc_unres_lock.
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;

        spin_lock_bh(&mfc_unres_lock);
        for (c=mfc_unres_queue; c; c=c->next) {
                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
                    c->mfc_origin == skb->nh.iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&cache_resolve_queue_len)>=10 ||
                    (c=ipmr_cache_alloc_unres())==NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent=-1;
                c->mfc_origin=skb->nh.iph->saddr;
                c->mfc_mcastgrp=skb->nh.iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        kmem_cache_free(mrt_cachep, c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&cache_resolve_queue_len);
                c->next = mfc_unres_queue;
                mfc_unres_queue = c;

                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen>3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

#if defined(CONFIG_MIPS_BRCM)
        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr,mfc->mfcc_parent);
#else
        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
#endif

        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        kmem_cache_free(mrt_cachep, c);
                        return 0;
                }
        }
        return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

#if defined(CONFIG_MIPS_BRCM)
        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr,mfc->mfcc_parent);
#else
        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
#endif

        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c=ipmr_cache_alloc();
        if (c==NULL)
                return -ENOMEM;

        c->mfc_origin=mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent=mfc->mfcc_parent;
        ipmr_update_thresholds(c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = mfc_cache_array[line];
        mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
             cp = &uc->next) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        if (atomic_dec_and_test(&cache_resolve_queue_len))
                                del_timer(&ipmr_expire_timer);
                        break;
                }
        }
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(uc, c);
                kmem_cache_free(mrt_cachep, uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
        int i;

        /*
         *      Shut down all active vif entries
         */
        for(i=0; i<maxvif; i++) {
                if (!(vif_table[i].flags&VIFF_STATIC))
                        vif_delete(i);
        }

        /*
         *      Wipe the cache
         */
        for (i=0;i<MFC_LINES;i++) {
                struct mfc_cache *c, **cp;

                cp = &mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags&MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        kmem_cache_free(mrt_cachep, c);
                }
        }

        if (atomic_read(&cache_resolve_queue_len) != 0) {
                struct mfc_cache *c;

                spin_lock_bh(&mfc_unres_lock);
                while (mfc_unres_queue != NULL) {
                        c = mfc_unres_queue;
                        mfc_unres_queue = c->next;
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_destroy_unres(c);

                        spin_lock_bh(&mfc_unres_lock);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

static void mrtsock_destruct(struct sock *sk)
{
        rtnl_lock();
        if (sk == mroute_socket) {
                ipv4_devconf.mc_forwarding--;

                write_lock_bh(&mrt_lock);
                mroute_socket=NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(sk);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */
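
/* A minimal userspace sketch of the option sequence this function expects
   (hypothetical daemon code added for illustration; field values are
   placeholders and error handling is omitted):

        int one = 1;
        int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
        struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
        struct mfcctl mc = { .mfcc_parent = 0 };

        setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
        setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
        setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
        ...
        setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */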

int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;

        if(optname!=MRT_INIT)
        {
                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch(optname)
        {
                case MRT_INIT:
                        if (sk->sk_type != SOCK_RAW ||
                            inet_sk(sk)->num != IPPROTO_IGMP)
                                return -EOPNOTSUPP;
                        if(optlen!=sizeof(int))
                                return -ENOPROTOOPT;

                        rtnl_lock();
                        if (mroute_socket) {
                                rtnl_unlock();
                                return -EADDRINUSE;
                        }

                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
                        if (ret == 0) {
                                write_lock_bh(&mrt_lock);
                                mroute_socket=sk;
                                write_unlock_bh(&mrt_lock);

                                ipv4_devconf.mc_forwarding++;
                        }
                        rtnl_unlock();
                        return ret;
                case MRT_DONE:
                        if (sk!=mroute_socket)
                                return -EACCES;
                        return ip_ra_control(sk, 0, NULL);
                case MRT_ADD_VIF:
                case MRT_DEL_VIF:
                        if(optlen!=sizeof(vif))
                                return -EINVAL;
                        if (copy_from_user(&vif,optval,sizeof(vif)))
                                return -EFAULT;
                        if(vif.vifc_vifi >= MAXVIFS)
                                return -ENFILE;
                        rtnl_lock();
                        if (optname==MRT_ADD_VIF) {
                                ret = vif_add(&vif, sk==mroute_socket);
                        } else {
                                ret = vif_delete(vif.vifc_vifi);
                        }
                        rtnl_unlock();
                        return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
                case MRT_ADD_MFC:
                case MRT_DEL_MFC:
                        if(optlen!=sizeof(mfc))
                                return -EINVAL;
                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
                                return -EFAULT;
                        rtnl_lock();
                        if (optname==MRT_DEL_MFC)
                                ret = ipmr_mfc_delete(&mfc);
                        else
                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
                        rtnl_unlock();
                        return ret;
                /*
                 *      Control PIM assert.
                 */
                case MRT_ASSERT:
                {
                        int v;
                        if(get_user(v,(int __user *)optval))
                                return -EFAULT;
                        mroute_do_assert=(v)?1:0;
                        return 0;
                }
#ifdef CONFIG_IP_PIMSM
                case MRT_PIM:
                {
                        int v, ret;
                        if(get_user(v,(int __user *)optval))
                                return -EFAULT;
                        v = (v)?1:0;
                        rtnl_lock();
                        ret = 0;
                        if (v != mroute_do_pim) {
                                mroute_do_pim = v;
                                mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
                                if (mroute_do_pim)
                                        ret = inet_add_protocol(&pim_protocol,
                                                                IPPROTO_PIM);
                                else
                                        ret = inet_del_protocol(&pim_protocol,
                                                                IPPROTO_PIM);
                                if (ret < 0)
                                        ret = -EAGAIN;
#endif
                        }
                        rtnl_unlock();
                        return ret;
                }
#endif
                /*
                 *      Spurious command, or MRT_VERSION which you cannot
                 *      set.
                 */
                default:
                        return -ENOPROTOOPT;
        }
}

/*
 *      Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
        int olr;
        int val;

        if(optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
           optname!=MRT_PIM &&
#endif
           optname!=MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        /* reject negative lengths before the unsigned clamp */
        if (olr < 0)
                return -EINVAL;
        olr = min_t(unsigned int, olr, sizeof(int));

        if(put_user(olr,optlen))
                return -EFAULT;
        if(optname==MRT_VERSION)
                val=0x0305;
#ifdef CONFIG_IP_PIMSM
        else if(optname==MRT_PIM)
                val=mroute_do_pim;
#endif
        else
                val=mroute_do_assert;
        if(copy_to_user(optval,&val,olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
#if defined(CONFIG_MIPS_BRCM)
        struct mfc_cache *c=NULL;
#else
        struct mfc_cache *c;
#endif

        switch(cmd)
        {
                case SIOCGETVIFCNT:
                        if (copy_from_user(&vr,arg,sizeof(vr)))
                                return -EFAULT;
                        if(vr.vifi>=maxvif)
                                return -EINVAL;
                        read_lock(&mrt_lock);
                        vif=&vif_table[vr.vifi];
                        if(VIF_EXISTS(vr.vifi)) {
                                vr.icount=vif->pkt_in;
                                vr.ocount=vif->pkt_out;
                                vr.ibytes=vif->bytes_in;
                                vr.obytes=vif->bytes_out;
                                read_unlock(&mrt_lock);

                                if (copy_to_user(arg,&vr,sizeof(vr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                case SIOCGETSGCNT:
                        if (copy_from_user(&sr,arg,sizeof(sr)))
                                return -EFAULT;

                        read_lock(&mrt_lock);
#if defined(CONFIG_MIPS_BRCM)
                        // c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
#else
                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
#endif
                        if (c) {
                                sr.pktcnt = c->mfc_un.res.pkt;
                                sr.bytecnt = c->mfc_un.res.bytes;
                                sr.wrong_if = c->mfc_un.res.wrong_if;
                                read_unlock(&mrt_lock);

                                if (copy_to_user(arg,&sr,sizeof(sr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                default:
                        return -ENOIOCTLCMD;
        }
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct vif_device *v;
        int ct;
        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v=&vif_table[0];
        for(ct=0;ct<maxvif;ct++,v++) {
                if (v->dev==ptr)
                        vif_delete(ct);
        }
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier={
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */
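
/* Resulting layout (sketch implied by the header push below):

        before:          [ inner IP | payload ]
        after ip_encap:  [ outer IP, proto = IPPROTO_IPIP | inner IP | payload ]

   skb->h.ipiph is left pointing at the inner header and skb->nh.iph at
   the new outer one. */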

static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
        struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));

        iph->version    =       4;
        iph->tos        =       skb->nh.iph->tos;
        iph->ttl        =       skb->nh.iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb->dst, NULL);
        ip_send_check(iph);

        skb->h.ipiph = skb->nh.iph;
        skb->nh.iph = iph;
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options * opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct iphdr *iph = skb->nh.iph;
        struct vif_device *vif = &vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out+=skb->len;
                ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
                ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
                kfree_skb(skb);
                return;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not allow us
                   to send ICMP here, so such packets simply disappear into
                   a black hole.
                 */

                IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out+=skb->len;

        dst_release(skb->dst);
        skb->dst = &rt->u.dst;
        iph = skb->nh.iph;
        ip_decrease_ttl(iph);

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
         * locally not only before forwarding, but also after forwarding on
         * all output interfaces. Clearly, if the mrouter runs a multicast
         * program, it should receive the packets regardless of which
         * interface the program joined on. Otherwise the program would
         * have to join on all interfaces. On the other hand, a multihomed
         * host (or a router, but not an mrouter) cannot join on more than
         * one interface - that would result in receiving duplicate packets.
         */
        NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

static int ipmr_find_vif(struct net_device *dev)
{
        int ct;
        for (ct=maxvif-1; ct>=0; ct--) {
                if (vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (((struct rtable*)skb->dst)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround, until the routing daemons
                           are fixed, is not to redistribute a packet if it
                           was sent through the wrong interface. It means
                           that multicast applications WILL NOT work for
                           (S,G) entries whose default multicast route
                           points to the wrong oif. In any case, it is not
                           a good idea to run multicasting applications on
                           a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && mroute_do_assert &&
                    /* PIM-SM uses asserts when switching from the RPT to
                       the SPT, so we cannot insist that the packet arrived
                       on an oif. That is bad, but otherwise we would have
                       to move a pretty large chunk of pimd into the
                       kernel. Ough... --ANK
                     */
                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        vif_table[vif].pkt_in++;
        vif_table[vif].bytes_in+=skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend=ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
#if defined(CONFIG_MIPS_BRCM)
        struct net_device *dev=skb->dev;
#endif

        /* The packet was looped back after forwarding; it must not be
           forwarded a second time, but it may still be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations
                               such as Cisco IOS <= 11.2(8)) do not put the
                               router alert option into IGMP packets
                               destined to routable groups. That is very
                               bad, because it means we could forward NO
                               IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (mroute_socket) {
                                    raw_rcv(mroute_socket, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
#if defined(CONFIG_MIPS_BRCM)
        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr,dev->ifindex);
#else
        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
#endif

        /*
         *      No usable cache entry
         */
        if (cache==NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */
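
/* Sketch of the PIMv1 register format this handler expects (a summary
   added for clarity, not from the original comments): an IGMP header
   whose group/code fields carry the PIM version and register type,
   followed by the encapsulated datagram:

        [ outer IP | struct igmphdr (group = PIM_V1_VERSION,
                     code = PIM_V1_REGISTER) | inner IP | payload ]

   The inner header is validated below (multicast daddr, sane tot_len)
   before the decapsulated packet is re-injected via netif_rx(). */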

int pim_rcv_v1(struct sk_buff * skb)
{
        struct igmphdr *pim;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
                goto drop;

        pim = (struct igmphdr*)skb->h.raw;

        if (!mroute_do_pim ||
            skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
                goto drop;

        encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
        /*
           Check that:
           a. the packet is really destined to a multicast group
           b. the packet is not a NULL-REGISTER
           c. the packet is not truncated
         */
1474         if (!MULTICAST(encap->daddr) ||
1475             encap->tot_len == 0 ||
1476             ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
1477                 goto drop;
1478
1479         read_lock(&mrt_lock);
1480         if (reg_vif_num >= 0)
1481                 reg_dev = vif_table[reg_vif_num].dev;
1482         if (reg_dev)
1483                 dev_hold(reg_dev);
1484         read_unlock(&mrt_lock);
1485
1486         if (reg_dev == NULL) 
1487                 goto drop;
1488
1489         skb->mac.raw = skb->nh.raw;
1490         skb_pull(skb, (u8*)encap - skb->data);
1491         skb->nh.iph = (struct iphdr *)skb->data;
1492         skb->dev = reg_dev;
1493         memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1494         skb->protocol = htons(ETH_P_IP);
1495         skb->ip_summed = 0;
1496         skb->pkt_type = PACKET_HOST;
1497         dst_release(skb->dst);
1498         skb->dst = NULL;
1499         ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1500         ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1501         nf_reset(skb);
1502         netif_rx(skb);
1503         dev_put(reg_dev);
1504         return 0;
1505  drop:
1506         kfree_skb(skb);
1507         return 0;
1508 }
1509 #endif
1510
1511 #ifdef CONFIG_IP_PIMSM_V2
1512 static int pim_rcv(struct sk_buff * skb)
1513 {
1514         struct pimreghdr *pim;
1515         struct iphdr   *encap;
1516         struct net_device  *reg_dev = NULL;
1517
1518         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 
1519                 goto drop;
1520
1521         pim = (struct pimreghdr*)skb->h.raw;
1522         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1523             (pim->flags&PIM_NULL_REGISTER) ||
1524             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 
1525              (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 
1526                 goto drop;
1527
1528         /* check if the inner packet is destined to mcast group */
1529         encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1530         if (!MULTICAST(encap->daddr) ||
1531             encap->tot_len == 0 ||
1532             ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
1533                 goto drop;
1534
1535         read_lock(&mrt_lock);
1536         if (reg_vif_num >= 0)
1537                 reg_dev = vif_table[reg_vif_num].dev;
1538         if (reg_dev)
1539                 dev_hold(reg_dev);
1540         read_unlock(&mrt_lock);
1541
1542         if (reg_dev == NULL) 
1543                 goto drop;
1544
1545         skb->mac.raw = skb->nh.raw;
1546         skb_pull(skb, (u8*)encap - skb->data);
1547         skb->nh.iph = (struct iphdr *)skb->data;
1548         skb->dev = reg_dev;
1549         memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1550         skb->protocol = htons(ETH_P_IP);
1551         skb->ip_summed = 0;
1552         skb->pkt_type = PACKET_HOST;
1553         dst_release(skb->dst);
1554         ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1555         ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1556         skb->dst = NULL;
1557         nf_reset(skb);
1558         netif_rx(skb);
1559         dev_put(reg_dev);
1560         return 0;
1561  drop:
1562         kfree_skb(skb);
1563         return 0;
1564 }
1565 #endif
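/*
 * Editorial sketch (pim_csum_ok() is hypothetical, not part of this
 * file): the checksum test in pim_rcv() above, restated as a helper.
 * A register is accepted when either the checksum over the PIM header
 * alone is valid (registers must checksum only the PIM header), or
 * the checksum over the whole message is valid (older peers sum the
 * entire packet).
 */
#if 0   /* illustration only */
static inline int pim_csum_ok(struct sk_buff *skb, struct pimreghdr *pim)
{
        if (ip_compute_csum((void *)pim, sizeof(*pim)) == 0)
                return 1;       /* header-only checksum */
        /* fall back: checksum over the full message must fold to 0 */
        return (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)) == 0;
}
#endif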
1566
1567 static int
1568 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1569 {
1570         int ct;
1571         struct rtnexthop *nhp;
1572         struct net_device *dev = vif_table[c->mfc_parent].dev;
1573         u8 *b = skb->tail;
1574         struct rtattr *mp_head;
1575
1576         if (dev)
1577                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1578
1579         mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1580
1581         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1582                 if (c->mfc_un.res.ttls[ct] < 255) {
1583                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1584                                 goto rtattr_failure;
1585                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1586                         nhp->rtnh_flags = 0;
1587                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1588                         nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1589                         nhp->rtnh_len = sizeof(*nhp);
1590                 }
1591         }
1592         mp_head->rta_type = RTA_MULTIPATH;
1593         mp_head->rta_len = skb->tail - (u8*)mp_head;
1594         rtm->rtm_type = RTN_MULTICAST;
1595         return 1;
1596
1597 rtattr_failure:
1598         skb_trim(skb, b - skb->data);
1599         return -EMSGSIZE;
1600 }
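/*
 * Editorial note: layout of what ipmr_fill_mroute() appends to the
 * rtnetlink message (illustrative, not to scale):
 *
 *   RTA_IIF (4 bytes)     ifindex of the incoming device (parent vif)
 *   RTA_MULTIPATH         one struct rtnexthop per forwarding vif:
 *                             rtnh_ifindex = vif_table[ct].dev->ifindex
 *                             rtnh_hops    = TTL threshold (< 255)
 *                             rtnh_flags   = 0
 *
 * mp_head->rta_len is only patched up after the loop, once the total
 * size of the multipath attribute is known.
 */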
1601
1602 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1603 {
1604         int err;
1605         struct mfc_cache *cache;
1606         struct rtable *rt = (struct rtable*)skb->dst;
1607 #if defined(CONFIG_MIPS_BRCM)   
1608         struct net_device *dev = skb->dev;
1609 #endif  
1610
1611         read_lock(&mrt_lock);
1612 #if defined(CONFIG_MIPS_BRCM)   
1613         cache = ipmr_cache_find(rt->rt_src, rt->rt_dst, dev->ifindex);
1614 #else   
1615         cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1616 #endif  
1617
1618         if (cache == NULL) {
1619                 struct net_device *dev;
1620                 int vif;
1621
1622                 if (nowait) {
1623                         read_unlock(&mrt_lock);
1624                         return -EAGAIN;
1625                 }
1626
1627                 dev = skb->dev;
1628                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1629                         read_unlock(&mrt_lock);
1630                         return -ENODEV;
1631                 }
1632                 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1633                 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1634                 skb->nh.iph->saddr = rt->rt_src;
1635                 skb->nh.iph->daddr = rt->rt_dst;
1636                 skb->nh.iph->version = 0;
1637                 err = ipmr_cache_unresolved(vif, skb);
1638                 read_unlock(&mrt_lock);
1639                 return err;
1640         }
1641
1642         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1643                 cache->mfc_flags |= MFC_NOTIFY;
1644         err = ipmr_fill_mroute(skb, cache, rtm);
1645         read_unlock(&mrt_lock);
1646         return err;
1647 }
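/*
 * Editorial note on the unresolved path above: when no cache entry
 * exists, a pseudo IP header is pushed onto the skb (saddr/daddr taken
 * from the route, version deliberately set to 0) before the skb is
 * queued with ipmr_cache_unresolved().  The version == 0 marker is
 * what lets the cache-resolve code tell this queued rtnetlink request
 * apart from a real packet once the entry resolves.
 */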
1648
1649 #ifdef CONFIG_PROC_FS   
1650 /*
1651  *      The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1652  */
1653 struct ipmr_vif_iter {
1654         int ct;
1655 };
1656
1657 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1658                                            loff_t pos)
1659 {
1660         for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1661                 if (!VIF_EXISTS(iter->ct))
1662                         continue;
1663                 if (pos-- == 0) 
1664                         return &vif_table[iter->ct];
1665         }
1666         return NULL;
1667 }
1668
1669 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1670 {
1671         read_lock(&mrt_lock);
1672         return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 
1673                 : SEQ_START_TOKEN;
1674 }
1675
1676 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1677 {
1678         struct ipmr_vif_iter *iter = seq->private;
1679
1680         ++*pos;
1681         if (v == SEQ_START_TOKEN)
1682                 return ipmr_vif_seq_idx(iter, 0);
1683         
1684         while (++iter->ct < maxvif) {
1685                 if (!VIF_EXISTS(iter->ct))
1686                         continue;
1687                 return &vif_table[iter->ct];
1688         }
1689         return NULL;
1690 }
1691
1692 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1693 {
1694         read_unlock(&mrt_lock);
1695 }
1696
1697 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1698 {
1699         if (v == SEQ_START_TOKEN) {
1700                 seq_puts(seq, 
1701                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1702         } else {
1703                 const struct vif_device *vif = v;
1704                 const char *name = vif->dev ? vif->dev->name : "none";
1705
1706                 seq_printf(seq,
1707                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1708                            vif - vif_table,
1709                            name, vif->bytes_in, vif->pkt_in, 
1710                            vif->bytes_out, vif->pkt_out,
1711                            vif->flags, vif->local, vif->remote);
1712         }
1713         return 0;
1714 }
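/*
 * Editorial example of /proc/net/ip_mr_vif output (values invented;
 * columns follow the format string above):
 *
 * Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *  0 eth0          123456     789    654321     987 00000 C0A80101 00000000
 */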
1715
1716 static struct seq_operations ipmr_vif_seq_ops = {
1717         .start = ipmr_vif_seq_start,
1718         .next  = ipmr_vif_seq_next,
1719         .stop  = ipmr_vif_seq_stop,
1720         .show  = ipmr_vif_seq_show,
1721 };
1722
1723 static int ipmr_vif_open(struct inode *inode, struct file *file)
1724 {
1725         struct seq_file *seq;
1726         int rc = -ENOMEM;
1727         struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1728        
1729         if (!s)
1730                 goto out;
1731
1732         rc = seq_open(file, &ipmr_vif_seq_ops);
1733         if (rc)
1734                 goto out_kfree;
1735
1736         s->ct = 0;
1737         seq = file->private_data;
1738         seq->private = s;
1739 out:
1740         return rc;
1741 out_kfree:
1742         kfree(s);
1743         goto out;
1744
1745 }
1746
1747 static struct file_operations ipmr_vif_fops = {
1748         .owner   = THIS_MODULE,
1749         .open    = ipmr_vif_open,
1750         .read    = seq_read,
1751         .llseek  = seq_lseek,
1752         .release = seq_release_private,
1753 };
1754
1755 struct ipmr_mfc_iter {
1756         struct mfc_cache **cache;
1757         int ct;
1758 };
1759
1760
1761 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1762 {
1763         struct mfc_cache *mfc;
1764
1765         it->cache = mfc_cache_array;
1766         read_lock(&mrt_lock);
1767         for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 
1768                 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1769                         if (pos-- == 0) 
1770                                 return mfc;
1771         read_unlock(&mrt_lock);
1772
1773         it->cache = &mfc_unres_queue;
1774         spin_lock_bh(&mfc_unres_lock);
1775         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1776                 if (pos-- == 0)
1777                         return mfc;
1778         spin_unlock_bh(&mfc_unres_lock);
1779
1780         it->cache = NULL;
1781         return NULL;
1782 }
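/*
 * Editorial note on the iterator's locking contract: ipmr_mfc_seq_idx()
 * returns with mrt_lock read-held while positioned in mfc_cache_array,
 * or with mfc_unres_lock held while positioned in the unresolved queue.
 * it->cache records which table (and therefore which lock) is active,
 * so ipmr_mfc_seq_next() can hand over from one lock to the other and
 * ipmr_mfc_seq_stop() can release whichever lock is still held.
 */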
1783
1784
1785 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1786 {
1787         struct ipmr_mfc_iter *it = seq->private;
1788         it->cache = NULL;
1789         it->ct = 0;
1790         return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 
1791                 : SEQ_START_TOKEN;
1792 }
1793
1794 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1795 {
1796         struct mfc_cache *mfc = v;
1797         struct ipmr_mfc_iter *it = seq->private;
1798
1799         ++*pos;
1800
1801         if (v == SEQ_START_TOKEN)
1802                 return ipmr_mfc_seq_idx(seq->private, 0);
1803
1804         if (mfc->next)
1805                 return mfc->next;
1806         
1807         if (it->cache == &mfc_unres_queue) 
1808                 goto end_of_list;
1809
1810         BUG_ON(it->cache != mfc_cache_array);
1811
1812         while (++it->ct < MFC_LINES) {
1813                 mfc = mfc_cache_array[it->ct];
1814                 if (mfc)
1815                         return mfc;
1816         }
1817
1818         /* exhausted cache_array, show unresolved */
1819         read_unlock(&mrt_lock);
1820         it->cache = &mfc_unres_queue;
1821         it->ct = 0;
1822                 
1823         spin_lock_bh(&mfc_unres_lock);
1824         mfc = mfc_unres_queue;
1825         if (mfc) 
1826                 return mfc;
1827
1828  end_of_list:
1829         spin_unlock_bh(&mfc_unres_lock);
1830         it->cache = NULL;
1831
1832         return NULL;
1833 }
1834
1835 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1836 {
1837         struct ipmr_mfc_iter *it = seq->private;
1838
1839         if (it->cache == &mfc_unres_queue)
1840                 spin_unlock_bh(&mfc_unres_lock);
1841         else if (it->cache == mfc_cache_array)
1842                 read_unlock(&mrt_lock);
1843 }
1844
1845 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1846 {
1847         int n;
1848
1849         if (v == SEQ_START_TOKEN) {
1850                 seq_puts(seq, 
1851                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1852         } else {
1853                 const struct mfc_cache *mfc = v;
1854                 const struct ipmr_mfc_iter *it = seq->private;
1855                 
1856                 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1857                            (unsigned long) mfc->mfc_mcastgrp,
1858                            (unsigned long) mfc->mfc_origin,
1859                            mfc->mfc_parent,
1860                            mfc->mfc_un.res.pkt,
1861                            mfc->mfc_un.res.bytes,
1862                            mfc->mfc_un.res.wrong_if);
1863
1864                 if (it->cache != &mfc_unres_queue) {
1865                         for (n = mfc->mfc_un.res.minvif;
1866                              n < mfc->mfc_un.res.maxvif; n++) {
1867                                 if (VIF_EXISTS(n) &&
1868                                     mfc->mfc_un.res.ttls[n] < 255)
1869                                         seq_printf(seq,
1870                                                    " %2d:%-3d",
1871                                                    n, mfc->mfc_un.res.ttls[n]);
1872                         }
1873                 }
1874                 seq_putc(seq, '\n');
1875         }
1876         return 0;
1877 }
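/*
 * Editorial example of /proc/net/ip_mr_cache output (values invented):
 *
 * Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 * E1000001 C0A80101 1         1005    84420        0  2:1   3:1
 *
 * Entries still sitting on the unresolved queue are printed without
 * the trailing "Oifs" list, since ipmr_mfc_seq_show() skips the vif
 * loop for that queue.
 */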
1878
1879 static struct seq_operations ipmr_mfc_seq_ops = {
1880         .start = ipmr_mfc_seq_start,
1881         .next  = ipmr_mfc_seq_next,
1882         .stop  = ipmr_mfc_seq_stop,
1883         .show  = ipmr_mfc_seq_show,
1884 };
1885
1886 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1887 {
1888         struct seq_file *seq;
1889         int rc = -ENOMEM;
1890         struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1891        
1892         if (!s)
1893                 goto out;
1894
1895         rc = seq_open(file, &ipmr_mfc_seq_ops);
1896         if (rc)
1897                 goto out_kfree;
1898
1899         seq = file->private_data;
1900         seq->private = s;
1901 out:
1902         return rc;
1903 out_kfree:
1904         kfree(s);
1905         goto out;
1906
1907 }
1908
1909 static struct file_operations ipmr_mfc_fops = {
1910         .owner   = THIS_MODULE,
1911         .open    = ipmr_mfc_open,
1912         .read    = seq_read,
1913         .llseek  = seq_lseek,
1914         .release = seq_release_private,
1915 };
1916 #endif  
1917
1918 #ifdef CONFIG_IP_PIMSM_V2
1919 static struct net_protocol pim_protocol = {
1920         .handler        =       pim_rcv,
1921 };
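/*
 * Editorial note: pim_protocol is only defined here; it is registered
 * with inet_add_protocol(&pim_protocol, IPPROTO_PIM) when user space
 * enables PIM via the MRT_PIM socket option (see the setsockopt
 * handler earlier in this file), not unconditionally at init time.
 */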
1922 #endif
1923
1924
1925 /*
1926  *      Setup for IP multicast routing
1927  */
1928  
1929 void __init ip_mr_init(void)
1930 {
1931         mrt_cachep = kmem_cache_create("ip_mrt_cache",
1932                                        sizeof(struct mfc_cache),
1933                                        0, SLAB_HWCACHE_ALIGN,
1934                                        NULL, NULL);
1935         if (!mrt_cachep)
1936                 panic("cannot allocate ip_mrt_cache");
1937
1938         init_timer(&ipmr_expire_timer);
1939         ipmr_expire_timer.function = ipmr_expire_process;
1940         register_netdevice_notifier(&ip_mr_notifier);
1941 #ifdef CONFIG_PROC_FS   
1942         proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1943         proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1944 #endif  
1945 }