import of upstream 2.4.34.4 from kernel.org
[linux-2.4.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder. 
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38                 
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45                 
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55                 
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling 
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, see net/ipv4/ip_gre.c --ANK
 */
94
95  
96 #include <linux/config.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/protocol.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118
/* Number of chains in each address-keyed tunnel table; must be a power of two
 * because HASH() reduces with a bit mask.
 */
#define HASH_SIZE  16

/*
 * Fold an address into a chain index in [0, HASH_SIZE).
 *
 * The argument is parenthesized so that expressions such as HASH(a | b)
 * expand with the intended precedence.  Note that the argument is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((addr)) ^ ((addr) >> 4)) & (HASH_SIZE - 1))
121
122 static int ipip_fb_tunnel_init(struct net_device *dev);
123 static int ipip_tunnel_init(struct net_device *dev);
124
125 static struct net_device ipip_fb_tunnel_dev = {
126         name:   "tunl0",
127         init:   ipip_fb_tunnel_init,
128 };
129
130 static struct ip_tunnel ipip_fb_tunnel = {
131         dev:    &ipip_fb_tunnel_dev,
132         parms:  { name: "tunl0", }
133 };
134
135 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
136 static struct ip_tunnel *tunnels_r[HASH_SIZE];
137 static struct ip_tunnel *tunnels_l[HASH_SIZE];
138 static struct ip_tunnel *tunnels_wc[1];
139 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
140
141 static rwlock_t ipip_lock = RW_LOCK_UNLOCKED;
142
143 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
144 {
145         unsigned h0 = HASH(remote);
146         unsigned h1 = HASH(local);
147         struct ip_tunnel *t;
148
149         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
150                 if (local == t->parms.iph.saddr &&
151                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
152                         return t;
153         }
154         for (t = tunnels_r[h0]; t; t = t->next) {
155                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
156                         return t;
157         }
158         for (t = tunnels_l[h1]; t; t = t->next) {
159                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
160                         return t;
161         }
162         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
163                 return t;
164         return NULL;
165 }
166
167 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
168 {
169         u32 remote = t->parms.iph.daddr;
170         u32 local = t->parms.iph.saddr;
171         unsigned h = 0;
172         int prio = 0;
173
174         if (remote) {
175                 prio |= 2;
176                 h ^= HASH(remote);
177         }
178         if (local) {
179                 prio |= 1;
180                 h ^= HASH(local);
181         }
182         return &tunnels[prio][h];
183 }
184
185
186 static void ipip_tunnel_unlink(struct ip_tunnel *t)
187 {
188         struct ip_tunnel **tp;
189
190         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
191                 if (t == *tp) {
192                         write_lock_bh(&ipip_lock);
193                         *tp = t->next;
194                         write_unlock_bh(&ipip_lock);
195                         break;
196                 }
197         }
198 }
199
200 static void ipip_tunnel_link(struct ip_tunnel *t)
201 {
202         struct ip_tunnel **tp = ipip_bucket(t);
203
204         t->next = *tp;
205         write_lock_bh(&ipip_lock);
206         *tp = t;
207         write_unlock_bh(&ipip_lock);
208 }
209
210 struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
211 {
212         u32 remote = parms->iph.daddr;
213         u32 local = parms->iph.saddr;
214         struct ip_tunnel *t, **tp, *nt;
215         struct net_device *dev;
216         unsigned h = 0;
217         int prio = 0;
218
219         if (remote) {
220                 prio |= 2;
221                 h ^= HASH(remote);
222         }
223         if (local) {
224                 prio |= 1;
225                 h ^= HASH(local);
226         }
227         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
228                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
229                         return t;
230         }
231         if (!create)
232                 return NULL;
233
234         MOD_INC_USE_COUNT;
235         dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
236         if (dev == NULL) {
237                 MOD_DEC_USE_COUNT;
238                 return NULL;
239         }
240         memset(dev, 0, sizeof(*dev) + sizeof(*t));
241         dev->priv = (void*)(dev+1);
242         nt = (struct ip_tunnel*)dev->priv;
243         nt->dev = dev;
244         dev->init = ipip_tunnel_init;
245         dev->features |= NETIF_F_DYNALLOC;
246         memcpy(&nt->parms, parms, sizeof(*parms));
247         nt->parms.name[IFNAMSIZ-1] = '\0';
248         strcpy(dev->name, nt->parms.name);
249         if (dev->name[0] == 0) {
250                 int i;
251                 for (i=1; i<100; i++) {
252                         sprintf(dev->name, "tunl%d", i);
253                         if (__dev_get_by_name(dev->name) == NULL)
254                                 break;
255                 }
256                 if (i==100)
257                         goto failed;
258                 memcpy(nt->parms.name, dev->name, IFNAMSIZ);
259         }
260         if (register_netdevice(dev) < 0)
261                 goto failed;
262
263         dev_hold(dev);
264         ipip_tunnel_link(nt);
265         /* Do not decrement MOD_USE_COUNT here. */
266         return nt;
267
268 failed:
269         kfree(dev);
270         MOD_DEC_USE_COUNT;
271         return NULL;
272 }
273
274 static void ipip_tunnel_destructor(struct net_device *dev)
275 {
276         if (dev != &ipip_fb_tunnel_dev) {
277                 MOD_DEC_USE_COUNT;
278         }
279 }
280
281 static void ipip_tunnel_uninit(struct net_device *dev)
282 {
283         if (dev == &ipip_fb_tunnel_dev) {
284                 write_lock_bh(&ipip_lock);
285                 tunnels_wc[0] = NULL;
286                 write_unlock_bh(&ipip_lock);
287         } else
288                 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
289         dev_put(dev);
290 }
291
292 void ipip_err(struct sk_buff *skb, u32 info)
293 {
294 #ifndef I_WISH_WORLD_WERE_PERFECT
295
296 /* It is not :-( All the routers (except for Linux) return only
297    8 bytes of packet payload. It means, that precise relaying of
298    ICMP in the real Internet is absolutely infeasible.
299  */
300         struct iphdr *iph = (struct iphdr*)skb->data;
301         int type = skb->h.icmph->type;
302         int code = skb->h.icmph->code;
303         struct ip_tunnel *t;
304
305         switch (type) {
306         default:
307         case ICMP_PARAMETERPROB:
308                 return;
309
310         case ICMP_DEST_UNREACH:
311                 switch (code) {
312                 case ICMP_SR_FAILED:
313                 case ICMP_PORT_UNREACH:
314                         /* Impossible event. */
315                         return;
316                 case ICMP_FRAG_NEEDED:
317                         /* Soft state for pmtu is maintained by IP core. */
318                         return;
319                 default:
320                         /* All others are translated to HOST_UNREACH.
321                            rfc2003 contains "deep thoughts" about NET_UNREACH,
322                            I believe they are just ether pollution. --ANK
323                          */
324                         break;
325                 }
326                 break;
327         case ICMP_TIME_EXCEEDED:
328                 if (code != ICMP_EXC_TTL)
329                         return;
330                 break;
331         }
332
333         read_lock(&ipip_lock);
334         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
335         if (t == NULL || t->parms.iph.daddr == 0)
336                 goto out;
337         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
338                 goto out;
339
340         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
341                 t->err_count++;
342         else
343                 t->err_count = 1;
344         t->err_time = jiffies;
345 out:
346         read_unlock(&ipip_lock);
347         return;
348 #else
349         struct iphdr *iph = (struct iphdr*)dp;
350         int hlen = iph->ihl<<2;
351         struct iphdr *eiph;
352         int type = skb->h.icmph->type;
353         int code = skb->h.icmph->code;
354         int rel_type = 0;
355         int rel_code = 0;
356         int rel_info = 0;
357         struct sk_buff *skb2;
358         struct rtable *rt;
359
360         if (len < hlen + sizeof(struct iphdr))
361                 return;
362         eiph = (struct iphdr*)(dp + hlen);
363
364         switch (type) {
365         default:
366                 return;
367         case ICMP_PARAMETERPROB:
368                 if (skb->h.icmph->un.gateway < hlen)
369                         return;
370
371                 /* So... This guy found something strange INSIDE encapsulated
372                    packet. Well, he is fool, but what can we do ?
373                  */
374                 rel_type = ICMP_PARAMETERPROB;
375                 rel_info = skb->h.icmph->un.gateway - hlen;
376                 break;
377
378         case ICMP_DEST_UNREACH:
379                 switch (code) {
380                 case ICMP_SR_FAILED:
381                 case ICMP_PORT_UNREACH:
382                         /* Impossible event. */
383                         return;
384                 case ICMP_FRAG_NEEDED:
385                         /* And it is the only really necesary thing :-) */
386                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
387                         if (rel_info < hlen+68)
388                                 return;
389                         rel_info -= hlen;
390                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
391                         if (rel_info > ntohs(eiph->tot_len))
392                                 return;
393                         break;
394                 default:
395                         /* All others are translated to HOST_UNREACH.
396                            rfc2003 contains "deep thoughts" about NET_UNREACH,
397                            I believe, it is just ether pollution. --ANK
398                          */
399                         rel_type = ICMP_DEST_UNREACH;
400                         rel_code = ICMP_HOST_UNREACH;
401                         break;
402                 }
403                 break;
404         case ICMP_TIME_EXCEEDED:
405                 if (code != ICMP_EXC_TTL)
406                         return;
407                 break;
408         }
409
410         /* Prepare fake skb to feed it to icmp_send */
411         skb2 = skb_clone(skb, GFP_ATOMIC);
412         if (skb2 == NULL)
413                 return;
414         dst_release(skb2->dst);
415         skb2->dst = NULL;
416         skb_pull(skb2, skb->data - (u8*)eiph);
417         skb2->nh.raw = skb2->data;
418
419         /* Try to guess incoming interface */
420         if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
421                 kfree_skb(skb2);
422                 return;
423         }
424         skb2->dev = rt->u.dst.dev;
425
426         /* route "incoming" packet */
427         if (rt->rt_flags&RTCF_LOCAL) {
428                 ip_rt_put(rt);
429                 rt = NULL;
430                 if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
431                     rt->u.dst.dev->type != ARPHRD_IPGRE) {
432                         ip_rt_put(rt);
433                         kfree_skb(skb2);
434                         return;
435                 }
436         } else {
437                 ip_rt_put(rt);
438                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
439                     skb2->dst->dev->type != ARPHRD_IPGRE) {
440                         kfree_skb(skb2);
441                         return;
442                 }
443         }
444
445         /* change mtu on this route */
446         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
447                 if (rel_info > skb2->dst->pmtu) {
448                         kfree_skb(skb2);
449                         return;
450                 }
451                 skb2->dst->pmtu = rel_info;
452                 rel_info = htonl(rel_info);
453         } else if (type == ICMP_TIME_EXCEEDED) {
454                 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
455                 if (t->parms.iph.ttl) {
456                         rel_type = ICMP_DEST_UNREACH;
457                         rel_code = ICMP_HOST_UNREACH;
458                 }
459         }
460
461         icmp_send(skb2, rel_type, rel_code, rel_info);
462         kfree_skb(skb2);
463         return;
464 #endif
465 }
466
467 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
468 {
469         struct iphdr *inner_iph = skb->nh.iph;
470
471         if (INET_ECN_is_ce(outer_iph->tos) &&
472             INET_ECN_is_not_ce(inner_iph->tos))
473                 IP_ECN_set_ce(inner_iph);
474 }
475
476 int ipip_rcv(struct sk_buff *skb)
477 {
478         struct iphdr *iph;
479         struct ip_tunnel *tunnel;
480
481         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
482                 goto out;
483
484         iph = skb->nh.iph;
485         skb->mac.raw = skb->nh.raw;
486         skb->nh.raw = skb->data;
487         memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
488         skb->protocol = htons(ETH_P_IP);
489         skb->pkt_type = PACKET_HOST;
490
491         read_lock(&ipip_lock);
492         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
493                 tunnel->stat.rx_packets++;
494                 tunnel->stat.rx_bytes += skb->len;
495                 skb->dev = tunnel->dev;
496                 dst_release(skb->dst);
497                 skb->dst = NULL;
498                 nf_reset(skb);
499                 ipip_ecn_decapsulate(iph, skb);
500                 netif_rx(skb);
501                 read_unlock(&ipip_lock);
502                 return 0;
503         }
504         read_unlock(&ipip_lock);
505
506         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
507 out:
508         kfree_skb(skb);
509         return 0;
510 }
511
/*
 * Thin wrapper around ip_send().  NF_HOOK needs the address of a function,
 * so the call is given a real function of its own here.
 */
static inline int do_ip_send(struct sk_buff *skb)
{
	int rc = ip_send(skb);

	return rc;
}
517
518 /*
519  *      This function assumes it is being called from dev_queue_xmit()
520  *      and that skb is filled properly by that function.
521  */
522
523 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
524 {
525         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
526         struct net_device_stats *stats = &tunnel->stat;
527         struct iphdr  *tiph = &tunnel->parms.iph;
528         u8     tos = tunnel->parms.iph.tos;
529         u16    df = tiph->frag_off;
530         struct rtable *rt;                      /* Route to the other host */
531         struct net_device *tdev;                        /* Device to other host */
532         struct iphdr  *old_iph = skb->nh.iph;
533         struct iphdr  *iph;                     /* Our new IP header */
534         int    max_headroom;                    /* The extra header space needed */
535         u32    dst = tiph->daddr;
536         int    mtu;
537
538         if (tunnel->recursion++) {
539                 tunnel->stat.collisions++;
540                 goto tx_error;
541         }
542
543         if (skb->protocol != htons(ETH_P_IP))
544                 goto tx_error;
545
546         if (tos&1)
547                 tos = old_iph->tos;
548
549         if (!dst) {
550                 /* NBMA tunnel */
551                 if ((rt = (struct rtable*)skb->dst) == NULL) {
552                         tunnel->stat.tx_fifo_errors++;
553                         goto tx_error;
554                 }
555                 if ((dst = rt->rt_gateway) == 0)
556                         goto tx_error_icmp;
557         }
558
559         if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
560                 tunnel->stat.tx_carrier_errors++;
561                 goto tx_error_icmp;
562         }
563         tdev = rt->u.dst.dev;
564
565         if (tdev == dev) {
566                 ip_rt_put(rt);
567                 tunnel->stat.collisions++;
568                 goto tx_error;
569         }
570
571         if (tiph->frag_off)
572                 mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
573         else
574                 mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
575
576         if (mtu < 68) {
577                 tunnel->stat.collisions++;
578                 ip_rt_put(rt);
579                 goto tx_error;
580         }
581         if (skb->dst && mtu < skb->dst->pmtu)
582                 skb->dst->pmtu = mtu;
583
584         df |= (old_iph->frag_off&htons(IP_DF));
585
586         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
587                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
588                 ip_rt_put(rt);
589                 goto tx_error;
590         }
591
592         if (tunnel->err_count > 0) {
593                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
594                         tunnel->err_count--;
595                         dst_link_failure(skb);
596                 } else
597                         tunnel->err_count = 0;
598         }
599
600         /*
601          * Okay, now see if we can stuff it in the buffer as-is.
602          */
603         max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
604
605         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
606                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
607                 if (!new_skb) {
608                         ip_rt_put(rt);
609                         stats->tx_dropped++;
610                         dev_kfree_skb(skb);
611                         tunnel->recursion--;
612                         return 0;
613                 }
614                 if (skb->sk)
615                         skb_set_owner_w(new_skb, skb->sk);
616                 dev_kfree_skb(skb);
617                 skb = new_skb;
618                 old_iph = skb->nh.iph;
619         }
620
621         skb->h.raw = skb->nh.raw;
622         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
623         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
624         dst_release(skb->dst);
625         skb->dst = &rt->u.dst;
626
627         /*
628          *      Push down and install the IPIP header.
629          */
630
631         iph                     =       skb->nh.iph;
632         iph->version            =       4;
633         iph->ihl                =       sizeof(struct iphdr)>>2;
634         iph->frag_off           =       df;
635         iph->protocol           =       IPPROTO_IPIP;
636         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
637         iph->daddr              =       rt->rt_dst;
638         iph->saddr              =       rt->rt_src;
639
640         if ((iph->ttl = tiph->ttl) == 0)
641                 iph->ttl        =       old_iph->ttl;
642
643         nf_reset(skb);
644
645         IPTUNNEL_XMIT();
646         tunnel->recursion--;
647         return 0;
648
649 tx_error_icmp:
650         dst_link_failure(skb);
651 tx_error:
652         stats->tx_errors++;
653         dev_kfree_skb(skb);
654         tunnel->recursion--;
655         return 0;
656 }
657
658 static int
659 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
660 {
661         int err = 0;
662         struct ip_tunnel_parm p;
663         struct ip_tunnel *t;
664
665         MOD_INC_USE_COUNT;
666
667         switch (cmd) {
668         case SIOCGETTUNNEL:
669                 t = NULL;
670                 if (dev == &ipip_fb_tunnel_dev) {
671                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
672                                 err = -EFAULT;
673                                 break;
674                         }
675                         t = ipip_tunnel_locate(&p, 0);
676                 }
677                 if (t == NULL)
678                         t = (struct ip_tunnel*)dev->priv;
679                 memcpy(&p, &t->parms, sizeof(p));
680                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
681                         err = -EFAULT;
682                 break;
683
684         case SIOCADDTUNNEL:
685         case SIOCCHGTUNNEL:
686                 err = -EPERM;
687                 if (!capable(CAP_NET_ADMIN))
688                         goto done;
689
690                 err = -EFAULT;
691                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
692                         goto done;
693
694                 err = -EINVAL;
695                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
696                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
697                         goto done;
698                 if (p.iph.ttl)
699                         p.iph.frag_off |= htons(IP_DF);
700
701                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
702
703                 if (dev != &ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL &&
704                     t != &ipip_fb_tunnel) {
705                         if (t != NULL) {
706                                 if (t->dev != dev) {
707                                         err = -EEXIST;
708                                         break;
709                                 }
710                         } else {
711                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
712                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
713                                         err = -EINVAL;
714                                         break;
715                                 }
716                                 t = (struct ip_tunnel*)dev->priv;
717                                 ipip_tunnel_unlink(t);
718                                 t->parms.iph.saddr = p.iph.saddr;
719                                 t->parms.iph.daddr = p.iph.daddr;
720                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
721                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
722                                 ipip_tunnel_link(t);
723                                 netdev_state_change(dev);
724                         }
725                 }
726
727                 if (t) {
728                         err = 0;
729                         if (cmd == SIOCCHGTUNNEL) {
730                                 t->parms.iph.ttl = p.iph.ttl;
731                                 t->parms.iph.tos = p.iph.tos;
732                                 t->parms.iph.frag_off = p.iph.frag_off;
733                         }
734                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
735                                 err = -EFAULT;
736                 } else
737                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
738                 break;
739
740         case SIOCDELTUNNEL:
741                 err = -EPERM;
742                 if (!capable(CAP_NET_ADMIN))
743                         goto done;
744
745                 if (dev == &ipip_fb_tunnel_dev) {
746                         err = -EFAULT;
747                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
748                                 goto done;
749                         err = -ENOENT;
750                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
751                                 goto done;
752                         err = -EPERM;
753                         if (t == &ipip_fb_tunnel)
754                                 goto done;
755                         dev = t->dev;
756                 }
757                 err = unregister_netdevice(dev);
758                 break;
759
760         default:
761                 err = -EINVAL;
762         }
763
764 done:
765         MOD_DEC_USE_COUNT;
766         return err;
767 }
768
769 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
770 {
771         return &(((struct ip_tunnel*)dev->priv)->stat);
772 }
773
774 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
775 {
776         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
777                 return -EINVAL;
778         dev->mtu = new_mtu;
779         return 0;
780 }
781
782 static void ipip_tunnel_init_gen(struct net_device *dev)
783 {
784         struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
785
786         dev->uninit             = ipip_tunnel_uninit;
787         dev->destructor         = ipip_tunnel_destructor;
788         dev->hard_start_xmit    = ipip_tunnel_xmit;
789         dev->get_stats          = ipip_tunnel_get_stats;
790         dev->do_ioctl           = ipip_tunnel_ioctl;
791         dev->change_mtu         = ipip_tunnel_change_mtu;
792
793         dev->type               = ARPHRD_TUNNEL;
794         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
795         dev->mtu                = 1500 - sizeof(struct iphdr);
796         dev->flags              = IFF_NOARP;
797         dev->iflink             = 0;
798         dev->addr_len           = 4;
799         memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
800         memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
801 }
802
803 static int ipip_tunnel_init(struct net_device *dev)
804 {
805         struct net_device *tdev = NULL;
806         struct ip_tunnel *tunnel;
807         struct iphdr *iph;
808
809         tunnel = (struct ip_tunnel*)dev->priv;
810         iph = &tunnel->parms.iph;
811
812         ipip_tunnel_init_gen(dev);
813
814         if (iph->daddr) {
815                 struct rtable *rt;
816                 if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
817                         tdev = rt->u.dst.dev;
818                         ip_rt_put(rt);
819                 }
820                 dev->flags |= IFF_POINTOPOINT;
821         }
822
823         if (!tdev && tunnel->parms.link)
824                 tdev = __dev_get_by_index(tunnel->parms.link);
825
826         if (tdev) {
827                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
828                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
829         }
830         dev->iflink = tunnel->parms.link;
831
832         return 0;
833 }
834
#ifdef MODULE
/*
 * open/stop hooks for the fallback device in modular builds: the module
 * use count is raised on open and dropped again on close.
 * Both always return 0.
 */
static int ipip_fb_tunnel_open(struct net_device *dev)
{
	MOD_INC_USE_COUNT;

	return 0;
}

static int ipip_fb_tunnel_close(struct net_device *dev)
{
	MOD_DEC_USE_COUNT;

	return 0;
}
#endif
848
849 int __init ipip_fb_tunnel_init(struct net_device *dev)
850 {
851         struct iphdr *iph;
852
853         ipip_tunnel_init_gen(dev);
854 #ifdef MODULE
855         dev->open               = ipip_fb_tunnel_open;
856         dev->stop               = ipip_fb_tunnel_close;
857 #endif
858
859         iph = &ipip_fb_tunnel.parms.iph;
860         iph->version            = 4;
861         iph->protocol           = IPPROTO_IPIP;
862         iph->ihl                = 5;
863
864         dev_hold(dev);
865         tunnels_wc[0]           = &ipip_fb_tunnel;
866         return 0;
867 }
868
869 static struct inet_protocol ipip_protocol = {
870         handler:        ipip_rcv,
871         err_handler:    ipip_err,
872         protocol:       IPPROTO_IPIP,
873         name:           "IPIP"
874 };
875
876 static char banner[] __initdata =
877         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
878
879 int __init ipip_init(void)
880 {
881         printk(banner);
882
883         ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel;
884         register_netdev(&ipip_fb_tunnel_dev);
885         inet_add_protocol(&ipip_protocol);
886         return 0;
887 }
888
889 static void __exit ipip_fini(void)
890 {
891         if ( inet_del_protocol(&ipip_protocol) < 0 )
892                 printk(KERN_INFO "ipip close: can't remove protocol\n");
893
894         unregister_netdev(&ipip_fb_tunnel_dev);
895 }
896
897 #ifdef MODULE
898 module_init(ipip_init);
899 #endif
900 module_exit(ipip_fini);
901 MODULE_LICENSE("GPL");