2 * Extension Header handling for IPv6
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Andi Kleen <ak@muc.de>
8 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * $Id: exthdrs.c,v 1.1.1.1 2005/04/11 02:51:13 jack Exp $
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 * yoshfuji : ensure not to overrun while parsing
23 #include <linux/errno.h>
24 #include <linux/types.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/sched.h>
28 #include <linux/net.h>
29 #include <linux/netdevice.h>
30 #include <linux/in6.h>
31 #include <linux/icmpv6.h>
37 #include <net/protocol.h>
38 #include <net/transp_v6.h>
39 #include <net/rawv6.h>
40 #include <net/ndisc.h>
41 #include <net/ip6_route.h>
42 #include <net/addrconf.h>
44 #include <asm/uaccess.h>
47 * Parsing inbound headers.
49 * Parsing function "func" returns offset wrt skb->nh of the place,
50 * where next nexthdr value is stored or NULL, if parsing
51 * failed. It should also update skb->h to point at the next header.
57 int (*func) (struct sk_buff **, int offset);
61 * Parsing tlv encoded headers.
63 * Parsing function "func" returns 1 if parsing succeeded
64 * and 0 if it failed.
65 * It MUST NOT touch skb->h.
71 int (*func) (struct sk_buff *, int offset);
74 /*********************
76 *********************/
78 /* An unknown option is detected, decide what to do */
80 int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
82 switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
86 case 1: /* drop packet */
89 case 3: /* Send ICMP if not a multicast address and drop packet */
90 /* Actually, this is a redundant check: icmp_send
91 will recheck in any case.
93 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
95 case 2: /* send ICMP PARM PROB regardless and drop packet */
96 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
104 /* Parse tlv encoded option header (hop-by-hop or destination) */
106 static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
108 struct tlvtype_proc *curr;
109 int off = skb->h.raw - skb->nh.raw;
110 int len = ((skb->h.raw[1]+1)<<3);
112 if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
119 int optlen = skb->nh.raw[off+1]+2;
121 switch (skb->nh.raw[off]) {
129 default: /* Other TLV code so scan list */
132 for (curr=procs; curr->type >= 0; curr++) {
133 if (curr->type == skb->nh.raw[off]) {
134 /* type specific length/alignment
135 checks will be performed in the
137 if (curr->func(skb, off) == 0)
142 if (curr->type < 0) {
143 if (ip6_tlvopt_unknown(skb, off) == 0)
158 /*****************************
159 Destination options header.
160 *****************************/
162 struct tlvtype_proc tlvprocdestopt_lst[] = {
163 /* No destination options are defined now */
167 static int ipv6_dest_opt(struct sk_buff **skb_ptr, int nhoff)
169 struct sk_buff *skb=*skb_ptr;
170 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
172 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
173 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
178 opt->dst1 = skb->h.raw - skb->nh.raw;
180 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
181 skb->h.raw += ((skb->h.raw[1]+1)<<3);
188 /********************************
189 NONE header. No data in packet.
190 ********************************/
192 static int ipv6_nodata(struct sk_buff **skb_ptr, int nhoff)
198 /********************************
200 ********************************/
202 static int ipv6_routing_header(struct sk_buff **skb_ptr, int nhoff)
204 struct sk_buff *skb = *skb_ptr;
205 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
206 struct in6_addr *addr;
207 struct in6_addr daddr;
211 struct ipv6_rt_hdr *hdr;
212 struct rt0_hdr *rthdr;
214 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
215 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
216 IP6_INC_STATS_BH(Ip6InHdrErrors);
221 hdr = (struct ipv6_rt_hdr *) skb->h.raw;
223 if ((ipv6_addr_type(&skb->nh.ipv6h->daddr)&IPV6_ADDR_MULTICAST) ||
224 skb->pkt_type != PACKET_HOST) {
230 if (hdr->segments_left == 0) {
231 opt->srcrt = skb->h.raw - skb->nh.raw;
232 skb->h.raw += (hdr->hdrlen + 1) << 3;
233 opt->dst0 = opt->dst1;
235 return (&hdr->nexthdr) - skb->nh.raw;
238 if (hdr->type != IPV6_SRCRT_TYPE_0 || (hdr->hdrlen & 0x01)) {
239 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, hdr->type != IPV6_SRCRT_TYPE_0 ? 2 : 1);
244 * This is the routing header forwarding algorithm from
248 n = hdr->hdrlen >> 1;
250 if (hdr->segments_left > n) {
251 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
255 /* We are about to mangle packet header. Be careful!
256 Do not damage packets queued somewhere.
258 if (skb_cloned(skb)) {
259 struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
263 *skb_ptr = skb = skb2;
264 opt = (struct inet6_skb_parm *)skb2->cb;
265 hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
268 if (skb->ip_summed == CHECKSUM_HW)
269 skb->ip_summed = CHECKSUM_NONE;
271 i = n - --hdr->segments_left;
273 rthdr = (struct rt0_hdr *) hdr;
277 addr_type = ipv6_addr_type(addr);
279 if (addr_type&IPV6_ADDR_MULTICAST) {
284 ipv6_addr_copy(&daddr, addr);
285 ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
286 ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
288 dst_release(xchg(&skb->dst, NULL));
289 ip6_route_input(skb);
290 if (skb->dst->error) {
291 skb->dst->input(skb);
294 if (skb->dst->dev->flags&IFF_LOOPBACK) {
295 if (skb->nh.ipv6h->hop_limit <= 1) {
296 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
301 skb->nh.ipv6h->hop_limit--;
305 skb->dst->input(skb);
310 This function inverts received rthdr.
311 NOTE: the specs allow doing this automatically only if
312 the packet is authenticated.
314 I will not discuss it here (though, I am really pissed off at
315 this stupid requirement making rthdr idea useless)
317 Actually, it creates severe problems for us.
318 Embryonic requests have no associated sockets,
319 so the user has no control over them and
320 can neither set reply options nor
321 even know that someone wants to connect
324 For now we need to test the engine, so that I created
325 temporary (or permanent) backdoor.
326 If listening socket set IPV6_RTHDR to 2, then we invert header.
330 struct ipv6_txoptions *
331 ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
335 [ H1 -> H2 -> ... H_prev ] daddr=ME
338 [ H_prev -> ... -> H1 ] daddr =sender
340 Note that the IP output engine will rewrite this rthdr
341 by rotating it left by one addr.
345 struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
346 struct rt0_hdr *irthdr;
347 struct ipv6_txoptions *opt;
348 int hdrlen = ipv6_optlen(hdr);
350 if (hdr->segments_left ||
351 hdr->type != IPV6_SRCRT_TYPE_0 ||
355 n = hdr->hdrlen >> 1;
356 opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
359 memset(opt, 0, sizeof(*opt));
360 opt->tot_len = sizeof(*opt) + hdrlen;
361 opt->srcrt = (void*)(opt+1);
362 opt->opt_nflen = hdrlen;
364 memcpy(opt->srcrt, hdr, sizeof(*hdr));
365 irthdr = (struct rt0_hdr*)opt->srcrt;
366 /* Obsolete field, MBZ, when originated by us */
368 opt->srcrt->segments_left = n;
370 memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
374 /********************************
376 ********************************/
379 rfc1826 said, that if a host does not implement AUTH header
380 it MAY ignore it. We use this hole 8)
382 Actually, now we can implement OSPFv6 without kernel IPsec.
383 Authentication for the poor may be done in user space with the same success.
385 Yes, it means, that we allow application to send/receive
386 raw authentication header. Apparently, we suppose, that it knows
387 what it does and calculates authentication data correctly.
388 Certainly, it is possible only for udp and raw sockets, but not for tcp.
390 AUTH header has 4byte granular length, which kills all the idea
391 behind AUTOMATIC 64bit alignment of IPv6. Now we will lose
392 cpu ticks, checking that sender did not do something stupid
393 and opt->hdrlen is even. Shit! --ANK (980730)
396 static int ipv6_auth_hdr(struct sk_buff **skb_ptr, int nhoff)
398 struct sk_buff *skb=*skb_ptr;
399 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
402 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8))
406 * RFC2402 2.2 Payload Length
407 * The 8-bit field specifies the length of AH in 32-bit words
408 * (4-byte units), minus "2".
409 * -- Noriaki Takamiya @USAGI Project
411 len = (skb->h.raw[1]+2)<<2;
416 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+len))
419 opt->auth = skb->h.raw - skb->nh.raw;
428 /* This list MUST NOT contain entry for NEXTHDR_HOP.
429 It is parsed immediately after packet received
430 and if it occurs somewhere in another place we must
434 struct hdrtype_proc hdrproc_lst[] = {
435 {NEXTHDR_FRAGMENT, ipv6_reassembly},
436 {NEXTHDR_ROUTING, ipv6_routing_header},
437 {NEXTHDR_DEST, ipv6_dest_opt},
438 {NEXTHDR_NONE, ipv6_nodata},
439 {NEXTHDR_AUTH, ipv6_auth_hdr},
441 {NEXTHDR_ESP, ipv6_esp_hdr},
446 int ipv6_parse_exthdrs(struct sk_buff **skb_in, int nhoff)
448 struct hdrtype_proc *hdrt;
449 u8 nexthdr = (*skb_in)->nh.raw[nhoff];
452 for (hdrt=hdrproc_lst; hdrt->type >= 0; hdrt++) {
453 if (hdrt->type == nexthdr) {
454 if ((nhoff = hdrt->func(skb_in, nhoff)) >= 0) {
455 nexthdr = (*skb_in)->nh.raw[nhoff];
465 /**********************************
467 **********************************/
469 /* Router Alert as of draft-ietf-ipngwg-ipv6router-alert-04 */
471 static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
473 if (skb->nh.raw[optoff+1] == 2) {
474 ((struct inet6_skb_parm*)skb->cb)->ra = optoff;
478 printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]);
485 static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
489 if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
491 printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]);
495 pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
496 if (pkt_len < 0x10000) {
497 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
500 if (skb->nh.ipv6h->payload_len) {
501 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
505 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
506 IP6_INC_STATS_BH(Ip6InTruncatedPkts);
509 if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
510 __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
511 if (skb->ip_summed == CHECKSUM_HW)
512 skb->ip_summed = CHECKSUM_NONE;
521 struct tlvtype_proc tlvprochopopt_lst[] = {
522 {IPV6_TLV_ROUTERALERT, ipv6_hop_ra},
523 {IPV6_TLV_JUMBO, ipv6_hop_jumbo},
527 int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
529 ((struct inet6_skb_parm*)skb->cb)->hop = sizeof(struct ipv6hdr);
530 if (ip6_parse_tlv(tlvprochopopt_lst, skb))
531 return sizeof(struct ipv6hdr);
536 * Creating outbound headers.
538 * "build" functions work when skb is filled from head to tail (datagram)
539 * "push" functions work when headers are added from tail to head (tcp)
541 * In both cases we assume, that caller reserved enough room
545 u8 *ipv6_build_rthdr(struct sk_buff *skb, u8 *prev_hdr,
546 struct ipv6_rt_hdr *opt, struct in6_addr *addr)
548 struct rt0_hdr *phdr, *ihdr;
551 ihdr = (struct rt0_hdr *) opt;
553 phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
554 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
556 hops = ihdr->rt_hdr.hdrlen >> 1;
559 memcpy(phdr->addr, ihdr->addr + 1,
560 (hops - 1) * sizeof(struct in6_addr));
562 ipv6_addr_copy(phdr->addr + (hops - 1), addr);
564 phdr->rt_hdr.nexthdr = *prev_hdr;
565 *prev_hdr = NEXTHDR_ROUTING;
566 return &phdr->rt_hdr.nexthdr;
569 static u8 *ipv6_build_exthdr(struct sk_buff *skb, u8 *prev_hdr, u8 type, struct ipv6_opt_hdr *opt)
571 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, ipv6_optlen(opt));
573 memcpy(h, opt, ipv6_optlen(opt));
574 h->nexthdr = *prev_hdr;
579 static u8 *ipv6_build_authhdr(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_opt_hdr *opt)
581 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, (opt->hdrlen+2)<<2);
583 memcpy(h, opt, (opt->hdrlen+2)<<2);
584 h->nexthdr = *prev_hdr;
585 *prev_hdr = NEXTHDR_AUTH;
590 u8 *ipv6_build_nfrag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt,
591 struct in6_addr *daddr, u32 jumbolen)
593 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb->data;
595 if (opt && opt->hopopt)
596 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_HOP, opt->hopopt);
599 u8 *jumboopt = (u8 *)skb_put(skb, 8);
601 if (opt && opt->hopopt) {
602 *jumboopt++ = IPV6_TLV_PADN;
606 h = (struct ipv6_opt_hdr *)jumboopt;
607 h->nexthdr = *prev_hdr;
610 *prev_hdr = NEXTHDR_HOP;
611 prev_hdr = &h->nexthdr;
613 jumboopt[0] = IPV6_TLV_JUMBO;
615 *(u32*)(jumboopt+2) = htonl(jumbolen);
619 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst0opt);
621 prev_hdr = ipv6_build_rthdr(skb, prev_hdr, opt->srcrt, daddr);
626 u8 *ipv6_build_frag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt)
629 prev_hdr = ipv6_build_authhdr(skb, prev_hdr, opt->auth);
631 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst1opt);
635 static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
636 struct ipv6_rt_hdr *opt,
637 struct in6_addr **addr_p)
639 struct rt0_hdr *phdr, *ihdr;
642 ihdr = (struct rt0_hdr *) opt;
644 phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
645 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
647 hops = ihdr->rt_hdr.hdrlen >> 1;
650 memcpy(phdr->addr, ihdr->addr + 1,
651 (hops - 1) * sizeof(struct in6_addr));
653 ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
654 *addr_p = ihdr->addr;
656 phdr->rt_hdr.nexthdr = *proto;
657 *proto = NEXTHDR_ROUTING;
660 static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
662 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
664 memcpy(h, opt, ipv6_optlen(opt));
669 static void ipv6_push_authhdr(struct sk_buff *skb, u8 *proto, struct ipv6_opt_hdr *opt)
671 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, (opt->hdrlen+2)<<2);
673 memcpy(h, opt, (opt->hdrlen+2)<<2);
675 *proto = NEXTHDR_AUTH;
678 void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
680 struct in6_addr **daddr)
683 ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
685 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
687 ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
690 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
693 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
695 ipv6_push_authhdr(skb, proto, opt->auth);
698 struct ipv6_txoptions *
699 ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
701 struct ipv6_txoptions *opt2;
703 opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
705 long dif = (char*)opt2 - (char*)opt;
706 memcpy(opt2, opt, opt->tot_len);
708 *((char**)&opt2->hopopt) += dif;
710 *((char**)&opt2->dst0opt) += dif;
712 *((char**)&opt2->dst1opt) += dif;
714 *((char**)&opt2->auth) += dif;
716 *((char**)&opt2->srcrt) += dif;
723 * find out if nexthdr is a well-known extension header or a protocol
726 int ipv6_ext_hdr(u8 nexthdr)
729 * find out if nexthdr is an extension header or a protocol
731 return ( (nexthdr == NEXTHDR_HOP) ||
732 (nexthdr == NEXTHDR_ROUTING) ||
733 (nexthdr == NEXTHDR_FRAGMENT) ||
734 (nexthdr == NEXTHDR_AUTH) ||
735 (nexthdr == NEXTHDR_NONE) ||
736 (nexthdr == NEXTHDR_DEST) );
740 * Skip any extension headers. This is used by the ICMP module.
742 * Note that strictly speaking this conflicts with RFC1883 4.0:
743 * ...The contents and semantics of each extension header determine whether
744 * or not to proceed to the next header. Therefore, extension headers must
745 * be processed strictly in the order they appear in the packet; a
746 * receiver must not, for example, scan through a packet looking for a
747 * particular kind of extension header and process that header prior to
748 * processing all preceding ones.
750 * We do exactly this. This is a protocol bug. We can't decide after
751 * seeing an unknown discard-with-error flavour TLV option if it's an
752 * ICMP error message or not (errors should never be sent in reply to
753 * ICMP error messages).
755 * But I see no other way to do this. This might need to be reexamined
756 * when Linux implements ESP (and maybe AUTH) headers.
759 * This function parses (probably truncated) exthdr set "hdr"
760 * of length "len". "nexthdrp" initially points to some place,
761 * where type of the first header can be found.
763 * It skips all well-known exthdrs, and returns pointer to the start
764 * of unparsable area i.e. the first header with unknown type.
765 * If it is not NULL *nexthdr is updated by type/protocol of this header.
767 * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL.
768 * - it may return pointer pointing beyond end of packet,
769 * if the last recognized header is truncated in the middle.
770 * - if packet is truncated, so that all parsed headers are skipped,
772 * - First fragment header is skipped, not-first ones
773 * are considered as unparsable.
774 * - ESP is unparsable for now and considered like
775 * normal payload protocol.
776 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
781 int ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len)
783 u8 nexthdr = *nexthdrp;
785 while (ipv6_ext_hdr(nexthdr)) {
786 struct ipv6_opt_hdr hdr;
789 if (len < (int)sizeof(struct ipv6_opt_hdr))
791 if (nexthdr == NEXTHDR_NONE)
793 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
795 if (nexthdr == NEXTHDR_FRAGMENT) {
796 struct frag_hdr *fhdr = (struct frag_hdr *) &hdr;
797 if (ntohs(fhdr->frag_off) & ~0x7)
800 } else if (nexthdr == NEXTHDR_AUTH)
801 hdrlen = (hdr.hdrlen+2)<<2;
803 hdrlen = ipv6_optlen(&hdr);
805 nexthdr = hdr.nexthdr;