2 * Extension Header handling for IPv6
3 * Linux INET6 implementation
6 * Pedro Roque <pedro_m@yahoo.com>
7 * Andi Kleen <ak@muc.de>
8 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 * yoshfuji : ensure not to overrun while parsing
23 #include <linux/errno.h>
24 #include <linux/types.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/sched.h>
28 #include <linux/net.h>
29 #include <linux/netdevice.h>
30 #include <linux/in6.h>
31 #include <linux/icmpv6.h>
37 #include <net/protocol.h>
38 #include <net/transp_v6.h>
39 #include <net/rawv6.h>
40 #include <net/ndisc.h>
41 #include <net/ip6_route.h>
42 #include <net/addrconf.h>
44 #include <asm/uaccess.h>
47 * Parsing inbound headers.
49 * Parsing function "func" returns the offset, relative to skb->nh, of the place
50 * where the next nexthdr value is stored, or a negative value if parsing
51 * failed. It should also update skb->h to point at the next header.
57 int (*func) (struct sk_buff **, int offset);
61 * Parsing tlv encoded headers.
63 * Parsing function "func" returns 1 if parsing succeeded
64 * and 0 if it failed.
65 * It MUST NOT touch skb->h.
71 int (*func) (struct sk_buff *, int offset);
74 /*********************
76 *********************/
78 /* An unknown option is detected, decide what to do */
/*
 * Decide how to handle an option whose type is not recognised.
 *
 * skb:    packet being parsed
 * optoff: offset of the option's type byte from skb->nh.raw
 *
 * The action is selected by the two highest-order bits of the type
 * byte (mask 0xC0, shifted right by 6); the per-case comments below
 * name each action.  Parameter problems are reported with code
 * ICMPV6_UNK_OPTION and optoff as the pointer.
 *
 * NOTE(review): ip6_parse_tlv() compares the result with 0; confirm
 * the exact return values of each case against the full function.
 */
80 int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
82 switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
86 case 1: /* drop packet */
89 case 3: /* Send ICMP if not a multicast address and drop packet */
90 /* Actually, it is redundant check. icmp_send
91 will recheck in any case.
93 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
95 case 2: /* send ICMP PARM PROB regardless and drop packet */
96 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
104 /* Parse tlv encoded option header (hop-by-hop or destination) */
/*
 * Walk the TLV-encoded options of the extension header at skb->h.raw.
 *
 * procs: handler table, scanned until an entry with a negative type
 * skb:   packet being parsed
 *
 * off is the header's offset from skb->nh.raw and len is
 * (skb->h.raw[1] + 1) * 8 bytes; the end of the header is compared
 * against skb_headlen(skb).  For each option, optlen is the option's
 * length byte (skb->nh.raw[off + 1]) plus 2.  Option types that the
 * switch does not handle directly are looked up in procs; when no
 * entry matches (the scan stopped on the negative-type terminator),
 * ip6_tlvopt_unknown() is called.  Both the per-type handler and
 * ip6_tlvopt_unknown() are tested for a result of 0.
 *
 * NOTE(review): callers test the result as a boolean (non-zero appears
 * to mean success) - confirm against the full function.
 */
106 static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
108 struct tlvtype_proc *curr;
109 int off = skb->h.raw - skb->nh.raw;
110 int len = ((skb->h.raw[1]+1)<<3);
112 if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
119 int optlen = skb->nh.raw[off+1]+2;
121 switch (skb->nh.raw[off]) {
129 default: /* Other TLV code so scan list */
132 for (curr=procs; curr->type >= 0; curr++) {
133 if (curr->type == skb->nh.raw[off]) {
134 /* type specific length/alignment
135 checks will be perfomed in the
137 if (curr->func(skb, off) == 0)
142 if (curr->type < 0) {
143 if (ip6_tlvopt_unknown(skb, off) == 0)
158 /*****************************
159 Destination options header.
160 *****************************/
/*
 * TLV handler table for the destination options header;
 * ipv6_dest_opt() passes it to ip6_parse_tlv().
 */
162 struct tlvtype_proc tlvprocdestopt_lst[] = {
163 /* No destination options are defined now */
/*
 * Handler for a destination options header (registered for
 * NEXTHDR_DEST in hdrproc_lst).
 *
 * skb_ptr: address of the packet pointer
 * nhoff:   not referenced in the visible lines of this function
 *
 * Checks with pskb_may_pull() that first 8 bytes and then the whole
 * header, (skb->h.raw[1] + 1) * 8 bytes, are available, stores the
 * header's offset from skb->nh.raw in opt->dst1 and runs
 * ip6_parse_tlv() with tlvprocdestopt_lst.  When that returns
 * non-zero, skb->h.raw is advanced past the header.
 */
167 static int ipv6_dest_opt(struct sk_buff **skb_ptr, int nhoff)
169 struct sk_buff *skb=*skb_ptr;
170 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
172 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
173 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
178 opt->dst1 = skb->h.raw - skb->nh.raw;
180 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
181 skb->h.raw += ((skb->h.raw[1]+1)<<3);
188 /********************************
189 NONE header. No data in packet.
190 ********************************/
/* Handler registered for NEXTHDR_NONE in hdrproc_lst. */
192 static int ipv6_nodata(struct sk_buff **skb_ptr, int nhoff)
198 /********************************
200 ********************************/
/*
 * Handler for a routing header (registered for NEXTHDR_ROUTING in
 * hdrproc_lst).
 *
 * skb_ptr: address of the packet pointer; it is updated when a cloned
 *          packet is copied before being modified
 * nhoff:   not referenced in the visible lines of this function
 *
 * With segments_left == 0 the header's offset is recorded in
 * opt->srcrt, skb->h.raw is moved past the header and the offset of
 * the header's nexthdr byte from skb->nh.raw is returned.
 *
 * Otherwise the header is checked for type IPV6_SRCRT_TYPE_0, an even
 * hdrlen and segments_left <= hdrlen / 2; each failed check calls
 * icmpv6_param_prob() pointing at the offending field.  The address
 * referenced by addr is then swapped with the IPv6 destination
 * address, the cached route is dropped and ip6_route_input() is run
 * again before the packet is handed to skb->dst->input().
 */
202 static int ipv6_routing_header(struct sk_buff **skb_ptr, int nhoff)
204 struct sk_buff *skb = *skb_ptr;
205 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
206 struct in6_addr *addr;
207 struct in6_addr daddr;
211 struct ipv6_rt_hdr *hdr;
212 struct rt0_hdr *rthdr;
/* First the fixed 8 bytes, then the full (hdrlen + 1) * 8 byte header */
214 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
215 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
216 IP6_INC_STATS_BH(Ip6InHdrErrors);
221 hdr = (struct ipv6_rt_hdr *) skb->h.raw;
223 if ((ipv6_addr_type(&skb->nh.ipv6h->daddr)&IPV6_ADDR_MULTICAST) ||
224 skb->pkt_type != PACKET_HOST) {
/* No segments left: record the header and step over it */
230 if (hdr->segments_left == 0) {
231 opt->srcrt = skb->h.raw - skb->nh.raw;
232 skb->h.raw += (hdr->hdrlen + 1) << 3;
233 opt->dst0 = opt->dst1;
235 return (&hdr->nexthdr) - skb->nh.raw;
238 if (hdr->type != IPV6_SRCRT_TYPE_0) {
239 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
243 if (hdr->hdrlen & 0x01) {
244 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
249 * This is the routing header forwarding algorithm from
253 n = hdr->hdrlen >> 1;
255 if (hdr->segments_left > n) {
256 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
260 /* We are about to mangle packet header. Be careful!
261 Do not damage packets queued somewhere.
263 if (skb_cloned(skb)) {
264 struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
268 *skb_ptr = skb = skb2;
269 opt = (struct inet6_skb_parm *)skb2->cb;
270 hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
273 if (skb->ip_summed == CHECKSUM_HW)
274 skb->ip_summed = CHECKSUM_NONE;
276 i = n - --hdr->segments_left;
278 rthdr = (struct rt0_hdr *) hdr;
282 addr_type = ipv6_addr_type(addr);
284 if (addr_type&IPV6_ADDR_MULTICAST) {
289 ipv6_addr_copy(&daddr, addr);
290 ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
291 ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
293 dst_release(xchg(&skb->dst, NULL));
294 ip6_route_input(skb);
295 if (skb->dst->error) {
296 skb_push(skb, skb->data - skb->nh.raw);
297 skb->dst->input(skb);
301 if (skb->dst->dev->flags&IFF_LOOPBACK) {
302 if (skb->nh.ipv6h->hop_limit <= 1) {
303 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
308 skb->nh.ipv6h->hop_limit--;
312 skb_push(skb, skb->data - skb->nh.raw);
313 skb->dst->input(skb);
318 This function inverts received rthdr.
319 NOTE: the specs allow doing this automatically only if
320 the packet is authenticated.
322 I will not discuss it here (though, I am really pissed off at
323 this stupid requirement making rthdr idea useless)
325 Actually, it creates severe problems for us.
326 Embryonic requests have no associated sockets,
327 so the user has no control over them and
328 not only cannot set reply options, but
329 cannot even know that someone wants to connect
332 For now we need to test the engine, so that I created
333 temporary (or permanent) backdoor.
334 If listening socket set IPV6_RTHDR to 2, then we invert header.
/*
 * Build transmit options holding a received type 0 routing header
 * with its address list reversed.
 *
 * sk:  socket passed to sock_kmalloc() for the allocation
 * hdr: received routing header
 *
 * The result is one allocation of sizeof(struct ipv6_txoptions) plus
 * ipv6_optlen(hdr) bytes.  The structure part is zeroed, opt->srcrt
 * points just past it, and tot_len / opt_nflen are set from those
 * sizes.  The fixed part of hdr is copied, segments_left of the copy
 * is set to n = hdr->hdrlen >> 1, and address i of the copy is taken
 * from address n - 1 - i of the original (16 bytes each).
 *
 * NOTE(review): the visible condition inspects segments_left and the
 * header type before inverting; what happens when it is true is not
 * shown here - confirm against the full function.
 */
338 struct ipv6_txoptions *
339 ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
343 [ H1 -> H2 -> ... H_prev ] daddr=ME
346 [ H_prev -> ... -> H1 ] daddr =sender
348 Note, that IP output engine will rewrire this rthdr
349 by rotating it left by one addr.
353 struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
354 struct rt0_hdr *irthdr;
355 struct ipv6_txoptions *opt;
356 int hdrlen = ipv6_optlen(hdr);
358 if (hdr->segments_left ||
359 hdr->type != IPV6_SRCRT_TYPE_0 ||
363 n = hdr->hdrlen >> 1;
364 opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
367 memset(opt, 0, sizeof(*opt));
368 opt->tot_len = sizeof(*opt) + hdrlen;
369 opt->srcrt = (void*)(opt+1);
370 opt->opt_nflen = hdrlen;
372 memcpy(opt->srcrt, hdr, sizeof(*hdr));
373 irthdr = (struct rt0_hdr*)opt->srcrt;
374 /* Obsolete field, MBZ, when originated by us */
376 opt->srcrt->segments_left = n;
378 memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
382 /********************************
384 ********************************/
387 rfc1826 said, that if a host does not implement AUTH header
388 it MAY ignore it. We use this hole 8)
390 Actually, now we can implement OSPFv6 without kernel IPsec.
391 Authentication for poors may be done in user space with the same success.
393 Yes, it means, that we allow application to send/receive
394 raw authentication header. Apparently, we suppose, that it knows
395 what it does and calculates authentication data correctly.
396 Certainly, it is possible only for udp and raw sockets, but not for tcp.
398 AUTH header has 4byte granular length, which kills all the idea
399 behind AUTOMATIC 64bit alignment of IPv6. Now we will lose
400 cpu ticks, checking that sender did not do something stupid
401 and opt->hdrlen is even. Shit! --ANK (980730)
/*
 * Handler for an authentication header (registered for NEXTHDR_AUTH
 * in hdrproc_lst).
 *
 * skb_ptr: address of the packet pointer
 * nhoff:   not referenced in the visible lines of this function
 *
 * After checking with pskb_may_pull() that 8 bytes are available, the
 * header length is computed as (skb->h.raw[1] + 2) * 4 bytes, that
 * many bytes are checked as well, and the header's offset from
 * skb->nh.raw is stored in opt->auth.
 */
404 static int ipv6_auth_hdr(struct sk_buff **skb_ptr, int nhoff)
406 struct sk_buff *skb=*skb_ptr;
407 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
410 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8))
414 * RFC2402 2.2 Payload Length
415 * The 8-bit field specifies the length of AH in 32-bit words
416 * (4-byte units), minus "2".
417 * -- Noriaki Takamiya @USAGI Project
419 len = (skb->h.raw[1]+2)<<2;
424 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+len))
427 opt->auth = skb->h.raw - skb->nh.raw;
436 /* This list MUST NOT contain entry for NEXTHDR_HOP.
437 It is parsed immediately after packet received
438 and if it occurs somewhere in another place we must
/*
 * Dispatch table pairing next-header values with their handlers.
 * ipv6_parse_exthdrs() scans it until it reaches an entry with a
 * negative type.
 */
442 struct hdrtype_proc hdrproc_lst[] = {
443 {NEXTHDR_FRAGMENT, ipv6_reassembly},
444 {NEXTHDR_ROUTING, ipv6_routing_header},
445 {NEXTHDR_DEST, ipv6_dest_opt},
446 {NEXTHDR_NONE, ipv6_nodata},
447 {NEXTHDR_AUTH, ipv6_auth_hdr},
449 {NEXTHDR_ESP, ipv6_esp_hdr},
/*
 * Run the handlers from hdrproc_lst over a packet's extension headers.
 *
 * skb_in: address of the packet pointer (a handler may replace the
 *         packet, see ipv6_routing_header())
 * nhoff:  offset from nh.raw of the byte holding the first header type
 *
 * The table is searched for the current next-header value and the
 * matching handler is called.  A non-negative result is taken as the
 * offset of the following next-header byte, which is then re-read
 * from (*skb_in)->nh.raw.
 */
454 int ipv6_parse_exthdrs(struct sk_buff **skb_in, int nhoff)
456 struct hdrtype_proc *hdrt;
457 u8 nexthdr = (*skb_in)->nh.raw[nhoff];
460 for (hdrt=hdrproc_lst; hdrt->type >= 0; hdrt++) {
461 if (hdrt->type == nexthdr) {
462 if ((nhoff = hdrt->func(skb_in, nhoff)) >= 0) {
463 nexthdr = (*skb_in)->nh.raw[nhoff];
473 /**********************************
475 **********************************/
477 /* Router Alert as of draft-ietf-ipngwg-ipv6router-alert-04 */
/*
 * Router Alert option handler (IPV6_TLV_ROUTERALERT entry of
 * tlvprochopopt_lst).
 *
 * skb:    packet being parsed
 * optoff: offset of the option from skb->nh.raw
 *
 * When the option's length byte is 2, optoff is saved in the ra field
 * of the skb control block; a wrong length is reported through a
 * KERN_DEBUG message.
 */
479 static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
481 if (skb->nh.raw[optoff+1] == 2) {
482 ((struct inet6_skb_parm*)skb->cb)->ra = optoff;
486 printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]);
/*
 * Jumbo Payload option handler (IPV6_TLV_JUMBO entry of
 * tlvprochopopt_lst).
 *
 * skb:    packet being parsed
 * optoff: offset of the option from skb->nh.raw
 *
 * Checks performed, in order:
 *  - the option length byte is 4 and optoff is 2 modulo 4;
 *  - the 32-bit length at optoff + 2 (network byte order) is not
 *    below 0x10000, else a parameter problem points at optoff + 2;
 *  - payload_len in the IPv6 header is zero, else a parameter problem
 *    points at optoff;
 *  - the length does not exceed skb->len - sizeof(struct ipv6hdr),
 *    else Ip6InTruncatedPkts is incremented.
 * A packet longer than the jumbo length plus the IPv6 header is
 * trimmed to that size with __pskb_trim(), and CHECKSUM_HW is reset
 * to CHECKSUM_NONE.
 */
493 static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
497 if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
499 printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]);
503 pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
504 if (pkt_len < 0x10000) {
505 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
508 if (skb->nh.ipv6h->payload_len) {
509 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
513 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
514 IP6_INC_STATS_BH(Ip6InTruncatedPkts);
517 if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
518 __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
519 if (skb->ip_summed == CHECKSUM_HW)
520 skb->ip_summed = CHECKSUM_NONE;
/*
 * TLV handler table for the hop-by-hop options header;
 * ipv6_parse_hopopts() passes it to ip6_parse_tlv().
 */
529 struct tlvtype_proc tlvprochopopt_lst[] = {
530 {IPV6_TLV_ROUTERALERT, ipv6_hop_ra},
531 {IPV6_TLV_JUMBO, ipv6_hop_jumbo},
/*
 * Parse the hop-by-hop options header.
 *
 * skb:   packet being parsed
 * nhoff: not referenced in the visible lines of this function
 *
 * Stores sizeof(struct ipv6hdr) in the hop field of the skb control
 * block and runs ip6_parse_tlv() with tlvprochopopt_lst; when that
 * returns non-zero, sizeof(struct ipv6hdr) is returned.
 */
535 int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
537 ((struct inet6_skb_parm*)skb->cb)->hop = sizeof(struct ipv6hdr);
538 if (ip6_parse_tlv(tlvprochopopt_lst, skb))
539 return sizeof(struct ipv6hdr);
544 * Creating outbound headers.
546 * "build" functions work when skb is filled from head to tail (datagram)
547 * "push" functions work when headers are added from tail to head (tcp)
549 * In both cases we assume, that caller reserved enough room
/*
 * Append a routing header to skb (tail-growing "build" variant).
 *
 * skb:      packet being built; (hdrlen + 1) * 8 bytes are added with
 *           skb_put()
 * prev_hdr: location of the preceding header's next-header byte
 * opt:      routing header template, accessed as struct rt0_hdr
 * addr:     address written into the last slot of the emitted header
 *
 * The fixed part of the template is copied, the template's addresses
 * from index 1 onward are copied to the front of the new address
 * list, and addr is stored in slot hops - 1 (hops = hdrlen / 2).
 * The new header takes over *prev_hdr as its nexthdr and *prev_hdr
 * becomes NEXTHDR_ROUTING.
 *
 * Returns the location of the new header's nexthdr byte.
 */
553 u8 *ipv6_build_rthdr(struct sk_buff *skb, u8 *prev_hdr,
554 struct ipv6_rt_hdr *opt, struct in6_addr *addr)
556 struct rt0_hdr *phdr, *ihdr;
559 ihdr = (struct rt0_hdr *) opt;
561 phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
562 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
564 hops = ihdr->rt_hdr.hdrlen >> 1;
567 memcpy(phdr->addr, ihdr->addr + 1,
568 (hops - 1) * sizeof(struct in6_addr));
570 ipv6_addr_copy(phdr->addr + (hops - 1), addr);
572 phdr->rt_hdr.nexthdr = *prev_hdr;
573 *prev_hdr = NEXTHDR_ROUTING;
574 return &phdr->rt_hdr.nexthdr;
/*
 * Append a copy of the option header opt to skb.
 *
 * ipv6_optlen(opt) bytes are added with skb_put() and filled from
 * opt; the copy's nexthdr is then set to *prev_hdr.
 *
 * NOTE(review): by analogy with ipv6_build_rthdr() this presumably
 * stores type in *prev_hdr and returns the new nexthdr location -
 * confirm against the full function.
 */
577 static u8 *ipv6_build_exthdr(struct sk_buff *skb, u8 *prev_hdr, u8 type, struct ipv6_opt_hdr *opt)
579 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, ipv6_optlen(opt));
581 memcpy(h, opt, ipv6_optlen(opt));
582 h->nexthdr = *prev_hdr;
/*
 * Append a copy of the authentication header opt to skb.
 *
 * The header length is (opt->hdrlen + 2) * 4 bytes; that many bytes
 * are added with skb_put() and filled from opt.  The copy takes over
 * *prev_hdr as its nexthdr and *prev_hdr becomes NEXTHDR_AUTH.
 *
 * NOTE(review): the return statement is not shown here; presumably
 * the new nexthdr location, as in ipv6_build_rthdr() - confirm.
 */
587 static u8 *ipv6_build_authhdr(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_opt_hdr *opt)
589 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, (opt->hdrlen+2)<<2);
591 memcpy(h, opt, (opt->hdrlen+2)<<2);
592 h->nexthdr = *prev_hdr;
593 *prev_hdr = NEXTHDR_AUTH;
/*
 * Append, in this order: hop-by-hop options (opt->hopopt), an 8-byte
 * area carrying an IPV6_TLV_JUMBO option whose 32-bit value is
 * htonl(jumbolen), destination options (opt->dst0opt) and the routing
 * header (opt->srcrt, built with daddr).
 *
 * skb:      packet being built
 * prev_hdr: location of the preceding next-header byte; it is
 *           re-pointed after each header that is emitted
 * opt:      transmit options (tested against NULL before hopopt use)
 * daddr:    address handed to ipv6_build_rthdr()
 * jumbolen: value written into the jumbo option
 *
 * When hop-by-hop options were emitted, the jumbo area starts with
 * IPV6_TLV_PADN; the lines that chain a header h through prev_hdr
 * with NEXTHDR_HOP appear to cover the case without them.
 *
 * NOTE(review): several guarding conditions are not shown here -
 * confirm which steps are optional against the full function.
 */
598 u8 *ipv6_build_nfrag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt,
599 struct in6_addr *daddr, u32 jumbolen)
601 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb->data;
603 if (opt && opt->hopopt)
604 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_HOP, opt->hopopt);
607 u8 *jumboopt = (u8 *)skb_put(skb, 8);
609 if (opt && opt->hopopt) {
610 *jumboopt++ = IPV6_TLV_PADN;
614 h = (struct ipv6_opt_hdr *)jumboopt;
615 h->nexthdr = *prev_hdr;
618 *prev_hdr = NEXTHDR_HOP;
619 prev_hdr = &h->nexthdr;
621 jumboopt[0] = IPV6_TLV_JUMBO;
623 *(u32*)(jumboopt+2) = htonl(jumbolen);
627 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst0opt);
629 prev_hdr = ipv6_build_rthdr(skb, prev_hdr, opt->srcrt, daddr);
/*
 * Append the authentication header (opt->auth) and then the
 * destination options taken from opt->dst1opt, re-pointing prev_hdr
 * after each one.
 *
 * NOTE(review): the conditions guarding each step and the return
 * statement are not shown here - confirm against the full function.
 */
634 u8 *ipv6_build_frag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt)
637 prev_hdr = ipv6_build_authhdr(skb, prev_hdr, opt->auth);
639 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst1opt);
/*
 * Prepend a routing header to skb (head-growing "push" variant).
 *
 * skb:    packet being built; (hdrlen + 1) * 8 bytes are added with
 *         skb_push()
 * proto:  next-header value of what currently follows; it is stored in
 *         the new header and replaced by NEXTHDR_ROUTING
 * opt:    routing header template, accessed as struct rt0_hdr
 * addr_p: in: address written into the last slot of the new header;
 *         out: re-pointed at the template's address array (ihdr->addr)
 *
 * The fixed part of the template is copied and the template's
 * addresses from index 1 onward are copied to the front of the new
 * address list (hops = hdrlen / 2).
 */
643 static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
644 struct ipv6_rt_hdr *opt,
645 struct in6_addr **addr_p)
647 struct rt0_hdr *phdr, *ihdr;
650 ihdr = (struct rt0_hdr *) opt;
652 phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
653 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
655 hops = ihdr->rt_hdr.hdrlen >> 1;
658 memcpy(phdr->addr, ihdr->addr + 1,
659 (hops - 1) * sizeof(struct in6_addr));
661 ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
662 *addr_p = ihdr->addr;
664 phdr->rt_hdr.nexthdr = *proto;
665 *proto = NEXTHDR_ROUTING;
/*
 * Prepend a copy of the option header opt to skb: ipv6_optlen(opt)
 * bytes are added with skb_push() and filled from opt.
 *
 * NOTE(review): by analogy with ipv6_push_rthdr() this presumably
 * chains the header through *proto using type - confirm against the
 * full function.
 */
668 static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
670 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
672 memcpy(h, opt, ipv6_optlen(opt));
/*
 * Prepend a copy of the authentication header opt to skb.
 *
 * The header length is (opt->hdrlen + 2) * 4 bytes; that many bytes
 * are added with skb_push() and filled from opt, and *proto is set to
 * NEXTHDR_AUTH.
 */
677 static void ipv6_push_authhdr(struct sk_buff *skb, u8 *proto, struct ipv6_opt_hdr *opt)
679 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, (opt->hdrlen+2)<<2);
681 memcpy(h, opt, (opt->hdrlen+2)<<2);
683 *proto = NEXTHDR_AUTH;
/*
 * Prepend, in this order: the routing header (opt->srcrt), the
 * destination options from opt->dst0opt and the hop-by-hop options
 * (opt->hopopt).  daddr is handed to ipv6_push_rthdr(), which may
 * re-point it.
 *
 * NOTE(review): the conditions guarding each step are not shown
 * here - confirm against the full function.
 */
686 void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
688 struct in6_addr **daddr)
691 ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
693 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
695 ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
/*
 * Prepend the destination options from opt->dst1opt and then the
 * authentication header (opt->auth), chaining both through *proto.
 *
 * NOTE(review): the conditions guarding each step are not shown
 * here - confirm against the full function.
 */
698 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
701 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
703 ipv6_push_authhdr(skb, proto, opt->auth);
/*
 * Duplicate a transmit options block.
 *
 * sk:  socket passed to sock_kmalloc() for the allocation
 * opt: block to copy; opt->tot_len bytes are allocated and copied
 *
 * After the memcpy() the embedded header pointers (hopopt, dst0opt,
 * dst1opt, auth, srcrt) still refer to the original block, so each is
 * shifted by dif, the byte distance between the copy and the
 * original, to make it point into the copy.
 *
 * NOTE(review): the checks guarding the allocation result and each
 * pointer adjustment are not shown here - confirm against the full
 * function.
 */
706 struct ipv6_txoptions *
707 ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
709 struct ipv6_txoptions *opt2;
711 opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
713 long dif = (char*)opt2 - (char*)opt;
714 memcpy(opt2, opt, opt->tot_len);
716 *((char**)&opt2->hopopt) += dif;
718 *((char**)&opt2->dst0opt) += dif;
720 *((char**)&opt2->dst1opt) += dif;
722 *((char**)&opt2->auth) += dif;
724 *((char**)&opt2->srcrt) += dif;
731 * find out if nexthdr is a well-known extension header or a protocol
/*
 * Returns non-zero when nexthdr is one of NEXTHDR_HOP,
 * NEXTHDR_ROUTING, NEXTHDR_FRAGMENT, NEXTHDR_AUTH, NEXTHDR_NONE or
 * NEXTHDR_DEST, and zero for any other value.
 */
734 int ipv6_ext_hdr(u8 nexthdr)
737 * find out if nexthdr is an extension header or a protocol
739 return ( (nexthdr == NEXTHDR_HOP) ||
740 (nexthdr == NEXTHDR_ROUTING) ||
741 (nexthdr == NEXTHDR_FRAGMENT) ||
742 (nexthdr == NEXTHDR_AUTH) ||
743 (nexthdr == NEXTHDR_NONE) ||
744 (nexthdr == NEXTHDR_DEST) );
748 * Skip any extension headers. This is used by the ICMP module.
750 * Note that strictly speaking this conflicts with RFC1883 4.0:
751 * ...The contents and semantics of each extension header determine whether
752 * or not to proceed to the next header. Therefore, extension headers must
753 * be processed strictly in the order they appear in the packet; a
754 * receiver must not, for example, scan through a packet looking for a
755 * particular kind of extension header and process that header prior to
756 * processing all preceding ones.
758 * We do exactly this. This is a protocol bug. We can't decide after
759 * seeing an unknown discard-with-error flavour TLV option if it's an
760 * ICMP error message or not (errors should never be sent in reply to
761 * ICMP error messages).
763 * But I see no other way to do this. This might need to be reexamined
764 * when Linux implements ESP (and maybe AUTH) headers.
767 * This function parses (probably truncated) exthdr set "hdr"
768 * of length "len". "nexthdrp" initially points to some place,
769 * where type of the first header can be found.
771 * It skips all well-known exthdrs, and returns pointer to the start
772 * of unparsable area i.e. the first header with unknown type.
773 * If it is not NULL *nexthdr is updated by type/protocol of this header.
775 * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL.
776 * - it may return pointer pointing beyond end of packet,
777 * if the last recognized header is truncated in the middle.
778 * - if packet is truncated, so that all parsed headers are skipped,
780 * - First fragment header is skipped, not-first ones
781 * are considered as unparsable.
782 * - ESP is unparsable for now and considered like
783 * normal payload protocol.
784 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
789 int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len)
791 u8 nexthdr = *nexthdrp;
793 while (ipv6_ext_hdr(nexthdr)) {
794 struct ipv6_opt_hdr hdr;
797 if (len < (int)sizeof(struct ipv6_opt_hdr))
799 if (nexthdr == NEXTHDR_NONE)
801 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
803 if (nexthdr == NEXTHDR_FRAGMENT) {
804 unsigned short frag_off;
805 if (skb_copy_bits(skb,
806 start+offsetof(struct frag_hdr,
813 if (ntohs(frag_off) & ~0x7)
816 } else if (nexthdr == NEXTHDR_AUTH)
817 hdrlen = (hdr.hdrlen+2)<<2;
819 hdrlen = ipv6_optlen(&hdr);
821 nexthdr = hdr.nexthdr;