2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.1.1.1 2005/04/11 02:51:13 jack Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/route.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
/*
 * ipv6_select_ident - fill in the Identification field of a Fragment header.
 *
 * A single global counter is used, serialized by ip6_id_lock (bh-safe,
 * since fragmentation can run in softirq context).  The counter skips 0
 * on wrap, so identifications cycle through 1..2^32-1.  The value is
 * stored in network byte order (htonl) — see the "frag id should be in
 * NBO" note in the file changelog.
 */
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
static u32 ipv6_fragmentation_id = 1;
static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
spin_lock_bh(&ip6_id_lock);
fhdr->identification = htonl(ipv6_fragmentation_id);
/* never hand out 0: restart at 1 on wrap-around */
if (++ipv6_fragmentation_id == 0)
ipv6_fragmentation_id = 1;
spin_unlock_bh(&ip6_id_lock);
/*
 * ip6_output_finish - last step of the output path: attach the
 * link-layer header and transmit.
 *
 * Fast path: if the dst carries a cached hardware header (hh_cache),
 * copy it in front of the network header under hh_lock and send via
 * hh_output().  Otherwise fall back to the neighbour's output routine
 * (which performs address resolution as needed).
 * NOTE(review): the memcpy always copies 16 bytes of hh_data into the
 * area just before skb->data, while only hh_len bytes are then pushed —
 * presumably hh_data is a fixed 16-byte aligned cache slot; confirm
 * against struct hh_cache.
 */
static inline int ip6_output_finish(struct sk_buff *skb)
struct dst_entry *dst = skb->dst;
struct hh_cache *hh = dst->hh;
read_lock_bh(&hh->hh_lock);
memcpy(skb->data - 16, hh->hh_data, 16);
read_unlock_bh(&hh->hh_lock);
skb_push(skb, hh->hh_len);
return hh->hh_output(skb);
} else if (dst->neighbour)
return dst->neighbour->output(skb);
/* dev_loopback_xmit for use with netfilter: delivers a local copy of an
 * outgoing multicast packet back into the stack, as if it had been
 * received on the loopback interface.
 */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
/* re-label the clone as a received packet */
newskb->mac.raw = newskb->data;
__skb_pull(newskb, newskb->nh.raw - newskb->data);
newskb->pkt_type = PACKET_LOOPBACK;
/* locally generated data: no need to re-verify checksums on receive */
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
/*
 * ip6_output - dst output routine for locally generated IPv6 packets.
 *
 * For multicast destinations we loop a clone back to the local stack
 * (via ip6_dev_loopback_xmit) when the device is not loopback, the
 * socket has not disabled mc_loop, and this host is itself a member of
 * the destination group.  The packet then traverses the netfilter
 * NF_IP6_POST_ROUTING hook on its way to ip6_output_finish().
 */
int ip6_output(struct sk_buff *skb)
struct dst_entry *dst = skb->dst;
struct net_device *dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
if (!(dev->flags&IFF_LOOPBACK) &&
(skb->sk == NULL || skb->sk->net_pinfo.af_inet6.mc_loop) &&
ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr)) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
is not supported in any case.
NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
ip6_dev_loopback_xmit);
/* hop_limit 0 on a multicast packet: do not forward it off-host */
if (skb->nh.ipv6h->hop_limit == 0) {
IP6_INC_STATS(Ip6OutMcastPkts);
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
134 #ifdef CONFIG_NETFILTER
/*
 * route6_me_harder - redo the routing decision for an skb whose headers
 * were altered by netfilter (e.g. mangle), so that the cached dst
 * matches the (possibly rewritten) addresses.
 *
 * Builds a flowi from the packet's current IPv6 header, performs a
 * fresh route lookup and replaces skb->dst.  The caller
 * (ip6_maybe_reroute) treats a non-zero return as failure.
 */
static int route6_me_harder(struct sk_buff *skb)
struct ipv6hdr *iph = skb->nh.ipv6h;
struct dst_entry *dst;
fl.proto = iph->nexthdr;
fl.fl6_dst = &iph->daddr;
fl.fl6_src = &iph->saddr;
fl.oif = skb->sk ? skb->sk->bound_dev_if : 0;
fl.fl6_flowlabel = 0;
/* transport ports are unknown/irrelevant here: wildcard the selector */
fl.uli_u.ports.dport = 0;
fl.uli_u.ports.sport = 0;
dst = ip6_route_output(skb->sk, &fl);
printk(KERN_DEBUG "route6_me_harder: No more route.\n");
/* Drop old route. */
dst_release(skb->dst);
/*
 * ip6_maybe_reroute - okfn for the NF_IP6_LOCAL_OUT hook.
 *
 * If netfilter marked the packet as altered (NFC_ALTERED), the routing
 * decision is redone via route6_me_harder() before handing the skb to
 * its dst output routine.  Without netfilter this is just
 * skb->dst->output(skb).
 */
static inline int ip6_maybe_reroute(struct sk_buff *skb)
#ifdef CONFIG_NETFILTER
if (skb->nfcache & NFC_ALTERED){
if (route6_me_harder(skb) != 0){
#endif /* CONFIG_NETFILTER */
return skb->dst->output(skb);
179 * xmit an sk_buff (used by TCP)
/*
 * ip6_xmit - transmit an sk_buff for a connected socket (main path
 * used by TCP).
 *
 * Ensures headroom for extension headers + IPv6 header + (16-byte
 * rounded) link-layer header, reallocating the skb head if necessary;
 * pushes fragmentable and non-fragmentable option headers, builds the
 * IPv6 header, and sends via the NF_IP6_LOCAL_OUT hook if the packet
 * fits the path MTU.  Oversized packets trigger an ICMPV6_PKT_TOOBIG
 * notification back to the sender itself.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
struct ipv6_txoptions *opt)
struct ipv6_pinfo * np = sk ? &sk->net_pinfo.af_inet6 : NULL;
struct in6_addr *first_hop = fl->nl_u.ip6_u.daddr;
struct dst_entry *dst = skb->dst;
u8 proto = fl->proto;
int seg_len = skb->len;
/* First: exthdrs may take lots of space (~8K for now),
 * MAX_HEADER is not enough.
 */
head_room = opt->opt_nflen + opt->opt_flen;
seg_len += head_room;
/* link-layer header length rounded up to a 16-byte multiple */
head_room += sizeof(struct ipv6hdr) + ((dst->dev->hard_header_len + 15)&~15);
if (skb_headroom(skb) < head_room) {
struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
skb_set_owner_w(skb, sk);
ipv6_push_frag_opts(skb, opt, &proto);
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
/* Fill in the IPv6 header: version 6 plus the flow label */
*(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
hlimit = np->hop_limit;
/* unconnected/unspecified: fall back to the route's hop limit */
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
hdr->hop_limit = hlimit;
ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
ipv6_addr_copy(&hdr->daddr, first_hop);
if (skb->len <= dst->pmtu) {
IP6_INC_STATS(Ip6OutRequests);
return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
/* packet exceeds path MTU: report back to ourselves */
printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
251 * To avoid extra problems ND packets are sent through this
252 * routine. It's code duplication but I really want to avoid
253 * extra checks since ipv6_build_header is used by TCP (which
254 * is for us performance critical)
/*
 * ip6_nd_hdr - build a bare IPv6 header for a neighbour-discovery
 * packet (no extension headers, no netfilter traversal).
 *
 * Deliberate code duplication with the generic header builders, to
 * keep the TCP fast path free of ND-only checks (see the comment
 * above this function).
 */
int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
struct in6_addr *saddr, struct in6_addr *daddr,
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
skb->protocol = htons(ETH_P_IPV6);
totlen = len + sizeof(struct ipv6hdr);
hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
/* version 6, zero traffic class and flow label */
*(u32*)hdr = htonl(0x60000000);
hdr->payload_len = htons(len);
hdr->nexthdr = proto;
hdr->hop_limit = np->hop_limit;
ipv6_addr_copy(&hdr->saddr, saddr);
ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_bld_1 - append an IPv6 header at the skb tail, filled from the
 * flow information, and return a pointer to it.
 *
 * pktlength is the total packet length including the IPv6 header;
 * payload_len is derived from it.  Used both by the single-packet
 * path in ip6_build_xmit() and for the per-fragment template in
 * ip6_frag_xmit().
 */
static struct ipv6hdr * ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
int hlimit, unsigned pktlength)
skb->nh.raw = skb_put(skb, sizeof(struct ipv6hdr));
/* version 6 combined with the caller-supplied flow label */
*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
hdr->payload_len = htons(pktlength - sizeof(struct ipv6hdr));
hdr->hop_limit = hlimit;
hdr->nexthdr = fl->proto;
ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
/*
 * ipv6_build_fraghdr - append a Fragment extension header and splice it
 * into the next-header chain.
 *
 * *prev_hdr (the previous header's next-header byte) is rewritten to
 * NEXTHDR_FRAGMENT, and the old value becomes the fragment header's
 * own nexthdr.  Returns the address of the fragment header's nexthdr
 * byte so the caller can continue the chain.
 */
static __inline__ u8 * ipv6_build_fraghdr(struct sk_buff *skb, u8* prev_hdr, unsigned offset)
struct frag_hdr *fhdr;
fhdr = (struct frag_hdr *) skb_put(skb, sizeof(struct frag_hdr));
fhdr->nexthdr = *prev_hdr;
*prev_hdr = NEXTHDR_FRAGMENT;
/* NOTE(review): this assignment to the local parameter looks redundant
 * given the explicit return below — harmless either way. */
prev_hdr = &fhdr->nexthdr;
fhdr->frag_off = htons(offset);
ipv6_select_ident(skb, fhdr);
return &fhdr->nexthdr;
/*
 * ip6_frag_xmit - build and send a datagram that exceeds the MTU as a
 * train of IPv6 fragments.
 *
 * The unfragmentable part (IPv6 header + per-packet extension headers
 * + Fragment header) is built once in last_skb; earlier fragments are
 * skb_copy()'d from it, patched (frag_off, M-flag), filled via
 * getfrag() and sent first, the true last fragment going out at the
 * end.  Each fragment passes the NF_IP6_LOCAL_OUT hook through
 * ip6_maybe_reroute().
 */
static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
const void *data, struct dst_entry *dst,
struct flowi *fl, struct ipv6_txoptions *opt,
struct in6_addr *final_dst,
int hlimit, int flags, unsigned length, int mtu)
struct sk_buff *last_skb;
/*
 * Extension header order:
 * Hop-by-hop -> Dest0 -> Routing -> Fragment -> Auth -> Dest1 -> rest (...)
 *
 * We must build the non-fragmented part that
 * will be in every packet... this also means
 * that other extension headers (Dest, Auth, etc)
 * must be considered in the data to be fragmented
 */
unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr);
unfrag_len += opt->opt_nflen;
last_len += opt->opt_flen;
/*
 * Length of fragmented part on every packet but
 * the last must be an:
 * "integer multiple of 8 octets".
 */
frag_len = (mtu - unfrag_len) & ~0x7;
/* Unfragmentable part exceeds mtu. */
ipv6_local_error(sk, EMSGSIZE, fl, mtu);
nfrags = last_len / frag_len;
/*
 * We must send from end to start because of
 * UDP/ICMP checksums. We do a funny trick:
 * fill the last skb first with the fixed
 * header (and its data) and then use it
 * to create the following segments and send it
 * in the end. If the peer is checking the M_flag
 * to trigger the reassembly code then this
 * might be a good idea.
 */
frag_off = nfrags * frag_len;
last_len -= frag_off;
frag_off -= frag_len;
/* And it is implementation problem: for now we assume, that
all the exthdrs will fit to the first fragment.
if (frag_len < opt->opt_flen) {
ipv6_local_error(sk, EMSGSIZE, fl, mtu);
data_off = frag_off - opt->opt_flen;
/* allocate the template/last fragment, with room for the hardware
 * header (rounded up to 16 bytes by the skb_reserve below) */
last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len +
dst->dev->hard_header_len + 15,
flags & MSG_DONTWAIT, &err);
if (last_skb == NULL)
last_skb->dst = dst_clone(dst);
skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15);
hdr = ip6_bld_1(sk, last_skb, fl, hlimit, frag_len+unfrag_len);
prev_hdr = &hdr->nexthdr;
if (opt && opt->opt_nflen)
prev_hdr = ipv6_build_nfrag_opts(last_skb, prev_hdr, opt, final_dst, 0);
prev_hdr = ipv6_build_fraghdr(last_skb, prev_hdr, frag_off);
/* remember where the fragment header sits, so copies can patch it */
fhdr_dist = prev_hdr - last_skb->data;
err = getfrag(data, &hdr->saddr, last_skb->tail, data_off, last_len);
struct frag_hdr *fhdr2;
/* clone the template for this (non-last) fragment */
skb = skb_copy(last_skb, sk->allocation);
IP6_INC_STATS(Ip6FragFails);
frag_off -= frag_len;
data_off -= frag_len;
fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist);
/* low bit is the M (more fragments) flag: set on all but the last */
fhdr2->frag_off = htons(frag_off | 1);
/* Write fragmentable exthdrs to the first chunk */
if (nfrags == 0 && opt && opt->opt_flen) {
ipv6_build_frag_opts(skb, &fhdr2->nexthdr, opt);
frag_len -= opt->opt_flen;
err = getfrag(data, &hdr->saddr,skb_put(skb, frag_len),
IP6_INC_STATS(Ip6FragCreates);
IP6_INC_STATS(Ip6OutRequests);
err = NF_HOOK(PF_INET6,NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
IP6_INC_STATS(Ip6FragFails);
/* final fragment: fix up the payload length of the template */
hdr->payload_len = htons(unfrag_len + last_len - sizeof(struct ipv6hdr));
/*
 * update last_skb to reflect the getfrag we did
 */
skb_put(last_skb, last_len);
IP6_INC_STATS(Ip6FragCreates);
IP6_INC_STATS(Ip6FragOKs);
IP6_INC_STATS(Ip6OutRequests);
return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, last_skb, NULL,dst->dev, ip6_maybe_reroute);
/*
 * ip6_build_xmit - generic datagram transmit (UDP/raw sockets).
 *
 * Resolves the route (reusing the socket's cached dst when still
 * valid), picks a source address and hop limit if unspecified, then
 * either builds and sends a single packet (when it fits the MTU) or
 * defers to ip6_frag_xmit().  Honours IPV6_HDRINCL, jumbograms and the
 * path-MTU-discovery socket settings.
 */
int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
struct flowi *fl, unsigned length,
struct ipv6_txoptions *opt, int hlimit, int flags)
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct in6_addr *final_dst = NULL;
struct dst_entry *dst;
unsigned int pktlength, jumbolen, mtu;
struct in6_addr saddr;
/* with a source route, route towards the first hop of the routing
 * header and keep the real destination for the option builder */
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
final_dst = fl->fl6_dst;
fl->fl6_dst = rt0->addr;
if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
fl->oif = np->mcast_oif;
dst = __sk_dst_check(sk, np->dst_cookie);
struct rt6_info *rt = (struct rt6_info*)dst;
/* Yes, checking route validity in not connected
case is not very simple. Take into account,
that we do not support routing by source, TOS,
and MSG_DONTROUTE --ANK (980726)
1. If route was host route, check that
cached destination is current.
If it is network route, we still may
check its validity using saved pointer
to the last used address: daddr_cache.
We do not want to save whole address now,
(because main consumer of this service
is tcp, which has not this problem),
so that the last trick works only on connected
2. oif also should be the same.
if (((rt->rt6i_dst.plen != 128 ||
ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
&& (np->daddr_cache == NULL ||
ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
|| (fl->oif && fl->oif != dst->dev->ifindex)) {
dst = ip6_route_output(sk, fl);
IP6_INC_STATS(Ip6OutNoRoutes);
/* no source address given: derive one from the chosen route */
if (fl->fl6_src == NULL) {
err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr);
printk(KERN_DEBUG "ip6_build_xmit: "
"no availiable source address\n");
fl->fl6_src = &saddr;
/* hop limit precedence: caller value, else mcast/unicast socket
 * default, else the route's hop limit */
if (ipv6_addr_is_multicast(fl->fl6_dst))
hlimit = np->mcast_hops;
hlimit = np->hop_limit;
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
if (!sk->protinfo.af_inet.hdrincl) {
pktlength += sizeof(struct ipv6hdr);
pktlength += opt->opt_flen + opt->opt_nflen;
/* payload_len is 16 bits: beyond that a jumbo option is needed */
if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
It is assumed, that in the case of hdrincl
jumbo option is supplied by user.
jumbolen = pktlength - sizeof(struct ipv6hdr);
if (np->frag_size < mtu) {
else if (np->pmtudisc == IPV6_PMTUDISC_DONT)
/* Critical arithmetic overflow check.
FIXME: may gcc optimize it out? --ANK (980726)
if (pktlength < length) {
ipv6_local_error(sk, EMSGSIZE, fl, mtu);
if (flags&MSG_CONFIRM)
if (pktlength <= mtu) {
struct net_device *dev = dst->dev;
skb = sock_alloc_send_skb(sk, pktlength + 15 +
dev->hard_header_len,
flags & MSG_DONTWAIT, &err);
IP6_INC_STATS(Ip6OutDiscards);
skb->dst = dst_clone(dst);
skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
hdr = (struct ipv6hdr *) skb->tail;
if (!sk->protinfo.af_inet.hdrincl) {
/* with a jumbo option payload_len must be 0 in the base header */
ip6_bld_1(sk, skb, fl, hlimit,
jumbolen ? sizeof(struct ipv6hdr) : pktlength);
if (opt || jumbolen) {
u8 *prev_hdr = &hdr->nexthdr;
prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen);
if (opt && opt->opt_flen)
ipv6_build_frag_opts(skb, prev_hdr, opt);
skb_put(skb, length);
err = getfrag(data, &hdr->saddr,
((char *) hdr) + (pktlength - length),
IP6_INC_STATS(Ip6OutRequests);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
/* too big and not allowed to fragment: tell the application */
if (sk->protinfo.af_inet.hdrincl || jumbolen ||
np->pmtudisc == IPV6_PMTUDISC_DO) {
ipv6_local_error(sk, EMSGSIZE, fl, mtu);
err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit,
/* cache the route on the socket for subsequent sends */
ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
err = np->recverr ? net_xmit_errno(err) : 0;
/*
 * ip6_call_ra_chain - hand a Router Alert packet to interested raw
 * sockets registered on ip6_ra_chain whose selector matches sel.
 *
 * Walks the chain under ip6_ra_lock; every matching socket but the
 * last receives a clone, and the final matching socket receives the
 * original skb (so the packet is consumed).
 * NOTE(review): return values are on lines not visible in this chunk —
 * the caller in ip6_forward() treats non-zero as "delivered, stop
 * forwarding"; confirm against the full source.
 */
int ip6_call_ra_chain(struct sk_buff *skb, int sel)
struct ip6_ra_chain *ra;
struct sock *last = NULL;
read_lock(&ip6_ra_lock);
for (ra = ip6_ra_chain; ra; ra = ra->next) {
struct sock *sk = ra->sk;
if (sk && ra->sel == sel) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
rawv6_rcv(last, skb2);
rawv6_rcv(last, skb);
read_unlock(&ip6_ra_lock);
read_unlock(&ip6_ra_lock);
/*
 * ip6_forward_finish - okfn for the NF_IP6_FORWARD hook: pass the
 * accepted packet on to its route's output function.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
return skb->dst->output(skb);
/*
 * ip6_forward - forward a received IPv6 packet.
 *
 * Handles Router Alert delivery to raw sockets, hop-limit expiry
 * (ICMPV6_TIME_EXCEED), redirect generation when the packet leaves on
 * the interface it arrived on, source-address sanity checks, and
 * path-MTU enforcement (ICMPV6_PKT_TOOBIG), before passing the packet
 * through the NF_IP6_FORWARD hook to ip6_forward_finish().
 */
int ip6_forward(struct sk_buff *skb)
struct dst_entry *dst = skb->dst;
struct ipv6hdr *hdr = skb->nh.ipv6h;
struct inet6_skb_parm *opt =(struct inet6_skb_parm*)skb->cb;
/* forwarding disabled on this host */
if (ipv6_devconf.forwarding == 0)
skb->ip_summed = CHECKSUM_NONE;
/*
 * We DO NOT make any processing on
 * RA packets, pushing them to user level AS IS
 * without any WARRANTY that application will be able
 * to interpret them. The reason is that we
 * cannot make anything clever here.
 *
 * We are not end-node, so that if packet contains
 * AH/ESP, we cannot make anything.
 * Defragmentation also would be mistake, RA packets
 * cannot be fragmented, because there is no warranty
 * that different fragments will go along one path. --ANK
 */
u8 *ptr = skb->nh.raw + opt->ra;
/* ptr[2..3] is the 16-bit Router Alert value in the option */
if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
/*
 * check and decrement ttl
 */
if (hdr->hop_limit <= 1) {
/* Force OUTPUT device used as source address */
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
/* IPv6 specs say nothing about it, but it is clear that we cannot
send redirects to source routed frames.
if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
struct in6_addr *target = NULL;
struct neighbour *n = dst->neighbour;
/*
 * incoming and outgoing devices are the same
 */
rt = (struct rt6_info *) dst;
/* redirect to the gateway for indirect routes, to the final
 * destination for direct ones */
if ((rt->rt6i_flags & RTF_GATEWAY))
target = (struct in6_addr*)&n->primary_key;
target = &hdr->daddr;
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
if (xrlim_allow(dst, 1*HZ))
ndisc_send_redirect(skb, n, target);
} else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
|IPV6_ADDR_LINKLOCAL)) {
/* This check is security critical. */
if (skb->len > dst->pmtu) {
/* Again, force OUTPUT device used as source address */
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
IP6_INC_STATS_BH(Ip6InTooBigErrors);
/* get a private, writable copy before mangling the header */
if (skb_cow(skb, dst->dev->hard_header_len))
/* Mangling hops number delayed to point after skb COW */
IP6_INC_STATS_BH(Ip6OutForwDatagrams);
return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
IP6_INC_STATS_BH(Ip6InAddrErrors);