2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <pedro_m@yahoo.com>
8 * $Id: ip6_output.c,v 1.33 2001/09/20 00:35:35 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/route.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
/*
 * Stamp *fhdr with the next global IPv6 fragmentation id, stored in
 * network byte order (htonl).  A single file-local counter is shared
 * by all senders and protected by ip6_id_lock; the value 0 is skipped
 * so a zero identification never appears on the wire.
 * NOTE(review): this excerpt is sparse -- braces/lines are elided.
 */
53 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
55 static u32 ipv6_fragmentation_id = 1;
56 static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
58 spin_lock_bh(&ip6_id_lock);
59 fhdr->identification = htonl(ipv6_fragmentation_id);
/* wrap back to 1, never 0 */
60 if (++ipv6_fragmentation_id == 0)
61 ipv6_fragmentation_id = 1;
62 spin_unlock_bh(&ip6_id_lock);
/*
 * Final output step: prepend the link-layer header and hand the skb to
 * the device.  If the route has a cached hardware header (dst->hh),
 * copy it in front of the packet under hh_lock and call hh_output();
 * otherwise fall back to the neighbour's output method.
 * NOTE(review): the hh != NULL test and error path are elided in this
 * excerpt -- confirm against the full source.
 */
65 static inline int ip6_output_finish(struct sk_buff *skb)
68 struct dst_entry *dst = skb->dst;
69 struct hh_cache *hh = dst->hh;
74 read_lock_bh(&hh->hh_lock);
/* hh_data holds an aligned copy; copy hh_alen bytes but only
 * expose hh_len bytes of actual header via skb_push. */
75 hh_alen = HH_DATA_ALIGN(hh->hh_len);
76 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
77 read_unlock_bh(&hh->hh_lock);
78 skb_push(skb, hh->hh_len);
79 return hh->hh_output(skb);
80 } else if (dst->neighbour)
81 return dst->neighbour->output(skb);
/* dev_loopback_xmit for use with netfilter.
 * Prepares a cloned multicast skb for local delivery: rewinds the MAC
 * pointer, pulls the buffer to the network header, marks the packet as
 * loopback, and skips checksum verification (we generated it).
 */
89 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
91 newskb->mac.raw = newskb->data;
92 __skb_pull(newskb, newskb->nh.raw - newskb->data);
93 newskb->pkt_type = PACKET_LOOPBACK;
94 newskb->ip_summed = CHECKSUM_UNNECESSARY;
/* a loopback clone must still carry a route */
95 BUG_TRAP(newskb->dst);
/*
 * Transmit a fully built IPv6 packet through the POST_ROUTING netfilter
 * hook to ip6_output_finish().  For multicast destinations that the
 * sending host itself listens to (and mc_loop allows), a clone is first
 * looped back via ip6_dev_loopback_xmit(); a hop limit of 0 on a
 * multicast packet is handled specially (path elided in this excerpt).
 */
102 int ip6_output(struct sk_buff *skb)
104 struct dst_entry *dst = skb->dst;
105 struct net_device *dev = dst->dev;
107 skb->protocol = htons(ETH_P_IPV6);
110 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
/* loop a copy back to ourselves if we are a member of the group,
 * unless the socket disabled multicast loopback */
111 if (!(dev->flags&IFF_LOOPBACK) &&
112 (skb->sk == NULL || skb->sk->net_pinfo.af_inet6.mc_loop) &&
113 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
114 &skb->nh.ipv6h->saddr)) {
115 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117 /* Do not check for IFF_ALLMULTI; multicast routing
118 is not supported in any case.
121 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
123 ip6_dev_loopback_xmit);
125 if (skb->nh.ipv6h->hop_limit == 0) {
131 IP6_INC_STATS(Ip6OutMcastPkts);
134 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138 #ifdef CONFIG_NETFILTER
/*
 * Re-route an skb whose headers a netfilter hook may have rewritten.
 * Rebuilds a flow from the current IPv6 header (and the socket's bound
 * device, if any), performs a fresh route lookup, and replaces the old
 * dst.  NOTE(review): the error return and the skb->dst reassignment
 * are elided in this excerpt.
 */
139 int ip6_route_me_harder(struct sk_buff *skb)
141 struct ipv6hdr *iph = skb->nh.ipv6h;
142 struct dst_entry *dst;
145 fl.proto = iph->nexthdr;
146 fl.fl6_dst = &iph->daddr;
147 fl.fl6_src = &iph->saddr;
148 fl.oif = skb->sk ? skb->sk->bound_dev_if : 0;
149 fl.fl6_flowlabel = 0;
/* ports are irrelevant for plain re-routing */
150 fl.uli_u.ports.dport = 0;
151 fl.uli_u.ports.sport = 0;
153 dst = ip6_route_output(skb->sk, &fl);
157 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
162 /* Drop old route. */
163 dst_release(skb->dst);
/*
 * LOCAL_OUT completion: if netfilter altered the packet (NFC_ALTERED),
 * redo the route lookup before emitting via the route's output method.
 */
170 static inline int ip6_maybe_reroute(struct sk_buff *skb)
172 #ifdef CONFIG_NETFILTER
173 if (skb->nfcache & NFC_ALTERED){
174 if (ip6_route_me_harder(skb) != 0){
179 #endif /* CONFIG_NETFILTER */
180 return skb->dst->output(skb);
184 * xmit an sk_buff (used by TCP)
/*
 * Build the IPv6 header (and any extension headers from opt) in front
 * of an skb and send it through the LOCAL_OUT netfilter hook.
 * Reallocates headroom if the extension headers do not fit, selects the
 * hop limit from the socket or the route, and falls back to an ICMPv6
 * "packet too big" to ourselves when the result exceeds the path MTU.
 */
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt)
190 struct ipv6_pinfo * np = sk ? &sk->net_pinfo.af_inet6 : NULL;
191 struct in6_addr *first_hop = fl->nl_u.ip6_u.daddr;
192 struct dst_entry *dst = skb->dst;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
201 /* First: exthdrs may take lots of space (~8K for now)
202 MAX_HEADER is not enough.
204 head_room = opt->opt_nflen + opt->opt_flen;
205 seg_len += head_room;
/* room for the fixed header plus a 16-byte-aligned link header */
206 head_room += sizeof(struct ipv6hdr) + ((dst->dev->hard_header_len + 15)&~15);
208 if (skb_headroom(skb) < head_room) {
209 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
215 skb_set_owner_w(skb, sk);
/* push fragmentable then non-fragmentable extension headers;
 * a routing header may rewrite first_hop */
218 ipv6_push_frag_opts(skb, opt, &proto);
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
223 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
226 * Fill in the IPv6 header
/* version 6 in the top nibble, caller's flow label in the low bits */
229 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
/* hop limit: socket setting if available, else the route's default */
232 hlimit = np->hop_limit;
234 hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
236 hdr->payload_len = htons(seg_len);
237 hdr->nexthdr = proto;
238 hdr->hop_limit = hlimit;
240 ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
241 ipv6_addr_copy(&hdr->daddr, first_hop);
243 if (skb->len <= dst->pmtu) {
244 IP6_INC_STATS(Ip6OutRequests);
245 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
/* too big for the path MTU: tell ourselves so the caller's socket
 * learns the correct MTU */
249 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
251 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
257 * To avoid extra problems ND packets are sent through this
258 * routine. It's code duplication but I really want to avoid
259 * extra checks since ipv6_build_header is used by TCP (which
260 * is for us performance critical)
/*
 * Build a bare IPv6 header for a Neighbour Discovery packet:
 * version 6, zero flow label, payload length 'len', next header
 * 'proto', hop limit from the socket, and the given addresses.
 */
263 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
264 struct in6_addr *saddr, struct in6_addr *daddr,
267 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
271 skb->protocol = htons(ETH_P_IPV6);
274 totlen = len + sizeof(struct ipv6hdr);
276 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
/* version 6, no flow label */
279 *(u32*)hdr = htonl(0x60000000);
281 hdr->payload_len = htons(len);
282 hdr->nexthdr = proto;
283 hdr->hop_limit = np->hop_limit;
285 ipv6_addr_copy(&hdr->saddr, saddr);
286 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * Append and fill the fixed IPv6 header for a packet of total length
 * 'pktlength' (header included), taking proto, flow label and the
 * addresses from the flow.  Returns a pointer to the header so the
 * caller can chain extension headers off hdr->nexthdr.
 */
291 static struct ipv6hdr * ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
292 int hlimit, unsigned pktlength)
296 skb->nh.raw = skb_put(skb, sizeof(struct ipv6hdr));
/* version 6 in the top nibble plus the caller's flow label */
299 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
301 hdr->payload_len = htons(pktlength - sizeof(struct ipv6hdr));
302 hdr->hop_limit = hlimit;
303 hdr->nexthdr = fl->proto;
305 ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
306 ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
/*
 * Append a Fragment extension header, splice it into the header chain
 * (the previous header's nexthdr becomes NEXTHDR_FRAGMENT, the fragment
 * header inherits the old value), set the offset, and pick a fresh
 * identification.  Returns the address of the fragment header's
 * nexthdr byte for further chaining.
 */
310 static __inline__ u8 * ipv6_build_fraghdr(struct sk_buff *skb, u8* prev_hdr, unsigned offset)
312 struct frag_hdr *fhdr;
314 fhdr = (struct frag_hdr *) skb_put(skb, sizeof(struct frag_hdr));
316 fhdr->nexthdr = *prev_hdr;
317 *prev_hdr = NEXTHDR_FRAGMENT;
/* local update only; the same pointer is also the return value */
318 prev_hdr = &fhdr->nexthdr;
321 fhdr->frag_off = htons(offset);
322 ipv6_select_ident(skb, fhdr);
323 return &fhdr->nexthdr;
/*
 * Fragmented transmit path for datagram sockets (UDP/ICMP/raw).
 * Splits 'length' bytes obtained via getfrag() into MTU-sized IPv6
 * fragments.  Strategy: build the LAST fragment first (it carries the
 * caller-visible checksum data), then clone it to produce the earlier
 * fragments from back to front, patching offset/M-flag in each clone,
 * and send the last fragment at the end.
 * NOTE(review): many lines (error paths, loop structure) are elided in
 * this excerpt -- comments describe only the visible statements.
 */
326 static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
327 const void *data, struct dst_entry *dst,
328 struct flowi *fl, struct ipv6_txoptions *opt,
329 struct in6_addr *final_dst,
330 int hlimit, int flags, unsigned length, int mtu)
333 struct sk_buff *last_skb;
347 * Extension header order:
348 * Hop-by-hop -> Dest0 -> Routing -> Fragment -> Auth -> Dest1 -> rest (...)
350 * We must build the non-fragmented part that
351 * will be in every packet... this also means
352 * that other extension headers (Dest, Auth, etc)
353 * must be considered in the data to be fragmented
356 unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr);
/* non-fragmentable options replicate into every fragment;
 * fragmentable options count toward the fragmented payload */
360 unfrag_len += opt->opt_nflen;
361 last_len += opt->opt_flen;
365 * Length of fragmented part on every packet but
366 * the last must be an:
367 * "integer multiple of 8 octects".
370 frag_len = (mtu - unfrag_len) & ~0x7;
372 /* Unfragmentable part exceeds mtu. */
374 ipv6_local_error(sk, EMSGSIZE, fl, mtu);
/* number of full-size fragments preceding the last one */
378 nfrags = last_len / frag_len;
381 * We must send from end to start because of
382 * UDP/ICMP checksums. We do a funny trick:
383 * fill the last skb first with the fixed
384 * header (and its data) and then use it
385 * to create the following segments and send it
386 * in the end. If the peer is checking the M_flag
387 * to trigger the reassembly code then this
388 * might be a good idea.
391 frag_off = nfrags * frag_len;
392 last_len -= frag_off;
396 frag_off -= frag_len;
401 /* And it is implementation problem: for now we assume, that
402 all the exthdrs will fit to the first fragment.
405 if (frag_len < opt->opt_flen) {
406 ipv6_local_error(sk, EMSGSIZE, fl, mtu);
/* fragmentable options occupy the front of the first fragment,
 * so user data in it starts after them */
409 data_off = frag_off - opt->opt_flen;
415 last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len +
416 dst->dev->hard_header_len + 15,
417 flags & MSG_DONTWAIT, &err);
419 if (last_skb == NULL)
422 last_skb->dst = dst_clone(dst);
424 skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15);
/* fixed header + non-fragmentable opts + fragment header, once */
426 hdr = ip6_bld_1(sk, last_skb, fl, hlimit, frag_len+unfrag_len);
427 prev_hdr = &hdr->nexthdr;
429 if (opt && opt->opt_nflen)
430 prev_hdr = ipv6_build_nfrag_opts(last_skb, prev_hdr, opt, final_dst, 0);
432 prev_hdr = ipv6_build_fraghdr(last_skb, prev_hdr, frag_off);
/* remember where the fragment header sits so clones can patch it */
433 fhdr_dist = prev_hdr - last_skb->data;
435 err = getfrag(data, &hdr->saddr, last_skb->tail, data_off, last_len);
441 struct frag_hdr *fhdr2;
/* each earlier fragment starts life as a copy of the last one */
443 skb = skb_copy(last_skb, sk->allocation);
446 IP6_INC_STATS(Ip6FragFails);
451 frag_off -= frag_len;
452 data_off -= frag_len;
454 fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist);
/* non-final fragment: set offset and the M (more fragments) bit */
457 fhdr2->frag_off = htons(frag_off | 1);
459 /* Write fragmentable exthdrs to the first chunk */
460 if (nfrags == 0 && opt && opt->opt_flen) {
461 ipv6_build_frag_opts(skb, &fhdr2->nexthdr, opt);
462 frag_len -= opt->opt_flen;
466 err = getfrag(data, &hdr->saddr,skb_put(skb, frag_len),
474 IP6_INC_STATS(Ip6FragCreates);
475 IP6_INC_STATS(Ip6OutRequests);
476 err = NF_HOOK(PF_INET6,NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
485 IP6_INC_STATS(Ip6FragFails);
/* the last fragment's payload is shorter; fix its length field */
490 hdr->payload_len = htons(unfrag_len + last_len - sizeof(struct ipv6hdr));
493 * update last_skb to reflect the getfrag we did
497 skb_put(last_skb, last_len);
499 IP6_INC_STATS(Ip6FragCreates);
500 IP6_INC_STATS(Ip6FragOKs);
501 IP6_INC_STATS(Ip6OutRequests);
502 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, last_skb, NULL,dst->dev, ip6_maybe_reroute);
/*
 * Main datagram transmit entry (UDP/ICMP/raw): resolve a route, pick
 * source address and hop limit, then either build and send a single
 * packet (with a jumbo payload option when length exceeds 64K) or hand
 * off to ip6_frag_xmit() when the packet exceeds the MTU.
 * NOTE(review): numerous lines (locals, error paths, mtu computation)
 * are elided in this excerpt -- comments cover visible statements only.
 */
505 int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
506 struct flowi *fl, unsigned length,
507 struct ipv6_txoptions *opt, int hlimit, int flags)
509 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
510 struct in6_addr *final_dst = NULL;
511 struct dst_entry *dst;
513 unsigned int pktlength, jumbolen, mtu;
514 struct in6_addr saddr;
/* with a source route, route toward the first hop and keep the real
 * destination for the routing header */
516 if (opt && opt->srcrt) {
517 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
518 final_dst = fl->fl6_dst;
519 fl->fl6_dst = rt0->addr;
522 if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
523 fl->oif = np->mcast_oif;
/* try the socket's cached route first */
525 dst = __sk_dst_check(sk, np->dst_cookie);
527 struct rt6_info *rt = (struct rt6_info*)dst;
529 /* Yes, checking route validity in not connected
530 case is not very simple. Take into account,
531 that we do not support routing by source, TOS,
532 and MSG_DONTROUTE --ANK (980726)
534 1. If route was host route, check that
535 cached destination is current.
536 If it is network route, we still may
537 check its validity using saved pointer
538 to the last used address: daddr_cache.
539 We do not want to save whole address now,
540 (because main consumer of this service
541 is tcp, which has not this problem),
542 so that the last trick works only on connected
544 2. oif also should be the same.
547 if (((rt->rt6i_dst.plen != 128 ||
548 ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
549 && (np->daddr_cache == NULL ||
550 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
551 || (fl->oif && fl->oif != dst->dev->ifindex)) {
/* cached route unusable: full lookup */
558 dst = ip6_route_output(sk, fl);
561 IP6_INC_STATS(Ip6OutNoRoutes);
/* no source given: derive one from the chosen route */
566 if (fl->fl6_src == NULL) {
567 err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr);
571 printk(KERN_DEBUG "ip6_build_xmit: "
572 "no available source address\n");
576 fl->fl6_src = &saddr;
/* hop limit preference: mcast_hops for multicast, else socket's
 * hop_limit, else the route default */
581 if (ipv6_addr_is_multicast(fl->fl6_dst))
582 hlimit = np->mcast_hops;
584 hlimit = np->hop_limit;
586 hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
/* hdrincl raw sockets supply the IPv6 header themselves */
591 if (!sk->protinfo.af_inet.hdrincl) {
592 pktlength += sizeof(struct ipv6hdr);
594 pktlength += opt->opt_flen + opt->opt_nflen;
596 if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
598 It is assumed, that in the case of hdrincl
599 jumbo option is supplied by user.
/* payload exceeds 16-bit length field: use a jumbo option */
602 jumbolen = pktlength - sizeof(struct ipv6hdr);
607 if (np->frag_size < mtu) {
610 else if (np->pmtudisc == IPV6_PMTUDISC_DONT)
614 /* Critical arithmetic overflow check.
615 FIXME: may gcc optimize it out? --ANK (980726)
617 if (pktlength < length) {
618 ipv6_local_error(sk, EMSGSIZE, fl, mtu);
623 if (flags&MSG_CONFIRM)
/* single-packet fast path */
626 if (pktlength <= mtu) {
629 struct net_device *dev = dst->dev;
635 skb = sock_alloc_send_skb(sk, pktlength + 15 +
636 dev->hard_header_len,
637 flags & MSG_DONTWAIT, &err);
640 IP6_INC_STATS(Ip6OutDiscards);
644 skb->dst = dst_clone(dst);
646 skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
648 hdr = (struct ipv6hdr *) skb->tail;
651 if (!sk->protinfo.af_inet.hdrincl) {
/* jumbo packets carry payload_len 0 in the fixed header */
652 ip6_bld_1(sk, skb, fl, hlimit,
653 jumbolen ? sizeof(struct ipv6hdr) : pktlength);
655 if (opt || jumbolen) {
656 u8 *prev_hdr = &hdr->nexthdr;
657 prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen);
658 if (opt && opt->opt_flen)
659 ipv6_build_frag_opts(skb, prev_hdr, opt);
/* copy the user payload after all headers */
663 skb_put(skb, length);
664 err = getfrag(data, &hdr->saddr,
665 ((char *) hdr) + (pktlength - length),
669 IP6_INC_STATS(Ip6OutRequests);
670 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
/* too big and fragmentation not permitted */
676 if (sk->protinfo.af_inet.hdrincl || jumbolen ||
677 np->pmtudisc == IPV6_PMTUDISC_DO) {
678 ipv6_local_error(sk, EMSGSIZE, fl, mtu);
683 err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit,
/* cache the route on the socket; remember daddr only if connected */
691 ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
693 err = np->recverr ? net_xmit_errno(err) : 0;
/*
 * Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain with a matching selector 'sel'.  Each matching socket
 * but the last receives a clone; the original skb goes to the last
 * match.  Returns whether the packet was consumed (return statements
 * elided in this excerpt).
 */
697 int ip6_call_ra_chain(struct sk_buff *skb, int sel)
699 struct ip6_ra_chain *ra;
700 struct sock *last = NULL;
702 read_lock(&ip6_ra_lock);
703 for (ra = ip6_ra_chain; ra; ra = ra->next) {
704 struct sock *sk = ra->sk;
705 if (sk && ra->sel == sel) {
/* a previous match gets a clone; keep the original for the
 * final matching socket */
707 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
709 rawv6_rcv(last, skb2);
716 rawv6_rcv(last, skb);
717 read_unlock(&ip6_ra_lock);
720 read_unlock(&ip6_ra_lock);
/* Netfilter FORWARD-hook completion: emit via the route's output. */
724 static inline int ip6_forward_finish(struct sk_buff *skb)
726 return skb->dst->output(skb);
/*
 * Forward a received IPv6 packet: honour Router Alert delivery, check
 * and decrement the hop limit, possibly emit a Redirect when the packet
 * leaves on the interface it arrived on, enforce the outgoing path MTU,
 * and pass the packet through the FORWARD netfilter hook.
 * NOTE(review): several branches (drops, hop-limit decrement) are
 * elided in this excerpt.
 */
729 int ip6_forward(struct sk_buff *skb)
731 struct dst_entry *dst = skb->dst;
732 struct ipv6hdr *hdr = skb->nh.ipv6h;
733 struct inet6_skb_parm *opt =(struct inet6_skb_parm*)skb->cb;
/* forwarding globally disabled */
735 if (ipv6_devconf.forwarding == 0)
/* we will modify the packet; any checksum state is stale */
738 skb->ip_summed = CHECKSUM_NONE;
741 * We DO NOT make any processing on
742 * RA packets, pushing them to user level AS IS
743 * without ane WARRANTY that application will be able
744 * to interpret them. The reason is that we
745 * cannot make anything clever here.
747 * We are not end-node, so that if packet contains
748 * AH/ESP, we cannot make anything.
749 * Defragmentation also would be mistake, RA packets
750 * cannot be fragmented, because there is no warranty
751 * that different fragments will go along one path. --ANK
/* Router Alert option value lives at opt->ra within the header */
754 u8 *ptr = skb->nh.raw + opt->ra;
755 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
760 * check and decrement ttl
762 if (hdr->hop_limit <= 1) {
763 /* Force OUTPUT device used as source address */
765 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
772 /* IPv6 specs say nothing about it, but it is clear that we cannot
773 send redirects to source routed frames.
775 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
776 struct in6_addr *target = NULL;
778 struct neighbour *n = dst->neighbour;
781 * incoming and outgoing devices are the same
/* redirect target: the gateway for indirect routes, otherwise
 * the final destination itself */
785 rt = (struct rt6_info *) dst;
786 if ((rt->rt6i_flags & RTF_GATEWAY))
787 target = (struct in6_addr*)&n->primary_key;
789 target = &hdr->daddr;
791 /* Limit redirects both by destination (here)
792 and by source (inside ndisc_send_redirect)
794 if (xrlim_allow(dst, 1*HZ))
795 ndisc_send_redirect(skb, n, target);
796 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
797 |IPV6_ADDR_LINKLOCAL)) {
798 /* This check is security critical. */
/* oversize for outgoing path MTU: bounce Packet Too Big */
802 if (skb->len > dst->pmtu) {
803 /* Again, force OUTPUT device used as source address */
805 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
806 IP6_INC_STATS_BH(Ip6InTooBigErrors);
/* ensure a private, writable copy before mangling hop_limit */
811 if (skb_cow(skb, dst->dev->hard_header_len))
816 /* Mangling hops number delayed to point after skb COW */
820 IP6_INC_STATS_BH(Ip6OutForwDatagrams);
821 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
824 IP6_INC_STATS_BH(Ip6InAddrErrors);