3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.1.1.1 2005/04/11 02:51:13 jack Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
21 * This program is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation; either version
24 * 2 of the License, or (at your option) any later version.
27 #define __NO_VERSION__
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/sched.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/ipsec.h>
42 #include <linux/ipv6.h>
43 #include <linux/icmpv6.h>
44 #include <linux/random.h>
47 #include <net/ndisc.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/inet_ecn.h>
54 #include <asm/uaccess.h>
/* Forward declarations for functions defined below, plus the two
 * tcp_func dispatch tables: ipv6_specific for native IPv6 sockets and
 * ipv6_mapped for sockets talking to v4-mapped (::ffff:a.b.c.d) peers.
 * NOTE(review): this dump elides lines (gaps in the embedded numbering),
 * so some prototypes here are visibly truncated.
 */
56 static void tcp_v6_send_reset(struct sk_buff *skb);
57 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
58 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
61 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
62 static int tcp_v6_xmit(struct sk_buff *skb);
64 static struct tcp_func ipv6_mapped;
65 static struct tcp_func ipv6_specific;
67 /* I have no idea if this is a good hash for v6 or not. -DaveM */
/* tcp_v6_hashfn - map a v6 4-tuple onto an established-hash bucket index.
 * Only the low 32 bits (s6_addr32[3]) of each address are mixed with the
 * XORed ports; the result is folded and masked by tcp_ehash_size-1
 * (table size is assumed to be a power of two).
 */
68 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
69 struct in6_addr *faddr, u16 fport)
71 int hashent = (lport ^ fport);
73 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
74 hashent ^= hashent>>16;
75 hashent ^= hashent>>8;
76 return (hashent & (tcp_ehash_size - 1));
/* tcp_v6_sk_hashfn - hash a socket's own 4-tuple via tcp_v6_hashfn().
 * Local side comes from the bound address (rcv_saddr) and sk->num;
 * foreign side from the peer address (daddr) and sk->dport.
 */
79 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
81 struct in6_addr *laddr = &sk->net_pinfo.af_inet6.rcv_saddr;
82 struct in6_addr *faddr = &sk->net_pinfo.af_inet6.daddr;
83 __u16 lport = sk->num;
84 __u16 fport = sk->dport;
85 return tcp_v6_hashfn(laddr, lport, faddr, fport);
88 /* Grrr, addr_type already calculated by caller, but I don't want
89 * to add some silly "cookie" argument to this method just for that.
90 * But it doesn't matter, the recalculation is in the rarest path
91 * this function ever takes.
/* tcp_v6_get_port - bind sk to local port snum, or pick an ephemeral one.
 * snum == 0 (elided branch): walk the rover through the sysctl local port
 * range under tcp_portalloc_lock, looking for a bind bucket that is free.
 * snum != 0: find/create the bind bucket and check every current owner
 * for an address/device/SO_REUSEADDR conflict, honouring the IPv6
 * any-address, v4-mapped and IPV6_V6ONLY cases.
 */
93 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
95 struct tcp_bind_hashbucket *head;
96 struct tcp_bind_bucket *tb;
101 int low = sysctl_local_port_range[0];
102 int high = sysctl_local_port_range[1];
103 int remaining = (high - low) + 1;
106 spin_lock(&tcp_portalloc_lock);
107 rover = tcp_port_rover;
109 if ((rover < low) || (rover > high))
111 head = &tcp_bhash[tcp_bhashfn(rover)];
112 spin_lock(&head->lock);
113 for (tb = head->chain; tb; tb = tb->next)
114 if (tb->port == rover)
118 spin_unlock(&head->lock);
119 } while (--remaining > 0);
120 tcp_port_rover = rover;
121 spin_unlock(&tcp_portalloc_lock);
123 /* Exhausted local port range during search? */
128 /* OK, here is the one we will use. */
132 head = &tcp_bhash[tcp_bhashfn(snum)];
133 spin_lock(&head->lock);
134 for (tb = head->chain; tb != NULL; tb = tb->next)
135 if (tb->port == snum)
138 if (tb != NULL && tb->owners != NULL) {
139 if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
142 struct sock *sk2 = tb->owners;
143 int sk_reuse = sk->reuse;
144 int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
146 /* We must walk the whole port owner list in this case. -DaveM */
147 for( ; sk2 != NULL; sk2 = sk2->bind_next) {
149 sk->bound_dev_if == sk2->bound_dev_if) {
152 sk2->state == TCP_LISTEN) {
153 /* NOTE: IPv6 tw bucket have different format */
154 if ((!sk2->rcv_saddr && !ipv6_only_sock(sk)) ||
155 (sk2->family == AF_INET6 &&
156 ipv6_addr_any(&sk2->net_pinfo.af_inet6.rcv_saddr) &&
157 !(ipv6_only_sock(sk2) && addr_type == IPV6_ADDR_MAPPED)) ||
158 (addr_type == IPV6_ADDR_ANY &&
159 (!ipv6_only_sock(sk) ||
160 !(sk2->family == AF_INET6 ? ipv6_addr_type(&sk2->net_pinfo.af_inet6.rcv_saddr) == IPV6_ADDR_MAPPED : 1))) ||
161 (sk2->family == AF_INET6 &&
162 !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
163 sk2->state != TCP_TIME_WAIT ?
164 &sk2->net_pinfo.af_inet6.rcv_saddr :
/* NOTE(review): the cast below is applied to sk (the socket being
 * bound), yet the TCP_TIME_WAIT test above is on sk2 — this looks
 * like it should be (struct tcp_tw_bucket*)sk2; confirm against
 * the mainline tree before touching.
 */
165 &((struct tcp_tw_bucket*)sk)->v6_rcv_saddr)) ||
166 (addr_type == IPV6_ADDR_MAPPED &&
167 !ipv6_only_sock(sk2) &&
170 sk->rcv_saddr == sk2->rcv_saddr)))
175 /* If we found a conflict, fail. */
183 (tb = tcp_bucket_create(head, snum)) == NULL)
185 if (tb->owners == NULL) {
186 if (sk->reuse && sk->state != TCP_LISTEN)
190 } else if (tb->fastreuse &&
191 ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
196 if (sk->prev == NULL) {
197 if ((sk->bind_next = tb->owners) != NULL)
198 tb->owners->bind_pprev = &sk->bind_next;
200 sk->bind_pprev = &tb->owners;
201 sk->prev = (struct sock *) tb;
203 BUG_TRAP(sk->prev == (struct sock *) tb);
208 spin_unlock(&head->lock);
/* __tcp_v6_hash - insert sk at the head of the proper hash chain.
 * Listening sockets go into tcp_listening_hash under tcp_lhash_lock;
 * everything else goes into the established table (tcp_ehash) under the
 * per-bucket lock, caching the bucket index in sk->hashent.
 * Bumps the per-protocol in-use counter.
 */
214 static __inline__ void __tcp_v6_hash(struct sock *sk)
219 BUG_TRAP(sk->pprev==NULL);
221 if(sk->state == TCP_LISTEN) {
222 skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
223 lock = &tcp_lhash_lock;
226 skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))].chain;
227 lock = &tcp_ehash[sk->hashent].lock;
231 if((sk->next = *skp) != NULL)
232 (*skp)->pprev = &sk->next;
235 sock_prot_inc_use(sk->prot);
/* tcp_v6_hash - public hash entry point; no-op for TCP_CLOSE sockets.
 * NOTE(review): the ipv6_mapped branch body is elided in this dump —
 * presumably it delegates to the IPv4 hash routine; confirm upstream.
 */
240 static void tcp_v6_hash(struct sock *sk)
242 if(sk->state != TCP_CLOSE) {
243 if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
/* tcp_v6_lookup_listener - find the best listening socket for
 * (daddr, hnum, dif). Walks the listening hash chain under
 * tcp_lhash_lock (read side), scoring candidates: a bound local
 * address and a bound device each make the match more specific;
 * the highest-scoring socket wins (scoring arithmetic elided here).
 */
253 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
256 struct sock *result = NULL;
260 read_lock(&tcp_lhash_lock);
261 sk = tcp_listening_hash[tcp_lhashfn(hnum)];
262 for(; sk; sk = sk->next) {
263 if((sk->num == hnum) && (sk->family == PF_INET6)) {
264 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
267 if(!ipv6_addr_any(&np->rcv_saddr)) {
268 if(ipv6_addr_cmp(&np->rcv_saddr, daddr))
272 if (sk->bound_dev_if) {
273 if (sk->bound_dev_if != dif)
281 if (score > hiscore) {
289 read_unlock(&tcp_lhash_lock);
293 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
294 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
296 * The sockhash lock must be held as a reader here.
/* __tcp_v6_lookup_established - exact 4-tuple lookup.
 * First scans the live half of the ehash bucket (fast path: full
 * TCP_IPV6_MATCH), then the TIME_WAIT half at head + tcp_ehash_size,
 * where entries are tcp_tw_bucket and addresses live in v6_daddr /
 * v6_rcv_saddr. Takes the bucket lock as a reader.
 */
299 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
300 struct in6_addr *daddr, u16 hnum,
303 struct tcp_ehash_bucket *head;
305 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
308 /* Optimize here for direct hit, only listening connections can
309 * have wildcards anyways.
311 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
312 head = &tcp_ehash[hash];
313 read_lock(&head->lock);
314 for(sk = head->chain; sk; sk = sk->next) {
315 /* For IPV6 do the cheaper port and family tests first. */
316 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
317 goto hit; /* You sunk my battleship! */
319 /* Must check for a TIME_WAIT'er before going to listener hash. */
320 for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) {
321 if(*((__u32 *)&(sk->dport)) == ports &&
322 sk->family == PF_INET6) {
323 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
324 if(!ipv6_addr_cmp(&tw->v6_daddr, saddr) &&
325 !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
326 (!sk->bound_dev_if || sk->bound_dev_if == dif))
330 read_unlock(&head->lock);
335 read_unlock(&head->lock);
/* __tcp_v6_lookup - full demux: try the established/TIME_WAIT table
 * first, fall back to the wildcard-capable listener lookup.
 */
340 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
341 struct in6_addr *daddr, u16 hnum,
346 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
351 return tcp_v6_lookup_listener(daddr, hnum, dif);
/* tcp_v6_lookup - exported wrapper around __tcp_v6_lookup().
 * Takes dport in network byte order and converts with ntohs().
 */
354 __inline__ struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
355 struct in6_addr *daddr, u16 dport,
361 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
369 * Open request hash tables.
/* tcp_v6_synq_hash - hash a remote (addr, port) into the per-listener
 * SYN queue table. Like tcp_v6_hashfn, only the low 32 address bits
 * are used; folding steps are elided in this dump. Masked to
 * TCP_SYNQ_HSIZE-1.
 */
372 static __inline__ unsigned tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport)
374 unsigned h = raddr->s6_addr32[3] ^ rport;
377 return h&(TCP_SYNQ_HSIZE-1);
/* tcp_v6_search_req - find a pending open_request on a listener's SYN
 * queue matching remote port/addr, local addr and (if bound) incoming
 * interface. On success *prevp is left pointing at the predecessor
 * link so the caller can unlink the request.
 */
380 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
381 struct open_request ***prevp,
383 struct in6_addr *raddr,
384 struct in6_addr *laddr,
387 struct tcp_listen_opt *lopt = tp->listen_opt;
388 struct open_request *req, **prev;
390 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport)];
391 (req = *prev) != NULL;
392 prev = &req->dl_next) {
393 if (req->rmt_port == rport &&
394 req->class->family == AF_INET6 &&
395 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
396 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
397 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
398 BUG_TRAP(req->sk == NULL);
/* tcp_v6_check - compute the TCP-over-IPv6 checksum including the
 * pseudo-header, folding in a precomputed partial sum (base).
 * Thin wrapper over csum_ipv6_magic().
 */
407 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
408 struct in6_addr *saddr,
409 struct in6_addr *daddr,
412 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* tcp_v6_init_sequence - pick a secure initial sequence number.
 * Dispatches on skb->protocol: native IPv6 packets use the v6 ISN
 * generator; anything else (v4-mapped) falls through to the IPv4 one.
 */
415 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
417 if (skb->protocol == htons(ETH_P_IPV6)) {
418 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
419 skb->nh.ipv6h->saddr.s6_addr32,
423 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* tcp_v6_check_established - final uniqueness check for an outgoing
 * connect(): under the ehash bucket write lock, scan the TIME_WAIT
 * half for a reusable bucket (timestamp recycling per the tcp_ipv4.c
 * comment), then the live half for a hard collision. On success the
 * socket is inserted into the table and any displaced TIME_WAIT
 * bucket is killed; on collision returns -EADDRNOTAVAIL.
 */
430 static int tcp_v6_check_established(struct sock *sk)
432 struct in6_addr *daddr = &sk->net_pinfo.af_inet6.rcv_saddr;
433 struct in6_addr *saddr = &sk->net_pinfo.af_inet6.daddr;
434 int dif = sk->bound_dev_if;
435 u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num);
436 int hash = tcp_v6_hashfn(daddr, sk->num, saddr, sk->dport);
437 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
438 struct sock *sk2, **skp;
439 struct tcp_tw_bucket *tw;
441 write_lock_bh(&head->lock);
443 for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
444 tw = (struct tcp_tw_bucket*)sk2;
446 if(*((__u32 *)&(sk2->dport)) == ports &&
447 sk2->family == PF_INET6 &&
448 !ipv6_addr_cmp(&tw->v6_daddr, saddr) &&
449 !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
450 sk2->bound_dev_if == sk->bound_dev_if) {
451 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
453 if (tw->ts_recent_stamp) {
454 /* See comment in tcp_ipv4.c */
455 if ((tp->write_seq = tw->snd_nxt+65535+2) == 0)
457 tp->ts_recent = tw->ts_recent;
458 tp->ts_recent_stamp = tw->ts_recent_stamp;
468 for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
469 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
474 BUG_TRAP(sk->pprev==NULL);
475 if ((sk->next = *skp) != NULL)
476 (*skp)->pprev = &sk->next;
481 sock_prot_inc_use(sk->prot);
482 write_unlock_bh(&head->lock);
485 /* Silly. Should hash-dance instead... */
487 tcp_tw_deschedule(tw);
488 tcp_timewait_kill(tw);
489 NET_INC_STATS_BH(TimeWaitRecycled);
497 write_unlock_bh(&head->lock);
498 return -EADDRNOTAVAIL;
/* tcp_v6_hash_connect - bind a local port for connect() and insert the
 * socket. If this socket is the sole owner of its bind bucket the
 * fast path applies (elided); otherwise fall through to the full
 * 4-tuple uniqueness check in tcp_v6_check_established().
 */
501 static int tcp_v6_hash_connect(struct sock *sk)
503 struct tcp_bind_hashbucket *head;
504 struct tcp_bind_bucket *tb;
508 int err = tcp_v6_get_port(sk, sk->num);
511 sk->sport = htons(sk->num);
514 head = &tcp_bhash[tcp_bhashfn(sk->num)];
517 spin_lock_bh(&head->lock);
519 if (tb->owners == sk && sk->bind_next == NULL) {
521 spin_unlock_bh(&head->lock);
524 spin_unlock_bh(&head->lock);
525 return tcp_v6_check_established(sk);
/* tcp_v6_iif - return the incoming interface index stashed in the
 * skb control block (inet6_skb_parm) by the IPv6 receive path.
 */
529 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
531 struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
/* tcp_v6_connect - active open for an IPv6 TCP socket.
 * Validates the sockaddr_in6, resolves any flow label, applies the
 * BSD "connect to :: means loopback" rule, handles link-local scope
 * ids, and — for v4-mapped destinations — flips the socket onto the
 * ipv6_mapped ops and delegates to tcp_v4_connect(). For native v6 it
 * routes via ip6_route_output(), picks a source address if unbound,
 * binds a port via tcp_v6_hash_connect(), then sends the SYN.
 */
535 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
538 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
539 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
540 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
541 struct in6_addr *saddr = NULL;
542 struct in6_addr saddr_buf;
544 struct dst_entry *dst;
548 if (addr_len < SIN6_LEN_RFC2133)
551 if (usin->sin6_family != AF_INET6)
552 return(-EAFNOSUPPORT);
554 fl.fl6_flowlabel = 0;
556 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
557 IP6_ECN_flow_init(fl.fl6_flowlabel);
/* A non-zero flow label must refer to a label the socket owns;
 * its recorded destination overrides the caller's address. */
558 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
559 struct ip6_flowlabel *flowlabel;
560 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
561 if (flowlabel == NULL)
563 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
564 fl6_sock_release(flowlabel);
569 * connect() to INADDR_ANY means loopback (BSD'ism).
572 if(ipv6_addr_any(&usin->sin6_addr))
573 usin->sin6_addr.s6_addr[15] = 0x1;
575 addr_type = ipv6_addr_type(&usin->sin6_addr);
577 if(addr_type & IPV6_ADDR_MULTICAST)
580 if (addr_type&IPV6_ADDR_LINKLOCAL) {
581 if (addr_len >= sizeof(struct sockaddr_in6) &&
582 usin->sin6_scope_id) {
583 /* If interface is set while binding, indices
586 if (sk->bound_dev_if &&
587 sk->bound_dev_if != usin->sin6_scope_id)
590 sk->bound_dev_if = usin->sin6_scope_id;
593 /* Connect to link-local address requires an interface */
594 if (sk->bound_dev_if == 0)
/* Reconnecting to a different peer: stale TS data must not be
 * used for PAWS against the new destination. */
598 if (tp->ts_recent_stamp && ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
600 tp->ts_recent_stamp = 0;
604 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
605 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: do the real work over TCP/IPv4. */
611 if (addr_type == IPV6_ADDR_MAPPED) {
612 u32 exthdrlen = tp->ext_header_len;
613 struct sockaddr_in sin;
615 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
617 if (__ipv6_only_sock(sk))
620 sin.sin_family = AF_INET;
621 sin.sin_port = usin->sin6_port;
622 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
624 sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
625 sk->backlog_rcv = tcp_v4_do_rcv;
627 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* On failure, restore the native-v6 ops (elided error path). */
630 tp->ext_header_len = exthdrlen;
631 sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
632 sk->backlog_rcv = tcp_v6_do_rcv;
635 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
637 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
644 if (!ipv6_addr_any(&np->rcv_saddr))
645 saddr = &np->rcv_saddr;
647 fl.proto = IPPROTO_TCP;
648 fl.fl6_dst = &np->daddr;
650 fl.oif = sk->bound_dev_if;
651 fl.uli_u.ports.dport = usin->sin6_port;
652 fl.uli_u.ports.sport = sk->sport;
/* With a routing header, route towards its first hop instead. */
654 if (np->opt && np->opt->srcrt) {
655 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
656 fl.nl_u.ip6_u.daddr = rt0->addr;
659 dst = ip6_route_output(sk, &fl);
661 if ((err = dst->error) != 0) {
666 ip6_dst_store(sk, dst, NULL);
667 sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
670 err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf);
677 /* set the source address */
678 ipv6_addr_copy(&np->rcv_saddr, saddr);
679 ipv6_addr_copy(&np->saddr, saddr);
680 sk->rcv_saddr= LOOPBACK4_IPV6;
682 tp->ext_header_len = 0;
684 tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen;
685 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
687 sk->dport = usin->sin6_port;
689 tcp_set_state(sk, TCP_SYN_SENT);
690 err = tcp_v6_hash_connect(sk);
695 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
697 sk->sport, sk->dport);
698 err = tcp_connect(sk);
705 tcp_set_state(sk, TCP_CLOSE);
/* tcp_v6_err - ICMPv6 error handler for TCP.
 * Looks up the socket for the offending segment; drops stats-only for
 * unknown or TIME_WAIT sockets. Validates the echoed sequence number
 * against the send window, handles PKT_TOOBIG by re-routing and
 * running PMTU discovery (tcp_sync_mss + simple retransmit), and
 * converts other ICMP errors, including aborting pending
 * open_requests and waking SYN_SENT/SYN_RECV sockets.
 */
713 void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
714 int type, int code, int offset, __u32 info)
716 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
717 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
718 struct ipv6_pinfo *np;
724 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
727 ICMP6_INC_STATS_BH(Icmp6InErrors);
731 if (sk->state == TCP_TIME_WAIT) {
732 tcp_tw_put((struct tcp_tw_bucket*)sk);
738 NET_INC_STATS_BH(LockDroppedIcmps);
740 if (sk->state == TCP_CLOSE)
743 tp = &sk->tp_pinfo.af_tcp;
744 seq = ntohl(th->seq);
745 if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
746 NET_INC_STATS_BH(OutOfWindowIcmps);
750 np = &sk->net_pinfo.af_inet6;
752 if (type == ICMPV6_PKT_TOOBIG) {
753 struct dst_entry *dst = NULL;
757 if ((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
760 /* icmp should have updated the destination cache entry */
761 dst = __sk_dst_check(sk, np->dst_cookie);
766 /* BUGGG_FUTURE: Again, it is not clear how
767 to handle rthdr case. Ignore this complexity
770 fl.proto = IPPROTO_TCP;
771 fl.nl_u.ip6_u.daddr = &np->daddr;
772 fl.nl_u.ip6_u.saddr = &np->saddr;
773 fl.oif = sk->bound_dev_if;
774 fl.uli_u.ports.dport = sk->dport;
775 fl.uli_u.ports.sport = sk->sport;
777 dst = ip6_route_output(sk, &fl);
782 sk->err_soft = -dst->error;
783 } else if (tp->pmtu_cookie > dst->pmtu) {
784 tcp_sync_mss(sk, dst->pmtu);
785 tcp_simple_retransmit(sk);
786 } /* else let the usual retransmit timer handle it */
791 icmpv6_err_convert(type, code, &err);
793 /* Might be for an open_request */
795 struct open_request *req, **prev;
800 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
801 &hdr->saddr, tcp_v6_iif(skb));
805 /* ICMPs are not backlogged, hence we cannot get
806 * an established socket here.
808 BUG_TRAP(req->sk == NULL);
810 if (seq != req->snt_isn) {
811 NET_INC_STATS_BH(OutOfWindowIcmps);
815 tcp_synq_drop(sk, req, prev);
819 case TCP_SYN_RECV: /* Cannot happen.
820 It can, it SYNs are crossed. --ANK */
821 if (sk->lock.users == 0) {
822 TCP_INC_STATS_BH(TcpAttemptFails);
824 sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
833 if (sk->lock.users == 0 && np->recverr) {
835 sk->error_report(sk);
/* tcp_v6_send_synack - build and transmit a SYN-ACK for a pending
 * open_request. Constructs the flow (honouring any source routing
 * header — including one inverted from the received SYN's packet
 * options when rxopt.bits.srcrt == 2), routes it, checksums the
 * segment and hands it to ip6_xmit(). A locally-built txoptions
 * struct (not the listener's own) is freed before returning.
 */
846 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
847 struct dst_entry *dst)
849 struct sk_buff * skb;
850 struct ipv6_txoptions *opt = NULL;
854 fl.proto = IPPROTO_TCP;
855 fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
856 fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
857 fl.fl6_flowlabel = 0;
858 fl.oif = req->af.v6_req.iif;
859 fl.uli_u.ports.dport = req->rmt_port;
860 fl.uli_u.ports.sport = sk->sport;
863 opt = sk->net_pinfo.af_inet6.opt;
865 sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
866 req->af.v6_req.pktopts) {
867 struct sk_buff *pktopts = req->af.v6_req.pktopts;
868 struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)pktopts->cb;
870 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
873 if (opt && opt->srcrt) {
874 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
875 fl.nl_u.ip6_u.daddr = rt0->addr;
878 dst = ip6_route_output(sk, &fl);
883 skb = tcp_make_synack(sk, dst, req);
885 struct tcphdr *th = skb->h.th;
887 th->check = tcp_v6_check(th, skb->len,
888 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
889 csum_partial((char *)th, skb->len, skb->csum));
891 fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
892 err = ip6_xmit(sk, skb, &fl, opt);
893 if (err == NET_XMIT_CN)
899 if (opt && opt != sk->net_pinfo.af_inet6.opt)
900 sock_kfree_s(sk, opt, opt->tot_len);
/* tcp_v6_or_free - release per-request state: drop the reference on
 * the SYN's pktoptions skb, if one was retained.
 */
904 static void tcp_v6_or_free(struct open_request *req)
906 if (req->af.v6_req.pktopts)
907 kfree_skb(req->af.v6_req.pktopts);
/* or_ipv6 - open_request ops for IPv6 (initializers elided in dump). */
910 static struct or_calltable or_ipv6 = {
/* ipv6_opt_accepted - does this skb carry any ancillary data the
 * socket asked to receive (IPV6_PKTOPTIONS)? Checks hop-by-hop
 * options, flow label, routing header and destination options
 * against the socket's rxopt bits.
 */
918 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
920 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
922 if (sk->net_pinfo.af_inet6.rxopt.all) {
923 if ((opt->hop && sk->net_pinfo.af_inet6.rxopt.bits.hopopts) ||
924 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
925 sk->net_pinfo.af_inet6.rxopt.bits.rxflow) ||
926 (opt->srcrt && sk->net_pinfo.af_inet6.rxopt.bits.srcrt) ||
927 ((opt->dst1 || opt->dst0) && sk->net_pinfo.af_inet6.rxopt.bits.dstopts))
/* tcp_v6_send_check - fill in th->check on an outgoing segment.
 * With hardware checksum offload (CHECKSUM_HW) only the pseudo-header
 * sum is computed and the checksum field offset is stashed in
 * skb->csum for the device; otherwise the full sum is done here.
 */
934 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
937 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
939 if (skb->ip_summed == CHECKSUM_HW) {
940 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
941 skb->csum = offsetof(struct tcphdr, check);
943 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
944 csum_partial((char *)th, th->doff<<2,
/* tcp_v6_send_reset - send a RST in reply to skb, socketless.
 * Never replies to a multicast destination. Builds a bare TCP header
 * with source/destination swapped: seq taken from the offender's
 * ack_seq when it had ACK set, otherwise ack_seq computed from the
 * received segment. Routed and transmitted with sk == NULL.
 */
950 static void tcp_v6_send_reset(struct sk_buff *skb)
952 struct tcphdr *th = skb->h.th, *t1;
953 struct sk_buff *buff;
959 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
963 * We need to grab some memory, and put together an RST,
964 * and then put it into the queue to be sent.
967 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
971 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
973 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
975 /* Swap the send and the receive. */
976 memset(t1, 0, sizeof(*t1));
977 t1->dest = th->source;
978 t1->source = th->dest;
979 t1->doff = sizeof(*t1)/4;
983 t1->seq = th->ack_seq;
986 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
987 + skb->len - (th->doff<<2));
990 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
992 fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
993 fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
994 fl.fl6_flowlabel = 0;
996 t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
998 sizeof(*t1), IPPROTO_TCP,
1001 fl.proto = IPPROTO_TCP;
1002 fl.oif = tcp_v6_iif(skb);
1003 fl.uli_u.ports.dport = t1->dest;
1004 fl.uli_u.ports.sport = t1->source;
1006 /* sk = NULL, but it is safe for now. RST socket required. */
1007 buff->dst = ip6_route_output(NULL, &fl);
1009 if (buff->dst->error == 0) {
1010 ip6_xmit(NULL, buff, &fl, NULL);
1011 TCP_INC_STATS_BH(TcpOutSegs);
1012 TCP_INC_STATS_BH(TcpOutRsts);
/* tcp_v6_send_ack - send a bare ACK (used for TIME_WAIT and for
 * answering retransmitted SYNs on open_requests) without a full
 * socket. Optionally appends a TSopt block (NOP,NOP,TIMESTAMP) when
 * ts is nonzero — the tot_len adjustment is elided in this dump.
 * Routed and transmitted with sk == NULL, like tcp_v6_send_reset.
 */
1019 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1021 struct tcphdr *th = skb->h.th, *t1;
1022 struct sk_buff *buff;
1024 int tot_len = sizeof(struct tcphdr);
1026 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1030 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1035 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1037 /* Swap the send and the receive. */
1038 memset(t1, 0, sizeof(*t1));
1039 t1->dest = th->source;
1040 t1->source = th->dest;
1041 t1->doff = tot_len/4;
1042 t1->seq = htonl(seq);
1043 t1->ack_seq = htonl(ack);
1045 t1->window = htons(win);
1048 u32 *ptr = (u32*)(t1 + 1);
1049 *ptr++ = htonl((TCPOPT_NOP << 24) |
1050 (TCPOPT_NOP << 16) |
1051 (TCPOPT_TIMESTAMP << 8) |
1053 *ptr++ = htonl(tcp_time_stamp);
1057 buff->csum = csum_partial((char *)t1, tot_len, 0);
1059 fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
1060 fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
1061 fl.fl6_flowlabel = 0;
1063 t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
1064 fl.nl_u.ip6_u.daddr,
1065 tot_len, IPPROTO_TCP,
1068 fl.proto = IPPROTO_TCP;
1069 fl.oif = tcp_v6_iif(skb);
1070 fl.uli_u.ports.dport = t1->dest;
1071 fl.uli_u.ports.sport = t1->source;
1073 buff->dst = ip6_route_output(NULL, &fl);
1075 if (buff->dst->error == 0) {
1076 ip6_xmit(NULL, buff, &fl, NULL);
1077 TCP_INC_STATS_BH(TcpOutSegs);
/* tcp_v6_timewait_ack - ACK a segment that hit a TIME_WAIT bucket,
 * using the snd/rcv state frozen in the tw bucket.
 */
1084 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1086 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1088 tcp_v6_send_ack(skb, tw->snd_nxt, tw->rcv_nxt,
1089 tw->rcv_wnd>>tw->rcv_wscale, tw->ts_recent);
/* tcp_v6_or_send_ack - re-ACK a retransmitted SYN for a pending
 * open_request (SYN-ACK side already sent; peer missed it).
 */
1094 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1096 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/* tcp_v6_hnd_req - demux a segment arriving on a listening socket.
 * A matching pending open_request goes to tcp_check_req(); an
 * already-established child found in the ehash is returned for
 * processing (TIME_WAIT hits are dropped with tcp_tw_put).
 * IPv6 SYN cookies are compiled out (the #if 0 block below).
 */
1100 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1102 struct open_request *req, **prev;
1103 struct tcphdr *th = skb->h.th;
1104 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1107 /* Find possible connection requests. */
1108 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1109 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1111 return tcp_check_req(sk, skb, req, prev);
1113 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1115 &skb->nh.ipv6h->daddr,
1120 if (nsk->state != TCP_TIME_WAIT) {
1124 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1128 #if 0 /*def CONFIG_SYN_COOKIES*/
1129 if (!th->rst && !th->syn && th->ack)
1130 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* tcp_v6_synq_add - queue a new open_request on the listener's SYN
 * table bucket, arming its retransmit expiry and publishing the link
 * under syn_wait_lock (write side).
 */
1135 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1137 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1138 struct tcp_listen_opt *lopt = tp->listen_opt;
1139 unsigned h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port);
1142 req->expires = jiffies + TCP_TIMEOUT_INIT;
1144 req->dl_next = lopt->syn_table[h];
1146 write_lock(&tp->syn_wait_lock);
1147 lopt->syn_table[h] = req;
1148 write_unlock(&tp->syn_wait_lock);
1154 /* FIXME: this is substantially similar to the ipv4 code.
1155 * Can some kind of merge be done? -- erics
/* tcp_v6_conn_request - handle an incoming SYN on a listener.
 * Delegates v4 packets to tcp_v4_conn_request(). Rejects multicast
 * destinations, drops when the SYN queue is full (no v6 syncookies)
 * or the accept queue is saturated, then allocates and initializes an
 * open_request (parsing TCP options, latching pktoptions if the
 * socket wants ancillary data), sends the SYN-ACK and queues the
 * request. Returns 0 in all cases so no RST is generated.
 */
1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1160 struct open_request *req = NULL;
1161 __u32 isn = TCP_SKB_CB(skb)->when;
1163 if (skb->protocol == htons(ETH_P_IP))
1164 return tcp_v4_conn_request(sk, skb);
1166 /* FIXME: do the same check for anycast */
1167 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
1171 * There are no SYN attacks on IPv6, yet...
1173 if (tcp_synq_is_full(sk) && !isn) {
1174 if (net_ratelimit())
1175 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1179 if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1182 req = tcp_openreq_alloc();
1186 tcp_clear_options(&tp);
1187 tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1188 tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;
1190 tcp_parse_options(skb, &tp, 0);
1192 tp.tstamp_ok = tp.saw_tstamp;
1193 tcp_openreq_init(req, &tp, skb);
1195 req->class = &or_ipv6;
1196 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1197 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1198 TCP_ECN_create_request(req, skb->h.th);
1199 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive if its ancillary data will be wanted. */
1200 if (ipv6_opt_accepted(sk, skb) ||
1201 sk->net_pinfo.af_inet6.rxopt.bits.rxinfo ||
1202 sk->net_pinfo.af_inet6.rxopt.bits.rxhlim) {
1203 atomic_inc(&skb->users);
1204 req->af.v6_req.pktopts = skb;
1206 req->af.v6_req.iif = sk->bound_dev_if;
1208 /* So that link locals have meaning */
1209 if (!sk->bound_dev_if && ipv6_addr_type(&req->af.v6_req.rmt_addr)&IPV6_ADDR_LINKLOCAL)
1210 req->af.v6_req.iif = tcp_v6_iif(skb);
1213 isn = tcp_v6_init_sequence(sk,skb);
1217 if (tcp_v6_send_synack(sk, req, NULL))
1220 tcp_v6_synq_add(sk, req);
1226 tcp_openreq_free(req);
1228 TCP_INC_STATS_BH(TcpAttemptFails);
1229 return 0; /* don't send reset */
/* tcp_v6_syn_recv_sock - create the child socket when the 3WHS
 * completes. For a v4 (mapped) SYN, delegates to
 * tcp_v4_syn_recv_sock() and then rewrites the child's v6 view of its
 * addresses and ops (ipv6_mapped). For native v6: checks the accept
 * queue, resolves the route (honouring source routing, possibly
 * inverted from the SYN's options), clones the listener via
 * tcp_create_openreq_child(), copies addresses/options/pktoptions
 * into the child, sizes the MSS, and hashes the child in.
 */
1232 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1233 struct open_request *req,
1234 struct dst_entry *dst)
1236 struct ipv6_pinfo *np;
1238 struct tcp_opt *newtp;
1240 struct ipv6_txoptions *opt;
1242 if (skb->protocol == htons(ETH_P_IP)) {
1247 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1252 np = &newsk->net_pinfo.af_inet6;
1254 ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000FFFF),
1257 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
1260 ipv6_addr_copy(&np->rcv_saddr, &np->saddr);
1262 newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
1263 newsk->backlog_rcv = tcp_v4_do_rcv;
1264 newsk->net_pinfo.af_inet6.pktoptions = NULL;
1265 newsk->net_pinfo.af_inet6.opt = NULL;
1266 newsk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
1267 newsk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;
1269 /* Charge newly allocated IPv6 socket. Though it is mapped,
1272 #ifdef INET_REFCNT_DEBUG
1273 atomic_inc(&inet6_sock_nr);
1277 /* It is tricky place. Until this moment IPv4 tcp
1278 worked with IPv6 af_tcp.af_specific.
1281 tcp_sync_mss(newsk, newsk->tp_pinfo.af_tcp.pmtu_cookie);
1286 opt = sk->net_pinfo.af_inet6.opt;
1288 if (tcp_acceptq_is_full(sk))
1291 if (sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
1292 opt == NULL && req->af.v6_req.pktopts) {
1293 struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)req->af.v6_req.pktopts->cb;
1295 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1299 fl.proto = IPPROTO_TCP;
1300 fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
1301 if (opt && opt->srcrt) {
1302 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1303 fl.nl_u.ip6_u.daddr = rt0->addr;
1305 fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
1306 fl.fl6_flowlabel = 0;
1307 fl.oif = sk->bound_dev_if;
1308 fl.uli_u.ports.dport = req->rmt_port;
1309 fl.uli_u.ports.sport = sk->sport;
1311 dst = ip6_route_output(sk, &fl);
1317 newsk = tcp_create_openreq_child(sk, req, skb);
1321 /* Charge newly allocated IPv6 socket */
1322 #ifdef INET_REFCNT_DEBUG
1323 atomic_inc(&inet6_sock_nr);
1327 ip6_dst_store(newsk, dst, NULL);
1328 sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
1330 newtp = &(newsk->tp_pinfo.af_tcp);
1332 np = &newsk->net_pinfo.af_inet6;
1333 ipv6_addr_copy(&np->daddr, &req->af.v6_req.rmt_addr);
1334 ipv6_addr_copy(&np->saddr, &req->af.v6_req.loc_addr);
1335 ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
1336 newsk->bound_dev_if = req->af.v6_req.iif;
1338 /* Now IPv6 options...
1340 First: no IPv4 options.
1342 newsk->protinfo.af_inet.opt = NULL;
1345 np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all;
1347 /* Clone pktoptions received with SYN */
1348 np->pktoptions = NULL;
1349 if (req->af.v6_req.pktopts) {
1350 np->pktoptions = skb_clone(req->af.v6_req.pktopts, GFP_ATOMIC);
1351 kfree_skb(req->af.v6_req.pktopts);
1352 req->af.v6_req.pktopts = NULL;
/* NOTE(review): skb_clone() can return NULL under GFP_ATOMIC; an
 * if (np->pktoptions) guard around the line below appears to have
 * been elided from this dump — confirm against mainline.
 */
1354 skb_set_owner_r(np->pktoptions, newsk);
1357 np->mcast_oif = tcp_v6_iif(skb);
1358 np->mcast_hops = skb->nh.ipv6h->hop_limit;
1360 /* Clone native IPv6 options from listening socket (if any)
1362 Yes, keeping reference count would be much more clever,
1363 but we make one more one thing there: reattach optmem
1367 np->opt = ipv6_dup_options(newsk, opt);
1368 if (opt != sk->net_pinfo.af_inet6.opt)
1369 sock_kfree_s(sk, opt, opt->tot_len);
1372 newtp->ext_header_len = 0;
1374 newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;
1376 tcp_sync_mss(newsk, dst->pmtu);
1377 newtp->advmss = dst->advmss;
1378 tcp_initialize_rcv_mss(newsk);
1380 newsk->daddr = LOOPBACK4_IPV6;
1381 newsk->saddr = LOOPBACK4_IPV6;
1382 newsk->rcv_saddr= LOOPBACK4_IPV6;
1384 __tcp_v6_hash(newsk);
1385 tcp_inherit_port(sk, newsk);
1390 NET_INC_STATS_BH(ListenOverflows);
1392 NET_INC_STATS_BH(ListenDrops);
1393 if (opt && opt != sk->net_pinfo.af_inet6.opt)
1394 sock_kfree_s(sk, opt, opt->tot_len);
/* tcp_v6_checksum_init - validate or defer the receive checksum.
 * CHECKSUM_HW: verify the hardware sum now and mark UNNECESSARY, or
 * fall through on mismatch. Short packets (<= 76 bytes) are fully
 * verified in software immediately; longer ones just get the
 * pseudo-header seed stored in skb->csum for later incremental check.
 */
1399 static int tcp_v6_checksum_init(struct sk_buff *skb)
1401 if (skb->ip_summed == CHECKSUM_HW) {
1402 skb->ip_summed = CHECKSUM_UNNECESSARY;
1403 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1404 &skb->nh.ipv6h->daddr,skb->csum))
1406 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1408 if (skb->len <= 76) {
1409 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1410 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1412 skb->ip_summed = CHECKSUM_UNNECESSARY;
1414 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1415 &skb->nh.ipv6h->daddr,0);
1420 /* The socket must have it's spinlock held when we get
1423 * We have a potential double-lock case here, so even when
1424 * doing backlog processing we use the BH locking scheme.
1425 * This is because we cannot sleep with the original spinlock
/* tcp_v6_do_rcv - per-socket receive path (called with the socket
 * lock held). v4 packets that were backlogged onto a v6 socket are
 * bounced to tcp_v4_do_rcv(). Runs the socket filter, optionally
 * clones the skb to latch IPV6_PKTOPTIONS ancillary data, then takes
 * the ESTABLISHED fast path, the LISTEN demux (tcp_v6_hnd_req /
 * tcp_child_process), or the generic state machine. The
 * ipv6_pktoptions tail stores the latched options on the socket when
 * the segment was accepted in-order.
 */
1428 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1430 #ifdef CONFIG_FILTER
1431 struct sk_filter *filter;
1433 struct sk_buff *opt_skb = NULL;
1435 /* Imagine: socket is IPv6. IPv4 packet arrives,
1436 goes to IPv4 receive handler and backlogged.
1437 From backlog it always goes here. Kerboom...
1438 Fortunately, tcp_rcv_established and rcv_established
1439 handle them correctly, but it is not case with
1440 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1443 if (skb->protocol == htons(ETH_P_IP))
1444 return tcp_v4_do_rcv(sk, skb);
1446 #ifdef CONFIG_FILTER
1447 filter = sk->filter;
1448 if (filter && sk_filter(skb, filter))
1450 #endif /* CONFIG_FILTER */
1453 * socket locking is here for SMP purposes as backlog rcv
1454 * is currently called with bh processing disabled.
1457 IP6_INC_STATS_BH(Ip6InDelivers);
1459 /* Do Stevens' IPV6_PKTOPTIONS.
1461 Yes, guys, it is the only place in our code, where we
1462 may make it not affecting IPv4.
1463 The rest of code is protocol independent,
1464 and I do not like idea to uglify IPv4.
1466 Actually, all the idea behind IPV6_PKTOPTIONS
1467 looks not very well thought. For now we latch
1468 options, received in the last packet, enqueued
1469 by tcp. Feel free to propose better solution.
1472 if (sk->net_pinfo.af_inet6.rxopt.all)
1473 opt_skb = skb_clone(skb, GFP_ATOMIC);
1475 if (sk->state == TCP_ESTABLISHED) { /* Fast path */
1476 TCP_CHECK_TIMER(sk);
1477 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1479 TCP_CHECK_TIMER(sk);
1481 goto ipv6_pktoptions;
1485 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1488 if (sk->state == TCP_LISTEN) {
1489 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1494 * Queue it on the new socket if the new socket is active,
1495 * otherwise we just shortcircuit this and continue with
1499 if (tcp_child_process(sk, nsk, skb))
1502 __kfree_skb(opt_skb);
1507 TCP_CHECK_TIMER(sk);
1508 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1510 TCP_CHECK_TIMER(sk);
1512 goto ipv6_pktoptions;
1516 tcp_v6_send_reset(skb);
1519 __kfree_skb(opt_skb);
1523 TCP_INC_STATS_BH(TcpInErrs);
1528 /* Do you ask, what is it?
1530 1. skb was enqueued by tcp.
1531 2. skb is added to tail of read queue, rather than out of order.
1532 3. socket is not in passive state.
1533 4. Finally, it really contains options, which user wants to receive.
1535 if (TCP_SKB_CB(opt_skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
1536 !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
1537 if (sk->net_pinfo.af_inet6.rxopt.bits.rxinfo)
1538 sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(opt_skb);
1539 if (sk->net_pinfo.af_inet6.rxopt.bits.rxhlim)
1540 sk->net_pinfo.af_inet6.mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1541 if (ipv6_opt_accepted(sk, opt_skb)) {
1542 skb_set_owner_r(opt_skb, sk);
1543 opt_skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, opt_skb);
1545 __kfree_skb(opt_skb);
1546 opt_skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL);
/*
 * tcp_v6_rcv() - protocol-level receive entry for IPv6 TCP segments.
 *
 * Validates the TCP header (pullup, doff, checksum), fills in the
 * TCP_SKB_CB control block, looks up the owning socket and either
 * processes the segment directly, prequeues it, or backlogs it.
 * TIME_WAIT sockets are handled via tcp_timewait_state_process().
 *
 * NOTE(review): excerpt elides labels (no_tcp_socket, do_time_wait,
 * discard paths) and several braces; comments cover visible code only.
 */
1555 int tcp_v6_rcv(struct sk_buff *skb)
/* Only packets addressed to this host are processed. */
1561 if (skb->pkt_type != PACKET_HOST)
1565 * Count it even if it's bad.
1567 TCP_INC_STATS_BH(TcpInSegs);
/* Ensure the fixed header, then the full header (doff*4), is linear. */
1569 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1574 if (th->doff < sizeof(struct tcphdr)/4)
1576 if (!pskb_may_pull(skb, th->doff*4))
1579 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1580 tcp_v6_checksum_init(skb) < 0))
/* Populate the per-skb TCP control block from the header. */
1584 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1585 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1586 skb->len - th->doff*4);
1587 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1588 TCP_SKB_CB(skb)->when = 0;
1589 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1590 TCP_SKB_CB(skb)->sacked = 0;
1592 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1593 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1599 if(!ipsec_sk_policy(sk,skb))
1600 goto discard_and_relse;
1601 if(sk->state == TCP_TIME_WAIT)
/* No user holds the lock: try prequeue, else process now;
 * otherwise defer to the socket backlog. */
1608 if (!sk->lock.users) {
1609 if (!tcp_prequeue(sk, skb))
1610 ret = tcp_v6_do_rcv(sk, skb);
1612 sk_add_backlog(sk, skb);
/* (elided label) no socket found: verify segment, then send RST. */
1619 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1621 TCP_INC_STATS_BH(TcpInErrs);
1623 tcp_v6_send_reset(skb);
/* (elided label) TIME_WAIT handling. */
1640 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1641 TCP_INC_STATS_BH(TcpInErrs);
1646 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1647 skb, th, skb->len)) {
/* New SYN on a TIME_WAIT pair: find a listener willing to take it. */
1652 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1654 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1655 tcp_timewait_kill((struct tcp_tw_bucket *)sk);
1656 tcp_tw_put((struct tcp_tw_bucket *)sk);
1660 /* Fall through to ACK */
1663 tcp_v6_timewait_ack(sk, skb);
1667 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_rebuild_header() - revalidate or re-resolve the cached route.
 *
 * Checks the socket's cached dst against np->dst_cookie; if stale,
 * rebuilds the flow (honouring a type-0 routing header's first hop)
 * and calls ip6_route_output(), storing the result with
 * ip6_dst_store().  route_caps is refreshed from the new device,
 * masking out NETIF_F_IP_CSUM (an IPv4-only checksum capability).
 *
 * NOTE(review): error-return lines are elided from this excerpt.
 */
1672 static int tcp_v6_rebuild_header(struct sock *sk)
1675 struct dst_entry *dst;
1676 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
1678 dst = __sk_dst_check(sk, np->dst_cookie);
1683 fl.proto = IPPROTO_TCP;
1684 fl.nl_u.ip6_u.daddr = &np->daddr;
1685 fl.nl_u.ip6_u.saddr = &np->saddr;
1686 fl.fl6_flowlabel = np->flow_label;
1687 fl.oif = sk->bound_dev_if;
1688 fl.uli_u.ports.dport = sk->dport;
1689 fl.uli_u.ports.sport = sk->sport;
/* Source routing: route towards the first hop of the srcrt header. */
1691 if (np->opt && np->opt->srcrt) {
1692 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1693 fl.nl_u.ip6_u.daddr = rt0->addr;
1696 dst = ip6_route_output(sk, &fl);
1705 ip6_dst_store(sk, dst, NULL);
1706 sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
/*
 * tcp_v6_xmit() - queue_xmit hook: route (if needed) and transmit one
 * TCP segment via ip6_xmit().
 *
 * Builds the flow from socket state, marks ECN in the flowlabel,
 * honours a type-0 source route for the routing decision, then
 * restores the final destination before handing to ip6_xmit().
 * On routing failure, err_soft is set and its negative returned.
 *
 * NOTE(review): some lines (declarations/braces) are elided here.
 */
1712 static int tcp_v6_xmit(struct sk_buff *skb)
1714 struct sock *sk = skb->sk;
1715 struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
1717 struct dst_entry *dst;
1719 fl.proto = IPPROTO_TCP;
1720 fl.fl6_dst = &np->daddr;
1721 fl.fl6_src = &np->saddr;
1722 fl.fl6_flowlabel = np->flow_label;
1723 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1724 fl.oif = sk->bound_dev_if;
1725 fl.uli_u.ports.sport = sk->sport;
1726 fl.uli_u.ports.dport = sk->dport;
/* Route via the first source-route hop when a srcrt header is set. */
1728 if (np->opt && np->opt->srcrt) {
1729 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1730 fl.nl_u.ip6_u.daddr = rt0->addr;
1733 dst = __sk_dst_check(sk, np->dst_cookie);
1736 dst = ip6_route_output(sk, &fl);
/* Routing failed: record soft error on the socket and bail out. */
1739 sk->err_soft = -dst->error;
1741 return -sk->err_soft;
1744 ip6_dst_store(sk, dst, NULL);
1747 skb->dst = dst_clone(dst);
1749 /* Restore final destination back after routing done */
1750 fl.nl_u.ip6_u.daddr = &np->daddr;
1752 return ip6_xmit(sk, skb, &fl, np->opt);
/*
 * v6_addr2sockaddr() - fill a sockaddr_in6 with the socket's peer
 * address (daddr/dport), for getpeername()-style callers.
 *
 * Flow label is deliberately not reported (see comment below);
 * sin6_scope_id is set only for link-local peers on a bound device,
 * per the sin6_scope_id support noted in the file header.
 */
1755 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1757 struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
1758 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1760 sin6->sin6_family = AF_INET6;
1761 memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr));
1762 sin6->sin6_port = sk->dport;
1763 /* We do not store received flowlabel for TCP */
1764 sin6->sin6_flowinfo = 0;
1765 sin6->sin6_scope_id = 0;
1766 if (sk->bound_dev_if && ipv6_addr_type(&sin6->sin6_addr)&IPV6_ADDR_LINKLOCAL)
1767 sin6->sin6_scope_id = sk->bound_dev_if;
/*
 * tcp_v6_remember_stamp() - timestamp-remembering hook for tcp_func.
 * Intentionally a stub for IPv6 ("not yet"); the IPv4 side uses
 * tcp_v4_remember_stamp (see ipv6_mapped below).
 * NOTE(review): the return statement is elided from this excerpt.
 */
1770 static int tcp_v6_remember_stamp(struct sock *sk)
1772 /* Alas, not yet... */
/*
 * ipv6_specific - tcp_func operations for genuine IPv6 TCP sockets.
 * Positional initializer (2.4 style); several member lines are elided
 * from this excerpt (e.g. the xmit/send_check/setsockopt entries).
 */
1776 static struct tcp_func ipv6_specific = {
1779 tcp_v6_rebuild_header,
1780 tcp_v6_conn_request,
1781 tcp_v6_syn_recv_sock,
1782 tcp_v6_remember_stamp,
1783 sizeof(struct ipv6hdr),
1788 sizeof(struct sockaddr_in6)
1792 * TCP over IPv4 via INET6 API
/*
 * ipv6_mapped - tcp_func operations for IPv4-mapped connections made
 * through an AF_INET6 socket: mixes v4 transport hooks (rebuild_header,
 * remember_stamp, iphdr size) with v6 address-level hooks.
 * NOTE(review): some member lines are elided from this excerpt.
 */
1795 static struct tcp_func ipv6_mapped = {
1798 tcp_v4_rebuild_header,
1799 tcp_v6_conn_request,
1800 tcp_v6_syn_recv_sock,
1801 tcp_v4_remember_stamp,
1802 sizeof(struct iphdr),
1807 sizeof(struct sockaddr_in6)
1812 /* NOTE: A lot of things set to zero explicitly by call to
1813 * sk_alloc() so need not be done here.
/*
 * tcp_v6_init_sock() - initialize TCP state for a new AF_INET6 socket.
 *
 * Sets up queues and timers, seeds RTO/mdev, congestion-control
 * initial values (per draft-stevens-tcpca-spec-01), default MSS cache,
 * installs ipv6_specific as the af_specific ops, and applies sysctl
 * buffer defaults.  sk_alloc() already zeroed most fields (see the
 * comment above this function).
 * NOTE(review): the initial-cwnd assignment line is elided here.
 */
1815 static int tcp_v6_init_sock(struct sock *sk)
1817 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1819 skb_queue_head_init(&tp->out_of_order_queue);
1820 tcp_init_xmit_timers(sk);
1821 tcp_prequeue_init(tp);
1823 tp->rto = TCP_TIMEOUT_INIT;
1824 tp->mdev = TCP_TIMEOUT_INIT;
1826 /* So many TCP implementations out there (incorrectly) count the
1827 * initial SYN frame in their delayed-ACK and congestion control
1828 * algorithms that we must have the following bandaid to talk
1829 * efficiently to them. -DaveM
1833 /* See draft-stevens-tcpca-spec-01 for discussion of the
1834 * initialization of these values.
1836 tp->snd_ssthresh = 0x7fffffff;
1837 tp->snd_cwnd_clamp = ~0;
1838 tp->mss_cache = 536;
1840 tp->reordering = sysctl_tcp_reordering;
1842 sk->state = TCP_CLOSE;
1844 sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
1846 sk->write_space = tcp_write_space;
1847 sk->use_write_queue = 1;
/* Default buffer sizes come from the middle sysctl_tcp_*mem value. */
1849 sk->sndbuf = sysctl_tcp_wmem[1];
1850 sk->rcvbuf = sysctl_tcp_rmem[1];
1852 atomic_inc(&tcp_sockets_allocated);
/*
 * tcp_v6_destroy_sock() - tear down TCP state for an AF_INET6 socket.
 *
 * Mirror of tcp_v6_init_sock(): stops timers, purges write/ooo/prequeue
 * queues, releases the bind bucket and cached sendmsg page, decrements
 * the global socket count, then delegates to inet6_destroy_sock().
 * NOTE(review): the tcp_put_port() call after the sk->prev check is
 * elided from this excerpt — verify against the full source.
 */
1857 static int tcp_v6_destroy_sock(struct sock *sk)
1859 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1861 tcp_clear_xmit_timers(sk);
1863 /* Cleanup up the write buffer. */
1864 tcp_writequeue_purge(sk);
1866 /* Cleans up our, hopefully empty, out_of_order_queue. */
1867 __skb_queue_purge(&tp->out_of_order_queue);
1869 /* Clean prequeue, it must be empty really */
1870 __skb_queue_purge(&tp->ucopy.prequeue);
1872 /* Clean up a referenced TCP bind bucket. */
1873 if(sk->prev != NULL)
1876 /* If sendmsg cached page exists, toss it. */
1877 if (tp->sndmsg_page != NULL)
1878 __free_page(tp->sndmsg_page);
1880 atomic_dec(&tcp_sockets_allocated);
1882 return inet6_destroy_sock(sk);
1885 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6() - format one pending open_request (SYN_RECV) as a
 * /proc/net/tcp6-style line into tmpbuf.
 *
 * ttd is the remaining time until the request expires (may print as a
 * countdown).  Several sprintf argument lines are elided from this
 * excerpt.
 */
1886 static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid)
1888 struct in6_addr *dest, *src;
1889 int ttd = req->expires - jiffies;
1894 src = &req->af.v6_req.loc_addr;
1895 dest = &req->af.v6_req.rmt_addr;
1897 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1898 "%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
1900 src->s6_addr32[0], src->s6_addr32[1],
1901 src->s6_addr32[2], src->s6_addr32[3],
1903 dest->s6_addr32[0], dest->s6_addr32[1],
1904 dest->s6_addr32[2], dest->s6_addr32[3],
1905 ntohs(req->rmt_port),
1907 0,0, /* could print option size, but that is af dependent. */
1908 1, /* timers active (only the expire timer) */
1912 0, /* non standard timer */
1913 0, /* open_requests have no inode */
/*
 * get_tcp6_sock() - format one established/listening TCP6 socket as a
 * /proc/net/tcp6-style line into tmpbuf.
 *
 * timer_active/timer_expires reflect whichever of retransmit, probe0,
 * or the socket keepalive timer is pending (the timer_active
 * assignments themselves are elided from this excerpt).
 */
1917 static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
1919 struct in6_addr *dest, *src;
1922 unsigned long timer_expires;
1923 struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
1925 dest = &sp->net_pinfo.af_inet6.daddr;
1926 src = &sp->net_pinfo.af_inet6.rcv_saddr;
1927 destp = ntohs(sp->dport);
1928 srcp = ntohs(sp->sport);
1929 if (tp->pending == TCP_TIME_RETRANS) {
1931 timer_expires = tp->timeout;
1932 } else if (tp->pending == TCP_TIME_PROBE0) {
1934 timer_expires = tp->timeout;
1935 } else if (timer_pending(&sp->timer)) {
1937 timer_expires = sp->timer.expires;
1940 timer_expires = jiffies;
1944 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1945 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d",
1947 src->s6_addr32[0], src->s6_addr32[1],
1948 src->s6_addr32[2], src->s6_addr32[3], srcp,
1949 dest->s6_addr32[0], dest->s6_addr32[1],
1950 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1952 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1953 timer_active, timer_expires-jiffies,
1958 atomic_read(&sp->refcnt), sp,
/* -1 stands for "ssthresh effectively unset" in the proc output. */
1959 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
1960 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * get_timewait6_sock() - format one TIME_WAIT bucket as a
 * /proc/net/tcp6-style line into tmpbuf.
 *
 * ttd is time until the TIME_WAIT entry dies; some sprintf argument
 * lines are elided from this excerpt.
 */
1964 static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
1966 struct in6_addr *dest, *src;
1968 int ttd = tw->ttd - jiffies;
1973 dest = &tw->v6_daddr;
1974 src = &tw->v6_rcv_saddr;
1975 destp = ntohs(tw->dport);
1976 srcp = ntohs(tw->sport);
1979 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1980 "%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
1982 src->s6_addr32[0], src->s6_addr32[1],
1983 src->s6_addr32[2], src->s6_addr32[3], srcp,
1984 dest->s6_addr32[0], dest->s6_addr32[1],
1985 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1988 atomic_read(&tw->refcnt), tw);
/* Fixed width of one /proc/net/tcp6 output line (plus format macro);
 * tcp6_get_info() below sizes tmpbuf and offsets from LINE_LEN. */
1991 #define LINE_LEN 190
1992 #define LINE_FMT "%-190s\n"
/*
 * tcp6_get_info() - /proc read handler producing the tcp6 socket dump.
 *
 * Walks three tables in order: (1) listening hash plus each listener's
 * SYN queue, (2) established hash, (3) the TIME_WAIT half of the ehash
 * (at index i + tcp_ehash_size).  Output is paginated via
 * offset/length with pos/begin bookkeeping, standard 2.4 procfs style.
 *
 * NOTE(review): this excerpt elides braces, several `continue`/`goto
 * out*` lines and the pos += LINE_LEN+1 accounting — comments cover
 * visible code only.
 */
1994 int tcp6_get_info(char *buffer, char **start, off_t offset, int length)
1996 int len = 0, num = 0, i;
1997 off_t begin, pos = 0;
1998 char tmpbuf[LINE_LEN+2];
2000 if (offset < LINE_LEN+1)
2001 len += sprintf(buffer, LINE_FMT,
2003 "local_address " /* 38 */
2004 "remote_address " /* 38 */
2005 "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
2006 " uid timeout inode"); /* 21 */
2012 /* First, walk listening socket table. */
2014 for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
2015 struct sock *sk = tcp_listening_hash[i];
2016 struct tcp_listen_opt *lopt;
2019 for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
2020 struct open_request *req;
2022 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2024 if (sk->family != PF_INET6)
2027 if (pos >= offset) {
2028 get_tcp6_sock(sk, tmpbuf, num);
2029 len += sprintf(buffer+len, LINE_FMT, tmpbuf);
2030 if (pos >= offset + length) {
2031 tcp_listen_unlock();
2036 uid = sock_i_uid(sk);
/* Dump this listener's pending open_requests (SYN queue). */
2037 read_lock_bh(&tp->syn_wait_lock);
2038 lopt = tp->listen_opt;
2039 if (lopt && lopt->qlen != 0) {
2040 for (k=0; k<TCP_SYNQ_HSIZE; k++) {
2041 for (req = lopt->syn_table[k]; req; req = req->dl_next, num++) {
2042 if (req->class->family != PF_INET6)
2047 get_openreq6(sk, req, tmpbuf, num, uid);
2048 len += sprintf(buffer+len, LINE_FMT, tmpbuf);
2049 if (pos >= offset + length) {
2050 read_unlock_bh(&tp->syn_wait_lock);
2051 tcp_listen_unlock();
2057 read_unlock_bh(&tp->syn_wait_lock);
2059 /* Completed requests are in normal socket hash table */
2062 tcp_listen_unlock();
2066 /* Next, walk established hash chain. */
2067 for (i = 0; i < tcp_ehash_size; i++) {
2068 struct tcp_ehash_bucket *head = &tcp_ehash[i];
2070 struct tcp_tw_bucket *tw;
2072 read_lock(&head->lock);
2073 for(sk = head->chain; sk; sk = sk->next, num++) {
2074 if (sk->family != PF_INET6)
2079 get_tcp6_sock(sk, tmpbuf, num);
2080 len += sprintf(buffer+len, LINE_FMT, tmpbuf);
2081 if (pos >= offset + length) {
2082 read_unlock(&head->lock);
/* TIME_WAIT buckets live in the upper half of the ehash table. */
2086 for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain;
2088 tw = (struct tcp_tw_bucket *)tw->next, num++) {
2089 if (tw->family != PF_INET6)
2094 get_timewait6_sock(tw, tmpbuf, num);
2095 len += sprintf(buffer+len, LINE_FMT, tmpbuf);
2096 if (pos >= offset + length) {
2097 read_unlock(&head->lock);
2101 read_unlock(&head->lock);
2108 begin = len - (pos - offset);
2109 *start = buffer + begin;
/*
 * tcpv6_prot - struct proto for AF_INET6/SOCK_STREAM sockets, wiring
 * the generic TCP entry points to the v6-specific handlers defined in
 * this file (init/destroy/backlog_rcv/get_port).
 * NOTE(review): some member lines (e.g. close, ioctl, accept, name)
 * are elided from this excerpt.
 */
2118 struct proto tcpv6_prot = {
2121 connect: tcp_v6_connect,
2122 disconnect: tcp_disconnect,
2125 init: tcp_v6_init_sock,
2126 destroy: tcp_v6_destroy_sock,
2127 shutdown: tcp_shutdown,
2128 setsockopt: tcp_setsockopt,
2129 getsockopt: tcp_getsockopt,
2130 sendmsg: tcp_sendmsg,
2131 recvmsg: tcp_recvmsg,
2132 backlog_rcv: tcp_v6_do_rcv,
2135 get_port: tcp_v6_get_port,
/*
 * tcpv6_protocol - inet6_protocol entry registering tcp_v6_rcv /
 * tcp_v6_err for IPPROTO_TCP with the IPv6 stack (see tcpv6_init()).
 */
2138 static struct inet6_protocol tcpv6_protocol =
2140 tcp_v6_rcv, /* TCP handler */
2141 tcp_v6_err, /* TCP error control */
2143 IPPROTO_TCP, /* protocol ID */
2149 extern struct proto_ops inet6_stream_ops;
/*
 * tcpv6_protosw - inet_protosw entry mapping SOCK_STREAM/IPPROTO_TCP
 * to tcpv6_prot and inet6_stream_ops; PERMANENT so it cannot be
 * unregistered. NOTE(review): the type/prot member lines are elided
 * from this excerpt.
 */
2151 static struct inet_protosw tcpv6_protosw = {
2153 protocol: IPPROTO_TCP,
2155 ops: &inet6_stream_ops,
2158 flags: INET_PROTOSW_PERMANENT,
2161 void __init tcpv6_init(void)
2163 /* register inet6 protocol */
2164 inet6_add_protocol(&tcpv6_protocol);
2165 inet6_register_protosw(&tcpv6_protosw);