3 * Linux INET6 implementation
6 * Pedro Roque <pedro_m@yahoo.com>
8 * Adapted from linux/net/ipv4/raw.c
10 * $Id: raw.c,v 1.50.2.1 2002/03/05 12:47:34 davem Exp $
13 * Hideaki YOSHIFUJI : sin6_scope_id support
14 * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
22 #include <linux/errno.h>
23 #include <linux/types.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
26 #include <linux/sched.h>
27 #include <linux/net.h>
28 #include <linux/in6.h>
29 #include <linux/netdevice.h>
30 #include <linux/if_arp.h>
31 #include <linux/icmpv6.h>
32 #include <asm/uaccess.h>
33 #include <asm/ioctls.h>
39 #include <net/ndisc.h>
40 #include <net/protocol.h>
41 #include <net/ip6_route.h>
42 #include <net/addrconf.h>
43 #include <net/transp_v6.h>
45 #include <net/inet_common.h>
47 #include <net/rawv6.h>
/*
 * Table of raw IPv6 socket chains.  raw_v6_hash() picks the slot with
 * sk->num & (RAWV6_HTABLE_SIZE - 1); sockets in a slot are chained
 * through sk->next.
 */
49 struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
/*
 * Reader/writer lock around raw_v6_htable: write-held by the hash and
 * unhash routines, read-held by the code in this file that walks the
 * chains.
 */
50 rwlock_t raw_v6_lock = RW_LOCK_UNLOCKED;
/*
 * Link sk into the raw_v6_htable chain selected by
 * sk->num & (RAWV6_HTABLE_SIZE - 1) and bump the protocol use count.
 *
 * Everything between write_lock_bh() and write_unlock_bh() runs with
 * raw_v6_lock write-held.
 *
 * NOTE(review): this listing omits some original lines of the function
 * (braces and the statements between the numbered lines), so only the
 * visible statements are described here.
 */
52 static void raw_v6_hash(struct sock *sk)
54 struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)];
56 write_lock_bh(&raw_v6_lock);
/* sk takes over the old head as its successor; fix that node's back-link. */
57 if ((sk->next = *skp) != NULL)
58 (*skp)->pprev = &sk->next;
61 sock_prot_inc_use(sk->prot);
63 write_unlock_bh(&raw_v6_lock);
/*
 * Unlink sk from its hash chain and drop the protocol use count, all
 * with raw_v6_lock write-held.
 *
 * The unlink is the usual next/pprev splice: the successor inherits
 * sk->pprev, and the slot that pointed at sk is redirected to sk->next.
 *
 * NOTE(review): the conditions guarding the two splice statements (for
 * instance a check that sk->next is non-NULL) are not visible in this
 * listing; confirm against the full source.
 */
66 static void raw_v6_unhash(struct sock *sk)
68 write_lock_bh(&raw_v6_lock);
71 sk->next->pprev = sk->pprev;
72 *sk->pprev = sk->next;
74 sock_prot_dec_use(sk->prot);
77 write_unlock_bh(&raw_v6_lock);
81 /* Grumble... icmp and ip_input want to get at this... */
/*
 * Walk the chain starting at sk (following ->next) looking for a raw
 * socket that matches a packet's addresses.
 *
 * @sk:       first socket of the chain to examine
 * @num:      protocol number to match (the comparison itself is on a
 *            line missing from this listing - TODO confirm)
 * @loc_addr: destination address of the packet (local side)
 * @rmt_addr: source address of the packet (remote side)
 *
 * Visible matching rules, per candidate socket:
 *  - a socket with a non-wildcard np->daddr is tested against rmt_addr
 *    (ipv6_addr_cmp() non-zero means the addresses differ);
 *  - a socket with a non-wildcard np->rcv_saddr is tested for equality
 *    with loc_addr, and, when loc_addr is multicast, additionally
 *    checked with inet6_mc_check().
 *
 * NOTE(review): the statements executed on each test outcome and the
 * return value are not visible here.
 */
82 struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
83 struct in6_addr *loc_addr, struct in6_addr *rmt_addr)
86 int addr_type = ipv6_addr_type(loc_addr);
88 for(s = sk; s; s = s->next) {
90 struct ipv6_pinfo *np = &s->net_pinfo.af_inet6;
92 if (!ipv6_addr_any(&np->daddr) &&
93 ipv6_addr_cmp(&np->daddr, rmt_addr))
96 if (!ipv6_addr_any(&np->rcv_saddr)) {
97 if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0)
99 if ((addr_type & IPV6_ADDR_MULTICAST) &&
100 inet6_mc_check(s, loc_addr, rmt_addr))
/*
 * Test an ICMPv6 packet against the per-socket filter bitmap.
 *
 * If an ICMPv6 header can be pulled from skb, the result is non-zero
 * exactly when the bit for the packet's icmp6_type is set in
 * sk->tp_pinfo.tp_raw.filter.data[] (word index type >> 5, bit index
 * type & 31).
 *
 * NOTE(review): the value returned when pskb_may_pull() fails is on a
 * line missing from this listing.
 */
114 static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
116 struct icmp6hdr *icmph;
117 struct raw6_opt *opt;
119 opt = &sk->tp_pinfo.tp_raw;
120 if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
121 __u32 *data = &opt->filter.data[0];
124 icmph = (struct icmp6hdr *) skb->data;
125 bit_nr = icmph->icmp6_type;
127 return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
133 * demultiplex raw sockets.
134 * (should consider queueing the skb in the sock receive_queue
135 * without calling rawv6.c)
/*
 * Hand a received packet to the raw sockets registered for nexthdr.
 *
 * With raw_v6_lock read-held, the chain for nexthdr is searched with
 * __raw_v6_lookup().  Every match after the first one (sk2) gets its own
 * skb_clone() of the packet via rawv6_rcv(); for ICMPv6 the socket
 * filter is consulted first through icmpv6_filter().  The first match
 * (sk) is treated separately, see the in-body comment.
 *
 * NOTE(review): the bucket index here is nexthdr & (MAX_INET_PROTOS - 1)
 * whereas raw_v6_hash() uses sk->num & (RAWV6_HTABLE_SIZE - 1); confirm
 * that the two constants agree.
 * NOTE(review): the statements taken when a filter test succeeds, the
 * NULL check on the clone and the return value are on lines missing from
 * this listing.
 */
137 struct sock * ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
139 struct in6_addr *saddr;
140 struct in6_addr *daddr;
141 struct sock *sk, *sk2;
144 saddr = &skb->nh.ipv6h->saddr;
147 hash = nexthdr & (MAX_INET_PROTOS - 1);
149 read_lock(&raw_v6_lock);
150 sk = raw_v6_htable[hash];
153 * The first socket found will be delivered after
154 * delivery to transport protocols.
160 sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr);
165 while ((sk2 = __raw_v6_lookup(sk2->next, nexthdr, daddr, saddr))) {
166 struct sk_buff *buff;
168 if (nexthdr == IPPROTO_ICMPV6 &&
169 icmpv6_filter(sk2, skb))
172 buff = skb_clone(skb, GFP_ATOMIC);
174 rawv6_rcv(sk2, buff);
178 if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb))
184 read_unlock(&raw_v6_lock);
188 /* This cleans up af_inet6 a bit. -DaveM */
/*
 * Bind a raw IPv6 socket to a local address.
 *
 * @sk:       socket being bound
 * @uaddr:    user-supplied address, interpreted as struct sockaddr_in6
 * @addr_len: length of @uaddr; must be at least SIN6_LEN_RFC2133
 *
 * Visible rules:
 *  - IPv4-mapped addresses are refused with -EADDRNOTAVAIL;
 *  - the socket state is compared with TCP_CLOSE before binding;
 *  - for a link-local address, a non-zero sin6_scope_id (only read when
 *    addr_len covers the whole sockaddr_in6) replaces sk->bound_dev_if,
 *    and the resulting bound_dev_if is then tested against zero;
 *  - a non-wildcard, non-multicast address is verified with
 *    ipv6_chk_addr(), with err preset to -EADDRNOTAVAIL;
 *  - the address is stored in np->rcv_saddr and, unless multicast, in
 *    np->saddr as well; sk->rcv_saddr receives v4addr.
 *
 * NOTE(review): the error codes for the length, state and missing
 * interface checks, and the locking around the update, are on lines
 * missing from this listing.
 */
189 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
191 struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
196 if (addr_len < SIN6_LEN_RFC2133)
198 addr_type = ipv6_addr_type(&addr->sin6_addr);
200 /* Raw sockets are IPv6 only */
201 if (addr_type == IPV6_ADDR_MAPPED)
202 return(-EADDRNOTAVAIL);
207 if (sk->state != TCP_CLOSE)
210 if (addr_type & IPV6_ADDR_LINKLOCAL) {
211 if (addr_len >= sizeof(struct sockaddr_in6) &&
212 addr->sin6_scope_id) {
213 /* Override any existing binding, if another one
214 * is supplied by user.
216 sk->bound_dev_if = addr->sin6_scope_id;
219 /* Binding to link-local address requires an interface */
220 if (sk->bound_dev_if == 0)
224 /* Check if the address belongs to the host. */
225 if (addr_type != IPV6_ADDR_ANY) {
226 /* ipv4 addr of the socket is invalid. Only the
227 * unpecified and mapped address have a v4 equivalent.
229 v4addr = LOOPBACK4_IPV6;
230 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
231 err = -EADDRNOTAVAIL;
232 if (!ipv6_chk_addr(&addr->sin6_addr, NULL))
237 sk->rcv_saddr = v4addr;
239 ipv6_addr_copy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr);
240 if (!(addr_type & IPV6_ADDR_MULTICAST))
241 ipv6_addr_copy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr);
/*
 * ICMPv6 error handler for raw sockets.
 *
 * @sk:     socket the error relates to
 * @skb:    buffer carrying the ICMPv6 error
 * @opt:    per-skb IPv6 parameters (not used by the visible statements)
 * @type:   ICMPv6 type
 * @code:   ICMPv6 code
 * @offset: offset argument supplied by the caller (use not visible here)
 * @info:   extra information word, passed on in host byte order
 *
 * icmpv6_err_convert() maps type and code to an errno in err and yields
 * the hard-error flag; for ICMPV6_PKT_TOOBIG the flag is instead set
 * only when the socket uses IPV6_PMTUDISC_DO.  With recverr enabled the
 * error is queued through ipv6_icmp_error().  When recverr is enabled or
 * the error is hard, sk->error_report() is invoked.
 *
 * NOTE(review): the early exit taken when the socket has neither recverr
 * nor TCP_ESTABLISHED state, the payload adjustment for non-hdrincl
 * sockets and the store to sk->err are on lines missing from this
 * listing.
 */
248 void rawv6_err(struct sock *sk, struct sk_buff *skb,
249 struct inet6_skb_parm *opt,
250 int type, int code, int offset, u32 info)
255 /* Report error on raw socket, if:
256 1. User requested recverr.
257 2. Socket is connected (otherwise the error indication
258 is useless without recverr and error is hard.
260 if (!sk->net_pinfo.af_inet6.recverr && sk->state != TCP_ESTABLISHED)
263 harderr = icmpv6_err_convert(type, code, &err);
264 if (type == ICMPV6_PKT_TOOBIG)
265 harderr = (sk->net_pinfo.af_inet6.pmtudisc == IPV6_PMTUDISC_DO);
267 if (sk->net_pinfo.af_inet6.recverr) {
268 u8 *payload = skb->data;
269 if (!sk->protinfo.af_inet.hdrincl)
271 ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
274 if (sk->net_pinfo.af_inet6.recverr || harderr) {
276 sk->error_report(sk);
/*
 * Final receive step: optionally verify the checksum, then queue skb on
 * the socket.
 *
 * When the socket has checksumming enabled (a further CONFIG_FILTER
 * condition is on lines missing from this listing) and the skb is not
 * already CHECKSUM_UNNECESSARY, the whole packet is summed with
 * skb_checksum() and folded; a non-zero result counts as Ip6InDiscards.
 * The skb is then marked CHECKSUM_UNNECESSARY.
 *
 * sock_queue_rcv_skb() charges the buffer to the socket; a negative
 * result counts as Ip6InDiscards, success as Ip6InDelivers.
 *
 * NOTE(review): the buffer release on the failure paths and the return
 * values are not visible here.
 */
280 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
282 if ((sk->tp_pinfo.tp_raw.checksum
283 #if defined(CONFIG_FILTER)
286 ) && skb->ip_summed != CHECKSUM_UNNECESSARY) {
287 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
288 IP6_INC_STATS_BH(Ip6InDiscards);
292 skb->ip_summed = CHECKSUM_UNNECESSARY;
294 /* Charge it to the socket. */
295 if (sock_queue_rcv_skb(sk,skb)<0) {
296 IP6_INC_STATS_BH(Ip6InDiscards);
301 IP6_INC_STATS_BH(Ip6InDelivers);
306 * This is next to useless...
307 * if we demultiplex in network layer we don't need the extra call
308 * just to queue the skb...
309 * maybe we could have the network decide upon a hint if it
310 * should call raw_rcv for demultiplexing
/*
 * Receive entry point for one raw socket: settle the checksum state of
 * skb, then pass it to rawv6_rcv_skb().
 *
 * Checksum handling, in order:
 *  - sockets without checksumming mark the skb CHECKSUM_UNNECESSARY;
 *  - for CHECKSUM_HW the hardware sum in skb->csum is validated against
 *    the IPv6 pseudo-header with csum_ipv6_magic(); on failure a
 *    rate-limited debug message is printed and the skb falls back to
 *    CHECKSUM_NONE;
 *  - for CHECKSUM_NONE skb->csum is seeded with the complemented
 *    pseudo-header sum so a later full-packet sum can be folded;
 *  - hdrincl sockets verify the full checksum here; a failure counts
 *    as Ip6InDiscards.
 *
 * sk->num is used as the protocol value of the pseudo-header.
 *
 * NOTE(review): the buffer release on the discard path and the return
 * values are on lines missing from this listing.
 */
312 int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
314 if (!sk->tp_pinfo.tp_raw.checksum)
315 skb->ip_summed = CHECKSUM_UNNECESSARY;
317 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
318 if (skb->ip_summed == CHECKSUM_HW) {
319 skb->ip_summed = CHECKSUM_UNNECESSARY;
320 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
321 &skb->nh.ipv6h->daddr,
322 skb->len, sk->num, skb->csum)) {
323 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "raw v6 hw csum failure.\n"));
324 skb->ip_summed = CHECKSUM_NONE;
327 if (skb->ip_summed == CHECKSUM_NONE)
328 skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
329 &skb->nh.ipv6h->daddr,
330 skb->len, sk->num, 0);
333 if (sk->protinfo.af_inet.hdrincl) {
334 if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
335 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
336 IP6_INC_STATS_BH(Ip6InDiscards);
340 skb->ip_summed = CHECKSUM_UNNECESSARY;
343 rawv6_rcv_skb(sk, skb);
349 * This should be easy, if there is something there
350 * we return it, otherwise we block.
/*
 * recvmsg() implementation for raw IPv6 sockets.
 *
 * @sk:       receiving socket
 * @msg:      destination message header (iovec, name, flags)
 * @len:      size of the user buffer
 * @noblock:  non-blocking flag handed to skb_recv_datagram()
 * @flags:    MSG_* flags of the call
 * @addr_len: receives sizeof(struct sockaddr_in6)
 *
 * Visible flow:
 *  - MSG_ERRQUEUE requests are served by ipv6_recv_error();
 *  - otherwise one datagram is taken with skb_recv_datagram();
 *  - MSG_TRUNC is set in msg->msg_flags (the length comparison that
 *    guards it is not visible here);
 *  - data copy: a skb already marked CHECKSUM_UNNECESSARY is copied
 *    directly; a truncated read verifies the checksum over the whole
 *    skb first and then copies; otherwise copy and checksum are done in
 *    one pass by skb_copy_and_csum_datagram_iovec();
 *  - the source address is reported as AF_INET6 with flowinfo 0 and
 *    scope id 0, except that link-local sources get the receiving
 *    interface index from the skb control block;
 *  - timestamp and, when rxopt.all is set, ancillary data are added;
 *  - the datagram is released with skb_free_datagram().
 *
 * Checksum failure path: for MSG_PEEK the bad skb is unlinked from
 * sk->receive_queue under the queue lock, provided it is still at the
 * head.  The error is -EAGAIN for MSG_DONTWAIT and -EHOSTUNREACH
 * otherwise, and Ip6InDiscards is incremented.
 *
 * NOTE(review): labels, gotos, the copied-length computation and the
 * return statements are on lines missing from this listing.
 */
353 int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
354 int noblock, int flags, int *addr_len)
356 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
364 *addr_len=sizeof(*sin6);
366 if (flags & MSG_ERRQUEUE)
367 return ipv6_recv_error(sk, msg, len);
369 skb = skb_recv_datagram(sk, flags, noblock, &err);
376 msg->msg_flags |= MSG_TRUNC;
379 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
380 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
381 } else if (msg->msg_flags&MSG_TRUNC) {
382 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
384 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
386 err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
393 /* Copy the address. */
395 sin6->sin6_family = AF_INET6;
396 memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr,
397 sizeof(struct in6_addr));
398 sin6->sin6_flowinfo = 0;
399 sin6->sin6_scope_id = 0;
400 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
401 struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
402 sin6->sin6_scope_id = opt->iif;
406 sock_recv_timestamp(msg, sk, skb);
408 if (sk->net_pinfo.af_inet6.rxopt.all)
409 datagram_recv_ctl(sk, msg, skb);
412 if (flags & MSG_TRUNC)
416 skb_free_datagram(sk, skb);
422 if (flags&MSG_PEEK) {
424 spin_lock_irq(&sk->receive_queue.lock);
425 if (skb == skb_peek(&sk->receive_queue)) {
426 __skb_unlink(skb, &sk->receive_queue);
429 spin_unlock_irq(&sk->receive_queue.lock);
434 /* Error for blocking case is chosen to masquerade
435 as some normal condition.
437 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
438 IP6_INC_STATS_USER(Ip6InDiscards);
/*
 * State handed to rawv6_frag_cksum() while a checksummed raw packet is
 * being built.  Only the daddr member is visible in this listing; other
 * code in this file also reads iov, len, proto and cksum through the
 * same structure.
 */
446 struct rawv6_fakehdr {
452 struct in6_addr *daddr;
/*
 * Fragment callback for sends without checksumming: copy len bytes,
 * starting at offset, from the user iovec in data into buff.
 *
 * @data:   the iovec of the message, passed as an opaque pointer
 * @saddr:  source address (not used by the visible statements)
 * @buff:   destination buffer for this fragment
 * @offset: byte offset into the iovec
 * @len:    number of bytes to copy
 *
 * Returns the result of memcpy_fromiovecend().
 */
455 static int rawv6_getfrag(const void *data, struct in6_addr *saddr,
456 char *buff, unsigned int offset, unsigned int len)
458 struct iovec *iov = (struct iovec *) data;
460 return memcpy_fromiovecend(buff, iov, offset, len);
/*
 * Fragment callback for sends with checksumming: copy user data into
 * buff while accumulating a checksum, and store the finished checksum
 * in the packet.
 *
 * @data:   struct rawv6_fakehdr describing the message
 * @addr:   address fed to csum_ipv6_magic() together with daddr
 * @buff:   destination buffer for this fragment
 * @offset: byte offset into the user iovec
 *
 * The data is copied with csum_partial_copy_fromiovecend().  The
 * pseudo-header is folded in with csum_ipv6_magic(), and the result is
 * written at buff + opt->offset, where opt is the raw6_opt of the
 * socket, provided the two checksum bytes fit (opt->offset + 1 < len).
 * If they do not fit, a debug message is printed instead.
 *
 * NOTE(review): the remaining parameter, the condition selecting the
 * fragment in which the checksum is finalised and the return values are
 * on lines missing from this listing.
 */
463 static int rawv6_frag_cksum(const void *data, struct in6_addr *addr,
464 char *buff, unsigned int offset,
467 struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data;
469 if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset,
475 struct raw6_opt *opt;
476 struct in6_addr *daddr;
479 opt = &sk->tp_pinfo.tp_raw;
486 hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len,
487 hdr->proto, hdr->cksum);
489 if (opt->offset + 1 < len) {
492 csum = (__u16 *) (buff + opt->offset);
494 /* in case cksum was not initialized */
495 __u32 sum = hdr->cksum;
497 *csum = hdr->cksum = (sum + (sum>>16));
503 printk(KERN_DEBUG "icmp: cksum offset too big\n");
/*
 * sendmsg() implementation for raw IPv6 sockets.
 *
 * @sk:  sending socket
 * @msg: message header (destination name, iovec, control data, flags)
 * @len: payload length
 *
 * Returns len on success or the negative error from the transmit path
 * (see the final return); -EDESTADDRREQ when no destination is given
 * and the socket is not connected.
 *
 * Visible flow:
 *  - MSG_OOB is tested first (see the BSD compatibility comment);
 *  - with an explicit destination: the address must be at least
 *    SIN6_LEN_RFC2133 bytes and of family AF_INET6 (or unspecified);
 *    the protocol is read from sin6_port; the flow label comes from
 *    sin6_flowinfo, and a labelled flow found by fl6_sock_lookup()
 *    supplies the destination from flowlabel->dst; on a connected
 *    socket a destination equal to the connected one is replaced by the
 *    copy of the socket itself; a non-zero sin6_scope_id selects the
 *    outgoing interface for link-local destinations;
 *  - without a destination: the socket must be TCP_ESTABLISHED, and the
 *    connected address and np->flow_label are used;
 *  - an unspecified destination address is treated as an error;
 *  - fl.oif may be taken from sk->bound_dev_if;
 *  - control messages are parsed by datagram_send_ctl() into a zeroed
 *    ipv6_txoptions; a flow label given there is looked up as well;
 *  - options are merged with those of the flow label through
 *    fl6_merge_options();
 *  - np->saddr is the default source when none was chosen and it is not
 *    the wildcard;
 *  - ip6_build_xmit() is called with rawv6_frag_cksum and a
 *    rawv6_fakehdr when the socket has checksumming enabled, otherwise
 *    with rawv6_getfrag and the plain iovec;
 *  - the flow label reference is released before returning.
 *
 * NOTE(review): several conditions, error codes and assignments (for
 * example the length limit, the setup of hdr beyond hdr.iov, and the
 * default for opt) are on lines missing from this listing.
 */
511 static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
513 struct ipv6_txoptions opt_space;
514 struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
515 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
516 struct ipv6_txoptions *opt = NULL;
517 struct ip6_flowlabel *flowlabel = NULL;
519 int addr_len = msg->msg_namelen;
520 struct in6_addr *daddr;
521 struct raw6_opt *raw_opt;
526 /* Rough check on arithmetic overflow,
527 better check is made in ip6_build_xmit
532 /* Mirror BSD error message compatibility */
533 if (msg->msg_flags & MSG_OOB)
537 * Get and verify the address.
540 fl.fl6_flowlabel = 0;
544 if (addr_len < SIN6_LEN_RFC2133)
547 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
550 /* port is the proto value [0..255] carried in nexthdr */
551 proto = ntohs(sin6->sin6_port);
559 daddr = &sin6->sin6_addr;
561 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
562 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
563 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
564 if (flowlabel == NULL)
566 daddr = &flowlabel->dst;
570 /* Otherwise it will be difficult to maintain sk->dst_cache. */
571 if (sk->state == TCP_ESTABLISHED &&
572 !ipv6_addr_cmp(daddr, &sk->net_pinfo.af_inet6.daddr))
573 daddr = &sk->net_pinfo.af_inet6.daddr;
575 if (addr_len >= sizeof(struct sockaddr_in6) &&
576 sin6->sin6_scope_id &&
577 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
578 fl.oif = sin6->sin6_scope_id;
580 if (sk->state != TCP_ESTABLISHED)
581 return -EDESTADDRREQ;
584 daddr = &(sk->net_pinfo.af_inet6.daddr);
585 fl.fl6_flowlabel = np->flow_label;
588 if (ipv6_addr_any(daddr)) {
590 * unspecfied destination address
591 * treated as error... is this correct ?
597 fl.oif = sk->bound_dev_if;
600 if (msg->msg_controllen) {
602 memset(opt, 0, sizeof(struct ipv6_txoptions));
604 err = datagram_send_ctl(msg, &fl, opt, &hlimit);
606 fl6_sock_release(flowlabel);
609 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
610 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
611 if (flowlabel == NULL)
614 if (!(opt->opt_nflen|opt->opt_flen))
620 opt = fl6_merge_options(&opt_space, flowlabel, opt);
622 raw_opt = &sk->tp_pinfo.tp_raw;
626 if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
627 fl.fl6_src = &np->saddr;
628 fl.uli_u.icmpt.type = 0;
629 fl.uli_u.icmpt.code = 0;
631 if (raw_opt->checksum) {
632 struct rawv6_fakehdr hdr;
634 hdr.iov = msg->msg_iov;
640 if (opt && opt->srcrt)
645 err = ip6_build_xmit(sk, rawv6_frag_cksum, &hdr, &fl, len,
646 opt, hlimit, msg->msg_flags);
648 err = ip6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, &fl, len,
649 opt, hlimit, msg->msg_flags);
652 fl6_sock_release(flowlabel);
654 return err<0?err:len;
/*
 * Install the ICMPv6 filter of a socket from user space.
 *
 * At most sizeof(struct icmp6_filter) bytes are copied from optval into
 * sk->tp_pinfo.tp_raw.filter; a longer optlen is silently clamped.
 *
 * NOTE(review): the dispatch on optname and the return values are on
 * lines missing from this listing.
 */
657 static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
658 char *optval, int optlen)
662 if (optlen > sizeof(struct icmp6_filter))
663 optlen = sizeof(struct icmp6_filter);
664 if (copy_from_user(&sk->tp_pinfo.tp_raw.filter, optval, optlen))
/*
 * Copy the ICMPv6 filter of a socket out to user space.
 *
 * The requested length is read from *optlen, clamped to
 * sizeof(struct icmp6_filter), written back to *optlen, and that many
 * bytes of sk->tp_pinfo.tp_raw.filter are copied to optval.
 *
 * NOTE(review): the dispatch on optname, any check for a negative
 * length and the return values are on lines missing from this listing.
 */
674 static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
675 char *optval, int *optlen)
681 if (get_user(len, optlen))
685 if (len > sizeof(struct icmp6_filter))
686 len = sizeof(struct icmp6_filter);
687 if (put_user(len, optlen))
689 if (copy_to_user(optval, &sk->tp_pinfo.tp_raw.filter, len))
/*
 * setsockopt() for raw IPv6 sockets.
 *
 * Visible dispatch:
 *  - the ICMPv6 filter path first tests sk->num against IPPROTO_ICMPV6
 *    and then defers to rawv6_seticmpfilter();
 *  - IPV6_CHECKSUM is handled in this function, everything else on that
 *    path goes to ipv6_setsockopt();
 *  - the option value is read as an int from optval; a positive odd
 *    value is singled out (see the RFC2292bis remark in the body);
 *  - -ENOPROTOOPT is the fall-through result.
 *
 * NOTE(review): the switch on level, the error codes and the stores
 * into opt (checksum flag and offset) are on lines missing from this
 * listing.
 */
700 static int rawv6_setsockopt(struct sock *sk, int level, int optname,
701 char *optval, int optlen)
703 struct raw6_opt *opt = &sk->tp_pinfo.tp_raw;
711 if (sk->num != IPPROTO_ICMPV6)
713 return rawv6_seticmpfilter(sk, level, optname, optval,
716 if (optname == IPV6_CHECKSUM)
719 return ipv6_setsockopt(sk, level, optname, optval,
723 if (get_user(val, (int *)optval))
728 /* You may get strange result with a positive odd offset;
729 RFC2292bis agrees with me. */
730 if (val > 0 && (val&1))
743 return(-ENOPROTOOPT);
/*
 * getsockopt() for raw IPv6 sockets.
 *
 * Visible dispatch:
 *  - the ICMPv6 filter path first tests sk->num against IPPROTO_ICMPV6
 *    and then defers to rawv6_geticmpfilter();
 *  - IPV6_CHECKSUM is handled in this function, everything else on that
 *    path goes to ipv6_getsockopt();
 *  - the user length is read from *optlen, limited to sizeof(int),
 *    written back, and that many bytes of val are copied to optval.
 *
 * NOTE(review): the switch on level, the value reported when
 * opt->checksum is zero versus non-zero, and the return values are on
 * lines missing from this listing.
 */
747 static int rawv6_getsockopt(struct sock *sk, int level, int optname,
748 char *optval, int *optlen)
750 struct raw6_opt *opt = &sk->tp_pinfo.tp_raw;
758 if (sk->num != IPPROTO_ICMPV6)
760 return rawv6_geticmpfilter(sk, level, optname, optval,
763 if (optname == IPV6_CHECKSUM)
766 return ipv6_getsockopt(sk, level, optname, optval,
770 if (get_user(len,optlen))
775 if (opt->checksum == 0)
785 len = min_t(unsigned int, sizeof(int), len);
787 if (put_user(len, optlen))
789 if (copy_to_user(optval,&val,len))
/*
 * ioctl() for raw IPv6 sockets; arg is a user pointer to an int.
 *
 * Two queries are visible:
 *  - one reports sk->wmem_alloc;
 *  - one reports the size (skb->tail - skb->h.raw) of the datagram at
 *    the head of sk->receive_queue, read with the queue lock held.
 *
 * NOTE(review): the cmd values selecting each case (presumably the
 * output and input queue size requests - confirm), the NULL check on
 * the peeked skb and the default case are on lines missing from this
 * listing.
 */
794 static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
799 int amount = atomic_read(&sk->wmem_alloc);
800 return put_user(amount, (int *)arg);
807 spin_lock_irq(&sk->receive_queue.lock);
808 skb = skb_peek(&sk->receive_queue);
810 amount = skb->tail - skb->h.raw;
811 spin_unlock_irq(&sk->receive_queue.lock);
812 return put_user(amount, (int *)arg);
/*
 * Close a raw IPv6 socket.
 *
 * Sockets opened for IPPROTO_RAW first call ip6_ra_control(sk, -1, NULL);
 * every socket is then released with inet_sock_release().  The timeout
 * argument is not used by the visible statements.
 */
820 static void rawv6_close(struct sock *sk, long timeout)
822 if (sk->num == IPPROTO_RAW)
823 ip6_ra_control(sk, -1, NULL);
825 inet_sock_release(sk);
/*
 * Per-socket initialisation.  ICMPv6 sockets get special treatment of
 * their raw options (sk->tp_pinfo.tp_raw).
 *
 * NOTE(review): the statements applied to opt and the return value are
 * on lines missing from this listing.
 */
828 static int rawv6_init_sk(struct sock *sk)
830 if (sk->num == IPPROTO_ICMPV6){
831 struct raw6_opt *opt = &sk->tp_pinfo.tp_raw;
/* Format of one output line: text left-justified and padded to 190 columns, then a newline. */
839 #define LINE_FMT "%-190s\n"
/*
 * Format the state of one raw IPv6 socket as a single text record.
 *
 * @sp:     socket to describe
 * @tmpbuf: output buffer (the formatting call that writes into it is on
 *          a line missing from this listing - TODO confirm)
 * @i:      index supplied by the caller, the hash bucket being walked
 *
 * The record holds the local address (np->rcv_saddr) and the remote
 * address (np->daddr) as four 32-bit hex words each, followed by a
 * 16-bit hex field (srcp and destp), the wmem_alloc and rmem_alloc
 * counters, and finally the reference count and the socket pointer.
 *
 * NOTE(review): several arguments of the format call, including the
 * values of srcp and destp, are on lines missing from this listing.
 */
841 static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i)
843 struct in6_addr *dest, *src;
846 dest = &sp->net_pinfo.af_inet6.daddr;
847 src = &sp->net_pinfo.af_inet6.rcv_saddr;
851 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
852 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
854 src->s6_addr32[0], src->s6_addr32[1],
855 src->s6_addr32[2], src->s6_addr32[3], srcp,
856 dest->s6_addr32[0], dest->s6_addr32[1],
857 dest->s6_addr32[2], dest->s6_addr32[3], destp,
859 atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
863 atomic_read(&sp->refcnt), sp);
/*
 * Produce the text listing of all raw IPv6 sockets.
 *
 * @buffer: output buffer
 * @start:  set to the position inside buffer where the requested data
 *          begins
 * @offset: offset of the read within the whole listing
 * @length: size of the read request (its use is not visible here)
 *
 * A column header line is emitted when the read starts inside the first
 * record (offset < LINE_LEN + 1).  Then, with raw_v6_lock read-held,
 * every chain of raw_v6_htable is walked; sockets whose family is
 * compared with PF_INET6 are formatted by get_raw6_sock() into tmpbuf
 * and appended using LINE_FMT.  Finally *start is positioned at
 * buffer + len - (pos - offset).
 *
 * NOTE(review): the bookkeeping of pos, the early exit once length
 * bytes are produced and the return value are on lines missing from
 * this listing.
 */
866 int raw6_get_info(char *buffer, char **start, off_t offset, int length)
868 int len = 0, num = 0, i;
871 char tmpbuf[LINE_LEN+2];
873 if (offset < LINE_LEN+1)
874 len += sprintf(buffer, LINE_FMT,
876 "local_address " /* 38 */
877 "remote_address " /* 38 */
878 "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
879 " uid timeout inode"); /* 21 */
883 read_lock(&raw_v6_lock);
884 for (i = 0; i < RAWV6_HTABLE_SIZE; i++) {
887 for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) {
888 if (sk->family != PF_INET6)
893 get_raw6_sock(sk, tmpbuf, i);
894 len += sprintf(buffer+len, LINE_FMT, tmpbuf);
900 read_unlock(&raw_v6_lock);
901 begin = len - (pos - offset);
902 *start = buffer + begin;
911 struct proto rawv6_prot = {
914 connect: udpv6_connect,
915 disconnect: udp_disconnect,
918 destroy: inet6_destroy_sock,
919 setsockopt: rawv6_setsockopt,
920 getsockopt: rawv6_getsockopt,
921 sendmsg: rawv6_sendmsg,
922 recvmsg: rawv6_recvmsg,
924 backlog_rcv: rawv6_rcv_skb,
926 unhash: raw_v6_unhash,