2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
8 * Version: $Id: ip_sockglue.c,v 1.1.1.1 2005/04/11 02:51:13 jack Exp $
13 * Many : Split from ip.c , see ip.c for history.
14 * Martin Mares : TOS setting fixed.
15 * Alan Cox : Fixed a couple of oopses in Martin's
17 * Mike McLagan : Routing by source
20 #include <linux/config.h>
21 #include <linux/types.h>
23 #include <linux/sched.h>
24 #include <linux/skbuff.h>
26 #include <linux/icmp.h>
27 #include <linux/netdevice.h>
32 #include <linux/tcp.h>
33 #include <linux/udp.h>
34 #include <linux/igmp.h>
35 #include <linux/netfilter.h>
36 #include <linux/route.h>
37 #include <linux/mroute.h>
38 #include <net/route.h>
39 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
40 #include <net/transp_v6.h>
43 #include <linux/errqueue.h>
44 #include <asm/uaccess.h>
/*
 * Bit values stored in sk->protinfo.af_inet.cmsg_flags. They are set and
 * cleared by ip_setsockopt() and read by ip_cmsg_recv() and ip_getsockopt().
 */
46 #define IP_CMSG_PKTINFO 1
49 #define IP_CMSG_RECVOPTS 8
50 #define IP_CMSG_RETOPTS 16
53 * SOL_IP control messages.
/*
 * Build a struct in_pktinfo for a received packet and pass it to put_cmsg()
 * as a SOL_IP / IP_PKTINFO control message.
 *
 * msg: message header the control message is appended to.
 * skb: received buffer; its IP header and attached route (skb->dst) are read.
 */
56 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
58 struct in_pktinfo info;
59 struct rtable *rt = (struct rtable *)skb->dst;
/* The reported address is the destination address of the IP header. */
61 info.ipi_addr.s_addr = skb->nh.iph->daddr;
/* Input interface index and specific destination come from the route entry. */
63 info.ipi_ifindex = rt->rt_iif;
64 info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
/* NOTE(review): presumably the fallback used when no route is attached - confirm. */
67 info.ipi_spec_dst.s_addr = 0;
70 put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
/*
 * Report the TTL field of the received IP header as an int-sized
 * SOL_IP / IP_TTL control message.
 */
73 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
/* Widen the header field to int so the cmsg payload is sizeof(int) bytes. */
75 int ttl = skb->nh.iph->ttl;
76 put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
/*
 * Report the TOS field of the received IP header as a one-byte
 * SOL_IP / IP_TOS control message, copied directly from the header.
 */
79 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
81 put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
/*
 * Pass the IP options of the received packet to user space as a
 * SOL_IP / IP_RECVOPTS control message.
 */
84 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
/* NOTE(review): presumably nothing is emitted when the packet has no options - confirm. */
86 if (IPCB(skb)->opt.optlen == 0)
/* Payload is optlen bytes starting right after the fixed IP header. */
89 put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1);
/*
 * Emit a SOL_IP / IP_RETOPTS control message whose payload is the option
 * block produced by ip_options_echo() for the received packet, after it has
 * been passed through ip_options_undo().
 *
 * msg: message header the control message is appended to.
 * skb: received buffer whose options are echoed.
 */
93 void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
/* Scratch space: a struct ip_options followed by 40 bytes of option data. */
95 unsigned char optbuf[sizeof(struct ip_options) + 40];
96 struct ip_options * opt = (struct ip_options*)optbuf;
/* NOTE(review): presumably nothing is emitted when the packet has no options - confirm. */
98 if (IPCB(skb)->opt.optlen == 0)
/* A non-zero result from ip_options_echo() is reported via MSG_CTRUNC. */
101 if (ip_options_echo(opt, skb)) {
102 msg->msg_flags |= MSG_CTRUNC;
105 ip_options_undo(opt);
107 put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
/*
 * Dispatch the per-socket receive control messages for one packet.
 *
 * The socket's cmsg_flags word is consumed one bit at a time, lowest bit
 * first; the handlers appear in the order pktinfo, ttl, tos, opts, retopts.
 * NOTE(review): each handler is presumably guarded by a test of the current
 * low bit - confirm.
 */
111 void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
113 unsigned flags = skb->sk->protinfo.af_inet.cmsg_flags;
115 /* Ordered by supposed usage frequency */
117 ip_cmsg_recv_pktinfo(msg, skb);
/* Shift to the next flag; a zero result means no further flags are set. */
118 if ((flags>>=1) == 0)
122 ip_cmsg_recv_ttl(msg, skb);
123 if ((flags>>=1) == 0)
127 ip_cmsg_recv_tos(msg, skb);
128 if ((flags>>=1) == 0)
132 ip_cmsg_recv_opts(msg, skb);
133 if ((flags>>=1) == 0)
137 ip_cmsg_recv_retopts(msg, skb);
/*
 * Walk the control messages attached to an outgoing msghdr and record the
 * SOL_IP ones in the caller's ipcm_cookie.
 *
 * msg: message header whose msg_control area is scanned.
 * ipc: cookie updated with IP options (ipc->opt), output interface
 *      (ipc->oif) and address (ipc->addr) taken from the control data.
 */
140 int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
143 struct cmsghdr *cmsg;
145 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
/*
 * Sanity check: the header must be at least a full cmsghdr and must not
 * extend beyond msg_controllen.
 */
146 if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
147 (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
148 + cmsg->cmsg_len) > msg->msg_controllen) {
/* Only SOL_IP messages are interpreted by the switch below. */
151 if (cmsg->cmsg_level != SOL_IP)
153 switch (cmsg->cmsg_type) {
/* Option data: payload length is computed, then capped at 40 bytes. */
155 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
156 err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0);
/* Packet info: the payload must be exactly one struct in_pktinfo. */
162 struct in_pktinfo *info;
163 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
165 info = (struct in_pktinfo *)CMSG_DATA(cmsg);
166 ipc->oif = info->ipi_ifindex;
167 ipc->addr = info->ipi_spec_dst.s_addr;
178 /* Special input handler for packets caught by the router alert option.
179 They are selected only by protocol field, and then processed like
180 local ones; but only if someone wants them! Otherwise, a router
181 not running rsvpd will kill RSVP.
183 It is a user-level problem what to do with them.
184 I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
185 but the receiver should be clever enough, e.g., to forward mtrace requests
186 sent to a multicast group so that they reach the destination's designated router.
/* Head of the singly linked (via ->next) chain walked by ip_ra_control(). */
188 struct ip_ra_chain *ip_ra_chain;
/* Lock that ip_ra_control() takes for writing around its walk of ip_ra_chain. */
189 rwlock_t ip_ra_lock = RW_LOCK_UNLOCKED;
/*
 * Add or remove a socket on the router-alert chain.
 *
 * sk:         socket to register or unregister.
 * on:         non-zero requests a new chain entry (one is allocated up front).
 * destructor: callback stored in the new entry's destructor field.
 */
191 int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *))
193 struct ip_ra_chain *ra, *new_ra, **rap;
/* NOTE(review): presumably an error is returned unless sk is a raw socket with a protocol other than IPPROTO_RAW - confirm. */
195 if (sk->type != SOCK_RAW || sk->num == IPPROTO_RAW)
/* The entry is allocated with GFP_KERNEL before write_lock_bh() is taken. */
198 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
200 write_lock_bh(&ip_ra_lock);
/* rap always points at the link that references the current entry. */
201 for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
204 write_unlock_bh(&ip_ra_lock);
210 write_unlock_bh(&ip_ra_lock);
/* No entry was allocated (either 'on' was zero or kmalloc failed). */
219 if (new_ra == NULL) {
220 write_unlock_bh(&ip_ra_lock);
224 new_ra->destructor = destructor;
229 write_unlock_bh(&ip_ra_lock);
/*
 * Queue an ICMP-originated error on the socket's error queue.
 *
 * sk:      socket the error belongs to; its recverr flag is checked first.
 * skb:     buffer holding the ICMP message; a clone of it is queued.
 * err:     errno value stored in the extended error.
 * port:    port argument supplied by the caller.
 * info:    value stored in ee_info.
 * payload: pointer into the skb data; everything before it is pulled off.
 */
234 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
235 u16 port, u32 info, u8 *payload)
237 struct sock_exterr_skb *serr;
239 if (!sk->protinfo.af_inet.recverr)
/* Work on a clone allocated with GFP_ATOMIC; from here on skb is the clone. */
242 skb = skb_clone(skb, GFP_ATOMIC);
246 serr = SKB_EXT_ERR(skb);
247 serr->ee.ee_errno = err;
248 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
/* ICMP type and code are copied from the ICMP header. */
249 serr->ee.ee_type = skb->h.icmph->type;
250 serr->ee.ee_code = skb->h.icmph->code;
252 serr->ee.ee_info = info;
253 serr->ee.ee_data = 0;
/*
 * Offset, relative to skb->nh.raw, of the destination address in the IP
 * header that follows the ICMP header.
 */
254 serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
/* Make the data start at payload, then queue on the socket's error queue. */
257 skb->h.raw = payload;
258 if (!skb_pull(skb, payload - skb->data) ||
259 sock_queue_err_skb(sk, skb))
/*
 * Queue a locally generated error on the socket's error queue.
 *
 * sk:    socket the error belongs to; its recverr flag is checked first.
 * err:   errno value stored in the extended error.
 * daddr: destination address argument supplied by the caller.
 * port:  port argument supplied by the caller.
 * info:  value stored in ee_info.
 */
263 void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info)
265 struct sock_exterr_skb *serr;
269 if (!sk->protinfo.af_inet.recverr)
/* A fresh skb just large enough for one IP header, allocated with GFP_ATOMIC. */
272 skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
276 iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr));
280 serr = SKB_EXT_ERR(skb);
281 serr->ee.ee_errno = err;
282 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
/* Type and code are zero for locally originated errors. */
283 serr->ee.ee_type = 0;
284 serr->ee.ee_code = 0;
286 serr->ee.ee_info = info;
287 serr->ee.ee_data = 0;
/* Offset of the header's daddr field relative to skb->nh.raw. */
288 serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
/* Pull everything up to the tail so no data bytes remain in front of it. */
291 skb->h.raw = skb->tail;
292 __skb_pull(skb, skb->tail - skb->data);
294 if (sock_queue_err_skb(sk, skb))
299 * Handle MSG_ERRQUEUE
/*
 * Deliver one entry from the socket's error queue to the caller.
 *
 * sk:  socket whose error_queue is read.
 * msg: destination message header; receives payload, optional source name,
 *      an IP_RECVERR control message and the MSG_ERRQUEUE flag.
 * len: size of the caller's buffer.
 */
301 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
303 struct sock_exterr_skb *serr;
304 struct sk_buff *skb, *skb2;
305 struct sockaddr_in *sin;
/* Payload of the IP_RECVERR control message: extended error plus offender address. */
307 struct sock_extended_err ee;
308 struct sockaddr_in offender;
314 skb = skb_dequeue(&sk->error_queue);
/* NOTE(review): presumably set when the queued data is longer than len - confirm. */
320 msg->msg_flags |= MSG_TRUNC;
323 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
327 sock_recv_timestamp(msg, sk, skb);
329 serr = SKB_EXT_ERR(skb);
/* msg_name is filled with the address stored at addr_offset and the recorded port. */
331 sin = (struct sockaddr_in *)msg->msg_name;
333 sin->sin_family = AF_INET;
334 sin->sin_addr.s_addr = *(u32*)(skb->nh.raw + serr->addr_offset);
335 sin->sin_port = serr->port;
336 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
339 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
/* The offender stays AF_UNSPEC unless the error originated from ICMP. */
340 sin = &errhdr.offender;
341 sin->sin_family = AF_UNSPEC;
342 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
343 sin->sin_family = AF_INET;
/* For ICMP errors the offender is the source address of the IP header. */
344 sin->sin_addr.s_addr = skb->nh.iph->saddr;
346 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
/* Also emit the regular receive control messages if any are enabled. */
347 if (sk->protinfo.af_inet.cmsg_flags)
348 ip_cmsg_recv(msg, skb);
351 put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
353 /* Now we could try to dump offended packet options */
355 msg->msg_flags |= MSG_ERRQUEUE;
358 /* Reset and regenerate socket error */
359 spin_lock_irq(&sk->error_queue.lock);
/* If another error is queued, expose its errno in sk->err. */
361 if ((skb2 = skb_peek(&sk->error_queue)) != NULL) {
362 sk->err = SKB_EXT_ERR(skb2)->ee.ee_errno;
/* The lock is released before the error_report callback is invoked. */
363 spin_unlock_irq(&sk->error_queue.lock);
364 sk->error_report(sk);
366 spin_unlock_irq(&sk->error_queue.lock);
377 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
/*
 * Set an IP-level socket option.
 *
 * sk:      socket being configured.
 * level:   option level supplied by the caller.
 * optname: option selector.
 * optval:  user-space pointer to the option value.
 * optlen:  length of the value in bytes.
 *
 * Integer-valued options are fetched into val up front; structured options
 * (IP option blocks, multicast requests) are copied from user space in
 * their own branches.
 */
381 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
/*
 * Options listed here take an int (or single byte) value.
 * NOTE(review): 1<<optname is undefined for optname outside 0..31 and
 * optname is caller-supplied - confirm a range check precedes this.
 */
388 if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
389 (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
390 (1<<IP_RETOPTS) | (1<<IP_TOS) |
391 (1<<IP_TTL) | (1<<IP_HDRINCL) |
392 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
393 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND))) ||
394 optname == IP_MULTICAST_TTL ||
395 optname == IP_MULTICAST_LOOP) {
/* Read a full int when the caller supplied one, otherwise a single byte. */
396 if (optlen >= sizeof(int)) {
397 if (get_user(val, (int *) optval))
399 } else if (optlen >= sizeof(char)) {
402 if (get_user(ucval, (unsigned char *) optval))
408 /* If optlen==0, it is equivalent to val == 0 */
/* Option names in [MRT_BASE, MRT_BASE + 10] are handed to the multicast routing code. */
410 #ifdef CONFIG_IP_MROUTE
411 if (optname >= MRT_BASE && optname <= (MRT_BASE + 10))
412 return ip_mroute_setsockopt(sk,optname,optval,optlen);
/* IP option block: length must be within 0..40 bytes before ip_options_get() parses it. */
421 struct ip_options * opt = NULL;
422 if (optlen > 40 || optlen < 0)
424 err = ip_options_get(&opt, optval, optlen, 1);
/*
 * For stream sockets the TCP extension-header length is set to the new
 * option length and the MSS is recomputed via tcp_sync_mss().
 */
427 if (sk->type == SOCK_STREAM) {
428 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
429 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
430 if (sk->family == PF_INET ||
431 (!((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
432 && sk->daddr != LOOPBACK4_IPV6)) {
435 tp->ext_header_len = opt->optlen;
436 tcp_sync_mss(sk, tp->pmtu_cookie);
437 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* Swap the new option block into the socket; opt receives the previous pointer. */
441 opt = xchg(&sk->protinfo.af_inet.opt, opt);
/* Receive-side control message switches: each option sets or clears one cmsg_flags bit. */
448 sk->protinfo.af_inet.cmsg_flags |= IP_CMSG_PKTINFO;
450 sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_PKTINFO;
454 sk->protinfo.af_inet.cmsg_flags |= IP_CMSG_TTL;
456 sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_TTL;
460 sk->protinfo.af_inet.cmsg_flags |= IP_CMSG_TOS;
462 sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_TOS;
466 sk->protinfo.af_inet.cmsg_flags |= IP_CMSG_RECVOPTS;
468 sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_RECVOPTS;
472 sk->protinfo.af_inet.cmsg_flags |= IP_CMSG_RETOPTS;
474 sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_RETOPTS;
476 case IP_TOS: /* This sets both TOS and Precedence */
/* On stream sockets the two low bits of the current TOS are carried over into val. */
477 if (sk->type == SOCK_STREAM) {
479 val |= sk->protinfo.af_inet.tos & 3;
/* Precedence at or above IPTOS_PREC_CRITIC_ECP is gated on CAP_NET_ADMIN. */
481 if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
482 !capable(CAP_NET_ADMIN)) {
/* Socket priority is recomputed only when the TOS actually changes. */
486 if (sk->protinfo.af_inet.tos != val) {
487 sk->protinfo.af_inet.tos=val;
488 sk->priority = rt_tos2priority(val);
/* NOTE(review): presumably substituted when the caller requests the system default TTL - confirm. */
496 val = sysctl_ip_default_ttl;
499 sk->protinfo.af_inet.ttl=val;
/* Header-include is tested against raw sockets and stored as 0 or 1. */
502 if(sk->type!=SOCK_RAW) {
506 sk->protinfo.af_inet.hdrincl=val?1:0;
508 case IP_MTU_DISCOVER:
511 sk->protinfo.af_inet.pmtudisc = val;
/* recverr is normalised to 0 or 1. */
514 sk->protinfo.af_inet.recverr = !!val;
/* NOTE(review): presumably the error queue is purged only when recverr is turned off - confirm. */
516 skb_queue_purge(&sk->error_queue);
518 case IP_MULTICAST_TTL:
519 if (sk->type == SOCK_STREAM)
/* Multicast TTL is range-checked against 0..255. */
525 if (val < 0 || val > 255)
527 sk->protinfo.af_inet.mc_ttl=val;
529 case IP_MULTICAST_LOOP:
532 sk->protinfo.af_inet.mc_loop = val ? 1 : 0;
534 case IP_MULTICAST_IF:
536 struct ip_mreqn mreq;
537 struct net_device *dev = NULL;
539 if (sk->type == SOCK_STREAM)
542 * Check the arguments are allowable
/*
 * Accept either a full struct ip_mreqn or, for shorter buffers, just a
 * struct in_addr copied into imr_address with the rest zeroed.
 */
546 if (optlen >= sizeof(struct ip_mreqn)) {
547 if (copy_from_user(&mreq,optval,sizeof(mreq)))
550 memset(&mreq, 0, sizeof(mreq));
551 if (optlen >= sizeof(struct in_addr) &&
552 copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
/* No interface index given: INADDR_ANY clears the setting, otherwise look the device up by address. */
556 if (!mreq.imr_ifindex) {
557 if (mreq.imr_address.s_addr == INADDR_ANY) {
558 sk->protinfo.af_inet.mc_index = 0;
559 sk->protinfo.af_inet.mc_addr = 0;
563 dev = ip_dev_find(mreq.imr_address.s_addr);
565 mreq.imr_ifindex = dev->ifindex;
/* An explicit interface index is resolved with __dev_get_by_index(). */
569 dev = __dev_get_by_index(mreq.imr_ifindex);
572 err = -EADDRNOTAVAIL;
/* The chosen interface is compared with the device the socket is bound to, if any. */
577 if (sk->bound_dev_if && mreq.imr_ifindex != sk->bound_dev_if)
580 sk->protinfo.af_inet.mc_index = mreq.imr_ifindex;
581 sk->protinfo.af_inet.mc_addr = mreq.imr_address.s_addr;
586 case IP_ADD_MEMBERSHIP:
587 case IP_DROP_MEMBERSHIP:
589 struct ip_mreqn mreq;
/* At least a struct ip_mreq is required; a full struct ip_mreqn is used when supplied. */
591 if (optlen < sizeof(struct ip_mreq))
594 if (optlen >= sizeof(struct ip_mreqn)) {
595 if(copy_from_user(&mreq,optval,sizeof(mreq)))
598 memset(&mreq, 0, sizeof(mreq));
599 if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
603 if (optname == IP_ADD_MEMBERSHIP)
604 err = ip_mc_join_group(sk,&mreq);
606 err = ip_mc_leave_group(sk,&mreq);
609 case IP_ROUTER_ALERT:
/* Router alert registration is delegated to ip_ra_control() with no destructor. */
610 err = ip_ra_control(sk, val ? 1 : 0, NULL);
616 sk->protinfo.af_inet.freebind = !!val;
/* With netfilter configured, options are passed on to nf_setsockopt(). */
620 #ifdef CONFIG_NETFILTER
621 err = nf_setsockopt(sk, PF_INET, optname, optval,
637 * Get the options. Note for future reference. The GET of IP options gets the
638 * _received_ ones. The set sets the _sent_ ones.
/*
 * Read an IP-level socket option.
 *
 * sk:      socket being queried.
 * level:   option level supplied by the caller.
 * optname: option selector.
 * optval:  user-space buffer that receives the value.
 * optlen:  user-space pointer to the buffer length; read on entry and
 *          updated with the number of bytes written.
 */
641 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
/* Option names in [MRT_BASE, MRT_BASE + 10] are handed to the multicast routing code. */
649 #ifdef CONFIG_IP_MROUTE
650 if(optname>=MRT_BASE && optname <=MRT_BASE+10)
652 return ip_mroute_getsockopt(sk,optname,optval,optlen);
/* Fetch the caller's buffer length from user space. */
656 if(get_user(len,optlen))
/*
 * IP option block: work on a local copy of the socket's options so that
 * ip_options_undo() does not touch the socket's own copy.
 */
666 unsigned char optbuf[sizeof(struct ip_options)+40];
667 struct ip_options * opt = (struct ip_options*)optbuf;
669 if (sk->protinfo.af_inet.opt)
670 memcpy(optbuf, sk->protinfo.af_inet.opt,
671 sizeof(struct ip_options)+
672 sk->protinfo.af_inet.opt->optlen);
/* With no option data, a length of zero is reported. */
675 if (opt->optlen == 0)
676 return put_user(0, optlen);
678 ip_options_undo(opt);
/* Copy out at most the caller's buffer length and report how much was written. */
680 len = min_t(unsigned int, len, opt->optlen);
681 if(put_user(len, optlen))
683 if(copy_to_user(optval, opt->__data, len))
/* Control message switches are reported as 0 or 1. */
688 val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_PKTINFO) != 0;
691 val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_TTL) != 0;
694 val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_TOS) != 0;
697 val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_RECVOPTS) != 0;
700 val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_RETOPTS) != 0;
/* Plain per-socket settings are returned as stored. */
703 val=sk->protinfo.af_inet.tos;
706 val=sk->protinfo.af_inet.ttl;
709 val=sk->protinfo.af_inet.hdrincl;
711 case IP_MTU_DISCOVER:
712 val=sk->protinfo.af_inet.pmtudisc;
/* This branch takes the socket's cached route via sk_dst_get(). */
716 struct dst_entry *dst;
718 dst = sk_dst_get(sk);
730 val=sk->protinfo.af_inet.recverr;
732 case IP_MULTICAST_TTL:
733 val=sk->protinfo.af_inet.mc_ttl;
735 case IP_MULTICAST_LOOP:
736 val=sk->protinfo.af_inet.mc_loop;
738 case IP_MULTICAST_IF:
/* The multicast interface is returned as an in_addr, truncated to the caller's length. */
741 len = min_t(unsigned int, len, sizeof(struct in_addr));
742 addr.s_addr = sk->protinfo.af_inet.mc_addr;
745 if(put_user(len, optlen))
747 if(copy_to_user((void *)optval, &addr, len))
/*
 * This branch tests for a stream socket, then writes control messages
 * straight into the caller's buffer through a local msghdr.
 */
757 if (sk->type != SOCK_STREAM)
760 msg.msg_control = optval;
761 msg.msg_controllen = len;
764 if (sk->protinfo.af_inet.cmsg_flags&IP_CMSG_PKTINFO) {
765 struct in_pktinfo info;
767 info.ipi_addr.s_addr = sk->rcv_saddr;
768 info.ipi_spec_dst.s_addr = sk->rcv_saddr;
769 info.ipi_ifindex = sk->protinfo.af_inet.mc_index;
770 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
772 if (sk->protinfo.af_inet.cmsg_flags&IP_CMSG_TTL) {
773 int hlim = sk->protinfo.af_inet.mc_ttl;
774 put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
/* Reported length is the original length minus what remains in msg_controllen. */
776 len -= msg.msg_controllen;
777 return put_user(len, optlen);
780 val = sk->protinfo.af_inet.freebind;
/* With netfilter configured, options are passed on to nf_getsockopt(). */
783 #ifdef CONFIG_NETFILTER
784 val = nf_getsockopt(sk, PF_INET, optname, optval,
788 val = put_user(len, optlen);
/*
 * Common copy-out for integer options: a short buffer gets a single byte
 * when the value fits, otherwise up to sizeof(int) bytes are copied.
 * NOTE(review): val<255 sends 255 down the int path although it fits in an
 * unsigned char - confirm whether <= was intended.
 */
797 if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
798 unsigned char ucval = (unsigned char)val;
800 if(put_user(len, optlen))
802 if(copy_to_user(optval,&ucval,1))
805 len = min_t(unsigned int, sizeof(int), len);
806 if(put_user(len, optlen))
808 if(copy_to_user(optval,&val,len))