2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
/*
 * File-scope state: the ICMPv6 SNMP counter block and one control-socket
 * pointer per CPU (each initialised to NULL). The icmpv6_socket macro
 * expands to __get_cpu_var(__icmpv6_socket), i.e. the per-CPU variable
 * as seen from the CPU executing the code.
 */
70 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
73 * The ICMP socket(s). This is the most convenient way to flow control
74 * our ICMP output as well as maintain a clean interface throughout
75 * all layers. All Socketless IP sends will soon be gone.
77 * On SMP we have one ICMP socket per-cpu.
79 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
80 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration: the receive handler is defined further down but
 * is referenced by the protocol descriptor that follows. */
82 static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
/*
 * Protocol descriptor for ICMPv6: icmpv6_rcv is the receive handler and
 * the entry carries the INET6_PROTO_FINAL flag. icmpv6_init() registers
 * it with inet6_add_protocol() under IPPROTO_ICMPV6 and icmpv6_cleanup()
 * removes it again.
 * NOTE(review): the closing brace of the initializer is not visible in
 * this listing.
 */
84 static struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv,
86 .flags = INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the transmit lock of this CPU's socket.
 *
 * Uses spin_trylock() on the slock of the current CPU's ICMPv6 control
 * socket, so the caller never spins on a lock that is already held.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers in this file give up when the result is non-zero, so
 * presumably 0 means "locked" and non-zero means "busy" - confirm.
 * The matching unlock uses spin_unlock_bh(), which suggests bottom
 * halves are disabled before the trylock - confirm against full source.
 */
89 static __inline__ int icmpv6_xmit_lock(void)
93 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
94 /* This can happen if the output path (f.e. SIT or
95 * ip6ip6 tunnel) signals dst_link_failure() for an
96 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the lock taken by icmpv6_xmit_lock().
 *
 * Drops the slock of the current CPU's ICMPv6 control socket with
 * spin_unlock_bh().
 */
104 static __inline__ void icmpv6_xmit_unlock(void)
106 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
110 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - report a Parameter Problem for @skb.
 * @skb:  offending packet; its receiving device (skb->dev) is passed on.
 * @code: ICMPv6 code for the ICMPV6_PARAMPROB message.
 * @pos:  value forwarded as the "info" argument of icmpv6_send(), which
 *        stores it in the icmp6_pointer field of the outgoing header.
 *
 * NOTE(review): any statement following the icmpv6_send() call is not
 * visible in this listing; check the full source for what happens to
 * @skb afterwards.
 */
112 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
114 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
119 * Figure out, may we reply to this packet with icmp error.
121 * We do not reply, if:
122 * - it was icmp error message.
123 * - it is truncated, so that it is known, that protocol is ICMPV6
124 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - decide whether @skb must not be answered with an error.
 * @skb: offending packet; skb->nh.ipv6h points at its IPv6 header.
 *
 * Skips the extension headers following the fixed IPv6 header. When the
 * resulting next-header value is IPPROTO_ICMPV6, the ICMPv6 type byte is
 * fetched with skb_header_pointer() and tested against
 * ICMPV6_INFOMSG_MASK; a type without that bit is an error message.
 *
 * NOTE(review): the return statements and part of the conditions are
 * not visible in this listing. icmpv6_send() treats a non-zero result
 * as "do not reply".
 */
129 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first byte after the fixed IPv6 header, from skb->data. */
131 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
/* Number of packet bytes that follow the fixed IPv6 header. */
132 int len = skb->len - ptr;
133 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
/* Advance ptr past the extension headers; nexthdr is updated in place. */
138 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
141 if (nexthdr == IPPROTO_ICMPV6) {
/* Read only the type field; _type is the local copy buffer. */
143 tp = skb_header_pointer(skb,
144 ptr+offsetof(struct icmp6hdr, icmp6_type),
145 sizeof(_type), &_type);
147 !(*tp & ICMPV6_INFOMSG_MASK))
/*
 * Base timeout handed to xrlim_allow() by icmpv6_xrlim_allow().
 * Initialised to 1*HZ and exported through ipv6_icmp_table as the
 * "ratelimit" sysctl entry.
 */
153 static int sysctl_icmpv6_time = 1*HZ;
156 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - apply the output rate limit to one ICMPv6 message.
 * @sk:   socket handed to the route lookup.
 * @type: ICMPv6 type of the message about to be sent.
 * fl:    flow used for the route lookup (its declaration sits on a
 *        signature line that is not visible in this listing).
 *
 * Informational types and ICMPV6_PKT_TOOBIG are special-cased before any
 * route lookup. Otherwise the output route is looked up and, unless the
 * route's device is a loopback device, the decision is delegated to
 * xrlim_allow() with a timeout derived from sysctl_icmpv6_time.
 *
 * NOTE(review): the return statements, the assignments to the result in
 * the early branches and the condition guarding the OUTNOROUTES counter
 * are not visible here. icmpv6_send() proceeds only when the result is
 * non-zero.
 */
158 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
161 struct dst_entry *dst;
164 /* Informational messages are not limited. */
165 if (type & ICMPV6_INFOMSG_MASK)
168 /* Do not limit pmtu discovery, it would break it. */
169 if (type == ICMPV6_PKT_TOOBIG)
173 * Look up the output route.
174 * XXX: perhaps the expire for routing entries cloned by
175 * this lookup should be more aggressive (not longer than timeout).
177 dst = ip6_route_output(sk, fl);
179 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
180 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
183 struct rt6_info *rt = (struct rt6_info *)dst;
184 int tmo = sysctl_icmpv6_time;
186 /* Give more bandwidth to wider prefixes. */
187 if (rt->rt6i_dst.plen < 128)
/* Halve the timeout once for every full 32 bits the prefix is short of 128. */
188 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
190 res = xrlim_allow(dst, tmo);
197 * an inline helper for the "simple" if statement below
198 * checks if parameter problem report is caused by an
199 * unrecognized IPv6 option that has the Option Type
200 * highest-order two bits set to 10
/*
 * opt_unrec - test the option type byte a Parameter Problem points at.
 * @skb:    offending packet.
 * @offset: position of the option type byte, counted from the network
 *          header (skb->nh.raw).
 *
 * Returns non-zero when the two highest-order bits of that byte are
 * binary 10.
 *
 * NOTE(review): the handling of a failed skb_header_pointer() call is
 * not visible in this listing.
 */
203 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
/* Convert the network-header-relative offset into one relative to skb->data. */
207 offset += skb->nh.raw - skb->data;
208 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
211 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finish the ICMPv6 header and transmit.
 * @sk:   socket whose write queue holds the pending message.
 * @fl:   flow; supplies the source address and protocol for the
 *        pseudo-header checksum.
 * @thdr: template ICMPv6 header copied into the first queued buffer.
 * @len:  length passed to csum_ipv6_magic() in the multi-buffer case.
 *
 * Copies @thdr to the transport header of the first queued skb, computes
 * the ICMPv6 checksum over the header plus the partial sums already
 * stored in the queued buffers, then calls ip6_push_pending_frames().
 *
 * NOTE(review): the return statements, the empty-queue branch body and
 * some csum_ipv6_magic() arguments are not visible in this listing.
 */
214 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
217 struct icmp6hdr *icmp6h;
/* Look at the first queued buffer without dequeuing it. */
220 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
223 icmp6h = (struct icmp6hdr*) skb->h.raw;
224 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* The checksum field is zeroed before the header is summed. */
225 icmp6h->icmp6_cksum = 0;
/* Single buffer: fold the header into that buffer's running checksum. */
227 if (skb_queue_len(&sk->sk_write_queue) == 1) {
228 skb->csum = csum_partial((char *)icmp6h,
229 sizeof(struct icmp6hdr), skb->csum);
230 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Several buffers: add up every buffer's partial checksum first. */
237 skb_queue_walk(&sk->sk_write_queue, skb) {
238 tmp_csum = csum_add(tmp_csum, skb->csum);
241 tmp_csum = csum_partial((char *)icmp6h,
242 sizeof(struct icmp6hdr), tmp_csum);
243 tmp_csum = csum_ipv6_magic(&fl->fl6_src,
245 len, fl->proto, tmp_csum);
246 icmp6h->icmp6_cksum = tmp_csum;
/* A computed checksum of 0 is stored as -1 instead. */
248 if (icmp6h->icmp6_cksum == 0)
249 icmp6h->icmp6_cksum = -1;
250 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment callback passed to ip6_append_data().
 * @from:   opaque pointer; actually a struct icmpv6_msg describing the
 *          original packet and the offset at which copying starts.
 * @to:     destination buffer (used on a line not visible here).
 * @offset: offset of this fragment within the data being appended.
 * @len:    number of bytes requested (used on a line not visible here).
 * @odd:    passed to csum_block_add() when merging the checksum.
 * @skb:    buffer being filled; its csum field accumulates the result.
 *
 * Copies from the original packet with skb_copy_and_csum_bits(),
 * starting at msg->offset + @offset, and merges the returned checksum
 * into skb->csum.
 *
 * NOTE(review): the remaining skb_copy_and_csum_bits() arguments and the
 * return statement are not visible in this listing.
 */
260 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
262 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
263 struct sk_buff *org_skb = msg->skb;
266 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
268 skb->csum = csum_block_add(skb->csum, csum, odd);
273 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 message about @skb.
 * @skb:  packet that triggered the message; skb->nh.ipv6h is its header.
 * @type: ICMPv6 type to send.
 * @code: ICMPv6 code to send.
 * @info: host-order value stored (after htonl) in icmp6_pointer.
 * @dev:  device argument (no use of it is visible in this listing).
 *
 * Visible steps, in order: sanity-check the header pointer, apply the
 * destination/source address rules, refuse to answer ICMP errors, build
 * the flow towards the original sender, take the per-CPU socket lock,
 * apply the rate limit, resolve the route (including xfrm), choose hop
 * limit and traffic class, cap the quoted payload, append the data and
 * push it out, then update the counters and unlock.
 *
 * NOTE(review): many lines (early returns, gotos, labels, several
 * conditions) are missing from this listing; the notes below describe
 * only the statements that are visible.
 */
275 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
276 struct net_device *dev)
278 struct inet6_dev *idev = NULL;
279 struct ipv6hdr *hdr = skb->nh.ipv6h;
281 struct ipv6_pinfo *np;
282 struct in6_addr *saddr = NULL;
283 struct dst_entry *dst;
284 struct icmp6hdr tmp_hdr;
286 struct icmpv6_msg msg;
/* The whole IPv6 header must lie between skb->head and skb->tail. */
293 if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
297 * Make sure we respect the rules
298 * i.e. RFC 1885 2.4(e)
299 * Rule (e.1) is enforced by not using icmpv6_send
300 * in any code that processes icmp errors.
302 addr_type = ipv6_addr_type(&hdr->daddr);
/* Checks whether the original destination is an address of the receiving device. */
304 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
/*
 * For a multicast destination or a packet not addressed to this host,
 * only Packet Too Big and the "unrecognized option" Parameter Problem
 * accepted by opt_unrec() fall outside the inner condition.
 */
311 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
312 if (type != ICMPV6_PKT_TOOBIG &&
313 !(type == ICMPV6_PARAMPROB &&
314 code == ICMPV6_UNK_OPTION &&
315 (opt_unrec(skb, info))))
/* From here on addr_type describes the original source address. */
321 addr_type = ipv6_addr_type(&hdr->saddr);
/* A link-local source records the receiving interface index. */
327 if (addr_type & IPV6_ADDR_LINKLOCAL)
328 iif = skb->dev->ifindex;
331 * Must not send if we know that source is Anycast also.
332 * for now we don't know that.
334 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
335 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
340 * Never answer to a ICMP packet.
342 if (is_ineligible(skb)) {
343 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Flow for the reply: destination is the original packet's source. */
347 memset(&fl, 0, sizeof(fl));
348 fl.proto = IPPROTO_ICMPV6;
349 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
/* NOTE(review): saddr starts as NULL; the guard for this copy is not visible. */
351 ipv6_addr_copy(&fl.fl6_src, saddr);
353 fl.fl_icmp_type = type;
354 fl.fl_icmp_code = code;
/* A non-zero result means the per-CPU socket lock was not obtained. */
356 if (icmpv6_xmit_lock())
359 sk = icmpv6_socket->sk;
362 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Template header; the checksum is filled in by icmpv6_push_pending_frames(). */
365 tmp_hdr.icmp6_type = type;
366 tmp_hdr.icmp6_code = code;
367 tmp_hdr.icmp6_cksum = 0;
368 tmp_hdr.icmp6_pointer = htonl(info);
370 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
371 fl.oif = np->mcast_oif;
373 err = ip6_dst_lookup(sk, &dst, &fl);
376 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
377 goto out_dst_release;
/*
 * Hop limit candidates: socket settings, then the route metric, then
 * the device default. The conditions choosing between them are not
 * visible in this listing.
 */
379 if (ipv6_addr_is_multicast(&fl.fl6_dst))
380 hlimit = np->mcast_hops;
382 hlimit = np->hop_limit;
384 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
386 hlimit = ipv6_get_hoplimit(dst->dev);
388 tclass = np->cork.tclass;
/* Quote the original packet starting at its network header. */
393 msg.offset = skb->nh.raw - skb->data;
395 len = skb->len - msg.offset;
/* Cap the quoted data so that, with IPv6 and ICMPv6 headers, it fits IPV6_MIN_MTU. */
396 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
398 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
399 goto out_dst_release;
402 idev = in6_dev_get(skb->dev);
404 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
405 len + sizeof(struct icmp6hdr),
406 sizeof(struct icmp6hdr),
407 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
410 ip6_flush_pending_frames(sk);
413 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
/* Per-type output counter for types in the DEST_UNREACH..PARAMPROB range. */
415 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
416 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
417 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
420 if (likely(idev != NULL))
425 icmpv6_xmit_unlock();
/*
 * icmpv6_echo_reply - answer an Echo Request with an Echo Reply.
 * @skb: received request; skb->h.raw points at its ICMPv6 header.
 *
 * Copies the request's ICMPv6 header, changes the type to
 * ICMPV6_ECHO_REPLY, builds a flow back to the sender through the
 * receiving interface and sends the reply through the per-CPU control
 * socket using ip6_append_data() and icmpv6_push_pending_frames().
 *
 * NOTE(review): several lines (early returns, gotos, labels, the setup
 * of msg and some conditions) are missing from this listing; the notes
 * below describe only the visible statements.
 */
428 static void icmpv6_echo_reply(struct sk_buff *skb)
431 struct inet6_dev *idev;
432 struct ipv6_pinfo *np;
433 struct in6_addr *saddr = NULL;
434 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
435 struct icmp6hdr tmp_hdr;
437 struct icmpv6_msg msg;
438 struct dst_entry *dst;
/* Candidate source for the reply: the address the request was sent to. */
443 saddr = &skb->nh.ipv6h->daddr;
445 if (!ipv6_unicast_destination(skb))
/* Start from the request header; only the type is changed here. */
448 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
449 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
451 memset(&fl, 0, sizeof(fl));
452 fl.proto = IPPROTO_ICMPV6;
453 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
455 ipv6_addr_copy(&fl.fl6_src, saddr);
456 fl.oif = skb->dev->ifindex;
457 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
/* A non-zero result means the per-CPU socket lock was not obtained. */
459 if (icmpv6_xmit_lock())
462 sk = icmpv6_socket->sk;
465 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
466 fl.oif = np->mcast_oif;
468 err = ip6_dst_lookup(sk, &dst, &fl);
471 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
472 goto out_dst_release;
/*
 * Hop limit candidates: socket settings, then the route metric, then
 * the device default. The conditions choosing between them are not
 * visible in this listing.
 */
474 if (ipv6_addr_is_multicast(&fl.fl6_dst))
475 hlimit = np->mcast_hops;
477 hlimit = np->hop_limit;
479 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
481 hlimit = ipv6_get_hoplimit(dst->dev);
483 tclass = np->cork.tclass;
487 idev = in6_dev_get(skb->dev);
/* Payload length is skb->len plus one ICMPv6 header. */
492 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
493 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
494 (struct rt6_info*)dst, MSG_DONTWAIT);
497 ip6_flush_pending_frames(sk);
500 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
502 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
503 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
506 if (likely(idev != NULL))
511 icmpv6_xmit_unlock();
/*
 * icmpv6_notify - hand a received ICMPv6 error to the upper layers.
 * @skb:  buffer whose data starts at the IPv6 header quoted inside the
 *        error (the code reads skb->data as a struct ipv6hdr).
 * @type: ICMPv6 type of the received error.
 * @code: ICMPv6 code of the received error.
 * @info: value forwarded unchanged to the error handlers.
 *
 * Finds the quoted packet's upper-layer protocol (skipping extension
 * headers), requires eight bytes of that protocol's header to be
 * present, then calls the registered protocol's err_handler, if any,
 * and rawv6_err() for each raw socket returned by __raw_v6_lookup().
 *
 * NOTE(review): early returns, the RCU read-side markers and parts of
 * the raw-socket loop are not visible in this listing.
 */
514 static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
516 struct in6_addr *saddr, *daddr;
517 struct inet6_protocol *ipprot;
523 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
526 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
527 if (ipv6_ext_hdr(nexthdr)) {
528 /* now skip over extension headers */
529 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
533 inner_offset = sizeof(struct ipv6hdr);
536 /* Checking header including 8 bytes of inner protocol header. */
537 if (!pskb_may_pull(skb, inner_offset+8))
540 saddr = &skb->nh.ipv6h->saddr;
541 daddr = &skb->nh.ipv6h->daddr;
543 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
544 Without this we will not able f.e. to make source routed
546 Corresponding argument (opt) to notifiers is already added.
550 hash = nexthdr & (MAX_INET_PROTOS - 1);
553 ipprot = rcu_dereference(inet6_protos[hash]);
554 if (ipprot && ipprot->err_handler)
555 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
558 read_lock(&raw_v6_lock);
559 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
560 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
562 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
566 read_unlock(&raw_v6_lock);
570 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler installed in icmpv6_protocol.
 * @pskb:   address of the received buffer pointer.
 * @nhoffp: next-header offset pointer (no use of it is visible here).
 *
 * Counts the message, verifies the ICMPv6 checksum, pulls the ICMPv6
 * header, updates the per-type input counters and dispatches on the
 * message type: echo requests are answered, error types are passed to
 * icmpv6_notify(), and multicast listener queries and reports go to the
 * igmp6 event handlers.
 *
 * NOTE(review): the return statements, break statements, labels and
 * some branch bodies are not visible in this listing.
 */
573 static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
575 struct sk_buff *skb = *pskb;
576 struct net_device *dev = skb->dev;
577 struct inet6_dev *idev = __in6_dev_get(dev);
578 struct in6_addr *saddr, *daddr;
579 struct ipv6hdr *orig_hdr;
580 struct icmp6hdr *hdr;
583 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
585 saddr = &skb->nh.ipv6h->saddr;
586 daddr = &skb->nh.ipv6h->daddr;
588 /* Perform checksum. */
/*
 * Hardware-summed packets are first marked CHECKSUM_UNNECESSARY; when
 * the pseudo-header check fails the mark is reset to CHECKSUM_NONE so
 * the software check below runs.
 */
589 if (skb->ip_summed == CHECKSUM_HW) {
590 skb->ip_summed = CHECKSUM_UNNECESSARY;
591 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
593 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n");
594 skb->ip_summed = CHECKSUM_NONE;
/* Software check over the whole ICMPv6 message. */
597 if (skb->ip_summed == CHECKSUM_NONE) {
598 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
599 skb_checksum(skb, 0, skb->len, 0))) {
600 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
601 NIP6(*saddr), NIP6(*daddr));
/* Advance skb->data past the ICMPv6 header; hdr still points at it via skb->h.raw. */
606 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
609 hdr = (struct icmp6hdr *) skb->h.raw;
611 type = hdr->icmp6_type;
/* Per-type input counters, indexed by the distance from the first type of each range. */
613 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
614 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
615 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
616 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
619 case ICMPV6_ECHO_REQUEST:
620 icmpv6_echo_reply(skb);
623 case ICMPV6_ECHO_REPLY:
624 /* we couldn't care less */
627 case ICMPV6_PKT_TOOBIG:
628 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
629 standard destination cache. Seems, only "advanced"
630 destination cache will allow to solve this problem
633 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
635 hdr = (struct icmp6hdr *) skb->h.raw;
636 orig_hdr = (struct ipv6hdr *) (hdr + 1);
637 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
638 ntohl(hdr->icmp6_mtu));
641 * Drop through to notify
644 case ICMPV6_DEST_UNREACH:
645 case ICMPV6_TIME_EXCEED:
646 case ICMPV6_PARAMPROB:
647 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
650 case NDISC_ROUTER_SOLICITATION:
651 case NDISC_ROUTER_ADVERTISEMENT:
652 case NDISC_NEIGHBOUR_SOLICITATION:
653 case NDISC_NEIGHBOUR_ADVERTISEMENT:
658 case ICMPV6_MGM_QUERY:
659 igmp6_event_query(skb);
662 case ICMPV6_MGM_REPORT:
663 igmp6_event_report(skb);
666 case ICMPV6_MGM_REDUCTION:
667 case ICMPV6_NI_QUERY:
668 case ICMPV6_NI_REPLY:
669 case ICMPV6_MLD2_REPORT:
670 case ICMPV6_DHAAD_REQUEST:
671 case ICMPV6_DHAAD_REPLY:
672 case ICMPV6_MOBILE_PREFIX_SOL:
673 case ICMPV6_MOBILE_PREFIX_ADV:
677 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
680 if (type & ICMPV6_INFOMSG_MASK)
684 * error of unknown type.
685 * must pass to upper level
688 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
694 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
/*
 * icmpv6_init - create the per-CPU control sockets and register ICMPv6.
 * @ops: protocol family argument (no use of it is visible in this
 *       listing).
 *
 * For every possible CPU a kernel raw PF_INET6 socket for IPPROTO_ICMPV6
 * is created, switched to GFP_ATOMIC allocation and unhashed through its
 * protocol's unhash hook. Afterwards icmpv6_protocol is registered with
 * inet6_add_protocol(). On the failure path the sockets created for
 * CPUs below the failing index are released again.
 *
 * NOTE(review): the return statements, error labels, the left-hand side
 * of the buffer-size assignment and the start of the printk call are
 * not visible in this listing.
 */
699 int __init icmpv6_init(struct net_proto_family *ops)
704 for (i = 0; i < NR_CPUS; i++) {
705 if (!cpu_possible(i))
708 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
709 &per_cpu(__icmpv6_socket, i));
712 "Failed to initialize the ICMP6 control socket "
718 sk = per_cpu(__icmpv6_socket, i)->sk;
/* Allocations made on behalf of this socket use GFP_ATOMIC. */
719 sk->sk_allocation = GFP_ATOMIC;
721 /* Enough space for 2 64K ICMP packets, including
722 * sk_buff struct overhead.
725 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
727 sk->sk_prot->unhash(sk);
731 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
732 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
740 for (j = 0; j < i; j++) {
741 if (!cpu_possible(j))
743 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * icmpv6_cleanup - undo icmpv6_init().
 *
 * Releases the control socket of every possible CPU with sock_release()
 * and then removes icmpv6_protocol with inet6_del_protocol().
 *
 * NOTE(review): the statement executed for CPUs that are not possible
 * is not visible in this listing (presumably a continue - confirm).
 */
749 void icmpv6_cleanup(void)
753 for (i = 0; i < NR_CPUS; i++) {
754 if (!cpu_possible(i))
756 sock_release(per_cpu(__icmpv6_socket, i));
758 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Per-code lookup table of struct icmp6_err entries.
 *
 * NOTE(review): the struct members, the table name and most entries are
 * not visible in this listing. icmpv6_err_convert() reads the members
 * err and fatal from a table called tab_unreach, indexed by the
 * ICMPV6_DEST_UNREACH code; presumably that is this table - confirm.
 */
761 static struct icmp6_err {
769 { /* ADM_PROHIBITED */
773 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an errno value.
 * @type: ICMPv6 type of the received error.
 * @code: ICMPv6 code of the received error.
 * @err:  output; receives the errno value chosen for the error.
 *
 * For ICMPV6_DEST_UNREACH with a code up to ICMPV6_PORT_UNREACH, both
 * *@err and the local fatal flag are taken from tab_unreach[code].
 *
 * NOTE(review): the bodies of the other cases, the default value of
 * *@err and the return statement are not visible in this listing
 * (presumably the fatal flag is returned - confirm).
 * NOTE(review): code is a signed int and only its upper bound is
 * checked before indexing tab_unreach; callers are assumed to pass a
 * non-negative code - verify.
 */
787 int icmpv6_err_convert(int type, int code, int *err)
794 case ICMPV6_DEST_UNREACH:
796 if (code <= ICMPV6_PORT_UNREACH) {
797 *err = tab_unreach[code].err;
798 fatal = tab_unreach[code].fatal;
802 case ICMPV6_PKT_TOOBIG:
806 case ICMPV6_PARAMPROB:
811 case ICMPV6_TIME_EXCEED:
/*
 * Sysctl table for ICMPv6. The visible entry is named "ratelimit"
 * (ctl_name NET_IPV6_ICMP_RATELIMIT); it is backed by
 * sysctl_icmpv6_time, is the size of an int and is handled by
 * proc_dointvec.
 * NOTE(review): the mode field, the terminating entry and the closing
 * braces are not visible in this listing.
 */
820 ctl_table ipv6_icmp_table[] = {
822 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
823 .procname = "ratelimit",
824 .data = &sysctl_icmpv6_time,
825 .maxlen = sizeof(int),
827 .proc_handler = &proc_dointvec