2 * Anycast support for IPv6
3 * Linux INET6 implementation
6 * David L Stevens (dlstevens@us.ibm.com)
8 * based heavily on net/ipv6/mcast.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/types.h>
20 #include <linux/random.h>
21 #include <linux/string.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/sched.h>
25 #include <linux/net.h>
26 #include <linux/in6.h>
27 #include <linux/netdevice.h>
28 #include <linux/if_arp.h>
29 #include <linux/route.h>
30 #include <linux/init.h>
31 #include <linux/proc_fs.h>
37 #include <net/protocol.h>
38 #include <net/if_inet6.h>
39 #include <net/ndisc.h>
40 #include <net/addrconf.h>
41 #include <net/ip6_route.h>
43 #include <net/checksum.h>
/*
 * A single reader/writer lock shared by every socket anycast list.
 * In this listing it is taken for writing (write_lock_bh) around updates
 * of np->ipv6_ac_list in the socket join, drop and close paths, and for
 * reading (read_lock) in inet6_ac_check().
 */
45 /* Big ac list lock for all the sockets */
46 static rwlock_t ipv6_sk_ac_lock = RW_LOCK_UNLOCKED;
48 /* XXX ip6_addr_match() and ip6_onlink() really belong in net/core.c */
/*
 * ip6_addr_match - compare two IPv6 addresses under a prefix length.
 *
 * Only fragments of the body appear in this listing. The visible statements
 * range-check prefix against 0..128 and compare the addresses one 32-bit
 * word (s6_addr32[i]) at a time, XOR-ing the two words and masking the
 * result. The return values and the loop around the comparison are not
 * shown here.
 */
51 ip6_addr_match(struct in6_addr *addr1, struct in6_addr *addr2, int prefix)
56 if (prefix > 128 || prefix < 0)
/*
 * NOTE(review): ~0 is a negative int, and left-shifting a negative value is
 * undefined in ISO C; ~0U would make the shift well defined. Whether
 * (32 - prefix) can reach 32 at this point depends on lines not shown
 * in this listing - TODO confirm.
 */
64 mask = htonl(~0 << (32 - prefix));
65 if ((addr1->s6_addr32[i] ^ addr2->s6_addr32[i]) & mask)
/*
 * ip6_onlink - test addr against the addresses configured on dev.
 *
 * Visible behaviour: under addrconf_lock (read) the inet6_dev for dev is
 * fetched with __in6_dev_get(); under idev->lock (read, _bh variant) the
 * idev->addr_list chain is walked through if_next and each entry is
 * compared with ip6_addr_match(). The third argument of that call, the
 * loop exit condition and the return statement are not shown.
 */
75 ip6_onlink(struct in6_addr *addr, struct net_device *dev)
77 struct inet6_dev *idev;
78 struct inet6_ifaddr *ifa;
82 read_lock(&addrconf_lock);
83 idev = __in6_dev_get(dev);
/* NOTE(review): presumably a NULL check on idev precedes this - confirm. */
85 read_lock_bh(&idev->lock);
86 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
87 onlink = ip6_addr_match(addr, &ifa->addr,
92 read_unlock_bh(&idev->lock);
/* Locks are released in the reverse order of acquisition. */
94 read_unlock(&addrconf_lock);
100 * socket join an anycast group
/*
 * ipv6_sock_ac_join - add an anycast membership (ifindex, addr) to socket sk.
 *
 * Only part of the body is present in this listing; the notes below cover
 * the visible statements only.
 *
 *  - addr is tested against IPV6_ADDR_MULTICAST first (the outcome of the
 *    test is not shown).
 *  - A struct ipv6_ac_socklist entry is allocated with sock_kmalloc() and
 *    filled with a copy of addr.
 *  - A device is obtained through one of rt6_lookup() on addr,
 *    dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK) or
 *    dev_get_by_index(ifindex); the conditions selecting between them are
 *    not visible.
 *  - ishost is seeded from the global ipv6_devconf.forwarding and later
 *    overwritten from the per-device idev->cnf.forwarding.
 *  - ip6_onlink(), ipv6_addr_type() & IPV6_ADDR_ANYCAST and
 *    capable(CAP_NET_ADMIN) take part in deciding whether the join is
 *    permitted; -EADDRNOTAVAIL is one of the error values assigned.
 *  - ipv6_dev_ac_inc() is called for the device, after which the entry is
 *    pushed on the head of np->ipv6_ac_list under ipv6_sk_ac_lock (write).
 *  - sock_kfree_s() releases the entry; presumably this is the failure
 *    path - TODO confirm against the full source.
 */
103 int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
105 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
106 struct net_device *dev = NULL;
107 struct inet6_dev *idev;
108 struct ipv6_ac_socklist *pac;
109 int ishost = !ipv6_devconf.forwarding;
112 if (ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)
/* Allocation is charged to the socket; GFP_KERNEL is used here. */
115 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
118 pac->acl_next = NULL;
119 ipv6_addr_copy(&pac->acl_addr, addr);
/* The route reference taken by the lookup is dropped again below. */
124 rt = rt6_lookup(addr, NULL, 0, 0);
128 dst_release(&rt->u.dst);
130 err = -EADDRNOTAVAIL;
133 /* router, no matching interface: just pick one */
135 dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
138 dev = dev_get_by_index(ifindex);
145 idev = in6_dev_get(dev);
150 err = -EADDRNOTAVAIL;
153 /* reset ishost, now that we have a specific device */
154 ishost = !idev->cnf.forwarding;
/* The entry records the index of the device actually chosen. */
157 pac->acl_ifindex = dev->ifindex;
160 * For hosts, allow link-local or matching prefix anycasts.
161 * This obviates the need for propagating anycast routes while
162 * still allowing some non-router anycast participation.
164 * allow anyone to join anycasts that don't require a special route
165 * and can't be spoofs of unicast addresses (reserved anycast only)
167 if (!ip6_onlink(addr, dev)) {
169 err = -EADDRNOTAVAIL;
170 else if (!capable(CAP_NET_ADMIN))
174 } else if (!(ipv6_addr_type(addr) & IPV6_ADDR_ANYCAST) &&
175 !capable(CAP_NET_ADMIN)) {
180 err = ipv6_dev_ac_inc(dev, addr);
184 write_lock_bh(&ipv6_sk_ac_lock);
185 pac->acl_next = np->ipv6_ac_list;
186 np->ipv6_ac_list = pac;
187 write_unlock_bh(&ipv6_sk_ac_lock);
196 sock_kfree_s(sk, pac, sizeof(*pac));
201 * socket leave an anycast group
/*
 * ipv6_sock_ac_drop - remove one anycast membership from socket sk.
 *
 * Visible behaviour: under ipv6_sk_ac_lock (write) the socket list is
 * searched for an entry whose address equals addr and whose acl_ifindex
 * equals ifindex (ifindex == 0 matches any interface). The entry is
 * unlinked, the lock is dropped, and only then are the device reference
 * count decremented via ipv6_dev_ac_dec() and the entry freed. Return
 * values and the maintenance of prev_pac are not shown in this listing.
 */
203 int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
205 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
206 struct net_device *dev;
207 struct ipv6_ac_socklist *pac, *prev_pac;
209 write_lock_bh(&ipv6_sk_ac_lock);
211 for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
212 if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
213 ipv6_addr_cmp(&pac->acl_addr, addr) == 0)
/* Presumably the no-match exit: the lock is released early - confirm. */
218 write_unlock_bh(&ipv6_sk_ac_lock);
/* Unlink: either bypass the entry from its predecessor or move the head. */
222 prev_pac->acl_next = pac->acl_next;
224 np->ipv6_ac_list = pac->acl_next;
226 write_unlock_bh(&ipv6_sk_ac_lock);
/* The device is looked up again from the index stored at join time. */
228 dev = dev_get_by_index(pac->acl_ifindex);
230 ipv6_dev_ac_dec(dev, &pac->acl_addr);
233 sock_kfree_s(sk, pac, sizeof(*pac));
/*
 * ipv6_sock_ac_close - release every anycast membership held by socket sk.
 *
 * Visible behaviour: the whole list is detached from the socket under
 * ipv6_sk_ac_lock (write), so the entries can then be processed without
 * the lock. For each entry the device is looked up by acl_ifindex (the
 * lookup is repeated only when the index differs from prev_index), the
 * device count is dropped with ipv6_dev_ac_dec() and the entry is freed.
 * The loop header, the declaration of prev_index and any release of the
 * device reference are not shown in this listing.
 *
 * NOTE(review): dev and np->ipv6_ac_list are cleared with a literal 0;
 * NULL would state the intent more clearly.
 */
237 void ipv6_sock_ac_close(struct sock *sk)
239 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
240 struct net_device *dev = 0;
241 struct ipv6_ac_socklist *pac;
244 write_lock_bh(&ipv6_sk_ac_lock);
245 pac = np->ipv6_ac_list;
246 np->ipv6_ac_list = 0;
247 write_unlock_bh(&ipv6_sk_ac_lock);
/* Save the successor first: pac is freed at the end of the iteration. */
251 struct ipv6_ac_socklist *next = pac->acl_next;
253 if (pac->acl_ifindex != prev_index) {
256 dev = dev_get_by_index(pac->acl_ifindex);
257 prev_index = pac->acl_ifindex;
260 ipv6_dev_ac_dec(dev, &pac->acl_addr);
261 sock_kfree_s(sk, pac, sizeof(*pac));
/*
 * inet6_ac_check - test whether socket sk has joined anycast address addr.
 *
 * Visible behaviour: under ipv6_sk_ac_lock (read) the socket list is
 * walked; when ifindex is non-zero, entries recorded for a different
 * interface are filtered out, and found is set from an address equality
 * test. The statement following the ifindex filter, the loop exit and the
 * return are not shown in this listing.
 *
 * NOTE(review): the plain read_lock() is used here while the writers in
 * this file use write_lock_bh(); whether that is safe depends on the
 * calling context - TODO confirm.
 */
268 int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
270 struct ipv6_ac_socklist *pac;
271 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
275 read_lock(&ipv6_sk_ac_lock);
276 for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
277 if (ifindex && pac->acl_ifindex != ifindex)
279 found = ipv6_addr_cmp(&pac->acl_addr, addr) == 0;
283 read_unlock(&ipv6_sk_ac_lock);
/*
 * aca_put - drop one reference on an anycast address entry.
 *
 * When aca_refcnt reaches zero the reference the entry holds on its
 * inet6_dev (aca_idev) is released. Any further teardown of the entry
 * itself is not visible in this listing.
 */
288 static void aca_put(struct ifacaddr6 *ac)
290 if (atomic_dec_and_test(&ac->aca_refcnt)) {
291 in6_dev_put(ac->aca_idev);
297 * device anycast group inc (add if not found)
/*
 * ipv6_dev_ac_inc - take a use of anycast address addr on device dev,
 * creating the per-device entry when none exists yet.
 *
 * Visible behaviour: the inet6_dev is obtained with in6_dev_get() and
 * idev->lock is taken for writing. idev->ac_list is searched for addr; on
 * a hit the lock is released (the update of the existing entry is not
 * shown). Otherwise a zeroed struct ifacaddr6 is set up, linked at the
 * head of the list, the lock is released, and ip6_rt_addr_add() and
 * addrconf_join_solict() are called for the new address. Return values
 * and the handling of the idev reference are not shown in this listing.
 */
299 int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
301 struct ifacaddr6 *aca;
302 struct inet6_dev *idev;
304 idev = in6_dev_get(dev);
309 write_lock_bh(&idev->lock);
/* Early unlock: the condition guarding this exit is not shown here. */
311 write_unlock_bh(&idev->lock);
316 for (aca = idev->ac_list; aca; aca = aca->aca_next) {
317 if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) {
319 write_unlock_bh(&idev->lock);
326 * not found: create a new one.
/* GFP_ATOMIC: idev->lock is still held for writing at this point. */
329 aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
332 write_unlock_bh(&idev->lock);
337 memset(aca, 0, sizeof(struct ifacaddr6));
339 ipv6_addr_copy(&aca->aca_addr, addr);
340 aca->aca_idev = idev;
/*
 * NOTE(review): the initial count of 2 presumably covers the list linkage
 * plus a temporary reference held by this function - confirm.
 */
342 atomic_set(&aca->aca_refcnt, 2);
343 aca->aca_lock = SPIN_LOCK_UNLOCKED;
345 aca->aca_next = idev->ac_list;
347 write_unlock_bh(&idev->lock);
/* Route and solicited-node setup happen after idev->lock is dropped. */
349 ip6_rt_addr_add(&aca->aca_addr, dev);
351 addrconf_join_solict(dev, &aca->aca_addr);
358 * device anycast group decrement
/*
 * ipv6_dev_ac_dec - give up one use of anycast address addr on device dev.
 *
 * Visible behaviour: under idev->lock (write) the entry for addr is
 * located in idev->ac_list. aca_users is decremented; while it stays above
 * zero the lock is simply released. Otherwise the entry is unlinked, the
 * lock is released, and addrconf_leave_solict() and ip6_rt_addr_del() are
 * called for the address. Return values, prev_aca maintenance and the
 * final release of the entry are not shown in this listing.
 */
360 int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
362 struct inet6_dev *idev;
363 struct ifacaddr6 *aca, *prev_aca;
365 idev = in6_dev_get(dev);
369 write_lock_bh(&idev->lock);
371 for (aca = idev->ac_list; aca; aca = aca->aca_next) {
372 if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0)
/* Presumably the not-found exit - confirm against the full source. */
377 write_unlock_bh(&idev->lock);
/* Other users remain: keep the entry on the list. */
381 if (--aca->aca_users > 0) {
382 write_unlock_bh(&idev->lock);
/* Last user gone: unlink from the predecessor or from the list head. */
387 prev_aca->aca_next = aca->aca_next;
389 idev->ac_list = aca->aca_next;
390 write_unlock_bh(&idev->lock);
391 addrconf_leave_solict(dev, &aca->aca_addr);
393 ip6_rt_addr_del(&aca->aca_addr, dev);
401 * check if the interface has this anycast address
/*
 * ipv6_chk_acast_dev - look for addr in the anycast list of one device.
 *
 * Visible behaviour: the inet6_dev is obtained with in6_dev_get() and,
 * under idev->lock (read, _bh variant), idev->ac_list is scanned for an
 * entry whose aca_addr equals addr. The action taken on a match, the
 * release of the idev reference and the return value are not shown in
 * this listing.
 */
403 static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
405 struct inet6_dev *idev;
406 struct ifacaddr6 *aca;
408 idev = in6_dev_get(dev);
410 read_lock_bh(&idev->lock);
411 for (aca = idev->ac_list; aca; aca = aca->aca_next)
412 if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0)
414 read_unlock_bh(&idev->lock);
422 * check if given interface (or any, if dev==0) has this anycast address
/*
 * ipv6_chk_acast_addr - check for anycast address addr on dev, or on the
 * devices reachable from dev_base.
 *
 * Visible behaviour: one path delegates directly to ipv6_chk_acast_dev()
 * for the given device (the condition selecting it is not shown;
 * presumably dev != NULL). The other path holds dev_base_lock for reading
 * and calls ipv6_chk_acast_dev() on each device of the dev_base chain.
 * The loop exit on a match and the final return are not shown.
 */
424 int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
427 return ipv6_chk_acast_dev(dev, addr);
428 read_lock(&dev_base_lock);
/* The dev parameter is reused as the loop cursor from here on. */
429 for (dev=dev_base; dev; dev=dev->next)
430 if (ipv6_chk_acast_dev(dev, addr))
432 read_unlock(&dev_base_lock);
437 #ifdef CONFIG_PROC_FS
438 int anycast6_get_info(char *buffer, char **start, off_t offset, int length)
440 off_t pos=0, begin=0;
441 struct ifacaddr6 *im;
443 struct net_device *dev;
445 read_lock(&dev_base_lock);
446 for (dev = dev_base; dev; dev = dev->next) {
447 struct inet6_dev *idev;
449 if ((idev = in6_dev_get(dev)) == NULL)
452 read_lock_bh(&idev->lock);
453 for (im = idev->ac_list; im; im = im->aca_next) {
456 len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name);
459 len += sprintf(buffer+len, "%02x", im->aca_addr.s6_addr[i]);
461 len += sprintf(buffer+len, " %5d\n", im->aca_users);
468 if (pos > offset+length) {
469 read_unlock_bh(&idev->lock);
474 read_unlock_bh(&idev->lock);
479 read_unlock(&dev_base_lock);
481 *start=buffer+(offset-begin);