more changes on original files
[linux-2.4.git] / net / ipv6 / anycast.c
1 /*
2  *      Anycast support for IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      David L Stevens (dlstevens@us.ibm.com)
7  *
8  *      based heavily on net/ipv6/mcast.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/types.h>
20 #include <linux/random.h>
21 #include <linux/string.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/sched.h>
25 #include <linux/net.h>
26 #include <linux/in6.h>
27 #include <linux/netdevice.h>
28 #include <linux/if_arp.h>
29 #include <linux/route.h>
30 #include <linux/init.h>
31 #include <linux/proc_fs.h>
32
33 #include <net/sock.h>
34 #include <net/snmp.h>
35
36 #include <net/ipv6.h>
37 #include <net/protocol.h>
38 #include <net/if_inet6.h>
39 #include <net/ndisc.h>
40 #include <net/addrconf.h>
41 #include <net/ip6_route.h>
42
43 #include <net/checksum.h>
44
/*
 * Big ac list lock for all the sockets.
 *
 * One global rwlock guards every socket's np->ipv6_ac_list.  The
 * join, drop and close paths in this file take it for writing with
 * bottom halves disabled; inet6_ac_check() takes it for reading.
 */
static rwlock_t ipv6_sk_ac_lock = RW_LOCK_UNLOCKED;
47
48 /* XXX ip6_addr_match() and ip6_onlink() really belong in net/core.c */
49
50 static int
51 ip6_addr_match(struct in6_addr *addr1, struct in6_addr *addr2, int prefix)
52 {
53         __u32   mask;
54         int     i;
55
56         if (prefix > 128 || prefix < 0)
57                 return 0;
58         if (prefix == 0)
59                 return 1;
60         for (i=0; i<4; ++i) {
61                 if (prefix >= 32)
62                         mask = ~0;
63                 else
64                         mask = htonl(~0 << (32 - prefix));
65                 if ((addr1->s6_addr32[i] ^ addr2->s6_addr32[i]) & mask)
66                         return 0;
67                 prefix -= 32;
68                 if (prefix <= 0)
69                         break;
70         }
71         return 1;
72 }
73
74 static int
75 ip6_onlink(struct in6_addr *addr, struct net_device *dev)
76 {
77         struct inet6_dev        *idev;
78         struct inet6_ifaddr     *ifa;
79         int     onlink;
80
81         onlink = 0;
82         read_lock(&addrconf_lock);
83         idev = __in6_dev_get(dev);
84         if (idev) {
85                 read_lock_bh(&idev->lock);
86                 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
87                         onlink = ip6_addr_match(addr, &ifa->addr,
88                                         ifa->prefix_len);
89                         if (onlink)
90                                 break;
91                 }
92                 read_unlock_bh(&idev->lock);
93         }
94         read_unlock(&addrconf_lock);
95         return onlink;
96 }
97
98
99 /*
100  *      socket join an anycast group
101  */
102
103 int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
104 {
105         struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
106         struct net_device *dev = NULL;
107         struct inet6_dev *idev;
108         struct ipv6_ac_socklist *pac;
109         int     ishost = !ipv6_devconf.forwarding;
110         int     err = 0;
111
112         if (ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)
113                 return -EINVAL;
114
115         pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
116         if (pac == NULL)
117                 return -ENOMEM;
118         pac->acl_next = NULL;
119         ipv6_addr_copy(&pac->acl_addr, addr);
120
121         if (ifindex == 0) {
122                 struct rt6_info *rt;
123
124                 rt = rt6_lookup(addr, NULL, 0, 0);
125                 if (rt) {
126                         dev = rt->rt6i_dev;
127                         dev_hold(dev);
128                         dst_release(&rt->u.dst);
129                 } else if (ishost) {
130                         err = -EADDRNOTAVAIL;
131                         goto out_free_pac;
132                 } else {
133                         /* router, no matching interface: just pick one */
134
135                         dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
136                 }
137         } else
138                 dev = dev_get_by_index(ifindex);
139
140         if (dev == NULL) {
141                 err = -ENODEV;
142                 goto out_free_pac;
143         }
144
145         idev = in6_dev_get(dev);
146         if (!idev) {
147                 if (ifindex)
148                         err = -ENODEV;
149                 else
150                         err = -EADDRNOTAVAIL;
151                 goto out_dev_put;
152         }
153         /* reset ishost, now that we have a specific device */
154         ishost = !idev->cnf.forwarding;
155         in6_dev_put(idev);
156
157         pac->acl_ifindex = dev->ifindex;
158
159         /* XXX
160          * For hosts, allow link-local or matching prefix anycasts.
161          * This obviates the need for propagating anycast routes while
162          * still allowing some non-router anycast participation.
163          *
164          * allow anyone to join anycasts that don't require a special route
165          * and can't be spoofs of unicast addresses (reserved anycast only)
166          */
167         if (!ip6_onlink(addr, dev)) {
168                 if (ishost)
169                         err = -EADDRNOTAVAIL;
170                 else if (!capable(CAP_NET_ADMIN))
171                         err = -EPERM;
172                 if (err)
173                         goto out_dev_put;
174         } else if (!(ipv6_addr_type(addr) & IPV6_ADDR_ANYCAST) &&
175                    !capable(CAP_NET_ADMIN)) {
176                 err = -EPERM;
177                 goto out_dev_put;
178         }
179
180         err = ipv6_dev_ac_inc(dev, addr);
181         if (err)
182                 goto out_dev_put;
183
184         write_lock_bh(&ipv6_sk_ac_lock);
185         pac->acl_next = np->ipv6_ac_list;
186         np->ipv6_ac_list = pac;
187         write_unlock_bh(&ipv6_sk_ac_lock);
188
189         dev_put(dev);
190
191         return 0;
192
193 out_dev_put:
194         dev_put(dev);
195 out_free_pac:
196         sock_kfree_s(sk, pac, sizeof(*pac));
197         return err;
198 }
199
200 /*
201  *      socket leave an anycast group
202  */
203 int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
204 {
205         struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
206         struct net_device *dev;
207         struct ipv6_ac_socklist *pac, *prev_pac;
208
209         write_lock_bh(&ipv6_sk_ac_lock);
210         prev_pac = 0;
211         for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
212                 if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
213                      ipv6_addr_cmp(&pac->acl_addr, addr) == 0)
214                         break;
215                 prev_pac = pac;
216         }
217         if (!pac) {
218                 write_unlock_bh(&ipv6_sk_ac_lock);
219                 return -ENOENT;
220         }
221         if (prev_pac)
222                 prev_pac->acl_next = pac->acl_next;
223         else
224                 np->ipv6_ac_list = pac->acl_next;
225
226         write_unlock_bh(&ipv6_sk_ac_lock);
227
228         dev = dev_get_by_index(pac->acl_ifindex);
229         if (dev) {
230                 ipv6_dev_ac_dec(dev, &pac->acl_addr);
231                 dev_put(dev);
232         }
233         sock_kfree_s(sk, pac, sizeof(*pac));
234         return 0;
235 }
236
237 void ipv6_sock_ac_close(struct sock *sk)
238 {
239         struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
240         struct net_device *dev = 0;
241         struct ipv6_ac_socklist *pac;
242         int     prev_index;
243
244         write_lock_bh(&ipv6_sk_ac_lock);
245         pac = np->ipv6_ac_list;
246         np->ipv6_ac_list = 0;
247         write_unlock_bh(&ipv6_sk_ac_lock);
248
249         prev_index = 0;
250         while (pac) {
251                 struct ipv6_ac_socklist *next = pac->acl_next;
252
253                 if (pac->acl_ifindex != prev_index) {
254                         if (dev)
255                                 dev_put(dev);
256                         dev = dev_get_by_index(pac->acl_ifindex);
257                         prev_index = pac->acl_ifindex;
258                 }
259                 if (dev)
260                         ipv6_dev_ac_dec(dev, &pac->acl_addr);
261                 sock_kfree_s(sk, pac, sizeof(*pac));
262                 pac = next;
263         }
264         if (dev)
265                 dev_put(dev);
266 }
267
268 int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
269 {
270         struct ipv6_ac_socklist *pac;
271         struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
272         int     found;
273
274         found = 0;
275         read_lock(&ipv6_sk_ac_lock);
276         for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
277                 if (ifindex && pac->acl_ifindex != ifindex)
278                         continue;
279                 found = ipv6_addr_cmp(&pac->acl_addr, addr) == 0;
280                 if (found)
281                         break;
282         }
283         read_unlock(&ipv6_sk_ac_lock);
284
285         return found;
286 }
287
288 static void aca_put(struct ifacaddr6 *ac)
289 {
290         if (atomic_dec_and_test(&ac->aca_refcnt)) {
291                 in6_dev_put(ac->aca_idev);
292                 kfree(ac);
293         }
294 }
295
296 /*
297  *      device anycast group inc (add if not found)
298  */
299 int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
300 {
301         struct ifacaddr6 *aca;
302         struct inet6_dev *idev;
303
304         idev = in6_dev_get(dev);
305
306         if (idev == NULL)
307                 return -EINVAL;
308
309         write_lock_bh(&idev->lock);
310         if (idev->dead) {
311                 write_unlock_bh(&idev->lock);
312                 in6_dev_put(idev);
313                 return -ENODEV;
314         }
315
316         for (aca = idev->ac_list; aca; aca = aca->aca_next) {
317                 if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) {
318                         aca->aca_users++;
319                         write_unlock_bh(&idev->lock);
320                         in6_dev_put(idev);
321                         return 0;
322                 }
323         }
324
325         /*
326          *      not found: create a new one.
327          */
328
329         aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
330
331         if (aca == NULL) {
332                 write_unlock_bh(&idev->lock);
333                 in6_dev_put(idev);
334                 return -ENOMEM;
335         }
336
337         memset(aca, 0, sizeof(struct ifacaddr6));
338
339         ipv6_addr_copy(&aca->aca_addr, addr);
340         aca->aca_idev = idev;
341         aca->aca_users = 1;
342         atomic_set(&aca->aca_refcnt, 2);
343         aca->aca_lock = SPIN_LOCK_UNLOCKED;
344
345         aca->aca_next = idev->ac_list;
346         idev->ac_list = aca;
347         write_unlock_bh(&idev->lock);
348
349         ip6_rt_addr_add(&aca->aca_addr, dev);
350
351         addrconf_join_solict(dev, &aca->aca_addr);
352
353         aca_put(aca);
354         return 0;
355 }
356
357 /*
358  *      device anycast group decrement
359  */
360 int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
361 {
362         struct inet6_dev *idev;
363         struct ifacaddr6 *aca, *prev_aca;
364
365         idev = in6_dev_get(dev);
366         if (idev == NULL)
367                 return -ENODEV;
368
369         write_lock_bh(&idev->lock);
370         prev_aca = 0;
371         for (aca = idev->ac_list; aca; aca = aca->aca_next) {
372                 if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0)
373                         break;
374                 prev_aca = aca;
375         }
376         if (!aca) {
377                 write_unlock_bh(&idev->lock);
378                 in6_dev_put(idev);
379                 return -ENOENT;
380         }
381         if (--aca->aca_users > 0) {
382                 write_unlock_bh(&idev->lock);
383                 in6_dev_put(idev);
384                 return 0;
385         }
386         if (prev_aca)
387                 prev_aca->aca_next = aca->aca_next;
388         else
389                 idev->ac_list = aca->aca_next;
390         write_unlock_bh(&idev->lock);
391         addrconf_leave_solict(dev, &aca->aca_addr);
392
393         ip6_rt_addr_del(&aca->aca_addr, dev);
394
395         aca_put(aca);
396         in6_dev_put(idev);
397         return 0;
398 }
399
400 /*
401  *      check if the interface has this anycast address
402  */
403 static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
404 {
405         struct inet6_dev *idev;
406         struct ifacaddr6 *aca;
407
408         idev = in6_dev_get(dev);
409         if (idev) {
410                 read_lock_bh(&idev->lock);
411                 for (aca = idev->ac_list; aca; aca = aca->aca_next)
412                         if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0)
413                                 break;
414                 read_unlock_bh(&idev->lock);
415                 in6_dev_put(idev);
416                 return aca != 0;
417         }
418         return 0;
419 }
420
421 /*
422  *      check if given interface (or any, if dev==0) has this anycast address
423  */
424 int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
425 {
426         if (dev)
427                 return ipv6_chk_acast_dev(dev, addr);
428         read_lock(&dev_base_lock);
429         for (dev=dev_base; dev; dev=dev->next)
430                 if (ipv6_chk_acast_dev(dev, addr))
431                         break;
432         read_unlock(&dev_base_lock);
433         return dev != 0;
434 }
435
436
437 #ifdef CONFIG_PROC_FS
438 int anycast6_get_info(char *buffer, char **start, off_t offset, int length)
439 {
440         off_t pos=0, begin=0;
441         struct ifacaddr6 *im;
442         int len=0;
443         struct net_device *dev;
444         
445         read_lock(&dev_base_lock);
446         for (dev = dev_base; dev; dev = dev->next) {
447                 struct inet6_dev *idev;
448
449                 if ((idev = in6_dev_get(dev)) == NULL)
450                         continue;
451
452                 read_lock_bh(&idev->lock);
453                 for (im = idev->ac_list; im; im = im->aca_next) {
454                         int i;
455
456                         len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name);
457
458                         for (i=0; i<16; i++)
459                                 len += sprintf(buffer+len, "%02x", im->aca_addr.s6_addr[i]);
460
461                         len += sprintf(buffer+len, " %5d\n", im->aca_users);
462
463                         pos=begin+len;
464                         if (pos < offset) {
465                                 len=0;
466                                 begin=pos;
467                         }
468                         if (pos > offset+length) {
469                                 read_unlock_bh(&idev->lock);
470                                 in6_dev_put(idev);
471                                 goto done;
472                         }
473                 }
474                 read_unlock_bh(&idev->lock);
475                 in6_dev_put(idev);
476         }
477
478 done:
479         read_unlock(&dev_base_lock);
480
481         *start=buffer+(offset-begin);
482         len-=(offset-begin);
483         if(len>length)
484                 len=length;
485         if (len<0)
486                 len=0;
487         return len;
488 }
489
490 #endif