/* linux-2.4.21-pre4: net/netlink/af_netlink.c */
1 /*
2  * NETLINK      Kernel-user communication protocol.
3  *
4  *              Authors:        Alan Cox <alan@redhat.com>
5  *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  * 
12  * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13  *                               added netlink_proto_exit
14  *
15  */
16
17 #include <linux/config.h>
18 #include <linux/module.h>
19
20 #include <linux/kernel.h>
21 #include <linux/init.h>
22 #include <linux/major.h>
23 #include <linux/signal.h>
24 #include <linux/sched.h>
25 #include <linux/errno.h>
26 #include <linux/string.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/un.h>
30 #include <linux/fcntl.h>
31 #include <linux/termios.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/fs.h>
35 #include <linux/slab.h>
36 #include <asm/uaccess.h>
37 #include <linux/skbuff.h>
38 #include <linux/netdevice.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/proc_fs.h>
41 #include <linux/smp_lock.h>
42 #include <linux/notifier.h>
43 #include <net/sock.h>
44 #include <net/scm.h>
45
46 #define Nprintk(a...)
47
48 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
49 #define NL_EMULATE_DEV
50 #endif
51
/* Per-socket netlink state, hung off sk->protinfo.af_netlink. */
struct netlink_opt
{
	u32			pid;		/* port id this socket is bound to (0 = unbound) */
	unsigned		groups;		/* multicast groups this socket listens to */
	u32			dst_pid;	/* default unicast destination set by connect() */
	unsigned		dst_groups;	/* default multicast destination set by connect() */
	unsigned long		state;		/* bit 0 set => receive queue congested */
	int			(*handler)(int unit, struct sk_buff *skb); /* NL_EMULATE_DEV kernel-side hook */
	wait_queue_head_t	wait;		/* blocked senders sleep here until rcvbuf drains */
	struct netlink_callback	*cb;		/* dump in progress, or NULL */
	spinlock_t		cb_lock;	/* protects cb */
	void			(*data_ready)(struct sock *sk, int bytes); /* kernel-socket input callback */
};
65
/* One singly linked list of bound sockets per netlink protocol. */
static struct sock *nl_table[MAX_LINKS];
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
/* Per-protocol NL_NONROOT_* permission bits; see netlink_set_nonroot(). */
static unsigned nl_nonroot[MAX_LINKS];

#ifdef NL_EMULATE_DEV
static struct socket *netlink_kernel[MAX_LINKS];
#endif

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

/* Count of live netlink sockets (debug accounting). */
atomic_t netlink_sock_nr;

/* nl_table is protected by a hand-rolled reader/writer scheme:
 * readers bump nl_table_users under a brief read_lock (netlink_lock_table);
 * writers take the write lock AND wait for nl_table_users to drop to zero
 * (netlink_table_grab).  This lets readers sleep while "holding" the table. */
static rwlock_t nl_table_lock = RW_LOCK_UNLOCKED;
static atomic_t nl_table_users = ATOMIC_INIT(0);

/* Notifier chain fired on NETLINK_URELEASE when a bound socket goes away. */
static struct notifier_block *netlink_chain;
83
/* sk->destruct hook: release per-socket state when the last ref is dropped. */
static void netlink_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->receive_queue);

	if (!sk->dead) {
		/* Destructor invoked on a socket that was never orphaned --
		 * leak the protinfo rather than free state still in use. */
		printk("Freeing alive netlink socket %p\n", sk);
		return;
	}
	BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
	BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
	BUG_TRAP(sk->protinfo.af_netlink->cb==NULL);

	kfree(sk->protinfo.af_netlink);

	atomic_dec(&netlink_sock_nr);
#ifdef NETLINK_REFCNT_DEBUG
	printk(KERN_DEBUG "NETLINK %p released, %d are still alive\n", sk, atomic_read(&netlink_sock_nr));
#endif
}
103
104 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
105  * Look, when several writers sleep and reader wakes them up, all but one
106  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
107  * this, _but_ remember, it adds useless work on UP machines.
108  */
109
/*
 * Acquire exclusive ("writer") access to nl_table: take the write lock,
 * then sleep until all lock-less readers (nl_table_users) have drained.
 * The write lock must be dropped around schedule() -- readers need it to
 * decrement nl_table_users -- and retaken before rechecking the count.
 */
static void netlink_table_grab(void)
{
	write_lock_bh(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		/* Exclusive wait: wake one grabber at a time (see comment
		 * above) to avoid a write-lock stampede on SMP. */
		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for(;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_bh(&nl_table_lock);
			schedule();
			write_lock_bh(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}
131
/* Release exclusive table access and wake readers/other grabbers. */
static __inline__ void netlink_table_ungrab(void)
{
	write_unlock_bh(&nl_table_lock);
	wake_up(&nl_table_wait);
}
137
/*
 * Take shared ("reader") access to nl_table.  The momentary read_lock
 * only serializes against a concurrent netlink_table_grab(); the real
 * hold is the nl_table_users count, which lets the reader sleep.
 */
static __inline__ void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}
147
/* Drop shared table access; wake a waiting grabber when we were last out. */
static __inline__ void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}
154
155 static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
156 {
157         struct sock *sk;
158
159         read_lock(&nl_table_lock);
160         for (sk=nl_table[protocol]; sk; sk=sk->next) {
161                 if (sk->protinfo.af_netlink->pid == pid) {
162                         sock_hold(sk);
163                         read_unlock(&nl_table_lock);
164                         return sk;
165                 }
166         }
167
168         read_unlock(&nl_table_lock);
169         return NULL;
170 }
171
172 extern struct proto_ops netlink_ops;
173
/*
 * Bind @sk to @pid and link it into nl_table.
 * Returns 0 on success, -EADDRINUSE if another socket already owns @pid,
 * or -EBUSY if @sk itself is already bound (pid != 0).
 * The table holds its own reference on the socket (sock_hold).
 */
static int netlink_insert(struct sock *sk, u32 pid)
{
	int err = -EADDRINUSE;
	struct sock *osk;

	netlink_table_grab();
	for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
		if (osk->protinfo.af_netlink->pid == pid)
			break;
	}
	if (osk == NULL) {
		/* pid is free; -EBUSY now only if we raced another binder. */
		err = -EBUSY;
		if (sk->protinfo.af_netlink->pid == 0) {
			sk->protinfo.af_netlink->pid = pid;
			sk->next = nl_table[sk->protocol];
			nl_table[sk->protocol] = sk;
			sock_hold(sk);
			err = 0;
		}
	}
	netlink_table_ungrab();
	return err;
}
197
198 static void netlink_remove(struct sock *sk)
199 {
200         struct sock **skp;
201
202         netlink_table_grab();
203         for (skp = &nl_table[sk->protocol]; *skp; skp = &((*skp)->next)) {
204                 if (*skp == sk) {
205                         *skp = sk->next;
206                         __sock_put(sk);
207                         break;
208                 }
209         }
210         netlink_table_ungrab();
211 }
212
/*
 * socket(2) backend for PF_NETLINK: allocate the sock and its
 * netlink_opt.  Only SOCK_RAW/SOCK_DGRAM and protocols < MAX_LINKS
 * are accepted.  Returns 0 or a negative errno.
 */
static int netlink_create(struct socket *sock, int protocol)
{
	struct sock *sk;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol<0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	sock->ops = &netlink_ops;

	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock,sk);

	sk->protinfo.af_netlink = kmalloc(sizeof(struct netlink_opt), GFP_KERNEL);
	if (sk->protinfo.af_netlink == NULL) {
		sk_free(sk);
		return -ENOMEM;
	}
	/* Zeroed protinfo: pid 0 (unbound), no groups, no dump callback. */
	memset(sk->protinfo.af_netlink, 0, sizeof(struct netlink_opt));

	spin_lock_init(&sk->protinfo.af_netlink->cb_lock);
	init_waitqueue_head(&sk->protinfo.af_netlink->wait);
	sk->destruct = netlink_sock_destruct;
	atomic_inc(&netlink_sock_nr);

	sk->protocol=protocol;
	return 0;
}
248
/*
 * close(2) backend: unlink from the table first (so no new packets can
 * arrive), cancel any in-flight dump, wake blocked senders, then notify
 * NETLINK_URELEASE listeners and drop the final reference.
 */
static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	netlink_remove(sk);

	spin_lock(&sk->protinfo.af_netlink->cb_lock);
	if (sk->protinfo.af_netlink->cb) {
		/* Abort the pending dump; __sock_put drops the reference
		 * netlink_dump_start's lookup left on the socket. */
		sk->protinfo.af_netlink->cb->done(sk->protinfo.af_netlink->cb);
		netlink_destroy_callback(sk->protinfo.af_netlink->cb);
		sk->protinfo.af_netlink->cb = NULL;
		__sock_put(sk);
	}
	spin_unlock(&sk->protinfo.af_netlink->cb_lock);

	/* OK. Socket is unlinked, and, therefore,
	   no new packets will arrive */

	sock_orphan(sk);
	sock->sk = NULL;
	wake_up_interruptible_all(&sk->protinfo.af_netlink->wait);

	skb_queue_purge(&sk->write_queue);

	/* Only unicast-bound sockets announce their departure. */
	if (sk->protinfo.af_netlink->pid && !sk->protinfo.af_netlink->groups) {
		struct netlink_notify n = { protocol:sk->protocol,
					    pid:sk->protinfo.af_netlink->pid };
		notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
	}

	sock_put(sk);
	return 0;
}
285
286 static int netlink_autobind(struct socket *sock)
287 {
288         struct sock *sk = sock->sk;
289         struct sock *osk;
290         s32 pid = current->pid;
291         int err;
292
293 retry:
294         netlink_table_grab();
295         for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
296                 if (osk->protinfo.af_netlink->pid == pid) {
297                         /* Bind collision, search negative pid values. */
298                         if (pid > 0)
299                                 pid = -4096;
300                         pid--;
301                         netlink_table_ungrab();
302                         goto retry;
303                 }
304         }
305         netlink_table_ungrab();
306
307         err = netlink_insert(sk, pid);
308         if (err == -EADDRINUSE)
309                 goto retry;
310         sk->protinfo.af_netlink->groups = 0;
311         return 0;
312 }
313
/* A socket may perform the operation in @flag (NL_NONROOT_SEND/RECV) if
 * the protocol's nl_nonroot mask allows it, or if the caller holds
 * CAP_NET_ADMIN. */
static inline int netlink_capable(struct socket *sock, unsigned flag)
{
	return (nl_nonroot[sock->sk->protocol] & flag) || capable(CAP_NET_ADMIN);
}
318
/*
 * bind(2) backend.  An already-bound socket may only re-bind to its own
 * pid (to change group subscriptions); nl_pid == 0 requests autobind.
 * Multicast subscription requires NL_NONROOT_RECV or CAP_NET_ADMIN.
 */
static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sock *sk = sock->sk;
	int err;
	struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_RECV))
		return -EPERM;

	if (sk->protinfo.af_netlink->pid) {
		if (nladdr->nl_pid != sk->protinfo.af_netlink->pid)
			return -EINVAL;
		sk->protinfo.af_netlink->groups = nladdr->nl_groups;
		return 0;
	}

	if (nladdr->nl_pid == 0) {
		err = netlink_autobind(sock);
		if (err == 0)
			sk->protinfo.af_netlink->groups = nladdr->nl_groups;
		return err;
	}

	err = netlink_insert(sk, nladdr->nl_pid);
	if (err == 0)
		sk->protinfo.af_netlink->groups = nladdr->nl_groups;
	return err;
}
351
352 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
353                            int alen, int flags)
354 {
355         int err = 0;
356         struct sock *sk = sock->sk;
357         struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;
358
359         if (addr->sa_family == AF_UNSPEC) {
360                 sk->protinfo.af_netlink->dst_pid = 0;
361                 sk->protinfo.af_netlink->dst_groups = 0;
362                 return 0;
363         }
364         if (addr->sa_family != AF_NETLINK)
365                 return -EINVAL;
366
367         /* Only superuser is allowed to send multicasts */
368         if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
369                 return -EPERM;
370
371         if (!sk->protinfo.af_netlink->pid)
372                 err = netlink_autobind(sock);
373
374         if (err == 0) {
375                 sk->protinfo.af_netlink->dst_pid = nladdr->nl_pid;
376                 sk->protinfo.af_netlink->dst_groups = nladdr->nl_groups;
377         }
378
379         return 0;
380 }
381
382 static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
383 {
384         struct sock *sk = sock->sk;
385         struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;
386         
387         nladdr->nl_family = AF_NETLINK;
388         *addr_len = sizeof(*nladdr);
389
390         if (peer) {
391                 nladdr->nl_pid = sk->protinfo.af_netlink->dst_pid;
392                 nladdr->nl_groups = sk->protinfo.af_netlink->dst_groups;
393         } else {
394                 nladdr->nl_pid = sk->protinfo.af_netlink->pid;
395                 nladdr->nl_groups = sk->protinfo.af_netlink->groups;
396         }
397         return 0;
398 }
399
/* Mark @sk congested (state bit 0) and report ENOBUFS to it -- but only
 * once per congestion episode, hence the test_and_set. */
static void netlink_overrun(struct sock *sk)
{
	if (!test_and_set_bit(0, &sk->protinfo.af_netlink->state)) {
		sk->err = ENOBUFS;
		sk->error_report(sk);
	}
}
407
/*
 * Deliver @skb to the socket bound to @pid on @ssk's protocol.
 * Consumes the skb in all cases.  Returns the delivered length, or
 * -ECONNREFUSED (no such pid) / -EAGAIN (receiver full, nonblocking) /
 * a signal errno.  May block up to the sender's sndtimeo when the
 * receiver's rcvbuf is full.
 */
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
{
	struct sock *sk;
	int len = skb->len;
	int protocol = ssk->protocol;
	long timeo;
	DECLARE_WAITQUEUE(wait, current);

	timeo = sock_sndtimeo(ssk, nonblock);

retry:
	sk = netlink_lookup(protocol, pid);
	if (sk == NULL)
		goto no_dst;

#ifdef NL_EMULATE_DEV
	/* Kernel-side emulated device: hand the skb straight to its handler. */
	if (sk->protinfo.af_netlink->handler) {
		skb_orphan(skb);
		len = sk->protinfo.af_netlink->handler(protocol, skb);
		sock_put(sk);
		return len;
	}
#endif

	if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
	    test_bit(0, &sk->protinfo.af_netlink->state)) {
		if (!timeo) {
			/* Kernel senders (pid 0) get the overrun reported
			 * on the receiver instead of blocking. */
			if (ssk->protinfo.af_netlink->pid == 0)
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&sk->protinfo.af_netlink->wait, &wait);

		/* Re-check under the waitqueue: sleep only if still full
		 * and the receiver has not died meanwhile. */
		if ((atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
		    test_bit(0, &sk->protinfo.af_netlink->state)) &&
		    !sk->dead)
			timeo = schedule_timeout(timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&sk->protinfo.af_netlink->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(timeo);
		}
		/* Receiver may have rebound or died: look it up again. */
		goto retry;
	}

	skb_orphan(skb);
	skb_set_owner_r(skb, sk);	/* charge the receiver's rmem */
	skb_queue_tail(&sk->receive_queue, skb);
	sk->data_ready(sk, len);
	sock_put(sk);
	return len;

no_dst:
	kfree_skb(skb);
	return -ECONNREFUSED;
}
472
/*
 * Try to queue @skb on one broadcast receiver.  Returns 0 on success,
 * -1 when the receiver is congested (caller reports an overrun).
 * On success the skb's reference is consumed by the receive queue.
 */
static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
#ifdef NL_EMULATE_DEV
	if (sk->protinfo.af_netlink->handler) {
		skb_orphan(skb);
		sk->protinfo.af_netlink->handler(sk->protocol, skb);
		return 0;
	} else
#endif
	if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf &&
	    !test_bit(0, &sk->protinfo.af_netlink->state)) {
		skb_orphan(skb);
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->receive_queue, skb);
		sk->data_ready(sk, skb->len);
		return 0;
	}
	return -1;
}
492
/*
 * Deliver @skb to every socket subscribed to @group on @ssk's protocol,
 * except the sender and the socket bound to @pid.  Consumes the caller's
 * reference on @skb.  A clone (or the original, if uniquely owned) is
 * handed to each receiver; once a clone fails to allocate, all remaining
 * listeners just get an overrun notification.
 */
void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		       u32 group, int allocation)
{
	struct sock *sk;
	struct sk_buff *skb2 = NULL;
	int protocol = ssk->protocol;
	int failure = 0;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	for (sk = nl_table[protocol]; sk; sk = sk->next) {
		if (ssk == sk)
			continue;

		if (sk->protinfo.af_netlink->pid == pid ||
		    !(sk->protinfo.af_netlink->groups&group))
			continue;

		if (failure) {
			netlink_overrun(sk);
			continue;
		}

		sock_hold(sk);
		if (skb2 == NULL) {
			/* Reuse the original skb when we own it exclusively;
			 * otherwise clone (may sleep per @allocation). */
			if (atomic_read(&skb->users) != 1) {
				skb2 = skb_clone(skb, allocation);
			} else {
				skb2 = skb;
				atomic_inc(&skb->users);
			}
		}
		if (skb2 == NULL) {
			netlink_overrun(sk);
			/* Clone failed. Notify ALL listeners. */
			failure = 1;
		} else if (netlink_broadcast_deliver(sk, skb2)) {
			netlink_overrun(sk);
		} else
			skb2 = NULL;	/* consumed by the receiver */
		sock_put(sk);
	}

	netlink_unlock_table();

	if (skb2)
		kfree_skb(skb2);
	kfree_skb(skb);
}
544
545 void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
546 {
547         struct sock *sk;
548         int protocol = ssk->protocol;
549
550         read_lock(&nl_table_lock);
551         for (sk = nl_table[protocol]; sk; sk = sk->next) {
552                 if (ssk == sk)
553                         continue;
554
555                 if (sk->protinfo.af_netlink->pid == pid ||
556                     !(sk->protinfo.af_netlink->groups&group))
557                         continue;
558
559                 sk->err = code;
560                 sk->error_report(sk);
561         }
562         read_unlock(&nl_table_lock);
563 }
564
565 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, int len,
566                            struct scm_cookie *scm)
567 {
568         struct sock *sk = sock->sk;
569         struct sockaddr_nl *addr=msg->msg_name;
570         u32 dst_pid;
571         u32 dst_groups;
572         struct sk_buff *skb;
573         int err;
574
575         if (msg->msg_flags&MSG_OOB)
576                 return -EOPNOTSUPP;
577
578         if (msg->msg_namelen) {
579                 if (addr->nl_family != AF_NETLINK)
580                         return -EINVAL;
581                 dst_pid = addr->nl_pid;
582                 dst_groups = addr->nl_groups;
583                 if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND))
584                         return -EPERM;
585         } else {
586                 dst_pid = sk->protinfo.af_netlink->dst_pid;
587                 dst_groups = sk->protinfo.af_netlink->dst_groups;
588         }
589
590         if (!sk->protinfo.af_netlink->pid) {
591                 err = netlink_autobind(sock);
592                 if (err)
593                         goto out;
594         }
595
596         err = -EMSGSIZE;
597         if ((unsigned)len > sk->sndbuf-32)
598                 goto out;
599         err = -ENOBUFS;
600         skb = alloc_skb(len, GFP_KERNEL);
601         if (skb==NULL)
602                 goto out;
603
604         NETLINK_CB(skb).pid = sk->protinfo.af_netlink->pid;
605         NETLINK_CB(skb).groups = sk->protinfo.af_netlink->groups;
606         NETLINK_CB(skb).dst_pid = dst_pid;
607         NETLINK_CB(skb).dst_groups = dst_groups;
608         memcpy(NETLINK_CREDS(skb), &scm->creds, sizeof(struct ucred));
609
610         /* What can I do? Netlink is asynchronous, so that
611            we will have to save current capabilities to
612            check them, when this message will be delivered
613            to corresponding kernel module.   --ANK (980802)
614          */
615         NETLINK_CB(skb).eff_cap = current->cap_effective;
616
617         err = -EFAULT;
618         if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
619                 kfree_skb(skb);
620                 goto out;
621         }
622
623         if (dst_groups) {
624                 atomic_inc(&skb->users);
625                 netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
626         }
627         err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
628
629 out:
630         return err;
631 }
632
/*
 * recvmsg(2) backend: dequeue one datagram, copy it (possibly truncated)
 * to the user iovec, fill in the sender address and credentials, then
 * continue an in-progress dump and un-congest the socket when the
 * receive queue has drained enough.
 */
static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, int len,
			   int flags, struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	int noblock = flags&MSG_DONTWAIT;
	int copied;
	struct sk_buff *skb;
	int err;

	if (flags&MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk,flags,noblock,&err);
	if (skb==NULL)
		goto out;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb->h.raw = skb->data;
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pid	= NETLINK_CB(skb).pid;
		addr->nl_groups	= NETLINK_CB(skb).dst_groups;
		msg->msg_namelen = sizeof(*addr);
	}

	scm->creds = *NETLINK_CREDS(skb);
	skb_free_datagram(sk, skb);

	/* Keep feeding an active dump while the queue has headroom. */
	if (sk->protinfo.af_netlink->cb
	    && atomic_read(&sk->rmem_alloc) <= sk->rcvbuf/2)
		netlink_dump(sk);

out:
	/* Clear congestion and wake blocked senders once drained. */
	if (skb_queue_len(&sk->receive_queue) <= sk->rcvbuf/2) {
		if (skb_queue_len(&sk->receive_queue) == 0)
			clear_bit(0, &sk->protinfo.af_netlink->state);
		if (!test_bit(0, &sk->protinfo.af_netlink->state))
			wake_up_interruptible(&sk->protinfo.af_netlink->wait);
	}
	return err ? : copied;
}
686
/*
 * data_ready hook for kernel netlink sockets: forward to the registered
 * input callback, then clear congestion / wake senders as the queue
 * drains (mirrors the tail of netlink_recvmsg).
 */
void netlink_data_ready(struct sock *sk, int len)
{
	if (sk->protinfo.af_netlink->data_ready)
		sk->protinfo.af_netlink->data_ready(sk, len);

	if (skb_queue_len(&sk->receive_queue) <= sk->rcvbuf/2) {
		if (skb_queue_len(&sk->receive_queue) == 0)
			clear_bit(0, &sk->protinfo.af_netlink->state);
		if (!test_bit(0, &sk->protinfo.af_netlink->state))
			wake_up_interruptible(&sk->protinfo.af_netlink->wait);
	}
}
699
700 /*
701  *      We export these functions to other modules. They provide a 
702  *      complete set of kernel non-blocking support for message
703  *      queueing.
704  */
705
/*
 * Create a kernel-side netlink socket for protocol @unit, bound to pid 0.
 * @input, if non-NULL, is invoked from data_ready for each arrival.
 * Returns the sock or NULL on failure.
 */
struct sock *
netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
{
	struct socket *sock;
	struct sock *sk;

	if (unit<0 || unit>=MAX_LINKS)
		return NULL;

	if (!(sock = sock_alloc()))
		return NULL;

	sock->type = SOCK_RAW;

	if (netlink_create(sock, unit) < 0) {
		sock_release(sock);
		return NULL;
	}
	sk = sock->sk;
	sk->data_ready = netlink_data_ready;
	if (input)
		sk->protinfo.af_netlink->data_ready = input;

	/* pid 0 is reserved for the kernel socket. */
	netlink_insert(sk, 0);
	return sk;
}
732
/* Grant unprivileged userspace the operations in @flags
 * (NL_NONROOT_SEND and/or NL_NONROOT_RECV) on @protocol. */
void netlink_set_nonroot(int protocol, unsigned flags)
{
	if ((unsigned)protocol < MAX_LINKS)
		nl_nonroot[protocol] = flags;
}
739 static void netlink_destroy_callback(struct netlink_callback *cb)
740 {
741         if (cb->skb)
742                 kfree_skb(cb->skb);
743         kfree(cb);
744 }
745
746 /*
747  * It looks a bit ugly.
748  * It would be better to create kernel thread.
749  */
750
/*
 * Produce the next chunk of an in-progress dump into a fresh skb and
 * queue it on @sk.  A positive dump() return means "more to come" (the
 * callback stays installed); <= 0 means done, so a NLMSG_DONE message is
 * appended, the callback destroyed, and the dump's socket reference
 * dropped.  Returns 0, -ENOBUFS, or -EINVAL if no dump is active.
 */
static int netlink_dump(struct sock *sk)
{
	struct netlink_callback *cb;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int len;

	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	spin_lock(&sk->protinfo.af_netlink->cb_lock);

	cb = sk->protinfo.af_netlink->cb;
	if (cb == NULL) {
		spin_unlock(&sk->protinfo.af_netlink->cb_lock);
		kfree_skb(skb);
		return -EINVAL;
	}

	len = cb->dump(skb, cb);

	if (len > 0) {
		/* Partial chunk queued; the rest comes on the next recvmsg. */
		spin_unlock(&sk->protinfo.af_netlink->cb_lock);
		skb_queue_tail(&sk->receive_queue, skb);
		sk->data_ready(sk, len);
		return 0;
	}

	/* Dump complete: terminate the multipart stream with NLMSG_DONE. */
	nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int));
	nlh->nlmsg_flags |= NLM_F_MULTI;
	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
	skb_queue_tail(&sk->receive_queue, skb);
	sk->data_ready(sk, skb->len);

	cb->done(cb);
	sk->protinfo.af_netlink->cb = NULL;
	spin_unlock(&sk->protinfo.af_netlink->cb_lock);

	netlink_destroy_callback(cb);
	sock_put(sk);	/* reference taken by netlink_dump_start's lookup */
	return 0;
}
794
/*
 * Begin a dump in response to request @nlh carried in @skb: install a
 * netlink_callback on the requesting socket and emit the first chunk.
 * Only one dump per socket may be active (-EBUSY otherwise).  On success
 * the socket reference from the lookup is kept until the dump finishes.
 */
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb, struct netlink_callback*),
		       int (*done)(struct netlink_callback*))
{
	struct netlink_callback *cb;
	struct sock *sk;

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	memset(cb, 0, sizeof(*cb));
	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	/* Hold the request skb for the callback's lifetime. */
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(ssk->protocol, NETLINK_CB(skb).pid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	/* A dump is in progress... */
	spin_lock(&sk->protinfo.af_netlink->cb_lock);
	if (sk->protinfo.af_netlink->cb) {
		spin_unlock(&sk->protinfo.af_netlink->cb_lock);
		netlink_destroy_callback(cb);
		sock_put(sk);
		return -EBUSY;
	}
	sk->protinfo.af_netlink->cb = cb;
	spin_unlock(&sk->protinfo.af_netlink->cb_lock);

	netlink_dump(sk);
	return 0;
}
833
/*
 * Send an NLMSG_ERROR acknowledgment for @nlh back to its sender.
 * err == 0 acknowledges with just the original header echoed; nonzero
 * err echoes the entire offending message so the sender can match it.
 * Allocation failure silently drops the ack (best effort).
 */
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	int size;

	if (err == 0)
		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
	else
		/* Room for the error code plus the full original message. */
		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			  NLMSG_ERROR, sizeof(struct nlmsgerr));
	errmsg = NLMSG_DATA(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
857
858
859 #ifdef NL_EMULATE_DEV
860
861 static rwlock_t nl_emu_lock = RW_LOCK_UNLOCKED;
862
863 /*
864  *      Backward compatibility.
865  */     
866  
/*
 * Backward-compat (NL_EMULATE_DEV): create the kernel socket for @unit
 * and install @function as its synchronous handler.
 */
int netlink_attach(int unit, int (*function)(int, struct sk_buff *skb))
{
	struct sock *sk = netlink_kernel_create(unit, NULL);
	if (sk == NULL)
		return -ENOBUFS;
	sk->protinfo.af_netlink->handler = function;
	write_lock_bh(&nl_emu_lock);
	netlink_kernel[unit] = sk->socket;
	write_unlock_bh(&nl_emu_lock);
	return 0;
}
878
/* Backward-compat (NL_EMULATE_DEV): tear down the kernel socket for @unit. */
void netlink_detach(int unit)
{
	struct socket *sock;

	write_lock_bh(&nl_emu_lock);
	sock = netlink_kernel[unit];
	netlink_kernel[unit] = NULL;
	write_unlock_bh(&nl_emu_lock);

	/* Released outside the lock; sock_release may sleep. */
	sock_release(sock);
}
890
/*
 * Backward-compat (NL_EMULATE_DEV): broadcast @skb from the kernel socket
 * of @unit to all groups.  Returns 0, or -EUNATCH if no socket is attached.
 */
int netlink_post(int unit, struct sk_buff *skb)
{
	struct socket *sock;

	read_lock(&nl_emu_lock);
	sock = netlink_kernel[unit];
	if (sock) {
		struct sock *sk = sock->sk;
		/* Scrub stale control-block data before stamping as kernel. */
		memset(skb->cb, 0, sizeof(skb->cb));
		sock_hold(sk);	/* keep sk alive after dropping the lock */
		read_unlock(&nl_emu_lock);

		netlink_broadcast(sk, skb, 0, ~0, GFP_ATOMIC);

		sock_put(sk);
		return 0;
	}
	read_unlock(&nl_emu_lock);
	return -EUNATCH;
}
911
912 #endif
913
914
915 #ifdef CONFIG_PROC_FS
/*
 * /proc/net/netlink read handler: one line per socket across all
 * protocols.  Uses the classic procfs windowing idiom -- pos/begin track
 * how much of the virtual file precedes the buffered text so that only
 * the [offset, offset+length) window is returned.
 */
static int netlink_read_proc(char *buffer, char **start, off_t offset,
			     int length, int *eof, void *data)
{
	off_t pos=0;
	off_t begin=0;
	int len=0;
	int i;
	struct sock *s;

	len+= sprintf(buffer,"sk       Eth Pid    Groups   "
		      "Rmem     Wmem     Dump     Locks\n");

	for (i=0; i<MAX_LINKS; i++) {
		read_lock(&nl_table_lock);
		for (s = nl_table[i]; s; s = s->next) {
			len+=sprintf(buffer+len,"%p %-3d %-6d %08x %-8d %-8d %p %d",
				     s,
				     s->protocol,
				     s->protinfo.af_netlink->pid,
				     s->protinfo.af_netlink->groups,
				     atomic_read(&s->rmem_alloc),
				     atomic_read(&s->wmem_alloc),
				     s->protinfo.af_netlink->cb,
				     atomic_read(&s->refcnt)
				     );

			buffer[len++]='\n';

			pos=begin+len;
			if(pos<offset) {
				/* Entirely before the window: discard. */
				len=0;
				begin=pos;
			}
			if(pos>offset+length) {
				/* Window filled: stop early. */
				read_unlock(&nl_table_lock);
				goto done;
			}
		}
		read_unlock(&nl_table_lock);
	}
	*eof = 1;

done:
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if(len<0)
		len=0;
	return len;
}
967 #endif
968
/* Subscribe @nb to netlink events (currently NETLINK_URELEASE). */
int netlink_register_notifier(struct notifier_block *nb)
{
	return notifier_chain_register(&netlink_chain, nb);
}
973
/* Remove @nb from the netlink notifier chain. */
int netlink_unregister_notifier(struct notifier_block *nb)
{
	return notifier_chain_unregister(&netlink_chain, nb);
}
978                 
/* proto_ops for PF_NETLINK sockets; unsupported operations use the
 * generic sock_no_* stubs. */
struct proto_ops netlink_ops = {
	family:		PF_NETLINK,

	release:	netlink_release,
	bind:		netlink_bind,
	connect:	netlink_connect,
	socketpair:	sock_no_socketpair,
	accept:		sock_no_accept,
	getname:	netlink_getname,
	poll:		datagram_poll,
	ioctl:		sock_no_ioctl,
	listen:		sock_no_listen,
	shutdown:	sock_no_shutdown,
	setsockopt:	sock_no_setsockopt,
	getsockopt:	sock_no_getsockopt,
	sendmsg:	netlink_sendmsg,
	recvmsg:	netlink_recvmsg,
	mmap:		sock_no_mmap,
	sendpage:	sock_no_sendpage,
};
999
/* Address-family registration: routes socket(PF_NETLINK, ...) to
 * netlink_create. */
struct net_proto_family netlink_family_ops = {
	PF_NETLINK,
	netlink_create
};
1004
/* Module init: sanity-check that netlink_skb_parms fits in skb->cb,
 * register the address family and the /proc entry. */
static int __init netlink_proto_init(void)
{
	struct sk_buff *dummy_skb;

	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) {
		printk(KERN_CRIT "netlink_init: panic\n");
		return -1;
	}
	sock_register(&netlink_family_ops);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("net/netlink", 0, 0, netlink_read_proc, NULL);
#endif
	return 0;
}
1019
/* Module exit: undo netlink_proto_init's registrations. */
static void __exit netlink_proto_exit(void)
{
	sock_unregister(PF_NETLINK);
	remove_proc_entry("net/netlink", NULL);
}
1025
1026 module_init(netlink_proto_init);
1027 module_exit(netlink_proto_exit);