update atp870u driver to 0.78 from D-Link source
[linux-2.4.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.126.2.5 2002/03/05 12:47:34 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge amount
40  *                                      of socks hashed (this for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *
51  *
52  * Known differences from reference BSD that was tested:
53  *
54  *      [TO FIX]
55  *      ECONNREFUSED is not returned from one end of a connected() socket to the
56  *              other the moment one end closes.
57  *      fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
58  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
59  *      [NOT TO FIX]
60  *      accept() returns a path name even if the connecting socket has closed
61  *              in the meantime (BSD loses the path and gives up).
62  *      accept() returns 0 length path for an unbound connector. BSD returns 16
63  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
64  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
65  *      BSD af_unix apparently has connect forgetting to block properly.
66  *              (need to check this with the POSIX spec in detail)
67  *
68  * Differences from 2.0.0-11-... (ANK)
69  *      Bug fixes and improvements.
70  *              - client shutdown killed server socket.
71  *              - removed all useless cli/sti pairs.
72  *
73  *      Semantic changes/extensions.
74  *              - generic control message passing.
75  *              - SCM_CREDENTIALS control message.
76  *              - "Abstract" (not FS based) socket bindings.
77  *                Abstract names are sequences of bytes (not zero terminated)
78  *                started by 0, so that this name space does not intersect
79  *                with BSD names.
80  */
81
82 #include <linux/module.h>
83 #include <linux/config.h>
84 #include <linux/kernel.h>
85 #include <linux/major.h>
86 #include <linux/signal.h>
87 #include <linux/sched.h>
88 #include <linux/errno.h>
89 #include <linux/string.h>
90 #include <linux/stat.h>
91 #include <linux/socket.h>
92 #include <linux/un.h>
93 #include <linux/fcntl.h>
94 #include <linux/termios.h>
95 #include <linux/sockios.h>
96 #include <linux/net.h>
97 #include <linux/in.h>
98 #include <linux/fs.h>
99 #include <linux/slab.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <net/sock.h>
104 #include <linux/tcp.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/smp_lock.h>
111 #include <linux/rtnetlink.h>
112
113 #include <asm/checksum.h>
114
115 int sysctl_unix_max_dgram_qlen = 10;
116
117 unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
118 rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
119 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
120
121 #define unix_sockets_unbound    (unix_socket_table[UNIX_HASH_SIZE])
122
123 #define UNIX_ABSTRACT(sk)       ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)
124
125 /*
126  *  SMP locking strategy:
127  *    hash table is protected with rwlock unix_table_lock
128  *    each socket state is protected by separate rwlock.
129  */
130
131 static inline unsigned unix_hash_fold(unsigned hash)
132 {
133         hash ^= hash>>16;
134         hash ^= hash>>8;
135         return hash&(UNIX_HASH_SIZE-1);
136 }
137
/* Peer pointer of a socket; expands to an lvalue so it can be assigned. */
#define unix_peer(sk)   ((sk)->pair)
139
140 static inline int unix_our_peer(unix_socket *sk, unix_socket *osk)
141 {
142         return unix_peer(osk) == sk;
143 }
144
145 static inline int unix_may_send(unix_socket *sk, unix_socket *osk)
146 {
147         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
148 }
149
150 static inline unix_socket * unix_peer_get(unix_socket *s)
151 {
152         unix_socket *peer;
153
154         unix_state_rlock(s);
155         peer = unix_peer(s);
156         if (peer)
157                 sock_hold(peer);
158         unix_state_runlock(s);
159         return peer;
160 }
161
162 extern inline void unix_release_addr(struct unix_address *addr)
163 {
164         if (atomic_dec_and_test(&addr->refcnt))
165                 kfree(addr);
166 }
167
168 /*
169  *      Check unix socket name:
170  *              - should be not zero length.
171  *              - if started by not zero, should be NULL terminated (FS object)
172  *              - if started by zero, it is abstract name.
173  */
174  
175 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
176 {
177         if (len <= sizeof(short) || len > sizeof(*sunaddr))
178                 return -EINVAL;
179         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
180                 return -EINVAL;
181         if (sunaddr->sun_path[0]) {
182                 ((char *)sunaddr)[len]=0;
183                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
184                 return len;
185         }
186
187         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
188         return len;
189 }
190
191 static void __unix_remove_socket(unix_socket *sk)
192 {
193         unix_socket **list = sk->protinfo.af_unix.list;
194         if (list) {
195                 if (sk->next)
196                         sk->next->prev = sk->prev;
197                 if (sk->prev)
198                         sk->prev->next = sk->next;
199                 if (*list == sk)
200                         *list = sk->next;
201                 sk->protinfo.af_unix.list = NULL;
202                 sk->prev = NULL;
203                 sk->next = NULL;
204                 __sock_put(sk);
205         }
206 }
207
208 static void __unix_insert_socket(unix_socket **list, unix_socket *sk)
209 {
210         BUG_TRAP(sk->protinfo.af_unix.list==NULL);
211
212         sk->protinfo.af_unix.list = list;
213         sk->prev = NULL;
214         sk->next = *list;
215         if (*list)
216                 (*list)->prev = sk;
217         *list=sk;
218         sock_hold(sk);
219 }
220
221 static inline void unix_remove_socket(unix_socket *sk)
222 {
223         write_lock(&unix_table_lock);
224         __unix_remove_socket(sk);
225         write_unlock(&unix_table_lock);
226 }
227
228 static inline void unix_insert_socket(unix_socket **list, unix_socket *sk)
229 {
230         write_lock(&unix_table_lock);
231         __unix_insert_socket(list, sk);
232         write_unlock(&unix_table_lock);
233 }
234
235 static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname,
236                                               int len, int type, unsigned hash)
237 {
238         unix_socket *s;
239
240         for (s=unix_socket_table[hash^type]; s; s=s->next) {
241                 if(s->protinfo.af_unix.addr->len==len &&
242                    memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0)
243                         return s;
244         }
245         return NULL;
246 }
247
248 static inline unix_socket *
249 unix_find_socket_byname(struct sockaddr_un *sunname,
250                         int len, int type, unsigned hash)
251 {
252         unix_socket *s;
253
254         read_lock(&unix_table_lock);
255         s = __unix_find_socket_byname(sunname, len, type, hash);
256         if (s)
257                 sock_hold(s);
258         read_unlock(&unix_table_lock);
259         return s;
260 }
261
262 static unix_socket *unix_find_socket_byinode(struct inode *i)
263 {
264         unix_socket *s;
265
266         read_lock(&unix_table_lock);
267         for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next)
268         {
269                 struct dentry *dentry = s->protinfo.af_unix.dentry;
270
271                 if(dentry && dentry->d_inode == i)
272                 {
273                         sock_hold(s);
274                         break;
275                 }
276         }
277         read_unlock(&unix_table_lock);
278         return s;
279 }
280
281 static inline int unix_writable(struct sock *sk)
282 {
283         return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf);
284 }
285
286 static void unix_write_space(struct sock *sk)
287 {
288         read_lock(&sk->callback_lock);
289         if (unix_writable(sk)) {
290                 if (sk->sleep && waitqueue_active(sk->sleep))
291                         wake_up_interruptible(sk->sleep);
292                 sk_wake_async(sk, 2, POLL_OUT);
293         }
294         read_unlock(&sk->callback_lock);
295 }
296
297 /* When dgram socket disconnects (or changes its peer), we clear its receive
298  * queue of packets arrived from previous peer. First, it allows to do
299  * flow control based only on wmem_alloc; second, sk connected to peer
300  * may receive messages only from that peer. */
301 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
302 {
303         if (skb_queue_len(&sk->receive_queue)) {
304                 skb_queue_purge(&sk->receive_queue);
305                 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
306
307                 /* If one link of bidirectional dgram pipe is disconnected,
308                  * we signal error. Messages are lost. Do not make this,
309                  * when peer was not connected to us.
310                  */
311                 if (!other->dead && unix_peer(other) == sk) {
312                         other->err = ECONNRESET;
313                         other->error_report(other);
314                 }
315         }
316 }
317
318 static void unix_sock_destructor(struct sock *sk)
319 {
320         skb_queue_purge(&sk->receive_queue);
321
322         BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
323         BUG_TRAP(sk->protinfo.af_unix.list==NULL);
324         BUG_TRAP(sk->socket==NULL);
325         if (sk->dead==0) {
326                 printk("Attempt to release alive unix socket: %p\n", sk);
327                 return;
328         }
329
330         if (sk->protinfo.af_unix.addr)
331                 unix_release_addr(sk->protinfo.af_unix.addr);
332
333         atomic_dec(&unix_nr_socks);
334 #ifdef UNIX_REFCNT_DEBUG
335         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
336 #endif
337         MOD_DEC_USE_COUNT;
338 }
339
340 static int unix_release_sock (unix_socket *sk, int embrion)
341 {
342         struct dentry *dentry;
343         struct vfsmount *mnt;
344         unix_socket *skpair;
345         struct sk_buff *skb;
346         int state;
347
348         unix_remove_socket(sk);
349
350         /* Clear state */
351         unix_state_wlock(sk);
352         sock_orphan(sk);
353         sk->shutdown = SHUTDOWN_MASK;
354         dentry = sk->protinfo.af_unix.dentry;
355         sk->protinfo.af_unix.dentry=NULL;
356         mnt = sk->protinfo.af_unix.mnt;
357         sk->protinfo.af_unix.mnt=NULL;
358         state = sk->state;
359         sk->state = TCP_CLOSE;
360         unix_state_wunlock(sk);
361
362         wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
363
364         skpair=unix_peer(sk);
365
366         if (skpair!=NULL) {
367                 if (sk->type==SOCK_STREAM) {
368                         unix_state_wlock(skpair);
369                         skpair->shutdown=SHUTDOWN_MASK; /* No more writes*/
370                         if (!skb_queue_empty(&sk->receive_queue) || embrion)
371                                 skpair->err = ECONNRESET;
372                         unix_state_wunlock(skpair);
373                         skpair->state_change(skpair);
374                         read_lock(&skpair->callback_lock);
375                         sk_wake_async(skpair,1,POLL_HUP);
376                         read_unlock(&skpair->callback_lock);
377                 }
378                 sock_put(skpair); /* It may now die */
379                 unix_peer(sk) = NULL;
380         }
381
382         /* Try to flush out this socket. Throw out buffers at least */
383
384         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
385         {
386                 if (state==TCP_LISTEN)
387                         unix_release_sock(skb->sk, 1);
388                 /* passed fds are erased in the kfree_skb hook        */
389                 kfree_skb(skb);
390         }
391
392         if (dentry) {
393                 dput(dentry);
394                 mntput(mnt);
395         }
396
397         sock_put(sk);
398
399         /* ---- Socket is dead now and most probably destroyed ---- */
400
401         /*
402          * Fixme: BSD difference: In BSD all sockets connected to use get
403          *        ECONNRESET and we die on the spot. In Linux we behave
404          *        like files and pipes do and wait for the last
405          *        dereference.
406          *
407          * Can't we simply set sock->err?
408          *
409          *        What the above comment does talk about? --ANK(980817)
410          */
411
412         if (atomic_read(&unix_tot_inflight))
413                 unix_gc();              /* Garbage collect fds */       
414
415         return 0;
416 }
417
418 static int unix_listen(struct socket *sock, int backlog)
419 {
420         int err;
421         struct sock *sk = sock->sk;
422
423         err = -EOPNOTSUPP;
424         if (sock->type!=SOCK_STREAM)
425                 goto out;                       /* Only stream sockets accept */
426         err = -EINVAL;
427         if (!sk->protinfo.af_unix.addr)
428                 goto out;                       /* No listens on an unbound socket */
429         unix_state_wlock(sk);
430         if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN)
431                 goto out_unlock;
432         if (backlog > sk->max_ack_backlog)
433                 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
434         sk->max_ack_backlog=backlog;
435         sk->state=TCP_LISTEN;
436         /* set credentials so connect can copy them */
437         sk->peercred.pid = current->pid;
438         sk->peercred.uid = current->euid;
439         sk->peercred.gid = current->egid;
440         err = 0;
441
442 out_unlock:
443         unix_state_wunlock(sk);
444 out:
445         return err;
446 }
447
/* Operation tables chosen by unix_create(); not defined in this part of the file. */
extern struct proto_ops unix_stream_ops;
extern struct proto_ops unix_dgram_ops;
450
451 static struct sock * unix_create1(struct socket *sock)
452 {
453         struct sock *sk;
454
455         if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
456                 return NULL;
457
458         MOD_INC_USE_COUNT;
459         sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1);
460         if (!sk) {
461                 MOD_DEC_USE_COUNT;
462                 return NULL;
463         }
464
465         atomic_inc(&unix_nr_socks);
466
467         sock_init_data(sock,sk);
468
469         sk->write_space         =       unix_write_space;
470
471         sk->max_ack_backlog = sysctl_unix_max_dgram_qlen;
472         sk->destruct = unix_sock_destructor;
473         sk->protinfo.af_unix.dentry=NULL;
474         sk->protinfo.af_unix.mnt=NULL;
475         sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED;
476         atomic_set(&sk->protinfo.af_unix.inflight, sock ? 0 : -1);
477         init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */
478         init_waitqueue_head(&sk->protinfo.af_unix.peer_wait);
479         sk->protinfo.af_unix.list=NULL;
480         unix_insert_socket(&unix_sockets_unbound, sk);
481
482         return sk;
483 }
484
485 static int unix_create(struct socket *sock, int protocol)
486 {
487         if (protocol && protocol != PF_UNIX)
488                 return -EPROTONOSUPPORT;
489
490         sock->state = SS_UNCONNECTED;
491
492         switch (sock->type) {
493         case SOCK_STREAM:
494                 sock->ops = &unix_stream_ops;
495                 break;
496                 /*
497                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
498                  *      nothing uses it.
499                  */
500         case SOCK_RAW:
501                 sock->type=SOCK_DGRAM;
502         case SOCK_DGRAM:
503                 sock->ops = &unix_dgram_ops;
504                 break;
505         default:
506                 return -ESOCKTNOSUPPORT;
507         }
508
509         return unix_create1(sock) ? 0 : -ENOMEM;
510 }
511
512 static int unix_release(struct socket *sock)
513 {
514         unix_socket *sk = sock->sk;
515
516         if (!sk)
517                 return 0;
518
519         sock->sk = NULL;
520
521         return unix_release_sock (sk, 0);
522 }
523
524 static int unix_autobind(struct socket *sock)
525 {
526         struct sock *sk = sock->sk;
527         static u32 ordernum = 1;
528         struct unix_address * addr;
529         int err;
530
531         down(&sk->protinfo.af_unix.readsem);
532
533         err = 0;
534         if (sk->protinfo.af_unix.addr)
535                 goto out;
536
537         err = -ENOMEM;
538         addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
539         if (!addr)
540                 goto out;
541
542         memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
543         addr->name->sun_family = AF_UNIX;
544         atomic_set(&addr->refcnt, 1);
545
546 retry:
547         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
548         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
549
550         write_lock(&unix_table_lock);
551         ordernum = (ordernum+1)&0xFFFFF;
552
553         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
554                                       addr->hash)) {
555                 write_unlock(&unix_table_lock);
556                 /* Sanity yield. It is unusual case, but yet... */
557                 if (!(ordernum&0xFF))
558                         yield();
559                 goto retry;
560         }
561         addr->hash ^= sk->type;
562
563         __unix_remove_socket(sk);
564         sk->protinfo.af_unix.addr = addr;
565         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
566         write_unlock(&unix_table_lock);
567         err = 0;
568
569 out:
570         up(&sk->protinfo.af_unix.readsem);
571         return err;
572 }
573
574 static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
575                                     int type, unsigned hash, int *error)
576 {
577         unix_socket *u;
578         struct nameidata nd;
579         int err = 0;
580         
581         if (sunname->sun_path[0]) {
582                 if (path_init(sunname->sun_path, 
583                               LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
584                         err = path_walk(sunname->sun_path, &nd);
585                 if (err)
586                         goto fail;
587                 err = permission(nd.dentry->d_inode,MAY_WRITE);
588                 if (err)
589                         goto put_fail;
590
591                 err = -ECONNREFUSED;
592                 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
593                         goto put_fail;
594                 u=unix_find_socket_byinode(nd.dentry->d_inode);
595                 if (!u)
596                         goto put_fail;
597
598                 if (u->type == type)
599                         UPDATE_ATIME(nd.dentry->d_inode);
600
601                 path_release(&nd);
602
603                 err=-EPROTOTYPE;
604                 if (u->type != type) {
605                         sock_put(u);
606                         goto fail;
607                 }
608         } else {
609                 err = -ECONNREFUSED;
610                 u=unix_find_socket_byname(sunname, len, type, hash);
611                 if (u) {
612                         struct dentry *dentry;
613                         dentry = u->protinfo.af_unix.dentry;
614                         if (dentry)
615                                 UPDATE_ATIME(dentry->d_inode);
616                 } else
617                         goto fail;
618         }
619         return u;
620
621 put_fail:
622         path_release(&nd);
623 fail:
624         *error=err;
625         return NULL;
626 }
627
628
629 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
630 {
631         struct sock *sk = sock->sk;
632         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
633         struct dentry * dentry = NULL;
634         struct nameidata nd;
635         int err;
636         unsigned hash;
637         struct unix_address *addr;
638         unix_socket **list;
639
640         err = -EINVAL;
641         if (sunaddr->sun_family != AF_UNIX)
642                 goto out;
643
644         if (addr_len==sizeof(short)) {
645                 err = unix_autobind(sock);
646                 goto out;
647         }
648
649         err = unix_mkname(sunaddr, addr_len, &hash);
650         if (err < 0)
651                 goto out;
652         addr_len = err;
653
654         down(&sk->protinfo.af_unix.readsem);
655
656         err = -EINVAL;
657         if (sk->protinfo.af_unix.addr)
658                 goto out_up;
659
660         err = -ENOMEM;
661         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
662         if (!addr)
663                 goto out_up;
664
665         memcpy(addr->name, sunaddr, addr_len);
666         addr->len = addr_len;
667         addr->hash = hash^sk->type;
668         atomic_set(&addr->refcnt, 1);
669
670         if (sunaddr->sun_path[0]) {
671                 unsigned int mode;
672                 err = 0;
673                 /*
674                  * Get the parent directory, calculate the hash for last
675                  * component.
676                  */
677                 if (path_init(sunaddr->sun_path, LOOKUP_PARENT, &nd))
678                         err = path_walk(sunaddr->sun_path, &nd);
679                 if (err)
680                         goto out_mknod_parent;
681                 /*
682                  * Yucky last component or no last component at all?
683                  * (foo/., foo/.., /////)
684                  */
685                 err = -EEXIST;
686                 if (nd.last_type != LAST_NORM)
687                         goto out_mknod;
688                 /*
689                  * Lock the directory.
690                  */
691                 down(&nd.dentry->d_inode->i_sem);
692                 /*
693                  * Do the final lookup.
694                  */
695                 dentry = lookup_hash(&nd.last, nd.dentry);
696                 err = PTR_ERR(dentry);
697                 if (IS_ERR(dentry))
698                         goto out_mknod_unlock;
699                 err = -ENOENT;
700                 /*
701                  * Special case - lookup gave negative, but... we had foo/bar/
702                  * From the vfs_mknod() POV we just have a negative dentry -
703                  * all is fine. Let's be bastards - you had / on the end, you've
704                  * been asking for (non-existent) directory. -ENOENT for you.
705                  */
706                 if (nd.last.name[nd.last.len] && !dentry->d_inode)
707                         goto out_mknod_dput;
708                 /*
709                  * All right, let's create it.
710                  */
711                 mode = S_IFSOCK | (sock->inode->i_mode & ~current->fs->umask);
712                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
713                 if (err)
714                         goto out_mknod_dput;
715                 up(&nd.dentry->d_inode->i_sem);
716                 dput(nd.dentry);
717                 nd.dentry = dentry;
718
719                 addr->hash = UNIX_HASH_SIZE;
720         }
721
722         write_lock(&unix_table_lock);
723
724         if (!sunaddr->sun_path[0]) {
725                 err = -EADDRINUSE;
726                 if (__unix_find_socket_byname(sunaddr, addr_len,
727                                               sk->type, hash)) {
728                         unix_release_addr(addr);
729                         goto out_unlock;
730                 }
731
732                 list = &unix_socket_table[addr->hash];
733         } else {
734                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
735                 sk->protinfo.af_unix.dentry = nd.dentry;
736                 sk->protinfo.af_unix.mnt = nd.mnt;
737         }
738
739         err = 0;
740         __unix_remove_socket(sk);
741         sk->protinfo.af_unix.addr = addr;
742         __unix_insert_socket(list, sk);
743
744 out_unlock:
745         write_unlock(&unix_table_lock);
746 out_up:
747         up(&sk->protinfo.af_unix.readsem);
748 out:
749         return err;
750
751 out_mknod_dput:
752         dput(dentry);
753 out_mknod_unlock:
754         up(&nd.dentry->d_inode->i_sem);
755 out_mknod:
756         path_release(&nd);
757 out_mknod_parent:
758         if (err==-EEXIST)
759                 err=-EADDRINUSE;
760         unix_release_addr(addr);
761         goto out_up;
762 }
763
764 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
765                               int alen, int flags)
766 {
767         struct sock *sk = sock->sk;
768         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
769         struct sock *other;
770         unsigned hash;
771         int err;
772
773         if (addr->sa_family != AF_UNSPEC) {
774                 err = unix_mkname(sunaddr, alen, &hash);
775                 if (err < 0)
776                         goto out;
777                 alen = err;
778
779                 if (sock->passcred && !sk->protinfo.af_unix.addr &&
780                     (err = unix_autobind(sock)) != 0)
781                         goto out;
782
783                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
784                 if (!other)
785                         goto out;
786
787                 unix_state_wlock(sk);
788
789                 err = -EPERM;
790                 if (!unix_may_send(sk, other))
791                         goto out_unlock;
792         } else {
793                 /*
794                  *      1003.1g breaking connected state with AF_UNSPEC
795                  */
796                 other = NULL;
797                 unix_state_wlock(sk);
798         }
799
800         /*
801          * If it was connected, reconnect.
802          */
803         if (unix_peer(sk)) {
804                 struct sock *old_peer = unix_peer(sk);
805                 unix_peer(sk)=other;
806                 unix_state_wunlock(sk);
807
808                 if (other != old_peer)
809                         unix_dgram_disconnected(sk, old_peer);
810                 sock_put(old_peer);
811         } else {
812                 unix_peer(sk)=other;
813                 unix_state_wunlock(sk);
814         }
815         return 0;
816
817 out_unlock:
818         unix_state_wunlock(sk);
819         sock_put(other);
820 out:
821         return err;
822 }
823
824 static long unix_wait_for_peer(unix_socket *other, long timeo)
825 {
826         int sched;
827         DECLARE_WAITQUEUE(wait, current);
828
829         __set_current_state(TASK_INTERRUPTIBLE);
830         add_wait_queue_exclusive(&other->protinfo.af_unix.peer_wait, &wait);
831
832         sched = (!other->dead &&
833                  !(other->shutdown&RCV_SHUTDOWN) &&
834                  skb_queue_len(&other->receive_queue) > other->max_ack_backlog);
835
836         unix_state_runlock(other);
837
838         if (sched)
839                 timeo = schedule_timeout(timeo);
840
841         __set_current_state(TASK_RUNNING);
842         remove_wait_queue(&other->protinfo.af_unix.peer_wait, &wait);
843         return timeo;
844 }
845
/*
 *      connect() for AF_UNIX stream sockets.
 *
 *      Builds the address with unix_mkname(), autobinds the caller when
 *      sock->passcred is set and it has no address yet, then allocates
 *      a new sock (newsk) and an skb via sock_wmalloc(newsk, ...)
 *      before any state lock is taken. The socket returned by
 *      unix_find_other() must be in TCP_LISTEN, otherwise the call
 *      fails with -ECONNREFUSED. If its receive queue is longer than
 *      max_ack_backlog the caller waits in unix_wait_for_peer() (or
 *      gets -EAGAIN when the send timeout is zero) and the lookup is
 *      restarted.
 *
 *      On success newsk and sk become each other's peer, the skb is
 *      appended to the listener's receive queue and 0 is returned.
 *      On failure a negative errno is returned and the skb, newsk and
 *      the reference on the listener are released at the out labels.
 */
846 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
847                                int addr_len, int flags)
848 {
849         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
850         struct sock *sk = sock->sk;
851         struct sock *newsk = NULL;
852         unix_socket *other = NULL;
853         struct sk_buff *skb = NULL;
854         unsigned hash;
855         int st;
856         int err;
857         long timeo;
858
859         err = unix_mkname(sunaddr, addr_len, &hash);
860         if (err < 0)
861                 goto out;
            /* A non-negative result of unix_mkname() is used as the length. */
862         addr_len = err;
863
864         if (sock->passcred && !sk->protinfo.af_unix.addr &&
865             (err = unix_autobind(sock)) != 0)
866                 goto out;
867
868         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
869
870         /* First of all allocate resources.
871            If we did this after the state is locked,
872            we would have to recheck everything again in any case.
873          */
874
875         err = -ENOMEM;
876
877         /* create new sock for complete connection */
878         newsk = unix_create1(NULL);
879         if (newsk == NULL)
880                 goto out;
881
882         /* Allocate skb for sending to listening sock */
883         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
884         if (skb == NULL)
885                 goto out;
886
887 restart:
888         /*  Find listening sock. */
889         other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err);
890         if (!other)
891                 goto out;
892
893         /* Latch state of peer */
894         unix_state_rlock(other);
895
896         /* Apparently VFS overslept socket death. Retry. */
897         if (other->dead) {
898                 unix_state_runlock(other);
899                 sock_put(other);
900                 goto restart;
901         }
902
903         err = -ECONNREFUSED;
904         if (other->state != TCP_LISTEN)
905                 goto out_unlock;
906
907         if (skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
908                 err = -EAGAIN;
909                 if (!timeo)
910                         goto out_unlock;
911
                    /*
                     * unix_wait_for_peer() releases the read lock on
                     * other, so both exits below only have to drop the
                     * reference (directly, or at the out label).
                     */
912                 timeo = unix_wait_for_peer(other, timeo);
913
914                 err = sock_intr_errno(timeo);
915                 if (signal_pending(current))
916                         goto out;
917                 sock_put(other);
918                 goto restart;
919         }
920
921         /* Latch our state.
922
923            This is a tricky place. We need to grab the write lock and
924            cannot drop the lock on the peer. It is dangerous because
925            deadlock is possible. The connect-to-self case and
926            simultaneous connect attempts are eliminated by checking
927            socket state: other is TCP_LISTEN, and if sk is TCP_LISTEN
928            we check this before attempting to grab the lock.
929
930            Well, and we have to recheck the state after socket locked.
931          */
932         st = sk->state;
933
934         switch (st) {
935         case TCP_CLOSE:
936                 /* This is ok... continue with connect */
937                 break;
938         case TCP_ESTABLISHED:
939                 /* Socket is already connected */
940                 err = -EISCONN;
941                 goto out_unlock;
942         default:
943                 err = -EINVAL;
944                 goto out_unlock;
945         }
946
947         unix_state_wlock(sk);
948
            /* State changed while we were taking the lock: start over. */
949         if (sk->state != st) {
950                 unix_state_wunlock(sk);
951                 unix_state_runlock(other);
952                 sock_put(other);
953                 goto restart;
954         }
955
956         /* The way is open! Quickly set all the necessary fields... */
957
958         sock_hold(sk);
959         unix_peer(newsk)=sk;
960         newsk->state=TCP_ESTABLISHED;
961         newsk->type=SOCK_STREAM;
962         newsk->peercred.pid = current->pid;
963         newsk->peercred.uid = current->euid;
964         newsk->peercred.gid = current->egid;
965         newsk->sleep = &newsk->protinfo.af_unix.peer_wait;
966
967         /* copy address information from listening to new sock*/
968         if (other->protinfo.af_unix.addr)
969         {
970                 atomic_inc(&other->protinfo.af_unix.addr->refcnt);
971                 newsk->protinfo.af_unix.addr=other->protinfo.af_unix.addr;
972         }
973         if (other->protinfo.af_unix.dentry) {
974                 newsk->protinfo.af_unix.dentry=dget(other->protinfo.af_unix.dentry);
975                 newsk->protinfo.af_unix.mnt=mntget(other->protinfo.af_unix.mnt);
976         }
977
978         /* Set credentials */
979         sk->peercred = other->peercred;
980
981         sock_hold(newsk);
982         unix_peer(sk)=newsk;
983         sock->state=SS_CONNECTED;
984         sk->state=TCP_ESTABLISHED;
985
986         unix_state_wunlock(sk);
987
988         /* take ten and send info to listening sock */
989         spin_lock(&other->receive_queue.lock);
990         __skb_queue_tail(&other->receive_queue,skb);
991         /* Undo artificially decreased inflight after embryo
992          * is installed to listening socket. */
993         atomic_inc(&newsk->protinfo.af_unix.inflight);
994         spin_unlock(&other->receive_queue.lock);
995         unix_state_runlock(other);
996         other->data_ready(other, 0);
997         sock_put(other);
998         return 0;
999
1000 out_unlock:
1001         if (other)
1002                 unix_state_runlock(other);
1003
             /* Common failure exit: release whatever was acquired so far. */
1004 out:
1005         if (skb)
1006                 kfree_skb(skb);
1007         if (newsk)
1008                 unix_release_sock(newsk, 0);
1009         if (other)
1010                 sock_put(other);
1011         return err;
1012 }
1013
1014 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1015 {
1016         struct sock *ska=socka->sk, *skb = sockb->sk;
1017
1018         /* Join our sockets back to back */
1019         sock_hold(ska);
1020         sock_hold(skb);
1021         unix_peer(ska)=skb;
1022         unix_peer(skb)=ska;
1023         ska->peercred.pid = skb->peercred.pid = current->pid;
1024         ska->peercred.uid = skb->peercred.uid = current->euid;
1025         ska->peercred.gid = skb->peercred.gid = current->egid;
1026
1027         if (ska->type != SOCK_DGRAM)
1028         {
1029                 ska->state=TCP_ESTABLISHED;
1030                 skb->state=TCP_ESTABLISHED;
1031                 socka->state=SS_CONNECTED;
1032                 sockb->state=SS_CONNECTED;
1033         }
1034         return 0;
1035 }
1036
1037 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1038 {
1039         unix_socket *sk = sock->sk;
1040         unix_socket *tsk;
1041         struct sk_buff *skb;
1042         int err;
1043
1044         err = -EOPNOTSUPP;
1045         if (sock->type!=SOCK_STREAM)
1046                 goto out;
1047
1048         err = -EINVAL;
1049         if (sk->state!=TCP_LISTEN)
1050                 goto out;
1051
1052         /* If socket state is TCP_LISTEN it cannot change (for now...),
1053          * so that no locks are necessary.
1054          */
1055
1056         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1057         if (!skb) {
1058                 /* This means receive shutdown. */
1059                 if (err == 0)
1060                         err = -EINVAL;
1061                 goto out;
1062         }
1063
1064         tsk = skb->sk;
1065         skb_free_datagram(sk, skb);
1066         wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1067
1068         /* attach accepted sock to socket */
1069         unix_state_wlock(tsk);
1070         newsock->state = SS_CONNECTED;
1071         sock_graft(tsk, newsock);
1072         unix_state_wunlock(tsk);
1073         return 0;
1074
1075 out:
1076         return err;
1077 }
1078
1079
1080 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1081 {
1082         struct sock *sk = sock->sk;
1083         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1084         int err = 0;
1085
1086         if (peer) {
1087                 sk = unix_peer_get(sk);
1088
1089                 err = -ENOTCONN;
1090                 if (!sk)
1091                         goto out;
1092                 err = 0;
1093         } else {
1094                 sock_hold(sk);
1095         }
1096
1097         unix_state_rlock(sk);
1098         if (!sk->protinfo.af_unix.addr) {
1099                 sunaddr->sun_family = AF_UNIX;
1100                 sunaddr->sun_path[0] = 0;
1101                 *uaddr_len = sizeof(short);
1102         } else {
1103                 struct unix_address *addr = sk->protinfo.af_unix.addr;
1104
1105                 *uaddr_len = addr->len;
1106                 memcpy(sunaddr, addr->name, *uaddr_len);
1107         }
1108         unix_state_runlock(sk);
1109         sock_put(sk);
1110 out:
1111         return err;
1112 }
1113
1114 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1115 {
1116         int i;
1117
1118         scm->fp = UNIXCB(skb).fp;
1119         skb->destructor = sock_wfree;
1120         UNIXCB(skb).fp = NULL;
1121
1122         for (i=scm->fp->count-1; i>=0; i--)
1123                 unix_notinflight(scm->fp->fp[i]);
1124 }
1125
1126 static void unix_destruct_fds(struct sk_buff *skb)
1127 {
1128         struct scm_cookie scm;
1129         memset(&scm, 0, sizeof(scm));
1130         unix_detach_fds(&scm, skb);
1131
1132         /* Alas, it calls VFS */
1133         /* So fscking what? fput() had been SMP-safe since the last Summer */
1134         scm_destroy(&scm);
1135         sock_wfree(skb);
1136 }
1137
1138 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1139 {
1140         int i;
1141         for (i=scm->fp->count-1; i>=0; i--)
1142                 unix_inflight(scm->fp->fp[i]);
1143         UNIXCB(skb).fp = scm->fp;
1144         skb->destructor = unix_destruct_fds;
1145         scm->fp = NULL;
1146 }
1147
1148 /*
1149  *      Send AF_UNIX data.
1150  */
1151
/*
 *      sendmsg() for AF_UNIX datagram sockets.
 *
 *      The destination is msg->msg_name when msg_namelen is non-zero,
 *      otherwise the connected peer (-ENOTCONN if there is none).
 *      MSG_OOB is rejected with -EOPNOTSUPP and a message longer than
 *      sk->sndbuf - 32 with -EMSGSIZE. The whole message is copied
 *      into a single skb together with the sender's credentials and
 *      any descriptors in scm, and the skb is appended to the
 *      receiver's receive queue.
 *
 *      Returns len on success or a negative errno.
 */
1152 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
1153                               struct scm_cookie *scm)
1154 {
1155         struct sock *sk = sock->sk;
1156         struct sockaddr_un *sunaddr=msg->msg_name;
1157         unix_socket *other = NULL;
1158         int namelen = 0; /* fake GCC */
1159         int err;
1160         unsigned hash;
1161         struct sk_buff *skb;
1162         long timeo;
1163
1164         err = -EOPNOTSUPP;
1165         if (msg->msg_flags&MSG_OOB)
1166                 goto out;
1167
1168         if (msg->msg_namelen) {
1169                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1170                 if (err < 0)
1171                         goto out;
1172                 namelen = err;
1173         } else {
                     /* No address given: fall back to the connected peer. */
1174                 sunaddr = NULL;
1175                 err = -ENOTCONN;
1176                 other = unix_peer_get(sk);
1177                 if (!other)
1178                         goto out;
1179         }
1180
1181         if (sock->passcred && !sk->protinfo.af_unix.addr &&
1182             (err = unix_autobind(sock)) != 0)
1183                 goto out;
1184
1185         err = -EMSGSIZE;
1186         if ((unsigned)len > sk->sndbuf - 32)
1187                 goto out;
1188
1189         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1190         if (skb==NULL)
1191                 goto out;
1192
1193         memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
1194         if (scm->fp)
1195                 unix_attach_fds(scm, skb);
1196
1197         skb->h.raw = skb->data;
1198         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1199         if (err)
1200                 goto out_free;
1201
1202         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1203
1204 restart:
             /* Look the receiver up by name unless we already hold one. */
1205         if (!other) {
1206                 err = -ECONNRESET;
1207                 if (sunaddr == NULL)
1208                         goto out_free;
1209
1210                 other = unix_find_other(sunaddr, namelen, sk->type, hash, &err);
1211                 if (other==NULL)
1212                         goto out_free;
1213         }
1214
1215         unix_state_rlock(other);
1216         err = -EPERM;
1217         if (!unix_may_send(sk, other))
1218                 goto out_unlock;
1219
1220         if (other->dead) {
1221                 /*
1222                  *      Check with 1003.1g - what should
1223                  *      datagram error
1224                  */
1225                 unix_state_runlock(other);
1226                 sock_put(other);
1227
                     /*
                      * If the dead socket was our connected peer, break
                      * the association and fail with -ECONNREFUSED;
                      * otherwise err stays 0 and the lookup is retried.
                      */
1228                 err = 0;
1229                 unix_state_wlock(sk);
1230                 if (unix_peer(sk) == other) {
1231                         unix_peer(sk)=NULL;
1232                         unix_state_wunlock(sk);
1233
1234                         unix_dgram_disconnected(sk, other);
1235                         sock_put(other);
1236                         err = -ECONNREFUSED;
1237                 } else {
1238                         unix_state_wunlock(sk);
1239                 }
1240
1241                 other = NULL;
1242                 if (err)
1243                         goto out_free;
1244                 goto restart;
1245         }
1246
1247         err = -EPIPE;
1248         if (other->shutdown&RCV_SHUTDOWN)
1249                 goto out_unlock;
1250
             /* The queue limit is not applied when the receiver's peer is us. */
1251         if (unix_peer(other) != sk &&
1252             skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
1253                 if (!timeo) {
1254                         err = -EAGAIN;
1255                         goto out_unlock;
1256                 }
1257
                     /*
                      * unix_wait_for_peer() releases the read lock on
                      * other; the reference is kept for the retry.
                      */
1258                 timeo = unix_wait_for_peer(other, timeo);
1259
1260                 err = sock_intr_errno(timeo);
1261                 if (signal_pending(current))
1262                         goto out_free;
1263
1264                 goto restart;
1265         }
1266
1267         skb_queue_tail(&other->receive_queue, skb);
1268         unix_state_runlock(other);
1269         other->data_ready(other, len);
1270         sock_put(other);
1271         return len;
1272
1273 out_unlock:
1274         unix_state_runlock(other);
1275 out_free:
1276         kfree_skb(skb);
1277 out:
1278         if (other)
1279                 sock_put(other);
1280         return err;
1281 }
1282
1283                 
1284 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
1285                                struct scm_cookie *scm)
1286 {
1287         struct sock *sk = sock->sk;
1288         unix_socket *other = NULL;
1289         struct sockaddr_un *sunaddr=msg->msg_name;
1290         int err,size;
1291         struct sk_buff *skb;
1292         int sent=0;
1293
1294         err = -EOPNOTSUPP;
1295         if (msg->msg_flags&MSG_OOB)
1296                 goto out_err;
1297
1298         if (msg->msg_namelen) {
1299                 err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
1300                 goto out_err;
1301         } else {
1302                 sunaddr = NULL;
1303                 err = -ENOTCONN;
1304                 other = unix_peer_get(sk);
1305                 if (!other)
1306                         goto out_err;
1307         }
1308
1309         if (sk->shutdown&SEND_SHUTDOWN)
1310                 goto pipe_err;
1311
1312         while(sent < len)
1313         {
1314                 /*
1315                  *      Optimisation for the fact that under 0.01% of X messages typically
1316                  *      need breaking up.
1317                  */
1318
1319                 size=len-sent;
1320
1321                 /* Keep two messages in the pipe so it schedules better */
1322                 if (size > sk->sndbuf/2 - 64)
1323                         size = sk->sndbuf/2 - 64;
1324
1325                 if (size > SKB_MAX_ALLOC)
1326                         size = SKB_MAX_ALLOC;
1327                         
1328                 /*
1329                  *      Grab a buffer
1330                  */
1331                  
1332                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1333
1334                 if (skb==NULL)
1335                         goto out_err;
1336
1337                 /*
1338                  *      If you pass two values to the sock_alloc_send_skb
1339                  *      it tries to grab the large buffer with GFP_NOFS
1340                  *      (which can fail easily), and if it fails grab the
1341                  *      fallback size buffer which is under a page and will
1342                  *      succeed. [Alan]
1343                  */
1344                 size = min_t(int, size, skb_tailroom(skb));
1345
1346                 memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
1347                 if (scm->fp)
1348                         unix_attach_fds(scm, skb);
1349
1350                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1351                         kfree_skb(skb);
1352                         goto out_err;
1353                 }
1354
1355                 unix_state_rlock(other);
1356
1357                 if (other->dead || (other->shutdown & RCV_SHUTDOWN))
1358                         goto pipe_err_free;
1359
1360                 skb_queue_tail(&other->receive_queue, skb);
1361                 unix_state_runlock(other);
1362                 other->data_ready(other, size);
1363                 sent+=size;
1364         }
1365         sock_put(other);
1366         return sent;
1367
1368 pipe_err_free:
1369         unix_state_runlock(other);
1370         kfree_skb(skb);
1371 pipe_err:
1372         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1373                 send_sig(SIGPIPE,current,0);
1374         err = -EPIPE;
1375 out_err:
1376         if (other)
1377                 sock_put(other);
1378         return sent ? : err;
1379 }
1380
1381 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1382 {
1383         msg->msg_namelen = 0;
1384         if (sk->protinfo.af_unix.addr) {
1385                 msg->msg_namelen=sk->protinfo.af_unix.addr->len;
1386                 memcpy(msg->msg_name,
1387                        sk->protinfo.af_unix.addr->name,
1388                        sk->protinfo.af_unix.addr->len);
1389         }
1390 }
1391
1392 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1393                               int flags, struct scm_cookie *scm)
1394 {
1395         struct sock *sk = sock->sk;
1396         int noblock = flags & MSG_DONTWAIT;
1397         struct sk_buff *skb;
1398         int err;
1399
1400         err = -EOPNOTSUPP;
1401         if (flags&MSG_OOB)
1402                 goto out;
1403
1404         msg->msg_namelen = 0;
1405
1406         down(&sk->protinfo.af_unix.readsem);
1407
1408         skb = skb_recv_datagram(sk, flags, noblock, &err);
1409         if (!skb)
1410                 goto out_unlock;
1411
1412         wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1413
1414         if (msg->msg_name)
1415                 unix_copy_addr(msg, skb->sk);
1416
1417         if (size > skb->len)
1418                 size = skb->len;
1419         else if (size < skb->len)
1420                 msg->msg_flags |= MSG_TRUNC;
1421
1422         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1423         if (err)
1424                 goto out_free;
1425
1426         scm->creds = *UNIXCREDS(skb);
1427
1428         if (!(flags & MSG_PEEK))
1429         {
1430                 if (UNIXCB(skb).fp)
1431                         unix_detach_fds(scm, skb);
1432         }
1433         else 
1434         {
1435                 /* It is questionable: on PEEK we could:
1436                    - do not return fds - good, but too simple 8)
1437                    - return fds, and do not return them on read (old strategy,
1438                      apparently wrong)
1439                    - clone fds (I choosed it for now, it is the most universal
1440                      solution)
1441                 
1442                    POSIX 1003.1g does not actually define this clearly
1443                    at all. POSIX 1003.1g doesn't define a lot of things
1444                    clearly however!                  
1445                    
1446                 */
1447                 if (UNIXCB(skb).fp)
1448                         scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1449         }
1450         err = size;
1451
1452 out_free:
1453         skb_free_datagram(sk,skb);
1454 out_unlock:
1455         up(&sk->protinfo.af_unix.readsem);
1456 out:
1457         return err;
1458 }
1459
1460 /*
1461  *      Sleep until data has arrive. But check for races..
1462  */
1463  
1464 static long unix_stream_data_wait(unix_socket * sk, long timeo)
1465 {
1466         DECLARE_WAITQUEUE(wait, current);
1467
1468         unix_state_rlock(sk);
1469
1470         add_wait_queue(sk->sleep, &wait);
1471
1472         for (;;) {
1473                 set_current_state(TASK_INTERRUPTIBLE);
1474
1475                 if (skb_queue_len(&sk->receive_queue) ||
1476                     sk->err ||
1477                     (sk->shutdown & RCV_SHUTDOWN) ||
1478                     signal_pending(current) ||
1479                     !timeo)
1480                         break;
1481
1482                 set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
1483                 unix_state_runlock(sk);
1484                 timeo = schedule_timeout(timeo);
1485                 unix_state_rlock(sk);
1486                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
1487         }
1488
1489         __set_current_state(TASK_RUNNING);
1490         remove_wait_queue(sk->sleep, &wait);
1491         unix_state_runlock(sk);
1492         return timeo;
1493 }
1494
1495
1496
/*
 *      recvmsg() for AF_UNIX stream sockets.
 *
 *      Fails with -EINVAL unless the socket is TCP_ESTABLISHED and
 *      with -EOPNOTSUPP for MSG_OOB. With readsem held, skbs are taken
 *      off the receive queue and copied to msg->msg_iov until 'size'
 *      bytes have been delivered or one of the break conditions in the
 *      loop is met. When the queue is empty and fewer than 'target'
 *      bytes have been copied, the caller sleeps in
 *      unix_stream_data_wait(). An skb whose credentials differ from
 *      those of the first skb read is put back on the queue.
 *
 *      Returns 'copied' when it is non-zero (it is set to -EFAULT if
 *      the very first copy to user space fails), otherwise err.
 */
1497 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1498                                int flags, struct scm_cookie *scm)
1499 {
1500         struct sock *sk = sock->sk;
1501         struct sockaddr_un *sunaddr=msg->msg_name;
1502         int copied = 0;
1503         int check_creds = 0;
1504         int target;
1505         int err = 0;
1506         long timeo;
1507
1508         err = -EINVAL;
1509         if (sk->state != TCP_ESTABLISHED)
1510                 goto out;
1511
1512         err = -EOPNOTSUPP;
1513         if (flags&MSG_OOB)
1514                 goto out;
1515
1516         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1517         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1518
1519         msg->msg_namelen = 0;
1520
1521         /* Lock the socket to prevent queue disordering
1522          * while sleeps in memcpy_tomsg
1523          */
1524
1525         down(&sk->protinfo.af_unix.readsem);
1526
1527         do
1528         {
1529                 int chunk;
1530                 struct sk_buff *skb;
1531
1532                 skb=skb_dequeue(&sk->receive_queue);
1533                 if (skb==NULL)
1534                 {
1535                         if (copied >= target)
1536                                 break;
1537
1538                         /*
1539                          *      POSIX 1003.1g mandates this order.
1540                          */
1541                          
1542                         if ((err = sock_error(sk)) != 0)
1543                                 break;
1544                         if (sk->shutdown & RCV_SHUTDOWN)
1545                                 break;
1546                         err = -EAGAIN;
1547                         if (!timeo)
1548                                 break;
                             /* readsem is released for the duration of the wait. */
1549                         up(&sk->protinfo.af_unix.readsem);
1550
1551                         timeo = unix_stream_data_wait(sk, timeo);
1552
                             /*
                              * readsem is not held here, so jump past
                              * the up() at the end of the function.
                              */
1553                         if (signal_pending(current)) {
1554                                 err = sock_intr_errno(timeo);
1555                                 goto out;
1556                         }
1557                         down(&sk->protinfo.af_unix.readsem);
1558                         continue;
1559                 }
1560
1561                 if (check_creds) {
1562                         /* Never glue messages from different writers */
1563                         if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
1564                                 skb_queue_head(&sk->receive_queue, skb);
1565                                 break;
1566                         }
1567                 } else {
1568                         /* Copy credentials */
1569                         scm->creds = *UNIXCREDS(skb);
1570                         check_creds = 1;
1571                 }
1572
1573                 /* Copy address just once */
1574                 if (sunaddr)
1575                 {
1576                         unix_copy_addr(msg, skb->sk);
1577                         sunaddr = NULL;
1578                 }
1579
1580                 chunk = min_t(unsigned int, skb->len, size);
1581                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
                             /* Copy failed: put the skb back on the queue. */
1582                         skb_queue_head(&sk->receive_queue, skb);
1583                         if (copied == 0)
1584                                 copied = -EFAULT;
1585                         break;
1586                 }
1587                 copied += chunk;
1588                 size -= chunk;
1589
1590                 /* Mark read part of skb as used */
1591                 if (!(flags & MSG_PEEK))
1592                 {
1593                         skb_pull(skb, chunk);
1594
1595                         if (UNIXCB(skb).fp)
1596                                 unix_detach_fds(scm, skb);
1597
1598                         /* put the skb back if we didn't use it up.. */
1599                         if (skb->len)
1600                         {
1601                                 skb_queue_head(&sk->receive_queue, skb);
1602                                 break;
1603                         }
1604
1605                         kfree_skb(skb);
1606
                             /* Stop once descriptors have been collected in scm. */
1607                         if (scm->fp)
1608                                 break;
1609                 }
1610                 else
1611                 {
1612                         /* It is questionable, see note in unix_dgram_recvmsg.
1613                          */
1614                         if (UNIXCB(skb).fp)
1615                                 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1616
1617                         /* put message back and return */
1618                         skb_queue_head(&sk->receive_queue, skb);
1619                         break;
1620                 }
1621         } while (size);
1622
1623         up(&sk->protinfo.af_unix.readsem);
1624 out:
1625         return copied ? : err;
1626 }
1627
1628 static int unix_shutdown(struct socket *sock, int mode)
1629 {
1630         struct sock *sk = sock->sk;
1631         unix_socket *other;
1632
1633         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1634
1635         if (mode) {
1636                 unix_state_wlock(sk);
1637                 sk->shutdown |= mode;
1638                 other=unix_peer(sk);
1639                 if (other)
1640                         sock_hold(other);
1641                 unix_state_wunlock(sk);
1642                 sk->state_change(sk);
1643
1644                 if (other && sk->type == SOCK_STREAM) {
1645                         int peer_mode = 0;
1646
1647                         if (mode&RCV_SHUTDOWN)
1648                                 peer_mode |= SEND_SHUTDOWN;
1649                         if (mode&SEND_SHUTDOWN)
1650                                 peer_mode |= RCV_SHUTDOWN;
1651                         unix_state_wlock(other);
1652                         other->shutdown |= peer_mode;
1653                         unix_state_wunlock(other);
1654                         other->state_change(other);
1655                         read_lock(&other->callback_lock);
1656                         if (peer_mode == SHUTDOWN_MASK)
1657                                 sk_wake_async(other,1,POLL_HUP);
1658                         else if (peer_mode & RCV_SHUTDOWN)
1659                                 sk_wake_async(other,1,POLL_IN);
1660                         read_unlock(&other->callback_lock);
1661                 }
1662                 if (other)
1663                         sock_put(other);
1664         }
1665         return 0;
1666 }
1667
1668 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1669 {
1670         struct sock *sk = sock->sk;
1671         long amount=0;
1672         int err;
1673
1674         switch(cmd)
1675         {
1676                 case SIOCOUTQ:
1677                         amount = atomic_read(&sk->wmem_alloc);
1678                         err = put_user(amount, (int *)arg);
1679                         break;
1680                 case SIOCINQ:
1681                 {
1682                         struct sk_buff *skb;
1683                         if (sk->state==TCP_LISTEN) {
1684                                 err = -EINVAL;
1685                                 break;
1686                         }
1687
1688                         spin_lock(&sk->receive_queue.lock);
1689                         if (sk->type == SOCK_STREAM) {
1690                                 skb_queue_walk(&sk->receive_queue, skb)
1691                                         amount += skb->len;
1692                         } else {
1693                                 if((skb=skb_peek(&sk->receive_queue))!=NULL)
1694                                         amount=skb->len;
1695                         }
1696                         spin_unlock(&sk->receive_queue.lock);
1697                         err = put_user(amount, (int *)arg);
1698                         break;
1699                 }
1700
1701                 default:
1702                         err = dev_ioctl(cmd, (void *)arg);
1703                         break;
1704         }
1705         return err;
1706 }
1707
1708 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1709 {
1710         struct sock *sk = sock->sk;
1711         unsigned int mask;
1712
1713         poll_wait(file, sk->sleep, wait);
1714         mask = 0;
1715
1716         /* exceptional events? */
1717         if (sk->err)
1718                 mask |= POLLERR;
1719         if (sk->shutdown == SHUTDOWN_MASK)
1720                 mask |= POLLHUP;
1721
1722         /* readable? */
1723         if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
1724                 mask |= POLLIN | POLLRDNORM;
1725
1726         /* Connection-based need to check for termination and startup */
1727         if (sk->type == SOCK_STREAM && sk->state==TCP_CLOSE)
1728                 mask |= POLLHUP;
1729
1730         /*
1731          * we set writable also when the other side has shut down the
1732          * connection. This prevents stuck sockets.
1733          */
1734         if (unix_writable(sk))
1735                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1736
1737         return mask;
1738 }
1739
1740
1741 #ifdef CONFIG_PROC_FS
/*
 *      read_proc handler that prints one line per AF_UNIX socket.
 *
 *      A header line is written first, then for every socket visited
 *      by forall_unix_sockets() (under unix_table_lock) its pointer,
 *      reference count, flags, type, state, inode number and, if the
 *      socket has an address, its path. For sockets that
 *      UNIX_ABSTRACT() reports as abstract the first path byte is
 *      shown as '@'.
 *
 *      'begin' and 'pos' track how much of the full listing has been
 *      produced so that only the window [offset, offset+length) is
 *      handed back through *start. *eof is set only when the loop
 *      visited every socket. Returns the byte count in that window,
 *      clamped to the range 0..length.
 *
 *      NOTE(review): nothing in this function bounds 'len' against the
 *      size of 'buffer'; confirm the caller's buffer is large enough.
 */
1742 static int unix_read_proc(char *buffer, char **start, off_t offset,
1743                           int length, int *eof, void *data)
1744 {
1745         off_t pos=0;
1746         off_t begin=0;
1747         int len=0;
1748         int i;
1749         unix_socket *s;
1750         
1751         len+= sprintf(buffer,"Num       RefCount Protocol Flags    Type St "
1752             "Inode Path\n");
1753
1754         read_lock(&unix_table_lock);
1755         forall_unix_sockets (i,s)
1756         {
1757                 unix_state_rlock(s);
1758
1759                 len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5lu",
1760                         s,
1761                         atomic_read(&s->refcnt),
1762                         0,
1763                         s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1764                         s->type,
1765                         s->socket ?
1766                         (s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1767                         (s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1768                         sock_i_ino(s));
1769
1770                 if (s->protinfo.af_unix.addr)
1771                 {
                             /*
                              * Emit a space and the path bytes (address
                              * length minus the family field). For a
                              * non-abstract name len is stepped back by
                              * one, so the final byte of the copied
                              * path is later overwritten by the newline;
                              * for an abstract name the first path byte
                              * is replaced with '@'.
                              */
1772                         buffer[len++] = ' ';
1773                         memcpy(buffer+len, s->protinfo.af_unix.addr->name->sun_path,
1774                                s->protinfo.af_unix.addr->len-sizeof(short));
1775                         if (!UNIX_ABSTRACT(s))
1776                                 len--;
1777                         else
1778                                 buffer[len] = '@';
1779                         len += s->protinfo.af_unix.addr->len - sizeof(short);
1780                 }
1781                 unix_state_runlock(s);
1782
1783                 buffer[len++]='\n';
1784                 
1785                 pos = begin + len;
                     /* Everything so far lies before the window: discard it. */
1786                 if(pos<offset)
1787                 {
1788                         len=0;
1789                         begin=pos;
1790                 }
                     /* Produced past the end of the window: stop without eof. */
1791                 if(pos>offset+length)
1792                         goto done;
1793         }
1794         *eof = 1;
1795 done:
1796         read_unlock(&unix_table_lock);
1797         *start=buffer+(offset-begin);
1798         len-=(offset-begin);
1799         if(len>length)
1800                 len=length;
1801         if (len < 0)
1802                 len = 0;
1803         return len;
1804 }
1805 #endif
1806
1807 struct proto_ops unix_stream_ops = {
1808         family:         PF_UNIX,
1809         
1810         release:        unix_release,
1811         bind:           unix_bind,
1812         connect:        unix_stream_connect,
1813         socketpair:     unix_socketpair,
1814         accept:         unix_accept,
1815         getname:        unix_getname,
1816         poll:           unix_poll,
1817         ioctl:          unix_ioctl,
1818         listen:         unix_listen,
1819         shutdown:       unix_shutdown,
1820         setsockopt:     sock_no_setsockopt,
1821         getsockopt:     sock_no_getsockopt,
1822         sendmsg:        unix_stream_sendmsg,
1823         recvmsg:        unix_stream_recvmsg,
1824         mmap:           sock_no_mmap,
1825         sendpage:       sock_no_sendpage,
1826 };
1827
1828 struct proto_ops unix_dgram_ops = {
1829         family:         PF_UNIX,
1830
1831         release:        unix_release,
1832         bind:           unix_bind,
1833         connect:        unix_dgram_connect,
1834         socketpair:     unix_socketpair,
1835         accept:         sock_no_accept,
1836         getname:        unix_getname,
1837         poll:           datagram_poll,
1838         ioctl:          unix_ioctl,
1839         listen:         sock_no_listen,
1840         shutdown:       unix_shutdown,
1841         setsockopt:     sock_no_setsockopt,
1842         getsockopt:     sock_no_getsockopt,
1843         sendmsg:        unix_dgram_sendmsg,
1844         recvmsg:        unix_dgram_recvmsg,
1845         mmap:           sock_no_mmap,
1846         sendpage:       sock_no_sendpage,
1847 };
1848
1849 struct net_proto_family unix_family_ops = {
1850         family:         PF_UNIX,
1851         create:         unix_create
1852 };
1853
/*
 * Sysctl hooks used by af_unix_init() and af_unix_exit().  Without
 * CONFIG_SYSCTL they collapse to empty inline stubs so the callers need
 * no conditional code; with it, the real functions are defined outside
 * this file.
 */
#ifndef CONFIG_SYSCTL
static inline void unix_sysctl_register(void) { }
static inline void unix_sysctl_unregister(void) { }
#else
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#endif
1861
1862 static char banner[] __initdata = KERN_INFO "NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.\n";
1863
1864 static int __init af_unix_init(void)
1865 {
1866         struct sk_buff *dummy_skb;
1867
1868         printk(banner);
1869         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb))
1870         {
1871                 printk(KERN_CRIT "unix_proto_init: panic\n");
1872                 return -1;
1873         }
1874         sock_register(&unix_family_ops);
1875 #ifdef CONFIG_PROC_FS
1876         create_proc_read_entry("net/unix", 0, 0, unix_read_proc, NULL);
1877 #endif
1878         unix_sysctl_register();
1879         return 0;
1880 }
1881
1882 static void __exit af_unix_exit(void)
1883 {
1884         sock_unregister(PF_UNIX);
1885         unix_sysctl_unregister();
1886         remove_proc_entry("net/unix", 0);
1887 }
1888
1889 module_init(af_unix_init);
1890 module_exit(af_unix_exit);
1891
1892 MODULE_LICENSE("GPL");