make oldconfig will rebuild these...
[linux-2.4.21-pre4.git] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13
14 #include <asm/uaccess.h>
15 #include <asm/ioctls.h>
16
17 /*
18  * We use a start+len construction, which provides full use of the 
19  * allocated memory.
20  * -- Florian Coosmann (FGC)
21  * 
22  * Reads with count = 0 should always return 0.
23  * -- Julian Bradfield 1999-06-07.
24  */
25
26 /* Drop the inode semaphore and wait for a pipe event, atomically */
27 void pipe_wait(struct inode * inode)
28 {
29         DECLARE_WAITQUEUE(wait, current);
30         current->state = TASK_INTERRUPTIBLE;
31         add_wait_queue(PIPE_WAIT(*inode), &wait);
32         up(PIPE_SEM(*inode));
33         schedule();
34         remove_wait_queue(PIPE_WAIT(*inode), &wait);
35         current->state = TASK_RUNNING;
36         down(PIPE_SEM(*inode));
37 }
38
39 static ssize_t
40 pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
41 {
42         struct inode *inode = filp->f_dentry->d_inode;
43         ssize_t size, read, ret;
44
45         /* Seeks are not allowed on pipes.  */
46         ret = -ESPIPE;
47         read = 0;
48         if (ppos != &filp->f_pos)
49                 goto out_nolock;
50
51         /* Always return 0 on null read.  */
52         ret = 0;
53         if (count == 0)
54                 goto out_nolock;
55
56         /* Get the pipe semaphore */
57         ret = -ERESTARTSYS;
58         if (down_interruptible(PIPE_SEM(*inode)))
59                 goto out_nolock;
60
61         if (PIPE_EMPTY(*inode)) {
62 do_more_read:
63                 ret = 0;
64                 if (!PIPE_WRITERS(*inode))
65                         goto out;
66
67                 ret = -EAGAIN;
68                 if (filp->f_flags & O_NONBLOCK)
69                         goto out;
70
71                 for (;;) {
72                         PIPE_WAITING_READERS(*inode)++;
73                         pipe_wait(inode);
74                         PIPE_WAITING_READERS(*inode)--;
75                         ret = -ERESTARTSYS;
76                         if (signal_pending(current))
77                                 goto out;
78                         ret = 0;
79                         if (!PIPE_EMPTY(*inode))
80                                 break;
81                         if (!PIPE_WRITERS(*inode))
82                                 goto out;
83                 }
84         }
85
86         /* Read what data is available.  */
87         ret = -EFAULT;
88         while (count > 0 && (size = PIPE_LEN(*inode))) {
89                 char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
90                 ssize_t chars = PIPE_MAX_RCHUNK(*inode);
91
92                 if (chars > count)
93                         chars = count;
94                 if (chars > size)
95                         chars = size;
96
97                 if (copy_to_user(buf, pipebuf, chars))
98                         goto out;
99
100                 read += chars;
101                 PIPE_START(*inode) += chars;
102                 PIPE_START(*inode) &= (PIPE_SIZE - 1);
103                 PIPE_LEN(*inode) -= chars;
104                 count -= chars;
105                 buf += chars;
106         }
107
108         /* Cache behaviour optimization */
109         if (!PIPE_LEN(*inode))
110                 PIPE_START(*inode) = 0;
111
112         if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
113                 /*
114                  * We know that we are going to sleep: signal
115                  * writers synchronously that there is more
116                  * room.
117                  */
118                 wake_up_interruptible_sync(PIPE_WAIT(*inode));
119                 if (!PIPE_EMPTY(*inode))
120                         BUG();
121                 goto do_more_read;
122         }
123         /* Signal writers asynchronously that there is more room.  */
124         wake_up_interruptible(PIPE_WAIT(*inode));
125
126         ret = read;
127 out:
128         up(PIPE_SEM(*inode));
129 out_nolock:
130         if (read)
131                 ret = read;
132
133         UPDATE_ATIME(inode);
134         return ret;
135 }
136
137 static ssize_t
138 pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
139 {
140         struct inode *inode = filp->f_dentry->d_inode;
141         ssize_t free, written, ret;
142
143         /* Seeks are not allowed on pipes.  */
144         ret = -ESPIPE;
145         written = 0;
146         if (ppos != &filp->f_pos)
147                 goto out_nolock;
148
149         /* Null write succeeds.  */
150         ret = 0;
151         if (count == 0)
152                 goto out_nolock;
153
154         ret = -ERESTARTSYS;
155         if (down_interruptible(PIPE_SEM(*inode)))
156                 goto out_nolock;
157
158         /* No readers yields SIGPIPE.  */
159         if (!PIPE_READERS(*inode))
160                 goto sigpipe;
161
162         /* If count <= PIPE_BUF, we have to make it atomic.  */
163         free = (count <= PIPE_BUF ? count : 1);
164
165         /* Wait, or check for, available space.  */
166         if (filp->f_flags & O_NONBLOCK) {
167                 ret = -EAGAIN;
168                 if (PIPE_FREE(*inode) < free)
169                         goto out;
170         } else {
171                 while (PIPE_FREE(*inode) < free) {
172                         PIPE_WAITING_WRITERS(*inode)++;
173                         pipe_wait(inode);
174                         PIPE_WAITING_WRITERS(*inode)--;
175                         ret = -ERESTARTSYS;
176                         if (signal_pending(current))
177                                 goto out;
178
179                         if (!PIPE_READERS(*inode))
180                                 goto sigpipe;
181                 }
182         }
183
184         /* Copy into available space.  */
185         ret = -EFAULT;
186         while (count > 0) {
187                 int space;
188                 char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
189                 ssize_t chars = PIPE_MAX_WCHUNK(*inode);
190
191                 if ((space = PIPE_FREE(*inode)) != 0) {
192                         if (chars > count)
193                                 chars = count;
194                         if (chars > space)
195                                 chars = space;
196
197                         if (copy_from_user(pipebuf, buf, chars))
198                                 goto out;
199
200                         written += chars;
201                         PIPE_LEN(*inode) += chars;
202                         count -= chars;
203                         buf += chars;
204                         space = PIPE_FREE(*inode);
205                         continue;
206                 }
207
208                 ret = written;
209                 if (filp->f_flags & O_NONBLOCK)
210                         break;
211
212                 do {
213                         /*
214                          * Synchronous wake-up: it knows that this process
215                          * is going to give up this CPU, so it doesn't have
216                          * to do idle reschedules.
217                          */
218                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
219                         PIPE_WAITING_WRITERS(*inode)++;
220                         pipe_wait(inode);
221                         PIPE_WAITING_WRITERS(*inode)--;
222                         if (signal_pending(current))
223                                 goto out;
224                         if (!PIPE_READERS(*inode))
225                                 goto sigpipe;
226                 } while (!PIPE_FREE(*inode));
227                 ret = -EFAULT;
228         }
229
230         /* Signal readers asynchronously that there is more data.  */
231         wake_up_interruptible(PIPE_WAIT(*inode));
232
233         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
234         mark_inode_dirty(inode);
235
236 out:
237         up(PIPE_SEM(*inode));
238 out_nolock:
239         if (written)
240                 ret = written;
241         return ret;
242
243 sigpipe:
244         if (written)
245                 goto out;
246         up(PIPE_SEM(*inode));
247         send_sig(SIGPIPE, current, 0);
248         return -EPIPE;
249 }
250
251 static ssize_t
252 bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
253 {
254         return -EBADF;
255 }
256
257 static ssize_t
258 bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
259 {
260         return -EBADF;
261 }
262
263 static int
264 pipe_ioctl(struct inode *pino, struct file *filp,
265            unsigned int cmd, unsigned long arg)
266 {
267         switch (cmd) {
268                 case FIONREAD:
269                         return put_user(PIPE_LEN(*pino), (int *)arg);
270                 default:
271                         return -EINVAL;
272         }
273 }
274
275 /* No kernel lock held - fine */
276 static unsigned int
277 pipe_poll(struct file *filp, poll_table *wait)
278 {
279         unsigned int mask;
280         struct inode *inode = filp->f_dentry->d_inode;
281
282         poll_wait(filp, PIPE_WAIT(*inode), wait);
283
284         /* Reading only -- no need for acquiring the semaphore.  */
285         mask = POLLIN | POLLRDNORM;
286         if (PIPE_EMPTY(*inode))
287                 mask = POLLOUT | POLLWRNORM;
288         if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
289                 mask |= POLLHUP;
290         if (!PIPE_READERS(*inode))
291                 mask |= POLLERR;
292
293         return mask;
294 }
295
296 /* FIXME: most Unices do not set POLLERR for fifos */
297 #define fifo_poll pipe_poll
298
299 static int
300 pipe_release(struct inode *inode, int decr, int decw)
301 {
302         down(PIPE_SEM(*inode));
303         PIPE_READERS(*inode) -= decr;
304         PIPE_WRITERS(*inode) -= decw;
305         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
306                 struct pipe_inode_info *info = inode->i_pipe;
307                 inode->i_pipe = NULL;
308                 free_page((unsigned long) info->base);
309                 kfree(info);
310         } else {
311                 wake_up_interruptible(PIPE_WAIT(*inode));
312         }
313         up(PIPE_SEM(*inode));
314
315         return 0;
316 }
317
/* release() for a read-only descriptor: one reader fewer. @filp is unused. */
static int pipe_read_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 1, 0);
}
323
/* release() for a write-only descriptor: one writer fewer. @filp is unused. */
static int pipe_write_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 0, 1);
}
329
330 static int
331 pipe_rdwr_release(struct inode *inode, struct file *filp)
332 {
333         int decr, decw;
334
335         decr = (filp->f_mode & FMODE_READ) != 0;
336         decw = (filp->f_mode & FMODE_WRITE) != 0;
337         return pipe_release(inode, decr, decw);
338 }
339
340 static int
341 pipe_read_open(struct inode *inode, struct file *filp)
342 {
343         /* We could have perhaps used atomic_t, but this and friends
344            below are the only places.  So it doesn't seem worthwhile.  */
345         down(PIPE_SEM(*inode));
346         PIPE_READERS(*inode)++;
347         up(PIPE_SEM(*inode));
348
349         return 0;
350 }
351
352 static int
353 pipe_write_open(struct inode *inode, struct file *filp)
354 {
355         down(PIPE_SEM(*inode));
356         PIPE_WRITERS(*inode)++;
357         up(PIPE_SEM(*inode));
358
359         return 0;
360 }
361
362 static int
363 pipe_rdwr_open(struct inode *inode, struct file *filp)
364 {
365         down(PIPE_SEM(*inode));
366         if (filp->f_mode & FMODE_READ)
367                 PIPE_READERS(*inode)++;
368         if (filp->f_mode & FMODE_WRITE)
369                 PIPE_WRITERS(*inode)++;
370         up(PIPE_SEM(*inode));
371
372         return 0;
373 }
374
375 /*
376  * The file_operations structs are not static because they
377  * are also used in linux/fs/fifo.c to do operations on FIFOs.
378  */
379 struct file_operations read_fifo_fops = {
380         llseek:         no_llseek,
381         read:           pipe_read,
382         write:          bad_pipe_w,
383         poll:           fifo_poll,
384         ioctl:          pipe_ioctl,
385         open:           pipe_read_open,
386         release:        pipe_read_release,
387 };
388
389 struct file_operations write_fifo_fops = {
390         llseek:         no_llseek,
391         read:           bad_pipe_r,
392         write:          pipe_write,
393         poll:           fifo_poll,
394         ioctl:          pipe_ioctl,
395         open:           pipe_write_open,
396         release:        pipe_write_release,
397 };
398
399 struct file_operations rdwr_fifo_fops = {
400         llseek:         no_llseek,
401         read:           pipe_read,
402         write:          pipe_write,
403         poll:           fifo_poll,
404         ioctl:          pipe_ioctl,
405         open:           pipe_rdwr_open,
406         release:        pipe_rdwr_release,
407 };
408
409 struct file_operations read_pipe_fops = {
410         llseek:         no_llseek,
411         read:           pipe_read,
412         write:          bad_pipe_w,
413         poll:           pipe_poll,
414         ioctl:          pipe_ioctl,
415         open:           pipe_read_open,
416         release:        pipe_read_release,
417 };
418
419 struct file_operations write_pipe_fops = {
420         llseek:         no_llseek,
421         read:           bad_pipe_r,
422         write:          pipe_write,
423         poll:           pipe_poll,
424         ioctl:          pipe_ioctl,
425         open:           pipe_write_open,
426         release:        pipe_write_release,
427 };
428
429 struct file_operations rdwr_pipe_fops = {
430         llseek:         no_llseek,
431         read:           pipe_read,
432         write:          pipe_write,
433         poll:           pipe_poll,
434         ioctl:          pipe_ioctl,
435         open:           pipe_rdwr_open,
436         release:        pipe_rdwr_release,
437 };
438
439 struct inode* pipe_new(struct inode* inode)
440 {
441         unsigned long page;
442
443         page = __get_free_page(GFP_USER);
444         if (!page)
445                 return NULL;
446
447         inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
448         if (!inode->i_pipe)
449                 goto fail_page;
450
451         init_waitqueue_head(PIPE_WAIT(*inode));
452         PIPE_BASE(*inode) = (char*) page;
453         PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
454         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
455         PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
456         PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
457
458         return inode;
459 fail_page:
460         free_page(page);
461         return NULL;
462 }
463
464 static struct vfsmount *pipe_mnt;
465 static int pipefs_delete_dentry(struct dentry *dentry)
466 {
467         return 1;
468 }
469 static struct dentry_operations pipefs_dentry_operations = {
470         d_delete:       pipefs_delete_dentry,
471 };
472
473 static struct inode * get_pipe_inode(void)
474 {
475         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
476
477         if (!inode)
478                 goto fail_inode;
479
480         if(!pipe_new(inode))
481                 goto fail_iput;
482         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
483         inode->i_fop = &rdwr_pipe_fops;
484
485         /*
486          * Mark the inode dirty from the very beginning,
487          * that way it will never be moved to the dirty
488          * list because "mark_inode_dirty()" will think
489          * that it already _is_ on the dirty list.
490          */
491         inode->i_state = I_DIRTY;
492         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
493         inode->i_uid = current->fsuid;
494         inode->i_gid = current->fsgid;
495         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
496         inode->i_blksize = PAGE_SIZE;
497         return inode;
498
499 fail_iput:
500         iput(inode);
501 fail_inode:
502         return NULL;
503 }
504
505 int do_pipe(int *fd)
506 {
507         struct qstr this;
508         char name[32];
509         struct dentry *dentry;
510         struct inode * inode;
511         struct file *f1, *f2;
512         int error;
513         int i,j;
514
515         error = -ENFILE;
516         f1 = get_empty_filp();
517         if (!f1)
518                 goto no_files;
519
520         f2 = get_empty_filp();
521         if (!f2)
522                 goto close_f1;
523
524         inode = get_pipe_inode();
525         if (!inode)
526                 goto close_f12;
527
528         error = get_unused_fd();
529         if (error < 0)
530                 goto close_f12_inode;
531         i = error;
532
533         error = get_unused_fd();
534         if (error < 0)
535                 goto close_f12_inode_i;
536         j = error;
537
538         error = -ENOMEM;
539         sprintf(name, "[%lu]", inode->i_ino);
540         this.name = name;
541         this.len = strlen(name);
542         this.hash = inode->i_ino; /* will go */
543         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
544         if (!dentry)
545                 goto close_f12_inode_i_j;
546         dentry->d_op = &pipefs_dentry_operations;
547         d_add(dentry, inode);
548         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
549         f1->f_dentry = f2->f_dentry = dget(dentry);
550
551         /* read file */
552         f1->f_pos = f2->f_pos = 0;
553         f1->f_flags = O_RDONLY;
554         f1->f_op = &read_pipe_fops;
555         f1->f_mode = 1;
556         f1->f_version = 0;
557
558         /* write file */
559         f2->f_flags = O_WRONLY;
560         f2->f_op = &write_pipe_fops;
561         f2->f_mode = 2;
562         f2->f_version = 0;
563
564         fd_install(i, f1);
565         fd_install(j, f2);
566         fd[0] = i;
567         fd[1] = j;
568         return 0;
569
570 close_f12_inode_i_j:
571         put_unused_fd(j);
572 close_f12_inode_i:
573         put_unused_fd(i);
574 close_f12_inode:
575         free_page((unsigned long) PIPE_BASE(*inode));
576         kfree(inode->i_pipe);
577         inode->i_pipe = NULL;
578         iput(inode);
579 close_f12:
580         put_filp(f2);
581 close_f1:
582         put_filp(f1);
583 no_files:
584         return error;   
585 }
586
/*
 * pipefs should _never_ be mounted by userland - it would be too much of a
 * security hassle, with no real gain from having the whole thing mounted.
 * So we don't need any operations on the root directory. However, we do
 * need a non-trivial d_name - "pipe:" will do nicely and kills the
 * special-casing in procfs.
 */
593 static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
594 {
595         buf->f_type = PIPEFS_MAGIC;
596         buf->f_bsize = 1024;
597         buf->f_namelen = 255;
598         return 0;
599 }
600
601 static struct super_operations pipefs_ops = {
602         statfs:         pipefs_statfs,
603 };
604
605 static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
606 {
607         struct inode *root = new_inode(sb);
608         if (!root)
609                 return NULL;
610         root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
611         root->i_uid = root->i_gid = 0;
612         root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
613         sb->s_blocksize = 1024;
614         sb->s_blocksize_bits = 10;
615         sb->s_magic = PIPEFS_MAGIC;
616         sb->s_op        = &pipefs_ops;
617         sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
618         if (!sb->s_root) {
619                 iput(root);
620                 return NULL;
621         }
622         sb->s_root->d_sb = sb;
623         sb->s_root->d_parent = sb->s_root;
624         d_instantiate(sb->s_root, root);
625         return sb;
626 }
627
628 static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT);
629
630 static int __init init_pipe_fs(void)
631 {
632         int err = register_filesystem(&pipe_fs_type);
633         if (!err) {
634                 pipe_mnt = kern_mount(&pipe_fs_type);
635                 err = PTR_ERR(pipe_mnt);
636                 if (IS_ERR(pipe_mnt))
637                         unregister_filesystem(&pipe_fs_type);
638                 else
639                         err = 0;
640         }
641         return err;
642 }
643
644 static void __exit exit_pipe_fs(void)
645 {
646         unregister_filesystem(&pipe_fs_type);
647         mntput(pipe_mnt);
648 }
649
650 module_init(init_pipe_fs)
651 module_exit(exit_pipe_fs)