original comment: +Wilson03172004,marked due to this pci host does not support MWI
[linux-2.4.git] / fs / pipe.c
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13
14 #include <asm/uaccess.h>
15 #include <asm/ioctls.h>
16
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 */
25
26 /* Drop the inode semaphore and wait for a pipe event, atomically */
27 void pipe_wait(struct inode * inode)
28 {
29         DECLARE_WAITQUEUE(wait, current);
30         current->state = TASK_INTERRUPTIBLE;
31         add_wait_queue(PIPE_WAIT(*inode), &wait);
32         up(PIPE_SEM(*inode));
33         schedule();
34         remove_wait_queue(PIPE_WAIT(*inode), &wait);
35         current->state = TASK_RUNNING;
36         down(PIPE_SEM(*inode));
37 }
38
39 static ssize_t
40 pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
41 {
42         struct inode *inode = filp->f_dentry->d_inode;
43         ssize_t size, read, ret;
44
45         /* Seeks are not allowed on pipes.  */
46         ret = -ESPIPE;
47         read = 0;
48         if (ppos != &filp->f_pos)
49                 goto out_nolock;
50
51         /* Always return 0 on null read.  */
52         ret = 0;
53         if (count == 0)
54                 goto out_nolock;
55
56         /* Get the pipe semaphore */
57         ret = -ERESTARTSYS;
58         if (down_interruptible(PIPE_SEM(*inode)))
59                 goto out_nolock;
60
61         if (PIPE_EMPTY(*inode)) {
62 do_more_read:
63                 ret = 0;
64                 if (!PIPE_WRITERS(*inode))
65                         goto out;
66
67                 ret = -EAGAIN;
68                 if (filp->f_flags & O_NONBLOCK)
69                         goto out;
70
71                 for (;;) {
72                         PIPE_WAITING_READERS(*inode)++;
73                         pipe_wait(inode);
74                         PIPE_WAITING_READERS(*inode)--;
75                         ret = -ERESTARTSYS;
76                         if (signal_pending(current))
77                                 goto out;
78                         ret = 0;
79                         if (!PIPE_EMPTY(*inode))
80                                 break;
81                         if (!PIPE_WRITERS(*inode))
82                                 goto out;
83                 }
84         }
85
86         /* Read what data is available.  */
87         ret = -EFAULT;
88         while (count > 0 && (size = PIPE_LEN(*inode))) {
89                 char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
90                 ssize_t chars = PIPE_MAX_RCHUNK(*inode);
91
92                 if (chars > count)
93                         chars = count;
94                 if (chars > size)
95                         chars = size;
96
97                 if (copy_to_user(buf, pipebuf, chars))
98                         goto out;
99
100                 read += chars;
101                 PIPE_START(*inode) += chars;
102                 PIPE_START(*inode) &= (PIPE_SIZE - 1);
103                 PIPE_LEN(*inode) -= chars;
104                 count -= chars;
105                 buf += chars;
106         }
107
108         /* Cache behaviour optimization */
109         if (!PIPE_LEN(*inode))
110                 PIPE_START(*inode) = 0;
111
112         if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
113                 /*
114                  * We know that we are going to sleep: signal
115                  * writers synchronously that there is more
116                  * room.
117                  */
118                 wake_up_interruptible_sync(PIPE_WAIT(*inode));
119                 if (!PIPE_EMPTY(*inode))
120                         BUG();
121                 goto do_more_read;
122         }
123         /* Signal writers asynchronously that there is more room.  */
124         wake_up_interruptible(PIPE_WAIT(*inode));
125
126         ret = read;
127 out:
128         up(PIPE_SEM(*inode));
129 out_nolock:
130         if (read)
131                 ret = read;
132
133         UPDATE_ATIME(inode);
134         return ret;
135 }
136
137 static ssize_t
138 pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
139 {
140         struct inode *inode = filp->f_dentry->d_inode;
141         ssize_t free, written, ret;
142
143         /* Seeks are not allowed on pipes.  */
144         ret = -ESPIPE;
145         written = 0;
146         if (ppos != &filp->f_pos)
147                 goto out_nolock;
148
149         /* Null write succeeds.  */
150         ret = 0;
151         if (count == 0)
152                 goto out_nolock;
153
154         ret = -ERESTARTSYS;
155         if (down_interruptible(PIPE_SEM(*inode)))
156                 goto out_nolock;
157
158         /* No readers yields SIGPIPE.  */
159         if (!PIPE_READERS(*inode))
160                 goto sigpipe;
161
162         /* If count <= PIPE_BUF, we have to make it atomic.  */
163         free = (count <= PIPE_BUF ? count : 1);
164
165         /* Wait, or check for, available space.  */
166         if (filp->f_flags & O_NONBLOCK) {
167                 ret = -EAGAIN;
168                 if (PIPE_FREE(*inode) < free)
169                         goto out;
170         } else {
171                 while (PIPE_FREE(*inode) < free) {
172                         PIPE_WAITING_WRITERS(*inode)++;
173                         pipe_wait(inode);
174                         PIPE_WAITING_WRITERS(*inode)--;
175                         ret = -ERESTARTSYS;
176                         if (signal_pending(current))
177                                 goto out;
178
179                         if (!PIPE_READERS(*inode))
180                                 goto sigpipe;
181                 }
182         }
183
184         /* Copy into available space.  */
185         ret = -EFAULT;
186         while (count > 0) {
187                 int space;
188                 char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
189                 ssize_t chars = PIPE_MAX_WCHUNK(*inode);
190
191                 if ((space = PIPE_FREE(*inode)) != 0) {
192                         if (chars > count)
193                                 chars = count;
194                         if (chars > space)
195                                 chars = space;
196
197                         if (copy_from_user(pipebuf, buf, chars))
198                                 goto out;
199
200                         written += chars;
201                         PIPE_LEN(*inode) += chars;
202                         count -= chars;
203                         buf += chars;
204                         space = PIPE_FREE(*inode);
205                         continue;
206                 }
207
208                 ret = written;
209                 if (filp->f_flags & O_NONBLOCK)
210                         break;
211
212                 do {
213                         /*
214                          * Synchronous wake-up: it knows that this process
215                          * is going to give up this CPU, so it doesn't have
216                          * to do idle reschedules.
217                          */
218                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
219                         PIPE_WAITING_WRITERS(*inode)++;
220                         pipe_wait(inode);
221                         PIPE_WAITING_WRITERS(*inode)--;
222                         if (signal_pending(current))
223                                 goto out;
224                         if (!PIPE_READERS(*inode))
225                                 goto sigpipe;
226                 } while (!PIPE_FREE(*inode));
227                 ret = -EFAULT;
228         }
229
230         /* Signal readers asynchronously that there is more data.  */
231         wake_up_interruptible(PIPE_WAIT(*inode));
232
233         update_mctime(inode);
234
235 out:
236         up(PIPE_SEM(*inode));
237 out_nolock:
238         if (written)
239                 ret = written;
240         return ret;
241
242 sigpipe:
243         if (written)
244                 goto out;
245         up(PIPE_SEM(*inode));
246         send_sig(SIGPIPE, current, 0);
247         return -EPIPE;
248 }
249
250 static ssize_t
251 bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
252 {
253         return -EBADF;
254 }
255
256 static ssize_t
257 bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
258 {
259         return -EBADF;
260 }
261
262 static int
263 pipe_ioctl(struct inode *pino, struct file *filp,
264            unsigned int cmd, unsigned long arg)
265 {
266         switch (cmd) {
267                 case FIONREAD:
268                         return put_user(PIPE_LEN(*pino), (int *)arg);
269                 default:
270                         return -EINVAL;
271         }
272 }
273
274 /* No kernel lock held - fine */
275 static unsigned int
276 pipe_poll(struct file *filp, poll_table *wait)
277 {
278         unsigned int mask;
279         struct inode *inode = filp->f_dentry->d_inode;
280
281         poll_wait(filp, PIPE_WAIT(*inode), wait);
282
283         /* Reading only -- no need for acquiring the semaphore.  */
284         mask = POLLIN | POLLRDNORM;
285         if (PIPE_EMPTY(*inode))
286                 mask = POLLOUT | POLLWRNORM;
287         if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
288                 mask |= POLLHUP;
289         if (!PIPE_READERS(*inode))
290                 mask |= POLLERR;
291
292         return mask;
293 }
294
295 /* FIXME: most Unices do not set POLLERR for fifos */
296 #define fifo_poll pipe_poll
297
298 static int
299 pipe_release(struct inode *inode, int decr, int decw)
300 {
301         down(PIPE_SEM(*inode));
302         PIPE_READERS(*inode) -= decr;
303         PIPE_WRITERS(*inode) -= decw;
304         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
305                 struct pipe_inode_info *info = inode->i_pipe;
306                 inode->i_pipe = NULL;
307                 free_page((unsigned long) info->base);
308                 kfree(info);
309         } else {
310                 wake_up_interruptible(PIPE_WAIT(*inode));
311         }
312         up(PIPE_SEM(*inode));
313
314         return 0;
315 }
316
/* release() for a read-only end: gives up one reader, no writer. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 1, 0);
}
322
/* release() for a write-only end: gives up one writer, no reader. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 0, 1);
}
328
329 static int
330 pipe_rdwr_release(struct inode *inode, struct file *filp)
331 {
332         int decr, decw;
333
334         decr = (filp->f_mode & FMODE_READ) != 0;
335         decw = (filp->f_mode & FMODE_WRITE) != 0;
336         return pipe_release(inode, decr, decw);
337 }
338
339 static int
340 pipe_read_open(struct inode *inode, struct file *filp)
341 {
342         /* We could have perhaps used atomic_t, but this and friends
343            below are the only places.  So it doesn't seem worthwhile.  */
344         down(PIPE_SEM(*inode));
345         PIPE_READERS(*inode)++;
346         up(PIPE_SEM(*inode));
347
348         return 0;
349 }
350
351 static int
352 pipe_write_open(struct inode *inode, struct file *filp)
353 {
354         down(PIPE_SEM(*inode));
355         PIPE_WRITERS(*inode)++;
356         up(PIPE_SEM(*inode));
357
358         return 0;
359 }
360
361 static int
362 pipe_rdwr_open(struct inode *inode, struct file *filp)
363 {
364         down(PIPE_SEM(*inode));
365         if (filp->f_mode & FMODE_READ)
366                 PIPE_READERS(*inode)++;
367         if (filp->f_mode & FMODE_WRITE)
368                 PIPE_WRITERS(*inode)++;
369         up(PIPE_SEM(*inode));
370
371         return 0;
372 }
373
374 /*
375  * The file_operations structs are not static because they
376  * are also used in linux/fs/fifo.c to do operations on FIFOs.
377  */
378 struct file_operations read_fifo_fops = {
379         llseek:         no_llseek,
380         read:           pipe_read,
381         write:          bad_pipe_w,
382         poll:           fifo_poll,
383         ioctl:          pipe_ioctl,
384         open:           pipe_read_open,
385         release:        pipe_read_release,
386 };
387
388 struct file_operations write_fifo_fops = {
389         llseek:         no_llseek,
390         read:           bad_pipe_r,
391         write:          pipe_write,
392         poll:           fifo_poll,
393         ioctl:          pipe_ioctl,
394         open:           pipe_write_open,
395         release:        pipe_write_release,
396 };
397
398 struct file_operations rdwr_fifo_fops = {
399         llseek:         no_llseek,
400         read:           pipe_read,
401         write:          pipe_write,
402         poll:           fifo_poll,
403         ioctl:          pipe_ioctl,
404         open:           pipe_rdwr_open,
405         release:        pipe_rdwr_release,
406 };
407
408 struct file_operations read_pipe_fops = {
409         llseek:         no_llseek,
410         read:           pipe_read,
411         write:          bad_pipe_w,
412         poll:           pipe_poll,
413         ioctl:          pipe_ioctl,
414         open:           pipe_read_open,
415         release:        pipe_read_release,
416 };
417
418 struct file_operations write_pipe_fops = {
419         llseek:         no_llseek,
420         read:           bad_pipe_r,
421         write:          pipe_write,
422         poll:           pipe_poll,
423         ioctl:          pipe_ioctl,
424         open:           pipe_write_open,
425         release:        pipe_write_release,
426 };
427
428 struct file_operations rdwr_pipe_fops = {
429         llseek:         no_llseek,
430         read:           pipe_read,
431         write:          pipe_write,
432         poll:           pipe_poll,
433         ioctl:          pipe_ioctl,
434         open:           pipe_rdwr_open,
435         release:        pipe_rdwr_release,
436 };
437
438 struct inode* pipe_new(struct inode* inode)
439 {
440         unsigned long page;
441
442         page = __get_free_page(GFP_USER);
443         if (!page)
444                 return NULL;
445
446         inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
447         if (!inode->i_pipe)
448                 goto fail_page;
449
450         init_waitqueue_head(PIPE_WAIT(*inode));
451         PIPE_BASE(*inode) = (char*) page;
452         PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
453         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
454         PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
455         PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
456
457         return inode;
458 fail_page:
459         free_page(page);
460         return NULL;
461 }
462
463 static struct vfsmount *pipe_mnt;
464 static int pipefs_delete_dentry(struct dentry *dentry)
465 {
466         return 1;
467 }
468 static struct dentry_operations pipefs_dentry_operations = {
469         d_delete:       pipefs_delete_dentry,
470 };
471
472 static struct inode * get_pipe_inode(void)
473 {
474         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
475
476         if (!inode)
477                 goto fail_inode;
478
479         if(!pipe_new(inode))
480                 goto fail_iput;
481         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
482         inode->i_fop = &rdwr_pipe_fops;
483
484         /*
485          * Mark the inode dirty from the very beginning,
486          * that way it will never be moved to the dirty
487          * list because "mark_inode_dirty()" will think
488          * that it already _is_ on the dirty list.
489          */
490         inode->i_state = I_DIRTY;
491         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
492         inode->i_uid = current->fsuid;
493         inode->i_gid = current->fsgid;
494         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
495         inode->i_blksize = PAGE_SIZE;
496         return inode;
497
498 fail_iput:
499         iput(inode);
500 fail_inode:
501         return NULL;
502 }
503
504 int do_pipe(int *fd)
505 {
506         struct qstr this;
507         char name[32];
508         struct dentry *dentry;
509         struct inode * inode;
510         struct file *f1, *f2;
511         int error;
512         int i,j;
513
514         error = -ENFILE;
515         f1 = get_empty_filp();
516         if (!f1)
517                 goto no_files;
518
519         f2 = get_empty_filp();
520         if (!f2)
521                 goto close_f1;
522
523         inode = get_pipe_inode();
524         if (!inode)
525                 goto close_f12;
526
527         error = get_unused_fd();
528         if (error < 0)
529                 goto close_f12_inode;
530         i = error;
531
532         error = get_unused_fd();
533         if (error < 0)
534                 goto close_f12_inode_i;
535         j = error;
536
537         error = -ENOMEM;
538         sprintf(name, "[%lu]", inode->i_ino);
539         this.name = name;
540         this.len = strlen(name);
541         this.hash = inode->i_ino; /* will go */
542         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
543         if (!dentry)
544                 goto close_f12_inode_i_j;
545         dentry->d_op = &pipefs_dentry_operations;
546         d_add(dentry, inode);
547         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
548         f1->f_dentry = f2->f_dentry = dget(dentry);
549
550         /* read file */
551         f1->f_pos = f2->f_pos = 0;
552         f1->f_flags = O_RDONLY;
553         f1->f_op = &read_pipe_fops;
554         f1->f_mode = 1;
555         f1->f_version = 0;
556
557         /* write file */
558         f2->f_flags = O_WRONLY;
559         f2->f_op = &write_pipe_fops;
560         f2->f_mode = 2;
561         f2->f_version = 0;
562
563         fd_install(i, f1);
564         fd_install(j, f2);
565         fd[0] = i;
566         fd[1] = j;
567         return 0;
568
569 close_f12_inode_i_j:
570         put_unused_fd(j);
571 close_f12_inode_i:
572         put_unused_fd(i);
573 close_f12_inode:
574         free_page((unsigned long) PIPE_BASE(*inode));
575         kfree(inode->i_pipe);
576         inode->i_pipe = NULL;
577         iput(inode);
578 close_f12:
579         put_filp(f2);
580 close_f1:
581         put_filp(f1);
582 no_files:
583         return error;   
584 }
585
586 /*
587  * pipefs should _never_ be mounted by userland - too much of security hassle,
588  * no real gain from having the whole whorehouse mounted. So we don't need
589  * any operations on the root directory. However, we need a non-trivial
590  * d_name - pipe: will go nicely and kill the special-casing in procfs.
591  */
592 static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
593 {
594         buf->f_type = PIPEFS_MAGIC;
595         buf->f_bsize = 1024;
596         buf->f_namelen = 255;
597         return 0;
598 }
599
600 static struct super_operations pipefs_ops = {
601         statfs:         pipefs_statfs,
602 };
603
604 static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
605 {
606         struct inode *root = new_inode(sb);
607         if (!root)
608                 return NULL;
609         root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
610         root->i_uid = root->i_gid = 0;
611         root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
612         sb->s_blocksize = 1024;
613         sb->s_blocksize_bits = 10;
614         sb->s_magic = PIPEFS_MAGIC;
615         sb->s_op        = &pipefs_ops;
616         sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
617         if (!sb->s_root) {
618                 iput(root);
619                 return NULL;
620         }
621         sb->s_root->d_sb = sb;
622         sb->s_root->d_parent = sb->s_root;
623         d_instantiate(sb->s_root, root);
624         return sb;
625 }
626
627 static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT);
628
629 static int __init init_pipe_fs(void)
630 {
631         int err = register_filesystem(&pipe_fs_type);
632         if (!err) {
633                 pipe_mnt = kern_mount(&pipe_fs_type);
634                 err = PTR_ERR(pipe_mnt);
635                 if (IS_ERR(pipe_mnt))
636                         unregister_filesystem(&pipe_fs_type);
637                 else
638                         err = 0;
639         }
640         return err;
641 }
642
643 static void __exit exit_pipe_fs(void)
644 {
645         unregister_filesystem(&pipe_fs_type);
646         mntput(pipe_mnt);
647 }
648
649 module_init(init_pipe_fs)
650 module_exit(exit_pipe_fs)