fs/fcntl.c

   1 /*
   2  *  linux/fs/fcntl.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/init.h>
   8 #include <linux/mm.h>
   9 #include <linux/file.h>
  10 #include <linux/dnotify.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/slab.h>
  13 #include <linux/iobuf.h>
  14
  15 #include <asm/poll.h>
  16 #include <asm/siginfo.h>
  17 #include <asm/uaccess.h>
  18
  19 extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
  20 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
  21 extern int fcntl_getlease(struct file *filp);
  22
  23 /* Expand files.  Return <0 on error; 0 nothing done; 1 files expanded,
  24  * we may have blocked.
  25  *
  26  * Should be called with the files->file_lock spinlock held for write.
  27  */
  28 static int expand_files(struct files_struct *files, int nr)
  29 {
  30         int err, expand = 0;
  31 #ifdef FDSET_DEBUG
  32         printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr);
  33 #endif
  34
  35         if (nr >= files->max_fdset) {
  36                 expand = 1;
  37                 if ((err = expand_fdset(files, nr)))
  38                         goto out;
  39         }
  40         if (nr >= files->max_fds) {
  41                 expand = 1;
  42                 if ((err = expand_fd_array(files, nr)))
  43                         goto out;
  44         }
  45         err = expand;
  46  out:
  47 #ifdef FDSET_DEBUG
  48         if (err)
  49                 printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, err);
  50 #endif
  51         return err;
  52 }
  53
  54 /*
  55  * locate_fd finds a free file descriptor in the open_fds fdset,
  56  * expanding the fd arrays if necessary.  The files write lock will be
  57  * held on exit to ensure that the fd can be entered atomically.
  58  */
  59
  60 static int locate_fd(struct files_struct *files,
  61                             struct file *file, int orig_start)
  62 {
  63         unsigned int newfd;
  64         int error;
  65         int start;
  66
  67         write_lock(&files->file_lock);
  68
  69         error = -EINVAL;
  70         if (orig_start >= current->rlim[RLIMIT_NOFILE].rlim_cur)
  71                 goto out;
  72
  73 repeat:
  74         /*
  75          * Someone might have closed fd's in the range
  76          * orig_start..files->next_fd
  77          */
  78         start = orig_start;
  79         if (start < files->next_fd)
  80                 start = files->next_fd;
  81
  82         newfd = start;
  83         if (start < files->max_fdset) {
  84                 newfd = find_next_zero_bit(files->open_fds->fds_bits,
  85                         files->max_fdset, start);
  86         }
  87
  88         error = -EMFILE;
  89         if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
  90                 goto out;
  91
  92         error = expand_files(files, newfd);
  93         if (error < 0)
  94                 goto out;
  95
  96         /*
  97          * If we needed to expand the fs array we
  98          * might have blocked - try again.
  99          */
 100         if (error)
 101                 goto repeat;
 102
 103         if (start <= files->next_fd)
 104                 files->next_fd = newfd + 1;
 105
 106         error = newfd;
 107
 108 out:
 109         return error;
 110 }
 111
 112 static inline void allocate_fd(struct files_struct *files,
 113                                         struct file *file, int fd)
 114 {
 115         FD_SET(fd, files->open_fds);
 116         FD_CLR(fd, files->close_on_exec);
 117         write_unlock(&files->file_lock);
 118         fd_install(fd, file);
 119 }
 120
 121 static int dupfd(struct file *file, int start)
 122 {
 123         struct files_struct * files = current->files;
 124         int ret;
 125
 126         ret = locate_fd(files, file, start);
 127         if (ret < 0)
 128                 goto out_putf;
 129         allocate_fd(files, file, ret);
 130         return ret;
 131
 132 out_putf:
 133         write_unlock(&files->file_lock);
 134         fput(file);
 135         return ret;
 136 }
 137
 138 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 139 {
 140         int err = -EBADF;
 141         struct file * file, *tofree;
 142         struct files_struct * files = current->files;
 143
 144         write_lock(&files->file_lock);
 145         if (!(file = fcheck(oldfd)))
 146                 goto out_unlock;
 147         err = newfd;
 148         if (newfd == oldfd)
 149                 goto out_unlock;
 150         err = -EBADF;
 151         if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
 152                 goto out_unlock;
 153         get_file(file);                 /* We are now finished with oldfd */
 154
 155         err = expand_files(files, newfd);
 156         if (err < 0)
 157                 goto out_fput;
 158
 159         /* To avoid races with open() and dup(), we will mark the fd as
 160          * in-use in the open-file bitmap throughout the entire dup2()
 161          * process.  This is quite safe: do_close() uses the fd array
 162          * entry, not the bitmap, to decide what work needs to be
 163          * done.  --sct */
 164         /* Doesn't work. open() might be there first. --AV */
 165
 166         /* Yes. It's a race. In user space. Nothing sane to do */
 167         err = -EBUSY;
 168         tofree = files->fd[newfd];
 169         if (!tofree && FD_ISSET(newfd, files->open_fds))
 170                 goto out_fput;
 171
 172         files->fd[newfd] = file;
 173         FD_SET(newfd, files->open_fds);
 174         FD_CLR(newfd, files->close_on_exec);
 175         write_unlock(&files->file_lock);
 176
 177         if (tofree)
 178                 filp_close(tofree, files);
 179         err = newfd;
 180 out:
 181         return err;
 182 out_unlock:
 183         write_unlock(&files->file_lock);
 184         goto out;
 185
 186 out_fput:
 187         write_unlock(&files->file_lock);
 188         fput(file);
 189         goto out;
 190 }
 191
 192 asmlinkage long sys_dup(unsigned int fildes)
 193 {
 194         int ret = -EBADF;
 195         struct file * file = fget(fildes);
 196
 197         if (file)
 198                 ret = dupfd(file, 0);
 199         return ret;
 200 }
 201
 202 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
 203
 204 static int setfl(int fd, struct file * filp, unsigned long arg)
 205 {
 206         struct inode * inode = filp->f_dentry->d_inode;
 207         int error;
 208
 209         /*
 210          * In the case of an append-only file, O_APPEND
 211          * cannot be cleared
 212          */
 213         if (!(arg & O_APPEND) && IS_APPEND(inode))
 214                 return -EPERM;
 215
 216         /* Did FASYNC state change? */
 217         if ((arg ^ filp->f_flags) & FASYNC) {
 218                 if (filp->f_op && filp->f_op->fasync) {
 219                         error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
 220                         if (error < 0)
 221                                 return error;
 222                 }
 223         }
 224
 225         if (arg & O_DIRECT) {
 226                 /*
 227                  * alloc_kiovec() can sleep and we are only serialized by
 228                  * the big kernel lock here, so abuse the i_sem to serialize
 229                  * this case too. We of course wouldn't need to go deep down
 230                  * to the inode layer, we could stay at the file layer, but
 231                  * we don't want to pay for the memory of a semaphore in each
 232                  * file structure too and we use the inode semaphore that we just
 233                  * pay for anyways.
 234                  */
 235                 error = 0;
 236                 down(&inode->i_sem);
 237                 if (!filp->f_iobuf)
 238                         error = alloc_kiovec(1, &filp->f_iobuf);
 239                 up(&inode->i_sem);
 240                 if (error < 0)
 241                         return error;
 242         }
 243
 244         /* required for strict SunOS emulation */
 245         if (O_NONBLOCK != O_NDELAY)
 246                if (arg & O_NDELAY)
 247                    arg |= O_NONBLOCK;
 248
 249         filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 250         return 0;
 251 }
 252
 253 static long do_fcntl(unsigned int fd, unsigned int cmd,
 254                      unsigned long arg, struct file * filp)
 255 {
 256         long err = -EINVAL;
 257
 258         switch (cmd) {
 259                 case F_DUPFD:
 260                         if (arg < NR_OPEN) {
 261                                 get_file(filp);
 262                                 err = dupfd(filp, arg);
 263                         }
 264                         break;
 265                 case F_GETFD:
 266                         err = get_close_on_exec(fd);
 267                         break;
 268                 case F_SETFD:
 269                         err = 0;
 270                         set_close_on_exec(fd, arg&1);
 271                         break;
 272                 case F_GETFL:
 273                         err = filp->f_flags;
 274                         break;
 275                 case F_SETFL:
 276                         lock_kernel();
 277                         err = setfl(fd, filp, arg);
 278                         unlock_kernel();
 279                         break;
 280                 case F_GETLK:
 281                         err = fcntl_getlk(fd, (struct flock *) arg);
 282                         break;
 283                 case F_SETLK:
 284                 case F_SETLKW:
 285                         err = fcntl_setlk(fd, cmd, (struct flock *) arg);
 286                         break;
 287                 case F_GETOWN:
 288                         /*
 289                          * XXX If f_owner is a process group, the
 290                          * negative return value will get converted
 291                          * into an error.  Oops.  If we keep the
 292                          * current syscall conventions, the only way
 293                          * to fix this will be in libc.
 294                          */
 295                         err = filp->f_owner.pid;
 296                         break;
 297                 case F_SETOWN:
 298                         lock_kernel();
 299                         filp->f_owner.pid = arg;
 300                         filp->f_owner.uid = current->uid;
 301                         filp->f_owner.euid = current->euid;
 302                         err = 0;
 303                         if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
 304                                 err = sock_fcntl (filp, F_SETOWN, arg);
 305                         unlock_kernel();
 306                         break;
 307                 case F_GETSIG:
 308                         err = filp->f_owner.signum;
 309                         break;
 310                 case F_SETSIG:
 311                         /* arg == 0 restores default behaviour. */
 312                         if (arg < 0 || arg > _NSIG) {
 313                                 break;
 314                         }
 315                         err = 0;
 316                         filp->f_owner.signum = arg;
 317                         break;
 318                 case F_GETLEASE:
 319                         err = fcntl_getlease(filp);
 320                         break;
 321                 case F_SETLEASE:
 322                         err = fcntl_setlease(fd, filp, arg);
 323                         break;
 324                 case F_NOTIFY:
 325                         err = fcntl_dirnotify(fd, filp, arg);
 326                         break;
 327                 default:
 328                         /* sockets need a few special fcntls. */
 329                         err = -EINVAL;
 330                         if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
 331                                 err = sock_fcntl (filp, cmd, arg);
 332                         break;
 333         }
 334
 335         return err;
 336 }
 337
 338 asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
 339 {
 340         struct file * filp;
 341         long err = -EBADF;
 342
 343         filp = fget(fd);
 344         if (!filp)
 345                 goto out;
 346
 347         err = do_fcntl(fd, cmd, arg, filp);
 348
 349         fput(filp);
 350 out:
 351         return err;
 352 }
 353
 #if BITS_PER_LONG == 32
 /*
  * sys_fcntl64 - fcntl64() system call entry point (only built when
  * BITS_PER_LONG is 32).
  *
  * The three 64-bit locking commands are handled here with struct flock64;
  * every other command is passed on to do_fcntl().  Returns -EBADF if @fd
  * is not an open descriptor.
  */
 asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
         struct file *filp = fget(fd);
         long result;

         if (!filp)
                 return -EBADF;

         switch (cmd) {
                 case F_GETLK64:
                         result = fcntl_getlk64(fd, (struct flock64 *) arg);
                         break;
                 case F_SETLK64:
                 case F_SETLKW64:
                         /* Both variants share one helper, which gets cmd. */
                         result = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
                         break;
                 default:
                         result = do_fcntl(fd, cmd, arg, filp);
                         break;
         }

         fput(filp);
         return result;
 }
 #endif
 384
 385 /* Table to convert sigio signal codes into poll band bitmaps */
 386
 387 static long band_table[NSIGPOLL] = {
 388         POLLIN | POLLRDNORM,                    /* POLL_IN */
 389         POLLOUT | POLLWRNORM | POLLWRBAND,      /* POLL_OUT */
 390         POLLIN | POLLRDNORM | POLLMSG,          /* POLL_MSG */
 391         POLLERR,                                /* POLL_ERR */
 392         POLLPRI | POLLRDBAND,                   /* POLL_PRI */
 393         POLLHUP | POLLERR                       /* POLL_HUP */
 394 };
 395
 396 static void send_sigio_to_task(struct task_struct *p,
 397                                struct fown_struct *fown,
 398                                int fd,
 399                                int reason)
 400 {
 401         if ((fown->euid != 0) &&
 402             (fown->euid ^ p->suid) && (fown->euid ^ p->uid) &&
 403             (fown->uid ^ p->suid) && (fown->uid ^ p->uid))
 404                 return;
 405         switch (fown->signum) {
 406                 siginfo_t si;
 407                 default:
 408                         /* Queue a rt signal with the appropriate fd as its
 409                            value.  We use SI_SIGIO as the source, not
 410                            SI_KERNEL, since kernel signals always get
 411                            delivered even if we can't queue.  Failure to
 412                            queue in this case _should_ be reported; we fall
 413                            back to SIGIO in that case. --sct */
 414                         si.si_signo = fown->signum;
 415                         si.si_errno = 0;
 416                         si.si_code  = reason;
 417                         /* Make sure we are called with one of the POLL_*
 418                            reasons, otherwise we could leak kernel stack into
 419                            userspace.  */
 420                         if ((reason & __SI_MASK) != __SI_POLL)
 421                                 BUG();
 422                         if (reason - POLL_IN >= NSIGPOLL)
 423                                 si.si_band  = ~0L;
 424                         else
 425                                 si.si_band = band_table[reason - POLL_IN];
 426                         si.si_fd    = fd;
 427                         if (!send_sig_info(fown->signum, &si, p))
 428                                 break;
 429                 /* fall-through: fall back on the old plain SIGIO signal */
 430                 case 0:
 431                         send_sig(SIGIO, p, 1);
 432         }
 433 }
 434
 435 void send_sigio(struct fown_struct *fown, int fd, int band)
 436 {
 437         struct task_struct * p;
 438         int   pid       = fown->pid;
 439
 440         read_lock(&tasklist_lock);
 441         if ( (pid > 0) && (p = find_task_by_pid(pid)) ) {
 442                 send_sigio_to_task(p, fown, fd, band);
 443                 goto out;
 444         }
 445         for_each_task(p) {
 446                 int match = p->pid;
 447                 if (pid < 0)
 448                         match = -p->pgrp;
 449                 if (pid != match)
 450                         continue;
 451                 send_sigio_to_task(p, fown, fd, band);
 452         }
 453 out:
 454         read_unlock(&tasklist_lock);
 455 }
 456
 457 static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;
 458 static kmem_cache_t *fasync_cache;
 459
 460 /*
 461  * fasync_helper() is used by some character device drivers (mainly mice)
 462  * to set up the fasync queue. It returns negative on error, 0 if it did
 463  * no changes and positive if it added/deleted the entry.
 464  */
 465 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
 466 {
 467         struct fasync_struct *fa, **fp;
 468         struct fasync_struct *new = NULL;
 469         int result = 0;
 470
 471         if (on) {
 472                 new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
 473                 if (!new)
 474                         return -ENOMEM;
 475         }
 476         write_lock_irq(&fasync_lock);
 477         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 478                 if (fa->fa_file == filp) {
 479                         if(on) {
 480                                 fa->fa_fd = fd;
 481                                 kmem_cache_free(fasync_cache, new);
 482                         } else {
 483                                 *fp = fa->fa_next;
 484                                 kmem_cache_free(fasync_cache, fa);
 485                                 result = 1;
 486                         }
 487                         goto out;
 488                 }
 489         }
 490
 491         if (on) {
 492                 new->magic = FASYNC_MAGIC;
 493                 new->fa_file = filp;
 494                 new->fa_fd = fd;
 495                 new->fa_next = *fapp;
 496                 *fapp = new;
 497                 result = 1;
 498         }
 499 out:
 500         write_unlock_irq(&fasync_lock);
 501         return result;
 502 }
 503
 504 void __kill_fasync(struct fasync_struct *fa, int sig, int band)
 505 {
 506         while (fa) {
 507                 struct fown_struct * fown;
 508                 if (fa->magic != FASYNC_MAGIC) {
 509                         printk(KERN_ERR "kill_fasync: bad magic number in "
 510                                "fasync_struct!\n");
 511                         return;
 512                 }
 513                 fown = &fa->fa_file->f_owner;
 514                 /* Don't send SIGURG to processes which have not set a
 515                    queued signum: SIGURG has its own default signalling
 516                    mechanism. */
 517                 if (fown->pid && !(sig == SIGURG && fown->signum == 0))
 518                         send_sigio(fown, fa->fa_fd, band);
 519                 fa = fa->fa_next;
 520         }
 521 }
 522
 523 void kill_fasync(struct fasync_struct **fp, int sig, int band)
 524 {
 525         read_lock(&fasync_lock);
 526         __kill_fasync(*fp, sig, band);
 527         read_unlock(&fasync_lock);
 528 }
 529
 530 static int __init fasync_init(void)
 531 {
 532         fasync_cache = kmem_cache_create("fasync_cache",
 533                 sizeof(struct fasync_struct), 0, 0, NULL, NULL);
 534         if (!fasync_cache)
 535                 panic("cannot create fasync slab cache");
 536         return 0;
 537 }
 538
 539 module_init(fasync_init)