fs/proc/base.c

   1 /*
   2  *  linux/fs/proc/base.c
   3  *
   4  *  Copyright (C) 1991, 1992 Linus Torvalds
   5  *
   6  *  proc base directory handling functions
   7  *
   8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
   9  *  Instead of using magical inumbers to determine the kind of object
  10  *  we allocate and fill in-core inodes upon lookup. They don't even
  11  *  go into icache. We cache the reference to task_struct upon lookup too.
  12  *  Eventually it should become a filesystem in its own. We don't use the
  13  *  rest of procfs anymore.
  14  */
  15
  16 #include <asm/uaccess.h>
  17
  18 #include <linux/config.h>
  19 #include <linux/errno.h>
  20 #include <linux/sched.h>
  21 #include <linux/proc_fs.h>
  22 #include <linux/stat.h>
  23 #include <linux/init.h>
  24 #include <linux/file.h>
  25 #include <linux/string.h>
  26 #include <linux/seq_file.h>
  27 #include <linux/namespace.h>
  28
  29 /*
  30  * For hysterical raisins we keep the same inumbers as in the old procfs.
  31  * Feel free to change the macro below - just keep the range distinct from
  32  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
  33  * As soon as we'll get a separate superblock we will be able to forget
  34  * about magical ranges too.
  35  */
  36
  37 #define fake_ino(pid,ino) (((pid)<<16)|(ino))
  38
  39 int proc_pid_stat(struct task_struct*,char*);
  40 int proc_pid_status(struct task_struct*,char*);
  41 int proc_pid_statm(struct task_struct*,char*);
  42 int proc_pid_cpu(struct task_struct*,char*);
  43
  44 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
  45 {
  46         if (inode->u.proc_i.file) {
  47                 *mnt = mntget(inode->u.proc_i.file->f_vfsmnt);
  48                 *dentry = dget(inode->u.proc_i.file->f_dentry);
  49                 return 0;
  50         }
  51         return -ENOENT;
  52 }
  53
  54 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
  55 {
  56         struct mm_struct * mm;
  57         struct vm_area_struct * vma;
  58         int result = -ENOENT;
  59         struct task_struct *task = inode->u.proc_i.task;
  60
  61         task_lock(task);
  62         mm = task->mm;
  63         if (mm)
  64                 atomic_inc(&mm->mm_users);
  65         task_unlock(task);
  66         if (!mm)
  67                 goto out;
  68         down_read(&mm->mmap_sem);
  69         vma = mm->mmap;
  70         while (vma) {
  71                 if ((vma->vm_flags & VM_EXECUTABLE) &&
  72                     vma->vm_file) {
  73                         *mnt = mntget(vma->vm_file->f_vfsmnt);
  74                         *dentry = dget(vma->vm_file->f_dentry);
  75                         result = 0;
  76                         break;
  77                 }
  78                 vma = vma->vm_next;
  79         }
  80         up_read(&mm->mmap_sem);
  81         mmput(mm);
  82 out:
  83         return result;
  84 }
  85
  86 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
  87 {
  88         struct fs_struct *fs;
  89         int result = -ENOENT;
  90         task_lock(inode->u.proc_i.task);
  91         fs = inode->u.proc_i.task->fs;
  92         if(fs)
  93                 atomic_inc(&fs->count);
  94         task_unlock(inode->u.proc_i.task);
  95         if (fs) {
  96                 read_lock(&fs->lock);
  97                 *mnt = mntget(fs->pwdmnt);
  98                 *dentry = dget(fs->pwd);
  99                 read_unlock(&fs->lock);
 100                 result = 0;
 101                 put_fs_struct(fs);
 102         }
 103         return result;
 104 }
 105
 106 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
 107 {
 108         struct fs_struct *fs;
 109         int result = -ENOENT;
 110         task_lock(inode->u.proc_i.task);
 111         fs = inode->u.proc_i.task->fs;
 112         if(fs)
 113                 atomic_inc(&fs->count);
 114         task_unlock(inode->u.proc_i.task);
 115         if (fs) {
 116                 read_lock(&fs->lock);
 117                 *mnt = mntget(fs->rootmnt);
 118                 *dentry = dget(fs->root);
 119                 read_unlock(&fs->lock);
 120                 result = 0;
 121                 put_fs_struct(fs);
 122         }
 123         return result;
 124 }
 125
 126 #define MAY_PTRACE(task) \
 127         (task == current || \
 128         (task->p_pptr == current && \
 129         (task->ptrace & PT_PTRACED) && task->state == TASK_STOPPED))
 130
 131 static int may_ptrace_attach(struct task_struct *task)
 132 {
 133         int retval = 0;
 134
 135         task_lock(task);
 136
 137         if (((current->uid != task->euid) ||
 138             (current->uid != task->suid) ||
 139             (current->uid != task->uid) ||
 140             (current->gid != task->egid) ||
 141             (current->gid != task->sgid) ||
 142             (!cap_issubset(task->cap_permitted, current->cap_permitted)) ||
 143             (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 144                 goto out;
 145         rmb();
 146         if (!is_dumpable(task) && !capable(CAP_SYS_PTRACE))
 147                 goto out;
 148
 149         retval = 1;
 150
 151 out:
 152         task_unlock(task);
 153         return retval;
 154 }
 155
 156 static int proc_pid_environ(struct task_struct *task, char * buffer)
 157 {
 158         struct mm_struct *mm;
 159         int res = 0;
 160
 161         if (!may_ptrace_attach(task))
 162                 return -ESRCH;
 163
 164         task_lock(task);
 165         mm = task->mm;
 166         if (mm)
 167                 atomic_inc(&mm->mm_users);
 168         task_unlock(task);
 169         if (mm && mm->env_start && mm->env_start < mm->env_end) {
 170                 unsigned long len = mm->env_end - mm->env_start;
 171                 if (len > PAGE_SIZE)
 172                         len = PAGE_SIZE;
 173                 res = access_process_vm(task, mm->env_start, buffer, len, 0);
 174                 if (res >= 0 && !may_ptrace_attach(task))
 175                         res = -ESRCH;
 176         }
 177         if (mm)
 178                 mmput(mm);
 179         return res;
 180 }
 181
 182 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
 183 {
 184         struct mm_struct *mm;
 185         int res = 0;
 186         task_lock(task);
 187         mm = task->mm;
 188         if (mm) {
 189                 if (mm->arg_end)
 190                         atomic_inc(&mm->mm_users);
 191                 else
 192                         mm = NULL;
 193         }
 194         task_unlock(task);
 195         if (mm && mm->arg_start && mm->arg_start < mm->arg_end) {
 196                 unsigned long len = mm->arg_end - mm->arg_start;
 197                 if (len > PAGE_SIZE)
 198                         len = PAGE_SIZE;
 199                 res = access_process_vm(task, mm->arg_start, buffer, len, 0);
 200                 /* If the nul at the end of args has been overwritten, then
 201                    assume application is using setproctitle(3). */
 202                 if (res > 0 && buffer[res - 1] != '\0') {
 203                         len = strnlen(buffer, res);
 204                         if (len < res) {
 205                                 res = len;
 206                         } else
 207                         if (mm->env_start < mm->env_end && res <= PAGE_SIZE) {
 208                                 len = mm->env_end - mm->env_start;
 209                                 if (len > PAGE_SIZE - res)
 210                                         len = PAGE_SIZE - res;
 211                                 res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
 212                                 res = strnlen(buffer, res);
 213                         } else
 214                                 res = 0;
 215                 }
 216         }
 217         if (mm)
 218                 mmput(mm);
 219         return res;
 220 }
 221
 222 /************************************************************************/
 223 /*                       Here the fs part begins                        */
 224 /************************************************************************/
 225
 226 /* permission checks */
 227
 228 static int proc_check_root(struct inode *inode)
 229 {
 230         struct dentry *de, *base, *root;
 231         struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
 232         int res = 0;
 233
 234         if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
 235                 return -ENOENT;
 236         read_lock(&current->fs->lock);
 237         our_vfsmnt = mntget(current->fs->rootmnt);
 238         base = dget(current->fs->root);
 239         read_unlock(&current->fs->lock);
 240
 241         spin_lock(&dcache_lock);
 242         de = root;
 243         mnt = vfsmnt;
 244
 245         while (vfsmnt != our_vfsmnt) {
 246                 if (vfsmnt == vfsmnt->mnt_parent)
 247                         goto out;
 248                 de = vfsmnt->mnt_mountpoint;
 249                 vfsmnt = vfsmnt->mnt_parent;
 250         }
 251
 252         if (!is_subdir(de, base))
 253                 goto out;
 254         spin_unlock(&dcache_lock);
 255
 256 exit:
 257         dput(base);
 258         mntput(our_vfsmnt);
 259         dput(root);
 260         mntput(mnt);
 261         return res;
 262 out:
 263         spin_unlock(&dcache_lock);
 264         res = -EACCES;
 265         goto exit;
 266 }
 267
 268 static int proc_permission(struct inode *inode, int mask)
 269 {
 270         if (vfs_permission(inode, mask) != 0)
 271                 return -EACCES;
 272         return proc_check_root(inode);
 273 }
 274
 275 extern struct seq_operations proc_pid_maps_op;
 276 static int maps_open(struct inode *inode, struct file *file)
 277 {
 278         struct task_struct *task = inode->u.proc_i.task;
 279         int ret = seq_open(file, &proc_pid_maps_op);
 280         if (!ret) {
 281                 struct seq_file *m = file->private_data;
 282                 m->private = task;
 283         }
 284         return ret;
 285 }
 286
 287 static struct file_operations proc_maps_operations = {
 288         .open           = maps_open,
 289         .read           = seq_read,
 290         .llseek         = seq_lseek,
 291         .release        = seq_release,
 292 };
 293
 294 extern struct seq_operations mounts_op;
 295 static int mounts_open(struct inode *inode, struct file *file)
 296 {
 297         struct task_struct *task = inode->u.proc_i.task;
 298         int ret = seq_open(file, &mounts_op);
 299
 300         if (!ret) {
 301                 struct seq_file *m = file->private_data;
 302                 struct namespace *namespace;
 303                 task_lock(task);
 304                 namespace = task->namespace;
 305                 if (namespace)
 306                         get_namespace(namespace);
 307                 task_unlock(task);
 308
 309                 if (namespace)
 310                         m->private = namespace;
 311                 else {
 312                         seq_release(inode, file);
 313                         ret = -EINVAL;
 314                 }
 315         }
 316         return ret;
 317 }
 318
 319 static int mounts_release(struct inode *inode, struct file *file)
 320 {
 321         struct seq_file *m = file->private_data;
 322         struct namespace *namespace = m->private;
 323         put_namespace(namespace);
 324         return seq_release(inode, file);
 325 }
 326
 327 static struct file_operations proc_mounts_operations = {
 328         open:           mounts_open,
 329         read:           seq_read,
 330         llseek:         seq_lseek,
 331         release:        mounts_release,
 332 };
 333
 334 #define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
 335
 336 static ssize_t proc_info_read(struct file * file, char * buf,
 337                           size_t count, loff_t *ppos)
 338 {
 339         struct inode * inode = file->f_dentry->d_inode;
 340         unsigned long page;
 341         ssize_t length;
 342         ssize_t end;
 343         struct task_struct *task = inode->u.proc_i.task;
 344         loff_t pos = *ppos;
 345
 346         if (count > PROC_BLOCK_SIZE)
 347                 count = PROC_BLOCK_SIZE;
 348         if (!(page = __get_free_page(GFP_KERNEL)))
 349                 return -ENOMEM;
 350
 351         length = inode->u.proc_i.op.proc_read(task, (char*)page);
 352
 353         if (length < 0) {
 354                 free_page(page);
 355                 return length;
 356         }
 357         /* Static 4kB (or whatever) block capacity */
 358         if (pos < 0 || pos >= length) {
 359                 free_page(page);
 360                 return 0;
 361         }
 362         if (count > length - pos)
 363                 count = length - pos;
 364         end = count + pos;
 365         copy_to_user(buf, (char *) page + pos, count);
 366         *ppos = end;
 367         free_page(page);
 368         return count;
 369 }
 370
 371 static struct file_operations proc_info_file_operations = {
 372         read:           proc_info_read,
 373 };
 374
 375 static int mem_open(struct inode* inode, struct file* file)
 376 {
 377         file->private_data = (void*)((long)current->self_exec_id);
 378         return 0;
 379 }
 380
 381 static ssize_t mem_read(struct file * file, char * buf,
 382                         size_t count, loff_t *ppos)
 383 {
 384         struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
 385         char *page;
 386         unsigned long src = *ppos;
 387         int copied = 0;
 388         struct mm_struct *mm;
 389
 390         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
 391                 return -ESRCH;
 392
 393         page = (char *)__get_free_page(GFP_USER);
 394         if (!page)
 395                 return -ENOMEM;
 396
 397         task_lock(task);
 398         mm = task->mm;
 399         if (mm)
 400                 atomic_inc(&mm->mm_users);
 401         task_unlock(task);
 402         if (!mm){
 403                 copied = 0;
 404                 goto out_free;
 405         }
 406
 407         if (file->private_data != (void*)((long)current->self_exec_id) ) {
 408                 mmput(mm);
 409                 copied = -EIO;
 410                 goto out_free;
 411         }
 412
 413         while (count > 0) {
 414                 int this_len, retval;
 415
 416                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 417                 retval = access_process_vm(task, src, page, this_len, 0);
 418                 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
 419                         if (!copied)
 420                                 copied = -EIO;
 421                         break;
 422                 }
 423                 if (copy_to_user(buf, page, retval)) {
 424                         copied = -EFAULT;
 425                         break;
 426                 }
 427                 copied += retval;
 428                 src += retval;
 429                 buf += retval;
 430                 count -= retval;
 431         }
 432         *ppos = src;
 433         mmput(mm);
 434
 435 out_free:
 436         free_page((unsigned long) page);
 437         return copied;
 438 }
 439
 440 #define mem_write NULL
 441
 442 #ifndef mem_write
 443 /* This is a security hazard */
 444 static ssize_t mem_write(struct file * file, const char * buf,
 445                          size_t count, loff_t *ppos)
 446 {
 447         int copied = 0;
 448         char *page;
 449         struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
 450         unsigned long dst = *ppos;
 451
 452         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
 453                 return -ESRCH;
 454
 455         page = (char *)__get_free_page(GFP_USER);
 456         if (!page)
 457                 return -ENOMEM;
 458
 459         while (count > 0) {
 460                 int this_len, retval;
 461
 462                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 463                 if (copy_from_user(page, buf, this_len)) {
 464                         copied = -EFAULT;
 465                         break;
 466                 }
 467                 retval = access_process_vm(task, dst, page, this_len, 1);
 468                 if (!retval) {
 469                         if (!copied)
 470                                 copied = -EIO;
 471                         break;
 472                 }
 473                 copied += retval;
 474                 buf += retval;
 475                 dst += retval;
 476                 count -= retval;
 477         }
 478         *ppos = dst;
 479         free_page((unsigned long) page);
 480         return copied;
 481 }
 482 #endif
 483
 484 static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
 485 {
 486         switch (orig) {
 487         case 0:
 488                 file->f_pos = offset;
 489                 break;
 490         case 1:
 491                 file->f_pos += offset;
 492                 break;
 493         default:
 494                 return -EINVAL;
 495         }
 496         force_successful_syscall_return();
 497         return file->f_pos;
 498 }
 499
 500 static struct file_operations proc_mem_operations = {
 501         llseek:         mem_lseek,
 502         read:           mem_read,
 503         write:          mem_write,
 504         open:           mem_open,
 505 };
 506
 507 static struct inode_operations proc_mem_inode_operations = {
 508         permission:     proc_permission,
 509 };
 510
 511 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 512 {
 513         struct inode *inode = dentry->d_inode;
 514         int error = -EACCES;
 515
 516         /* We don't need a base pointer in the /proc filesystem */
 517         path_release(nd);
 518
 519         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
 520                 goto out;
 521         error = proc_check_root(inode);
 522         if (error)
 523                 goto out;
 524
 525         error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
 526         nd->last_type = LAST_BIND;
 527 out:
 528         return error;
 529 }
 530
 531 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
 532                             char * buffer, int buflen)
 533 {
 534         struct inode * inode;
 535         char * tmp = (char*)__get_free_page(GFP_KERNEL), *path;
 536         int len;
 537
 538         if (!tmp)
 539                 return -ENOMEM;
 540
 541         inode = dentry->d_inode;
 542         path = d_path(dentry, mnt, tmp, PAGE_SIZE);
 543         if (IS_ERR(path)) {
 544                 free_page((unsigned long)tmp);
 545                 return PTR_ERR(path);
 546         }
 547         len = tmp + PAGE_SIZE - 1 - path;
 548
 549         if (len < buflen)
 550                 buflen = len;
 551         copy_to_user(buffer, path, buflen);
 552         free_page((unsigned long)tmp);
 553         return buflen;
 554 }
 555
 556 static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen)
 557 {
 558         int error = -EACCES;
 559         struct inode *inode = dentry->d_inode;
 560         struct dentry *de;
 561         struct vfsmount *mnt = NULL;
 562
 563         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
 564                 goto out;
 565         error = proc_check_root(inode);
 566         if (error)
 567                 goto out;
 568
 569         error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt);
 570         if (error)
 571                 goto out;
 572
 573         error = do_proc_readlink(de, mnt, buffer, buflen);
 574         dput(de);
 575         mntput(mnt);
 576 out:
 577         return error;
 578 }
 579
 580 static struct inode_operations proc_pid_link_inode_operations = {
 581         readlink:       proc_pid_readlink,
 582         follow_link:    proc_pid_follow_link
 583 };
 584
 585 struct pid_entry {
 586         int type;
 587         int len;
 588         char *name;
 589         mode_t mode;
 590 };
 591
 592 enum pid_directory_inos {
 593         PROC_PID_INO = 2,
 594         PROC_PID_STATUS,
 595         PROC_PID_MEM,
 596         PROC_PID_CWD,
 597         PROC_PID_ROOT,
 598         PROC_PID_EXE,
 599         PROC_PID_FD,
 600         PROC_PID_ENVIRON,
 601         PROC_PID_CMDLINE,
 602         PROC_PID_STAT,
 603         PROC_PID_STATM,
 604         PROC_PID_MAPS,
 605         PROC_PID_CPU,
 606         PROC_PID_MOUNTS,
 607         PROC_PID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
 608 };
 609
 610 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
 611 static struct pid_entry base_stuff[] = {
 612   E(PROC_PID_FD,        "fd",           S_IFDIR|S_IRUSR|S_IXUSR),
 613   E(PROC_PID_ENVIRON,   "environ",      S_IFREG|S_IRUSR),
 614   E(PROC_PID_STATUS,    "status",       S_IFREG|S_IRUGO),
 615   E(PROC_PID_CMDLINE,   "cmdline",      S_IFREG|S_IRUGO),
 616   E(PROC_PID_STAT,      "stat",         S_IFREG|S_IRUGO),
 617   E(PROC_PID_STATM,     "statm",        S_IFREG|S_IRUGO),
 618 #ifdef CONFIG_SMP
 619   E(PROC_PID_CPU,       "cpu",          S_IFREG|S_IRUGO),
 620 #endif
 621   E(PROC_PID_MAPS,      "maps",         S_IFREG|S_IRUGO),
 622   E(PROC_PID_MEM,       "mem",          S_IFREG|S_IRUSR|S_IWUSR),
 623   E(PROC_PID_CWD,       "cwd",          S_IFLNK|S_IRWXUGO),
 624   E(PROC_PID_ROOT,      "root",         S_IFLNK|S_IRWXUGO),
 625   E(PROC_PID_EXE,       "exe",          S_IFLNK|S_IRWXUGO),
 626   E(PROC_PID_MOUNTS,    "mounts",       S_IFREG|S_IRUGO),
 627   {0,0,NULL,0}
 628 };
 629 #undef E
 630
 631 #define NUMBUF 10
 632
 633 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 634 {
 635         struct inode *inode = filp->f_dentry->d_inode;
 636         struct task_struct *p = inode->u.proc_i.task;
 637         unsigned int fd, pid, ino;
 638         int retval;
 639         char buf[NUMBUF];
 640         struct files_struct * files;
 641
 642         retval = 0;
 643         pid = p->pid;
 644
 645         fd = filp->f_pos;
 646         switch (fd) {
 647                 case 0:
 648                         if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
 649                                 goto out;
 650                         filp->f_pos++;
 651                 case 1:
 652                         ino = fake_ino(pid, PROC_PID_INO);
 653                         if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
 654                                 goto out;
 655                         filp->f_pos++;
 656                 default:
 657                         task_lock(p);
 658                         files = p->files;
 659                         if (files)
 660                                 atomic_inc(&files->count);
 661                         task_unlock(p);
 662                         if (!files)
 663                                 goto out;
 664                         read_lock(&files->file_lock);
 665                         for (fd = filp->f_pos-2;
 666                              fd < files->max_fds;
 667                              fd++, filp->f_pos++) {
 668                                 unsigned int i,j;
 669
 670                                 if (!fcheck_files(files, fd))
 671                                         continue;
 672                                 read_unlock(&files->file_lock);
 673
 674                                 j = NUMBUF;
 675                                 i = fd;
 676                                 do {
 677                                         j--;
 678                                         buf[j] = '0' + (i % 10);
 679                                         i /= 10;
 680                                 } while (i);
 681
 682                                 ino = fake_ino(pid, PROC_PID_FD_DIR + fd);
 683                                 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
 684                                         read_lock(&files->file_lock);
 685                                         break;
 686                                 }
 687                                 read_lock(&files->file_lock);
 688                         }
 689                         read_unlock(&files->file_lock);
 690                         put_files_struct(files);
 691         }
 692 out:
 693         return retval;
 694 }
 695
 696 static int proc_base_readdir(struct file * filp,
 697         void * dirent, filldir_t filldir)
 698 {
 699         int i;
 700         int pid;
 701         struct inode *inode = filp->f_dentry->d_inode;
 702         struct pid_entry *p;
 703
 704         pid = inode->u.proc_i.task->pid;
 705         if (!pid)
 706                 return -ENOENT;
 707         i = filp->f_pos;
 708         switch (i) {
 709                 case 0:
 710                         if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
 711                                 return 0;
 712                         i++;
 713                         filp->f_pos++;
 714                         /* fall through */
 715                 case 1:
 716                         if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
 717                                 return 0;
 718                         i++;
 719                         filp->f_pos++;
 720                         /* fall through */
 721                 default:
 722                         i -= 2;
 723                         if (i>=sizeof(base_stuff)/sizeof(base_stuff[0]))
 724                                 return 1;
 725                         p = base_stuff + i;
 726                         while (p->name) {
 727                                 if (filldir(dirent, p->name, p->len, filp->f_pos,
 728                                             fake_ino(pid, p->type), p->mode >> 12) < 0)
 729                                         return 0;
 730                                 filp->f_pos++;
 731                                 p++;
 732                         }
 733         }
 734         return 1;
 735 }
 736
 737 /* building an inode */
 738
 739 static int task_dumpable(struct task_struct *task)
 740 {
 741         int dumpable = 0;
 742         struct mm_struct *mm;
 743
 744         task_lock(task);
 745         mm = task->mm;
 746         if (mm)
 747                 dumpable = mm->dumpable;
 748         task_unlock(task);
 749         return dumpable;
 750 }
 751
 752
 753 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
 754 {
 755         struct inode * inode;
 756
 757         /* We need a new inode */
 758
 759         inode = new_inode(sb);
 760         if (!inode)
 761                 goto out;
 762
 763         /* Common stuff */
 764
 765         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 766         inode->i_ino = fake_ino(task->pid, ino);
 767
 768         if (!task->pid)
 769                 goto out_unlock;
 770
 771         /*
 772          * grab the reference to task.
 773          */
 774         get_task_struct(task);
 775         inode->u.proc_i.task = task;
 776         inode->i_uid = 0;
 777         inode->i_gid = 0;
 778         if (ino == PROC_PID_INO || task_dumpable(task)) {
 779                 inode->i_uid = task->euid;
 780                 inode->i_gid = task->egid;
 781         }
 782
 783 out:
 784         return inode;
 785
 786 out_unlock:
 787         inode->u.generic_ip = NULL;
 788         iput(inode);
 789         return NULL;
 790 }
 791
 792 /* dentry stuff */
 793
 794 static int pid_fd_revalidate(struct dentry * dentry, int flags)
 795 {
 796         return 0;
 797 }
 798
 799 /*
 800  *      Exceptional case: normally we are not allowed to unhash a busy
 801  * directory. In this case, however, we can do it - no aliasing problems
 802  * due to the way we treat inodes.
 803  */
 804 static int pid_base_revalidate(struct dentry * dentry, int flags)
 805 {
 806         if (dentry->d_inode->u.proc_i.task->pid)
 807                 return 1;
 808         d_drop(dentry);
 809         return 0;
 810 }
 811
 812 static int pid_delete_dentry(struct dentry * dentry)
 813 {
 814         return 1;
 815 }
 816
 817 static struct dentry_operations pid_fd_dentry_operations =
 818 {
 819         d_revalidate:   pid_fd_revalidate,
 820         d_delete:       pid_delete_dentry,
 821 };
 822
 823 static struct dentry_operations pid_dentry_operations =
 824 {
 825         d_delete:       pid_delete_dentry,
 826 };
 827
 828 static struct dentry_operations pid_base_dentry_operations =
 829 {
 830         d_revalidate:   pid_base_revalidate,
 831         d_delete:       pid_delete_dentry,
 832 };
 833
 834 /* Lookups */
 835 #define MAX_MULBY10     ((~0U-9)/10)
 836
 837 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
 838 {
 839         unsigned int fd, c;
 840         struct task_struct *task = dir->u.proc_i.task;
 841         struct file * file;
 842         struct files_struct * files;
 843         struct inode *inode;
 844         const char *name;
 845         int len;
 846
 847         fd = 0;
 848         len = dentry->d_name.len;
 849         name = dentry->d_name.name;
 850         if (len > 1 && *name == '0') goto out;
 851         while (len-- > 0) {
 852                 c = *name - '0';
 853                 name++;
 854                 if (c > 9)
 855                         goto out;
 856                 if (fd >= MAX_MULBY10)
 857                         goto out;
 858                 fd *= 10;
 859                 fd += c;
 860         }
 861
 862         inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
 863         if (!inode)
 864                 goto out;
 865         task_lock(task);
 866         files = task->files;
 867         if (files)
 868                 atomic_inc(&files->count);
 869         task_unlock(task);
 870         if (!files)
 871                 goto out_unlock;
 872         read_lock(&files->file_lock);
 873         file = inode->u.proc_i.file = fcheck_files(files, fd);
 874         if (!file)
 875                 goto out_unlock2;
 876         get_file(file);
 877         read_unlock(&files->file_lock);
 878         put_files_struct(files);
 879         inode->i_op = &proc_pid_link_inode_operations;
 880         inode->i_size = 64;
 881         inode->i_mode = S_IFLNK;
 882         inode->u.proc_i.op.proc_get_link = proc_fd_link;
 883         if (file->f_mode & 1)
 884                 inode->i_mode |= S_IRUSR | S_IXUSR;
 885         if (file->f_mode & 2)
 886                 inode->i_mode |= S_IWUSR | S_IXUSR;
 887         dentry->d_op = &pid_fd_dentry_operations;
 888         d_add(dentry, inode);
 889         return NULL;
 890
 891 out_unlock2:
 892         read_unlock(&files->file_lock);
 893         put_files_struct(files);
 894 out_unlock:
 895         iput(inode);
 896 out:
 897         return ERR_PTR(-ENOENT);
 898 }
 899
 900 static struct file_operations proc_fd_operations = {
 901         read:           generic_read_dir,
 902         readdir:        proc_readfd,
 903 };
 904
 905 /*
 906  * proc directories can do almost nothing..
 907  */
 908 static struct inode_operations proc_fd_inode_operations = {
 909         lookup:         proc_lookupfd,
 910         permission:     proc_permission,
 911 };
 912
 913 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
 914 {
 915         struct inode *inode;
 916         int error;
 917         struct task_struct *task = dir->u.proc_i.task;
 918         struct pid_entry *p;
 919
 920         error = -ENOENT;
 921         inode = NULL;
 922
 923         for (p = base_stuff; p->name; p++) {
 924                 if (p->len != dentry->d_name.len)
 925                         continue;
 926                 if (!memcmp(dentry->d_name.name, p->name, p->len))
 927                         break;
 928         }
 929         if (!p->name)
 930                 goto out;
 931
 932         error = -EINVAL;
 933         inode = proc_pid_make_inode(dir->i_sb, task, p->type);
 934         if (!inode)
 935                 goto out;
 936
 937         inode->i_mode = p->mode;
 938         /*
 939          * Yes, it does not scale. And it should not. Don't add
 940          * new entries into /proc/<pid>/ without very good reasons.
 941          */
 942         switch(p->type) {
 943                 case PROC_PID_FD:
 944                         inode->i_nlink = 2;
 945                         inode->i_op = &proc_fd_inode_operations;
 946                         inode->i_fop = &proc_fd_operations;
 947                         break;
 948                 case PROC_PID_EXE:
 949                         inode->i_op = &proc_pid_link_inode_operations;
 950                         inode->u.proc_i.op.proc_get_link = proc_exe_link;
 951                         break;
 952                 case PROC_PID_CWD:
 953                         inode->i_op = &proc_pid_link_inode_operations;
 954                         inode->u.proc_i.op.proc_get_link = proc_cwd_link;
 955                         break;
 956                 case PROC_PID_ROOT:
 957                         inode->i_op = &proc_pid_link_inode_operations;
 958                         inode->u.proc_i.op.proc_get_link = proc_root_link;
 959                         break;
 960                 case PROC_PID_ENVIRON:
 961                         inode->i_fop = &proc_info_file_operations;
 962                         inode->u.proc_i.op.proc_read = proc_pid_environ;
 963                         break;
 964                 case PROC_PID_STATUS:
 965                         inode->i_fop = &proc_info_file_operations;
 966                         inode->u.proc_i.op.proc_read = proc_pid_status;
 967                         break;
 968                 case PROC_PID_STAT:
 969                         inode->i_fop = &proc_info_file_operations;
 970                         inode->u.proc_i.op.proc_read = proc_pid_stat;
 971                         break;
 972                 case PROC_PID_CMDLINE:
 973                         inode->i_fop = &proc_info_file_operations;
 974                         inode->u.proc_i.op.proc_read = proc_pid_cmdline;
 975                         break;
 976                 case PROC_PID_STATM:
 977                         inode->i_fop = &proc_info_file_operations;
 978                         inode->u.proc_i.op.proc_read = proc_pid_statm;
 979                         break;
 980                 case PROC_PID_MAPS:
 981                         inode->i_fop = &proc_maps_operations;
 982                         break;
 983 #ifdef CONFIG_SMP
 984                 case PROC_PID_CPU:
 985                         inode->i_fop = &proc_info_file_operations;
 986                         inode->u.proc_i.op.proc_read = proc_pid_cpu;
 987                         break;
 988 #endif
 989                 case PROC_PID_MEM:
 990                         inode->i_op = &proc_mem_inode_operations;
 991                         inode->i_fop = &proc_mem_operations;
 992                         break;
 993                 case PROC_PID_MOUNTS:
 994                         inode->i_fop = &proc_mounts_operations;
 995                         break;
 996                 default:
 997                         printk("procfs: impossible type (%d)",p->type);
 998                         iput(inode);
 999                         return ERR_PTR(-EINVAL);
1000         }
1001         dentry->d_op = &pid_dentry_operations;
1002         d_add(dentry, inode);
1003         return NULL;
1004
1005 out:
1006         return ERR_PTR(error);
1007 }
1008
1009 static struct file_operations proc_base_operations = {
1010         read:           generic_read_dir,
1011         readdir:        proc_base_readdir,
1012 };
1013
1014 static struct inode_operations proc_base_inode_operations = {
1015         lookup:         proc_base_lookup,
1016 };
1017
1018 /*
1019  * /proc/self:
1020  */
1021 static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen)
1022 {
1023         char tmp[30];
1024         sprintf(tmp, "%d", current->pid);
1025         return vfs_readlink(dentry,buffer,buflen,tmp);
1026 }
1027
1028 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1029 {
1030         char tmp[30];
1031         sprintf(tmp, "%d", current->pid);
1032         return vfs_follow_link(nd,tmp);
1033 }
1034
1035 static struct inode_operations proc_self_inode_operations = {
1036         readlink:       proc_self_readlink,
1037         follow_link:    proc_self_follow_link,
1038 };
1039
1040 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
1041 {
1042         unsigned int pid, c;
1043         struct task_struct *task;
1044         const char *name;
1045         struct inode *inode;
1046         int len;
1047
1048         pid = 0;
1049         name = dentry->d_name.name;
1050         len = dentry->d_name.len;
1051         if (len == 4 && !memcmp(name, "self", 4)) {
1052                 inode = new_inode(dir->i_sb);
1053                 if (!inode)
1054                         return ERR_PTR(-ENOMEM);
1055                 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1056                 inode->i_ino = fake_ino(0, PROC_PID_INO);
1057                 inode->u.proc_i.file = NULL;
1058                 inode->u.proc_i.task = NULL;
1059                 inode->i_mode = S_IFLNK|S_IRWXUGO;
1060                 inode->i_uid = inode->i_gid = 0;
1061                 inode->i_size = 64;
1062                 inode->i_op = &proc_self_inode_operations;
1063                 d_add(dentry, inode);
1064                 return NULL;
1065         }
1066         while (len-- > 0) {
1067                 c = *name - '0';
1068                 name++;
1069                 if (c > 9)
1070                         goto out;
1071                 if (pid >= MAX_MULBY10)
1072                         goto out;
1073                 pid *= 10;
1074                 pid += c;
1075                 if (!pid)
1076                         goto out;
1077         }
1078
1079         read_lock(&tasklist_lock);
1080         task = find_task_by_pid(pid);
1081         if (task)
1082                 get_task_struct(task);
1083         read_unlock(&tasklist_lock);
1084         if (!task)
1085                 goto out;
1086
1087         inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
1088
1089         free_task_struct(task);
1090
1091         if (!inode)
1092                 goto out;
1093         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1094         inode->i_op = &proc_base_inode_operations;
1095         inode->i_fop = &proc_base_operations;
1096         inode->i_nlink = 3;
1097         inode->i_flags|=S_IMMUTABLE;
1098
1099         dentry->d_op = &pid_base_dentry_operations;
1100         d_add(dentry, inode);
1101         return NULL;
1102 out:
1103         return ERR_PTR(-ENOENT);
1104 }
1105
1106 void proc_pid_delete_inode(struct inode *inode)
1107 {
1108         if (inode->u.proc_i.file)
1109                 fput(inode->u.proc_i.file);
1110         if (inode->u.proc_i.task)
1111                 free_task_struct(inode->u.proc_i.task);
1112 }
1113
1114 #define PROC_NUMBUF 10
1115 #define PROC_MAXPIDS 20
1116
1117 /*
1118  * Get a few pid's to return for filldir - we need to hold the
1119  * tasklist lock while doing this, and we must release it before
1120  * we actually do the filldir itself, so we use a temp buffer..
1121  */
1122 static int get_pid_list(int index, unsigned int *pids)
1123 {
1124         struct task_struct *p;
1125         int nr_pids = 0;
1126
1127         index--;
1128         read_lock(&tasklist_lock);
1129         for_each_task(p) {
1130                 int pid = p->pid;
1131                 if (!pid)
1132                         continue;
1133                 if (--index >= 0)
1134                         continue;
1135                 pids[nr_pids] = pid;
1136                 nr_pids++;
1137                 if (nr_pids >= PROC_MAXPIDS)
1138                         break;
1139         }
1140         read_unlock(&tasklist_lock);
1141         return nr_pids;
1142 }
1143
1144 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1145 {
1146         unsigned int pid_array[PROC_MAXPIDS];
1147         char buf[PROC_NUMBUF];
1148         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1149         unsigned int nr_pids, i;
1150
1151         if (!nr) {
1152                 ino_t ino = fake_ino(0,PROC_PID_INO);
1153                 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1154                         return 0;
1155                 filp->f_pos++;
1156                 nr++;
1157         }
1158
1159         nr_pids = get_pid_list(nr, pid_array);
1160
1161         for (i = 0; i < nr_pids; i++) {
1162                 int pid = pid_array[i];
1163                 ino_t ino = fake_ino(pid,PROC_PID_INO);
1164                 unsigned long j = PROC_NUMBUF;
1165
1166                 do buf[--j] = '0' + (pid % 10); while (pid/=10);
1167
1168                 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0)
1169                         break;
1170                 filp->f_pos++;
1171         }
1172         return 0;
1173 }