kernel/sys.c

   1 /*
   2  *  linux/kernel/sys.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/module.h>
   8 #include <linux/mm.h>
   9 #include <linux/utsname.h>
  10 #include <linux/mman.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/notifier.h>
  13 #include <linux/reboot.h>
  14 #include <linux/prctl.h>
  15 #include <linux/init.h>
  16 #include <linux/highuid.h>
  17
  18 #include <asm/uaccess.h>
  19 #include <asm/io.h>
  20
  21 #ifndef SET_UNALIGN_CTL
  22 # define SET_UNALIGN_CTL(a,b)   (-EINVAL)
  23 #endif
  24 #ifndef GET_UNALIGN_CTL
  25 # define GET_UNALIGN_CTL(a,b)   (-EINVAL)
  26 #endif
  27 #ifndef SET_FPEMU_CTL
  28 # define SET_FPEMU_CTL(a,b)     (-EINVAL)
  29 #endif
  30 #ifndef GET_FPEMU_CTL
  31 # define GET_FPEMU_CTL(a,b)     (-EINVAL)
  32 #endif
  33 #ifndef SET_FPEXC_CTL
  34 # define SET_FPEXC_CTL(a,b)     (-EINVAL)
  35 #endif
  36 #ifndef GET_FPEXC_CTL
  37 # define GET_FPEXC_CTL(a,b)     (-EINVAL)
  38 #endif
  39
  40 /*
  41  * this is where the system-wide overflow UID and GID are defined, for
  42  * architectures that now have 32-bit UID/GID but didn't in the past
  43  */
  44
  45 int overflowuid = DEFAULT_OVERFLOWUID;
  46 int overflowgid = DEFAULT_OVERFLOWGID;
  47
  48 /*
  49  * the same as above, but for filesystems which can only store a 16-bit
  50  * UID and GID. as such, this is needed on all architectures
  51  */
  52
  53 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  54 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  55
  56 /*
  57  * this indicates whether you can reboot with ctrl-alt-del: the default is yes
  58  */
  59
  60 int C_A_D = 1;
  61 int cad_pid = 1;
  62
  63
  64 /*
  65  *      Notifier list for kernel code which wants to be called
  66  *      at shutdown. This is used to stop any idling DMA operations
  67  *      and the like.
  68  */
  69
  70 static struct notifier_block *reboot_notifier_list;
  71 rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
  72
  73 /**
  74  *      notifier_chain_register - Add notifier to a notifier chain
  75  *      @list: Pointer to root list pointer
  76  *      @n: New entry in notifier chain
  77  *
  78  *      Adds a notifier to a notifier chain.
  79  *
  80  *      Currently always returns zero.
  81  */
  82
  83 int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
  84 {
  85         write_lock(&notifier_lock);
  86         while(*list)
  87         {
  88                 if(n->priority > (*list)->priority)
  89                         break;
  90                 list= &((*list)->next);
  91         }
  92         n->next = *list;
  93         *list=n;
  94         write_unlock(&notifier_lock);
  95         return 0;
  96 }
  97
  98 /**
  99  *      notifier_chain_unregister - Remove notifier from a notifier chain
 100  *      @nl: Pointer to root list pointer
 101  *      @n: New entry in notifier chain
 102  *
 103  *      Removes a notifier from a notifier chain.
 104  *
 105  *      Returns zero on success, or %-ENOENT on failure.
 106  */
 107
 108 int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
 109 {
 110         write_lock(&notifier_lock);
 111         while((*nl)!=NULL)
 112         {
 113                 if((*nl)==n)
 114                 {
 115                         *nl=n->next;
 116                         write_unlock(&notifier_lock);
 117                         return 0;
 118                 }
 119                 nl=&((*nl)->next);
 120         }
 121         write_unlock(&notifier_lock);
 122         return -ENOENT;
 123 }
 124
 125 /**
 126  *      notifier_call_chain - Call functions in a notifier chain
 127  *      @n: Pointer to root pointer of notifier chain
 128  *      @val: Value passed unmodified to notifier function
 129  *      @v: Pointer passed unmodified to notifier function
 130  *
 131  *      Calls each function in a notifier chain in turn.
 132  *
 133  *      If the return value of the notifier can be and'd
 134  *      with %NOTIFY_STOP_MASK, then notifier_call_chain
 135  *      will return immediately, with the return value of
 136  *      the notifier function which halted execution.
 137  *      Otherwise, the return value is the return value
 138  *      of the last notifier function called.
 139  */
 140
 141 int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 142 {
 143         int ret=NOTIFY_DONE;
 144         struct notifier_block *nb = *n;
 145
 146         while(nb)
 147         {
 148                 ret=nb->notifier_call(nb,val,v);
 149                 if(ret&NOTIFY_STOP_MASK)
 150                 {
 151                         return ret;
 152                 }
 153                 nb=nb->next;
 154         }
 155         return ret;
 156 }
 157
 158 /**
 159  *      register_reboot_notifier - Register function to be called at reboot time
 160  *      @nb: Info about notifier function to be called
 161  *
 162  *      Registers a function with the list of functions
 163  *      to be called at reboot time.
 164  *
 165  *      Currently always returns zero, as notifier_chain_register
 166  *      always returns zero.
 167  */
 168
 169 int register_reboot_notifier(struct notifier_block * nb)
 170 {
 171         return notifier_chain_register(&reboot_notifier_list, nb);
 172 }
 173
 174 /**
 175  *      unregister_reboot_notifier - Unregister previously registered reboot notifier
 176  *      @nb: Hook to be unregistered
 177  *
 178  *      Unregisters a previously registered reboot
 179  *      notifier function.
 180  *
 181  *      Returns zero on success, or %-ENOENT on failure.
 182  */
 183
 184 int unregister_reboot_notifier(struct notifier_block * nb)
 185 {
 186         return notifier_chain_unregister(&reboot_notifier_list, nb);
 187 }
 188
 189 asmlinkage long sys_ni_syscall(void)
 190 {
 191         return -ENOSYS;
 192 }
 193
 194 static int proc_sel(struct task_struct *p, int which, int who)
 195 {
 196         if(p->pid)
 197         {
 198                 switch (which) {
 199                         case PRIO_PROCESS:
 200                                 if (!who && p == current)
 201                                         return 1;
 202                                 return(p->pid == who);
 203                         case PRIO_PGRP:
 204                                 if (!who)
 205                                         who = current->pgrp;
 206                                 return(p->pgrp == who);
 207                         case PRIO_USER:
 208                                 if (!who)
 209                                         who = current->uid;
 210                                 return(p->uid == who);
 211                 }
 212         }
 213         return 0;
 214 }
 215
 216 asmlinkage long sys_setpriority(int which, int who, int niceval)
 217 {
 218         struct task_struct *p;
 219         int error;
 220
 221         if (which > 2 || which < 0)
 222                 return -EINVAL;
 223
 224         /* normalize: avoid signed division (rounding problems) */
 225         error = -ESRCH;
 226         if (niceval < -20)
 227                 niceval = -20;
 228         if (niceval > 19)
 229                 niceval = 19;
 230
 231         read_lock(&tasklist_lock);
 232         for_each_task(p) {
 233                 if (!proc_sel(p, which, who))
 234                         continue;
 235                 if (p->uid != current->euid &&
 236                         p->uid != current->uid && !capable(CAP_SYS_NICE)) {
 237                         error = -EPERM;
 238                         continue;
 239                 }
 240                 if (error == -ESRCH)
 241                         error = 0;
 242                 if (niceval < p->nice && !capable(CAP_SYS_NICE))
 243                         error = -EACCES;
 244                 else
 245                         p->nice = niceval;
 246         }
 247         read_unlock(&tasklist_lock);
 248
 249         return error;
 250 }
 251
 252 /*
 253  * Ugh. To avoid negative return values, "getpriority()" will
 254  * not return the normal nice-value, but a negated value that
 255  * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 256  * to stay compatible.
 257  */
 258 asmlinkage long sys_getpriority(int which, int who)
 259 {
 260         struct task_struct *p;
 261         long retval = -ESRCH;
 262
 263         if (which > 2 || which < 0)
 264                 return -EINVAL;
 265
 266         read_lock(&tasklist_lock);
 267         for_each_task (p) {
 268                 long niceval;
 269                 if (!proc_sel(p, which, who))
 270                         continue;
 271                 niceval = 20 - p->nice;
 272                 if (niceval > retval)
 273                         retval = niceval;
 274         }
 275         read_unlock(&tasklist_lock);
 276
 277         return retval;
 278 }
 279
 280
 281 /*
 282  * Reboot system call: for obvious reasons only root may call it,
 283  * and even root needs to set up some magic numbers in the registers
 284  * so that some mistake won't make this reboot the whole machine.
 285  * You can also set the meaning of the ctrl-alt-del-key here.
 286  *
 287  * reboot doesn't sync: do that yourself before calling this.
 288  */
 289 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
 290 {
 291         char buffer[256];
 292
 293         /* We only trust the superuser with rebooting the system. */
 294         if (!capable(CAP_SYS_BOOT))
 295                 return -EPERM;
 296
 297         /* For safety, we require "magic" arguments. */
 298         if (magic1 != LINUX_REBOOT_MAGIC1 ||
 299             (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
 300                         magic2 != LINUX_REBOOT_MAGIC2B))
 301                 return -EINVAL;
 302
 303         lock_kernel();
 304         switch (cmd) {
 305         case LINUX_REBOOT_CMD_RESTART:
 306                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 307                 printk(KERN_EMERG "Restarting system.\n");
 308                 machine_restart(NULL);
 309                 break;
 310
 311         case LINUX_REBOOT_CMD_CAD_ON:
 312                 C_A_D = 1;
 313                 break;
 314
 315         case LINUX_REBOOT_CMD_CAD_OFF:
 316                 C_A_D = 0;
 317                 break;
 318
 319         case LINUX_REBOOT_CMD_HALT:
 320                 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
 321                 printk(KERN_EMERG "System halted.\n");
 322                 machine_halt();
 323                 do_exit(0);
 324                 break;
 325
 326         case LINUX_REBOOT_CMD_POWER_OFF:
 327                 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
 328                 printk(KERN_EMERG "Power down.\n");
 329                 machine_power_off();
 330                 do_exit(0);
 331                 break;
 332
 333         case LINUX_REBOOT_CMD_RESTART2:
 334                 if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
 335                         unlock_kernel();
 336                         return -EFAULT;
 337                 }
 338                 buffer[sizeof(buffer) - 1] = '\0';
 339
 340                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
 341                 printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
 342                 machine_restart(buffer);
 343                 break;
 344
 345         default:
 346                 unlock_kernel();
 347                 return -EINVAL;
 348         }
 349         unlock_kernel();
 350         return 0;
 351 }
 352
 353 static void deferred_cad(void *dummy)
 354 {
 355         notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 356         machine_restart(NULL);
 357 }
 358
 359 /*
 360  * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 361  * As it's called within an interrupt, it may NOT sync: the only choice
 362  * is whether to reboot at once, or just ignore the ctrl-alt-del.
 363  */
 364 void ctrl_alt_del(void)
 365 {
 366         static struct tq_struct cad_tq = {
 367                 routine: deferred_cad,
 368         };
 369
 370         if (C_A_D)
 371                 schedule_task(&cad_tq);
 372         else
 373                 kill_proc(cad_pid, SIGINT, 1);
 374 }
 375
 376
 377 /*
 378  * Unprivileged users may change the real gid to the effective gid
 379  * or vice versa.  (BSD-style)
 380  *
 381  * If you set the real gid at all, or set the effective gid to a value not
 382  * equal to the real gid, then the saved gid is set to the new effective gid.
 383  *
 384  * This makes it possible for a setgid program to completely drop its
 385  * privileges, which is often a useful assertion to make when you are doing
 386  * a security audit over a program.
 387  *
 388  * The general idea is that a program which uses just setregid() will be
 389  * 100% compatible with BSD.  A program which uses just setgid() will be
 390  * 100% compatible with POSIX with saved IDs.
 391  *
 392  * SMP: There are not races, the GIDs are checked only by filesystem
 393  *      operations (as far as semantic preservation is concerned).
 394  */
 395 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 396 {
 397         int old_rgid = current->gid;
 398         int old_egid = current->egid;
 399         int new_rgid = old_rgid;
 400         int new_egid = old_egid;
 401
 402         if (rgid != (gid_t) -1) {
 403                 if ((old_rgid == rgid) ||
 404                     (current->egid==rgid) ||
 405                     capable(CAP_SETGID))
 406                         new_rgid = rgid;
 407                 else
 408                         return -EPERM;
 409         }
 410         if (egid != (gid_t) -1) {
 411                 if ((old_rgid == egid) ||
 412                     (current->egid == egid) ||
 413                     (current->sgid == egid) ||
 414                     capable(CAP_SETGID))
 415                         new_egid = egid;
 416                 else {
 417                         return -EPERM;
 418                 }
 419         }
 420         if (new_egid != old_egid)
 421         {
 422                 current->mm->dumpable = 0;
 423                 wmb();
 424         }
 425         if (rgid != (gid_t) -1 ||
 426             (egid != (gid_t) -1 && egid != old_rgid))
 427                 current->sgid = new_egid;
 428         current->fsgid = new_egid;
 429         current->egid = new_egid;
 430         current->gid = new_rgid;
 431         return 0;
 432 }
 433
 434 /*
 435  * setgid() is implemented like SysV w/ SAVED_IDS
 436  *
 437  * SMP: Same implicit races as above.
 438  */
 439 asmlinkage long sys_setgid(gid_t gid)
 440 {
 441         int old_egid = current->egid;
 442
 443         if (capable(CAP_SETGID))
 444         {
 445                 if(old_egid != gid)
 446                 {
 447                         current->mm->dumpable=0;
 448                         wmb();
 449                 }
 450                 current->gid = current->egid = current->sgid = current->fsgid = gid;
 451         }
 452         else if ((gid == current->gid) || (gid == current->sgid))
 453         {
 454                 if(old_egid != gid)
 455                 {
 456                         current->mm->dumpable=0;
 457                         wmb();
 458                 }
 459                 current->egid = current->fsgid = gid;
 460         }
 461         else
 462                 return -EPERM;
 463         return 0;
 464 }
 465
 466 /*
 467  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
 468  * a process after a call to setuid, setreuid, or setresuid.
 469  *
 470  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
 471  *  {r,e,s}uid != 0, the permitted and effective capabilities are
 472  *  cleared.
 473  *
 474  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
 475  *  capabilities of the process are cleared.
 476  *
 477  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
 478  *  capabilities are set to the permitted capabilities.
 479  *
 480  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
 481  *  never happen.
 482  *
 483  *  -astor
 484  *
 485  * cevans - New behaviour, Oct '99
 486  * A process may, via prctl(), elect to keep its capabilities when it
 487  * calls setuid() and switches away from uid==0. Both permitted and
 488  * effective sets will be retained.
 489  * Without this change, it was impossible for a daemon to drop only some
 490  * of its privilege. The call to setuid(!=0) would drop all privileges!
 491  * Keeping uid 0 is not an option because uid 0 owns too many vital
 492  * files..
 493  * Thanks to Olaf Kirch and Peter Benie for spotting this.
 494  */
 495 static inline void cap_emulate_setxuid(int old_ruid, int old_euid,
 496                                        int old_suid)
 497 {
 498         if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
 499             (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
 500             !current->keep_capabilities) {
 501                 cap_clear(current->cap_permitted);
 502                 cap_clear(current->cap_effective);
 503         }
 504         if (old_euid == 0 && current->euid != 0) {
 505                 cap_clear(current->cap_effective);
 506         }
 507         if (old_euid != 0 && current->euid == 0) {
 508                 current->cap_effective = current->cap_permitted;
 509         }
 510 }
 511
 512 static int set_user(uid_t new_ruid, int dumpclear)
 513 {
 514         struct user_struct *new_user;
 515
 516         new_user = alloc_uid(new_ruid);
 517         if (!new_user)
 518                 return -EAGAIN;
 519         switch_uid(new_user);
 520
 521         if(dumpclear)
 522         {
 523                 current->mm->dumpable = 0;
 524                 wmb();
 525         }
 526         current->uid = new_ruid;
 527         return 0;
 528 }
 529
 530 /*
 531  * Unprivileged users may change the real uid to the effective uid
 532  * or vice versa.  (BSD-style)
 533  *
 534  * If you set the real uid at all, or set the effective uid to a value not
 535  * equal to the real uid, then the saved uid is set to the new effective uid.
 536  *
 537  * This makes it possible for a setuid program to completely drop its
 538  * privileges, which is often a useful assertion to make when you are doing
 539  * a security audit over a program.
 540  *
 541  * The general idea is that a program which uses just setreuid() will be
 542  * 100% compatible with BSD.  A program which uses just setuid() will be
 543  * 100% compatible with POSIX with saved IDs.
 544  */
 545 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 546 {
 547         int old_ruid, old_euid, old_suid, new_ruid, new_euid;
 548
 549         new_ruid = old_ruid = current->uid;
 550         new_euid = old_euid = current->euid;
 551         old_suid = current->suid;
 552
 553         if (ruid != (uid_t) -1) {
 554                 new_ruid = ruid;
 555                 if ((old_ruid != ruid) &&
 556                     (current->euid != ruid) &&
 557                     !capable(CAP_SETUID))
 558                         return -EPERM;
 559         }
 560
 561         if (euid != (uid_t) -1) {
 562                 new_euid = euid;
 563                 if ((old_ruid != euid) &&
 564                     (current->euid != euid) &&
 565                     (current->suid != euid) &&
 566                     !capable(CAP_SETUID))
 567                         return -EPERM;
 568         }
 569
 570         if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
 571                 return -EAGAIN;
 572
 573         if (new_euid != old_euid)
 574         {
 575                 current->mm->dumpable=0;
 576                 wmb();
 577         }
 578         current->fsuid = current->euid = new_euid;
 579         if (ruid != (uid_t) -1 ||
 580             (euid != (uid_t) -1 && euid != old_ruid))
 581                 current->suid = current->euid;
 582         current->fsuid = current->euid;
 583
 584         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 585                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 586         }
 587
 588         return 0;
 589 }
 590
 591
 592
 593 /*
 594  * setuid() is implemented like SysV with SAVED_IDS
 595  *
 596  * Note that SAVED_ID's is deficient in that a setuid root program
 597  * like sendmail, for example, cannot set its uid to be a normal
 598  * user and then switch back, because if you're root, setuid() sets
 599  * the saved uid too.  If you don't like this, blame the bright people
 600  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 601  * will allow a root program to temporarily drop privileges and be able to
 602  * regain them by swapping the real and effective uid.
 603  */
 604 asmlinkage long sys_setuid(uid_t uid)
 605 {
 606         int old_euid = current->euid;
 607         int old_ruid, old_suid, new_ruid, new_suid;
 608
 609         old_ruid = new_ruid = current->uid;
 610         old_suid = current->suid;
 611         new_suid = old_suid;
 612
 613         if (capable(CAP_SETUID)) {
 614                 if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
 615                         return -EAGAIN;
 616                 new_suid = uid;
 617         } else if ((uid != current->uid) && (uid != new_suid))
 618                 return -EPERM;
 619
 620         if (old_euid != uid)
 621         {
 622                 current->mm->dumpable = 0;
 623                 wmb();
 624         }
 625         current->fsuid = current->euid = uid;
 626         current->suid = new_suid;
 627
 628         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 629                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 630         }
 631
 632         return 0;
 633 }
 634
 635
 636 /*
 637  * This function implements a generic ability to update ruid, euid,
 638  * and suid.  This allows you to implement the 4.4 compatible seteuid().
 639  */
 640 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 641 {
 642         int old_ruid = current->uid;
 643         int old_euid = current->euid;
 644         int old_suid = current->suid;
 645
 646         if (!capable(CAP_SETUID)) {
 647                 if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 648                     (ruid != current->euid) && (ruid != current->suid))
 649                         return -EPERM;
 650                 if ((euid != (uid_t) -1) && (euid != current->uid) &&
 651                     (euid != current->euid) && (euid != current->suid))
 652                         return -EPERM;
 653                 if ((suid != (uid_t) -1) && (suid != current->uid) &&
 654                     (suid != current->euid) && (suid != current->suid))
 655                         return -EPERM;
 656         }
 657         if (ruid != (uid_t) -1) {
 658                 if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
 659                         return -EAGAIN;
 660         }
 661         if (euid != (uid_t) -1) {
 662                 if (euid != current->euid)
 663                 {
 664                         current->mm->dumpable = 0;
 665                         wmb();
 666                 }
 667                 current->euid = euid;
 668         }
 669         current->fsuid = current->euid;
 670         if (suid != (uid_t) -1)
 671                 current->suid = suid;
 672
 673         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 674                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 675         }
 676
 677         return 0;
 678 }
 679
 680 asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
 681 {
 682         int retval;
 683
 684         if (!(retval = put_user(current->uid, ruid)) &&
 685             !(retval = put_user(current->euid, euid)))
 686                 retval = put_user(current->suid, suid);
 687
 688         return retval;
 689 }
 690
 691 /*
 692  * Same as above, but for rgid, egid, sgid.
 693  */
 694 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 695 {
 696         if (!capable(CAP_SETGID)) {
 697                 if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 698                     (rgid != current->egid) && (rgid != current->sgid))
 699                         return -EPERM;
 700                 if ((egid != (gid_t) -1) && (egid != current->gid) &&
 701                     (egid != current->egid) && (egid != current->sgid))
 702                         return -EPERM;
 703                 if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 704                     (sgid != current->egid) && (sgid != current->sgid))
 705                         return -EPERM;
 706         }
 707         if (egid != (gid_t) -1) {
 708                 if (egid != current->egid)
 709                 {
 710                         current->mm->dumpable = 0;
 711                         wmb();
 712                 }
 713                 current->egid = egid;
 714         }
 715         current->fsgid = current->egid;
 716         if (rgid != (gid_t) -1)
 717                 current->gid = rgid;
 718         if (sgid != (gid_t) -1)
 719                 current->sgid = sgid;
 720         return 0;
 721 }
 722
 723 asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
 724 {
 725         int retval;
 726
 727         if (!(retval = put_user(current->gid, rgid)) &&
 728             !(retval = put_user(current->egid, egid)))
 729                 retval = put_user(current->sgid, sgid);
 730
 731         return retval;
 732 }
 733
 734
 735 /*
 736  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 737  * is used for "access()" and for the NFS daemon (letting nfsd stay at
 738  * whatever uid it wants to). It normally shadows "euid", except when
 739  * explicitly set by setfsuid() or for access..
 740  */
 741 asmlinkage long sys_setfsuid(uid_t uid)
 742 {
 743         int old_fsuid;
 744
 745         old_fsuid = current->fsuid;
 746         if (uid == current->uid || uid == current->euid ||
 747             uid == current->suid || uid == current->fsuid ||
 748             capable(CAP_SETUID))
 749         {
 750                 if (uid != old_fsuid)
 751                 {
 752                         current->mm->dumpable = 0;
 753                         wmb();
 754                 }
 755                 current->fsuid = uid;
 756         }
 757
 758         /* We emulate fsuid by essentially doing a scaled-down version
 759          * of what we did in setresuid and friends. However, we only
 760          * operate on the fs-specific bits of the process' effective
 761          * capabilities
 762          *
 763          * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
 764          *          if not, we might be a bit too harsh here.
 765          */
 766
 767         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 768                 if (old_fsuid == 0 && current->fsuid != 0) {
 769                         cap_t(current->cap_effective) &= ~CAP_FS_MASK;
 770                 }
 771                 if (old_fsuid != 0 && current->fsuid == 0) {
 772                         cap_t(current->cap_effective) |=
 773                                 (cap_t(current->cap_permitted) & CAP_FS_MASK);
 774                 }
 775         }
 776
 777         return old_fsuid;
 778 }
 779
 780 /*
 781  * Samma på svenska..
 782  */
 783 asmlinkage long sys_setfsgid(gid_t gid)
 784 {
 785         int old_fsgid;
 786
 787         old_fsgid = current->fsgid;
 788         if (gid == current->gid || gid == current->egid ||
 789             gid == current->sgid || gid == current->fsgid ||
 790             capable(CAP_SETGID))
 791         {
 792                 if (gid != old_fsgid)
 793                 {
 794                         current->mm->dumpable = 0;
 795                         wmb();
 796                 }
 797                 current->fsgid = gid;
 798         }
 799         return old_fsgid;
 800 }
 801
 802 asmlinkage long sys_times(struct tms * tbuf)
 803 {
 804         /*
 805          *      In the SMP world we might just be unlucky and have one of
 806          *      the times increment as we use it. Since the value is an
 807          *      atomically safe type this is just fine. Conceptually its
 808          *      as if the syscall took an instant longer to occur.
 809          */
 810         if (tbuf)
 811                 if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
 812                         return -EFAULT;
 813         return jiffies;
 814 }
 815
 816 /*
 817  * This needs some heavy checking ...
 818  * I just haven't the stomach for it. I also don't fully
 819  * understand sessions/pgrp etc. Let somebody who does explain it.
 820  *
 821  * OK, I think I have the protection semantics right.... this is really
 822  * only important on a multi-user system anyway, to make sure one user
 823  * can't send a signal to a process owned by another.  -TYT, 12/12/91
 824  *
 825  * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 826  * LBT 04.03.94
 827  */
 828
 829 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 830 {
 831         struct task_struct * p;
 832         int err = -EINVAL;
 833
 834         if (!pid)
 835                 pid = current->pid;
 836         if (!pgid)
 837                 pgid = pid;
 838         if (pgid < 0)
 839                 return -EINVAL;
 840
 841         /* From this point forward we keep holding onto the tasklist lock
 842          * so that our parent does not change from under us. -DaveM
 843          */
 844         read_lock(&tasklist_lock);
 845
 846         err = -ESRCH;
 847         p = find_task_by_pid(pid);
 848         if (!p)
 849                 goto out;
 850
 851         if (p->p_pptr == current || p->p_opptr == current) {
 852                 err = -EPERM;
 853                 if (p->session != current->session)
 854                         goto out;
 855                 err = -EACCES;
 856                 if (p->did_exec)
 857                         goto out;
 858         } else if (p != current)
 859                 goto out;
 860         err = -EPERM;
 861         if (p->leader)
 862                 goto out;
 863         if (pgid != pid) {
 864                 struct task_struct * tmp;
 865                 for_each_task (tmp) {
 866                         if (tmp->pgrp == pgid &&
 867                             tmp->session == current->session)
 868                                 goto ok_pgid;
 869                 }
 870                 goto out;
 871         }
 872
 873 ok_pgid:
 874         p->pgrp = pgid;
 875         err = 0;
 876 out:
 877         /* All paths lead to here, thus we are safe. -DaveM */
 878         read_unlock(&tasklist_lock);
 879         return err;
 880 }
 881
 882 asmlinkage long sys_getpgid(pid_t pid)
 883 {
 884         if (!pid) {
 885                 return current->pgrp;
 886         } else {
 887                 int retval;
 888                 struct task_struct *p;
 889
 890                 read_lock(&tasklist_lock);
 891                 p = find_task_by_pid(pid);
 892
 893                 retval = -ESRCH;
 894                 if (p)
 895                         retval = p->pgrp;
 896                 read_unlock(&tasklist_lock);
 897                 return retval;
 898         }
 899 }
 900
 901 asmlinkage long sys_getpgrp(void)
 902 {
 903         /* SMP - assuming writes are word atomic this is fine */
 904         return current->pgrp;
 905 }
 906
 907 asmlinkage long sys_getsid(pid_t pid)
 908 {
 909         if (!pid) {
 910                 return current->session;
 911         } else {
 912                 int retval;
 913                 struct task_struct *p;
 914
 915                 read_lock(&tasklist_lock);
 916                 p = find_task_by_pid(pid);
 917
 918                 retval = -ESRCH;
 919                 if(p)
 920                         retval = p->session;
 921                 read_unlock(&tasklist_lock);
 922                 return retval;
 923         }
 924 }
 925
 926 asmlinkage long sys_setsid(void)
 927 {
 928         struct task_struct * p;
 929         int err = -EPERM;
 930
 931         read_lock(&tasklist_lock);
 932         for_each_task(p) {
 933                 if (p->pgrp == current->pid)
 934                         goto out;
 935         }
 936
 937         current->leader = 1;
 938         current->session = current->pgrp = current->pid;
 939         current->tty = NULL;
 940         current->tty_old_pgrp = 0;
 941         err = current->pgrp;
 942 out:
 943         read_unlock(&tasklist_lock);
 944         return err;
 945 }
 946
 947 /*
 948  * Supplementary group IDs
 949  */
 950 asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
 951 {
 952         int i;
 953
 954         /*
 955          *      SMP: Nobody else can change our grouplist. Thus we are
 956          *      safe.
 957          */
 958
 959         if (gidsetsize < 0)
 960                 return -EINVAL;
 961         i = current->ngroups;
 962         if (gidsetsize) {
 963                 if (i > gidsetsize)
 964                         return -EINVAL;
 965                 if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
 966                         return -EFAULT;
 967         }
 968         return i;
 969 }
 970
 971 /*
 972  *      SMP: Our groups are not shared. We can copy to/from them safely
 973  *      without another task interfering.
 974  */
 975
 976 asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
 977 {
 978         if (!capable(CAP_SETGID))
 979                 return -EPERM;
 980         if ((unsigned) gidsetsize > NGROUPS)
 981                 return -EINVAL;
 982         if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
 983                 return -EFAULT;
 984         current->ngroups = gidsetsize;
 985         return 0;
 986 }
 987
 988 static int supplemental_group_member(gid_t grp)
 989 {
 990         int i = current->ngroups;
 991
 992         if (i) {
 993                 gid_t *groups = current->groups;
 994                 do {
 995                         if (*groups == grp)
 996                                 return 1;
 997                         groups++;
 998                         i--;
 999                 } while (i);
1000         }
1001         return 0;
1002 }
1003
1004 /*
1005  * Check whether we're fsgid/egid or in the supplemental group..
1006  */
1007 int in_group_p(gid_t grp)
1008 {
1009         int retval = 1;
1010         if (grp != current->fsgid)
1011                 retval = supplemental_group_member(grp);
1012         return retval;
1013 }
1014
1015 int in_egroup_p(gid_t grp)
1016 {
1017         int retval = 1;
1018         if (grp != current->egid)
1019                 retval = supplemental_group_member(grp);
1020         return retval;
1021 }
1022
/*
 * Read/write semaphore around system_utsname: taken for reading by
 * sys_newuname() and sys_gethostname(), and for writing by
 * sys_sethostname() and sys_setdomainname().
 */
1023 DECLARE_RWSEM(uts_sem);
1024
1025 asmlinkage long sys_newuname(struct new_utsname * name)
1026 {
1027         int errno = 0;
1028
1029         down_read(&uts_sem);
1030         if (copy_to_user(name,&system_utsname,sizeof *name))
1031                 errno = -EFAULT;
1032         up_read(&uts_sem);
1033         return errno;
1034 }
1035
1036 asmlinkage long sys_sethostname(char *name, int len)
1037 {
1038         int errno;
1039         char tmp[__NEW_UTS_LEN];
1040
1041         if (!capable(CAP_SYS_ADMIN))
1042                 return -EPERM;
1043         if (len < 0 || len > __NEW_UTS_LEN)
1044                 return -EINVAL;
1045         down_write(&uts_sem);
1046         errno = -EFAULT;
1047         if (!copy_from_user(tmp, name, len)) {
1048                 memcpy(system_utsname.nodename, tmp, len);
1049                 system_utsname.nodename[len] = 0;
1050                 errno = 0;
1051         }
1052         up_write(&uts_sem);
1053         return errno;
1054 }
1055
1056 asmlinkage long sys_gethostname(char *name, int len)
1057 {
1058         int i, errno;
1059
1060         if (len < 0)
1061                 return -EINVAL;
1062         down_read(&uts_sem);
1063         i = 1 + strlen(system_utsname.nodename);
1064         if (i > len)
1065                 i = len;
1066         errno = 0;
1067         if (copy_to_user(name, system_utsname.nodename, i))
1068                 errno = -EFAULT;
1069         up_read(&uts_sem);
1070         return errno;
1071 }
1072
1073 /*
1074  * Only setdomainname; getdomainname can be implemented by calling
1075  * uname()
1076  */
1077 asmlinkage long sys_setdomainname(char *name, int len)
1078 {
1079         int errno;
1080         char tmp[__NEW_UTS_LEN];
1081
1082         if (!capable(CAP_SYS_ADMIN))
1083                 return -EPERM;
1084         if (len < 0 || len > __NEW_UTS_LEN)
1085                 return -EINVAL;
1086
1087         down_write(&uts_sem);
1088         errno = -EFAULT;
1089         if (!copy_from_user(tmp, name, len)) {
1090                 memcpy(system_utsname.domainname, tmp, len);
1091                 system_utsname.domainname[len] = 0;
1092                 errno = 0;
1093         }
1094         up_write(&uts_sem);
1095         return errno;
1096 }
1097
1098 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
1099 {
1100         if (resource >= RLIM_NLIMITS)
1101                 return -EINVAL;
1102         else
1103                 return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1104                         ? -EFAULT : 0;
1105 }
1106
1107 #if !defined(__ia64__)
1108
1109 /*
1110  *      Back compatibility for getrlimit. Needed for some apps.
1111  */
1112
1113 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
1114 {
1115         struct rlimit x;
1116         if (resource >= RLIM_NLIMITS)
1117                 return -EINVAL;
1118
1119         memcpy(&x, current->rlim + resource, sizeof(*rlim));
1120         if(x.rlim_cur > 0x7FFFFFFF)
1121                 x.rlim_cur = 0x7FFFFFFF;
1122         if(x.rlim_max > 0x7FFFFFFF)
1123                 x.rlim_max = 0x7FFFFFFF;
1124         return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1125 }
1126
1127 #endif
1128
1129 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
1130 {
1131         struct rlimit new_rlim, *old_rlim;
1132
1133         if (resource >= RLIM_NLIMITS)
1134                 return -EINVAL;
1135         if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1136                 return -EFAULT;
1137        if (new_rlim.rlim_cur > new_rlim.rlim_max)
1138                return -EINVAL;
1139         old_rlim = current->rlim + resource;
1140         if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1141              (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1142             !capable(CAP_SYS_RESOURCE))
1143                 return -EPERM;
1144         if (resource == RLIMIT_NOFILE) {
1145                 if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1146                         return -EPERM;
1147         }
1148         *old_rlim = new_rlim;
1149         return 0;
1150 }
1151
1152 /*
1153  * It would make sense to put struct rusage in the task_struct,
1154  * except that would make the task_struct be *really big*.  After
1155  * task_struct gets moved into malloc'ed memory, it would
1156  * make sense to do this.  It will make moving the rest of the information
1157  * a lot simpler!  (Which we're not doing right now because we're not
1158  * measuring them yet).
1159  *
1160  * This is SMP safe.  Either we are called from sys_getrusage on ourselves
1161  * below (we know we aren't going to exit/disappear and only we change our
1162  * rusage counters), or we are called from wait4() on a process which is
1163  * either stopped or zombied.  In the zombied case the task won't get
1164  * reaped till shortly after the call to getrusage(), in both cases the
1165  * task being examined is in a frozen state so the counters won't change.
1166  *
1167  * FIXME! Get the fault counts properly!
1168  */
1169 int getrusage(struct task_struct *p, int who, struct rusage *ru)
1170 {
1171         struct rusage r;
1172
1173         memset((char *) &r, 0, sizeof(r));
1174         switch (who) {
1175                 case RUSAGE_SELF:
1176                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
1177                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
1178                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
1179                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
1180                         r.ru_minflt = p->min_flt;
1181                         r.ru_majflt = p->maj_flt;
1182                         r.ru_nswap = p->nswap;
1183                         break;
1184                 case RUSAGE_CHILDREN:
1185                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
1186                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
1187                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
1188                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
1189                         r.ru_minflt = p->cmin_flt;
1190                         r.ru_majflt = p->cmaj_flt;
1191                         r.ru_nswap = p->cnswap;
1192                         break;
1193                 default:
1194                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
1195                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
1196                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
1197                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
1198                         r.ru_minflt = p->min_flt + p->cmin_flt;
1199                         r.ru_majflt = p->maj_flt + p->cmaj_flt;
1200                         r.ru_nswap = p->nswap + p->cnswap;
1201                         break;
1202         }
1203         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1204 }
1205
1206 asmlinkage long sys_getrusage(int who, struct rusage *ru)
1207 {
1208         if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1209                 return -EINVAL;
1210         return getrusage(current, who, ru);
1211 }
1212
1213 asmlinkage long sys_umask(int mask)
1214 {
1215         mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1216         return mask;
1217 }
1218
1219 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1220                           unsigned long arg4, unsigned long arg5)
1221 {
1222         int error = 0;
1223         int sig;
1224
1225         switch (option) {
1226                 case PR_SET_PDEATHSIG:
1227                         sig = arg2;
1228                         if (sig < 0 || sig > _NSIG) {
1229                                 error = -EINVAL;
1230                                 break;
1231                         }
1232                         current->pdeath_signal = sig;
1233                         break;
1234                 case PR_GET_PDEATHSIG:
1235                         error = put_user(current->pdeath_signal, (int *)arg2);
1236                         break;
1237                 case PR_GET_DUMPABLE:
1238                         if (is_dumpable(current))
1239                                 error = 1;
1240                         break;
1241                 case PR_SET_DUMPABLE:
1242                         if (arg2 != 0 && arg2 != 1) {
1243                                 error = -EINVAL;
1244                                 break;
1245                         }
1246                         current->mm->dumpable = arg2;
1247                         break;
1248
1249                 case PR_SET_UNALIGN:
1250                         error = SET_UNALIGN_CTL(current, arg2);
1251                         break;
1252                 case PR_GET_UNALIGN:
1253                         error = GET_UNALIGN_CTL(current, arg2);
1254                         break;
1255                 case PR_SET_FPEMU:
1256                         error = SET_FPEMU_CTL(current, arg2);
1257                         break;
1258                 case PR_GET_FPEMU:
1259                         error = GET_FPEMU_CTL(current, arg2);
1260                         break;
1261                 case PR_SET_FPEXC:
1262                         error = SET_FPEXC_CTL(current, arg2);
1263                         break;
1264                 case PR_GET_FPEXC:
1265                         error = GET_FPEXC_CTL(current, arg2);
1266                         break;
1267
1268                 case PR_GET_KEEPCAPS:
1269                         if (current->keep_capabilities)
1270                                 error = 1;
1271                         break;
1272                 case PR_SET_KEEPCAPS:
1273                         if (arg2 != 0 && arg2 != 1) {
1274                                 error = -EINVAL;
1275                                 break;
1276                         }
1277                         current->keep_capabilities = arg2;
1278                         break;
1279                 default:
1280                         error = -EINVAL;
1281                         break;
1282         }
1283         return error;
1284 }
1285
/*
 * Symbols exported from this file: the notifier chain primitives, the
 * reboot notifier registration pair, and the two group-membership
 * predicates.
 */
1286 EXPORT_SYMBOL(notifier_chain_register);
1287 EXPORT_SYMBOL(notifier_chain_unregister);
1288 EXPORT_SYMBOL(notifier_call_chain);
1289 EXPORT_SYMBOL(register_reboot_notifier);
1290 EXPORT_SYMBOL(unregister_reboot_notifier);
1291 EXPORT_SYMBOL(in_group_p);
1292 EXPORT_SYMBOL(in_egroup_p);