/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 */
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/timex.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>

#include <asm/uaccess.h>
/*
 * Timekeeping variables
 */
long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */

/* The current time */
struct timeval xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500. */
int tickadj = 500/HZ ? : 1;		/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
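/*
 * Illustrative usage of the timer task queue (a sketch, not part of the
 * original file; the "my_" names are made up).  A driver queues work to
 * be run from the TQUEUE_BH bottom half, which do_timer() below marks
 * whenever tq_timer is non-empty:
 *
 *	static void my_routine(void *data)
 *	{
 *		... deferred work, runs in bottom-half context ...
 *	}
 *
 *	static struct tq_struct my_task = {
 *		routine:	my_routine,
 *		data:		NULL,
 *	};
 *
 *	queue_task(&my_task, &tq_timer);
 */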
/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase;			/* phase offset (scaled us)	*/
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
					/* frequency offset (scaled ppm) */
long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
long time_reftime;			/* time at last adjustment (s)	*/
long time_adjust;
long time_adjust_step;
extern int do_setitimer(int, struct itimerval *, struct itimerval *);

unsigned long volatile jiffies;

unsigned int * prof_buffer;
unsigned long prof_len;
unsigned long prof_shift;
/*
 * Event timer code
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
struct timer_vec {
	int index;
	struct list_head vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct list_head vec[TVR_SIZE];
};

static struct timer_vec tv5;
static struct timer_vec tv4;
static struct timer_vec tv3;
static struct timer_vec tv2;
static struct timer_vec_root tv1;
static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

static struct list_head * run_timer_list_running;

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
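/*
 * A worked example of the wheel geometry (added for clarity, assuming
 * the TVR_BITS == 8, TVN_BITS == 6 values above): tv1 is the root
 * vector with 256 one-jiffy buckets, so it covers expiries up to 256
 * jiffies away.  Each outer vector multiplies the reach by 64:
 *
 *	tv1: idx <  2^8          (256 jiffies, ~2.5 s at HZ=100)
 *	tv2: idx <  2^14         (~2.7 min)
 *	tv3: idx <  2^20         (~2.9 h)
 *	tv4: idx <  2^26         (~7.8 days)
 *	tv5: idx <= 0xffffffff   (everything else that fits in 32 bits)
 *
 * A timer in an outer vector is never expired directly; it is cascaded
 * one level down each time the lower vector wraps around.
 */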
void init_timervecs (void)
{
	int i;

	for (i = 0; i < TVN_SIZE; i++) {
		INIT_LIST_HEAD(tv5.vec + i);
		INIT_LIST_HEAD(tv4.vec + i);
		INIT_LIST_HEAD(tv3.vec + i);
		INIT_LIST_HEAD(tv2.vec + i);
	}
	for (i = 0; i < TVR_SIZE; i++)
		INIT_LIST_HEAD(tv1.vec + i);
}
static unsigned long timer_jiffies;
static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;
	struct list_head * vec;

	if (run_timer_list_running)
		vec = run_timer_list_running;
	else if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = tv1.vec + tv1.index;
	} else if (idx <= 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = tv5.vec + i;
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		INIT_LIST_HEAD(&timer->list);
		return;
	}
	/*
	 * Timers are FIFO!
	 */
	list_add(&timer->list, vec->prev);
}
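/*
 * A worked example of the bucket arithmetic above (added for clarity):
 * with timer_jiffies == 1000 and timer->expires == 1100, idx is 100,
 * which is < TVR_SIZE (256), so the timer lands in the root vector at
 * slot 1100 & 255 == 76.  With expires == 10000, idx is 9000, which
 * falls in the tv2 range, at slot (10000 >> 8) & 63 == 39.
 */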
/* Initialize both explicitly - let's try to have them in the same cache line */
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_SMP
volatile struct timer_list * volatile running_timer;
#define timer_enter(t) do { running_timer = t; mb(); } while (0)
#define timer_exit() do { running_timer = NULL; } while (0)
#define timer_is_running(t) (running_timer == t)
#define timer_synchronize(t) while (timer_is_running(t)) barrier()
#else
#define timer_enter(t)		do { } while (0)
#define timer_exit()		do { } while (0)
#endif
void add_timer(struct timer_list *timer)
{
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	if (timer_pending(timer))
		goto bug;
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return;
bug:
	spin_unlock_irqrestore(&timerlist_lock, flags);
	printk("bug: kernel timer added twice at %p.\n",
	       __builtin_return_address(0));
}
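/*
 * Illustrative usage (a sketch, not part of the original file; the
 * "my_" names are made up).  A one-shot timer that fires in one second:
 *
 *	static void my_timeout(unsigned long data)
 *	{
 *		printk("timed out, data=%lu\n", data);
 *	}
 *
 *	static struct timer_list my_timer;
 *
 *	init_timer(&my_timer);
 *	my_timer.function = my_timeout;
 *	my_timer.data = 0;
 *	my_timer.expires = jiffies + HZ;
 *	add_timer(&my_timer);
 */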
static inline int detach_timer (struct timer_list *timer)
{
	if (!timer_pending(timer))
		return 0;
	list_del(&timer->list);
	return 1;
}

int mod_timer(struct timer_list *timer, unsigned long expires)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	timer->expires = expires;
	ret = detach_timer(timer);
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
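/*
 * Illustrative usage (a sketch; "my_timer" as above): mod_timer() is the
 * usual way to kick a watchdog-style timer, and is equivalent to
 * del_timer() followed by updating expires and calling add_timer(), but
 * done atomically under the timer lock:
 *
 *	mod_timer(&my_timer, jiffies + 30 * HZ);
 *
 * The return value says whether a still-pending timer was re-armed (1)
 * or a new/already-expired one was started (0).
 */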
int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	ret = detach_timer(timer);
	timer->list.next = timer->list.prev = NULL;
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
#ifdef CONFIG_SMP
void sync_timers(void)
{
	spin_unlock_wait(&global_bh_lock);
}

/*
 * SMP specific function to delete a periodic timer.  The caller must by
 * some means prevent the timer from being restarted; upon exit the timer
 * is not queued and the handler is not running on any CPU.  Returns the
 * number of times the timer was deleted (for reference counting).
 */
int del_timer_sync(struct timer_list * timer)
{
	int ret = 0;

	for (;;) {
		unsigned long flags;
		int running;

		spin_lock_irqsave(&timerlist_lock, flags);
		ret += detach_timer(timer);
		timer->list.next = timer->list.prev = NULL;
		running = timer_is_running(timer);
		spin_unlock_irqrestore(&timerlist_lock, flags);
		if (!running)
			break;
		timer_synchronize(timer);
	}
	return ret;
}
#endif
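/*
 * Illustrative usage (a sketch; "my_timer" as above).  On module unload,
 * a self-rearming timer must be torn down with del_timer_sync() rather
 * than plain del_timer(), so that its handler cannot still be running on
 * another CPU when the module text is freed:
 *
 *	my_timer_stop_rearming();	-- made-up helper: prevents re-add
 *	del_timer_sync(&my_timer);
 */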
static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct list_head *head, *curr, *next;

	head = tv->vec + tv->index;
	curr = head->next;
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (curr != head) {
		struct timer_list *tmp;

		tmp = list_entry(curr, struct timer_list, list);
		next = curr->next;
		list_del(curr); /* not strictly needed, list is cleared below */
		internal_add_timer(tmp);
		curr = next;
	}
	INIT_LIST_HEAD(head);
	tv->index = (tv->index + 1) & TVN_MASK;
}
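/*
 * A worked example of the cascade cadence (added for clarity): tv1.index
 * wraps every TVR_SIZE == 256 ticks, and that is when tv2's current
 * bucket is spilled down into tv1.  tv2.index in turn wraps every 64 of
 * those cascades, i.e. every 2^14 ticks, which triggers a tv3 cascade,
 * and so on.  A timer is therefore re-hashed at most four times before
 * it finally expires out of tv1.
 */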
static inline void run_timer_list(void)
{
	spin_lock_irq(&timerlist_lock);
	while ((long)(jiffies - timer_jiffies) >= 0) {
		LIST_HEAD(queued);
		struct list_head *head, *curr;
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
		run_timer_list_running = &queued;
repeat:
		head = tv1.vec + tv1.index;
		curr = head->next;
		if (curr != head) {
			struct timer_list *timer;
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(curr, struct timer_list, list);
			fn = timer->function;
			data = timer->data;

			detach_timer(timer);
			timer->list.next = timer->list.prev = NULL;
			timer_enter(timer);
			spin_unlock_irq(&timerlist_lock);
			fn(data);
			spin_lock_irq(&timerlist_lock);
			timer_exit();
			goto repeat;
		}
		run_timer_list_running = NULL;
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;

		curr = queued.next;
		while (curr != &queued) {
			struct timer_list *timer;

			timer = list_entry(curr, struct timer_list, list);
			curr = curr->next;
			internal_add_timer(timer);
		}
	}
	spin_unlock_irq(&timerlist_lock);
}
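/*
 * Illustrative sketch (not part of the original file): a handler that
 * re-arms its own timer, the case the "queued" list above exists for.
 * While run_timer_list_running is set, internal_add_timer() parks any
 * newly added timer on that local list instead of hashing it, so a
 * timer re-armed with expires == jiffies cannot land in the bucket
 * currently being drained; it is re-hashed only after timer_jiffies
 * has been advanced:
 *
 *	static void my_periodic(unsigned long data)
 *	{
 *		my_do_work(data);		-- made-up helper
 *		mod_timer(&my_timer, jiffies + HZ);
 *	}
 */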
spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
static void second_overflow(void)
{
	long ltemp;

	/* Bump the maxerror field */
	time_maxerror += time_tolerance >> SHIFT_USEC;
	if (time_maxerror > NTP_PHASE_LIMIT) {
		time_maxerror = NTP_PHASE_LIMIT;
		time_status |= STA_UNSYNC;
	}
	/*
	 * Leap second processing. If in leap-insert state at
	 * the end of the day, the system clock is set back one
	 * second; if in leap-delete state, the system clock is
	 * set ahead one second. The microtime() routine or
	 * external clock driver will insure that reported time
	 * is always monotonic. The ugly divides should be
	 * replaced.
	 */
	switch (time_state) {
	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;
	case TIME_INS:
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;
	case TIME_DEL:
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;
	case TIME_OOP:
		time_state = TIME_WAIT;
		break;
	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}
	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}
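/*
 * Worked example (added for clarity; assumes the usual timex.h values
 * MAXPHASE = 512000 us, MINSEC = 16 s, SHIFT_KG = 6): the per-second
 * phase slew is clamped to MAXPHASE/MINSEC = 32000 us, i.e. the clock
 * is never skewed by more than 3.2% while amortizing a large offset.
 * With the default time_constant = 2, PLL mode applies offset >> 8, so
 * a 256 us offset is corrected at about 1 us per second.
 */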
	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
#if HZ == 100
	/*
	 * Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ((time_adjust_step = time_adjust) != 0) {
		/* We are doing an adjtime thing.
		 *
		 * Prepare time_adjust_step to be within bounds.
		 * Note that a positive time_adjust means we want the clock
		 * to run faster.
		 *
		 * Limit the amount of the step to be in the range
		 * -tickadj .. +tickadj
		 */
		if (time_adjust > tickadj)
			time_adjust_step = tickadj;
		else if (time_adjust < -tickadj)
			time_adjust_step = -tickadj;

		/* Reduce by this step the amount of time left */
		time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}
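/*
 * Worked example (added for clarity): after adjtime() requests +5000 us,
 * time_adjust is 5000.  At HZ=100, tick is 10000 us and tickadj is 5, so
 * each tick adds at most 5 extra microseconds: the clock gains 500 us
 * per second, and the full 5000 us adjustment is amortized over 10
 * seconds rather than being applied as one visible step.
 */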
/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing this this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks)
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
		xtime.tv_usec -= 1000000;
		xtime.tv_sec++;
		second_overflow();
	}
}
static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	psecs = (p->times.tms_utime += user);
	psecs += (p->times.tms_stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}
static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		it_virt -= ticks;
		if (!it_virt) {
			it_virt = p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt;
	}
}
static inline void do_it_prof(struct task_struct *p)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (--it_prof == 0) {
			it_prof = p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof;
	}
}
void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu)
{
	p->per_cpu_utime[cpu] += user;
	p->per_cpu_stime[cpu] += system;
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p);
}
/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
	if (p->pid) {
		if (--p->counter <= 0) {
			p->counter = 0;
			/*
			 * SCHED_FIFO is priority preemption, so this is
			 * not the place to decide whether to reschedule a
			 * SCHED_FIFO task or not - Bhavesh Davda
			 */
			if (p->policy != SCHED_FIFO) {
				p->need_resched = 1;
			}
		}
		if (p->nice > 0)
			kstat.per_cpu_nice[cpu] += user_tick;
		else
			kstat.per_cpu_user[cpu] += user_tick;
		kstat.per_cpu_system[cpu] += system;
	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
		kstat.per_cpu_system[cpu] += system;
}
/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct *p;
	unsigned long nr = 0;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p->state == TASK_RUNNING ||
		     (p->state & TASK_UNINTERRUPTIBLE)))
			nr += FIXED_1;
	}
	read_unlock(&tasklist_lock);
	return nr;
}
/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc.
 * all seem to differ on different machines.
 */
unsigned long avenrun[3];
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
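/*
 * Worked example (added for clarity; assumes the usual sched.h values
 * FSHIFT = 11, FIXED_1 = 1 << 11 = 2048, EXP_1 = 1884):  CALC_LOAD is
 * the fixed-point exponential moving average
 *
 *	load = (load * EXP_n + active * (FIXED_1 - EXP_n)) >> FSHIFT
 *
 * evaluated every LOAD_FREQ (5 s).  Starting from load 0 with one task
 * permanently runnable (active = 2048), the 1-minute average after the
 * first sample is (0 * 1884 + 2048 * 164) >> 11 = 164, i.e. 0.08, and
 * it converges toward 2048 (a displayed load of 1.00) over the next
 * minute or so.
 */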
/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies;

/*
 * This rwlock protects us from races in SMP while playing with xtime. -arca
 */
rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
static inline void update_times(void)
{
	unsigned long ticks;

	/*
	 * update_times() is run from the raw timer_bh handler so we
	 * just know that the irqs are locally enabled and so we don't
	 * need to save/restore the flags of the local CPU here. -arca
	 */
	write_lock_irq(&xtime_lock);

	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	write_unlock_irq(&xtime_lock);
	calc_load(ticks);
}
void timer_bh(void)
{
	update_times();
	run_timer_list();
}

void do_timer(struct pt_regs *regs)
{
	(*(unsigned long *)&jiffies)++;
#ifndef CONFIG_SMP
	/* SMP process accounting uses the local APIC timer */
	update_process_times(user_mode(regs));
#endif
	mark_bh(TIMER_BH);
	if (TQ_ACTIVE(tq_timer))
		mark_bh(TQUEUE_BH);
}
#if !defined(__alpha__) && !defined(__ia64__)

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

#endif
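/*
 * Illustrative behaviour (a sketch, not part of the original file): with
 * an alarm due in 2.5 s, a second call rounds the remainder up, never
 * down, and never reports 0 while an alarm is still pending:
 *
 *	alarm(10);	-- returns 0, nothing was pending
 *	... 7.5 seconds later ...
 *	alarm(0);	-- cancels, returns 3 (2.5 s rounded up), not 2
 */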
#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */
/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
asmlinkage long sys_getpid(void)
{
	return current->tgid;
}
/*
 * This is not strictly SMP safe: p_opptr could change
 * from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer: we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * The "mb()" macro is a memory barrier - a synchronizing
 * event. It also makes sure that gcc doesn't optimize
 * away the necessary memory references.. The barrier doesn't
 * have to have all that strong semantics: on x86 we don't
 * really require a synchronizing instruction, for example.
 * The barrier is more important for code generation than
 * for any real memory ordering semantics (even if there is
 * a small window for a race, using the old pointer is
 * harmless for a while).
 */
asmlinkage long sys_getppid(void)
{
	int pid;
	struct task_struct * me = current;
	struct task_struct * parent;

	parent = me->p_opptr;
	for (;;) {
		pid = parent->pid;
#if CONFIG_SMP
		{
			struct task_struct *old = parent;

			mb();
			parent = me->p_opptr;
			if (old != parent)
				continue;
		}
#endif
		break;
	}
	return pid;
}
asmlinkage long sys_getuid(void)
{
	/* Only we change this so SMP safe */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only we change this so SMP safe */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only we change this so SMP safe */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only we change this so SMP safe */
	return current->egid;
}

#endif
/* Thread ID - the internal kernel "pid" */
asmlinkage long sys_gettid(void)
{
	return current->pid;
}
asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
	unsigned long expire;

	if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
		return -EFAULT;

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER)
	{
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 *
		 * It's important on SMP not to do this holding locks.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	if (expire) {
		if (rmtp) {
			jiffies_to_timespec(expire, &t);
			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
				return -EFAULT;
		}
		return -EINTR;
	}
	return 0;
}
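/*
 * Illustrative userspace behaviour (a sketch, not part of the original
 * file): the "+ (t.tv_sec || t.tv_nsec)" above rounds any nonzero
 * request up by one jiffy, so the sleep is never shorter than asked
 * for.  If a signal interrupts the sleep, the unslept remainder is
 * written back and -EINTR is returned, so a caller can resume:
 *
 *	struct timespec req = { 0, 500000000 };		-- 500 ms
 *	struct timespec rem;
 *
 *	if (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *		nanosleep(&rem, NULL);			-- sleep the rest
 */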