[PATCH] Time: i386 Conversion - part 3: Enable Generic Timekeeping
arch/i386/kernel/timers/timer_tsc.c
/*
 * This code largely moved from arch/i386/kernel/time.c.
 * See comments there for proper credits.
 *
 * 2004-06-25    Jesper Juhl
 *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
 *      failing to inline.
 */

#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/timex.h>
#include <linux/errno.h>
#include <linux/cpufreq.h>
#include <linux/string.h>
#include <linux/jiffies.h>

#include <asm/timer.h>
#include <asm/io.h>
/* processor.h for the tsc_disable flag */
#include <asm/processor.h>

#include "io_ports.h"
#include "mach_timer.h"

#include <asm/hpet.h>
#include <asm/i8253.h>

#ifdef CONFIG_HPET_TIMER
static unsigned long hpet_usec_quotient;
static unsigned long hpet_last;
static struct timer_opts timer_tsc;
#endif

static int use_tsc;
/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;

static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
static unsigned long long monotonic_base;
static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;

/* Avoid compensating for lost ticks before TSCs are synched */
static int detect_lost_ticks;
static int __init start_lost_tick_compensation(void)
{
        detect_lost_ticks = 1;
        return 0;
}
late_initcall(start_lost_tick_compensation);

/* convert from cycles (64 bits) => nanoseconds (64 bits)
 *  basic equation:
 *              ns = cycles / (freq / ns_per_sec)
 *              ns = cycles * (ns_per_sec / freq)
 *              ns = cycles * (10^9 / (cpu_khz * 10^3))
 *              ns = cycles * (10^6 / cpu_khz)
 *
 *      Then we use scaling math (suggested by george@mvista.com) to get:
 *              ns = cycles * (10^6 * SC / cpu_khz) / SC
 *              ns = cycles * cyc2ns_scale / SC
 *
 *      And since SC is a constant power of two, we can convert the div
 *  into a shift.
 *
 *  We can use a khz divisor instead of mhz and keep better precision, since
 *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 *  (mathieu.desnoyers@polymtl.ca)
 *
 *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
        cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
        return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}
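
/*
 * Worked example (illustrative, not part of the original code): for a
 * hypothetical 2 GHz CPU, cpu_khz = 2000000, so
 *
 *      cyc2ns_scale = (1000000 << 10) / 2000000 = 512
 *
 * and 4000 cycles convert to (4000 * 512) >> 10 = 2000 ns, i.e. 2 us,
 * which matches 4000 cycles at 2 GHz.
 */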

static int count2; /* counter for mark_offset_tsc() */

/* Cached *multiplier* to convert TSC counts to microseconds.
 * (see the equation below).
 * Equal to 2^32 * (1 / (clocks per usec) ).
 * Initialized in time_init.
 */
static unsigned long fast_gettimeoffset_quotient;

static unsigned long get_offset_tsc(void)
{
        register unsigned long eax, edx;

        /* Read the Time Stamp Counter */

        rdtsc(eax,edx);

        /* .. relative to previous jiffy (32 bits is enough) */
        eax -= last_tsc_low;    /* tsc_low delta */

        /*
         * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
         *             = (tsc_low delta) * (usecs_per_clock)
         *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
         *
         * Using a mull instead of a divl saves up to 31 clock cycles
         * in the critical path.
         */

        __asm__("mull %2"
                :"=a" (eax), "=d" (edx)
                :"rm" (fast_gettimeoffset_quotient),
                 "0" (eax));

        /* our adjusted time offset in microseconds */
        return delay_at_last_interrupt + edx;
}
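
/*
 * Equivalent formulation without inline assembly (a sketch for clarity,
 * not part of the original code): because fast_gettimeoffset_quotient is
 * 2^32 / clocks_per_usec, the mull above is just the high 32 bits of a
 * 64-bit product:
 *
 *      usecs = ((unsigned long long)tsc_delta *
 *               fast_gettimeoffset_quotient) >> 32;
 *
 * which equals tsc_delta / clocks_per_usec without using a divide.
 */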

static unsigned long long monotonic_clock_tsc(void)
{
        unsigned long long last_offset, this_offset, base;
        unsigned seq;

        /* atomically read monotonic base & last_offset */
        do {
                seq = read_seqbegin(&monotonic_lock);
                last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
                base = monotonic_base;
        } while (read_seqretry(&monotonic_lock, seq));

        /* Read the Time Stamp Counter */
        rdtscll(this_offset);

        /* return the value in ns */
        return base + cycles_2_ns(this_offset - last_offset);
}
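
/*
 * Note: the read_seqbegin()/read_seqretry() loop above pairs with the
 * write_seqlock()/write_sequnlock() sections in the mark_offset handlers
 * below, so a reader that races with a timer interrupt simply retries
 * instead of combining an old monotonic_base with a new cached TSC value.
 */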

static void delay_tsc(unsigned long loops)
{
        unsigned long bclock, now;

        rdtscl(bclock);
        do
        {
                rep_nop();
                rdtscl(now);
        } while ((now-bclock) < loops);
}

#ifdef CONFIG_HPET_TIMER
static void mark_offset_tsc_hpet(void)
{
        unsigned long long this_offset, last_offset;
        unsigned long offset, temp, hpet_current;

        write_seqlock(&monotonic_lock);
        last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        /*
         * It is important that these two operations happen almost at
         * the same time. We do the RDTSC stuff first, since it's
         * faster. To avoid any inconsistencies, we need interrupts
         * disabled locally.
         */
        /*
         * Interrupts are just disabled locally since the timer irq
         * has the SA_INTERRUPT flag set. -arca
         */
        /* read Pentium cycle counter */

        hpet_current = hpet_readl(HPET_COUNTER);
        rdtsc(last_tsc_low, last_tsc_high);

        /* lost tick compensation */
        offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
        if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
                                        && detect_lost_ticks) {
                int lost_ticks = (offset - hpet_last) / hpet_tick;
                jiffies_64 += lost_ticks;
        }
        hpet_last = hpet_current;

        /* update the monotonic base value */
        this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        monotonic_base += cycles_2_ns(this_offset - last_offset);
        write_sequnlock(&monotonic_lock);

        /* calculate delay_at_last_interrupt */
        /*
         * Time offset = (hpet delta) * ( usecs per HPET clock )
         *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
         *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
         * Where,
         * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
         */
        delay_at_last_interrupt = hpet_current - offset;
        ASM_MUL64_REG(temp, delay_at_last_interrupt,
                        hpet_usec_quotient, delay_at_last_interrupt);
}
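
/*
 * Sketch of the ASM_MUL64_REG step above in portable form (illustrative
 * only), following the equation in the comment:
 *
 *      usecs = ((unsigned long long)hpet_delta * hpet_usec_quotient) >> 32;
 *
 * where hpet_usec_quotient = (2^32 * usecs per tick) / HPET clocks per tick.
 */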
#endif

static void mark_offset_tsc(void)
{
        unsigned long lost,delay;
        unsigned long delta = last_tsc_low;
        int count;
        int countmp;
        static int count1 = 0;
        unsigned long long this_offset, last_offset;
        static int lost_count = 0;

        write_seqlock(&monotonic_lock);
        last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        /*
         * It is important that these two operations happen almost at
         * the same time. We do the RDTSC stuff first, since it's
         * faster. To avoid any inconsistencies, we need interrupts
         * disabled locally.
         */

        /*
         * Interrupts are just disabled locally since the timer irq
         * has the SA_INTERRUPT flag set. -arca
         */

        /* read Pentium cycle counter */

        rdtsc(last_tsc_low, last_tsc_high);

        spin_lock(&i8253_lock);
        outb_p(0x00, PIT_MODE);     /* latch the count ASAP */

        count = inb_p(PIT_CH0);    /* read the latched count */
        count |= inb(PIT_CH0) << 8;

        /*
         * VIA686a test code... reset the latch if count > max + 1
         * from timer_pit.c - cjb
         */
        if (count > LATCH) {
                outb_p(0x34, PIT_MODE);
                outb_p(LATCH & 0xff, PIT_CH0);
                outb(LATCH >> 8, PIT_CH0);
                count = LATCH - 1;
        }

        spin_unlock(&i8253_lock);

        if (pit_latch_buggy) {
                /* get the middle value of the last 3 latched counts */
                if ((count2 >= count && count >= count1)
                    || (count1 >= count && count >= count2)) {
                        count2 = count1; count1 = count;
                } else if ((count1 >= count2 && count2 >= count)
                           || (count >= count2 && count2 >= count1)) {
                        countmp = count;count = count2;
                        count2 = count1;count1 = countmp;
                } else {
                        count2 = count1; count1 = count; count = count1;
                }
        }

        /* lost tick compensation */
        delta = last_tsc_low - delta;
        {
                register unsigned long eax, edx;
                eax = delta;
                __asm__("mull %2"
                :"=a" (eax), "=d" (edx)
                :"rm" (fast_gettimeoffset_quotient),
                 "0" (eax));
                delta = edx;
        }
        delta += delay_at_last_interrupt;
        lost = delta/(1000000/HZ);
        delay = delta%(1000000/HZ);
        if (lost >= 2 && detect_lost_ticks) {
                jiffies_64 += lost-1;

                /* sanity check to ensure we're not always losing ticks */
                if (lost_count++ > 100) {
                        printk(KERN_WARNING "Losing too many ticks!\n");
                        printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
                        printk(KERN_WARNING "Possible reasons for this are:\n");
                        printk(KERN_WARNING "  You're running with Speedstep,\n");
                        printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
                        printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
                        printk(KERN_WARNING "Falling back to a sane timesource now.\n");

                        clock_fallback();
                }
        } else
                lost_count = 0;
        /* update the monotonic base value */
        this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        monotonic_base += cycles_2_ns(this_offset - last_offset);
        write_sequnlock(&monotonic_lock);

        /* calculate delay_at_last_interrupt */
        count = ((LATCH-1) - count) * TICK_SIZE;
        delay_at_last_interrupt = (count + LATCH/2) / LATCH;

        /* catch the corner case where a tick rollover occurred
         * between the TSC and PIT reads (detected when the usec
         * delta is > 90% of the usecs per tick)
         */
        if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
                jiffies_64++;
}
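
/*
 * Worked example for the rounding above (illustrative, assuming HZ = 1000
 * and taking TICK_SIZE as 1000 usec for simplicity): LATCH is then roughly
 * 1193 PIT clocks per tick.  A latched count of 893 means 1192 - 893 = 299
 * clocks have elapsed since the tick started, so
 *
 *      delay_at_last_interrupt = (299 * 1000 + 1193/2) / 1193 = 251 usec.
 */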

static int __init init_tsc(char* override)
{

        /* check clock override */
        if (override[0] && strncmp(override,"tsc",3)) {
#ifdef CONFIG_HPET_TIMER
                if (is_hpet_enabled()) {
                        printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
                } else
#endif
                {
                        return -ENODEV;
                }
        }

        /*
         * If we have APM enabled or the CPU clock speed is variable
         * (CPU stops clock on HLT or slows clock to save power)
         * then the TSC timestamps may diverge by up to 1 jiffy from
         * 'real time' but nothing will break.
         * The most frequent case is that the CPU is "woken" from a halt
         * state by the timer interrupt itself, so we get 0 error. In the
         * rare cases where a driver would "wake" the CPU and request a
         * timestamp, the maximum error is < 1 jiffy. But timestamps are
         * still perfectly ordered.
         * Note that the TSC counter will be reset if APM suspends
         * to disk; this won't break the kernel, though, 'cuz we're
         * smart.  See arch/i386/kernel/apm.c.
         */
        /*
         *      Firstly we have to do a CPU check for chips with
         *      a potentially buggy TSC. At this point we haven't run
         *      the ident/bugs checks so we must run this hook as it
         *      may turn off the TSC flag.
         *
         *      NOTE: this doesn't yet handle SMP 486 machines where only
         *      some CPUs have a TSC. That's never worked and nobody has
         *      moaned if you have the only one in the world - you fix it!
         */

        count2 = LATCH; /* initialize counter for mark_offset_tsc() */

        if (cpu_has_tsc) {
                unsigned long tsc_quotient;
#ifdef CONFIG_HPET_TIMER
                if (is_hpet_enabled() && hpet_use_timer) {
                        unsigned long result, remain;
                        printk("Using TSC for gettimeofday\n");
                        tsc_quotient = calibrate_tsc_hpet(NULL);
                        timer_tsc.mark_offset = &mark_offset_tsc_hpet;
                        /*
                         * Math to calculate the hpet to usec multiplier;
                         * see the equation in mark_offset_tsc_hpet() above.
                         */
                        ASM_DIV64_REG(result, remain, hpet_tick,
                                        0, KERNEL_TICK_USEC);
                        if (remain > (hpet_tick >> 1))
                                result++; /* rounding the result */

                        hpet_usec_quotient = result;
                } else
#endif
                {
                        tsc_quotient = calibrate_tsc();
                }

                if (tsc_quotient) {
                        fast_gettimeoffset_quotient = tsc_quotient;
                        use_tsc = 1;
                        /*
                         *      We could be more selective here I suspect
                         *      and just enable this for the next intel chips ?
                         */
                        /* report CPU clock rate in Hz.
                         * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
                         * clock/second. Our precision is about 100 ppm.
                         */
                        {       unsigned long eax=0, edx=1000;
                                __asm__("divl %2"
                                :"=a" (cpu_khz), "=d" (edx)
                                :"r" (tsc_quotient),
                                "0" (eax), "1" (edx));
                                printk("Detected %u.%03u MHz processor.\n",
                                        cpu_khz / 1000, cpu_khz % 1000);
                        }
                        set_cyc2ns_scale(cpu_khz);
                        return 0;
                }
        }
        return -ENODEV;
}
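
/*
 * Note on the divl in init_tsc() above (illustrative): edx:eax is preloaded
 * with 1000 * 2^32, and tsc_quotient is 2^32 / clocks_per_usec, so the
 * quotient is 1000 * clocks_per_usec = clocks per millisecond, i.e. cpu_khz.
 * A portable sketch of the same computation:
 *
 *      cpu_khz = (unsigned long)((1000ULL << 32) / tsc_quotient);
 */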

static int tsc_resume(void)
{
        write_seqlock(&monotonic_lock);
        /* Assume this is the last mark offset time */
        rdtsc(last_tsc_low, last_tsc_high);
#ifdef CONFIG_HPET_TIMER
        if (is_hpet_enabled() && hpet_use_timer)
                hpet_last = hpet_readl(HPET_COUNTER);
#endif
        write_sequnlock(&monotonic_lock);
        return 0;
}

/************************************************************/

/* tsc timer_opts struct */
static struct timer_opts timer_tsc = {
        .name = "tsc",
        .mark_offset = mark_offset_tsc,
        .get_offset = get_offset_tsc,
        .monotonic_clock = monotonic_clock_tsc,
        .delay = delay_tsc,
        .read_timer = read_timer_tsc,
        .resume = tsc_resume,
};

struct init_timer_opts __initdata timer_tsc_init = {
        .init = init_tsc,
        .opts = &timer_tsc,
};