original comment: +Wilson03172004,marked due to this pci host does not support MWI
[linux-2.4.git] / arch / sparc / kernel / sun4m_smp.c
1 /* sun4m_smp.c: Sparc SUN4M SMP support.
2  *
3  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
4  */
5
6 #include <asm/head.h>
7
8 #include <linux/kernel.h>
9 #include <linux/sched.h>
10 #include <linux/threads.h>
11 #include <linux/smp.h>
12 #include <linux/smp_lock.h>
13 #include <linux/interrupt.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/init.h>
16 #include <linux/spinlock.h>
17 #include <linux/mm.h>
18
19 #include <asm/ptrace.h>
20 #include <asm/atomic.h>
21
22 #include <asm/delay.h>
23 #include <asm/irq.h>
24 #include <asm/page.h>
25 #include <asm/pgalloc.h>
26 #include <asm/pgtable.h>
27 #include <asm/oplib.h>
28 #include <asm/hardirq.h>
29 #include <asm/softirq.h>
30
31 #define __KERNEL_SYSCALLS__
32 #include <linux/unistd.h>
33
/*
 * IRQ numbers handed to set_cpu_int() for inter-processor signalling:
 * IRQ_RESCHEDULE and IRQ_STOP_CPU are raised by smp4m_message_pass()
 * for MSG_RESCHEDULE and MSG_STOP_CPU respectively, IRQ_CROSS_CALL is
 * raised by smp4m_cross_call().
 */
34 #define IRQ_RESCHEDULE          13
35 #define IRQ_STOP_CPU            14
36 #define IRQ_CROSS_CALL          15
37
38 extern ctxd_t *srmmu_ctx_table_phys;
39 extern int linux_num_cpus;
40
41 extern void calibrate_delay(void);
42
43 extern struct task_struct *current_set[NR_CPUS];
44 extern volatile int smp_processors_ready;
45 extern unsigned long cpu_present_map;
46 extern int smp_num_cpus;
47 extern int smp_threads_ready;
48 extern unsigned char mid_xlate[NR_CPUS];
49 extern volatile unsigned long cpu_callin_map[NR_CPUS];
50 extern unsigned long smp_proc_in_lock[NR_CPUS];
51 extern struct cpuinfo_sparc cpu_data[NR_CPUS];
52 extern unsigned long cpu_offset[NR_CPUS];
53 extern unsigned char boot_cpu_id;
54 extern int smp_activated;
55 extern volatile int __cpu_number_map[NR_CPUS];
56 extern volatile int __cpu_logical_map[NR_CPUS];
57 extern volatile unsigned long ipi_count;
58 extern volatile int smp_process_available;
59 extern volatile int smp_commenced;
60 extern int __smp4m_processor_id(void);
61
62 extern unsigned long totalram_pages;
63
64 /*#define SMP_DEBUG*/
65
/*
 * Debug tracing for this file.  The argument must carry its own
 * parentheses because it is pasted directly after printk, e.g.
 *
 *      SMP_PRINTK(("cpu %d called in\n", cpu));
 *
 * Both variants expand to a single do { } while (0) statement so the
 * macro behaves like an ordinary statement inside if/else bodies
 * whether or not SMP_DEBUG is defined.
 */
#ifdef SMP_DEBUG
#define SMP_PRINTK(x)   do { printk x; } while (0)
#else
#define SMP_PRINTK(x)   do { } while (0)
#endif
71
/*
 * Exchange the word at *ptr with val using the SPARC "swap" instruction
 * and return the value that was stored there before.
 *
 * The "memory" clobber tells the compiler that the instruction reads and
 * writes memory that is not described by the operand list, so it must
 * not move ordinary loads/stores across the exchange or keep memory
 * values cached in registers over it.  smp4m_callin() relies on this
 * when it uses swap() to publish its cpu_callin_map[] entry.
 */
static inline unsigned long swap(volatile unsigned long *ptr, unsigned long val)
{
        __asm__ __volatile__("swap [%1], %0\n\t" :
                             "=&r" (val), "=&r" (ptr) :
                             "0" (val), "1" (ptr) :
                             "memory");
        return val;
}
79
80 static void smp_setup_percpu_timer(void);
81 extern void cpu_probe(void);
82
83 void __init smp4m_callin(void)
84 {
85         int cpuid = hard_smp_processor_id();
86
87         local_flush_cache_all();
88         local_flush_tlb_all();
89
90         set_irq_udt(mid_xlate[boot_cpu_id]);
91
92         /* Get our local ticker going. */
93         smp_setup_percpu_timer();
94
95         calibrate_delay();
96         smp_store_cpu_info(cpuid);
97
98         local_flush_cache_all();
99         local_flush_tlb_all();
100
101         /*
102          * Unblock the master CPU _only_ when the scheduler state
103          * of all secondary CPUs will be up-to-date, so after
104          * the SMP initialization the master will be just allowed
105          * to call the scheduler code.
106          */
107         init_idle();
108
109         /* Allow master to continue. */
110         swap((unsigned long *)&cpu_callin_map[cpuid], 1);
111
112         local_flush_cache_all();
113         local_flush_tlb_all();
114         
115         cpu_probe();
116
117         /* Fix idle thread fields. */
118         __asm__ __volatile__("ld [%0], %%g6\n\t"
119                              : : "r" (&current_set[cpuid])
120                              : "memory" /* paranoid */);
121
122         /* Attach to the address space of init_task. */
123         atomic_inc(&init_mm.mm_count);
124         current->active_mm = &init_mm;
125
126         while(!smp_commenced)
127                 barrier();
128
129         local_flush_cache_all();
130         local_flush_tlb_all();
131
132         __sti();
133 }
134
135 extern int cpu_idle(void *unused);
136 extern void init_IRQ(void);
137 extern void cpu_panic(void);
138 extern int start_secondary(void *unused);
139
140 /*
141  *      Cycle through the processors asking the PROM to start each one.
142  */
143  
144 extern struct prom_cpuinfo linux_cpus[NR_CPUS];
145 extern struct linux_prom_registers smp_penguin_ctable;
146 extern unsigned long trapbase_cpu1[];
147 extern unsigned long trapbase_cpu2[];
148 extern unsigned long trapbase_cpu3[];
149
150 void __init smp4m_boot_cpus(void)
151 {
152         int cpucount = 0;
153         int i = 0;
154         int first, prev;
155
156         printk("Entering SMP Mode...\n");
157
158         __sti();
159         cpu_present_map = 0;
160
161         for(i=0; i < linux_num_cpus; i++)
162                 cpu_present_map |= (1<<i);
163
164         for(i=0; i < NR_CPUS; i++) {
165                 cpu_offset[i] = (char *)&cpu_data[i] - (char *)&cpu_data;
166                 __cpu_number_map[i] = -1;
167                 __cpu_logical_map[i] = -1;
168         }
169
170         mid_xlate[boot_cpu_id] = (linux_cpus[boot_cpu_id].mid & ~8);
171         __cpu_number_map[boot_cpu_id] = 0;
172         __cpu_logical_map[0] = boot_cpu_id;
173         current->processor = boot_cpu_id;
174
175         smp_store_cpu_info(boot_cpu_id);
176         set_irq_udt(mid_xlate[boot_cpu_id]);
177         smp_setup_percpu_timer();
178         init_idle();
179         local_flush_cache_all();
180         if(linux_num_cpus == 1)
181                 return;  /* Not an MP box. */
182         for(i = 0; i < NR_CPUS; i++) {
183                 if(i == boot_cpu_id)
184                         continue;
185
186                 if(cpu_present_map & (1 << i)) {
187                         extern unsigned long sun4m_cpu_startup;
188                         unsigned long *entry = &sun4m_cpu_startup;
189                         struct task_struct *p;
190                         int timeout;
191
192                         /* Cook up an idler for this guy. */
193                         kernel_thread(start_secondary, NULL, CLONE_PID);
194
195                         cpucount++;
196
197                         p = init_task.prev_task;
198                         init_tasks[i] = p;
199
200                         p->processor = i;
201                         p->cpus_runnable = 1 << i; /* we schedule the first task manually */
202
203                         current_set[i] = p;
204
205                         del_from_runqueue(p);
206                         unhash_process(p);
207
208                         /* See trampoline.S for details... */
209                         entry += ((i-1) * 3);
210
211                         /*
212                          * Initialize the contexts table
213                          * Since the call to prom_startcpu() trashes the structure,
214                          * we need to re-initialize it for each cpu
215                          */
216                         smp_penguin_ctable.which_io = 0;
217                         smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys;
218                         smp_penguin_ctable.reg_size = 0;
219
220                         /* whirrr, whirrr, whirrrrrrrrr... */
221                         printk("Starting CPU %d at %p\n", i, entry);
222                         mid_xlate[i] = (linux_cpus[i].mid & ~8);
223                         local_flush_cache_all();
224                         prom_startcpu(linux_cpus[i].prom_node,
225                                       &smp_penguin_ctable, 0, (char *)entry);
226
227                         /* wheee... it's going... */
228                         for(timeout = 0; timeout < 10000; timeout++) {
229                                 if(cpu_callin_map[i])
230                                         break;
231                                 udelay(200);
232                         }
233                         if(cpu_callin_map[i]) {
234                                 /* Another "Red Snapper". */
235                                 __cpu_number_map[i] = i;
236                                 __cpu_logical_map[i] = i;
237                         } else {
238                                 cpucount--;
239                                 printk("Processor %d is stuck.\n", i);
240                         }
241                 }
242                 if(!(cpu_callin_map[i])) {
243                         cpu_present_map &= ~(1 << i);
244                         __cpu_number_map[i] = -1;
245                 }
246         }
247         local_flush_cache_all();
248         if(cpucount == 0) {
249                 printk("Error: only one Processor found.\n");
250                 cpu_present_map = (1 << smp_processor_id());
251         } else {
252                 unsigned long bogosum = 0;
253                 for(i = 0; i < NR_CPUS; i++) {
254                         if(cpu_present_map & (1 << i))
255                                 bogosum += cpu_data[i].udelay_val;
256                 }
257                 printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n",
258                        cpucount + 1,
259                        bogosum/(500000/HZ),
260                        (bogosum/(5000/HZ))%100);
261                 smp_activated = 1;
262                 smp_num_cpus = cpucount + 1;
263         }
264
265         /* Setup CPU list for IRQ distribution scheme. */
266         first = prev = -1;
267         for(i = 0; i < NR_CPUS; i++) {
268                 if(cpu_present_map & (1 << i)) {
269                         if(first == -1)
270                                 first = i;
271                         if(prev != -1)
272                                 cpu_data[prev].next = i;
273                         cpu_data[i].mid = mid_xlate[i];
274                         prev = i;
275                 }
276         }
277         cpu_data[prev].next = first;
278         
279         /* Free unneeded trap tables */
280         if (!(cpu_present_map & (1 << 1))) {
281                 ClearPageReserved(virt_to_page(trapbase_cpu1));
282                 set_page_count(virt_to_page(trapbase_cpu1), 1);
283                 free_page((unsigned long)trapbase_cpu1);
284                 totalram_pages++;
285                 num_physpages++;
286         }
287         if (!(cpu_present_map & (1 << 2))) {
288                 ClearPageReserved(virt_to_page(trapbase_cpu2));
289                 set_page_count(virt_to_page(trapbase_cpu2), 1);
290                 free_page((unsigned long)trapbase_cpu2);
291                 totalram_pages++;
292                 num_physpages++;
293         }
294         if (!(cpu_present_map & (1 << 3))) {
295                 ClearPageReserved(virt_to_page(trapbase_cpu3));
296                 set_page_count(virt_to_page(trapbase_cpu3), 1);
297                 free_page((unsigned long)trapbase_cpu3);
298                 totalram_pages++;
299                 num_physpages++;
300         }
301
302         /* Ok, they are spinning and ready to go. */
303         smp_processors_ready = 1;
304 }
305
306 /* At each hardware IRQ, we get this called to forward IRQ reception
307  * to the next processor.  The caller must disable the IRQ level being
308  * serviced globally so that there are no double interrupts received.
309  */
310 void smp4m_irq_rotate(int cpu)
311 {
312         if(smp_processors_ready)
313                 set_irq_udt(cpu_data[cpu_data[cpu].next].mid);
314 }
315
316 /* Cross calls, in order to work efficiently and atomically do all
317  * the message passing work themselves, only stopcpu and reschedule
318  * messages come through here.
319  */
320 void smp4m_message_pass(int target, int msg, unsigned long data, int wait)
321 {
322         static unsigned long smp_cpu_in_msg[NR_CPUS];
323         unsigned long mask;
324         int me = smp_processor_id();
325         int irq, i;
326
327         if(msg == MSG_RESCHEDULE) {
328                 irq = IRQ_RESCHEDULE;
329
330                 if(smp_cpu_in_msg[me])
331                         return;
332         } else if(msg == MSG_STOP_CPU) {
333                 irq = IRQ_STOP_CPU;
334         } else {
335                 goto barf;
336         }
337
338         smp_cpu_in_msg[me]++;
339         if(target == MSG_ALL_BUT_SELF || target == MSG_ALL) {
340                 mask = cpu_present_map;
341                 if(target == MSG_ALL_BUT_SELF)
342                         mask &= ~(1 << me);
343                 for(i = 0; i < 4; i++) {
344                         if(mask & (1 << i))
345                                 set_cpu_int(mid_xlate[i], irq);
346                 }
347         } else {
348                 set_cpu_int(mid_xlate[target], irq);
349         }
350         smp_cpu_in_msg[me]--;
351
352         return;
353 barf:
354         printk("Yeeee, trying to send SMP msg(%d) on cpu %d\n", msg, me);
355         panic("Bogon SMP message pass.");
356 }
357
358 static struct smp_funcall {
359         smpfunc_t func;
360         unsigned long arg1;
361         unsigned long arg2;
362         unsigned long arg3;
363         unsigned long arg4;
364         unsigned long arg5;
365         unsigned long processors_in[NR_CPUS];  /* Set when ipi entered. */
366         unsigned long processors_out[NR_CPUS]; /* Set when ipi exited. */
367 } ccall_info;
368
369 static spinlock_t cross_call_lock = SPIN_LOCK_UNLOCKED;
370
371 /* Cross calls must be serialized, at least currently. */
372 void smp4m_cross_call(smpfunc_t func, unsigned long arg1, unsigned long arg2,
373                     unsigned long arg3, unsigned long arg4, unsigned long arg5)
374 {
375         if(smp_processors_ready) {
376                 register int ncpus = smp_num_cpus;
377                 unsigned long flags;
378
379                 spin_lock_irqsave(&cross_call_lock, flags);
380
381                 /* Init function glue. */
382                 ccall_info.func = func;
383                 ccall_info.arg1 = arg1;
384                 ccall_info.arg2 = arg2;
385                 ccall_info.arg3 = arg3;
386                 ccall_info.arg4 = arg4;
387                 ccall_info.arg5 = arg5;
388
389                 /* Init receive/complete mapping, plus fire the IPI's off. */
390                 {
391                         register unsigned long mask;
392                         register int i;
393
394                         mask = (cpu_present_map & ~(1 << smp_processor_id()));
395                         for(i = 0; i < ncpus; i++) {
396                                 if(mask & (1 << i)) {
397                                         ccall_info.processors_in[i] = 0;
398                                         ccall_info.processors_out[i] = 0;
399                                         set_cpu_int(mid_xlate[i], IRQ_CROSS_CALL);
400                                 } else {
401                                         ccall_info.processors_in[i] = 1;
402                                         ccall_info.processors_out[i] = 1;
403                                 }
404                         }
405                 }
406
407                 {
408                         register int i;
409
410                         i = 0;
411                         do {
412                                 while(!ccall_info.processors_in[i])
413                                         barrier();
414                         } while(++i < ncpus);
415
416                         i = 0;
417                         do {
418                                 while(!ccall_info.processors_out[i])
419                                         barrier();
420                         } while(++i < ncpus);
421                 }
422
423                 spin_unlock_irqrestore(&cross_call_lock, flags);
424         }
425 }
426
427 /* Running cross calls. */
428 void smp4m_cross_call_irq(void)
429 {
430         int i = smp_processor_id();
431
432         ccall_info.processors_in[i] = 1;
433         ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3,
434                         ccall_info.arg4, ccall_info.arg5);
435         ccall_info.processors_out[i] = 1;
436 }
437
438 extern unsigned int prof_multiplier[NR_CPUS];
439 extern unsigned int prof_counter[NR_CPUS];
440
441 extern void sparc_do_profile(unsigned long pc, unsigned long o7);
442
443 void smp4m_percpu_timer_interrupt(struct pt_regs *regs)
444 {
445         int cpu = smp_processor_id();
446
447         clear_profile_irq(mid_xlate[cpu]);
448
449         if(!user_mode(regs))
450                 sparc_do_profile(regs->pc, regs->u_regs[UREG_RETPC]);
451
452         if(!--prof_counter[cpu]) {
453                 int user = user_mode(regs);
454
455                 irq_enter(cpu, 0);
456                 update_process_times(user);
457                 irq_exit(cpu, 0);
458
459                 prof_counter[cpu] = prof_multiplier[cpu];
460         }
461 }
462
463 extern unsigned int lvl14_resolution;
464
465 static void __init smp_setup_percpu_timer(void)
466 {
467         int cpu = smp_processor_id();
468
469         prof_counter[cpu] = prof_multiplier[cpu] = 1;
470         load_profile_irq(mid_xlate[cpu], lvl14_resolution);
471
472         if(cpu == boot_cpu_id)
473                 enable_pil_irq(14);
474 }
475
/*
 * Rewrite three consecutive instruction words at addr.  The destination
 * register field (mask 0x3e000000) is taken from the word already at
 * addr[0]; the same register number, shifted down by 11 bits, is used
 * as the source register of the second and third instruction.
 */
void __init smp4m_blackbox_id(unsigned *addr)
{
        unsigned rd_bits = addr[0] & 0x3e000000;
        unsigned rs1_bits = rd_bits >> 11;
        unsigned reg_bits = rd_bits | rs1_bits;

        /* rd %tbr, reg */
        addr[0] = 0x81580000 | rd_bits;
        /* srl reg, 0xc, reg */
        addr[1] = 0x8130200c | reg_bits;
        /* and reg, 3, reg */
        addr[2] = 0x80082003 | reg_bits;
}
485
/*
 * Rewrite the instruction words at addr[0], addr[2] and addr[4]; the
 * words at addr[1] and addr[3] are left as they are.  The destination
 * register field (mask 0x3e000000) is taken from the word already at
 * addr[0]; the same register number, shifted down by 11 bits, is used
 * as the source register of the second and third patched instruction.
 */
void __init smp4m_blackbox_current(unsigned *addr)
{
        unsigned rd_bits = addr[0] & 0x3e000000;
        unsigned rs1_bits = rd_bits >> 11;
        unsigned reg_bits = rd_bits | rs1_bits;

        /* rd %tbr, reg */
        addr[0] = 0x81580000 | rd_bits;
        /* srl reg, 0xa, reg */
        addr[2] = 0x8130200a | reg_bits;
        /* and reg, 0xc, reg */
        addr[4] = 0x8008200c | reg_bits;
}
495
/*
 * Register the sun4m SMP implementations from this file through the
 * BTFIXUP macros: the two instruction patchers are installed as
 * BLACKBOX fixups for smp_processor_id and load_current, and the cross
 * call, message pass and __smp_processor_id entry points as CALL fixups
 * of kind BTFIXUPCALL_NORM.
 * NOTE(review): how and when the fixups are applied is defined by the
 * BTFIXUP machinery outside this file - confirm there.
 */
496 void __init sun4m_init_smp(void)
497 {
498         BTFIXUPSET_BLACKBOX(smp_processor_id, smp4m_blackbox_id);
499         BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
500         BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
501         BTFIXUPSET_CALL(smp_message_pass, smp4m_message_pass, BTFIXUPCALL_NORM);
502         BTFIXUPSET_CALL(__smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
503 }