added a lot of printk output to ease writing of emulator
[linux-2.4.21-pre4.git] arch/x86_64/mm/init.c
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002  Andi Kleen <ak@suse.de>
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/blk.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/pda.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>

mmu_gather_t mmu_gathers[NR_CPUS];

static unsigned long totalram_pages;

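/*
 * Trim the per-CPU page table quicklists (kept in the PDA) back towards
 * the low watermark once they have grown past the high watermark.  Each
 * cached pgd/pmd/pte page is popped off its quicklist and freed for
 * real.  Returns the number of pages freed.
 */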
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if(read_pda(pgtable_cache_sz) > high) {
		do {
			if (read_pda(pgd_quick)) {
				pgd_free_slow(pgd_alloc_one_fast());
				freed++;
			}
			if (read_pda(pmd_quick)) {
				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
				freed++;
			}
			if (read_pda(pte_quick)) {
				pte_free_slow(pte_alloc_one_fast(NULL, 0));
				freed++;
			}
		} while(read_pda(pgtable_cache_sz) > low);
	}
	return freed;
}

#ifndef CONFIG_DISCONTIGMEM
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

void show_mem(void)
{
	int i, total = 0, reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (page_count(mem_map+i))
			shared += page_count(mem_map+i) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d reserved pages\n",reserved);
	printk("%d pages shared\n",shared);
	printk("%d pages swap cached\n",cached);
	printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
	show_buffers();
}
#endif

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

int after_bootmem;

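/*
 * Allocate a page for a kernel page table.  Before mem_init() has run
 * (after_bootmem == 0) pages come from the bootmem allocator; later we
 * have to fall back to get_free_page(GFP_ATOMIC).
 */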
static void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_free_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_low_pages(PAGE_SIZE);
	if (!ptr)
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
	return ptr;
}

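/*
 * Install a single kernel mapping vaddr -> phys with the given
 * protection.  Walks the page table (pml4 -> pgd -> pmd -> pte) and
 * allocates the intermediate levels with spp_getpage() as needed; only
 * the pml4 entry for the fixmap area is expected to exist already.
 */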
static void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pml4_t *level4;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	level4 = pml4_offset_k(vaddr);
	if (pml4_none(*level4)) {
		printk("PML4 FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pgd = level3_offset_k(level4, vaddr);
	if (pgd_none(*pgd)) {
		pmd = (pmd_t *) spp_getpage();
		set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pgd, 0)) {
			printk("PAGETABLE BUG #01!\n");
			return;
		}
	}
	pmd = pmd_offset(pgd, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	pte = pte_offset(pmd, vaddr);
	set_pte(pte, mk_pte_phys(phys, prot));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

extern pmd_t temp_boot_pmds[];

unsigned long __initdata table_start, table_end;

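/*
 * Early page table pages are allocated from physical memory that is not
 * yet covered by the kernel direct mapping.  To write to such a page we
 * borrow one of the boot-time pmds and point it at the page with a 2MB
 * (PSE) mapping; temp_mappings[] tracks which of the two scratch
 * windows (virtual 40MB and 42MB) is currently in use.
 */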
static struct temp_map {
	pmd_t *pmd;
	void  *address;
	int    allocated;
} temp_mappings[] __initdata = {
	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
	{}
};

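/*
 * Hand out the next free page below end_pfn_map for use as a page
 * table and map it through a free temp_mappings[] slot so it can be
 * written.  Returns the virtual address of the page; the slot index and
 * the physical address are passed back through *index and *phys so the
 * caller can release the window again with unmap_low_page().
 */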
static __init void *alloc_low_page(int *index, unsigned long *phys)
{
	struct temp_map *ti;
	int i;
	unsigned long pfn = table_end++, paddr;
	void *adr;

	if (table_end >= end_pfn_map)
		panic("alloc_low_page: ran out of page mappings");
	for (i = 0; temp_mappings[i].allocated; i++) {
		if (!temp_mappings[i].pmd)
			panic("alloc_low_page: ran out of temp mappings");
	}
	ti = &temp_mappings[i];
	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
	ti->allocated = 1;
	__flush_tlb();
	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
	*index = i;
	*phys  = pfn * PAGE_SIZE;
	return adr;
}

static __init void unmap_low_page(int i)
{
	struct temp_map *ti = &temp_mappings[i];
	set_pmd(ti->pmd, __pmd(0));
	ti->allocated = 0;
}

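/*
 * Fill one pgd-level page with 2MB (PSE) mappings of the physical range
 * [address, end).  pgd entries covering ranges with no memory in the
 * e820 map are cleared; pmd pages are allocated through the temporary
 * mapping window.
 */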
static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned long end)
{
	long i, j;

	i = pgd_index(address);
	pgd = pgd + i;
	for (; i < PTRS_PER_PGD; pgd++, i++) {
		int map;
		unsigned long paddr, pmd_phys;
		pmd_t *pmd;

		paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
		if (paddr >= end) {
			for (; i < PTRS_PER_PGD; i++, pgd++)
				set_pgd(pgd, __pgd(0));
			break;
		}

		if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
			set_pgd(pgd, __pgd(0));
			continue;
		}

		pmd = alloc_low_page(&map, &pmd_phys);
		set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
			unsigned long pe;

			if (paddr >= end) {
				for (; j < PTRS_PER_PMD; j++, pmd++)
					set_pmd(pmd, __pmd(0));
				break;
			}
			pe = _PAGE_PSE | _KERNPG_TABLE | _PAGE_NX | _PAGE_GLOBAL | paddr;
			pe &= __supported_pte_mask;
			set_pmd(pmd, __pmd(pe));
		}
		unmap_low_page(map);
	}
	__flush_tlb();
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   the physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(void)
{
	unsigned long adr;
	unsigned long end;
	unsigned long next;
	unsigned long pgds, pmds, tables;

	end = end_pfn_map << PAGE_SHIFT;

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped.  Unfortunately this is done currently before the nodes are
	 * discovered.
	 */

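	/*
	 * Page table entries are 8 bytes each, so each level needs
	 * (entries * 8) bytes, rounded up to whole pages.
	 */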
	pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(pgds*8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);

	/* The direct mapping tables must currently fit below the kernel in
	   the first MB, because until bootmem is initialised we have no way
	   to tell the later passes not to reuse the memory. */
	/* Should limit MAXMEM for this */
	table_start = find_e820_area(/*0*/ 0x8000, __pa_symbol(&_text), tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	end += __PAGE_OFFSET; /* turn virtual */

	for (adr = PAGE_OFFSET; adr < end; adr = next) {
		int map;
		unsigned long pgd_phys;
		pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
		next = adr + PML4_SIZE;
		if (next > end)
			next = end;

		phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
		set_pml4(init_level4_pgt + pml4_index(adr),
			 mk_kernel_pml4(pgd_phys, KERNPG_TABLE));
		unmap_low_page(map);
	}
	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
	printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", end,
	       table_start<<PAGE_SHIFT,
	       table_end<<PAGE_SHIFT);
}

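/*
 * Remove the low mapping of memory (slot 0 of the level4 page table)
 * from every CPU's page table once it is no longer needed.  SMP bringup
 * relies on this low mapping for protected-mode entry; see the comment
 * in mem_init().
 */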
void __init zap_low_mappings (void)
{
	int i;
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_pda[i].level4_pgt)
			cpu_pda[i].level4_pgt[0] = 0;
	}

	flush_tlb_all();
}

#ifndef CONFIG_DISCONTIGMEM
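/*
 * Set up the memory zones for the flat (non-NUMA) case: everything
 * below MAX_DMA_ADDRESS goes into ZONE_DMA, the rest into ZONE_NORMAL.
 * There is no highmem on x86-64, so ZONE_HIGHMEM stays empty.
 */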
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma;

	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	if (end_pfn < max_dma)
		zones_size[ZONE_DMA] = end_pfn;
	else {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = end_pfn - max_dma;
	}
	free_area_init(zones_size);
}

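/*
 * Check whether a pfn falls inside a usable (E820_RAM) region of the
 * BIOS-provided e820 memory map.  Region boundaries are rounded so that
 * only fully usable pages count as RAM.
 */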
static inline int page_is_ram (unsigned long pagenr)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long addr, end;

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
		 *	are not. Notably the 640->1Mb area. We need a sanity
		 *	check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}
#endif

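/*
 * Final memory initialisation: hand all bootmem pages over to the page
 * allocator, count the reserved pages, and print the memory banner.
 * After this point after_bootmem is set and spp_getpage() switches to
 * get_free_page().
 */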
void __init mem_init(void)
{
	unsigned long codesize, reservedpages, datasize, initsize;
	unsigned long tmp;

	max_mapnr = end_pfn;
	num_physpages = end_pfn; /* XXX not true because of holes */
	high_memory = (void *) __va(end_pfn << PAGE_SHIFT);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
	totalram_pages += numa_free_all_bootmem();
	tmp = 0;
	/* should count reserved pages here for all nodes */
#else
	if (!mem_map) BUG();

	totalram_pages += free_all_bootmem();

	for (tmp = 0; tmp < end_pfn; tmp++)
		/*
		 * Only count reserved RAM pages
		 */
		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
			reservedpages++;
#endif

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}

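/*
 * Map a kernel virtual range with large (2MB) pages, creating the
 * intermediate pml4/pgd levels as needed and propagating new pml4
 * entries into every CPU's level4 page table.  The caller must pass a
 * prot value with _PAGE_PSE set.
 */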
void __init __map_kernel_range(void *address, int len, pgprot_t prot)
{
	int i;
	void *end = address + len;
	BUG_ON((pgprot_val(prot) & _PAGE_PSE) == 0);
	address = (void *)((unsigned long)address & LARGE_PAGE_MASK);
	for (; address < end; address += LARGE_PAGE_SIZE) {
		pml4_t *pml4;
		pgd_t *pgd;
		pmd_t *pmd;

		pml4 = pml4_offset_k((unsigned long) address);
		if (pml4_none(*pml4)) {
			void *p = (void *)get_zeroed_page(GFP_KERNEL);
			if (!p) panic("Cannot map kernel range");
			for (i = 0; i < smp_num_cpus; i++) {
				set_pml4((pml4_t *)(cpu_pda[i].level4_pgt) +
					 pml4_index((unsigned long)address),
					 mk_kernel_pml4(virt_to_phys(p),KERNPG_TABLE));
			}
		}
		pgd = pgd_offset_k((unsigned long)address);
		if (pgd_none(*pgd)) {
			void *p = (void *)get_zeroed_page(GFP_KERNEL);
			if (!p) panic("Cannot map kernel range");
			set_pgd(pgd, __mk_pgd(virt_to_phys(p), KERNPG_TABLE));
		}
		pmd = pmd_offset(pgd, (unsigned long) address);
		set_pmd(pmd, __mk_pmd(virt_to_phys(address), prot));
	}
	__flush_tlb_all();
}

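/*
 * Release the memory occupied by __init code and data (the section
 * between __init_begin and __init_end) back to the page allocator once
 * boot has finished.  With CONFIG_INIT_DEBUG the pages are poisoned
 * with 0xcc first to catch late references.
 */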
void free_initmem(void)
{
	void *addr;

	addr = (&__init_begin);
	for (; addr < (void *)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		set_page_count(virt_to_page(addr), 1);
#ifdef CONFIG_INIT_DEBUG
		memset((void *)((unsigned long)addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
#endif
		free_page((unsigned long)addr);
		totalram_pages++;
	}
	printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
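/*
 * Free the pages that held the initial ramdisk once it is no longer
 * needed.  Ranges starting below the end of the kernel image (_end) are
 * left alone.
 */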
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start < (unsigned long)&_end)
		return;
	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(start));
		set_page_count(virt_to_page(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif

void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = 0;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
	return;
}

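/*
 * Generic bootmem reserve/free wrappers: with CONFIG_DISCONTIGMEM the
 * request is routed to the bootmem data of the node owning the physical
 * address, otherwise the flat bootmem allocator is used directly.
 */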
void reserve_bootmem_generic(unsigned long phys, unsigned len)
{
	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
	reserve_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
}


void free_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_DISCONTIGMEM
	free_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
	free_bootmem(phys, len);
#endif
}