2 * linux/arch/x86_64/mm/init.c
4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002 Andi Kleen <ak@suse.de>
9 #include <linux/config.h>
10 #include <linux/signal.h>
11 #include <linux/sched.h>
12 #include <linux/kernel.h>
13 #include <linux/errno.h>
14 #include <linux/string.h>
15 #include <linux/types.h>
16 #include <linux/ptrace.h>
17 #include <linux/mman.h>
19 #include <linux/swap.h>
20 #include <linux/smp.h>
21 #include <linux/init.h>
22 #include <linux/blk.h>
23 #include <linux/pagemap.h>
24 #include <linux/bootmem.h>
26 #include <asm/processor.h>
27 #include <asm/system.h>
28 #include <asm/uaccess.h>
29 #include <asm/pgtable.h>
30 #include <asm/pgalloc.h>
32 #include <asm/fixmap.h>
37 #include <asm/mmu_context.h>
38 #include <asm/proto.h>
40 mmu_gather_t mmu_gathers[NR_CPUS];
42 static unsigned long totalram_pages;
44 int do_check_pgt_cache(int low, int high)
/*
 * Trim the per-CPU page-table quicklists: once the cached entry count
 * exceeds `high`, free one cached entry of each kind (pgd/pmd/pte) per
 * pass until the count drops to `low` or below.
 * NOTE(review): interior lines (loop header, return value) are elided
 * from this view of the file.
 */
47 if(read_pda(pgtable_cache_sz) > high) {
/* Pop a cached pgd off the quicklist and release it via the slow path. */
49 if (read_pda(pgd_quick)) {
50 pgd_free_slow(pgd_alloc_one_fast());
/* Likewise for a cached pmd page... */
53 if (read_pda(pmd_quick)) {
54 pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
/* ...and for a cached pte page. */
57 if (read_pda(pte_quick)) {
58 pte_free_slow(pte_alloc_one_fast(NULL, 0));
61 } while(read_pda(pgtable_cache_sz) > low);
66 #ifndef CONFIG_DISCONTIGMEM
68 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
69 * in physical space, so we can cache the location of the first one and
70 * move around without checking the pgd every time.
75 int i, total = 0, reserved = 0;
76 int shared = 0, cached = 0;
/*
 * Print a memory summary: free swap, a walk over mem_map classifying
 * pages as reserved / swap-cached / shared, and the size of the per-CPU
 * page-table cache.  NOTE(review): the function header and parts of the
 * classification loop are elided from this view; presumably this is
 * show_mem() — confirm against the full file.
 */
78 printk("Mem-info:\n");
80 printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
/* A page with count > 1 has (count - 1) users beyond the base mapping. */
84 if (PageReserved(mem_map+i))
86 else if (PageSwapCache(mem_map+i))
88 else if (page_count(mem_map+i))
89 shared += page_count(mem_map+i) - 1;
91 printk("%d pages of RAM\n", total);
92 printk("%d reserved pages\n",reserved);
93 printk("%d pages shared\n",shared);
94 printk("%d pages swap cached\n",cached);
95 printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
100 /* References to section boundaries */
102 extern char _text, _etext, _edata, __bss_start, _end;
103 extern char __init_begin, __init_end;
107 static void *spp_getpage(void)
/*
 * Allocate one page for use as a page table by set_pte_phys().
 * Uses the normal page allocator once boot is far enough along, and
 * falls back to the bootmem low-pages allocator otherwise.  Panics on
 * failure: this runs while establishing fixed mappings, where failure
 * is unrecoverable.
 * NOTE(review): the branch choosing between the two allocators (likely
 * keyed on `after_bootmem`) is elided from this view.
 */
111 ptr = (void *) get_free_page(GFP_ATOMIC);
113 ptr = alloc_bootmem_low_pages(PAGE_SIZE);
/* Neither allocator produced a page: give up loudly. */
115 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
119 static void set_pte_phys(unsigned long vaddr,
120 unsigned long phys, pgprot_t prot)
/*
 * Install a single kernel mapping vaddr -> phys with protection `prot`,
 * creating any missing intermediate levels on the way down
 * (pml4 -> pgd -> pmd -> pte).  The pml4 entry itself must already
 * exist — it is set up by head.S — and is only checked here.
 */
127 level4 = pml4_offset_k(vaddr);
128 if (pml4_none(*level4)) {
129 printk("PML4 FIXMAP MISSING, it should be setup in head.S!\n");
/* Descend to the pgd level; allocate a pmd page if the slot is empty. */
132 pgd = level3_offset_k(level4, vaddr);
133 if (pgd_none(*pgd)) {
134 pmd = (pmd_t *) spp_getpage();
135 set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
/* Sanity check: the entry just written must resolve back to the page
   we allocated. */
136 if (pmd != pmd_offset(pgd, 0)) {
137 printk("PAGETABLE BUG #01!\n");
/* Same dance one level down: allocate a pte page if needed. */
141 pmd = pmd_offset(pgd, vaddr);
142 if (pmd_none(*pmd)) {
143 pte = (pte_t *) spp_getpage();
144 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
145 if (pte != pte_offset(pmd, 0)) {
146 printk("PAGETABLE BUG #02!\n");
/* Finally write the leaf pte and flush just this one translation. */
150 pte = pte_offset(pmd, vaddr);
151 set_pte(pte, mk_pte_phys(phys, prot));
154 * It's enough to flush this one mapping.
155 * (PGE mappings get flushed as well)
157 __flush_tlb_one(vaddr);
160 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
/*
 * Map a fixmap slot: convert the fixmap index to its fixed virtual
 * address and point it at `phys` with protection `prot` via
 * set_pte_phys().  An out-of-range index is reported with a printk
 * (the early return after it is elided from this view).
 */
162 unsigned long address = __fix_to_virt(idx);
164 if (idx >= __end_of_fixed_addresses) {
165 printk("Invalid __set_fixmap\n");
168 set_pte_phys(address, phys, prot);
171 extern pmd_t temp_boot_pmds[];
173 unsigned long __initdata table_start, table_end;
175 static struct temp_map {
/*
 * Boot-time scratch mapping slots used by alloc_low_page() to write to
 * freshly claimed page-table pages before the direct mapping exists.
 * Each slot pairs a pmd from head.S (temp_boot_pmds) with a fixed
 * virtual window — 40MB and 42MB here.  NOTE(review): the struct's
 * field declarations are elided from this view.
 */
179 } temp_mappings[] __initdata = {
180 { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
181 { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
185 static __init void *alloc_low_page(int *index, unsigned long *phys)
/*
 * Take the next free page from the boot page-table pool (the pfns from
 * table_start to table_end) and make it writable through a free
 * temp_mappings[] window.  Returns the virtual address within the temp
 * window; *index records the slot used (for unmap_low_page()) and
 * *phys the page's physical address.
 */
189 unsigned long pfn = table_end++, paddr;
/* Pool exhausted — fatal, this runs long before normal OOM handling. */
192 if (table_end >= end_pfn_map)
193 panic("alloc_low_page: ran out of page mappings");
/* Find the first temp-mapping slot not currently in use. */
194 for (i = 0; temp_mappings[i].allocated; i++) {
195 if (!temp_mappings[i].pmd)
196 panic("alloc_low_page: ran out of temp mappings");
198 ti = &temp_mappings[i];
/* Map the PMD-aligned region containing the page as one large page. */
199 paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
200 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
/* Offset of our page within that large-page window. */
203 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
205 *phys = pfn * PAGE_SIZE;
209 static __init void unmap_low_page(int i)
/* Release temp-mapping slot `i`: clear its pmd so the window can be
   reused by the next alloc_low_page() caller. */
211 struct temp_map *ti = &temp_mappings[i];
212 set_pmd(ti->pmd, __pmd(0));
216 static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned long end)
/*
 * Build the kernel direct mapping for the physical range [address, end)
 * under a single pml4 entry: one pgd entry per PGDIR_SIZE chunk, each
 * pointing at a pmd page filled with _PAGE_PSE large-page entries.
 * Chunks with no usable RAM per the e820 map, and slots past `end`,
 * are cleared.
 */
220 i = pgd_index(address);
222 for (; i < PTRS_PER_PGD; pgd++, i++) {
224 unsigned long paddr, pmd_phys;
227 paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
/* Past the end of the range: zero the remaining pgd slots and stop. */
229 for (; i < PTRS_PER_PGD; i++, pgd++)
230 set_pgd(pgd, __pgd(0));
/* Skip whole chunks the e820 map says contain no memory at all. */
234 if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
235 set_pgd(pgd, __pgd(0));
/* Allocate a pmd page (via a temp mapping) and hook it into this pgd. */
239 pmd = alloc_low_page(&map, &pmd_phys);
240 set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
241 for (j = 0; j < PTRS_PER_PMD; pmd++, j++ , paddr += PMD_SIZE) {
/* Past `end` inside this pmd page: clear the rest of the entries. */
245 for (; j < PTRS_PER_PMD; j++, pmd++)
246 set_pmd(pmd, __pmd(0));
/* Large global page; __supported_pte_mask strips bits the CPU lacks
   (e.g. NX when not supported). */
249 pe = _PAGE_PSE | _KERNPG_TABLE | _PAGE_NX | _PAGE_GLOBAL | paddr;
250 pe &= __supported_pte_mask;
251 set_pmd(pmd, __pmd(pe));
258 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
259 This runs before bootmem is initialized and gets pages directly from the
260 physical memory. To access them they are temporarily mapped. */
261 void __init init_memory_mapping(void)
266 unsigned long pgds, pmds, tables;
268 end = end_pfn_map << PAGE_SHIFT;
271 * Find space for the kernel direct mapping tables.
272 * Later we should allocate these tables in the local node of the memory
273 * mapped. Unfortunately this is done currently before the nodes are
/* Worst-case table footprint: one 8-byte entry per pgd and per pmd
   needed to cover `end`, each level rounded up to whole pages. */
277 pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
278 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
279 tables = round_up(pgds*8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);
281 /* Direct mapping must currently fit below the kernel in the first MB.
282 This is because we have no way to tell the later passes to not reuse
283 the memory, until bootmem is initialised */
284 /* Should limit MAXMEM for this */
285 table_start = find_e820_area(/*0*/ 0x8000, __pa_symbol(&_text), tables);
286 if (table_start == -1UL)
287 panic("Cannot find space for the kernel page tables");
289 table_start >>= PAGE_SHIFT;
290 table_end = table_start;
292 end += __PAGE_OFFSET; /* turn virtual */
/* Build one pml4 entry's worth of mapping per iteration: fill a fresh
   pgd page with phys_pgd_init(), then hook it into init_level4_pgt. */
294 for (adr = PAGE_OFFSET; adr < end; adr = next) {
296 unsigned long pgd_phys;
297 pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
298 next = adr + PML4_SIZE;
302 phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
303 set_pml4(init_level4_pgt + pml4_index(adr),
304 mk_kernel_pml4(pgd_phys, KERNPG_TABLE));
/* Record the current CR4 value in mmu_cr4_features. */
307 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
309 printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
310 table_start<<PAGE_SHIFT,
311 table_end<<PAGE_SHIFT);
314 void __init zap_low_mappings (void)
/*
 * Clear the first level4 (pml4) slot of every CPU's page table,
 * removing the low identity mappings.  Per the comment in mem_init(),
 * these are kept only until SMP bringup and the WP-bit test no longer
 * need them.
 */
317 for (i = 0; i < NR_CPUS; i++) {
318 if (cpu_pda[i].level4_pgt)
319 cpu_pda[i].level4_pgt[0] = 0;
325 #ifndef CONFIG_DISCONTIGMEM
326 void __init paging_init(void)
/*
 * Set up zone sizes for the flat (non-NUMA) memory model and hand them
 * to free_area_init(): everything below MAX_DMA_ADDRESS goes to
 * ZONE_DMA, the remainder to ZONE_NORMAL; the third zone stays empty.
 */
328 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
329 unsigned int max_dma;
331 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
/* A machine with less RAM than the DMA limit puts it all in ZONE_DMA. */
332 if (end_pfn < max_dma)
333 zones_size[ZONE_DMA] = end_pfn;
335 zones_size[ZONE_DMA] = max_dma;
336 zones_size[ZONE_NORMAL] = end_pfn - max_dma;
338 free_area_init(zones_size);
341 static inline int page_is_ram (unsigned long pagenr)
/*
 * Return non-zero when page frame `pagenr` lies inside an E820_RAM
 * region.  Region starts are rounded up and ends truncated to whole
 * pages, so partial pages at region edges do not count as RAM.
 */
345 for (i = 0; i < e820.nr_map; i++) {
346 unsigned long addr, end;
348 if (e820.map[i].type != E820_RAM) /* not usable memory */
351 * !!!FIXME!!! Some BIOSen report areas as RAM that
352 * are not. Notably the 640->1Mb area. We need a sanity
355 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
356 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
357 if ((pagenr >= addr) && (pagenr < end))
364 void __init mem_init(void)
/*
 * Late memory setup: release all bootmem pages to the buddy allocator,
 * count reserved RAM pages, and print the memory banner.
 * NOTE(review): several lines — including the reservedpages increment
 * and the tail of the function — are elided from this view.
 */
366 unsigned long codesize, reservedpages, datasize, initsize;
370 num_physpages = end_pfn; /* XXX not true because of holes */
371 high_memory = (void *) __va(end_pfn << PAGE_SHIFT);
373 /* clear the zero-page */
374 memset(empty_zero_page, 0, PAGE_SIZE);
378 /* this will put all low memory onto the freelists */
379 #ifdef CONFIG_DISCONTIGMEM
380 totalram_pages += numa_free_all_bootmem();
382 /* should count reserved pages here for all nodes */
386 totalram_pages += free_all_bootmem();
/* Count pages that are RAM per the e820 map but still marked reserved. */
388 for (tmp = 0; tmp < end_pfn; tmp++)
390 * Only count reserved RAM pages
392 if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
/* Section sizes derive from the linker-provided boundary symbols. */
398 codesize = (unsigned long) &_etext - (unsigned long) &_text;
399 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
400 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
402 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
403 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
404 max_mapnr << (PAGE_SHIFT-10),
406 reservedpages << (PAGE_SHIFT-10),
411 * Subtle. SMP is doing its boot stuff late (because it has to
412 * fork idle threads) - but it also needs low mappings for the
413 * protected-mode entry to work. We zap these entries only after
414 * the WP-bit has been tested.
421 /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
422 from the CPU leading to inconsistent cache lines. address and size
423 must be aligned to 2MB boundaries.
424 Does nothing when the mapping doesn't exist. */
425 void __init clear_kernel_mapping(unsigned long address, unsigned long size)
427 unsigned long end = address + size;
/* Both the start and the length must be large-page (2MB) aligned. */
429 BUG_ON(address & ~LARGE_PAGE_MASK);
430 BUG_ON(size & ~LARGE_PAGE_MASK);
/* Walk the range one large page at a time, clearing pmd entries. */
432 for (; address < end; address += LARGE_PAGE_SIZE) {
433 pgd_t *pgd = pgd_offset_k(address);
434 if (!pgd || pgd_none(*pgd))
436 pmd_t *pmd = pmd_offset(pgd, address);
437 if (!pmd || pmd_none(*pmd))
/* Only whole PSE mappings are removed here; a mapping split into 4K
   ptes would need its pte page freed, which this code does not do. */
439 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
440 /* Could handle this, but it should not happen currently. */
442 "clear_kernel_mapping: mapping has been split. will leak memory\n");
445 set_pmd(pmd, __pmd(0));
450 void free_initmem(void)
/*
 * Return the pages between __init_begin and __init_end to the page
 * allocator once boot is complete.  With CONFIG_INIT_DEBUG each page
 * is filled with 0xcc first, to catch stray late references.
 */
454 addr = (&__init_begin);
455 for (; addr < (void *)(&__init_end); addr += PAGE_SIZE) {
/* Un-reserve and reset the refcount so free_page() really frees it. */
456 ClearPageReserved(virt_to_page(addr));
457 set_page_count(virt_to_page(addr), 1);
458 #ifdef CONFIG_INIT_DEBUG
459 memset((unsigned long)addr & ~(PAGE_SIZE-1), 0xcc, PAGE_SIZE);
461 free_page((unsigned long)addr);
464 printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
467 #ifdef CONFIG_BLK_DEV_INITRD
468 void free_initrd_mem(unsigned long start, unsigned long end)
/*
 * Free the pages that held the initial ramdisk once it is no longer
 * needed.  NOTE(review): the body of the `start < &_end` guard (the
 * range overlapping the kernel image) is elided from this view —
 * confirm its action against the full file.
 */
470 if (start < (unsigned long)&_end)
472 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
473 for (; start < end; start += PAGE_SIZE) {
/* Same un-reserve/refcount-reset pattern as free_initmem(). */
474 ClearPageReserved(virt_to_page(start));
475 set_page_count(virt_to_page(start), 1);
482 void si_meminfo(struct sysinfo *val)
/*
 * Fill in the memory-related fields of a sysinfo structure:
 * total RAM pages, free pages, buffer pages, free highmem pages,
 * and the unit (one page) in which these counts are expressed.
 */
484 val->totalram = totalram_pages;
486 val->freeram = nr_free_pages();
487 val->bufferram = atomic_read(&buffermem_pages);
489 val->freehigh = nr_free_highpages();
490 val->mem_unit = PAGE_SIZE;
494 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
496 /* Should check here against the e820 map to avoid double free */
/* NUMA builds reserve on the node that owns `phys`; flat builds use
   the single global bootmem allocator. */
497 #ifdef CONFIG_DISCONTIGMEM
498 reserve_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
500 reserve_bootmem(phys, len);
505 void free_bootmem_generic(unsigned long phys, unsigned len)
507 #ifdef CONFIG_DISCONTIGMEM
508 free_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
510 free_bootmem(phys, len);