2 * linux/arch/x86_64/mm/init.c
4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002 Andi Kleen <ak@suse.de>
9 #include <linux/config.h>
10 #include <linux/signal.h>
11 #include <linux/sched.h>
12 #include <linux/kernel.h>
13 #include <linux/errno.h>
14 #include <linux/string.h>
15 #include <linux/types.h>
16 #include <linux/ptrace.h>
17 #include <linux/mman.h>
19 #include <linux/swap.h>
20 #include <linux/smp.h>
21 #include <linux/init.h>
22 #include <linux/blk.h>
23 #include <linux/pagemap.h>
24 #include <linux/bootmem.h>
26 #include <asm/processor.h>
27 #include <asm/system.h>
28 #include <asm/uaccess.h>
29 #include <asm/pgtable.h>
30 #include <asm/pgalloc.h>
32 #include <asm/fixmap.h>
37 #include <asm/mmu_context.h>
38 #include <asm/proto.h>
/*
 * Per-CPU TLB-gather state used by the zap/unmap paths, and the running
 * count of pages handed to the page allocator (updated in mem_init()).
 * NOTE(review): lines in this extract carry stray leading numbers from the
 * original file; many intervening lines are missing.
 */
40 mmu_gather_t mmu_gathers[NR_CPUS];
42 static unsigned long totalram_pages;
/*
 * do_check_pgt_cache - trim the per-CPU page-table quicklists.
 *
 * If the cached page count (pgtable_cache_sz in the per-CPU PDA) exceeds
 * @high, repeatedly pop one cached pgd/pmd/pte page off its quicklist
 * (the *_alloc_one_fast() calls) and release it for real via the
 * *_free_slow() routines, until the count drops to @low.
 * NOTE(review): the enclosing do { } braces and the return statement are
 * missing from this extract; body shown as-is.
 */
44 int do_check_pgt_cache(int low, int high)
47 	if(read_pda(pgtable_cache_sz) > high) {
49 		if (read_pda(pgd_quick)) {
50 			pgd_free_slow(pgd_alloc_one_fast());
53 		if (read_pda(pmd_quick)) {
54 			pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
57 		if (read_pda(pte_quick)) {
58 			pte_free_slow(pte_alloc_one_fast(NULL, 0));
61 	} while(read_pda(pgtable_cache_sz) > low);
66 #ifndef CONFIG_DISCONTIGMEM
68 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
69 * physical space so we can cache the place of the first one and move
70 * around without checking the pgd every time.
/*
 * Body of show_mem() — the function header line is not visible in this
 * extract (presumably "void show_mem(void)"; confirm against the full file).
 * Walks mem_map classifying each page as reserved, swap-cached, or shared
 * (count > 1), then printks a summary including the page-table cache size.
 */
75 	int i, total = 0, reserved = 0;
76 	int shared = 0, cached = 0;
78 	printk("Mem-info:\n");
80 	printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
84 		if (PageReserved(mem_map+i))
86 		else if (PageSwapCache(mem_map+i))
88 		else if (page_count(mem_map+i))
89 			shared += page_count(mem_map+i) - 1;
91 	printk("%d pages of RAM\n", total);
92 	printk("%d reserved pages\n",reserved);
93 	printk("%d pages shared\n",shared);
94 	printk("%d pages swap cached\n",cached);
95 	printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
100 /* References to section boundaries */
/* Linker-script symbols marking the kernel image's section boundaries;
 * only their addresses are meaningful, never their char values. */
102 extern char _text, _etext, _edata, __bss_start, _end;
103 extern char __init_begin, __init_end;
/*
 * spp_getpage - allocate one page for an intermediate page-table level.
 *
 * After bootmem is retired it uses get_free_page(GFP_ATOMIC); before that
 * it falls back to alloc_bootmem_low_pages(). Panics if no page can be
 * obtained. NOTE(review): the branch/return lines between these calls are
 * missing from this extract.
 */
107 static void *spp_getpage(void)
111 		ptr = (void *) get_free_page(GFP_ATOMIC);
113 		ptr = alloc_bootmem_low_pages(PAGE_SIZE);
115 		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
/*
 * set_pte_phys - map one page: establish vaddr -> phys with @prot.
 *
 * Walks the 4-level tree (pml4 -> pgd -> pmd -> pte), allocating missing
 * intermediate tables with spp_getpage(). The PML4 slot must already have
 * been populated by head.S; if not, only a warning is printed. Finishes by
 * flushing the single TLB entry for @vaddr.
 */
119 static void set_pte_phys(unsigned long vaddr,
120 			 unsigned long phys, pgprot_t prot)
127 	level4 = pml4_offset_k(vaddr);
128 	if (pml4_none(*level4)) {
129 		printk("PML4 FIXMAP MISSING, it should be setup in head.S!\n");
132 	pgd = level3_offset_k(level4, vaddr);
133 	if (pgd_none(*pgd)) {
		/* allocate a new pmd page and hook it into the pgd */
134 		pmd = (pmd_t *) spp_getpage();
135 		set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		/* sanity: the entry we installed must resolve back to this pmd */
136 		if (pmd != pmd_offset(pgd, 0)) {
137 			printk("PAGETABLE BUG #01!\n");
141 	pmd = pmd_offset(pgd, vaddr);
142 	if (pmd_none(*pmd)) {
		/* allocate a new pte page and hook it into the pmd */
143 		pte = (pte_t *) spp_getpage();
144 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
145 		if (pte != pte_offset(pmd, 0)) {
146 			printk("PAGETABLE BUG #02!\n");
150 	pte = pte_offset(pmd, vaddr);
151 	set_pte(pte, mk_pte_phys(phys, prot));
154 	 * It's enough to flush this one mapping.
155 	 * (PGE mappings get flushed as well)
157 	__flush_tlb_one(vaddr);
/*
 * __set_fixmap - map a physical address at a compile-time fixmap slot.
 * Validates @idx against __end_of_fixed_addresses, then delegates the
 * actual page-table surgery to set_pte_phys().
 */
160 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
162 	unsigned long address = __fix_to_virt(idx);
164 	if (idx >= __end_of_fixed_addresses) {
165 		printk("Invalid __set_fixmap\n");
168 	set_pte_phys(address, phys, prot);
/*
 * Two boot-time temporary mapping slots, backed by pmds set up in head.S.
 * alloc_low_page()/unmap_low_page() cycle through them to reach pages that
 * are not yet covered by the direct mapping. table_start/table_end bound
 * (in pfns) the region used for the early kernel page tables.
 * NOTE(review): the struct's member declarations are missing from this
 * extract; the initializers suggest { pmd, address, allocated }.
 */
171 extern pmd_t temp_boot_pmds[];
173 unsigned long __initdata table_start, table_end;
175 static struct temp_map {
179 } temp_mappings[] __initdata = {
180 	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
181 	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
/*
 * alloc_low_page - grab the next page-table page during early boot.
 *
 * Takes the next pfn from the table_start..table_end window and maps it
 * through a free temp_mappings[] slot as a 2MB PSE pmd entry, so it can be
 * written before the direct mapping exists. Returns the virtual address of
 * the page within that temporary window; *index identifies the slot (for
 * unmap_low_page()) and *phys receives the physical address. Panics when
 * the pfn window or the temp slots are exhausted.
 */
185 static __init void *alloc_low_page(int *index, unsigned long *phys)
189 	unsigned long pfn = table_end++, paddr;
192 	if (table_end >= end_pfn_map)
193 		panic("alloc_low_page: ran out of page mappings");
	/* find a temp slot that is not currently in use */
194 	for (i = 0; temp_mappings[i].allocated; i++) {
195 		if (!temp_mappings[i].pmd)
196 			panic("alloc_low_page: ran out of temp mappings");
198 	ti = &temp_mappings[i];
	/* map the 2MB-aligned frame containing pfn with a large-page entry */
199 	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
200 	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
	/* offset within the 2MB window gives the page's virtual address */
203 	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
205 	*phys  = pfn * PAGE_SIZE;
/*
 * unmap_low_page - release temp mapping slot @i by clearing its pmd.
 * Counterpart of alloc_low_page(); @i is the *index it returned.
 */
209 static __init void unmap_low_page(int i)
211 	struct temp_map *ti = &temp_mappings[i];
212 	set_pmd(ti->pmd, __pmd(0));
/*
 * phys_pgd_init - fill one pgd page with the direct mapping of physical
 * memory [address, end), using 2MB PSE pmd entries.
 *
 * For each pgd slot: regions beyond the e820 map get cleared entries;
 * otherwise a pmd page is allocated via alloc_low_page() and populated with
 * global large-page entries (masked by __supported_pte_mask, e.g. for NX).
 * NOTE(review): loop-termination/unmap lines are missing from this extract.
 */
216 static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned long end)
220 	i = pgd_index(address);
222 	for (; i < PTRS_PER_PGD; pgd++, i++) {
224 		unsigned long paddr, pmd_phys;
		/* physical address covered by this pgd slot */
227 		paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
			/* past the end: zero the remaining slots */
229 			for (; i < PTRS_PER_PGD; i++, pgd++)
230 				set_pgd(pgd, __pgd(0));
		/* skip 1GB chunks the BIOS did not report as mappable */
234 		if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
235 			set_pgd(pgd, __pgd(0));
239 		pmd = alloc_low_page(&map, &pmd_phys);
240 		set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
241 		for (j = 0; j < PTRS_PER_PMD; pmd++, j++ , paddr += PMD_SIZE) {
				/* past the end: zero the remaining pmd slots */
245 				for (; j < PTRS_PER_PMD; j++, pmd++)
246 					set_pmd(pmd, __pmd(0));
249 			pe = _PAGE_PSE | _KERNPG_TABLE | _PAGE_NX | _PAGE_GLOBAL | paddr;
250 			pe &= __supported_pte_mask;
251 			set_pmd(pmd, __pmd(pe));
258 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
259    This runs before bootmem is initialized and gets pages directly from the
260    physical memory. To access them they are temporarily mapped. */
261 void __init init_memory_mapping(void)
266 	unsigned long pgds, pmds, tables;
	/* highest physical address that must be covered */
268 	end = end_pfn_map << PAGE_SHIFT;
271 	 * Find space for the kernel direct mapping tables.
272 	 * Later we should allocate these tables in the local node of the memory
273 	 * mapped. Unfortunately this is done currently before the nodes are
	/* worst-case table space: one 8-byte entry per pgd/pmd needed */
277 	pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
278 	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
279 	tables = round_up(pgds*8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);
281 	/* Direct mapping must currently fit below the kernel in the first MB.
282 	   This is because we have no way to tell the later passes to not reuse
283 	   the memory, until bootmem is initialised */
284 	/* Should limit MAXMEM for this */
285 	table_start = find_e820_area(/*0*/ 0x8000, __pa_symbol(&_text), tables);
286 	if (table_start == -1UL)
287 		panic("Cannot find space for the kernel page tables");
	/* track the table window in pfns; table_end advances in alloc_low_page() */
289 	table_start >>= PAGE_SHIFT;
290 	table_end = table_start;
292 	end += __PAGE_OFFSET; /* turn virtual */
	/* one pgd page (512GB of address space) per PML4 slot */
294 	for (adr = PAGE_OFFSET; adr < end; adr = next) {
296 		unsigned long pgd_phys;
297 		pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
298 		next = adr + PML4_SIZE;
302 		phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
303 		set_pml4(init_level4_pgt + pml4_index(adr),
304 			 mk_kernel_pml4(pgd_phys, KERNPG_TABLE));
	/* read back CR4 so mmu_cr4_features reflects the boot CPU's setting */
307 	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
309 	printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
310 	       table_start<<PAGE_SHIFT,
311 	       table_end<<PAGE_SHIFT);
/*
 * zap_low_mappings - remove the identity mapping of low memory by clearing
 * PML4 slot 0 in every CPU's level-4 page table. Called once the low
 * mappings are no longer needed (see comment in mem_init()).
 */
314 void __init zap_low_mappings (void)
317 	for (i = 0; i < NR_CPUS; i++) {
318 		if (cpu_pda[i].level4_pgt)
319 			cpu_pda[i].level4_pgt[0] = 0;
325 #ifndef CONFIG_DISCONTIGMEM
/*
 * paging_init - size the memory zones and hand them to the page allocator
 * (non-NUMA build only). Everything below MAX_DMA_ADDRESS goes to
 * ZONE_DMA, the rest to ZONE_NORMAL; ZONE_HIGHMEM stays empty on x86-64.
 */
326 void __init paging_init(void)
328 	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
329 	unsigned int max_dma;
331 	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
332 	if (end_pfn < max_dma)
		/* all memory fits in the DMA zone */
333 		zones_size[ZONE_DMA] = end_pfn;
335 		zones_size[ZONE_DMA] = max_dma;
336 		zones_size[ZONE_NORMAL] = end_pfn - max_dma;
338 	free_area_init(zones_size);
/*
 * page_is_ram - true if pfn @pagenr lies inside an E820_RAM region.
 * Start addresses are rounded up and end addresses truncated to whole
 * pages, so partially-covered pages are not counted as RAM.
 */
341 static inline int page_is_ram (unsigned long pagenr)
345 	for (i = 0; i < e820.nr_map; i++) {
346 		unsigned long addr, end;
348 		if (e820.map[i].type != E820_RAM)	/* not usable memory */
351 		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
352 		 *	are not. Notably the 640->1Mb area. We need a sanity
355 		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
356 		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
357 		if  ((pagenr >= addr) && (pagenr < end))
/*
 * mem_init - late memory initialization: release all bootmem pages to the
 * buddy allocator, count reserved pages, and print the memory banner.
 * NOTE(review): several lines (max_mapnr setup, reservedpages increment,
 * the zap_low_mappings() call hinted at by the trailing comment) are
 * missing from this extract.
 */
364 void __init mem_init(void)
366 	unsigned long codesize, reservedpages, datasize, initsize;
370 	num_physpages = end_pfn;	/* XXX not true because of holes */
371 	high_memory = (void *) __va(end_pfn << PAGE_SHIFT);
373 	/* clear the zero-page */
374 	memset(empty_zero_page, 0, PAGE_SIZE);
378 	/* this will put all low memory onto the freelists */
379 #ifdef CONFIG_DISCONTIGMEM
380 	totalram_pages += numa_free_all_bootmem();
382 	/* should count reserved pages here for all nodes */
386 	totalram_pages += free_all_bootmem();
	/* tally reserved RAM pages (flat-memory build) */
388 	for (tmp = 0; tmp < end_pfn; tmp++)
390 		 * Only count reserved RAM pages
392 		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
398 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
399 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
400 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
402 	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
403 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
404 		max_mapnr << (PAGE_SHIFT-10),
406 		reservedpages << (PAGE_SHIFT-10),
411 	 * Subtle. SMP is doing its boot stuff late (because it has to
412 	 * fork idle threads) - but it also needs low mappings for the
413 	 * protected-mode entry to work. We zap these entries only after
414 	 * the WP-bit has been tested.
/*
 * __map_kernel_range - map [address, address+len) into kernel space with
 * 2MB large pages (prot must include _PAGE_PSE; enforced by the BUG_ON).
 *
 * For each 2MB-aligned chunk it walks pml4 -> pgd -> pmd, allocating any
 * missing intermediate table with get_zeroed_page(). A freshly-allocated
 * pgd page is installed into every CPU's level-4 table so all CPUs see the
 * mapping. Panics on allocation failure (boot-time only, __init).
 */
421 void __init __map_kernel_range(void *address, int len, pgprot_t prot)
424 	void *end = address + len;
425 	BUG_ON((pgprot_val(prot) & _PAGE_PSE) == 0);
	/* round down to the enclosing large-page boundary */
426 	address = (void *)((unsigned long)address & LARGE_PAGE_MASK);
427 	for (; address < end; address += LARGE_PAGE_SIZE) {
432 		pml4 = pml4_offset_k((unsigned long) address);
433 		if (pml4_none(*pml4)) {
434 			void *p = (void *)get_zeroed_page(GFP_KERNEL);
435 			if (!p) panic("Cannot map kernel range");
			/* replicate the new PML4 entry into every CPU's table */
436 			for (i = 0; i < smp_num_cpus; i++) {
437 				set_pml4((pml4_t *)(cpu_pda[i].level4_pgt) +
438 						pml4_index((unsigned long)address),
439 					 mk_kernel_pml4(virt_to_phys(p),KERNPG_TABLE));
442 		pgd = pgd_offset_k((unsigned long)address);
443 		if (pgd_none(*pgd)) {
444 			void *p = (void *)get_zeroed_page(GFP_KERNEL);
445 			if (!p) panic("Cannot map kernel range");
446 			set_pgd(pgd, __mk_pgd(virt_to_phys(p), KERNPG_TABLE));
448 		pmd = pmd_offset(pgd, (unsigned long) address);
449 		set_pmd(pmd, __mk_pmd(virt_to_phys(address), prot));
/*
 * free_initmem - return the __init text/data section to the page allocator.
 * Each page between __init_begin and __init_end is unreserved, given a
 * refcount of 1, optionally poisoned with 0xcc (CONFIG_INIT_DEBUG), and
 * freed. Totals are reported in KB.
 */
454 void free_initmem(void)
458 	addr = (&__init_begin);
459 	for (; addr < (void *)(&__init_end); addr += PAGE_SIZE) {
460 		ClearPageReserved(virt_to_page(addr));
461 		set_page_count(virt_to_page(addr), 1);
462 #ifdef CONFIG_INIT_DEBUG
		/* poison freed init memory to catch late references */
463 		memset((unsigned long)addr & ~(PAGE_SIZE-1), 0xcc, PAGE_SIZE);
465 		free_page((unsigned long)addr);
468 	printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
471 #ifdef CONFIG_BLK_DEV_INITRD
/*
 * free_initrd_mem - release the initrd's pages [start, end) back to the
 * allocator once it is no longer needed. The check against &_end guards
 * an initrd overlapping the kernel image (the action taken on that branch
 * is on a line missing from this extract).
 */
472 void free_initrd_mem(unsigned long start, unsigned long end)
474 	if (start < (unsigned long)&_end)
476 	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
477 	for (; start < end; start += PAGE_SIZE) {
478 		ClearPageReserved(virt_to_page(start));
479 		set_page_count(virt_to_page(start), 1);
/*
 * si_meminfo - fill in the sysinfo memory fields for the sysinfo(2)
 * syscall: total/free RAM, buffer pages, free highmem, and the unit size.
 */
486 void si_meminfo(struct sysinfo *val)
488 	val->totalram = totalram_pages;
490 	val->freeram = nr_free_pages();
491 	val->bufferram = atomic_read(&buffermem_pages);
493 	val->freehigh = nr_free_highpages();
494 	val->mem_unit = PAGE_SIZE;
/*
 * reserve_bootmem_generic - NUMA-aware wrapper around bootmem reservation:
 * route the request to the owning node's bootmem on DISCONTIGMEM builds,
 * otherwise use the flat reserve_bootmem().
 */
498 void reserve_bootmem_generic(unsigned long phys, unsigned len)
500 	/* Should check here against the e820 map to avoid double free */
501 #ifdef CONFIG_DISCONTIGMEM
502 	reserve_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
504 	reserve_bootmem(phys, len);
/*
 * free_bootmem_generic - counterpart of reserve_bootmem_generic(): free a
 * bootmem range on the owning node (DISCONTIGMEM) or the flat allocator.
 * NOTE(review): the function's closing lines fall past this extract.
 */
509 void free_bootmem_generic(unsigned long phys, unsigned len)
511 #ifdef CONFIG_DISCONTIGMEM
512 	free_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
514 	free_bootmem(phys, len);