2 * X86-64 specific CPU setup.
3 * Copyright (C) 1995 Linus Torvalds
4 * Copyright 2001, 2002 SuSE Labs / Andi Kleen.
5 * See setup.c for older changelog.
6 * $Id: setup64.c,v 1.27 2004/02/27 18:30:19 ak Exp $
8 #include <linux/config.h>
9 #include <linux/init.h>
10 #include <linux/kernel.h>
11 #include <linux/sched.h>
12 #include <linux/string.h>
15 #include <asm/processor.h>
17 #include <asm/bitops.h>
18 #include <asm/atomic.h>
19 #include <asm/mmu_context.h>
20 #include <asm/proto.h>
/* Raw boot-parameter block handed over by the loader; init-only data. */
23 char x86_boot_params[2048] __initdata = {0,};
/* Bitmask of CPUs that already ran cpu_init(); guards against double init
   (tested with test_and_set_bit() in cpu_init below). */
25 static unsigned long cpu_initialized __initdata = 0;
/* Per-CPU private data areas, one per possible CPU; filled in pda_init(). */
27 struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
/* Syscall entry points installed into MSR_LSTAR / MSR_CSTAR by syscall_init(). */
29 extern void system_call(void);
30 extern void ia32_cstar_target(void);
/* GDT descriptor: the size field is computed at runtime in cpu_init(). */
32 struct desc_ptr gdt_descr = { 0 /* filled in */, (unsigned long) gdt_table };
/* IDT descriptor: 256 gates of 16 bytes each on x86-64. */
33 struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
35 /* When you change the default make sure the no EFER path below sets the
36 correct flags everywhere. */
/* Mask of PTE bits this kernel may set; _PAGE_NX is withdrawn by
   nonx_setup()/check_efer() when NX is absent or disabled. */
37 unsigned long __supported_pte_mask = ~0UL;
/* Set from the "noexec=" option; makes check_efer() clear _PAGE_NX even on
   NX-capable CPUs. */
38 static int do_not_nx __initdata = 0;
/* Default VM flags for stack/data of 64-bit and 32-bit processes; the
   VM_EXEC bit in each is toggled by the noexec=/noexec32= options below. */
39 unsigned long vm_stack_flags = __VM_STACK_FLAGS;
40 unsigned long vm_stack_flags32 = __VM_STACK_FLAGS;
41 unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS;
42 unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS;
/* Protection bits implied for 32-bit PROT_READ mappings; "noexec32=compat"
   restores PROT_EXEC here (the "force" case body is not visible in this
   excerpt -- presumably it clears this; see the option comment below). */
43 unsigned long vm_force_exec32 = PROT_EXEC;
/* Statically allocated interrupt stack for the boot CPU (used in pda_init). */
45 char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;
51 noforce (default) Don't enable by default for heap/stack/data,
52 but allow PROT_EXEC to be effective
/*
 * Parse the "noexec=" boot option (registered via __setup() below):
 *   on              - enable the NX PTE bit and make heap/data/stack
 *                     non-executable by default
 *   noforce / off   - keep heap/data/stack executable; "off" additionally
 *                     disables NX entirely via do_not_nx
 * NOTE(review): the function's opening/closing braces and return statement
 * are not visible in this excerpt.
 */
56 int __init nonx_setup(char *str)
58 if (!strncmp(str, "on",3)) {
59 __supported_pte_mask |= _PAGE_NX;
61 vm_data_default_flags &= ~VM_EXEC;
62 vm_stack_flags &= ~VM_EXEC;
63 } else if (!strncmp(str, "noforce",7) || !strncmp(str,"off",3)) {
/* Only "off" starts with 'o', so do_not_nx is set for "off" alone;
   check_efer() then clears _PAGE_NX even on NX-capable CPUs. */
64 do_not_nx = (str[0] == 'o');
66 __supported_pte_mask &= ~_PAGE_NX;
67 vm_data_default_flags |= VM_EXEC;
68 vm_stack_flags |= VM_EXEC;
75 Control the no exec default for 32bit processes. Can be also overwritten
76 per executable using ELF header flags (e.g. needed for the X server)
77 Requires noexec=on or noexec=noforce to be effective.
80 all,on Heap,stack,data is non executable.
81 off (default) Heap,stack,data is executable
82 stack Stack is non executable, heap/data is.
83 force Don't imply PROT_EXEC for PROT_READ
84 compat (default) Imply PROT_EXEC for PROT_READ
/*
 * Parse the "noexec32=" boot option controlling the non-exec defaults for
 * 32-bit processes (option list documented in the comment block above).
 * Accepts a comma-separated list; later entries override earlier ones.
 * NOTE(review): the function braces, the body of the "force" case, and the
 * return statement are not visible in this excerpt.
 */
87 static int __init nonx32_setup(char *str)
90 while ((s = strsep(&str, ",")) != NULL) {
/* "all"/"on": heap, data and stack all non-executable. */
91 if (!strcmp(s, "all") || !strcmp(s,"on")) {
92 vm_data_default_flags32 &= ~VM_EXEC;
93 vm_stack_flags32 &= ~VM_EXEC;
/* "off": everything executable (the documented default). */
94 } else if (!strcmp(s, "off")) {
95 vm_data_default_flags32 |= VM_EXEC;
96 vm_stack_flags32 |= VM_EXEC;
/* "stack": only the stack is non-executable. */
97 } else if (!strcmp(s, "stack")) {
98 vm_data_default_flags32 |= VM_EXEC;
99 vm_stack_flags32 &= ~VM_EXEC;
100 } else if (!strcmp(s, "force")) {
/* "compat": imply PROT_EXEC for PROT_READ mappings of 32-bit tasks. */
102 } else if (!strcmp(s, "compat")) {
103 vm_force_exec32 = PROT_EXEC;
/* Register the boot-command-line handlers defined above. */
109 __setup("noexec=", nonx_setup);
110 __setup("noexec32=", nonx32_setup);
/*
 * pda_init - set up the per-CPU data area (PDA) for @cpu and point the
 * GS base MSR at it so per-CPU accesses work from here on.
 * The boot CPU uses the static boot_cpu_stack and init_level4_pgt; other
 * CPUs allocate a private IRQ stack and a private top-level page table.
 * NOTE(review): the conditional scaffolding selecting between the boot-CPU
 * and secondary-CPU paths is not visible in this excerpt.
 */
112 void pda_init(int cpu)
117 /* others are initialized in smpboot.c */
118 cpu_pda[cpu].pcurrent = init_tasks[cpu];
/* Boot-CPU path: static IRQ stack and the initial kernel page table. */
119 cpu_pda[cpu].irqstackptr = boot_cpu_stack;
120 level4 = init_level4_pgt;
/* Secondary-CPU path: allocate a private IRQ stack... */
122 cpu_pda[cpu].irqstackptr = (char *)
123 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
124 if (!cpu_pda[cpu].irqstackptr)
125 panic("cannot allocate irqstack for cpu %d\n", cpu);
/* ...and a private copy of the kernel's top-level page table. */
126 level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
129 panic("Cannot allocate top level page for cpu %d", cpu);
131 cpu_pda[cpu].level4_pgt = (unsigned long *)level4;
132 if (level4 != init_level4_pgt)
133 memcpy(level4, &init_level4_pgt, PAGE_SIZE);
/* Install the shared vmalloc page tables into slot 510 of this PML4. */
134 set_pml4(level4 + 510,
135 mk_kernel_pml4(__pa_symbol(boot_vmalloc_pgt), KERNPG_TABLE));
/* Switch this CPU onto its own page table. */
136 asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));
/* Point at the top of the IRQ stack, keeping 64 bytes of headroom. */
138 cpu_pda[cpu].irqstackptr += IRQSTACKSIZE-64;
139 cpu_pda[cpu].cpunumber = cpu;
/* -1 presumably marks "not currently on the IRQ stack" -- TODO confirm
   against the interrupt entry code. */
140 cpu_pda[cpu].irqcount = -1;
/* Clear %fs/%gs, then make the GS base MSR point at this CPU's PDA. */
142 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
143 wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
/*
 * syscall_init - program the SYSCALL entry-point MSRs for this CPU:
 * STAR (CS/SS selectors), LSTAR (64-bit entry rip) and, when IA32
 * emulation is configured, CSTAR (compatibility-mode entry rip).
 * NOTE(review): the function braces and the matching #endif are not
 * visible in this excerpt.
 */
146 void syscall_init(void)
149 * LSTAR and STAR live in a bit strange symbiosis.
150 * They both write to the same internal register. STAR allows to set CS/DS
151 * but only a 32bit target. LSTAR sets the 64bit rip.
153 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
154 wrmsrl(MSR_LSTAR, system_call);
/* Entry point for SYSCALL issued from compatibility (32-bit) mode. */
156 #ifdef CONFIG_IA32_EMULATION
157 wrmsrl(MSR_CSTAR, ia32_cstar_target);
/* Static per-exception stacks for the boot CPU; carved into the TSS ist[]
   slots by cpu_init() below. */
161 char boot_exception_stacks[N_EXCEPTION_STACKS*EXCEPTION_STKSZ];
/*
 * check_efer - read the EFER MSR and withdraw _PAGE_NX from the supported
 * PTE mask when the CPU does not report NX, or when the user disabled it
 * (do_not_nx, set by "noexec=off"); otherwise keep NX enabled.
 * NOTE(review): the function braces and the else keyword are not visible
 * in this excerpt.
 */
163 void check_efer(void)
166 rdmsrl(MSR_EFER, efer);
167 if (!(efer & EFER_NX) || do_not_nx) {
168 __supported_pte_mask &= ~_PAGE_NX;
170 __supported_pte_mask |= _PAGE_NX;
175 * cpu_init() initializes state that is per-CPU. Some data is already
176 * initialized (naturally) in the bootstrap process, such as the GDT
177 * and IDT. We reload them nevertheless, this function acts as a
178 * 'CPU state barrier', nothing should get across.
179 * A lot of state is already set up in PDA init.
181 void __init cpu_init (void)
184 int nr = stack_smp_processor_id();
186 int nr = smp_processor_id();
188 struct tss_struct * t = &init_tss[nr];
190 unsigned long estack;
192 /* CPU 0 is initialised in head64.c */
196 if (test_and_set_bit(nr, &cpu_initialized))
197 panic("CPU#%d already initialized!\n", nr);
199 printk("Initializing CPU#%d\n", nr);
201 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
203 gdt_descr.size = NR_CPUS * sizeof(struct per_cpu_gdt) + __GDT_HEAD_SIZE;
205 __asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
206 __asm__ __volatile__("lidt %0": "=m" (idt_descr));
212 asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
218 t->io_map_base = INVALID_IO_BITMAP_OFFSET;
219 memset(t->io_bitmap, 0xff, sizeof(t->io_bitmap));
221 /* Flags to clear on syscall */
222 wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE);
224 wrmsrl(MSR_FS_BASE, 0);
225 wrmsrl(MSR_KERNEL_GS_BASE, 0);
229 * set up and load the per-CPU TSS
231 estack = (unsigned long)boot_exception_stacks + EXCEPTION_STKSZ;
232 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
235 estack += EXCEPTION_STKSZ;
237 estack = __get_free_pages(GFP_ATOMIC, EXCEPTION_STK_ORDER);
239 panic("Can't allocate exception stack %lu for CPU %d\n", v, nr);
240 t->ist[v] = estack + EXCEPTION_STKSZ;
244 atomic_inc(&init_mm.mm_count);
245 current->active_mm = &init_mm;
248 enter_lazy_tlb(&init_mm, current, nr);
255 * Clear all 6 debug registers:
266 * Force FPU initialization:
268 current->flags &= ~PF_USEDFPU;
269 current->used_math = 0;