2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
8 * $Id: entry.S,v 1.99 2003/10/24 17:48:32 ak Exp $
12 * entry.S contains the system-call and fault low-level handling routines.
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for PT_TRACESYS, signals or fork/exec et al.
21 * - schedule it carefully for the final hardware.
26 #include <linux/config.h>
27 #include <linux/linkage.h>
28 #include <asm/segment.h>
29 #include <asm/current.h>
31 #include <asm/cache.h>
32 #include <asm/errno.h>
33 #include <asm/calling.h>
34 #include <asm/offset.h>
36 #include <asm/unistd.h>
37 #include <asm/hw_irq.h>
41 #define PDAREF(field) %gs:field
44 * C code is not supposed to know about partial frames. Every time a C function
45 * that looks at the pt_regs is called these two macros are executed around it.
46 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
50 /* %rsp:at FRAMEEND */
51 .macro FIXUP_TOP_OF_STACK tmp
52 movq PDAREF(pda_oldrsp),\tmp
54 movq $__USER_DS,SS(%rsp)
55 movq $__USER_CS,CS(%rsp)
56 movq $-1,RCX(%rsp) /* contains return address, already in RIP */
57 movq R11(%rsp),\tmp /* get eflags */
58 movq \tmp,EFLAGS(%rsp)
61 .macro RESTORE_TOP_OF_STACK tmp,offset=0
62 movq RSP-\offset(%rsp),\tmp
63 movq \tmp,PDAREF(pda_oldrsp)
64 movq EFLAGS-\offset(%rsp),\tmp
65 movq \tmp,R11-\offset(%rsp)
70 * A newly forked process directly context switches into this.
73 movq %rax,%rdi /* return value of __switch_to -> prev task */
76 testb $PT_TRACESYS,tsk_ptrace(%rcx)
80 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
81 jz int_ret_from_sys_call
82 testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
83 jnz int_ret_from_sys_call
84 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
93 * System call entry. Up to 6 arguments in registers are supported.
95 * SYSCALL does not save anything on the stack and does not change the
96 * stack pointer. Gets the per CPU area from the hidden GS MSR and finds the
97 * current kernel stack.
102 * rax system call number
104 * rcx return address for syscall/sysret, C arg3
107 * r10 arg3 (--> moved to rcx for C)
110 * r11 eflags for syscall/sysret, temporary for C
111 * r12-r15,rbp,rbx saved by C code, not touched.
113 * Interrupts are off on entry.
114 * Only called from user space.
119 movq %rsp,PDAREF(pda_oldrsp)
120 movq PDAREF(pda_kernelstack),%rsp
123 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
124 movq %rcx,RIP-ARGOFFSET(%rsp)
126 testl $PT_TRACESYS,tsk_ptrace(%rcx)
128 cmpq $__NR_syscall_max,%rax
131 call *sys_call_table(,%rax,8) # XXX: rip relative
132 movq %rax,RAX-ARGOFFSET(%rsp)
133 .globl ret_from_sys_call
135 sysret_with_reschedule:
138 cmpq $0,tsk_need_resched(%rcx)
139 jne sysret_reschedule
140 cmpl $0,tsk_sigpending(%rcx)
143 movq RIP-ARGOFFSET(%rsp),%rcx
144 RESTORE_ARGS 0,-ARG_SKIP,1
145 movq PDAREF(pda_oldrsp),%rsp
151 xorl %esi,%esi # oldset
152 leaq -ARGOFFSET(%rsp),%rdi # regs
153 leaq do_signal(%rip),%rax
154 call ptregscall_common
158 cmpq $0,tsk_need_resched(%rcx)
159 je sysret_restore_args
162 jmp sysret_signal_test
167 jmp sysret_with_reschedule
171 movq $-ENOSYS,RAX(%rsp)
172 FIXUP_TOP_OF_STACK %rdi
175 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
177 cmpq $__NR_syscall_max,%rax
179 tracesys_call: /* backtrace marker */
180 movq %r10,%rcx /* fixup for C */
181 call *sys_call_table(,%rax,8)
182 movq %rax,RAX-ARGOFFSET(%rsp)
183 tracesys_done: /* backtrace marker */
187 RESTORE_TOP_OF_STACK %rbx
189 jmp ret_from_sys_call
192 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
193 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
194 jmp ret_from_sys_call
197 * Syscall return path ending with IRET.
198 * This can be either 64bit calls that require restoring of all registers
199 * (impossible with sysret) or 32bit calls.
201 ENTRY(int_ret_from_sys_call)
203 testl $3,CS-ARGOFFSET(%rsp)
204 je retint_restore_args
205 intret_with_reschedule:
208 cmpq $0,tsk_need_resched(%rcx)
209 jne intret_reschedule
210 cmpl $0,tsk_sigpending(%rcx)
212 jmp retint_restore_args_swapgs
217 jmp intret_with_reschedule
222 xorq %rsi,%rsi # oldset -> arg2
223 movq %rsp,%rdi # &ptregs -> arg1
229 cmpq $0,tsk_need_resched(%rcx)
230 je retint_restore_args_swapgs
233 # RED-PEN: can we lose signals here?
234 jmp intret_signal_test
237 * Certain special system calls that need to save a complete stack frame.
240 .macro PTREGSCALL label,func
243 leaq \func(%rip),%rax
244 jmp ptregscall_common
247 PTREGSCALL stub_clone, sys_clone
248 PTREGSCALL stub_fork, sys_fork
249 PTREGSCALL stub_vfork, sys_vfork
250 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
251 PTREGSCALL stub_sigaltstack, sys_sigaltstack
253 .macro PTREGSCALL3 label,func,arg
256 leaq \func(%rip),%rax
257 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
258 jmp ptregscall_common
261 PTREGSCALL3 stub_iopl, sys_iopl, %rsi
263 ENTRY(ptregscall_common)
267 FIXUP_TOP_OF_STACK %r11
269 RESTORE_TOP_OF_STACK %r11
279 FIXUP_TOP_OF_STACK %r11
282 testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
284 RESTORE_TOP_OF_STACK %r11
293 jmp int_ret_from_sys_call
296 * sigreturn is special because it needs to restore all registers on return.
297 * This cannot be done with SYSRET, so use the IRET return path instead.
299 ENTRY(stub_rt_sigreturn)
302 FIXUP_TOP_OF_STACK %r11
303 call sys_rt_sigreturn
304 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
306 jmp int_ret_from_sys_call
309 * Interrupt entry/exit.
311 * Interrupt entry points save only callee clobbered registers, except
314 * Entry runs with interrupts off.
317 /* 0(%rsp): interrupt number */
318 ENTRY(common_interrupt)
319 testl $3,16(%rsp) # from kernel?
323 #ifdef CONFIG_X86_REMOTE_DEBUG
328 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
330 addl $1,PDAREF(pda_irqcount) # XXX: should be merged with irq.c irqcount
331 movq PDAREF(pda_irqstackptr),%rax
333 pushq %rdi # save old stack
335 /* 0(%rsp): oldrsp-ARGOFFSET */
339 subl $1,PDAREF(pda_irqcount)
340 leaq ARGOFFSET(%rdi),%rsp
341 testl $3,CS(%rdi) # from kernel?
342 je retint_restore_args
343 /* Interrupt came from user space */
344 retint_with_reschedule:
346 cmpq $0,tsk_need_resched(%rcx)
347 jne retint_reschedule
348 cmpl $0,tsk_sigpending(%rcx)
350 retint_restore_args_swapgs:
356 .section __ex_table,"a"
358 .quad iret_label,bad_iret
361 /* force a signal here? this matches i386 behaviour */
363 /* runs with kernelgs again */
364 movq $-9999,%rdi /* better code? */
371 movq $-1,ORIG_RAX(%rsp)
372 xorq %rsi,%rsi # oldset
373 movq %rsp,%rdi # &pt_regs
379 cmpq $0,tsk_need_resched(%rcx)
380 je retint_restore_args_swapgs
383 jmp retint_signal_test
389 jmp retint_with_reschedule
391 /* IF:off, stack contains irq number on origrax */
407 1: addl $1,%gs:pda_irqcount
408 movq %gs:pda_irqstackptr,%rax
413 .macro BUILD_SMP_INTERRUPT x,v
422 BUILD_SMP_INTERRUPT reschedule_interrupt,RESCHEDULE_VECTOR
423 BUILD_SMP_INTERRUPT invalidate_interrupt,INVALIDATE_TLB_VECTOR
424 BUILD_SMP_INTERRUPT call_function_interrupt,CALL_FUNCTION_VECTOR
426 #ifdef CONFIG_X86_LOCAL_APIC
427 BUILD_SMP_INTERRUPT apic_timer_interrupt,LOCAL_TIMER_VECTOR
428 BUILD_SMP_INTERRUPT error_interrupt,ERROR_APIC_VECTOR
429 BUILD_SMP_INTERRUPT spurious_interrupt,SPURIOUS_APIC_VECTOR
434 * Exception entry points.
437 pushq $0 /* push error code/oldrax */
438 pushq %rax /* push real oldrax to the rdi slot */
443 .macro errorentry sym
450 * Exception entry point. This expects an error code/orig_rax on the stack
451 * and the exception handler in %rax.
455 /* rdi slot contains rax, oldrax contains error code */
457 movq 8(%rsp),%rsi /* load rax */
460 pushq %rsi /* store rax */
474 movq ORIG_RAX(%rsp),%rsi /* get error code */
475 movq $-1,ORIG_RAX(%rsp)
477 /* r15d: swapgs flag */
484 cmpq $0,tsk_need_resched(%rcx)
486 cmpl $0,tsk_sigpending(%rcx)
488 error_restore_swapgs:
492 jmp retint_restore_args
507 cmpq $0,tsk_need_resched(%rcx)
508 je error_restore_swapgs
511 jmp error_signal_test
515 /* There are two places in the kernel that can potentially fault with
516 usergs. Handle them here. */
517 leaq iret_label(%rip),%rdx
520 /* check truncated address too. This works around a CPU issue */
521 movl %edx,%edx /* zero extend */
524 cmpq $gs_change,RIP(%rsp)
526 /* iret_label and gs_change are handled by exception handlers
527 and the exit points run with kernelgs again */
531 /* Reload gs selector with exception handling */
532 /* edi: new selector */
539 2: mfence /* workaround for opteron errata #88 */
544 .section __ex_table,"a"
546 .quad gs_change,bad_gs
555 * Create a kernel thread.
557 * C extern interface:
558 * extern long arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
560 * asm input arguments:
561 * rdi: fn, rsi: arg, rdx: flags
563 ENTRY(arch_kernel_thread)
564 FAKE_STACK_FRAME $child_rip
567 # rdi: flags, rsi: usp, rdx: will be &pt_regs
577 # save retval on the stack so it's popped before `ret`
581 * It isn't worth to check for reschedule here,
582 * so internally to the x86_64 port you can rely on kernel_thread()
583 * not to reschedule the child before returning, this avoids the need
584 * of hacks for example to fork off the per-CPU idle tasks.
585 * [Hopefully no generic code relies on the reschedule -AK]
593 * Here we are in the child and the registers are set as they were
594 * at kernel_thread() invocation in the parent.
604 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
606 * C extern interface:
607 * extern long execve(char *name, char **argv, char **envp)
609 * asm input arguments:
610 * rdi: name, rsi: argv, rdx: envp
612 * We want to fallback into:
613 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
615 * do_sys_execve asm fallback arguments:
616 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
625 je int_ret_from_sys_call
631 errorentry do_page_fault
633 ENTRY(coprocessor_error)
634 zeroentry do_coprocessor_error
636 ENTRY(simd_coprocessor_error)
637 zeroentry do_simd_coprocessor_error
639 ENTRY(device_not_available)
647 leaq math_state_restore(%rip),%rcx
648 leaq math_emulate(%rip),%rbx
662 /* NMI could happen inside the critical section of a swapgs,
663 so it is needed to use this expensive way to check.
664 Rely on arch_prctl forbiding user space from setting a negative
665 GS. Only the kernel value is negative. */
666 movl $MSR_GS_BASE,%ecx
685 zeroentry do_overflow
691 zeroentry do_invalid_op
693 ENTRY(coprocessor_segment_overrun)
694 zeroentry do_coprocessor_segment_overrun
697 zeroentry do_reserved
700 errorentry do_double_fault
703 errorentry do_invalid_TSS
705 ENTRY(segment_not_present)
706 errorentry do_segment_not_present
709 errorentry do_stack_segment
711 ENTRY(general_protection)
712 errorentry do_general_protection
714 ENTRY(alignment_check)
715 errorentry do_alignment_check
718 zeroentry do_divide_error
720 ENTRY(spurious_interrupt_bug)
721 zeroentry do_spurious_interrupt_bug
724 zeroentry do_machine_check
727 zeroentry do_call_debug