2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/smp_lock.h>
35 #include <linux/compiler.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
38 #include <linux/security.h>
39 #include <linux/syscalls.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
44 #include <linux/elf.h>
/* Entry points registered with the binfmt machinery (see elf_format below). */
static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
static int load_elf_library(struct file*);
static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
/* Arch-provided: capture FPU state for core dumps. */
extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);

/* auxv/stack entries are pushed as native words on this architecture */
#define elf_addr_t unsigned long
/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 *
 * NOTE(review): the #else/#endif lines of this conditional were lost in
 * extraction; the "#define elf_core_dump NULL" below is the #else branch
 * (used only when USE_ELF_CORE_DUMP is not defined).
 */
#ifdef USE_ELF_CORE_DUMP
static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
#define elf_core_dump NULL
/*
 * Minimum alignment used by the loader: the larger of the ELF exec page
 * size and the kernel page size.
 *
 * NOTE(review): the #else/#endif lines of this conditional were lost in
 * extraction; the PAGE_SIZE define below is the #else branch.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
# define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
# define ELF_MIN_ALIGN PAGE_SIZE

/* Round down to / offset within / round up to an ELF_MIN_ALIGN boundary. */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
/* Registration record handed to register_binfmt() for ELF executables.
 * NOTE(review): the closing "};" was lost in extraction. */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE

/* An address is bad if it lies beyond the task's address-space limit. */
#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
85 static int set_brk(unsigned long start, unsigned long end)
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
90 unsigned long addr = do_brk(start, end - start);
94 current->mm->start_brk = current->mm->brk = end;
99 /* We need to explicitly zero any fractional pages
100 after the data section (i.e. bss). This would
101 contain the junk from the file that should not
105 static void padzero(unsigned long elf_bss)
109 nbyte = ELF_PAGEOFFSET(elf_bss);
111 nbyte = ELF_MIN_ALIGN - nbyte;
112 clear_user((void __user *) elf_bss, nbyte);
/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; old_sp; })
/* NOTE(review): the #else separating the (normal) grows-down variants
 * below from the grows-up ones above, and the closing #endif, were lost
 * in extraction. */
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
/*
 * Lay out the new program's initial stack: argc, the argv[] and envp[]
 * pointer arrays, the platform string and the ELF auxiliary vector.
 *
 * NOTE(review): this chunk is an extraction-damaged sample -- braces,
 * several statements (declarations of ei_index/items/retval, loop
 * headers, error checks, the return) and some #else/#endif lines are
 * missing.  Code lines are preserved as-is; only comments were added.
 */
create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	elf_addr_t *elf_info;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	size_t len = strlen(k_platform) + 1;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 *
	 * The conditionals here are unneeded, but kept in to make the
	 * code behaviour the same as pre change unless we have
	 * hyperthreaded processors. This should be cleaned up
	 * before 2.6.
	 */
	if (smp_num_siblings > 1)
		STACK_ALLOC(p, ((current->pid % 64) << 7));
	u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
	__copy_to_user(u_platform, k_platform, len);

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *) current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)

	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
	NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
	NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
	NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
	NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);

	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry. */
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	items += 3; /* a.out interpreters require argv & envp too */
	items += 1; /* ELF interpreters only put argc on the stack */
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
	sp = (elf_addr_t __user *)bprm->p;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	__put_user(argc, sp++);
	envp = argv + argc + 1;
	__put_user((elf_addr_t)(unsigned long)argv, sp++);
	__put_user((elf_addr_t)(unsigned long)envp, sp++);
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_start;
	__put_user((elf_addr_t)p, argv++);
	len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
	if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
	current->mm->arg_end = current->mm->env_start = p;
	__put_user((elf_addr_t)p, envp++);
	len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
	if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place. */
	sp = (elf_addr_t __user *)envp + 1;
	copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t));
277 static unsigned long elf_map(struct file *filep, unsigned long addr,
278 struct elf_phdr *eppnt, int prot, int type)
280 unsigned long map_addr;
282 down_write(¤t->mm->mmap_sem);
283 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
284 eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type,
285 eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
286 up_write(¤t->mm->mmap_sem);
290 #endif /* !elf_map */
/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

/*
 * Map the dynamic linker (PT_INTERP target) into the address space and
 * return its entry point (or an error address; callers use BAD_ADDR()).
 *
 * NOTE(review): extraction dropped braces, declarations (size/retval/i),
 * error gotos and the cleanup/return tail; code lines are preserved
 * as-is, only comments were added.
 */
static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
		struct file * interpreter,
		unsigned long *interp_load_addr)
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
	if (!elf_check_arch(interp_elf_ex))
	if (!interpreter->f_op || !interpreter->f_op->mmap)

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
	if (interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
	elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);

	retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);

	/* Map every PT_LOAD segment of the interpreter. */
	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
	  if (eppnt->p_type == PT_LOAD) {
	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
	    unsigned long vaddr = 0;
	    unsigned long k, map_addr;

	    if (eppnt->p_flags & PF_R) elf_prot = PROT_READ;
	    if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
	    if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
	    vaddr = eppnt->p_vaddr;
	    /* Once a base is chosen (or for ET_EXEC), map at fixed addresses. */
	    if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
		elf_type |= MAP_FIXED;

	    map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
	    if (BAD_ADDR(map_addr))

	    /* First ET_DYN segment fixes the load bias for the rest. */
	    if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
		load_addr = map_addr - ELF_PAGESTART(vaddr);

	    /*
	     * Check to see if the section's size will overflow the
	     * allowed task size. Note that p_filesz must always be
	     * <= p_memsize so it is only necessary to check p_memsz.
	     */
	    k = load_addr + eppnt->p_vaddr;
	    if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
		eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {

	    /*
	     * Find the end of the file mapping for this phdr, and keep
	     * track of the largest address we see for this.
	     */
	    k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;

	    /*
	     * Do the same thing for the memory mapping - between
	     * elf_bss and last_bss is the bss section.
	     */
	    k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;

	/*
	 * Now fill out the bss section. First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);	/* What we have mapped so far */

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		error = do_brk(elf_bss, last_bss - elf_bss);

	*interp_load_addr = load_addr;
	error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
/*
 * Load an a.out-format dynamic linker by reading its text+data straight
 * into the address space and reserving its bss via do_brk().  Returns
 * the interpreter entry point, or ~0UL on failure.
 *
 * NOTE(review): extraction dropped braces, the addr/offset declarations,
 * the switch case labels/default and the return; code lines are
 * preserved as-is, only comments were added.
 */
static unsigned long load_aout_interp(struct exec * interp_ex,
		struct file * interpreter)
	unsigned long text_data, elf_entry = ~0UL;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	/* OMAGIC: image starts at offset/address 0 */
	addr = (char __user *)0;
	/* ZMAGIC/QMAGIC: honour the header's text offset/address */
	offset = N_TXTOFF(*interp_ex);
	addr = (char __user *) N_TXTADDR(*interp_ex);

	do_brk(0, text_data);
	if (!interpreter->f_op || !interpreter->f_op->read)
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
	/* keep I-cache coherent with the freshly written text */
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
	elf_entry = interp_ex->a_entry;
/*
 * These are the functions used to load ELF style executables and shared
 * libraries. There is no binary dependent code anywhere else.
 */

/* Which kind of PT_INTERP interpreter the binary requested. */
#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2
/*
 * Main binfmt entry point: parse the ELF headers from bprm->buf, map all
 * PT_LOAD segments, load the PT_INTERP interpreter (ELF or a.out) if
 * present, build the stack/auxv tables and start the new thread.
 *
 * NOTE(review): this chunk is an extraction-damaged sample -- braces,
 * many declarations (retval, i, size, error, elf_exec_fileno, nbyte),
 * error checks, gotos, labels and the return paths are missing.  Code
 * lines are preserved as-is; only comments were added.  Also note the
 * "¤t" tokens near the end: mojibake for "&current" (HTML
 * "&curren;" entity) -- must be restored before this can compile.
 */
static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
	struct file *interpreter = NULL; /* to shut gcc up */
 	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	struct elf_phdr * elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	struct elfhdr elf_ex;
	struct elfhdr interp_elf_ex;
	struct exec interp_ex;
	char passed_fileno[6];
	struct files_struct *files;
	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;

	/* Get the exec-header */
	elf_ex = *((struct elfhdr *) bprm->buf);

	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
	if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN)
	if (!elf_check_arch(&elf_ex))
	if (!bprm->file->f_op||!bprm->file->f_op->mmap)

	/* Now read in all of the header information */
	if (elf_ex.e_phentsize != sizeof(struct elf_phdr))
	if (elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
	size = elf_ex.e_phnum * sizeof(struct elf_phdr);
	elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
	retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);

	files = current->files;		/* Refcounted so ok */
	retval = unshare_files();
	if (files == current->files) {
		put_files_struct(files);

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	/* Scan the program headers for a PT_INTERP interpreter request. */
	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			if (elf_ppnt->p_filesz > PATH_MAX)
			elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
			if (!elf_interpreter)

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
				goto out_free_interp;
			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate interpreter path.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new images's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;
			retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
				goto out_free_dentry;

			/* Get the exec headers */
			interp_ex = *((struct exec *) bprm->buf);
			interp_elf_ex = *((struct elfhdr *) bprm->buf);

	/* Honour PT_GNU_STACK's executable-stack request, if present. */
	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
				executable_stack = EXSTACK_DISABLE_X;
	have_pt_gnu_stack = (i < elf_ex.e_phnum);

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(interp_ex) != OMAGIC) &&
		    (N_MAGIC(interp_ex) != ZMAGIC) &&
		    (N_MAGIC(interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
	     		// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;

		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&interp_elf_ex))
			goto out_free_dentry;

		/* Executables without an interpreter also need a personality  */
		SET_PERSONALITY(elf_ex, ibcs2_interpreter);

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
				goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
		put_files_struct(files);

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(elf_ex, have_pt_gnu_stack))
		current->personality |= READ_IMPLIES_EXEC;

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->rss = 0;
	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
	retval = setup_arg_pages(bprm, executable_stack);
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at fixed address, not at a variable
	   address. */
	for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)

		if (unlikely (elf_brk > elf_bss)) {
			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk (elf_bss + load_bias,
					  elf_brk + load_bias);
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				clear_user((void __user *) elf_bss + load_bias, nbyte);

		if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the default mmap
			   base, as well as whatever program they might try to exec.  This
			   is because the brk will follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);

		if (!load_addr_set) {
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (elf_ex.e_type == ET_DYN) {
					    ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
		k = elf_ppnt->p_vaddr;
		if (k < start_code) start_code = k;
		if (start_data < k) start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work.  Avoid overflows.  */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;

	/* Relocate all recorded addresses by the chosen load bias. */
	elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&interp_ex,
			elf_entry = load_elf_interp(&interp_elf_ex,
		if (BAD_ADDR(elf_entry)) {
			printk(KERN_ERR "Unable to load interpreter\n");
			send_sig(SIGSEGV, current, 0);
			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
			goto out_free_dentry;
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		kfree(elf_interpreter);
		elf_entry = elf_ex.e_entry;

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &elf_ex, (interpreter_type == INTERPRETER_AOUT),
			load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior.  Sigh.  */
		/* NOTE(review): "¤t" below is mojibake for "&current". */
		down_write(¤t->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(¤t->mm->mmap_sem);

	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically links apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
			send_sig(SIGTRAP, current, 0);

	/* error/cleanup paths (labels lost in extraction): */
	allow_write_access(interpreter);
	kfree(elf_interpreter);
	sys_close(elf_exec_fileno);
	put_files_struct(current->files);
	current->files = files;
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */

/*
 * binfmt load_shlib hook: map a single-PT_LOAD ELF "library" at its
 * linked address and reserve its bss via do_brk().
 *
 * NOTE(review): extraction dropped braces, error checks/gotos and the
 * return path; the "¤t" tokens are mojibake for "&current".  Also
 * note elf_phdata is advanced past non-PT_LOAD headers below -- the
 * (missing) kfree must use the original allocation pointer.
 */
static int load_elf_library(struct file *file)
	struct elf_phdr *elf_phdata;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)

	/* Now read in all of the header information */
	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL);

	retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j);

	/* Require exactly one PT_LOAD segment. */
	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
		if ((elf_phdata + i)->p_type == PT_LOAD) j++;

	while (elf_phdata->p_type != PT_LOAD) elf_phdata++;

	/* Now use mmap to map the library into memory. */
	down_write(¤t->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(elf_phdata->p_vaddr),
			(elf_phdata->p_filesz +
			 ELF_PAGEOFFSET(elf_phdata->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(elf_phdata->p_offset -
			 ELF_PAGEOFFSET(elf_phdata->p_vaddr)));
	up_write(¤t->mm->mmap_sem);
	if (error != ELF_PAGESTART(elf_phdata->p_vaddr))

	elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz;

	len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
	bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
		do_brk(len, bss - len);
/*
 * ELF core dumper.
 *
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#ifdef USE_ELF_CORE_DUMP

/*
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
1017 * These are the only things you should do on a core-file: use only these
1018 * functions to write out all the necessary info.
1020 static int dump_write(struct file *file, const void *addr, int nr)
1022 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1025 static int dump_seek(struct file *file, off_t off)
1027 if (file->f_op->llseek) {
1028 if (file->f_op->llseek(file, off, 0) != off)
1036 * Decide whether a segment is worth dumping; default is yes to be
1037 * sure (missing info is worse than too much; etc).
1038 * Personally I'd include everything, and use the coredump limit...
1040 * I think we should skip something. But I am not sure how. H.J.
1042 static int maydump(struct vm_area_struct *vma)
1045 * If we may not read the contents, don't allow us to dump
1046 * them either. "dump_write()" can't handle it anyway.
1048 if (!(vma->vm_flags & VM_READ))
1051 /* Do not dump I/O mapped devices! -DaveM */
1052 if (vma->vm_flags & VM_IO)
1055 if (vma->vm_flags & (VM_WRITE|VM_GROWSUP|VM_GROWSDOWN))
1057 if (vma->vm_flags & (VM_READ|VM_EXEC|VM_EXECUTABLE|VM_SHARED))
/* Round @x up to the next multiple of @y (integer arithmetic). */
#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
/* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* note owner string, e.g. "CORE" */
	int type;		/* note type, e.g. NT_PRSTATUS */
	unsigned int datasz;	/* payload size in bytes */
	void *data;		/* payload (borrowed, not copied) */
};
1074 static int notesize(struct memelfnote *en)
1078 sz = sizeof(struct elf_note);
1079 sz += roundup(strlen(en->name) + 1, 4);
1080 sz += roundup(en->datasz, 4);
/* writenote()-local helpers: bail out of writenote() with 0 on failure. */
#define DUMP_WRITE(addr, nr)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
#define DUMP_SEEK(off)	\
	do { if (!dump_seek(file, (off))) return 0; } while(0)
1090 static int writenote(struct memelfnote *men, struct file *file)
1094 en.n_namesz = strlen(men->name) + 1;
1095 en.n_descsz = men->datasz;
1096 en.n_type = men->type;
1098 DUMP_WRITE(&en, sizeof(en));
1099 DUMP_WRITE(men->name, en.n_namesz);
1100 /* XXX - cast from long long to long to avoid need for libgcc.a */
1101 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1102 DUMP_WRITE(men->data, men->datasz);
1103 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
/* elf_core_dump()-local variants: also enforce the RLIMIT_CORE limit.
 * NOTE(review): the preceding "#undef DUMP_WRITE"/"#undef DUMP_SEEK"
 * lines and the "goto end_coredump;" continuation lines of each macro
 * were lost in extraction. */
#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
#define DUMP_SEEK(off) \
	if (!dump_seek(file, (off))) \
1117 static inline void fill_elf_header(struct elfhdr *elf, int segs)
1119 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1120 elf->e_ident[EI_CLASS] = ELF_CLASS;
1121 elf->e_ident[EI_DATA] = ELF_DATA;
1122 elf->e_ident[EI_VERSION] = EV_CURRENT;
1123 elf->e_ident[EI_OSABI] = ELF_OSABI;
1124 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1126 elf->e_type = ET_CORE;
1127 elf->e_machine = ELF_ARCH;
1128 elf->e_version = EV_CURRENT;
1130 elf->e_phoff = sizeof(struct elfhdr);
1132 #ifdef ELF_CORE_EFLAGS
1133 elf->e_flags = ELF_CORE_EFLAGS;
1137 elf->e_ehsize = sizeof(struct elfhdr);
1138 elf->e_phentsize = sizeof(struct elf_phdr);
1139 elf->e_phnum = segs;
1140 elf->e_shentsize = 0;
1142 elf->e_shstrndx = 0;
1146 static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
1148 phdr->p_type = PT_NOTE;
1149 phdr->p_offset = offset;
1152 phdr->p_filesz = sz;
1159 static void fill_note(struct memelfnote *note, const char *name, int type,
1160 unsigned int sz, void *data)
1170 * fill up all the fields in prstatus from the given task struct, except registers
1171 * which need to be filled up separately.
1173 static void fill_prstatus(struct elf_prstatus *prstatus,
1174 struct task_struct *p, long signr)
1176 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1177 prstatus->pr_sigpend = p->pending.signal.sig[0];
1178 prstatus->pr_sighold = p->blocked.sig[0];
1179 prstatus->pr_pid = p->pid;
1180 prstatus->pr_ppid = p->parent->pid;
1181 prstatus->pr_pgrp = process_group(p);
1182 prstatus->pr_sid = p->signal->session;
1183 jiffies_to_timeval(p->utime, &prstatus->pr_utime);
1184 jiffies_to_timeval(p->stime, &prstatus->pr_stime);
1185 jiffies_to_timeval(p->cutime, &prstatus->pr_cutime);
1186 jiffies_to_timeval(p->cstime, &prstatus->pr_cstime);
1189 static void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1190 struct mm_struct *mm)
1194 /* first copy the parameters from user space */
1195 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1197 len = mm->arg_end - mm->arg_start;
1198 if (len >= ELF_PRARGSZ)
1199 len = ELF_PRARGSZ-1;
1200 copy_from_user(&psinfo->pr_psargs,
1201 (const char __user *)mm->arg_start, len);
1202 for(i = 0; i < len; i++)
1203 if (psinfo->pr_psargs[i] == 0)
1204 psinfo->pr_psargs[i] = ' ';
1205 psinfo->pr_psargs[len] = 0;
1207 psinfo->pr_pid = p->pid;
1208 psinfo->pr_ppid = p->parent->pid;
1209 psinfo->pr_pgrp = process_group(p);
1210 psinfo->pr_sid = p->signal->session;
1212 i = p->state ? ffz(~p->state) + 1 : 0;
1213 psinfo->pr_state = i;
1214 psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDTZW"[i];
1215 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1216 psinfo->pr_nice = task_nice(p);
1217 psinfo->pr_flag = p->flags;
1218 SET_UID(psinfo->pr_uid, p->uid);
1219 SET_GID(psinfo->pr_gid, p->gid);
1220 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1225 /* Here is the structure in which status of each thread is captured. */
1226 struct elf_thread_status
1228 struct list_head list;
1229 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1230 elf_fpregset_t fpu; /* NT_PRFPREG */
1231 #ifdef ELF_CORE_COPY_XFPREGS
1232 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1234 struct memelfnote notes[3];
1239 * In order to add the specific thread information for the elf file format,
1240 * we need to keep a linked list of every threads pr_status and then
1241 * create a single section for them in the final core file.
1243 static int elf_dump_thread_status(long signr, struct task_struct * p, struct list_head * thread_list)
1246 struct elf_thread_status *t;
1249 t = kmalloc(sizeof(*t), GFP_ATOMIC);
1252 memset(t, 0, sizeof(*t));
1254 INIT_LIST_HEAD(&t->list);
1257 fill_prstatus(&t->prstatus, p, signr);
1258 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1260 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
1262 sz += notesize(&t->notes[0]);
1264 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
1265 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
1267 sz += notesize(&t->notes[1]);
1270 #ifdef ELF_CORE_COPY_XFPREGS
1271 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1272 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
1274 sz += notesize(&t->notes[2]);
1277 list_add(&t->list, thread_list);
1284 * This is a two-pass process; first we find the offsets of the bits,
1285 * and then they are actually written out. If we run out of core limit
/*
 * Actual ELF core dumper for the current process.
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * NOTE(review): this excerpt is elided — the kmalloc failure checks,
 * the "goto cleanup"/"goto end_coredump" error paths, the get_fs()/
 * set_fs(KERNEL_DS) bracketing, several loop heads and closing braces
 * are not visible here.  Comments describe only what the visible lines
 * do; elided control flow is marked explicitly.
 */
static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
	struct vm_area_struct *vma;
	struct elfhdr *elf = NULL;
	off_t offset = 0, dataoff;
	/* RLIMIT_CORE: data written past this many bytes is truncated */
	unsigned long limit = current->rlim[RLIMIT_CORE].rlim_cur;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);		/* elf_thread_status of the other threads */
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
	int thread_status_size = 0;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change the
	 * mmap / vma pages or map_count are now blocked in do_exit on
	 * current finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);

	/* capture the status of all other threads sharing current's mm */
	read_lock(&tasklist_lock);
	/* (body of the elided do_each_thread(g,p) iteration follows) */
	if (current->mm == p->mm && current != p) {
		int sz = elf_dump_thread_status(signr, p, &thread_list);
		/* on the (elided) sz == 0 failure branch the lock is
		 * dropped before bailing out to cleanup: */
		read_unlock(&tasklist_lock);
		thread_status_size += sz;
	while_each_thread(g,p);
	read_unlock(&tasklist_lock);

	/* now collect the dump for the current */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;

	fill_elf_header(elf, segs+1);	/* including notes section */

	/* flag the task as dumping core (visible to /proc and friends) */
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);

	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	fill_note(notes +2, "CORE", NT_TASKSTRUCT, sizeof(*current), current);

	/* saved_auxv holds AT_NULL-terminated (type, value) pairs; the
	 * (elided) loop head advances i by 2 per entry */
	auxv = (elf_addr_t *) current->mm->saved_auxv;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof (elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);

	/* first pass: write the headers while accumulating file offsets */
	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
		struct elf_phdr phdr;
		/* total note payload: current thread's notes + the others' */
		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

		fill_elf_note_phdr(&phdr, sz, offset);
		DUMP_WRITE(&phdr, sizeof(phdr));

	/* Page-align dumped data */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct elf_phdr phdr;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		/* vmas we choose not to dump get a zero-length file image */
		phdr.p_filesz = maydump(vma) ? sz : 0;
		offset += phdr.p_filesz;
		/* translate VM_* protection bits into ELF PF_* flags */
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file))

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file))

	/* second pass: dump the memory image of each vma, a page at a time */
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {

		for (addr = vma->vm_start;
		     addr += PAGE_SIZE) {
			/* intentionally shadows the outer vma: get_user_pages()
			 * reports the vma actually backing this page */
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
					   &page, &vma) <= 0) {
				/* unmapped page: leave a hole in the file */
				DUMP_SEEK (file->f_pos + PAGE_SIZE);
			if (page == ZERO_PAGE(addr)) {
				/* shared zero page: also dumped as a hole */
				DUMP_SEEK (file->f_pos + PAGE_SIZE);
			flush_cache_page(vma, addr);
			/* stop writing once the core limit is exceeded */
			if ((size += PAGE_SIZE) > limit ||
			    !dump_write(file, kaddr,
				page_cache_release(page);
			page_cache_release(page);

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;

	/* sanity check: bytes written must match the precomputed offsets */
	if ((off_t) file->f_pos != offset) {
		printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
		       (off_t) file->f_pos, offset);

	/* cleanup: free the per-thread status entries collected above */
	while(!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		kfree(list_entry(tmp, struct elf_thread_status, list));

#ifdef ELF_CORE_COPY_XFPREGS
1547 #endif /* USE_ELF_CORE_DUMP */
1549 static int __init init_elf_binfmt(void)
1551 return register_binfmt(&elf_format);
1554 static void __exit exit_elf_binfmt(void)
1556 /* Remove the COFF and ELF loaders. */
1557 unregister_binfmt(&elf_format);
/*
 * Register at core_initcall time (earlier than module_init) so the ELF
 * loader is available before init is exec'd; module_exit permits removal
 * when built as a module.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");