original comment: +Wilson03172004,marked due to this pci host does not support MWI
[linux-2.4.git] / fs / exec.c
1 /*
2  *  linux/fs/exec.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6
7 /*
8  * #!-checking implemented by tytso.
9  */
10 /*
11  * Demand-loading implemented 01.12.91 - no need to read anything but
12  * the header into memory. The inode of the executable is put into
13  * "current->executable", and page faults do the actual loading. Clean.
14  *
15  * Once more I can proudly say that linux stood up to being changed: it
16  * was less than 2 hours work to get demand-loading completely implemented.
17  *
18  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
19  * current->executable is only used by the procfs.  This allows a dispatch
20  * table to check for several different types  of binary formats.  We keep
21  * trying until we recognize the file or we run out of supported binary
22  * formats. 
23  */
24
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/pagemap.h>
35 #include <linux/highmem.h>
36 #include <linux/spinlock.h>
37 #include <linux/personality.h>
38 #include <linux/swap.h>
39 #include <linux/utsname.h>
40 #define __NO_VERSION__
41 #include <linux/module.h>
42
43 #include <asm/uaccess.h>
44 #include <asm/pgalloc.h>
45 #include <asm/mmu_context.h>
46
47 #ifdef CONFIG_KMOD
48 #include <linux/kmod.h>
49 #endif
50
/*
 * Core-dump tunables.  They are only defined here; nothing in the
 * visible part of this file reads them.
 * NOTE(review): presumably exposed through sysctl - confirm in sysctl.c.
 */
int core_uses_pid;
char core_pattern[65] = "core";
int core_setuid_ok = 0;
/* The maximal length of core_pattern is also specified in sysctl.c */

/*
 * Head of the singly linked list of registered binary-format handlers
 * (linked through ->next), and the rwlock that guards every walk or
 * modification of that list.
 */
static struct linux_binfmt *formats;
static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
58
59 int register_binfmt(struct linux_binfmt * fmt)
60 {
61         struct linux_binfmt ** tmp = &formats;
62
63         if (!fmt)
64                 return -EINVAL;
65         if (fmt->next)
66                 return -EBUSY;
67         write_lock(&binfmt_lock);
68         while (*tmp) {
69                 if (fmt == *tmp) {
70                         write_unlock(&binfmt_lock);
71                         return -EBUSY;
72                 }
73                 tmp = &(*tmp)->next;
74         }
75         fmt->next = formats;
76         formats = fmt;
77         write_unlock(&binfmt_lock);
78         return 0;       
79 }
80
81 int unregister_binfmt(struct linux_binfmt * fmt)
82 {
83         struct linux_binfmt ** tmp = &formats;
84
85         write_lock(&binfmt_lock);
86         while (*tmp) {
87                 if (fmt == *tmp) {
88                         *tmp = fmt->next;
89                         write_unlock(&binfmt_lock);
90                         return 0;
91                 }
92                 tmp = &(*tmp)->next;
93         }
94         write_unlock(&binfmt_lock);
95         return -EINVAL;
96 }
97
98 static inline void put_binfmt(struct linux_binfmt * fmt)
99 {
100         if (fmt->module)
101                 __MOD_DEC_USE_COUNT(fmt->module);
102 }
103
104 /*
105  * Note that a shared library must be both readable and executable due to
106  * security reasons.
107  *
108  * Also note that we take the address to load from from the file itself.
109  */
110 asmlinkage long sys_uselib(const char * library)
111 {
112         struct file * file;
113         struct nameidata nd;
114         int error;
115
116         error = user_path_walk(library, &nd);
117         if (error)
118                 goto out;
119
120         error = -EINVAL;
121         if (!S_ISREG(nd.dentry->d_inode->i_mode))
122                 goto exit;
123
124         error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
125         if (error)
126                 goto exit;
127
128         file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
129         error = PTR_ERR(file);
130         if (IS_ERR(file))
131                 goto out;
132
133         error = -ENOEXEC;
134         if(file->f_op && file->f_op->read) {
135                 struct linux_binfmt * fmt;
136
137                 read_lock(&binfmt_lock);
138                 for (fmt = formats ; fmt ; fmt = fmt->next) {
139                         if (!fmt->load_shlib)
140                                 continue;
141                         if (!try_inc_mod_count(fmt->module))
142                                 continue;
143                         read_unlock(&binfmt_lock);
144                         error = fmt->load_shlib(file);
145                         read_lock(&binfmt_lock);
146                         put_binfmt(fmt);
147                         if (error != -ENOEXEC)
148                                 break;
149                 }
150                 read_unlock(&binfmt_lock);
151         }
152         fput(file);
153 out:
154         return error;
155 exit:
156         path_release(&nd);
157         goto out;
158 }
159
160 /*
161  * count() counts the number of arguments/envelopes
162  */
163 static int count(char ** argv, int max)
164 {
165         int i = 0;
166
167         if (argv != NULL) {
168                 for (;;) {
169                         char * p;
170
171                         if (get_user(p, argv))
172                                 return -EFAULT;
173                         if (!p)
174                                 break;
175                         argv++;
176                         if(++i > max)
177                                 return -E2BIG;
178                 }
179         }
180         return i;
181 }
182
/*
 * 'copy_strings()' copies argument/environment strings from user
 * memory to free pages in kernel mem. These are in a format ready
 * to be put directly into the top of new user memory.
 *
 * The strings are processed from argv[argc-1] down to argv[0]; each one
 * is placed immediately below the previous one by moving bprm->p down
 * by its length.  Pages of bprm->page[] are allocated on demand.
 *
 * Returns 0 on success, -EFAULT if a pointer or string cannot be read,
 * -E2BIG if a string does not fit below bprm->p, -ENOMEM if a page
 * cannot be allocated.
 */
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
{
        /* Page currently kmap()ed, kept across iterations to avoid remapping. */
        struct page *kmapped_page = NULL;
        char *kaddr = NULL;
        int ret;

        while (argc-- > 0) {
                char *str;
                int len;
                unsigned long pos;

                /* A zero length from strnlen_user() is treated as a fault. */
                if (get_user(str, argv+argc) ||
                                !(len = strnlen_user(str, bprm->p))) {
                        ret = -EFAULT;
                        goto out;
                }

                /* Not enough room left below the current position. */
                if (bprm->p < len)  {
                        ret = -E2BIG;
                        goto out;
                }

                bprm->p -= len;
                /* XXX: add architecture specific overflow check here. */
                pos = bprm->p;

                /* Copy the string page by page, starting at offset 'pos'. */
                while (len > 0) {
                        int i, new, err;
                        int offset, bytes_to_copy;
                        struct page *page;

                        offset = pos % PAGE_SIZE;
                        i = pos/PAGE_SIZE;
                        page = bprm->page[i];
                        new = 0;
                        if (!page) {
                                page = alloc_page(GFP_HIGHUSER);
                                bprm->page[i] = page;
                                if (!page) {
                                        ret = -ENOMEM;
                                        goto out;
                                }
                                new = 1;
                        }

                        /* Switch the mapping only when moving to a different page. */
                        if (page != kmapped_page) {
                                if (kmapped_page)
                                        kunmap(kmapped_page);
                                kmapped_page = page;
                                kaddr = kmap(kmapped_page);
                        }
                        /* Zero the part of a fresh page in front of the data... */
                        if (new && offset)
                                memset(kaddr, 0, offset);
                        bytes_to_copy = PAGE_SIZE - offset;
                        if (bytes_to_copy > len) {
                                bytes_to_copy = len;
                                /* ...and the part behind it when the data ends early. */
                                if (new)
                                        memset(kaddr+offset+len, 0,
                                                PAGE_SIZE-offset-len);
                        }
                        err = copy_from_user(kaddr+offset, str, bytes_to_copy);
                        if (err) {
                                ret = -EFAULT;
                                goto out;
                        }

                        pos += bytes_to_copy;
                        str += bytes_to_copy;
                        len -= bytes_to_copy;
                }
        }
        ret = 0;
out:
        /* Drop whatever mapping is still held, on success and failure alike. */
        if (kmapped_page)
                kunmap(kmapped_page);
        return ret;
}
265
266 /*
267  * Like copy_strings, but get argv and its values from kernel memory.
268  */
269 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
270 {
271         int r;
272         mm_segment_t oldfs = get_fs();
273         set_fs(KERNEL_DS); 
274         r = copy_strings(argc, argv, bprm);
275         set_fs(oldfs);
276         return r; 
277 }
278
/*
 * This routine is used to map in a page into an address space: needed by
 * execve() for the initial stack and environment pages.
 *
 * The page is installed writable and dirty at 'address' in tsk->mm.
 * If the page tables cannot be allocated, or a pte is already present
 * at that address, the page is freed and tsk is sent SIGKILL.
 *
 * tsk->mmap_sem is held for writing.
 */
void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
{
        pgd_t * pgd;
        pmd_t * pmd;
        pte_t * pte;
        struct vm_area_struct *vma;
        /* Fallback protection, used only when no VMA is found for the address. */
        pgprot_t prot = PAGE_COPY;

        /* The caller is expected to hold the only reference; only warn otherwise. */
        if (page_count(page) != 1)
                printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
        pgd = pgd_offset(tsk->mm, address);

        spin_lock(&tsk->mm->page_table_lock);
        pmd = pmd_alloc(tsk->mm, pgd, address);
        if (!pmd)
                goto out;
        pte = pte_alloc(tsk->mm, pmd, address);
        if (!pte)
                goto out;
        /* Never overwrite an existing mapping. */
        if (!pte_none(*pte))
                goto out;
        lru_cache_add(page);
        flush_dcache_page(page);
        flush_page_to_ram(page);
        /* lookup is cheap because there is only a single entry in the list */
        vma = find_vma(tsk->mm, address);
        if (vma)
                prot = vma->vm_page_prot;
        set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
        tsk->mm->rss++;
        spin_unlock(&tsk->mm->page_table_lock);

        /* no need for flush_tlb */
        return;
out:
        /* Failure: release the lock, give the page back and kill the task. */
        spin_unlock(&tsk->mm->page_table_lock);
        __free_page(page);
        force_sig(SIGKILL, tsk);
        return;
}
325
326 int setup_arg_pages(struct linux_binprm *bprm)
327 {
328         unsigned long stack_base;
329         struct vm_area_struct *mpnt;
330         int i, ret;
331
332         stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
333
334         bprm->p += stack_base;
335         if (bprm->loader)
336                 bprm->loader += stack_base;
337         bprm->exec += stack_base;
338
339         mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
340         if (!mpnt) 
341                 return -ENOMEM; 
342         
343         down_write(&current->mm->mmap_sem);
344         {
345                 mpnt->vm_mm = current->mm;
346                 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
347                 mpnt->vm_end = STACK_TOP;
348                 mpnt->vm_flags = VM_STACK_FLAGS;
349                 mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
350                 mpnt->vm_ops = NULL;
351                 mpnt->vm_pgoff = 0;
352                 mpnt->vm_file = NULL;
353                 mpnt->vm_private_data = (void *) 0;
354                 if ((ret = insert_vm_struct(current->mm, mpnt))) {
355                         up_write(&current->mm->mmap_sem);
356                         kmem_cache_free(vm_area_cachep, mpnt);
357                         return ret;
358                 }
359                 current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
360         } 
361
362         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
363                 struct page *page = bprm->page[i];
364                 if (page) {
365                         bprm->page[i] = NULL;
366                         put_dirty_page(current,page,stack_base);
367                 }
368                 stack_base += PAGE_SIZE;
369         }
370         up_write(&current->mm->mmap_sem);
371         
372         return 0;
373 }
374
375 struct file *open_exec(const char *name)
376 {
377         struct nameidata nd;
378         struct inode *inode;
379         struct file *file;
380         int err = 0;
381
382         err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
383         file = ERR_PTR(err);
384         if (!err) {
385                 inode = nd.dentry->d_inode;
386                 file = ERR_PTR(-EACCES);
387                 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
388                     S_ISREG(inode->i_mode)) {
389                         int err = permission(inode, MAY_EXEC);
390                         if (!err && !(inode->i_mode & 0111))
391                                 err = -EACCES;
392                         file = ERR_PTR(err);
393                         if (!err) {
394                                 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
395                                 if (!IS_ERR(file)) {
396                                         err = deny_write_access(file);
397                                         if (err) {
398                                                 fput(file);
399                                                 file = ERR_PTR(err);
400                                         }
401                                 }
402 out:
403                                 return file;
404                         }
405                 }
406                 path_release(&nd);
407         }
408         goto out;
409 }
410
411 int kernel_read(struct file *file, unsigned long offset,
412         char * addr, unsigned long count)
413 {
414         mm_segment_t old_fs;
415         loff_t pos = offset;
416         int result = -ENOSYS;
417
418         if (!file->f_op->read)
419                 goto fail;
420         old_fs = get_fs();
421         set_fs(get_ds());
422         result = file->f_op->read(file, addr, count, &pos);
423         set_fs(old_fs);
424 fail:
425         return result;
426 }
427
/*
 * Give the current task a clean address space for the new image.
 *
 * If the task is the only user of its mm, that mm is emptied and
 * reused.  Otherwise a new mm is allocated and installed, and the
 * reference on the old one is dropped.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int exec_mmap(void)
{
        struct mm_struct * mm, * old_mm;

        old_mm = current->mm;

        /* Fast path: nobody else uses the mm, so tear down its mappings in place. */
        if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
                mm_release();
                down_write(&old_mm->mmap_sem);
                exit_mmap(old_mm);
                up_write(&old_mm->mmap_sem);
                return 0;
        }


        mm = mm_alloc();
        if (mm) {
                struct mm_struct *active_mm;

                if (init_new_context(current, mm)) {
                        mmdrop(mm);
                        return -ENOMEM;
                }

                /* Add it to the list of mm's */
                spin_lock(&mmlist_lock);
                list_add(&mm->mmlist, &init_mm.mmlist);
                mmlist_nr++;
                spin_unlock(&mmlist_lock);

                /* Switch both mm pointers under the task lock, then activate. */
                task_lock(current);
                active_mm = current->active_mm;
                current->mm = mm;
                current->active_mm = mm;
                task_unlock(current);
                activate_mm(active_mm, mm);
                mm_release();
                if (old_mm) {
                        /* A task with its own mm must have been running on it. */
                        if (active_mm != old_mm) BUG();
                        mmput(old_mm);
                        return 0;
                }
                /*
                 * No mm of our own before the exec: only the active_mm
                 * reference is dropped.
                 * NOTE(review): presumably the kernel-thread case - confirm.
                 */
                mmdrop(active_mm);
                return 0;
        }
        return -ENOMEM;
}
475
476 /*
477  * This function makes sure the current process has its own signal table,
478  * so that flush_signal_handlers can later reset the handlers without
479  * disturbing other processes.  (Other processes might share the signal
480  * table via the CLONE_SIGNAL option to clone().)
481  */
482  
483 static inline int make_private_signals(void)
484 {
485         struct signal_struct * newsig;
486
487         if (atomic_read(&current->sig->count) <= 1)
488                 return 0;
489         newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
490         if (newsig == NULL)
491                 return -ENOMEM;
492         spin_lock_init(&newsig->siglock);
493         atomic_set(&newsig->count, 1);
494         memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
495         spin_lock_irq(&current->sigmask_lock);
496         current->sig = newsig;
497         spin_unlock_irq(&current->sigmask_lock);
498         return 0;
499 }
500         
501 /*
502  * If make_private_signals() made a copy of the signal table, decrement the
503  * refcount of the original table, and free it if necessary.
504  * We don't do that in make_private_signals() so that we can back off
505  * in flush_old_exec() if an error occurs after calling make_private_signals().
506  */
507
508 static inline void release_old_signals(struct signal_struct * oldsig)
509 {
510         if (current->sig == oldsig)
511                 return;
512         if (atomic_dec_and_test(&oldsig->count))
513                 kmem_cache_free(sigact_cachep, oldsig);
514 }
515
516 /*
517  * These functions flushes out all traces of the currently running executable
518  * so that a new one can be started
519  */
520
521 static inline void flush_old_files(struct files_struct * files)
522 {
523         long j = -1;
524
525         write_lock(&files->file_lock);
526         for (;;) {
527                 unsigned long set, i;
528
529                 j++;
530                 i = j * __NFDBITS;
531                 if (i >= files->max_fds || i >= files->max_fdset)
532                         break;
533                 set = files->close_on_exec->fds_bits[j];
534                 if (!set)
535                         continue;
536                 files->close_on_exec->fds_bits[j] = 0;
537                 write_unlock(&files->file_lock);
538                 for ( ; set ; i++,set >>= 1) {
539                         if (set & 1) {
540                                 sys_close(i);
541                         }
542                 }
543                 write_lock(&files->file_lock);
544
545         }
546         write_unlock(&files->file_lock);
547 }
548
549 /*
550  * An execve() will automatically "de-thread" the process.
551  * Note: we don't have to hold the tasklist_lock to test
552  * whether we migth need to do this. If we're not part of
553  * a thread group, there is no way we can become one
554  * dynamically. And if we are, we only need to protect the
555  * unlink - even if we race with the last other thread exit,
556  * at worst the list_del_init() might end up being a no-op.
557  */
558 static inline void de_thread(struct task_struct *tsk)
559 {
560         if (!list_empty(&tsk->thread_group)) {
561                 write_lock_irq(&tasklist_lock);
562                 list_del_init(&tsk->thread_group);
563                 write_unlock_irq(&tasklist_lock);
564         }
565
566         /* Minor oddity: this might stay the same. */
567         tsk->tgid = tsk->pid;
568 }
569
570 void get_task_comm(char *buf, struct task_struct *tsk)
571 {
572         /* buf must be at least sizeof(tsk->comm) in size */
573         task_lock(tsk);
574         memcpy(buf, tsk->comm, sizeof(tsk->comm));
575         task_unlock(tsk);
576 }
577
578 void set_task_comm(struct task_struct *tsk, char *buf)
579 {
580         task_lock(tsk);
581         strncpy(tsk->comm, buf, sizeof(tsk->comm));
582         tsk->comm[sizeof(tsk->comm)-1]='\0';
583         task_unlock(tsk);
584 }
585
/*
 * Detach the current task from the state of the old executable:
 * private signal table, private file table, fresh address space, new
 * command name, thread-group membership, signal handlers and
 * close-on-exec descriptors.
 *
 * Everything up to and including exec_mmap() can still be rolled back;
 * failures there return a negative errno with the old signal and file
 * tables restored.  After that point the function always returns 0.
 */
int flush_old_exec(struct linux_binprm * bprm)
{
        char * name;
        int i, ch, retval;
        struct signal_struct * oldsig;
        struct files_struct * files;
        char tcomm[sizeof(current->comm)];

        /*
         * Make sure we have a private signal table
         */
        oldsig = current->sig;
        retval = make_private_signals();
        if (retval) goto flush_failed;

        /*
         * Make sure we have private file handles. Ask the
         * fork helper to do the work for us and the exit
         * helper to do the cleanup of the old one.
         */

        files = current->files;         /* refcounted so safe to hold */
        retval = unshare_files();
        if(retval)
                goto flush_failed;

        /*
         * Release all of the old mmap stuff
         */
        retval = exec_mmap();
        if (retval) goto mmap_failed;

        /* This is the point of no return */
        /* NOTE(review): presumably moves file locks from the old table to the new one - confirm. */
        steal_locks(files);
        put_files_struct(files);
        release_old_signals(oldsig);

        /* No alternate signal stack in the new image. */
        current->sas_ss_sp = current->sas_ss_size = 0;

        if (current->euid == current->uid && current->egid == current->gid) {
                current->mm->dumpable = 1;
                current->task_dumpable = 1;
        }
        /*
         * The command name is the part of bprm->filename after the last
         * '/', truncated to fit tcomm.
         */
        name = bprm->filename;
        for (i=0; (ch = *(name++)) != '\0';) {
                if (ch == '/')
                        i = 0;
                else
                        if (i < (sizeof(tcomm) - 1))
                                tcomm[i++] = ch;
        }
        tcomm[i] = '\0';
        set_task_comm(current, tcomm);

        flush_thread();

        de_thread(current);

        /*
         * Changing ids, or a binary the caller may not read, makes the
         * new image non-dumpable.
         */
        if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
            permission(bprm->file->f_dentry->d_inode,MAY_READ))
                current->mm->dumpable = 0;

        /* An exec changes our domain. We are no longer part of the thread
           group */

        current->self_exec_id++;

        flush_signal_handlers(current);
        flush_old_files(current->files);

        return 0;

mmap_failed:
        /* Drop the unshared copy and go back to the original file table. */
        put_files_struct(current->files);
        current->files = files;
flush_failed:
        /* Free the private signal table if one was created and restore the old one. */
        spin_lock_irq(&current->sigmask_lock);
        if (current->sig != oldsig) {
                kmem_cache_free(sigact_cachep, current->sig);
                current->sig = oldsig;
        }
        spin_unlock_irq(&current->sigmask_lock);
        return retval;
}
670
671 /*
672  * We mustn't allow tracing of suid binaries, unless
673  * the tracer has the capability to trace anything..
674  */
675 static inline int must_not_trace_exec(struct task_struct * p)
676 {
677         return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
678 }
679
680 /* 
681  * Fill the binprm structure from the inode. 
682  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
683  */
684 int prepare_binprm(struct linux_binprm *bprm)
685 {
686         int mode;
687         struct inode * inode = bprm->file->f_dentry->d_inode;
688
689         mode = inode->i_mode;
690         /*
691          * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
692          * vfs_permission lets a non-executable through
693          */
694         if (!(mode & 0111))     /* with at least _one_ execute bit set */
695                 return -EACCES;
696         if (bprm->file->f_op == NULL)
697                 return -EACCES;
698
699         bprm->e_uid = current->euid;
700         bprm->e_gid = current->egid;
701
702         if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
703                 /* Set-uid? */
704                 if (mode & S_ISUID)
705                         bprm->e_uid = inode->i_uid;
706
707                 /* Set-gid? */
708                 /*
709                  * If setgid is set but no group execute bit then this
710                  * is a candidate for mandatory locking, not a setgid
711                  * executable.
712                  */
713                 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
714                         bprm->e_gid = inode->i_gid;
715         }
716
717         /* We don't have VFS support for capabilities yet */
718         cap_clear(bprm->cap_inheritable);
719         cap_clear(bprm->cap_permitted);
720         cap_clear(bprm->cap_effective);
721
722         /*  To support inheritance of root-permissions and suid-root
723          *  executables under compatibility mode, we raise all three
724          *  capability sets for the file.
725          *
726          *  If only the real uid is 0, we only raise the inheritable
727          *  and permitted sets of the executable file.
728          */
729
730         if (!issecure(SECURE_NOROOT)) {
731                 if (bprm->e_uid == 0 || current->uid == 0) {
732                         cap_set_full(bprm->cap_inheritable);
733                         cap_set_full(bprm->cap_permitted);
734                 }
735                 if (bprm->e_uid == 0) 
736                         cap_set_full(bprm->cap_effective);
737         }
738
739         memset(bprm->buf,0,BINPRM_BUF_SIZE);
740         return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
741 }
742
/*
 * This function is used to produce the new IDs and capabilities
 * from the old ones and the file's capabilities.
 *
 * The formula used for evolving capabilities is:
 *
 *       pI' = pI
 * (***) pP' = (fP & X) | (fI & pI)
 *       pE' = pP' & fE          [NB. fE is 0 or ~0]
 *
 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
 * ' indicates post-exec(), and X is the global 'cap_bset'.
 *
 */

void compute_creds(struct linux_binprm *bprm)
{
        kernel_cap_t new_permitted, working;
        /* Set when the big kernel lock was taken and must be released at the end. */
        int do_unlock = 0;

        /* pP' = (fP & X) | (fI & pI) */
        new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
        working = cap_intersect(bprm->cap_inheritable,
                                current->cap_inheritable);
        new_permitted = cap_combine(new_permitted, working);

        /* The exec changes ids or would add capabilities beyond the current permitted set. */
        if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
            !cap_issubset(new_permitted, current->cap_permitted)) {
                current->mm->dumpable = 0;

                lock_kernel();
                /*
                 * When the task is traced without PT_PTRACE_CAP, or shares
                 * fs, files or signal state with others, the id change and
                 * the capability gain are suppressed unless the task holds
                 * CAP_SETUID / CAP_SETPCAP respectively.
                 */
                if (must_not_trace_exec(current)
                    || atomic_read(&current->fs->count) > 1
                    || atomic_read(&current->files->count) > 1
                    || atomic_read(&current->sig->count) > 1) {
                        if(!capable(CAP_SETUID)) {
                                bprm->e_uid = current->uid;
                                bprm->e_gid = current->gid;
                        }
                        if(!capable(CAP_SETPCAP)) {
                                new_permitted = cap_intersect(new_permitted,
                                                        current->cap_permitted);
                        }
                }
                do_unlock = 1;
        }


        /* For init, we want to retain the capabilities set
         * in the init_task struct. Thus we skip the usual
         * capability rules */
        if (current->pid != 1) {
                current->cap_permitted = new_permitted;
                /* pE' = pP' & fE */
                current->cap_effective =
                        cap_intersect(new_permitted, bprm->cap_effective);
        }

        /* AUD: Audit candidate if current->cap_effective is set */

        /* Saved, effective and filesystem ids all follow the (possibly adjusted) bprm ids. */
        current->suid = current->euid = current->fsuid = bprm->e_uid;
        current->sgid = current->egid = current->fsgid = bprm->e_gid;

        if(do_unlock)
                unlock_kernel();
        current->keep_capabilities = 0;
}
808
809
810 void remove_arg_zero(struct linux_binprm *bprm)
811 {
812         if (bprm->argc) {
813                 unsigned long offset;
814                 char * kaddr;
815                 struct page *page;
816
817                 offset = bprm->p % PAGE_SIZE;
818                 goto inside;
819
820                 while (bprm->p++, *(kaddr+offset++)) {
821                         if (offset != PAGE_SIZE)
822                                 continue;
823                         offset = 0;
824                         kunmap(page);
825 inside:
826                         page = bprm->page[bprm->p/PAGE_SIZE];
827                         kaddr = kmap(page);
828                 }
829                 kunmap(page);
830                 bprm->argc--;
831         }
832 }
833
/*
 * Cycle through the list of binary format handlers until one recognizes
 * the image.
 *
 * Each handler's load_binary() is called in list order.  A result >= 0
 * means the image was loaded: the executable file is released and the
 * value is returned.  -ENOEXEC means "not my format" and the search
 * continues; any other error ends the search.  With CONFIG_KMOD a second
 * pass is made after requesting a "binfmt-XXXX" module, unless the first
 * four bytes of the image are all printable.
 *
 * Returns the handler's result, or the last error seen.
 */
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
        int try,retval=0;
        struct linux_binfmt *fmt;
#ifdef __alpha__
        /* handle /sbin/loader.. */
        {
            struct exec * eh = (struct exec *) bprm->buf;

            if (!bprm->loader && eh->fh.f_magic == 0x183 &&
                (eh->fh.f_flags & 0x3000) == 0x3000)
            {
                struct file * file;
                unsigned long loader;

                /* Swap the executable for /sbin/loader. */
                allow_write_access(bprm->file);
                fput(bprm->file);
                bprm->file = NULL;

                loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);

                file = open_exec("/sbin/loader");
                retval = PTR_ERR(file);
                if (IS_ERR(file))
                        return retval;

                /* Remember if the application is TASO.  */
                bprm->sh_bang = eh->ah.entry < 0x100000000;

                bprm->file = file;
                bprm->loader = loader;
                retval = prepare_binprm(bprm);
                if (retval<0)
                        return retval;
                /* should call search_binary_handler recursively here,
                   but it does not matter */
            }
        }
#endif
        /* kernel module loader fixup */
        /* so we don't try to run modprobe in kernel space. */
        set_fs(USER_DS);
        for (try=0; try<2; try++) {
                read_lock(&binfmt_lock);
                for (fmt = formats ; fmt ; fmt = fmt->next) {
                        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
                        if (!fn)
                                continue;
                        /* Pin the handler's module; skip the handler if that fails. */
                        if (!try_inc_mod_count(fmt->module))
                                continue;
                        /* The loader itself runs without the list lock. */
                        read_unlock(&binfmt_lock);
                        retval = fn(bprm, regs);
                        if (retval >= 0) {
                                /* Loaded: release the executable and report success. */
                                put_binfmt(fmt);
                                allow_write_access(bprm->file);
                                if (bprm->file)
                                        fput(bprm->file);
                                bprm->file = NULL;
                                current->did_exec = 1;
                                return retval;
                        }
                        read_lock(&binfmt_lock);
                        put_binfmt(fmt);
                        if (retval != -ENOEXEC)
                                break;
                        /* The handler gave up the file: nothing left to try. */
                        if (!bprm->file) {
                                read_unlock(&binfmt_lock);
                                return retval;
                        }
                }
                read_unlock(&binfmt_lock);
                if (retval != -ENOEXEC) {
                        break;
#ifdef CONFIG_KMOD
                }else{
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
                        char modname[20];
                        /* Four printable leading bytes: no module is requested for this image. */
                        if (printable(bprm->buf[0]) &&
                            printable(bprm->buf[1]) &&
                            printable(bprm->buf[2]) &&
                            printable(bprm->buf[3]))
                                break; /* -ENOEXEC */
                        /* Module name is built from the 16-bit value at offset 2 of the header. */
                        sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
                        request_module(modname);
#endif
                }
        }
        return retval;
}
926
927
928 /*
929  * sys_execve() executes a new program.
930  */
931 int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
932 {
933         struct linux_binprm bprm;
934         struct file *file;
935         int retval;
936         int i;
937
938         file = open_exec(filename);
939
940         retval = PTR_ERR(file);
941         if (IS_ERR(file))
942                 return retval;
943
944         bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
945         memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); 
946
947         bprm.file = file;
948         bprm.filename = filename;
949         bprm.sh_bang = 0;
950         bprm.loader = 0;
951         bprm.exec = 0;
952         if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
953                 allow_write_access(file);
954                 fput(file);
955                 return bprm.argc;
956         }
957
958         if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
959                 allow_write_access(file);
960                 fput(file);
961                 return bprm.envc;
962         }
963
964         retval = prepare_binprm(&bprm);
965         if (retval < 0) 
966                 goto out; 
967
968         retval = copy_strings_kernel(1, &bprm.filename, &bprm);
969         if (retval < 0) 
970                 goto out; 
971
972         bprm.exec = bprm.p;
973         retval = copy_strings(bprm.envc, envp, &bprm);
974         if (retval < 0) 
975                 goto out; 
976
977         retval = copy_strings(bprm.argc, argv, &bprm);
978         if (retval < 0) 
979                 goto out; 
980
981         retval = search_binary_handler(&bprm,regs);
982         if (retval >= 0)
983                 /* execve success */
984                 return retval;
985
986 out:
987         /* Something went wrong, return the inode and free the argument pages*/
988         allow_write_access(bprm.file);
989         if (bprm.file)
990                 fput(bprm.file);
991
992         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
993                 struct page * page = bprm.page[i];
994                 if (page)
995                         __free_page(page);
996         }
997
998         return retval;
999 }
1000
1001 void set_binfmt(struct linux_binfmt *new)
1002 {
1003         struct linux_binfmt *old = current->binfmt;
1004         if (new && new->module)
1005                 __MOD_INC_USE_COUNT(new->module);
1006         current->binfmt = new;
1007         if (old && old->module)
1008                 __MOD_DEC_USE_COUNT(old->module);
1009 }
1010
1011 #define CORENAME_MAX_SIZE 64
1012
1013 /* format_corename will inspect the pattern parameter, and output a
1014  * name into corename, which must have space for at least
1015  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1016  */
1017 void format_corename(char *corename, const char *pattern, long signr)
1018 {
1019         const char *pat_ptr = pattern;
1020         char *out_ptr = corename;
1021         char *const out_end = corename + CORENAME_MAX_SIZE;
1022         int rc;
1023         int pid_in_pattern = 0;
1024
1025         /* Repeat as long as we have more pattern to process and more output
1026            space */
1027         while (*pat_ptr) {
1028                 if (*pat_ptr != '%') {
1029                         if (out_ptr == out_end)
1030                                 goto out;
1031                         *out_ptr++ = *pat_ptr++;
1032                 } else {
1033                         switch (*++pat_ptr) {
1034                         case 0:
1035                                 goto out;
1036                         /* Double percent, output one percent */
1037                         case '%':
1038                                 if (out_ptr == out_end)
1039                                         goto out;
1040                                 *out_ptr++ = '%';
1041                                 break;
1042                         /* pid */
1043                         case 'p':
1044                                 pid_in_pattern = 1;
1045                                 rc = snprintf(out_ptr, out_end - out_ptr,
1046                                               "%d", current->pid);
1047                                 if (rc > out_end - out_ptr)
1048                                         goto out;
1049                                 out_ptr += rc;
1050                                 break;
1051                         /* uid */
1052                         case 'u':
1053                                 rc = snprintf(out_ptr, out_end - out_ptr,
1054                                               "%d", current->uid);
1055                                 if (rc > out_end - out_ptr)
1056                                         goto out;
1057                                 out_ptr += rc;
1058                                 break;
1059                         /* gid */
1060                         case 'g':
1061                                 rc = snprintf(out_ptr, out_end - out_ptr,
1062                                               "%d", current->gid);
1063                                 if (rc > out_end - out_ptr)
1064                                         goto out;
1065                                 out_ptr += rc;
1066                                 break;
1067                         /* signal that caused the coredump */
1068                         case 's':
1069                                 rc = snprintf(out_ptr, out_end - out_ptr,
1070                                               "%ld", signr);
1071                                 if (rc > out_end - out_ptr)
1072                                         goto out;
1073                                 out_ptr += rc;
1074                                 break;
1075                         /* UNIX time of coredump */
1076                         case 't': {
1077                                 struct timeval tv;
1078                                 do_gettimeofday(&tv);
1079                                 rc = snprintf(out_ptr, out_end - out_ptr,
1080                                               "%ld", tv.tv_sec);
1081                                 if (rc > out_end - out_ptr)
1082                                         goto out;
1083                                 out_ptr += rc;
1084                                 break;
1085                         }
1086                         /* hostname */
1087                         case 'h':
1088                                 down_read(&uts_sem);
1089                                 rc = snprintf(out_ptr, out_end - out_ptr,
1090                                               "%s", system_utsname.nodename);
1091                                 up_read(&uts_sem);
1092                                 if (rc > out_end - out_ptr)
1093                                         goto out;
1094                                 out_ptr += rc;
1095                                 break;
1096                         /* executable */
1097                         case 'e':
1098                                 rc = snprintf(out_ptr, out_end - out_ptr,
1099                                               "%s", current->comm);
1100                                 if (rc > out_end - out_ptr)
1101                                         goto out;
1102                                 out_ptr += rc;
1103                                 break;
1104                         default:
1105                                 break;
1106                         }
1107                         ++pat_ptr;
1108                 }
1109         }
1110         /* Backward compatibility with core_uses_pid:
1111          *
1112          * If core_pattern does not include a %p (as is the default)
1113          * and core_uses_pid is set, then .%pid will be appended to
1114          * the filename */
1115         if (!pid_in_pattern
1116             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
1117                 rc = snprintf(out_ptr, out_end - out_ptr,
1118                               ".%d", current->pid);
1119                 if (rc > out_end - out_ptr)
1120                         goto out;
1121                 out_ptr += rc;
1122         }
1123       out:
1124         *out_ptr = 0;
1125 }
1126
1127 int do_coredump(long signr, struct pt_regs * regs)
1128 {
1129         struct linux_binfmt * binfmt;
1130         char corename[CORENAME_MAX_SIZE + 1];
1131         struct file * file;
1132         struct inode * inode;
1133         int retval = 0;
1134         int fsuid = current->fsuid;
1135
1136         lock_kernel();
1137         binfmt = current->binfmt;
1138         if (!binfmt || !binfmt->core_dump)
1139                 goto fail;
1140         if (!is_dumpable(current))
1141         {
1142                 if(!core_setuid_ok || !current->task_dumpable)
1143                         goto fail;
1144                 current->fsuid = 0;
1145         }
1146         current->mm->dumpable = 0;
1147         if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
1148                 goto fail;
1149
1150         format_corename(corename, core_pattern, signr);
1151         file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
1152         if (IS_ERR(file))
1153                 goto fail;
1154         inode = file->f_dentry->d_inode;
1155         if (inode->i_nlink > 1)
1156                 goto close_fail;        /* multiple links - don't dump */
1157         if (d_unhashed(file->f_dentry))
1158                 goto close_fail;
1159
1160         if (!S_ISREG(inode->i_mode))
1161                 goto close_fail;
1162         if (!file->f_op)
1163                 goto close_fail;
1164         if (!file->f_op->write)
1165                 goto close_fail;
1166         if (do_truncate(file->f_dentry, 0) != 0)
1167                 goto close_fail;
1168
1169         retval = binfmt->core_dump(signr, regs, file);
1170
1171 close_fail:
1172         filp_close(file, NULL);
1173 fail:
1174         if (fsuid != current->fsuid)
1175                 current->fsuid = fsuid;
1176         unlock_kernel();
1177         return retval;
1178 }