[linux-2.4.git] / kernel / fork.c
/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/namespace.h>
#include <linux/personality.h>
#include <linux/compiler.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>

/* The idle threads do not count.. */
int nr_threads;
int nr_running;

int max_threads;
unsigned long total_forks;      /* Handle normal Linux uptimes. */
int last_pid;

struct task_struct *pidhash[PIDHASH_SZ];

void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
        unsigned long flags;

        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
        wq_write_lock_irqsave(&q->lock, flags);
        __add_wait_queue(q, wait);
        wq_write_unlock_irqrestore(&q->lock, flags);
}

void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
{
        unsigned long flags;

        wait->flags |= WQ_FLAG_EXCLUSIVE;
        wq_write_lock_irqsave(&q->lock, flags);
        __add_wait_queue_tail(q, wait);
        wq_write_unlock_irqrestore(&q->lock, flags);
}

void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
        unsigned long flags;

        wq_write_lock_irqsave(&q->lock, flags);
        __remove_wait_queue(q, wait);
        wq_write_unlock_irqrestore(&q->lock, flags);
}

void __init fork_init(unsigned long mempages)
{
        /*
         * The default maximum number of threads is set to a safe
         * value: the thread structures can take up at most half
         * of memory.
         */
        max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;

        init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
        init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
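
/*
 * Rough arithmetic for the formula above (illustrative numbers, not from
 * any particular config): with 4 KB pages and an 8 KB THREAD_SIZE, a
 * 128 MB machine has mempages = 32768, so
 *
 *      max_threads = 32768 / (8192/4096) / 8 = 2048
 *
 * and init's RLIMIT_NPROC defaults to 1024.  Note that 2048 threads at
 * 8 KB each come to about 1/8 of memory, so the "/ 8" is rather more
 * conservative than the "half of memory" wording in the comment.
 */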

/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;

static int get_pid(unsigned long flags)
{
        static int next_safe = PID_MAX;
        struct task_struct *p;
        int pid, beginpid;

        if (flags & CLONE_PID)
                return current->pid;

        spin_lock(&lastpid_lock);
        beginpid = last_pid;
        if((++last_pid) & 0xffff8000) {
                last_pid = 300;         /* Skip daemons etc. */
                goto inside;
        }
        if(last_pid >= next_safe) {
inside:
                next_safe = PID_MAX;
                read_lock(&tasklist_lock);
        repeat:
                for_each_task(p) {
                        if(p->pid == last_pid   ||
                           p->pgrp == last_pid  ||
                           p->tgid == last_pid  ||
                           p->session == last_pid) {
                                if(++last_pid >= next_safe) {
                                        if(last_pid & 0xffff8000)
                                                last_pid = 300;
                                        next_safe = PID_MAX;
                                }
                                if(unlikely(last_pid == beginpid)) {
                                        next_safe = 0;
                                        goto nomorepids;
                                }
                                goto repeat;
                        }
                        if(p->pid > last_pid && next_safe > p->pid)
                                next_safe = p->pid;
                        if(p->pgrp > last_pid && next_safe > p->pgrp)
                                next_safe = p->pgrp;
                        if(p->tgid > last_pid && next_safe > p->tgid)
                                next_safe = p->tgid;
                        if(p->session > last_pid && next_safe > p->session)
                                next_safe = p->session;
                }
                read_unlock(&tasklist_lock);
        }
        pid = last_pid;
        spin_unlock(&lastpid_lock);

        return pid;

nomorepids:
        read_unlock(&tasklist_lock);
        spin_unlock(&lastpid_lock);
        return 0;
}
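
/*
 * A note on the allocator above (behaviour derived from the code, not
 * from any external spec): ++last_pid trips the 0xffff8000 test once it
 * reaches 32768 and restarts at 300, skipping the low ids traditionally
 * used by boot-time daemons.  next_safe caches the smallest id above
 * last_pid that is in use as a pid, pgrp, tgid or session, so the common
 * case is a plain increment under lastpid_lock with no tasklist scan;
 * the O(tasks) rescan only runs when last_pid catches up with next_safe
 * or wraps.  If the scan walks all the way back around to beginpid,
 * every id is taken and 0 is returned, which do_fork() treats as a
 * failure unless the caller itself is pid 0.
 */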

static inline int dup_mmap(struct mm_struct * mm)
{
        struct vm_area_struct * mpnt, *tmp, **pprev;
        int retval;

        flush_cache_mm(current->mm);
        mm->locked_vm = 0;
        mm->mmap = NULL;
        mm->mmap_cache = NULL;
        mm->map_count = 0;
        mm->rss = 0;
        mm->cpu_vm_mask = 0;
        mm->swap_address = 0;
        pprev = &mm->mmap;

        /*
         * Add it to the mmlist after the parent.
         * Doing it this way means that we can order the list,
         * and fork() won't mess up the ordering significantly.
         * Add it first so that swapoff can see any swap entries.
         */
        spin_lock(&mmlist_lock);
        list_add(&mm->mmlist, &current->mm->mmlist);
        mmlist_nr++;
        spin_unlock(&mmlist_lock);

        for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
                struct file *file;

                retval = -ENOMEM;
                if(mpnt->vm_flags & VM_DONTCOPY)
                        continue;
                tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                tmp->vm_next = NULL;
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_dentry->d_inode;
                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);

                        /* insert tmp into the share list, just after mpnt */
                        spin_lock(&inode->i_mapping->i_shared_lock);
                        if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
                                mpnt->vm_next_share->vm_pprev_share =
                                        &tmp->vm_next_share;
                        mpnt->vm_next_share = tmp;
                        tmp->vm_pprev_share = &mpnt->vm_next_share;
                        spin_unlock(&inode->i_mapping->i_shared_lock);
                }

                /*
                 * Link in the new vma and copy the page table entries:
                 * link in first so that swapoff can see swap entries.
                 */
                spin_lock(&mm->page_table_lock);
                *pprev = tmp;
                pprev = &tmp->vm_next;
                mm->map_count++;
                retval = copy_page_range(mm, current->mm, tmp);
                spin_unlock(&mm->page_table_lock);

                if (tmp->vm_ops && tmp->vm_ops->open)
                        tmp->vm_ops->open(tmp);

                if (retval)
                        goto fail_nomem;
        }
        retval = 0;
        build_mmap_rb(mm);

fail_nomem:
        flush_tlb_mm(current->mm);
        return retval;
}
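
/*
 * Control-flow note for dup_mmap() above: the success path intentionally
 * falls through the fail_nomem label, so flush_tlb_mm(current->mm) runs
 * on both success and failure, and retval (0, or -ENOMEM from a failed
 * vma allocation or copy_page_range()) tells the caller which it was.
 * Each new vma is linked into mm under page_table_lock before its page
 * table entries are copied, and mm joins mmlist before any of this, so
 * swapoff can always see the swap entries being duplicated.
 */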

spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
int mmlist_nr;

#define allocate_mm()   (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm)     (kmem_cache_free(mm_cachep, (mm)))

static struct mm_struct * mm_init(struct mm_struct * mm)
{
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        mm->page_table_lock = SPIN_LOCK_UNLOCKED;
        mm->pgd = pgd_alloc(mm);
        mm->def_flags = 0;
        if (mm->pgd)
                return mm;
        free_mm(mm);
        return NULL;
}


/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
        struct mm_struct * mm;

        mm = allocate_mm();
        if (mm) {
                memset(mm, 0, sizeof(*mm));
                return mm_init(mm);
        }
        return NULL;
}

/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
void fastcall __mmdrop(struct mm_struct *mm)
{
        BUG_ON(mm == &init_mm);
        pgd_free(mm->pgd);
        check_pgt_cache();
        destroy_context(mm);
        free_mm(mm);
}

/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
        if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
                extern struct mm_struct *swap_mm;
                if (swap_mm == mm)
                        swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
                list_del(&mm->mmlist);
                mmlist_nr--;
                spin_unlock(&mmlist_lock);
                exit_mmap(mm);
                mmdrop(mm);
        }
}

/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one.  Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(void)
{
        struct task_struct *tsk = current;
        struct completion *vfork_done = tsk->vfork_done;

        /* notify parent sleeping on vfork() */
        if (vfork_done) {
                tsk->vfork_done = NULL;
                complete(vfork_done);
        }
}
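
/*
 * The completion above pairs with the CLONE_VFORK handling in do_fork()
 * further down: the parent puts a completion on its own stack, points
 * child->vfork_done at it and blocks in wait_for_completion() until the
 * child gives up its mm (on exec or exit) and mm_release() signals it.
 * Parent-side sketch, as done at the end of do_fork():
 *
 *      struct completion vfork;
 *
 *      p->vfork_done = &vfork;
 *      init_completion(&vfork);
 *      ...
 *      wake_up_process(p);
 *      if (clone_flags & CLONE_VFORK)
 *              wait_for_completion(&vfork);
 */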

static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
        struct mm_struct * mm, *oldmm;
        int retval;

        tsk->min_flt = tsk->maj_flt = 0;
        tsk->cmin_flt = tsk->cmaj_flt = 0;
        tsk->nswap = tsk->cnswap = 0;

        tsk->mm = NULL;
        tsk->active_mm = NULL;

        /*
         * Are we cloning a kernel thread?
         *
         * We need to steal an active VM for that..
         */
        oldmm = current->mm;
        if (!oldmm)
                return 0;

        if (clone_flags & CLONE_VM) {
                atomic_inc(&oldmm->mm_users);
                mm = oldmm;
                goto good_mm;
        }

        retval = -ENOMEM;
        mm = allocate_mm();
        if (!mm)
                goto fail_nomem;

        /* Copy the current MM stuff.. */
        memcpy(mm, oldmm, sizeof(*mm));
        if (!mm_init(mm))
                goto fail_nomem;

        if (init_new_context(tsk,mm))
                goto free_pt;

        down_write(&oldmm->mmap_sem);
        retval = dup_mmap(mm);
        up_write(&oldmm->mmap_sem);

        if (retval)
                goto free_pt;

        /*
         * child gets a private LDT (if there was an LDT in the parent)
         */
        copy_segments(tsk, mm);

good_mm:
        tsk->mm = mm;
        tsk->active_mm = mm;
        return 0;

free_pt:
        mmput(mm);
fail_nomem:
        return retval;
}

static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
        struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
        /* We don't need to lock fs - think why ;-) */
        if (fs) {
                atomic_set(&fs->count, 1);
                fs->lock = RW_LOCK_UNLOCKED;
                fs->umask = old->umask;
                read_lock(&old->lock);
                fs->rootmnt = mntget(old->rootmnt);
                fs->root = dget(old->root);
                fs->pwdmnt = mntget(old->pwdmnt);
                fs->pwd = dget(old->pwd);
                if (old->altroot) {
                        fs->altrootmnt = mntget(old->altrootmnt);
                        fs->altroot = dget(old->altroot);
                } else {
                        fs->altrootmnt = NULL;
                        fs->altroot = NULL;
                }
                read_unlock(&old->lock);
        }
        return fs;
}

struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
        return __copy_fs_struct(old);
}

static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
        if (clone_flags & CLONE_FS) {
                atomic_inc(&current->fs->count);
                return 0;
        }
        tsk->fs = __copy_fs_struct(current->fs);
        if (!tsk->fs)
                return -1;
        return 0;
}

static int count_open_files(struct files_struct *files, int size)
{
        int i;

        /* Find the last open fd */
        for (i = size/(8*sizeof(long)); i > 0; ) {
                if (files->open_fds->fds_bits[--i])
                        break;
        }
        i = (i+1) * 8 * sizeof(long);
        return i;
}
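
/*
 * count_open_files() rounds up to a whole word of the open-fd bitmap.
 * Worked example, assuming 32-bit longs: if the highest open descriptor
 * is fd 70, the backwards scan stops at fds_bits[2] and the function
 * returns (2 + 1) * 8 * sizeof(long) = 96, so copy_files() below walks
 * 96 fd slots and copies 96/8 = 12 bytes of each bitmap rather than
 * exactly 71.
 */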

static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
        struct files_struct *oldf, *newf;
        struct file **old_fds, **new_fds;
        int open_files, nfds, size, i, error = 0;

        /*
         * A background process may not have any files ...
         */
        oldf = current->files;
        if (!oldf)
                goto out;

        if (clone_flags & CLONE_FILES) {
                atomic_inc(&oldf->count);
                goto out;
        }

        /*
         * Note: we may be using current for both targets (See exec.c)
         * This works because we cache current->files (old) as oldf. Don't
         * break this.
         */
        tsk->files = NULL;
        error = -ENOMEM;
        newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
        if (!newf)
                goto out;

        atomic_set(&newf->count, 1);

        newf->file_lock     = RW_LOCK_UNLOCKED;
        newf->next_fd       = 0;
        newf->max_fds       = NR_OPEN_DEFAULT;
        newf->max_fdset     = __FD_SETSIZE;
        newf->close_on_exec = &newf->close_on_exec_init;
        newf->open_fds      = &newf->open_fds_init;
        newf->fd            = &newf->fd_array[0];

        /* We don't yet have the oldf readlock, but even if the old
           fdset gets grown now, we'll only copy up to "size" fds */
        size = oldf->max_fdset;
        if (size > __FD_SETSIZE) {
                newf->max_fdset = 0;
                write_lock(&newf->file_lock);
                error = expand_fdset(newf, size-1);
                write_unlock(&newf->file_lock);
                if (error)
                        goto out_release;
        }
        read_lock(&oldf->file_lock);

        open_files = count_open_files(oldf, size);

        /*
         * Check whether we need to allocate a larger fd array.
         * Note: we're not a clone task, so the open count won't
         * change.
         */
        nfds = NR_OPEN_DEFAULT;
        if (open_files > nfds) {
                read_unlock(&oldf->file_lock);
                newf->max_fds = 0;
                write_lock(&newf->file_lock);
                error = expand_fd_array(newf, open_files-1);
                write_unlock(&newf->file_lock);
                if (error)
                        goto out_release;
                nfds = newf->max_fds;
                read_lock(&oldf->file_lock);
        }

        old_fds = oldf->fd;
        new_fds = newf->fd;

        memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
        memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

        for (i = open_files; i != 0; i--) {
                struct file *f = *old_fds++;
                if (f) {
                        get_file(f);
                } else {
                        /*
                         * The fd may be claimed in the fd bitmap but not yet
                         * instantiated in the files array if a sibling thread
                         * is partway through open().  So make sure that this
                         * fd is available to the new process.
                         */
                        FD_CLR(open_files - i, newf->open_fds);
                }
                *new_fds++ = f;
        }
        read_unlock(&oldf->file_lock);

        /* compute the remainder to be cleared */
        size = (newf->max_fds - open_files) * sizeof(struct file *);

        /* This is long word aligned thus could use an optimized version */
        memset(new_fds, 0, size);

        if (newf->max_fdset > open_files) {
                int left = (newf->max_fdset-open_files)/8;
                int start = open_files / (8 * sizeof(unsigned long));

                memset(&newf->open_fds->fds_bits[start], 0, left);
                memset(&newf->close_on_exec->fds_bits[start], 0, left);
        }

        tsk->files = newf;
        error = 0;
out:
        return error;

out_release:
        free_fdset (newf->close_on_exec, newf->max_fdset);
        free_fdset (newf->open_fds, newf->max_fdset);
        kmem_cache_free(files_cachep, newf);
        goto out;
}
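
/*
 * Sizing note for copy_files() above (typical values; the actual numbers
 * are per-arch constants): the fd array and fdset embedded in
 * files_struct cover NR_OPEN_DEFAULT descriptors (BITS_PER_LONG, i.e. 32
 * on a 32-bit box) and __FD_SETSIZE bitmap slots (usually 1024).  So
 * expand_fd_array() is only needed when count_open_files() reports more
 * than NR_OPEN_DEFAULT slots in use, and expand_fdset() only when the
 * parent's fdset has already been grown beyond __FD_SETSIZE.
 */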

/*
 *      Helper to unshare the files of the current task.
 *      We don't want to expose copy_files internals to
 *      the exec layer of the kernel.
 */

int unshare_files(void)
{
        struct files_struct *files  = current->files;
        int rc;

        if(!files)
                BUG();

        /* This can race but the race causes us to copy when we don't
           need to and drop the copy */
        if(atomic_read(&files->count) == 1)
        {
                atomic_inc(&files->count);
                return 0;
        }
        rc = copy_files(0, current);
        if(rc)
                current->files = files;
        return rc;
}

static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
        struct signal_struct *sig;

        if (clone_flags & CLONE_SIGHAND) {
                atomic_inc(&current->sig->count);
                return 0;
        }
        sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
        tsk->sig = sig;
        if (!sig)
                return -1;
        spin_lock_init(&sig->siglock);
        atomic_set(&sig->count, 1);
        memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
        return 0;
}

static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
        unsigned long new_flags = p->flags;

        new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU);
        new_flags |= PF_FORKNOEXEC;
        if (!(clone_flags & CLONE_PTRACE))
                p->ptrace = 0;
        p->flags = new_flags;
}

long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
        struct task_struct *task = current;
        unsigned old_task_dumpable;
        long ret;

        /* lock out any potential ptracer */
        task_lock(task);
        if (task->ptrace) {
                task_unlock(task);
                return -EPERM;
        }

        old_task_dumpable = task->task_dumpable;
        task->task_dumpable = 0;
        task_unlock(task);

        ret = arch_kernel_thread(fn, arg, flags);

        /* never reached in child process, only in parent */
        current->task_dumpable = old_task_dumpable;

        return ret;
}
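
/*
 * Caller's view of kernel_thread() (illustrative only; my_worker is a
 * made-up name and the flag choice varies by caller): a subsystem
 * typically spawns its kernel daemon with something like
 *
 *      static int my_worker(void *arg);
 *      ...
 *      kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES);
 *
 * arch_kernel_thread() builds a register frame and ends up in do_fork()
 * below; the child never returns through this function, which is why
 * only the parent needs its task_dumpable restored.
 */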

/*
 *  Ok, this is the main fork-routine. It copies the system process
 * information (task[nr]) and sets up the necessary registers. It also
 * copies the data segment in its entirety.  The "stack_start" and
 * "stack_top" arguments are simply passed along to the platform
 * specific copy_thread() routine.  Most platforms ignore stack_top.
 * For an example that's using stack_top, see
 * arch/ia64/kernel/process.c.
 */
int do_fork(unsigned long clone_flags, unsigned long stack_start,
            struct pt_regs *regs, unsigned long stack_size)
{
        int retval;
        struct task_struct *p;
        struct completion vfork;

        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return -EINVAL;

        retval = -EPERM;

        /*
         * CLONE_PID is only allowed for the initial SMP swapper
         * calls
         */
        if (clone_flags & CLONE_PID) {
                if (current->pid)
                        goto fork_out;
        }

        retval = -ENOMEM;
        p = alloc_task_struct();
        if (!p)
                goto fork_out;

        *p = *current;

        retval = -EAGAIN;
        /*
         * Check if we are over our maximum process limit, but be sure to
         * exclude root. This is needed to make it possible for login and
         * friends to set the per-user process limit to something lower
         * than the number of processes root is running. -- Rik
         */
        if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
                      && p->user != &root_user
                      && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
                goto bad_fork_free;

        atomic_inc(&p->user->__count);
        atomic_inc(&p->user->processes);

        /*
         * Counter increases are protected by
         * the kernel lock so nr_threads can't
         * increase under us (but it may decrease).
         */
        if (nr_threads >= max_threads)
                goto bad_fork_cleanup_count;

        get_exec_domain(p->exec_domain);

        if (p->binfmt && p->binfmt->module)
                __MOD_INC_USE_COUNT(p->binfmt->module);

        p->did_exec = 0;
        p->swappable = 0;
        p->state = TASK_UNINTERRUPTIBLE;

        copy_flags(clone_flags, p);
        p->pid = get_pid(clone_flags);
        if (p->pid == 0 && current->pid != 0)
                goto bad_fork_cleanup;

        p->run_list.next = NULL;
        p->run_list.prev = NULL;

        p->p_cptr = NULL;
        init_waitqueue_head(&p->wait_chldexit);
        p->vfork_done = NULL;
        if (clone_flags & CLONE_VFORK) {
                p->vfork_done = &vfork;
                init_completion(&vfork);
        }
        spin_lock_init(&p->alloc_lock);

        p->sigpending = 0;
        init_sigpending(&p->pending);

        p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
        p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
        init_timer(&p->real_timer);
        p->real_timer.data = (unsigned long) p;

        p->leader = 0;          /* session leadership doesn't inherit */
        p->tty_old_pgrp = 0;
        p->times.tms_utime = p->times.tms_stime = 0;
        p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef CONFIG_SMP
        {
                int i;
                p->cpus_runnable = ~0UL;
                p->processor = current->processor;
                /* ?? should we just memset this ?? */
                for(i = 0; i < smp_num_cpus; i++)
                        p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
                spin_lock_init(&p->sigmask_lock);
        }
#endif
        p->lock_depth = -1;             /* -1 = no lock */
        p->start_time = jiffies;

        INIT_LIST_HEAD(&p->local_pages);

        retval = -ENOMEM;
        /* copy all the process information */
        if (copy_files(clone_flags, p))
                goto bad_fork_cleanup;
        if (copy_fs(clone_flags, p))
                goto bad_fork_cleanup_files;
        if (copy_sighand(clone_flags, p))
                goto bad_fork_cleanup_fs;
        if (copy_mm(clone_flags, p))
                goto bad_fork_cleanup_sighand;
        retval = copy_namespace(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_mm;
        retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
        if (retval)
                goto bad_fork_cleanup_namespace;
        p->semundo = NULL;

        /* Our parent execution domain becomes the current domain.
           These must match for thread signalling to apply. */

        p->parent_exec_id = p->self_exec_id;

        /* ok, now we should be set up.. */
        p->swappable = 1;
        p->exit_signal = clone_flags & CSIGNAL;
        p->pdeath_signal = 0;

        /*
         * "share" dynamic priority between parent and child, thus the
         * total amount of dynamic priorities in the system doesn't change,
         * giving more scheduling fairness. This is only important in the
         * first timeslice; in the long run the scheduling behaviour is
         * unchanged.
         */
        p->counter = (current->counter + 1) >> 1;
        current->counter >>= 1;
        if (!current->counter)
                current->need_resched = 1;

        /*
         * Ok, add it to the run-queues and make it
         * visible to the rest of the system.
         *
         * Let it rip!
         */
        retval = p->pid;
        p->tgid = retval;
        INIT_LIST_HEAD(&p->thread_group);

        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);

        /* CLONE_PARENT re-uses the old parent */
        p->p_opptr = current->p_opptr;
        p->p_pptr = current->p_pptr;
        if (!(clone_flags & CLONE_PARENT)) {
                p->p_opptr = current;
                if (!(p->ptrace & PT_PTRACED))
                        p->p_pptr = current;
        }

        if (clone_flags & CLONE_THREAD) {
                p->tgid = current->tgid;
                list_add(&p->thread_group, &current->thread_group);
        }

        SET_LINKS(p);
        hash_pid(p);
        nr_threads++;
        write_unlock_irq(&tasklist_lock);

        if (p->ptrace & PT_PTRACED)
                send_sig(SIGSTOP, p, 1);

        wake_up_process(p);             /* do this last */
        ++total_forks;
        if (clone_flags & CLONE_VFORK)
                wait_for_completion(&vfork);

fork_out:
        return retval;

bad_fork_cleanup_namespace:
        exit_namespace(p);
bad_fork_cleanup_mm:
        exit_mm(p);
        if (p->active_mm)
                mmdrop(p->active_mm);
bad_fork_cleanup_sighand:
        exit_sighand(p);
bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
bad_fork_cleanup_files:
        exit_files(p); /* blocking */
bad_fork_cleanup:
        put_exec_domain(p->exec_domain);
        if (p->binfmt && p->binfmt->module)
                __MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
        atomic_dec(&p->user->processes);
        free_uid(p->user);
bad_fork_free:
        free_task_struct(p);
        goto fork_out;
}
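
/*
 * For orientation, the architecture system-call stubs are thin wrappers
 * around do_fork(); an i386-flavoured sketch (the real code lives in
 * arch/i386/kernel/process.c and may differ in detail):
 *
 *      asmlinkage int sys_fork(struct pt_regs regs)
 *      {
 *              return do_fork(SIGCHLD, regs.esp, &regs, 0);
 *      }
 *
 *      asmlinkage int sys_vfork(struct pt_regs regs)
 *      {
 *              return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
 *                             regs.esp, &regs, 0);
 *      }
 *
 * sys_clone() passes the user-supplied flags and, when non-zero, a new
 * stack pointer in place of regs.esp; the low CSIGNAL bits of the flags
 * become the child's exit_signal, as set above.
 */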

/* SLAB cache for signal_struct structures (tsk->sig) */
kmem_cache_t *sigact_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
kmem_cache_t *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
kmem_cache_t *fs_cachep;

/* SLAB cache for vm_area_struct structures */
kmem_cache_t *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
kmem_cache_t *mm_cachep;

void __init proc_caches_init(void)
{
        sigact_cachep = kmem_cache_create("signal_act",
                        sizeof(struct signal_struct), 0,
                        SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!sigact_cachep)
                panic("Cannot create signal action SLAB cache");

        files_cachep = kmem_cache_create("files_cache",
                         sizeof(struct files_struct), 0,
                         SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!files_cachep)
                panic("Cannot create files SLAB cache");

        fs_cachep = kmem_cache_create("fs_cache",
                         sizeof(struct fs_struct), 0,
                         SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!fs_cachep)
                panic("Cannot create fs_struct SLAB cache");

        vm_area_cachep = kmem_cache_create("vm_area_struct",
                        sizeof(struct vm_area_struct), 0,
                        SLAB_HWCACHE_ALIGN, NULL, NULL);
        if(!vm_area_cachep)
                panic("vma_init: Cannot alloc vm_area_struct SLAB cache");

        mm_cachep = kmem_cache_create("mm_struct",
                        sizeof(struct mm_struct), 0,
                        SLAB_HWCACHE_ALIGN, NULL, NULL);
        if(!mm_cachep)
                panic("vma_init: Cannot alloc mm_struct SLAB cache");
}