/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/namespace.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);

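/*
 * Final cleanup of a dead task: once it is off every runqueue we drop
 * its user accounting, unhash it, fold its fault/swap counters and any
 * remaining timeslice into the parent, and free the task_struct.
 */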
static void release_task(struct task_struct * p)
{
        if (p != current) {
#ifdef CONFIG_SMP
                /*
                 * Wait to make sure the process isn't on the
                 * runqueue (active on some other CPU still)
                 */
                for (;;) {
                        task_lock(p);
                        if (!task_has_cpu(p))
                                break;
                        task_unlock(p);
                        do {
                                cpu_relax();
                                barrier();
                        } while (task_has_cpu(p));
                }
                task_unlock(p);
#endif
                atomic_dec(&p->user->processes);
                free_uid(p->user);
                unhash_process(p);

                release_thread(p);
                current->cmin_flt += p->min_flt + p->cmin_flt;
                current->cmaj_flt += p->maj_flt + p->cmaj_flt;
                current->cnswap += p->nswap + p->cnswap;
                /*
                 * Potentially available timeslices are retrieved
                 * here - this way the parent does not get penalized
                 * for creating too many processes.
                 *
                 * (this cannot be used to artificially 'generate'
                 * timeslices, because any timeslice recovered here
                 * was given away by the parent in the first place.)
                 */
                current->counter += p->counter;
                if (current->counter >= MAX_COUNTER)
                        current->counter = MAX_COUNTER;
                p->pid = 0;
                free_task_struct(p);
        } else {
                printk("task releasing itself\n");
        }
}

/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
        struct task_struct *p;
        int fallback;

        fallback = -1;
        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (p->session <= 0)
                        continue;
                if (p->pgrp == pgrp) {
                        fallback = p->session;
                        break;
                }
                if (p->pid == pgrp)
                        fallback = p->session;
        }
        read_unlock(&tasklist_lock);
        return fallback;
}

/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
        struct task_struct *p;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if ((p == ignored_task) || (p->pgrp != pgrp) ||
                    (p->state == TASK_ZOMBIE) ||
                    (p->p_pptr->pid == 1))
                        continue;
                if ((p->p_pptr->pgrp != pgrp) &&
                    (p->p_pptr->session == p->session)) {
                        read_unlock(&tasklist_lock);
                        return 0;
                }
        }
        read_unlock(&tasklist_lock);
        return 1;       /* (sighing) "Often!" */
}

int is_orphaned_pgrp(int pgrp)
{
        return will_become_orphaned_pgrp(pgrp, 0);
}

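/*
 * Returns non-zero if any task in the given process group is in the
 * TASK_STOPPED state.
 */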
static inline int has_stopped_jobs(int pgrp)
{
        int retval = 0;
        struct task_struct * p;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (p->pgrp != pgrp)
                        continue;
                if (p->state != TASK_STOPPED)
                        continue;
                retval = 1;
                break;
        }
        read_unlock(&tasklist_lock);
        return retval;
}

/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our thread
 * group, and if no such member exists, give it to
 * the global child reaper process (ie "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
        struct task_struct * p;

        read_lock(&tasklist_lock);

        for_each_task(p) {
                if (p->p_opptr == father) {
                        /* We don't want people slaying init */
                        p->exit_signal = SIGCHLD;
                        p->self_exec_id++;

                        /* Make sure we're not reparenting to ourselves */
                        p->p_opptr = child_reaper;

                        if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
                }
        }
        read_unlock(&tasklist_lock);
}

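/*
 * Walk the open-descriptor bitmap and close every file that is still
 * installed in the fd array.
 */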
static inline void close_files(struct files_struct * files)
{
        int i, j;

        j = 0;
        for (;;) {
                unsigned long set;
                i = j * __NFDBITS;
                if (i >= files->max_fdset || i >= files->max_fds)
                        break;
                set = files->open_fds->fds_bits[j++];
                while (set) {
                        if (set & 1) {
                                struct file * file = xchg(&files->fd[i], NULL);
                                if (file)
                                        filp_close(file, files);
                        }
                        i++;
                        set >>= 1;
                }
        }
}

void put_files_struct(struct files_struct *files)
{
        if (atomic_dec_and_test(&files->count)) {
                close_files(files);
                /*
                 * Free the fd and fdset arrays if we expanded them.
                 */
                if (files->fd != &files->fd_array[0])
                        free_fd_array(files->fd, files->max_fds);
                if (files->max_fdset > __FD_SETSIZE) {
                        free_fdset(files->open_fds, files->max_fdset);
                        free_fdset(files->close_on_exec, files->max_fdset);
                }
                kmem_cache_free(files_cachep, files);
        }
}

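/*
 * Detach the files_struct from the task under task_lock(), then drop
 * our reference to it.
 */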
static inline void __exit_files(struct task_struct *tsk)
{
        struct files_struct * files = tsk->files;

        if (files) {
                task_lock(tsk);
                tsk->files = NULL;
                task_unlock(tsk);
                put_files_struct(files);
        }
}

void exit_files(struct task_struct *tsk)
{
        __exit_files(tsk);
}

static inline void __put_fs_struct(struct fs_struct *fs)
{
        /* No need to hold fs->lock if we are killing it */
        if (atomic_dec_and_test(&fs->count)) {
                dput(fs->root);
                mntput(fs->rootmnt);
                dput(fs->pwd);
                mntput(fs->pwdmnt);
                if (fs->altroot) {
                        dput(fs->altroot);
                        mntput(fs->altrootmnt);
                }
                kmem_cache_free(fs_cachep, fs);
        }
}

void put_fs_struct(struct fs_struct *fs)
{
        __put_fs_struct(fs);
}

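/*
 * Detach the fs_struct (root, pwd, altroot) from the task under
 * task_lock(), then drop our reference to it.
 */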
static inline void __exit_fs(struct task_struct *tsk)
{
        struct fs_struct * fs = tsk->fs;

        if (fs) {
                task_lock(tsk);
                tsk->fs = NULL;
                task_unlock(tsk);
                __put_fs_struct(fs);
        }
}

void exit_fs(struct task_struct *tsk)
{
        __exit_fs(tsk);
}

/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
        struct mm_struct *mm = current->mm;
        current->mm = NULL;
        /* active_mm is still 'mm' */
        atomic_inc(&mm->mm_count);
        enter_lazy_tlb(mm, current, smp_processor_id());
        return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
        struct mm_struct *active_mm = current->active_mm;

        current->mm = mm;
        if (mm != active_mm) {
                current->active_mm = mm;
                activate_mm(active_mm, mm);
        }
        mmdrop(active_mm);
}

/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
        struct mm_struct * mm = tsk->mm;

        mm_release();
        if (mm) {
                atomic_inc(&mm->mm_count);
                BUG_ON(mm != tsk->active_mm);
                /* more a memory barrier than a real lock */
                task_lock(tsk);
                tsk->mm = NULL;
                task_unlock(tsk);
                enter_lazy_tlb(mm, current, smp_processor_id());
                mmput(mm);
        }
}

void exit_mm(struct task_struct *tsk)
{
        __exit_mm(tsk);
}

/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
        struct task_struct * p, *t;

        forget_original_parent(current);
        /*
         * Check to see if any process groups have become orphaned
         * as a result of our exiting, and if they have any stopped
         * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         *
         * Case i: Our father is in a different pgrp than we are
         * and we were the only connection outside, so our pgrp
         * is about to become orphaned.
         */

        t = current->p_pptr;

        if ((t->pgrp != current->pgrp) &&
            (t->session == current->session) &&
            will_become_orphaned_pgrp(current->pgrp, current) &&
            has_stopped_jobs(current->pgrp)) {
                kill_pg(current->pgrp,SIGHUP,1);
                kill_pg(current->pgrp,SIGCONT,1);
        }

        /* Let father know we died
         *
         * Thread signals are configurable, but you aren't going to use
         * that to send signals to arbitrary processes.
         * That stops right now.
         *
         * If the parent exec id doesn't match the exec id we saved
         * when we started then we know the parent has changed security
         * domain.
         *
         * If our self_exec id doesn't match our parent_exec_id then
         * we have changed execution domain as these two values started
         * the same after a fork.
         */

        if (current->exit_signal != SIGCHLD &&
            (current->parent_exec_id != t->self_exec_id ||
             current->self_exec_id != current->parent_exec_id)
            && !capable(CAP_KILL))
                current->exit_signal = SIGCHLD;


        /*
         * This loop does two things:
         *
         * A.  Make init inherit all the child processes
         * B.  Check to see if any process groups have become orphaned
         *      as a result of our exiting, and if they have any stopped
         *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         */

        write_lock_irq(&tasklist_lock);
        current->state = TASK_ZOMBIE;
        do_notify_parent(current, current->exit_signal);
        while (current->p_cptr != NULL) {
                p = current->p_cptr;
                current->p_cptr = p->p_osptr;
                p->p_ysptr = NULL;
                p->ptrace = 0;

                p->p_pptr = p->p_opptr;
                p->p_osptr = p->p_pptr->p_cptr;
                if (p->p_osptr)
                        p->p_osptr->p_ysptr = p;
                p->p_pptr->p_cptr = p;
                if (p->state == TASK_ZOMBIE)
                        do_notify_parent(p, p->exit_signal);
                /*
                 * process group orphan check
                 * Case ii: Our child is in a different pgrp
                 * than we are, and it was the only connection
                 * outside, so the child pgrp is now orphaned.
                 */
                if ((p->pgrp != current->pgrp) &&
                    (p->session == current->session)) {
                        int pgrp = p->pgrp;

                        write_unlock_irq(&tasklist_lock);
                        if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
                                kill_pg(pgrp,SIGHUP,1);
                                kill_pg(pgrp,SIGCONT,1);
                        }
                        write_lock_irq(&tasklist_lock);
                }
        }
        write_unlock_irq(&tasklist_lock);
}

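/*
 * The main exit path: tear down the task's mm, IPC, files, fs, signal
 * handlers and thread state, notify the parent, and then schedule away
 * for good as a TASK_ZOMBIE.
 */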
NORET_TYPE void do_exit(long code)
{
        struct task_struct *tsk = current;

        if (in_interrupt())
                panic("Aiee, killing interrupt handler!");
        if (!tsk->pid)
                panic("Attempted to kill the idle task!");
        if (tsk->pid == 1)
                panic("Attempted to kill init!");
        tsk->flags |= PF_EXITING;
        del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
        acct_process(code);
#endif
        __exit_mm(tsk);

        lock_kernel();
        sem_exit();
        __exit_files(tsk);
        __exit_fs(tsk);
        exit_namespace(tsk);
        exit_sighand(tsk);
        exit_thread();

        if (current->leader)
                disassociate_ctty(1);

        put_exec_domain(tsk->exec_domain);
        if (tsk->binfmt && tsk->binfmt->module)
                __MOD_DEC_USE_COUNT(tsk->binfmt->module);

        tsk->exit_code = code;
        exit_notify();
        schedule();
        BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
        goto fake_volatile;
}

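/*
 * Signal a completion and then exit.  Used by kernel threads so the
 * waiter (e.g. a module about to unload) is woken before we die.
 */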
NORET_TYPE void complete_and_exit(struct completion *comp, long code)
{
        if (comp)
                complete(comp);

        do_exit(code);
}

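/*
 * exit(2): the low byte of error_code becomes the exit status, shifted
 * into the format that wait(2) reports.
 */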
asmlinkage long sys_exit(int error_code)
{
        do_exit((error_code&0xff)<<8);
}

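/*
 * wait4(2).  pid > 0 waits for that specific child, pid == 0 for any
 * child in our process group, pid == -1 for any child at all, and
 * pid < -1 for any child in process group -pid.
 */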
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
        int flag, retval;
        DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;

        if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
                return -EINVAL;

        add_wait_queue(&current->wait_chldexit,&wait);
repeat:
        flag = 0;
        current->state = TASK_INTERRUPTIBLE;
        read_lock(&tasklist_lock);
        tsk = current;
        do {
                struct task_struct *p;
                for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
                        if (pid>0) {
                                if (p->pid != pid)
                                        continue;
                        } else if (!pid) {
                                if (p->pgrp != current->pgrp)
                                        continue;
                        } else if (pid != -1) {
                                if (p->pgrp != -pid)
                                        continue;
                        }
                        /* Wait for all children (clone and not) if __WALL is set;
                         * otherwise, wait for clone children *only* if __WCLONE is
                         * set; otherwise, wait for non-clone children *only*.  (Note:
                         * A "clone" child here is one that reports to its parent
                         * using a signal other than SIGCHLD.) */
                        if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
                            && !(options & __WALL))
                                continue;
                        flag = 1;
                        switch (p->state) {
                        case TASK_STOPPED:
                                if (!p->exit_code)
                                        continue;
                                if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
                                        continue;
                                read_unlock(&tasklist_lock);
                                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                if (!retval && stat_addr)
                                        retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
                                if (!retval) {
                                        p->exit_code = 0;
                                        retval = p->pid;
                                }
                                goto end_wait4;
                        case TASK_ZOMBIE:
                                current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
                                current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
                                read_unlock(&tasklist_lock);
                                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                if (!retval && stat_addr)
                                        retval = put_user(p->exit_code, stat_addr);
                                if (retval)
                                        goto end_wait4;
                                retval = p->pid;
                                if (p->p_opptr != p->p_pptr) {
                                        write_lock_irq(&tasklist_lock);
                                        REMOVE_LINKS(p);
                                        p->p_pptr = p->p_opptr;
                                        SET_LINKS(p);
                                        do_notify_parent(p, SIGCHLD);
                                        write_unlock_irq(&tasklist_lock);
                                } else
                                        release_task(p);
                                goto end_wait4;
                        default:
                                continue;
                        }
                }
                if (options & __WNOTHREAD)
                        break;
                tsk = next_thread(tsk);
        } while (tsk != current);
        read_unlock(&tasklist_lock);
        if (flag) {
                retval = 0;
                if (options & WNOHANG)
                        goto end_wait4;
                retval = -ERESTARTSYS;
                if (signal_pending(current))
                        goto end_wait4;
                schedule();
                goto repeat;
        }
        retval = -ECHILD;
end_wait4:
        current->state = TASK_RUNNING;
        remove_wait_queue(&current->wait_chldexit,&wait);
        return retval;
}

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
        return sys_wait4(pid, stat_addr, options, NULL);
}

#endif