import of upstream 2.4.34.4 from kernel.org
[linux-2.4.git] / fs / proc / array.c
1 /*
2  *  linux/fs/proc/array.c
3  *
4  *  Copyright (C) 1992  by Linus Torvalds
5  *  based on ideas by Darren Senn
6  *
7  * Fixes:
8  * Michael. K. Johnson: stat,statm extensions.
9  *                      <johnsonm@stolaf.edu>
10  *
11  * Pauline Middelink :  Made cmdline,envline only break at '\0's, to
12  *                      make sure SET_PROCTITLE works. Also removed
13  *                      bad '!' which forced address recalculation for
14  *                      EVERY character on the current page.
15  *                      <middelin@polyware.iaf.nl>
16  *
17  * Danny ter Haar    :  added cpuinfo
18  *                      <dth@cistron.nl>
19  *
20  * Alessandro Rubini :  profile extension.
21  *                      <rubini@ipvvis.unipv.it>
22  *
23  * Jeff Tranter      :  added BogoMips field to cpuinfo
24  *                      <Jeff_Tranter@Mitel.COM>
25  *
26  * Bruno Haible      :  remove 4K limit for the maps file
27  *                      <haible@ma2s2.mathematik.uni-karlsruhe.de>
28  *
29  * Yves Arrouye      :  remove removal of trailing spaces in get_array.
30  *                      <Yves.Arrouye@marin.fdn.fr>
31  *
32  * Jerome Forissier  :  added per-CPU time information to /proc/stat
33  *                      and /proc/<pid>/cpu extension
34  *                      <forissier@isia.cma.fr>
35  *                      - Incorporation and non-SMP safe operation
36  *                      of forissier patch in 2.1.78 by
37  *                      Hans Marcus <crowbar@concepts.nl>
38  *
39  * aeb@cwi.nl        :  /proc/partitions
40  *
41  *
42  * Alan Cox          :  security fixes.
43  *                      <Alan.Cox@linux.org>
44  *
45  * Al Viro           :  safe handling of mm_struct
46  *
47  * Gerhard Wichert   :  added BIGMEM support
48  * Siemens AG           <Gerhard.Wichert@pdb.siemens.de>
49  *
50  * Al Viro & Jeff Garzik :  moved most of the thing into base.c and
51  *                       :  proc_misc.c. The rest may eventually go into
52  *                       :  base.c too.
53  */
54
55 #include <linux/config.h>
56 #include <linux/types.h>
57 #include <linux/errno.h>
58 #include <linux/sched.h>
59 #include <linux/kernel.h>
60 #include <linux/kernel_stat.h>
61 #include <linux/tty.h>
62 #include <linux/string.h>
63 #include <linux/mman.h>
64 #include <linux/proc_fs.h>
65 #include <linux/ioport.h>
66 #include <linux/mm.h>
67 #include <linux/pagemap.h>
68 #include <linux/swap.h>
69 #include <linux/slab.h>
70 #include <linux/smp.h>
71 #include <linux/signal.h>
72 #include <linux/highmem.h>
73 #include <linux/seq_file.h>
74
75 #include <asm/uaccess.h>
76 #include <asm/pgtable.h>
77 #include <asm/io.h>
78 #include <asm/processor.h>
79
/*
 * ADDBUF(buffer, string): copy `string` (without its terminating NUL) to
 * the location `buffer` points at, then advance `buffer` past the copy.
 *
 * `buffer` must be a modifiable char pointer lvalue.  `string` is
 * evaluated exactly once, so an argument with side effects is safe.
 * Gcc still optimizes away "strlen(x)" for constant x.
 */
#define ADDBUF(buffer, string) \
do { \
	const char *addbuf_src = (string); \
	size_t addbuf_len = strlen(addbuf_src); \
	memcpy((buffer), addbuf_src, addbuf_len); \
	(buffer) += addbuf_len; \
} while (0)
84
85 static inline char * task_name(struct task_struct *p, char * buf)
86 {
87         int i;
88         char * name;
89         char tcomm[sizeof(p->comm)];
90
91         get_task_comm(tcomm, p);
92
93         ADDBUF(buf, "Name:\t");
94         name = tcomm;
95         i = sizeof(tcomm);
96         do {
97                 unsigned char c = *name;
98                 name++;
99                 i--;
100                 *buf = c;
101                 if (!c)
102                         break;
103                 if (c == '\\') {
104                         buf[1] = c;
105                         buf += 2;
106                         continue;
107                 }
108                 if (c == '\n') {
109                         buf[0] = '\\';
110                         buf[1] = 'n';
111                         buf += 2;
112                         continue;
113                 }
114                 buf++;
115         } while (i);
116         *buf = '\n';
117         return buf+1;
118 }
119
/*
 * Human-readable task states.
 *
 * The task state is a strange "bitmap" of reasons to sleep: "running"
 * is zero and the other states are single bits, so combinations can be
 * tested with simple bit tests.  The state value each entry stands for
 * is noted next to it.
 */
static const char *task_state_array[] = {
	[0] = "R (running)",		/* state  0 */
	[1] = "S (sleeping)",		/* state  1 */
	[2] = "D (disk sleep)",		/* state  2 */
	[3] = "Z (zombie)",		/* state  4 */
	[4] = "T (stopped)",		/* state  8 */
	[5] = "W (paging)",		/* state 16 */
};
134
135 static inline const char * get_task_state(struct task_struct *tsk)
136 {
137         unsigned int state = tsk->state & (TASK_RUNNING |
138                                            TASK_INTERRUPTIBLE |
139                                            TASK_UNINTERRUPTIBLE |
140                                            TASK_ZOMBIE |
141                                            TASK_STOPPED);
142         const char **p = &task_state_array[0];
143
144         while (state) {
145                 p++;
146                 state >>= 1;
147         }
148         return *p;
149 }
150
151 static inline char * task_state(struct task_struct *p, char *buffer)
152 {
153         int g;
154
155         read_lock(&tasklist_lock);
156         buffer += sprintf(buffer,
157                 "State:\t%s\n"
158                 "Tgid:\t%d\n"
159                 "Pid:\t%d\n"
160                 "PPid:\t%d\n"
161                 "TracerPid:\t%d\n"
162                 "Uid:\t%d\t%d\t%d\t%d\n"
163                 "Gid:\t%d\t%d\t%d\t%d\n",
164                 get_task_state(p), p->tgid,
165                 p->pid, p->pid ? p->p_opptr->pid : 0, 0,
166                 p->uid, p->euid, p->suid, p->fsuid,
167                 p->gid, p->egid, p->sgid, p->fsgid);
168         read_unlock(&tasklist_lock);    
169         task_lock(p);
170         buffer += sprintf(buffer,
171                 "FDSize:\t%d\n"
172                 "Groups:\t",
173                 p->files ? p->files->max_fds : 0);
174         task_unlock(p);
175
176         for (g = 0; g < p->ngroups; g++)
177                 buffer += sprintf(buffer, "%d ", p->groups[g]);
178
179         buffer += sprintf(buffer, "\n");
180         return buffer;
181 }
182
183 static inline char * task_mem(struct mm_struct *mm, char *buffer)
184 {
185         struct vm_area_struct * vma;
186         unsigned long data = 0, stack = 0;
187         unsigned long exec = 0, lib = 0;
188
189         down_read(&mm->mmap_sem);
190         for (vma = mm->mmap; vma; vma = vma->vm_next) {
191                 unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
192                 if (!vma->vm_file) {
193                         data += len;
194                         if (vma->vm_flags & VM_GROWSDOWN)
195                                 stack += len;
196                         continue;
197                 }
198                 if (vma->vm_flags & VM_WRITE)
199                         continue;
200                 if (vma->vm_flags & VM_EXEC) {
201                         exec += len;
202                         if (vma->vm_flags & VM_EXECUTABLE)
203                                 continue;
204                         lib += len;
205                 }
206         }
207         buffer += sprintf(buffer,
208                 "VmSize:\t%8lu kB\n"
209                 "VmLck:\t%8lu kB\n"
210                 "VmRSS:\t%8lu kB\n"
211                 "VmData:\t%8lu kB\n"
212                 "VmStk:\t%8lu kB\n"
213                 "VmExe:\t%8lu kB\n"
214                 "VmLib:\t%8lu kB\n",
215                 mm->total_vm << (PAGE_SHIFT-10),
216                 mm->locked_vm << (PAGE_SHIFT-10),
217                 mm->rss << (PAGE_SHIFT-10),
218                 data - stack, stack,
219                 exec - lib, lib);
220         up_read(&mm->mmap_sem);
221         return buffer;
222 }
223
224 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
225                                     sigset_t *catch)
226 {
227         struct k_sigaction *k;
228         int i;
229
230         sigemptyset(ign);
231         sigemptyset(catch);
232
233         spin_lock_irq(&p->sigmask_lock);
234
235         if (p->sig) {
236                 k = p->sig->action;
237                 for (i = 1; i <= _NSIG; ++i, ++k) {
238                         if (k->sa.sa_handler == SIG_IGN)
239                                 sigaddset(ign, i);
240                         else if (k->sa.sa_handler != SIG_DFL)
241                                 sigaddset(catch, i);
242                 }
243         }
244         spin_unlock_irq(&p->sigmask_lock);
245 }
246
247 static inline char * task_sig(struct task_struct *p, char *buffer)
248 {
249         sigset_t ign, catch;
250
251         buffer += sprintf(buffer, "SigPnd:\t");
252         buffer = render_sigset_t(&p->pending.signal, buffer);
253         *buffer++ = '\n';
254         buffer += sprintf(buffer, "SigBlk:\t");
255         buffer = render_sigset_t(&p->blocked, buffer);
256         *buffer++ = '\n';
257
258         collect_sigign_sigcatch(p, &ign, &catch);
259         buffer += sprintf(buffer, "SigIgn:\t");
260         buffer = render_sigset_t(&ign, buffer);
261         *buffer++ = '\n';
262         buffer += sprintf(buffer, "SigCgt:\t"); /* Linux 2.0 uses "SigCgt" */
263         buffer = render_sigset_t(&catch, buffer);
264         *buffer++ = '\n';
265
266         return buffer;
267 }
268
269 static inline char *task_cap(struct task_struct *p, char *buffer)
270 {
271     return buffer + sprintf(buffer, "CapInh:\t%016x\n"
272                             "CapPrm:\t%016x\n"
273                             "CapEff:\t%016x\n",
274                             cap_t(p->cap_inheritable),
275                             cap_t(p->cap_permitted),
276                             cap_t(p->cap_effective));
277 }
278
279
280 int proc_pid_status(struct task_struct *task, char * buffer)
281 {
282         char * orig = buffer;
283         struct mm_struct *mm;
284
285         buffer = task_name(task, buffer);
286         buffer = task_state(task, buffer);
287         task_lock(task);
288         mm = task->mm;
289         if(mm)
290                 atomic_inc(&mm->mm_users);
291         task_unlock(task);
292         if (mm) {
293                 buffer = task_mem(mm, buffer);
294                 mmput(mm);
295         }
296         buffer = task_sig(task, buffer);
297         buffer = task_cap(task, buffer);
298 #if defined(CONFIG_ARCH_S390)
299         buffer = task_show_regs(task, buffer);
300 #endif
301         return buffer - orig;
302 }
303
304 int proc_pid_stat(struct task_struct *task, char * buffer)
305 {
306         unsigned long vsize, eip, esp, wchan;
307         long priority, nice;
308         int tty_pgrp = -1, tty_nr = 0;
309         sigset_t sigign, sigcatch;
310         char state;
311         int res;
312         pid_t ppid;
313         struct mm_struct *mm;
314         char tcomm[sizeof(task->comm)];
315
316         state = *get_task_state(task);
317         vsize = eip = esp = 0;
318         task_lock(task);
319         mm = task->mm;
320         if(mm)
321                 atomic_inc(&mm->mm_users);
322         if (task->tty) {
323                 tty_pgrp = task->tty->pgrp;
324                 tty_nr = kdev_t_to_nr(task->tty->device);
325         }
326         task_unlock(task);
327         if (mm) {
328                 struct vm_area_struct *vma;
329                 down_read(&mm->mmap_sem);
330                 vma = mm->mmap;
331                 while (vma) {
332                         vsize += vma->vm_end - vma->vm_start;
333                         vma = vma->vm_next;
334                 }
335                 eip = KSTK_EIP(task);
336                 esp = KSTK_ESP(task);
337                 up_read(&mm->mmap_sem);
338         }
339
340         get_task_comm(tcomm, task);
341
342         wchan = get_wchan(task);
343
344         collect_sigign_sigcatch(task, &sigign, &sigcatch);
345
346         /* scale priority and nice values from timeslices to -20..20 */
347         /* to make it look like a "normal" Unix priority/nice value  */
348         priority = task->counter;
349         priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
350         nice = task->nice;
351
352         read_lock(&tasklist_lock);
353         ppid = task->pid ? task->p_opptr->pid : 0;
354         read_unlock(&tasklist_lock);
355         res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
356 %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
357 %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
358                 task->pid,
359                 tcomm,
360                 state,
361                 ppid,
362                 task->pgrp,
363                 task->session,
364                 tty_nr,
365                 tty_pgrp,
366                 task->flags,
367                 task->min_flt,
368                 task->cmin_flt,
369                 task->maj_flt,
370                 task->cmaj_flt,
371                 task->times.tms_utime,
372                 task->times.tms_stime,
373                 task->times.tms_cutime,
374                 task->times.tms_cstime,
375                 priority,
376                 nice,
377                 0UL /* removed */,
378                 task->it_real_value,
379                 task->start_time,
380                 vsize,
381                 mm ? mm->rss : 0, /* you might want to shift this left 3 */
382                 task->rlim[RLIMIT_RSS].rlim_cur,
383                 mm ? mm->start_code : 0,
384                 mm ? mm->end_code : 0,
385                 mm ? mm->start_stack : 0,
386                 esp,
387                 eip,
388                 /* The signal information here is obsolete.
389                  * It must be decimal for Linux 2.0 compatibility.
390                  * Use /proc/#/status for real-time signals.
391                  */
392                 task->pending.signal.sig[0] & 0x7fffffffUL,
393                 task->blocked.sig[0] & 0x7fffffffUL,
394                 sigign      .sig[0] & 0x7fffffffUL,
395                 sigcatch    .sig[0] & 0x7fffffffUL,
396                 wchan,
397                 task->nswap,
398                 task->cnswap,
399                 task->exit_signal,
400                 task->processor);
401         if(mm)
402                 mmput(mm);
403         return res;
404 }
405                 
406 static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
407         int * pages, int * shared, int * dirty, int * total)
408 {
409         pte_t * pte;
410         unsigned long end;
411
412         if (pmd_none(*pmd))
413                 return;
414         if (pmd_bad(*pmd)) {
415                 pmd_ERROR(*pmd);
416                 pmd_clear(pmd);
417                 return;
418         }
419         pte = pte_offset(pmd, address);
420         address &= ~PMD_MASK;
421         end = address + size;
422         if (end > PMD_SIZE)
423                 end = PMD_SIZE;
424         do {
425                 pte_t page = *pte;
426                 struct page *ptpage;
427
428                 address += PAGE_SIZE;
429                 pte++;
430                 if (pte_none(page))
431                         continue;
432                 ++*total;
433                 if (!pte_present(page))
434                         continue;
435                 ptpage = pte_page(page);
436                 if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
437                         continue;
438                 ++*pages;
439                 if (pte_dirty(page))
440                         ++*dirty;
441                 if (page_count(pte_page(page)) > 1)
442                         ++*shared;
443         } while (address < end);
444 }
445
446 static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size,
447         int * pages, int * shared, int * dirty, int * total)
448 {
449         pmd_t * pmd;
450         unsigned long end;
451
452         if (pgd_none(*pgd))
453                 return;
454         if (pgd_bad(*pgd)) {
455                 pgd_ERROR(*pgd);
456                 pgd_clear(pgd);
457                 return;
458         }
459         pmd = pmd_offset(pgd, address);
460         address &= ~PGDIR_MASK;
461         end = address + size;
462         if (end > PGDIR_SIZE)
463                 end = PGDIR_SIZE;
464         do {
465                 statm_pte_range(pmd, address, end - address, pages, shared, dirty, total);
466                 address = (address + PMD_SIZE) & PMD_MASK;
467                 pmd++;
468         } while (address < end);
469 }
470
471 static void statm_pgd_range(pgd_t * pgd, unsigned long address, unsigned long end,
472         int * pages, int * shared, int * dirty, int * total)
473 {
474         while (address < end) {
475                 statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total);
476                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
477                 pgd++;
478         }
479 }
480
481 int proc_pid_statm(struct task_struct *task, char * buffer)
482 {
483         struct mm_struct *mm;
484         int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;
485
486         task_lock(task);
487         mm = task->mm;
488         if(mm)
489                 atomic_inc(&mm->mm_users);
490         task_unlock(task);
491         if (mm) {
492                 struct vm_area_struct * vma;
493                 down_read(&mm->mmap_sem);
494                 vma = mm->mmap;
495                 while (vma) {
496                         pgd_t *pgd = pgd_offset(mm, vma->vm_start);
497                         int pages = 0, shared = 0, dirty = 0, total = 0;
498
499                         statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
500                         resident += pages;
501                         share += shared;
502                         dt += dirty;
503                         size += total;
504                         if (vma->vm_flags & VM_EXECUTABLE)
505                                 trs += pages;   /* text */
506                         else if (vma->vm_flags & VM_GROWSDOWN)
507                                 drs += pages;   /* stack */
508                         else if (vma->vm_end > 0x60000000)
509                                 lrs += pages;   /* library */
510                         else
511                                 drs += pages;
512                         vma = vma->vm_next;
513                 }
514                 up_read(&mm->mmap_sem);
515                 mmput(mm);
516         }
517         return sprintf(buffer,"%d %d %d %d %d %d %d\n",
518                        size, resident, share, trs, lrs, drs, dt);
519 }
520
521 static int show_map(struct seq_file *m, void *v)
522 {
523         struct vm_area_struct *map = v;
524         struct file *file = map->vm_file;
525         int flags = map->vm_flags;
526         unsigned long ino = 0;
527         dev_t dev = 0;
528         int len;
529
530         if (file) {
531                 struct inode *inode = map->vm_file->f_dentry->d_inode;
532                 dev = kdev_t_to_nr(inode->i_sb->s_dev);
533                 ino = inode->i_ino;
534         }
535
536         seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
537                         map->vm_start,
538                         map->vm_end,
539                         flags & VM_READ ? 'r' : '-',
540                         flags & VM_WRITE ? 'w' : '-',
541                         flags & VM_EXEC ? 'x' : '-',
542                         flags & VM_MAYSHARE ? 's' : 'p',
543                         map->vm_pgoff << PAGE_SHIFT,
544                         MAJOR(dev), MINOR(dev), ino, &len);
545
546         if (map->vm_file) {
547                 len = 25 + sizeof(void*) * 6 - len;
548                 if (len < 1)
549                         len = 1;
550                 seq_printf(m, "%*c", len, ' ');
551                 seq_path(m, file->f_vfsmnt, file->f_dentry, "");
552         }
553         seq_putc(m, '\n');
554         return 0;
555 }
556
557 static void *m_start(struct seq_file *m, loff_t *pos)
558 {
559         struct task_struct *task = m->private;
560         struct mm_struct *mm;
561         struct vm_area_struct * map;
562         loff_t l = *pos;
563
564         task_lock(task);
565         mm = task->mm;
566         if (mm)
567                 atomic_inc(&mm->mm_users);
568         task_unlock(task);
569
570         if (!mm)
571                 return NULL;
572
573         down_read(&mm->mmap_sem);
574         map = mm->mmap;
575         while (l-- && map)
576                 map = map->vm_next;
577         if (!map) {
578                 up_read(&mm->mmap_sem);
579                 mmput(mm);
580         }
581         return map;
582 }
583
584 static void m_stop(struct seq_file *m, void *v)
585 {
586         struct vm_area_struct *map = v;
587         if (map) {
588                 struct mm_struct *mm = map->vm_mm;
589                 up_read(&mm->mmap_sem);
590                 mmput(mm);
591         }
592 }
593
594 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
595 {
596         struct vm_area_struct *map = v;
597         (*pos)++;
598         if (map->vm_next)
599                 return map->vm_next;
600         m_stop(m, v);
601         return NULL;
602 }
603
604 struct seq_operations proc_pid_maps_op = {
605         .start  = m_start,
606         .next   = m_next,
607         .stop   = m_stop,
608         .show   = show_map
609 };
610
611 #ifdef CONFIG_SMP
612 int proc_pid_cpu(struct task_struct *task, char * buffer)
613 {
614         int i, len;
615
616         len = sprintf(buffer,
617                 "cpu  %lu %lu\n",
618                 task->times.tms_utime,
619                 task->times.tms_stime);
620                 
621         for (i = 0 ; i < smp_num_cpus; i++)
622                 len += sprintf(buffer + len, "cpu%d %lu %lu\n",
623                         i,
624                         task->per_cpu_utime[cpu_logical_map(i)],
625                         task->per_cpu_stime[cpu_logical_map(i)]);
626
627         return len;
628 }
629 #endif