make oldconfig will rebuild these...
[linux-2.4.21-pre4.git] / fs / proc / array.c
1 /*
2  *  linux/fs/proc/array.c
3  *
4  *  Copyright (C) 1992  by Linus Torvalds
5  *  based on ideas by Darren Senn
6  *
7  * Fixes:
8  * Michael. K. Johnson: stat,statm extensions.
9  *                      <johnsonm@stolaf.edu>
10  *
11  * Pauline Middelink :  Made cmdline,envline only break at '\0's, to
12  *                      make sure SET_PROCTITLE works. Also removed
13  *                      bad '!' which forced address recalculation for
14  *                      EVERY character on the current page.
15  *                      <middelin@polyware.iaf.nl>
16  *
17  * Danny ter Haar    :  added cpuinfo
18  *                      <dth@cistron.nl>
19  *
20  * Alessandro Rubini :  profile extension.
21  *                      <rubini@ipvvis.unipv.it>
22  *
23  * Jeff Tranter      :  added BogoMips field to cpuinfo
24  *                      <Jeff_Tranter@Mitel.COM>
25  *
26  * Bruno Haible      :  remove 4K limit for the maps file
27  *                      <haible@ma2s2.mathematik.uni-karlsruhe.de>
28  *
29  * Yves Arrouye      :  remove removal of trailing spaces in get_array.
30  *                      <Yves.Arrouye@marin.fdn.fr>
31  *
32  * Jerome Forissier  :  added per-CPU time information to /proc/stat
33  *                      and /proc/<pid>/cpu extension
34  *                      <forissier@isia.cma.fr>
35  *                      - Incorporation and non-SMP safe operation
36  *                      of forissier patch in 2.1.78 by
37  *                      Hans Marcus <crowbar@concepts.nl>
38  *
39  * aeb@cwi.nl        :  /proc/partitions
40  *
41  *
42  * Alan Cox          :  security fixes.
43  *                      <Alan.Cox@linux.org>
44  *
45  * Al Viro           :  safe handling of mm_struct
46  *
47  * Gerhard Wichert   :  added BIGMEM support
48  * Siemens AG           <Gerhard.Wichert@pdb.siemens.de>
49  *
50  * Al Viro & Jeff Garzik :  moved most of the thing into base.c and
51  *                       :  proc_misc.c. The rest may eventually go into
52  *                       :  base.c too.
53  */
54
55 #include <linux/config.h>
56 #include <linux/types.h>
57 #include <linux/errno.h>
58 #include <linux/sched.h>
59 #include <linux/kernel.h>
60 #include <linux/kernel_stat.h>
61 #include <linux/tty.h>
62 #include <linux/string.h>
63 #include <linux/mman.h>
64 #include <linux/proc_fs.h>
65 #include <linux/ioport.h>
66 #include <linux/mm.h>
67 #include <linux/pagemap.h>
68 #include <linux/swap.h>
69 #include <linux/slab.h>
70 #include <linux/smp.h>
71 #include <linux/signal.h>
72 #include <linux/highmem.h>
73
74 #include <asm/uaccess.h>
75 #include <asm/pgtable.h>
76 #include <asm/io.h>
77 #include <asm/processor.h>
78
/*
 * ADDBUF - append the bytes of a string to an output cursor.
 * @buffer: char pointer lvalue; the bytes are copied to where it points and
 *          it is then advanced past them.  It is expanded twice, so it must
 *          be free of side effects.
 * @string: NUL-terminated string to append; the NUL itself is not copied.
 *
 * @string is evaluated exactly once and its length is taken exactly once.
 * For constant strings GCC is expected to optimize the strlen() call away.
 */
#define ADDBUF(buffer, string) \
do { const char *addbuf_src_ = (string); \
     size_t addbuf_len_ = strlen(addbuf_src_); \
     memcpy((buffer), addbuf_src_, addbuf_len_); \
     (buffer) += addbuf_len_; } while (0)
83
84 static inline char * task_name(struct task_struct *p, char * buf)
85 {
86         int i;
87         char * name;
88
89         ADDBUF(buf, "Name:\t");
90         name = p->comm;
91         i = sizeof(p->comm);
92         do {
93                 unsigned char c = *name;
94                 name++;
95                 i--;
96                 *buf = c;
97                 if (!c)
98                         break;
99                 if (c == '\\') {
100                         buf[1] = c;
101                         buf += 2;
102                         continue;
103                 }
104                 if (c == '\n') {
105                         buf[0] = '\\';
106                         buf[1] = 'n';
107                         buf += 2;
108                         continue;
109                 }
110                 buf++;
111         } while (i);
112         *buf = '\n';
113         return buf+1;
114 }
115
116 /*
117  * The task state array is a strange "bitmap" of
118  * reasons to sleep. Thus "running" is zero, and
119  * you can test for combinations of others with
120  * simple bit tests.
121  */
122 static const char *task_state_array[] = {
123         "R (running)",          /*  0 */
124         "S (sleeping)",         /*  1 */
125         "D (disk sleep)",       /*  2 */
126         "Z (zombie)",           /*  4 */
127         "T (stopped)",          /*  8 */
128         "W (paging)"            /* 16 */
129 };
130
131 static inline const char * get_task_state(struct task_struct *tsk)
132 {
133         unsigned int state = tsk->state & (TASK_RUNNING |
134                                            TASK_INTERRUPTIBLE |
135                                            TASK_UNINTERRUPTIBLE |
136                                            TASK_ZOMBIE |
137                                            TASK_STOPPED);
138         const char **p = &task_state_array[0];
139
140         while (state) {
141                 p++;
142                 state >>= 1;
143         }
144         return *p;
145 }
146
147 static inline char * task_state(struct task_struct *p, char *buffer)
148 {
149         int g;
150
151         read_lock(&tasklist_lock);
152         buffer += sprintf(buffer,
153                 "State:\t%s\n"
154                 "Tgid:\t%d\n"
155                 "Pid:\t%d\n"
156                 "PPid:\t%d\n"
157                 "TracerPid:\t%d\n"
158                 "Uid:\t%d\t%d\t%d\t%d\n"
159                 "Gid:\t%d\t%d\t%d\t%d\n",
160                 get_task_state(p), p->tgid,
161                 p->pid, p->pid ? p->p_opptr->pid : 0, 0,
162                 p->uid, p->euid, p->suid, p->fsuid,
163                 p->gid, p->egid, p->sgid, p->fsgid);
164         read_unlock(&tasklist_lock);    
165         task_lock(p);
166         buffer += sprintf(buffer,
167                 "FDSize:\t%d\n"
168                 "Groups:\t",
169                 p->files ? p->files->max_fds : 0);
170         task_unlock(p);
171
172         for (g = 0; g < p->ngroups; g++)
173                 buffer += sprintf(buffer, "%d ", p->groups[g]);
174
175         buffer += sprintf(buffer, "\n");
176         return buffer;
177 }
178
179 static inline char * task_mem(struct mm_struct *mm, char *buffer)
180 {
181         struct vm_area_struct * vma;
182         unsigned long data = 0, stack = 0;
183         unsigned long exec = 0, lib = 0;
184
185         down_read(&mm->mmap_sem);
186         for (vma = mm->mmap; vma; vma = vma->vm_next) {
187                 unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
188                 if (!vma->vm_file) {
189                         data += len;
190                         if (vma->vm_flags & VM_GROWSDOWN)
191                                 stack += len;
192                         continue;
193                 }
194                 if (vma->vm_flags & VM_WRITE)
195                         continue;
196                 if (vma->vm_flags & VM_EXEC) {
197                         exec += len;
198                         if (vma->vm_flags & VM_EXECUTABLE)
199                                 continue;
200                         lib += len;
201                 }
202         }
203         buffer += sprintf(buffer,
204                 "VmSize:\t%8lu kB\n"
205                 "VmLck:\t%8lu kB\n"
206                 "VmRSS:\t%8lu kB\n"
207                 "VmData:\t%8lu kB\n"
208                 "VmStk:\t%8lu kB\n"
209                 "VmExe:\t%8lu kB\n"
210                 "VmLib:\t%8lu kB\n",
211                 mm->total_vm << (PAGE_SHIFT-10),
212                 mm->locked_vm << (PAGE_SHIFT-10),
213                 mm->rss << (PAGE_SHIFT-10),
214                 data - stack, stack,
215                 exec - lib, lib);
216         up_read(&mm->mmap_sem);
217         return buffer;
218 }
219
220 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
221                                     sigset_t *catch)
222 {
223         struct k_sigaction *k;
224         int i;
225
226         sigemptyset(ign);
227         sigemptyset(catch);
228
229         spin_lock_irq(&p->sigmask_lock);
230
231         if (p->sig) {
232                 k = p->sig->action;
233                 for (i = 1; i <= _NSIG; ++i, ++k) {
234                         if (k->sa.sa_handler == SIG_IGN)
235                                 sigaddset(ign, i);
236                         else if (k->sa.sa_handler != SIG_DFL)
237                                 sigaddset(catch, i);
238                 }
239         }
240         spin_unlock_irq(&p->sigmask_lock);
241 }
242
243 static inline char * task_sig(struct task_struct *p, char *buffer)
244 {
245         sigset_t ign, catch;
246
247         buffer += sprintf(buffer, "SigPnd:\t");
248         buffer = render_sigset_t(&p->pending.signal, buffer);
249         *buffer++ = '\n';
250         buffer += sprintf(buffer, "SigBlk:\t");
251         buffer = render_sigset_t(&p->blocked, buffer);
252         *buffer++ = '\n';
253
254         collect_sigign_sigcatch(p, &ign, &catch);
255         buffer += sprintf(buffer, "SigIgn:\t");
256         buffer = render_sigset_t(&ign, buffer);
257         *buffer++ = '\n';
258         buffer += sprintf(buffer, "SigCgt:\t"); /* Linux 2.0 uses "SigCgt" */
259         buffer = render_sigset_t(&catch, buffer);
260         *buffer++ = '\n';
261
262         return buffer;
263 }
264
265 static inline char *task_cap(struct task_struct *p, char *buffer)
266 {
267     return buffer + sprintf(buffer, "CapInh:\t%016x\n"
268                             "CapPrm:\t%016x\n"
269                             "CapEff:\t%016x\n",
270                             cap_t(p->cap_inheritable),
271                             cap_t(p->cap_permitted),
272                             cap_t(p->cap_effective));
273 }
274
275
276 int proc_pid_status(struct task_struct *task, char * buffer)
277 {
278         char * orig = buffer;
279         struct mm_struct *mm;
280
281         buffer = task_name(task, buffer);
282         buffer = task_state(task, buffer);
283         task_lock(task);
284         mm = task->mm;
285         if(mm)
286                 atomic_inc(&mm->mm_users);
287         task_unlock(task);
288         if (mm) {
289                 buffer = task_mem(mm, buffer);
290                 mmput(mm);
291         }
292         buffer = task_sig(task, buffer);
293         buffer = task_cap(task, buffer);
294 #if defined(CONFIG_ARCH_S390)
295         buffer = task_show_regs(task, buffer);
296 #endif
297         return buffer - orig;
298 }
299
300 int proc_pid_stat(struct task_struct *task, char * buffer)
301 {
302         unsigned long vsize, eip, esp, wchan;
303         long priority, nice;
304         int tty_pgrp = -1, tty_nr = 0;
305         sigset_t sigign, sigcatch;
306         char state;
307         int res;
308         pid_t ppid;
309         struct mm_struct *mm;
310
311         state = *get_task_state(task);
312         vsize = eip = esp = 0;
313         task_lock(task);
314         mm = task->mm;
315         if(mm)
316                 atomic_inc(&mm->mm_users);
317         if (task->tty) {
318                 tty_pgrp = task->tty->pgrp;
319                 tty_nr = kdev_t_to_nr(task->tty->device);
320         }
321         task_unlock(task);
322         if (mm) {
323                 struct vm_area_struct *vma;
324                 down_read(&mm->mmap_sem);
325                 vma = mm->mmap;
326                 while (vma) {
327                         vsize += vma->vm_end - vma->vm_start;
328                         vma = vma->vm_next;
329                 }
330                 eip = KSTK_EIP(task);
331                 esp = KSTK_ESP(task);
332                 up_read(&mm->mmap_sem);
333         }
334
335         wchan = get_wchan(task);
336
337         collect_sigign_sigcatch(task, &sigign, &sigcatch);
338
339         /* scale priority and nice values from timeslices to -20..20 */
340         /* to make it look like a "normal" Unix priority/nice value  */
341         priority = task->counter;
342         priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
343         nice = task->nice;
344
345         read_lock(&tasklist_lock);
346         ppid = task->pid ? task->p_opptr->pid : 0;
347         read_unlock(&tasklist_lock);
348         res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
349 %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
350 %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
351                 task->pid,
352                 task->comm,
353                 state,
354                 ppid,
355                 task->pgrp,
356                 task->session,
357                 tty_nr,
358                 tty_pgrp,
359                 task->flags,
360                 task->min_flt,
361                 task->cmin_flt,
362                 task->maj_flt,
363                 task->cmaj_flt,
364                 task->times.tms_utime,
365                 task->times.tms_stime,
366                 task->times.tms_cutime,
367                 task->times.tms_cstime,
368                 priority,
369                 nice,
370                 0UL /* removed */,
371                 task->it_real_value,
372                 task->start_time,
373                 vsize,
374                 mm ? mm->rss : 0, /* you might want to shift this left 3 */
375                 task->rlim[RLIMIT_RSS].rlim_cur,
376                 mm ? mm->start_code : 0,
377                 mm ? mm->end_code : 0,
378                 mm ? mm->start_stack : 0,
379                 esp,
380                 eip,
381                 /* The signal information here is obsolete.
382                  * It must be decimal for Linux 2.0 compatibility.
383                  * Use /proc/#/status for real-time signals.
384                  */
385                 task->pending.signal.sig[0] & 0x7fffffffUL,
386                 task->blocked.sig[0] & 0x7fffffffUL,
387                 sigign      .sig[0] & 0x7fffffffUL,
388                 sigcatch    .sig[0] & 0x7fffffffUL,
389                 wchan,
390                 task->nswap,
391                 task->cnswap,
392                 task->exit_signal,
393                 task->processor);
394         if(mm)
395                 mmput(mm);
396         return res;
397 }
398                 
399 static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
400         int * pages, int * shared, int * dirty, int * total)
401 {
402         pte_t * pte;
403         unsigned long end;
404
405         if (pmd_none(*pmd))
406                 return;
407         if (pmd_bad(*pmd)) {
408                 pmd_ERROR(*pmd);
409                 pmd_clear(pmd);
410                 return;
411         }
412         pte = pte_offset(pmd, address);
413         address &= ~PMD_MASK;
414         end = address + size;
415         if (end > PMD_SIZE)
416                 end = PMD_SIZE;
417         do {
418                 pte_t page = *pte;
419                 struct page *ptpage;
420
421                 address += PAGE_SIZE;
422                 pte++;
423                 if (pte_none(page))
424                         continue;
425                 ++*total;
426                 if (!pte_present(page))
427                         continue;
428                 ptpage = pte_page(page);
429                 if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
430                         continue;
431                 ++*pages;
432                 if (pte_dirty(page))
433                         ++*dirty;
434                 if (page_count(pte_page(page)) > 1)
435                         ++*shared;
436         } while (address < end);
437 }
438
439 static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size,
440         int * pages, int * shared, int * dirty, int * total)
441 {
442         pmd_t * pmd;
443         unsigned long end;
444
445         if (pgd_none(*pgd))
446                 return;
447         if (pgd_bad(*pgd)) {
448                 pgd_ERROR(*pgd);
449                 pgd_clear(pgd);
450                 return;
451         }
452         pmd = pmd_offset(pgd, address);
453         address &= ~PGDIR_MASK;
454         end = address + size;
455         if (end > PGDIR_SIZE)
456                 end = PGDIR_SIZE;
457         do {
458                 statm_pte_range(pmd, address, end - address, pages, shared, dirty, total);
459                 address = (address + PMD_SIZE) & PMD_MASK;
460                 pmd++;
461         } while (address < end);
462 }
463
464 static void statm_pgd_range(pgd_t * pgd, unsigned long address, unsigned long end,
465         int * pages, int * shared, int * dirty, int * total)
466 {
467         while (address < end) {
468                 statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total);
469                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
470                 pgd++;
471         }
472 }
473
474 int proc_pid_statm(struct task_struct *task, char * buffer)
475 {
476         struct mm_struct *mm;
477         int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;
478
479         task_lock(task);
480         mm = task->mm;
481         if(mm)
482                 atomic_inc(&mm->mm_users);
483         task_unlock(task);
484         if (mm) {
485                 struct vm_area_struct * vma;
486                 down_read(&mm->mmap_sem);
487                 vma = mm->mmap;
488                 while (vma) {
489                         pgd_t *pgd = pgd_offset(mm, vma->vm_start);
490                         int pages = 0, shared = 0, dirty = 0, total = 0;
491
492                         statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
493                         resident += pages;
494                         share += shared;
495                         dt += dirty;
496                         size += total;
497                         if (vma->vm_flags & VM_EXECUTABLE)
498                                 trs += pages;   /* text */
499                         else if (vma->vm_flags & VM_GROWSDOWN)
500                                 drs += pages;   /* stack */
501                         else if (vma->vm_end > 0x60000000)
502                                 lrs += pages;   /* library */
503                         else
504                                 drs += pages;
505                         vma = vma->vm_next;
506                 }
507                 up_read(&mm->mmap_sem);
508                 mmput(mm);
509         }
510         return sprintf(buffer,"%d %d %d %d %d %d %d\n",
511                        size, resident, share, trs, lrs, drs, dt);
512 }
513
514 /*
515  * The way we support synthetic files > 4K
516  * - without storing their contents in some buffer and
517  * - without walking through the entire synthetic file until we reach the
518  *   position of the requested data
519  * is to cleverly encode the current position in the file's f_pos field.
520  * There is no requirement that a read() call which returns `count' bytes
521  * of data increases f_pos by exactly `count'.
522  *
523  * This idea is Linus' one. Bruno implemented it.
524  */
525
526 /*
527  * For the /proc/<pid>/maps file, we use fixed length records, each containing
528  * a single line.
529  *
530  * f_pos = (number of the vma in the task->mm->mmap list) * PAGE_SIZE
531  *         + (index into the line)
532  */
533 /* for systems with sizeof(void*) == 4: */
534 #define MAPS_LINE_FORMAT4         "%08lx-%08lx %s %08lx %s %lu"
535 #define MAPS_LINE_MAX4  49 /* sum of 8  1  8  1 4 1 8 1 5 1 10 1 */
536
537 /* for systems with sizeof(void*) == 8: */
538 #define MAPS_LINE_FORMAT8         "%016lx-%016lx %s %016lx %s %lu"
539 #define MAPS_LINE_MAX8  73 /* sum of 16  1  16  1 4 1 16 1 5 1 10 1 */
540
541 #define MAPS_LINE_FORMAT        (sizeof(void*) == 4 ? MAPS_LINE_FORMAT4 : MAPS_LINE_FORMAT8)
542 #define MAPS_LINE_MAX   (sizeof(void*) == 4 ?  MAPS_LINE_MAX4 :  MAPS_LINE_MAX8)
543
544 static int proc_pid_maps_get_line (char *buf, struct vm_area_struct *map)
545 {
546         /* produce the next line */
547         char *line;
548         char str[5];
549         int flags;
550         kdev_t dev;
551         unsigned long ino;
552         int len;
553
554         flags = map->vm_flags;
555
556         str[0] = flags & VM_READ ? 'r' : '-';
557         str[1] = flags & VM_WRITE ? 'w' : '-';
558         str[2] = flags & VM_EXEC ? 'x' : '-';
559         str[3] = flags & VM_MAYSHARE ? 's' : 'p';
560         str[4] = 0;
561
562         dev = 0;
563         ino = 0;
564         if (map->vm_file != NULL) {
565                 dev = map->vm_file->f_dentry->d_inode->i_dev;
566                 ino = map->vm_file->f_dentry->d_inode->i_ino;
567                 line = d_path(map->vm_file->f_dentry,
568                               map->vm_file->f_vfsmnt,
569                               buf, PAGE_SIZE);
570                 buf[PAGE_SIZE-1] = '\n';
571                 line -= MAPS_LINE_MAX;
572                 if(line < buf)
573                         line = buf;
574         } else
575                 line = buf;
576
577         len = sprintf(line,
578                       MAPS_LINE_FORMAT,
579                       map->vm_start, map->vm_end, str, map->vm_pgoff << PAGE_SHIFT,
580                       kdevname(dev), ino);
581
582         if(map->vm_file) {
583                 int i;
584                 for(i = len; i < MAPS_LINE_MAX; i++)
585                         line[i] = ' ';
586                 len = buf + PAGE_SIZE - line;
587                 memmove(buf, line, len);
588         } else
589                 line[len++] = '\n';
590         return len;
591 }
592
593 ssize_t proc_pid_read_maps (struct task_struct *task, struct file * file, char * buf,
594                           size_t count, loff_t *ppos)
595 {
596         struct mm_struct *mm;
597         struct vm_area_struct * map;
598         char *tmp, *kbuf;
599         long retval;
600         int off, lineno, loff;
601
602         /* reject calls with out of range parameters immediately */
603         retval = 0;
604         if (*ppos > LONG_MAX)
605                 goto out;
606         if (count == 0)
607                 goto out;
608         off = (long)*ppos;
609         /*
610          * We might sleep getting the page, so get it first.
611          */
612         retval = -ENOMEM;
613         kbuf = (char*)__get_free_page(GFP_KERNEL);
614         if (!kbuf)
615                 goto out;
616
617         tmp = (char*)__get_free_page(GFP_KERNEL);
618         if (!tmp)
619                 goto out_free1;
620
621         task_lock(task);
622         mm = task->mm;
623         if (mm)
624                 atomic_inc(&mm->mm_users);
625         task_unlock(task);
626         retval = 0;
627         if (!mm)
628                 goto out_free2;
629
630         down_read(&mm->mmap_sem);
631         map = mm->mmap;
632         lineno = 0;
633         loff = 0;
634         if (count > PAGE_SIZE)
635                 count = PAGE_SIZE;
636         while (map) {
637                 int len;
638                 if (off > PAGE_SIZE) {
639                         off -= PAGE_SIZE;
640                         goto next;
641                 }
642                 len = proc_pid_maps_get_line(tmp, map);
643                 len -= off;
644                 if (len > 0) {
645                         if (retval+len > count) {
646                                 /* only partial line transfer possible */
647                                 len = count - retval;
648                                 /* save the offset where the next read
649                                  * must start */
650                                 loff = len+off;
651                         }
652                         memcpy(kbuf+retval, tmp+off, len);
653                         retval += len;
654                 }
655                 off = 0;
656 next:
657                 if (!loff)
658                         lineno++;
659                 if (retval >= count)
660                         break;
661                 if (loff) BUG();
662                 map = map->vm_next;
663         }
664         up_read(&mm->mmap_sem);
665         mmput(mm);
666
667         if (retval > count) BUG();
668         if (copy_to_user(buf, kbuf, retval))
669                 retval = -EFAULT;
670         else
671                 *ppos = (lineno << PAGE_SHIFT) + loff;
672
673 out_free2:
674         free_page((unsigned long)tmp);
675 out_free1:
676         free_page((unsigned long)kbuf);
677 out:
678         return retval;
679 }
680
#ifdef CONFIG_SMP
/*
 * Fill @buffer with the per-CPU time lines for @task and return the number
 * of bytes produced.
 *
 * The first line, "cpu  <utime> <stime>", carries the task's total user
 * and system times.  It is followed by one "cpu<N> <utime> <stime>" line
 * for each N below smp_num_cpus, read from the per-CPU counters at index
 * cpu_logical_map(N).
 */
int proc_pid_cpu(struct task_struct *task, char * buffer)
{
	char *cur = buffer;
	int cpu;

	cur += sprintf(cur,
		"cpu  %lu %lu\n",
		task->times.tms_utime,
		task->times.tms_stime);

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		cur += sprintf(cur, "cpu%d %lu %lu\n",
			cpu,
			task->per_cpu_utime[cpu_logical_map(cpu)],
			task->per_cpu_stime[cpu_logical_map(cpu)]);
	}

	return cur - buffer;
}
#endif