/*
 *      fs/proc/kcore.c kernel ELF/AOUT core dumper
 *
 *      Modelled on fs/exec.c:aout_core_dump()
 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/io.h>


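/* /proc/kcore may only be opened by processes holding CAP_SYS_RAWIO. */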
static int open_kcore(struct inode * inode, struct file * filp)
{
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}

static loff_t lseek_kcore(struct file * file, loff_t offset, int origin);

static ssize_t read_kcore(struct file *, char *, size_t, loff_t *);

struct file_operations proc_kcore_operations = {
        read:           read_kcore,
        open:           open_kcore,
        llseek:         lseek_kcore,
};

#ifdef CONFIG_KCORE_AOUT
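/*
 * a.out format: the image starts with a fake struct user dump header,
 * followed by zeros padding out to where the memory image begins (this
 * also covers page 0 on the architectures where it is not mapped), and
 * then the direct-mapped memory starting at PAGE_OFFSET.
 */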
static ssize_t read_kcore(struct file *file, char *buf, size_t count, loff_t *ppos)
{
        unsigned long long p = *ppos, memsize;
        ssize_t read;
        ssize_t count1;
        char * pnt;
        struct user dump;
#if defined (__i386__) || defined (__mc68000__) || defined(__x86_64__)
#       define FIRST_MAPPED     PAGE_SIZE       /* we don't have page 0 mapped on x86.. */
#else
#       define FIRST_MAPPED     0
#endif

        memset(&dump, 0, sizeof(struct user));
        dump.magic = CMAGIC;
        dump.u_dsize = (virt_to_phys(high_memory) >> PAGE_SHIFT);
#if defined (__i386__) || defined(__x86_64__)
        dump.start_code = PAGE_OFFSET;
#endif
#ifdef __alpha__
        dump.start_data = PAGE_OFFSET;
#endif

        memsize = virt_to_phys(high_memory);
        if (p >= memsize)
                return 0;
        if (count > memsize - p)
                count = memsize - p;
        read = 0;

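        /* First, satisfy any part of the read that falls within the fake dump header. */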
        if (p < sizeof(struct user) && count > 0) {
                count1 = count;
                if (p + count1 > sizeof(struct user))
                        count1 = sizeof(struct user)-p;
                pnt = (char *) &dump + p;
                if (copy_to_user(buf,(void *) pnt, count1))
                        return -EFAULT;
                buf += count1;
                p += count1;
                count -= count1;
                read += count1;
        }

        if (count > 0 && p < PAGE_SIZE + FIRST_MAPPED) {
                count1 = PAGE_SIZE + FIRST_MAPPED - p;
                if (count1 > count)
                        count1 = count;
                if (clear_user(buf, count1))
                        return -EFAULT;
                buf += count1;
                p += count1;
                count -= count1;
                read += count1;
        }
        if (count > 0) {
                if (copy_to_user(buf, (void *) (PAGE_OFFSET+p-PAGE_SIZE), count))
                        return -EFAULT;
                read += count;
                p += count;
        }
        *ppos = p;
        return read;
}
#else /* CONFIG_KCORE_AOUT */

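/*
 * ELF format: an ELF core header (program headers plus notes) is built
 * on the fly, followed by the direct-mapped region and every vmalloc'd
 * area.  The resulting image can be examined with the usual core-dump
 * tools, e.g. "gdb vmlinux /proc/kcore".
 */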
#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

extern char saved_command_line[];

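/*
 * Work out how big the kcore image is: the direct-mapped region plus
 * however far the vmalloc'd areas extend, and how much room the ELF
 * header, program headers and notes will need at the front.
 */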
static unsigned long get_kcore_size(int *num_vma, size_t *elf_buflen)
{
        unsigned long try, size;
        struct vm_struct *m;

        *num_vma = 0;
        size = ((size_t)high_memory - PAGE_OFFSET + PAGE_SIZE);
        if (!vmlist) {
                *elf_buflen = PAGE_SIZE;
                return (size);
        }

        for (m=vmlist; m; m=m->next) {
                try = (unsigned long)m->addr + m->size;
                if (try > size)
                        size = try;
                *num_vma = *num_vma + 1;
        }
        *elf_buflen =   sizeof(struct elfhdr) +
                        (*num_vma + 2)*sizeof(struct elf_phdr) +
                        3 * (sizeof(struct elf_note) + 4) +
                        sizeof(struct elf_prstatus) +
                        sizeof(struct elf_prpsinfo) +
                        sizeof(struct task_struct);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return (size - PAGE_OFFSET + *elf_buflen);
}


/*****************************************************************************/
/*
 * determine size of ELF note
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */

/*****************************************************************************/
/*
 * store a note in the header buffer
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name);
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */

/*
 * store an ELF coredump header in the supplied buffer
 * num_vma is the number of elements in vmlist
 */
static void elf_kcore_store_hdr(char *bufp, int num_vma, int dataoff)
{
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct vm_struct *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS]  = ELF_CLASS;
        elf->e_ident[EI_DATA]   = ELF_DATA;
        elf->e_ident[EI_VERSION]= EV_CURRENT;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type     = ET_CORE;
        elf->e_machine  = ELF_ARCH;
        elf->e_version  = EV_CURRENT;
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
        elf->e_flags    = 0;
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = 2 + num_vma;
        elf->e_shentsize= 0;
        elf->e_shnum    = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type    = PT_NOTE;
        nhdr->p_offset  = 0;
        nhdr->p_vaddr   = 0;
        nhdr->p_paddr   = 0;
        nhdr->p_filesz  = 0;
        nhdr->p_memsz   = 0;
        nhdr->p_flags   = 0;
        nhdr->p_align   = 0;

        /* setup ELF PT_LOAD program header for the
         * virtual range 0xc0000000 -> high_memory */
        phdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        phdr->p_type    = PT_LOAD;
        phdr->p_flags   = PF_R|PF_W|PF_X;
        phdr->p_offset  = dataoff;
        phdr->p_vaddr   = PAGE_OFFSET;
        phdr->p_paddr   = __pa(PAGE_OFFSET);
        phdr->p_filesz  = phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
        phdr->p_align   = PAGE_SIZE;

        /* setup ELF PT_LOAD program header for every vmalloc'd area */
        for (m=vmlist; m; m=m->next) {
                if (m->flags & VM_IOREMAP) /* don't dump ioremap'd stuff! (TA) */
                        continue;

                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type    = PT_LOAD;
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = (size_t)m->addr - PAGE_OFFSET + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
                phdr->p_paddr   = __pa(m->addr);
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset  = offset;

        /* set up the process status */
        notes[0].name = "CORE";
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz += notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name   = "CORE";
        notes[1].type   = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data   = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state       = 0;
        prpsinfo.pr_sname       = 'R';
        prpsinfo.pr_zomb        = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);

        nhdr->p_filesz += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name   = "CORE";
        notes[2].type   = NT_TASKSTRUCT;
        notes[2].datasz = sizeof(struct task_struct);
        notes[2].data   = current;

        nhdr->p_filesz += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);

} /* end elf_kcore_store_hdr() */

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 */
static ssize_t read_kcore(struct file *file, char *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        unsigned long size, tsz;
        size_t elf_buflen;
        int num_vma;
        unsigned long start;

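        /* Size the image and build the ELF headers with the vmlist read lock
         * held, so the program headers match the vmalloc areas they describe
         * (hence the GFP_ATOMIC allocation below). */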
        read_lock(&vmlist_lock);
        proc_root_kcore->size = size = get_kcore_size(&num_vma, &elf_buflen);
        if (buflen == 0 || (unsigned long long)*fpos >= size) {
                read_unlock(&vmlist_lock);
                return 0;
        }

        /* trim buflen to not go beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char * elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&vmlist_lock);
                        return -ENOMEM;
                }
                memset(elf_buf, 0, elf_buflen);
                elf_kcore_store_hdr(elf_buf, num_vma, elf_buflen);
                read_unlock(&vmlist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* leave now if filled buffer already */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&vmlist_lock);

        /* where page 0 not mapped, write zeros into buffer */
#if defined (__i386__) || defined (__mc68000__) || defined(__x86_64__)
        if (*fpos < PAGE_SIZE + elf_buflen) {
                /* work out how much to clear */
                tsz = PAGE_SIZE + elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;

                /* write zeros to buffer */
                if (clear_user(buffer, tsz))
                        return -EFAULT;
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* leave now if filled buffer already */
                if (buflen == 0)
                        return acc;     /* acc includes any header bytes copied above */
        }
#endif

        /*
         * Fill the remainder of the buffer from kernel VM space.
         * We said in the ELF header that the data which starts
         * at 'elf_buflen' is virtual address PAGE_OFFSET. --rmk
         */
        start = PAGE_OFFSET + (*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;
        while (buflen) {
                int err;

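                /* Within the direct mapping we copy (or zero-fill invalid
                 * pages) straight from the kernel address; anything outside
                 * it is staged through a temporary buffer built below. */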
                if ((start > PAGE_OFFSET) && (start < (unsigned long)high_memory)) {
                        if (kern_addr_valid(start)) {
                                err = copy_to_user(buffer, (char *)start, tsz);
                        } else {
                                err = clear_user(buffer, tsz);
                        }
                } else {
                        char * elf_buf;
                        struct vm_struct *m;
                        unsigned long curstart = start;
                        unsigned long cursize = tsz;

                        elf_buf = kmalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        memset(elf_buf, 0, tsz);

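                        /* Assemble this chunk from the vmalloc areas that
                         * overlap it; m->size includes the trailing guard
                         * page, hence the PAGE_SIZE subtraction below.
                         * Anything not covered by a vmalloc area stays
                         * zero-filled. */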
                        read_lock(&vmlist_lock);
                        for (m=vmlist; m && cursize; m=m->next) {
                                unsigned long vmstart;
                                unsigned long vmsize;
                                unsigned long msize = m->size - PAGE_SIZE;

                                if (((unsigned long)m->addr + msize) <
                                                                curstart)
                                        continue;
                                if ((unsigned long)m->addr > (curstart +
                                                                cursize))
                                        break;
                                vmstart = (curstart < (unsigned long)m->addr ?
                                        (unsigned long)m->addr : curstart);
                                if (((unsigned long)m->addr + msize) >
                                                        (curstart + cursize))
                                        vmsize = curstart + cursize - vmstart;
                                else
                                        vmsize = (unsigned long)m->addr +
                                                        msize - vmstart;
                                curstart = vmstart + vmsize;
                                cursize -= vmsize;
                                /* don't dump ioremap'd stuff! (TA) */
                                if (m->flags & VM_IOREMAP)
                                        continue;
                                memcpy(elf_buf + (vmstart - start),
                                        (char *)vmstart, vmsize);
                        }
                        read_unlock(&vmlist_lock);
                        err = copy_to_user(buffer, elf_buf, tsz);
                        kfree(elf_buf);
                }
                if (err)
                        return -EFAULT;
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}
#endif /* CONFIG_KCORE_AOUT */

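/* llseek: offsets may be absolute (0), relative to the current position (1),
 * or relative to the end of the kcore image (2). */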
static loff_t lseek_kcore(struct file * file, loff_t offset, int origin)
{
        switch (origin) {
                case 2:
                        offset += file->f_dentry->d_inode->i_size;
                        break;
                case 1:
                        offset += file->f_pos;
        }
        /* RED-PEN user can fake an error here by setting offset to >=-4095 && <0  */
        file->f_pos = offset;
        return offset;
}