[JFFS2] Don't trust node headers before the CRC is checked.
[powerpc.git] / fs / jffs2 / readinode.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30         struct rb_node **p = &list->rb_node;
31         struct rb_node * parent = NULL;
32         struct jffs2_tmp_dnode_info *this;
33
34         while (*p) {
35                 parent = *p;
36                 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37
38                 /* There may actually be a collision here, but it doesn't
39                    actually matter. As long as the two nodes with the same
40                    version are together, it's all fine. */
41                 if (tn->version > this->version)
42                         p = &(*p)->rb_left;
43                 else
44                         p = &(*p)->rb_right;
45         }
46
47         rb_link_node(&tn->rb, parent, p);
48         rb_insert_color(&tn->rb, list);
49 }
50
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53         struct rb_node *this;
54         struct jffs2_tmp_dnode_info *tn;
55
56         this = list->rb_node;
57
58         /* Now at bottom of tree */
59         while (this) {
60                 if (this->rb_left)
61                         this = this->rb_left;
62                 else if (this->rb_right)
63                         this = this->rb_right;
64                 else {
65                         tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66                         jffs2_free_full_dnode(tn->fn);
67                         jffs2_free_tmp_dnode_info(tn);
68
69                         this = this->rb_parent;
70                         if (!this)
71                                 break;
72
73                         if (this->rb_left == &tn->rb)
74                                 this->rb_left = NULL;
75                         else if (this->rb_right == &tn->rb)
76                                 this->rb_right = NULL;
77                         else BUG();
78                 }
79         }
80         list->rb_node = NULL;
81 }
82
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85         struct jffs2_full_dirent *next;
86
87         while (fd) {
88                 next = fd->next;
89                 jffs2_free_full_dirent(fd);
90                 fd = next;
91         }
92 }
93
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97         while (ref && ref->next_in_ino) {
98                 if (!ref_obsolete(ref))
99                         return ref;
100                 dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101                 ref = ref->next_in_ino;
102         }
103         return NULL;
104 }
105
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  *          1 if the node should be marked obsolete;
112  *          negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115                                 struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
116                                 uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118         struct jffs2_full_dirent *fd;
119
120         /* The direntry nodes are checked during the flash scanning */
121         BUG_ON(ref_flags(ref) == REF_UNCHECKED);
122         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
123         BUG_ON(ref_obsolete(ref));
124
125         /* Sanity check */
126         if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
127                 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
128                        ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
129                 return 1;
130         }
131
132         fd = jffs2_alloc_full_dirent(rd->nsize + 1);
133         if (unlikely(!fd))
134                 return -ENOMEM;
135
136         fd->raw = ref;
137         fd->version = je32_to_cpu(rd->version);
138         fd->ino = je32_to_cpu(rd->ino);
139         fd->type = rd->type;
140
141         /* Pick out the mctime of the latest dirent */
142         if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
143                 *mctime_ver = fd->version;
144                 *latest_mctime = je32_to_cpu(rd->mctime);
145         }
146
147         /*
148          * Copy as much of the name as possible from the raw
149          * dirent we've already read from the flash.
150          */
151         if (read > sizeof(*rd))
152                 memcpy(&fd->name[0], &rd->name[0],
153                        min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
154
155         /* Do we need to copy any more of the name directly from the flash? */
156         if (rd->nsize + sizeof(*rd) > read) {
157                 /* FIXME: point() */
158                 int err;
159                 int already = read - sizeof(*rd);
160
161                 err = jffs2_flash_read(c, (ref_offset(ref)) + read,
162                                 rd->nsize - already, &read, &fd->name[already]);
163                 if (unlikely(read != rd->nsize - already) && likely(!err))
164                         return -EIO;
165
166                 if (unlikely(err)) {
167                         JFFS2_ERROR("read remainder of name: error %d\n", err);
168                         jffs2_free_full_dirent(fd);
169                         return -EIO;
170                 }
171         }
172
173         fd->nhash = full_name_hash(fd->name, rd->nsize);
174         fd->next = NULL;
175         fd->name[rd->nsize] = '\0';
176
177         /*
178          * Wheee. We now have a complete jffs2_full_dirent structure, with
179          * the name in it and everything. Link it into the list
180          */
181         jffs2_add_fd_to_list(c, fd, fdp);
182
183         return 0;
184 }
185
186 /*
187  * Helper function for jffs2_get_inode_nodes().
188  * It is called every time an inode node is found.
189  *
190  * Returns: 0 on succes;
191  *          1 if the node should be marked obsolete;
192  *          negative error code on failure.
193  */
194 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
195                              struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
196                              uint32_t *latest_mctime, uint32_t *mctime_ver)
197 {
198         struct jffs2_tmp_dnode_info *tn;
199         uint32_t len, csize;
200         int ret = 1;
201
202         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
203         BUG_ON(ref_obsolete(ref));
204
205         tn = jffs2_alloc_tmp_dnode_info();
206         if (!tn) {
207                 JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn));
208                 return -ENOMEM;
209         }
210
211         tn->partial_crc = 0;
212         csize = je32_to_cpu(rd->csize);
213
214         /* If we've never checked the CRCs on this node, check them now */
215         if (ref_flags(ref) == REF_UNCHECKED) {
216                 uint32_t crc;
217
218                 crc = crc32(0, rd, sizeof(*rd) - 8);
219                 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
220                         JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
221                                         ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
222                         goto free_out;
223                 }
224
225                 /* Sanity checks */
226                 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
227                     unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
228                                 JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
229                                 jffs2_dbg_dump_node(c, ref_offset(ref));
230                         goto free_out;
231                 }
232
233                 if (jffs2_is_writebuffered(c) && csize != 0) {
234                         /* At this point we are supposed to check the data CRC
235                          * of our unchecked node. But thus far, we do not
236                          * know whether the node is valid or obsolete. To
237                          * figure this out, we need to walk all the nodes of
238                          * the inode and build the inode fragtree. We don't
239                          * want to spend time checking data of nodes which may
240                          * later be found to be obsolete. So we put off the full
241                          * data CRC checking until we have read all the inode
242                          * nodes and have started building the fragtree.
243                          *
244                          * The fragtree is being built starting with nodes
245                          * having the highest version number, so we'll be able
246                          * to detect whether a node is valid (i.e., it is not
247                          * overlapped by a node with higher version) or not.
248                          * And we'll be able to check only those nodes, which
249                          * are not obsolete.
250                          *
251                          * Of course, this optimization only makes sense in case
252                          * of NAND flashes (or other flashes whith
253                          * !jffs2_can_mark_obsolete()), since on NOR flashes
254                          * nodes are marked obsolete physically.
255                          *
256                          * Since NAND flashes (or other flashes with
257                          * jffs2_is_writebuffered(c)) are anyway read by
258                          * fractions of c->wbuf_pagesize, and we have just read
259                          * the node header, it is likely that the starting part
260                          * of the node data is also read when we read the
261                          * header. So we don't mind to check the CRC of the
262                          * starting part of the data of the node now, and check
263                          * the second part later (in jffs2_check_node_data()).
264                          * Of course, we will not need to re-read and re-check
265                          * the NAND page which we have just read. This is why we
266                          * read the whole NAND page at jffs2_get_inode_nodes(),
267                          * while we needed only the node header.
268                          */
269                         unsigned char *buf;
270
271                         /* 'buf' will point to the start of data */
272                         buf = (unsigned char *)rd + sizeof(*rd);
273                         /* len will be the read data length */
274                         len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
275                         tn->partial_crc = crc32(0, buf, len);
276
277                         dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
278
279                         /* If we actually calculated the whole data CRC
280                          * and it is wrong, drop the node. */
281                         if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
282                                 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
283                                         ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
284                                 goto free_out;
285                         }
286
287                 } else if (csize == 0) {
288                         /*
289                          * We checked the header CRC. If the node has no data, adjust
290                          * the space accounting now. For other nodes this will be done
291                          * later either when the node is marked obsolete or when its
292                          * data is checked.
293                          */
294                         struct jffs2_eraseblock *jeb;
295
296                         dbg_readinode("the node has no data.\n");
297                         jeb = &c->blocks[ref->flash_offset / c->sector_size];
298                         len = ref_totlen(c, jeb, ref);
299
300                         spin_lock(&c->erase_completion_lock);
301                         jeb->used_size += len;
302                         jeb->unchecked_size -= len;
303                         c->used_size += len;
304                         c->unchecked_size -= len;
305                         ref->flash_offset = ref_offset(ref) | REF_NORMAL;
306                         spin_unlock(&c->erase_completion_lock);
307                 }
308         }
309
310         tn->fn = jffs2_alloc_full_dnode();
311         if (!tn->fn) {
312                 JFFS2_ERROR("alloc fn failed\n");
313                 ret = -ENOMEM;
314                 goto free_out;
315         }
316
317         tn->version = je32_to_cpu(rd->version);
318         tn->fn->ofs = je32_to_cpu(rd->offset);
319         tn->data_crc = je32_to_cpu(rd->data_crc);
320         tn->csize = csize;
321         tn->fn->raw = ref;
322
323         /* There was a bug where we wrote hole nodes out with
324            csize/dsize swapped. Deal with it */
325         if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
326                 tn->fn->size = csize;
327         else // normal case...
328                 tn->fn->size = je32_to_cpu(rd->dsize);
329
330         dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
331                   ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
332
333         jffs2_add_tn_to_tree(tn, tnp);
334
335         return 0;
336
337 free_out:
338         jffs2_free_tmp_dnode_info(tn);
339         return ret;
340 }
341
342 /*
343  * Helper function for jffs2_get_inode_nodes().
344  * It is called every time an unknown node is found.
345  *
346  * Returns: 0 on success;
347  *          1 if the node should be marked obsolete;
348  *          negative error code on failure.
349  */
350 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
351 {
352         /* We don't mark unknown nodes as REF_UNCHECKED */
353         BUG_ON(ref_flags(ref) == REF_UNCHECKED);
354
355         un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
356
357         switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
358
359         case JFFS2_FEATURE_INCOMPAT:
360                 JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
361                             je16_to_cpu(un->nodetype), ref_offset(ref));
362                 /* EEP */
363                 BUG();
364                 break;
365
366         case JFFS2_FEATURE_ROCOMPAT:
367                 JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
368                             je16_to_cpu(un->nodetype), ref_offset(ref));
369                 BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
370                 break;
371
372         case JFFS2_FEATURE_RWCOMPAT_COPY:
373                 JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
374                              je16_to_cpu(un->nodetype), ref_offset(ref));
375                 break;
376
377         case JFFS2_FEATURE_RWCOMPAT_DELETE:
378                 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
379                              je16_to_cpu(un->nodetype), ref_offset(ref));
380                 return 1;
381         }
382
383         return 0;
384 }
385
386 /*
387  * Helper function for jffs2_get_inode_nodes().
388  * The function detects whether more data should be read and reads it if yes.
389  *
390  * Returns: 0 on succes;
391  *          negative error code on failure.
392  */
393 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
394                      int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
395 {
396         int right_len, err, len;
397         size_t retlen;
398         uint32_t offs;
399
400         if (jffs2_is_writebuffered(c)) {
401                 right_len = c->wbuf_pagesize - (bufstart - buf);
402                 if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
403                         right_len += c->wbuf_pagesize;
404         } else
405                 right_len = right_size;
406
407         if (*rdlen == right_len)
408                 return 0;
409
410         /* We need to read more data */
411         offs = ref_offset(ref) + *rdlen;
412         if (jffs2_is_writebuffered(c)) {
413                 bufstart = buf + c->wbuf_pagesize;
414                 len = c->wbuf_pagesize;
415         } else {
416                 bufstart = buf + *rdlen;
417                 len = right_size - *rdlen;
418         }
419
420         dbg_readinode("read more %d bytes\n", len);
421
422         err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
423         if (err) {
424                 JFFS2_ERROR("can not read %d bytes from 0x%08x, "
425                         "error code: %d.\n", len, offs, err);
426                 return err;
427         }
428
429         if (retlen < len) {
430                 JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n",
431                                 offs, retlen, len);
432                 return -EIO;
433         }
434
435         *rdlen = right_len;
436
437         return 0;
438 }
439
440 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
441    with this ino, returning the former in order of version */
442 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
443                                  struct rb_root *tnp, struct jffs2_full_dirent **fdp,
444                                  uint32_t *highest_version, uint32_t *latest_mctime,
445                                  uint32_t *mctime_ver)
446 {
447         struct jffs2_raw_node_ref *ref, *valid_ref;
448         struct rb_root ret_tn = RB_ROOT;
449         struct jffs2_full_dirent *ret_fd = NULL;
450         unsigned char *buf = NULL;
451         union jffs2_node_union *node;
452         size_t retlen;
453         int len, err;
454
455         *mctime_ver = 0;
456
457         dbg_readinode("ino #%u\n", f->inocache->ino);
458
459         if (jffs2_is_writebuffered(c)) {
460                 /*
461                  * If we have the write buffer, we assume the minimal I/O unit
462                  * is c->wbuf_pagesize. We implement some optimizations which in
463                  * this case and we need a temporary buffer of size =
464                  * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
465                  * Basically, we want to read not only the node header, but the
466                  * whole wbuf (NAND page in case of NAND) or 2, if the node
467                  * header overlaps the border between the 2 wbufs.
468                  */
469                 len = 2*c->wbuf_pagesize;
470         } else {
471                 /*
472                  * When there is no write buffer, the size of the temporary
473                  * buffer is the size of the larges node header.
474                  */
475                 len = sizeof(union jffs2_node_union);
476         }
477
478         /* FIXME: in case of NOR and available ->point() this
479          * needs to be fixed. */
480         buf = kmalloc(len, GFP_KERNEL);
481         if (!buf)
482                 return -ENOMEM;
483
484         spin_lock(&c->erase_completion_lock);
485         valid_ref = jffs2_first_valid_node(f->inocache->nodes);
486         if (!valid_ref && f->inocache->ino != 1)
487                 JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
488         while (valid_ref) {
489                 unsigned char *bufstart;
490
491                 /* We can hold a pointer to a non-obsolete node without the spinlock,
492                    but _obsolete_ nodes may disappear at any time, if the block
493                    they're in gets erased. So if we mark 'ref' obsolete while we're
494                    not holding the lock, it can go away immediately. For that reason,
495                    we find the next valid node first, before processing 'ref'.
496                 */
497                 ref = valid_ref;
498                 valid_ref = jffs2_first_valid_node(ref->next_in_ino);
499                 spin_unlock(&c->erase_completion_lock);
500
501                 cond_resched();
502
503                 /*
504                  * At this point we don't know the type of the node we're going
505                  * to read, so we do not know the size of its header. In order
506                  * to minimize the amount of flash IO we assume the node has
507                  * size = JFFS2_MIN_NODE_HEADER.
508                  */
509                 if (jffs2_is_writebuffered(c)) {
510                         /*
511                          * We treat 'buf' as 2 adjacent wbufs. We want to
512                          * adjust bufstart such as it points to the
513                          * beginning of the node within this wbuf.
514                          */
515                         bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
516                         /* We will read either one wbuf or 2 wbufs. */
517                         len = c->wbuf_pagesize - (bufstart - buf);
518                         if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
519                                 /* The header spans the border of the first wbuf */
520                                 len += c->wbuf_pagesize;
521                         }
522                 } else {
523                         bufstart = buf;
524                         len = JFFS2_MIN_NODE_HEADER;
525                 }
526
527                 dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
528
529                 /* FIXME: point() */
530                 err = jffs2_flash_read(c, ref_offset(ref), len,
531                                        &retlen, bufstart);
532                 if (err) {
533                         JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
534                         goto free_out;
535                 }
536
537                 if (retlen < len) {
538                         JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len);
539                         err = -EIO;
540                         goto free_out;
541                 }
542
543                 node = (union jffs2_node_union *)bufstart;
544
545                 /* No need to mask in the valid bit; it shouldn't be invalid */
546                 if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) {
547                         JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n",
548                                      ref_offset(ref), je16_to_cpu(node->u.magic),
549                                      je16_to_cpu(node->u.nodetype),
550                                      je32_to_cpu(node->u.totlen),
551                                      je32_to_cpu(node->u.hdr_crc));
552                         jffs2_dbg_dump_node(c, ref_offset(ref));
553                         jffs2_mark_node_obsolete(c, ref);
554                         goto cont;
555                 }
556
557                 switch (je16_to_cpu(node->u.nodetype)) {
558
559                 case JFFS2_NODETYPE_DIRENT:
560
561                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
562                                 err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
563                                 if (unlikely(err))
564                                         goto free_out;
565                         }
566
567                         err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
568                         if (err == 1) {
569                                 jffs2_mark_node_obsolete(c, ref);
570                                 break;
571                         } else if (unlikely(err))
572                                 goto free_out;
573
574                         if (je32_to_cpu(node->d.version) > *highest_version)
575                                 *highest_version = je32_to_cpu(node->d.version);
576
577                         break;
578
579                 case JFFS2_NODETYPE_INODE:
580
581                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
582                                 err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
583                                 if (unlikely(err))
584                                         goto free_out;
585                         }
586
587                         err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
588                         if (err == 1) {
589                                 jffs2_mark_node_obsolete(c, ref);
590                                 break;
591                         } else if (unlikely(err))
592                                 goto free_out;
593
594                         if (je32_to_cpu(node->i.version) > *highest_version)
595                                 *highest_version = je32_to_cpu(node->i.version);
596
597                         break;
598
599                 default:
600                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
601                                 err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
602                                 if (unlikely(err))
603                                         goto free_out;
604                         }
605
606                         err = read_unknown(c, ref, &node->u);
607                         if (err == 1) {
608                                 jffs2_mark_node_obsolete(c, ref);
609                                 break;
610                         } else if (unlikely(err))
611                                 goto free_out;
612
613                 }
614         cont:
615                 spin_lock(&c->erase_completion_lock);
616         }
617
618         spin_unlock(&c->erase_completion_lock);
619         *tnp = ret_tn;
620         *fdp = ret_fd;
621         kfree(buf);
622
623         dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
624                         f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
625         return 0;
626
627  free_out:
628         jffs2_free_tmp_dnode_info_list(&ret_tn);
629         jffs2_free_full_dirent_list(ret_fd);
630         kfree(buf);
631         return err;
632 }
633
634 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
635                                         struct jffs2_inode_info *f,
636                                         struct jffs2_raw_inode *latest_node)
637 {
638         struct jffs2_tmp_dnode_info *tn;
639         struct rb_root tn_list;
640         struct rb_node *rb, *repl_rb;
641         struct jffs2_full_dirent *fd_list;
642         struct jffs2_full_dnode *fn, *first_fn = NULL;
643         uint32_t crc;
644         uint32_t latest_mctime, mctime_ver;
645         size_t retlen;
646         int ret;
647
648         dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
649
650         /* Grab all nodes relevant to this ino */
651         ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
652
653         if (ret) {
654                 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
655                 if (f->inocache->state == INO_STATE_READING)
656                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
657                 return ret;
658         }
659         f->dents = fd_list;
660
661         rb = rb_first(&tn_list);
662
663         while (rb) {
664                 cond_resched();
665                 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
666                 fn = tn->fn;
667                 ret = 1;
668                 dbg_readinode("consider node ver %u, phys offset "
669                         "%#08x(%d), range %u-%u.\n", tn->version,
670                         ref_offset(fn->raw), ref_flags(fn->raw),
671                         fn->ofs, fn->ofs + fn->size);
672
673                 if (fn->size) {
674                         ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
675                         /* TODO: the error code isn't checked, check it */
676                         jffs2_dbg_fragtree_paranoia_check_nolock(f);
677                         BUG_ON(ret < 0);
678                         if (!first_fn && ret == 0)
679                                 first_fn = fn;
680                 } else if (!first_fn) {
681                         first_fn = fn;
682                         f->metadata = fn;
683                         ret = 0; /* Prevent freeing the metadata update node */
684                 } else
685                         jffs2_mark_node_obsolete(c, fn->raw);
686
687                 BUG_ON(rb->rb_left);
688                 if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
689                         /* We were then left-hand child of our parent. We need
690                          * to move our own right-hand child into our place. */
691                         repl_rb = rb->rb_right;
692                         if (repl_rb)
693                                 repl_rb->rb_parent = rb->rb_parent;
694                 } else
695                         repl_rb = NULL;
696
697                 rb = rb_next(rb);
698
699                 /* Remove the spent tn from the tree; don't bother rebalancing
700                  * but put our right-hand child in our own place. */
701                 if (tn->rb.rb_parent) {
702                         if (tn->rb.rb_parent->rb_left == &tn->rb)
703                                 tn->rb.rb_parent->rb_left = repl_rb;
704                         else if (tn->rb.rb_parent->rb_right == &tn->rb)
705                                 tn->rb.rb_parent->rb_right = repl_rb;
706                         else BUG();
707                 } else if (tn->rb.rb_right)
708                         tn->rb.rb_right->rb_parent = NULL;
709
710                 jffs2_free_tmp_dnode_info(tn);
711                 if (ret) {
712                         dbg_readinode("delete dnode %u-%u.\n",
713                                 fn->ofs, fn->ofs + fn->size);
714                         jffs2_free_full_dnode(fn);
715                 }
716         }
717         jffs2_dbg_fragtree_paranoia_check_nolock(f);
718
719         BUG_ON(first_fn && ref_obsolete(first_fn->raw));
720
721         fn = first_fn;
722         if (unlikely(!first_fn)) {
723                 /* No data nodes for this inode. */
724                 if (f->inocache->ino != 1) {
725                         JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
726                         if (!fd_list) {
727                                 if (f->inocache->state == INO_STATE_READING)
728                                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
729                                 return -EIO;
730                         }
731                         JFFS2_NOTICE("but it has children so we fake some modes for it\n");
732                 }
733                 latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
734                 latest_node->version = cpu_to_je32(0);
735                 latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
736                 latest_node->isize = cpu_to_je32(0);
737                 latest_node->gid = cpu_to_je16(0);
738                 latest_node->uid = cpu_to_je16(0);
739                 if (f->inocache->state == INO_STATE_READING)
740                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
741                 return 0;
742         }
743
744         ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
745         if (ret || retlen != sizeof(*latest_node)) {
746                 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
747                         ret, retlen, sizeof(*latest_node));
748                 /* FIXME: If this fails, there seems to be a memory leak. Find it. */
749                 up(&f->sem);
750                 jffs2_do_clear_inode(c, f);
751                 return ret?ret:-EIO;
752         }
753
754         crc = crc32(0, latest_node, sizeof(*latest_node)-8);
755         if (crc != je32_to_cpu(latest_node->node_crc)) {
756                 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
757                         f->inocache->ino, ref_offset(fn->raw));
758                 up(&f->sem);
759                 jffs2_do_clear_inode(c, f);
760                 return -EIO;
761         }
762
763         switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
764         case S_IFDIR:
765                 if (mctime_ver > je32_to_cpu(latest_node->version)) {
766                         /* The times in the latest_node are actually older than
767                            mctime in the latest dirent. Cheat. */
768                         latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
769                 }
770                 break;
771
772
773         case S_IFREG:
774                 /* If it was a regular file, truncate it to the latest node's isize */
775                 jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
776                 break;
777
778         case S_IFLNK:
779                 /* Hack to work around broken isize in old symlink code.
780                    Remove this when dwmw2 comes to his senses and stops
781                    symlinks from being an entirely gratuitous special
782                    case. */
783                 if (!je32_to_cpu(latest_node->isize))
784                         latest_node->isize = latest_node->dsize;
785
786                 if (f->inocache->state != INO_STATE_CHECKING) {
787                         /* Symlink's inode data is the target path. Read it and
788                          * keep in RAM to facilitate quick follow symlink
789                          * operation. */
790                         f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
791                         if (!f->target) {
792                                 JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
793                                 up(&f->sem);
794                                 jffs2_do_clear_inode(c, f);
795                                 return -ENOMEM;
796                         }
797
798                         ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
799                                                 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
800
801                         if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
802                                 if (retlen != je32_to_cpu(latest_node->csize))
803                                         ret = -EIO;
804                                 kfree(f->target);
805                                 f->target = NULL;
806                                 up(&f->sem);
807                                 jffs2_do_clear_inode(c, f);
808                                 return -ret;
809                         }
810
811                         f->target[je32_to_cpu(latest_node->csize)] = '\0';
812                         dbg_readinode("symlink's target '%s' cached\n", f->target);
813                 }
814
815                 /* fall through... */
816
817         case S_IFBLK:
818         case S_IFCHR:
819                 /* Certain inode types should have only one data node, and it's
820                    kept as the metadata node */
821                 if (f->metadata) {
822                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
823                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
824                         up(&f->sem);
825                         jffs2_do_clear_inode(c, f);
826                         return -EIO;
827                 }
828                 if (!frag_first(&f->fragtree)) {
829                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
830                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
831                         up(&f->sem);
832                         jffs2_do_clear_inode(c, f);
833                         return -EIO;
834                 }
835                 /* ASSERT: f->fraglist != NULL */
836                 if (frag_next(frag_first(&f->fragtree))) {
837                         JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
838                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
839                         /* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
840                         up(&f->sem);
841                         jffs2_do_clear_inode(c, f);
842                         return -EIO;
843                 }
844                 /* OK. We're happy */
845                 f->metadata = frag_first(&f->fragtree)->node;
846                 jffs2_free_node_frag(frag_first(&f->fragtree));
847                 f->fragtree = RB_ROOT;
848                 break;
849         }
850         if (f->inocache->state == INO_STATE_READING)
851                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
852
853         return 0;
854 }
855
856 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
857 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
858                         uint32_t ino, struct jffs2_raw_inode *latest_node)
859 {
860         dbg_readinode("read inode #%u\n", ino);
861
862  retry_inocache:
863         spin_lock(&c->inocache_lock);
864         f->inocache = jffs2_get_ino_cache(c, ino);
865
866         if (f->inocache) {
867                 /* Check its state. We may need to wait before we can use it */
868                 switch(f->inocache->state) {
869                 case INO_STATE_UNCHECKED:
870                 case INO_STATE_CHECKEDABSENT:
871                         f->inocache->state = INO_STATE_READING;
872                         break;
873
874                 case INO_STATE_CHECKING:
875                 case INO_STATE_GC:
876                         /* If it's in either of these states, we need
877                            to wait for whoever's got it to finish and
878                            put it back. */
879                         dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
880                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
881                         goto retry_inocache;
882
883                 case INO_STATE_READING:
884                 case INO_STATE_PRESENT:
885                         /* Eep. This should never happen. It can
886                         happen if Linux calls read_inode() again
887                         before clear_inode() has finished though. */
888                         JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
889                         /* Fail. That's probably better than allowing it to succeed */
890                         f->inocache = NULL;
891                         break;
892
893                 default:
894                         BUG();
895                 }
896         }
897         spin_unlock(&c->inocache_lock);
898
899         if (!f->inocache && ino == 1) {
900                 /* Special case - no root inode on medium */
901                 f->inocache = jffs2_alloc_inode_cache();
902                 if (!f->inocache) {
903                         JFFS2_ERROR("cannot allocate inocache for root inode\n");
904                         return -ENOMEM;
905                 }
906                 dbg_readinode("creating inocache for root inode\n");
907                 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
908                 f->inocache->ino = f->inocache->nlink = 1;
909                 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
910                 f->inocache->state = INO_STATE_READING;
911                 jffs2_add_ino_cache(c, f->inocache);
912         }
913         if (!f->inocache) {
914                 JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
915                 return -ENOENT;
916         }
917
918         return jffs2_do_read_inode_internal(c, f, latest_node);
919 }
920
921 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
922 {
923         struct jffs2_raw_inode n;
924         struct jffs2_inode_info *f = kmalloc(sizeof(*f), GFP_KERNEL);
925         int ret;
926
927         if (!f)
928                 return -ENOMEM;
929
930         memset(f, 0, sizeof(*f));
931         init_MUTEX_LOCKED(&f->sem);
932         f->inocache = ic;
933
934         ret = jffs2_do_read_inode_internal(c, f, &n);
935         if (!ret) {
936                 up(&f->sem);
937                 jffs2_do_clear_inode(c, f);
938         }
939         kfree (f);
940         return ret;
941 }
942
943 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
944 {
945         struct jffs2_full_dirent *fd, *fds;
946         int deleted;
947
948         down(&f->sem);
949         deleted = f->inocache && !f->inocache->nlink;
950
951         if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
952                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
953
954         if (f->metadata) {
955                 if (deleted)
956                         jffs2_mark_node_obsolete(c, f->metadata->raw);
957                 jffs2_free_full_dnode(f->metadata);
958         }
959
960         jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
961
962         if (f->target) {
963                 kfree(f->target);
964                 f->target = NULL;
965         }
966
967         fds = f->dents;
968         while(fds) {
969                 fd = fds;
970                 fds = fd->next;
971                 jffs2_free_full_dirent(fd);
972         }
973
974         if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
975                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
976                 if (f->inocache->nodes == (void *)f->inocache)
977                         jffs2_del_ino_cache(c, f->inocache);
978         }
979
980         up(&f->sem);
981 }