import of upstream 2.4.34.4 from kernel.org
[linux-2.4.git] / fs / jfs / jfs_logmgr.c
1 /*
2  *   Copyright (c) International Business Machines Corp., 2000-2003
3  *   Portions Copyright (c) Christoph Hellwig, 2001-2002
4  *
5  *   This program is free software;  you can redistribute it and/or modify
6  *   it under the terms of the GNU General Public License as published by
7  *   the Free Software Foundation; either version 2 of the License, or 
8  *   (at your option) any later version.
9  * 
10  *   This program is distributed in the hope that it will be useful,
11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13  *   the GNU General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program;  if not, write to the Free Software 
17  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19
20 /*
21  *      jfs_logmgr.c: log manager
22  *
23  * for related information, see transaction manager (jfs_txnmgr.c), and
24  * recovery manager (jfs_logredo.c).
25  *
26  * note: for detail, RTFS.
27  *
28  *      log buffer manager:
29  * special purpose buffer manager supporting log i/o requirements.
30  * per log serial pageout of logpage
31  * queuing i/o requests and redrive i/o at iodone
32  * maintain current logpage buffer
33  * no caching since append only
34  * appropriate jfs buffer cache buffers as needed
35  *
36  *      group commit:
37  * transactions which wrote COMMIT records in the same in-memory
38  * log page during the pageout of previous/current log page(s) are
39  * committed together by the pageout of the page.
40  *
41  *      TBD lazy commit:
42  * transactions are committed asynchronously when the log page
43  * containing its COMMIT record is paged out when the page becomes full;
44  *
45  *      serialization:
46  * . a per log lock serialize log write.
47  * . a per log lock serialize group commit.
48  * . a per log lock serialize log open/close;
49  *
50  *      TBD log integrity:
51  * careful-write (ping-pong) of last logpage to recover from crash
52  * in overwrite.
53  * detection of split (out-of-order) write of physical sectors
54  * of last logpage via timestamp at end of each sector
55  * (with its mirror data array at the trailer).
56  *
57  *      alternatives:
58  * lsn - 64-bit monotonically increasing integer vs
59  * 32-bit lspn and page eor.
60  */
61
62 #include <linux/fs.h>
63 #include <linux/locks.h>
64 #include <linux/blkdev.h>
65 #include <linux/interrupt.h>
66 #include <linux/smp_lock.h>
67 #include <linux/completion.h>
68 #include "jfs_incore.h"
69 #include "jfs_filsys.h"
70 #include "jfs_metapage.h"
71 #include "jfs_txnmgr.h"
72 #include "jfs_debug.h"
73
74
75 /*
76  * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
77  */
78 static struct lbuf *log_redrive_list;
79 static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;
80 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
81
82
83 /*
84  *      log read/write serialization (per log)
85  */
86 #define LOG_LOCK_INIT(log)      init_MUTEX(&(log)->loglock)
87 #define LOG_LOCK(log)           down(&((log)->loglock))
88 #define LOG_UNLOCK(log)         up(&((log)->loglock))
89
90
91 /*
92  *      log group commit serialization (per log)
93  */
94
95 #define LOGGC_LOCK_INIT(log)    spin_lock_init(&(log)->gclock)
96 #define LOGGC_LOCK(log)         spin_lock_irq(&(log)->gclock)
97 #define LOGGC_UNLOCK(log)       spin_unlock_irq(&(log)->gclock)
98 #define LOGGC_WAKEUP(tblk)      wake_up_all(&(tblk)->gcwait)
99
100 /*
101  *      log sync serialization (per log)
102  */
103 #define LOGSYNC_DELTA(logsize)          min((logsize)/8, 128*LOGPSIZE)
104 #define LOGSYNC_BARRIER(logsize)        ((logsize)/4)
105 /*
106 #define LOGSYNC_DELTA(logsize)          min((logsize)/4, 256*LOGPSIZE)
107 #define LOGSYNC_BARRIER(logsize)        ((logsize)/2)
108 */
109
110
111 /*
112  *      log buffer cache synchronization
113  */
114 static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;
115
116 #define LCACHE_LOCK(flags)      spin_lock_irqsave(&jfsLCacheLock, flags)
117 #define LCACHE_UNLOCK(flags)    spin_unlock_irqrestore(&jfsLCacheLock, flags)
118
119 /*
120  * See __SLEEP_COND in jfs_locks.h
121  */
122 #define LCACHE_SLEEP_COND(wq, cond, flags)      \
123 do {                                            \
124         if (cond)                               \
125                 break;                          \
126         __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
127 } while (0)
128
129 #define LCACHE_WAKEUP(event)    wake_up(event)
130
131
132 /*
133  *      lbuf buffer cache (lCache) control
134  */
135 /* log buffer manager pageout control (cumulative, inclusive) */
136 #define lbmREAD         0x0001
137 #define lbmWRITE        0x0002  /* enqueue at tail of write queue;
138                                  * init pageout if at head of queue;
139                                  */
140 #define lbmRELEASE      0x0004  /* remove from write queue
141                                  * at completion of pageout;
142                                  * do not free/recycle it yet:
143                                  * caller will free it;
144                                  */
145 #define lbmSYNC         0x0008  /* do not return to freelist
146                                  * when removed from write queue;
147                                  */
148 #define lbmFREE         0x0010  /* return to freelist
149                                  * at completion of pageout;
150                                  * the buffer may be recycled;
151                                  */
152 #define lbmDONE         0x0020
153 #define lbmERROR        0x0040
154 #define lbmGC           0x0080  /* lbmIODone to perform post-GC processing
155                                  * of log page
156                                  */
157 #define lbmDIRECT       0x0100
158
159 /*
160  * external references
161  */
162 extern void txLazyUnlock(struct tblock * tblk);
163 extern int jfs_stop_threads;
164 extern struct completion jfsIOwait;
165
166 /*
167  * forward references
168  */
169 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
170                          struct lrd * lrd, struct tlock * tlck);
171
172 static int lmNextPage(struct jfs_log * log);
173 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate);
174
175 static int lbmLogInit(struct jfs_log * log);
176 static void lbmLogShutdown(struct jfs_log * log);
177 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
178 static void lbmFree(struct lbuf * bp);
179 static void lbmfree(struct lbuf * bp);
180 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
181 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
182                      int cant_block);
183 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
184 static int lbmIOWait(struct lbuf * bp, int flag);
185 static void lbmIODone(struct buffer_head *bh, int);
186 static void lbmStartIO(struct lbuf * bp);
187 static void lmGCwrite(struct jfs_log * log, int cant_block);
188 static int lmLogSync(struct jfs_log * log, int nosyncwait);
189
190
191 /*
192  *      statistics
193  */
194 #ifdef CONFIG_JFS_STATISTICS
195 struct lmStat {
196         uint commit;            /* # of commit */
197         uint pagedone;          /* # of page written */
198         uint submitted;         /* # of pages submitted */
199         uint full_page;         /* # of full pages submitted */
200         uint partial_page;      /* # of partial pages submitted */
201 } lmStat;
202 #endif
203
204
205 /*
206  * NAME:        lmLog()
207  *
208  * FUNCTION:    write a log record;
209  *
210  * PARAMETER:
211  *
212  * RETURN:      lsn - offset to the next log record to write (end-of-log);
213  *              -1  - error;
214  *
215  * note: todo: log error handler
216  */
217 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
218           struct tlock * tlck)
219 {
220         int lsn;
221         int diffp, difft;
222         struct metapage *mp = NULL;
223
224         jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
225                  log, tblk, lrd, tlck);
226
227         LOG_LOCK(log);
228
229         /* log by (out-of-transaction) JFS ? */
230         if (tblk == NULL)
231                 goto writeRecord;
232
233         /* log from page ? */
234         if (tlck == NULL ||
235             tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
236                 goto writeRecord;
237
238         /*
239          *      initialize/update page/transaction recovery lsn
240          */
241         lsn = log->lsn;
242
243         LOGSYNC_LOCK(log);
244
245         /*
246          * initialize page lsn if first log write of the page
247          */
248         if (mp->lsn == 0) {
249                 mp->log = log;
250                 mp->lsn = lsn;
251                 log->count++;
252
253                 /* insert page at tail of logsynclist */
254                 list_add_tail(&mp->synclist, &log->synclist);
255         }
256
257         /*
258          *      initialize/update lsn of tblock of the page
259          *
260          * transaction inherits oldest lsn of pages associated
261          * with allocation/deallocation of resources (their
262          * log records are used to reconstruct allocation map
263          * at recovery time: inode for inode allocation map,
264          * B+-tree index of extent descriptors for block
265          * allocation map);
266          * allocation map pages inherit transaction lsn at
267          * commit time to allow forwarding log syncpt past log
268          * records associated with allocation/deallocation of
269          * resources only after persistent map of these map pages
270          * have been updated and propagated to home.
271          */
272         /*
273          * initialize transaction lsn:
274          */
275         if (tblk->lsn == 0) {
276                 /* inherit lsn of its first page logged */
277                 tblk->lsn = mp->lsn;
278                 log->count++;
279
280                 /* insert tblock after the page on logsynclist */
281                 list_add(&tblk->synclist, &mp->synclist);
282         }
283         /*
284          * update transaction lsn:
285          */
286         else {
287                 /* inherit oldest/smallest lsn of page */
288                 logdiff(diffp, mp->lsn, log);
289                 logdiff(difft, tblk->lsn, log);
290                 if (diffp < difft) {
291                         /* update tblock lsn with page lsn */
292                         tblk->lsn = mp->lsn;
293
294                         /* move tblock after page on logsynclist */
295                         list_del(&tblk->synclist);
296                         list_add(&tblk->synclist, &mp->synclist);
297                 }
298         }
299
300         LOGSYNC_UNLOCK(log);
301
302         /*
303          *      write the log record
304          */
305       writeRecord:
306         lsn = lmWriteRecord(log, tblk, lrd, tlck);
307
308         /*
309          * forward log syncpt if log reached next syncpt trigger
310          */
311         logdiff(diffp, lsn, log);
312         if (diffp >= log->nextsync)
313                 lsn = lmLogSync(log, 0);
314
315         /* update end-of-log lsn */
316         log->lsn = lsn;
317
318         LOG_UNLOCK(log);
319
320         /* return end-of-log address */
321         return lsn;
322 }
323
324
325 /*
326  * NAME:        lmWriteRecord()
327  *
328  * FUNCTION:    move the log record to current log page
329  *
330  * PARAMETER:   cd      - commit descriptor
331  *
332  * RETURN:      end-of-log address
333  *                      
334  * serialization: LOG_LOCK() held on entry/exit
335  */
336 static int
337 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
338               struct tlock * tlck)
339 {
340         int lsn = 0;            /* end-of-log address */
341         struct lbuf *bp;        /* dst log page buffer */
342         struct logpage *lp;     /* dst log page */
343         caddr_t dst;            /* destination address in log page */
344         int dstoffset;          /* end-of-log offset in log page */
345         int freespace;          /* free space in log page */
346         caddr_t p;              /* src meta-data page */
347         caddr_t src;
348         int srclen;
349         int nbytes;             /* number of bytes to move */
350         int i;
351         int len;
352         struct linelock *linelock;
353         struct lv *lv;
354         struct lvd *lvd;
355         int l2linesize;
356
357         len = 0;
358
359         /* retrieve destination log page to write */
360         bp = (struct lbuf *) log->bp;
361         lp = (struct logpage *) bp->l_ldata;
362         dstoffset = log->eor;
363
364         /* any log data to write ? */
365         if (tlck == NULL)
366                 goto moveLrd;
367
368         /*
369          *      move log record data
370          */
371         /* retrieve source meta-data page to log */
372         if (tlck->flag & tlckPAGELOCK) {
373                 p = (caddr_t) (tlck->mp->data);
374                 linelock = (struct linelock *) & tlck->lock;
375         }
376         /* retrieve source in-memory inode to log */
377         else if (tlck->flag & tlckINODELOCK) {
378                 if (tlck->type & tlckDTREE)
379                         p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
380                 else
381                         p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
382                 linelock = (struct linelock *) & tlck->lock;
383         }
384 #ifdef  _JFS_WIP
385         else if (tlck->flag & tlckINLINELOCK) {
386
387                 inlinelock = (struct inlinelock *) & tlck;
388                 p = (caddr_t) & inlinelock->pxd;
389                 linelock = (struct linelock *) & tlck;
390         }
391 #endif                          /* _JFS_WIP */
392         else {
393                 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
394                 return 0;       /* Probably should trap */
395         }
396         l2linesize = linelock->l2linesize;
397
398       moveData:
399         ASSERT(linelock->index <= linelock->maxcnt);
400
401         lv = linelock->lv;
402         for (i = 0; i < linelock->index; i++, lv++) {
403                 if (lv->length == 0)
404                         continue;
405
406                 /* is page full ? */
407                 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
408                         /* page become full: move on to next page */
409                         lmNextPage(log);
410
411                         bp = log->bp;
412                         lp = (struct logpage *) bp->l_ldata;
413                         dstoffset = LOGPHDRSIZE;
414                 }
415
416                 /*
417                  * move log vector data
418                  */
419                 src = (u8 *) p + (lv->offset << l2linesize);
420                 srclen = lv->length << l2linesize;
421                 len += srclen;
422                 while (srclen > 0) {
423                         freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
424                         nbytes = min(freespace, srclen);
425                         dst = (caddr_t) lp + dstoffset;
426                         memcpy(dst, src, nbytes);
427                         dstoffset += nbytes;
428
429                         /* is page not full ? */
430                         if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
431                                 break;
432
433                         /* page become full: move on to next page */
434                         lmNextPage(log);
435
436                         bp = (struct lbuf *) log->bp;
437                         lp = (struct logpage *) bp->l_ldata;
438                         dstoffset = LOGPHDRSIZE;
439
440                         srclen -= nbytes;
441                         src += nbytes;
442                 }
443
444                 /*
445                  * move log vector descriptor
446                  */
447                 len += 4;
448                 lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
449                 lvd->offset = cpu_to_le16(lv->offset);
450                 lvd->length = cpu_to_le16(lv->length);
451                 dstoffset += 4;
452                 jfs_info("lmWriteRecord: lv offset:%d length:%d",
453                          lv->offset, lv->length);
454         }
455
456         if ((i = linelock->next)) {
457                 linelock = (struct linelock *) lid_to_tlock(i);
458                 goto moveData;
459         }
460
461         /*
462          *      move log record descriptor
463          */
464       moveLrd:
465         lrd->length = cpu_to_le16(len);
466
467         src = (caddr_t) lrd;
468         srclen = LOGRDSIZE;
469
470         while (srclen > 0) {
471                 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
472                 nbytes = min(freespace, srclen);
473                 dst = (caddr_t) lp + dstoffset;
474                 memcpy(dst, src, nbytes);
475
476                 dstoffset += nbytes;
477                 srclen -= nbytes;
478
479                 /* are there more to move than freespace of page ? */
480                 if (srclen)
481                         goto pageFull;
482
483                 /*
484                  * end of log record descriptor
485                  */
486
487                 /* update last log record eor */
488                 log->eor = dstoffset;
489                 bp->l_eor = dstoffset;
490                 lsn = (log->page << L2LOGPSIZE) + dstoffset;
491
492                 if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
493                         tblk->clsn = lsn;
494                         jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
495                                  bp->l_eor);
496
497                         INCREMENT(lmStat.commit);       /* # of commit */
498
499                         /*
500                          * enqueue tblock for group commit:
501                          *
502                          * enqueue tblock of non-trivial/synchronous COMMIT
503                          * at tail of group commit queue
504                          * (trivial/asynchronous COMMITs are ignored by
505                          * group commit.)
506                          */
507                         LOGGC_LOCK(log);
508
509                         /* init tblock gc state */
510                         tblk->flag = tblkGC_QUEUE;
511                         tblk->bp = log->bp;
512                         tblk->pn = log->page;
513                         tblk->eor = log->eor;
514
515                         /* enqueue transaction to commit queue */
516                         tblk->cqnext = NULL;
517                         if (log->cqueue.head) {
518                                 log->cqueue.tail->cqnext = tblk;
519                                 log->cqueue.tail = tblk;
520                         } else
521                                 log->cqueue.head = log->cqueue.tail = tblk;
522
523                         LOGGC_UNLOCK(log);
524                 }
525
526                 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
527                         le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
528
529                 /* page not full ? */
530                 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
531                         return lsn;
532
533               pageFull:
534                 /* page become full: move on to next page */
535                 lmNextPage(log);
536
537                 bp = (struct lbuf *) log->bp;
538                 lp = (struct logpage *) bp->l_ldata;
539                 dstoffset = LOGPHDRSIZE;
540                 src += nbytes;
541         }
542
543         return lsn;
544 }
545
546
547 /*
548  * NAME:        lmNextPage()
549  *
550  * FUNCTION:    write current page and allocate next page.
551  *
552  * PARAMETER:   log
553  *
554  * RETURN:      0
555  *                      
556  * serialization: LOG_LOCK() held on entry/exit
557  */
558 static int lmNextPage(struct jfs_log * log)
559 {
560         struct logpage *lp;
561         int lspn;               /* log sequence page number */
562         int pn;                 /* current page number */
563         struct lbuf *bp;
564         struct lbuf *nextbp;
565         struct tblock *tblk;
566
567         /* get current log page number and log sequence page number */
568         pn = log->page;
569         bp = log->bp;
570         lp = (struct logpage *) bp->l_ldata;
571         lspn = le32_to_cpu(lp->h.page);
572
573         LOGGC_LOCK(log);
574
575         /*
576          *      write or queue the full page at the tail of write queue
577          */
578         /* get the tail tblk on commit queue */
579         tblk = log->cqueue.tail;
580
581         /* every tblk who has COMMIT record on the current page,
582          * and has not been committed, must be on commit queue
583          * since tblk is queued at commit queueu at the time
584          * of writing its COMMIT record on the page before
585          * page becomes full (even though the tblk thread
586          * who wrote COMMIT record may have been suspended
587          * currently);
588          */
589
590         /* is page bound with outstanding tail tblk ? */
591         if (tblk && tblk->pn == pn) {
592                 /* mark tblk for end-of-page */
593                 tblk->flag |= tblkGC_EOP;
594
595                 if (log->cflag & logGC_PAGEOUT) {
596                         /* if page is not already on write queue,
597                          * just enqueue (no lbmWRITE to prevent redrive)
598                          * buffer to wqueue to ensure correct serial order
599                          * of the pages since log pages will be added
600                          * continuously
601                          */
602                         if (bp->l_wqnext == NULL)
603                                 lbmWrite(log, bp, 0, 0);
604                 } else {
605                         /*
606                          * No current GC leader, initiate group commit
607                          */
608                         log->cflag |= logGC_PAGEOUT;
609                         lmGCwrite(log, 0);
610                 }
611         }
612         /* page is not bound with outstanding tblk:
613          * init write or mark it to be redriven (lbmWRITE)
614          */
615         else {
616                 /* finalize the page */
617                 bp->l_ceor = bp->l_eor;
618                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
619                 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
620         }
621         LOGGC_UNLOCK(log);
622
623         /*
624          *      allocate/initialize next page
625          */
626         /* if log wraps, the first data page of log is 2
627          * (0 never used, 1 is superblock).
628          */
629         log->page = (pn == log->size - 1) ? 2 : pn + 1;
630         log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
631
632         /* allocate/initialize next log page buffer */
633         nextbp = lbmAllocate(log, log->page);
634         nextbp->l_eor = log->eor;
635         log->bp = nextbp;
636
637         /* initialize next log page */
638         lp = (struct logpage *) nextbp->l_ldata;
639         lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
640         lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
641
642         return 0;
643 }
644
645
646 /*
647  * NAME:        lmGroupCommit()
648  *
649  * FUNCTION:    group commit
650  *      initiate pageout of the pages with COMMIT in the order of
651  *      page number - redrive pageout of the page at the head of
652  *      pageout queue until full page has been written.
653  *
654  * RETURN:      
655  *
656  * NOTE:
657  *      LOGGC_LOCK serializes log group commit queue, and
658  *      transaction blocks on the commit queue.
659  *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
660  */
661 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
662 {
663         int rc = 0;
664
665         LOGGC_LOCK(log);
666
667         /* group committed already ? */
668         if (tblk->flag & tblkGC_COMMITTED) {
669                 if (tblk->flag & tblkGC_ERROR)
670                         rc = -EIO;
671
672                 LOGGC_UNLOCK(log);
673                 return rc;
674         }
675         jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
676
677         if (tblk->xflag & COMMIT_LAZY)
678                 tblk->flag |= tblkGC_LAZY;
679
680         if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head &&
681             (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag))) {
682                 /*
683                  * No pageout in progress
684                  *
685                  * start group commit as its group leader.
686                  */
687                 log->cflag |= logGC_PAGEOUT;
688
689                 lmGCwrite(log, 0);
690         }
691
692         if (tblk->xflag & COMMIT_LAZY) {
693                 /*
694                  * Lazy transactions can leave now
695                  */
696                 LOGGC_UNLOCK(log);
697                 return 0;
698         }
699
700         /* lmGCwrite gives up LOGGC_LOCK, check again */
701
702         if (tblk->flag & tblkGC_COMMITTED) {
703                 if (tblk->flag & tblkGC_ERROR)
704                         rc = -EIO;
705
706                 LOGGC_UNLOCK(log);
707                 return rc;
708         }
709
710         /* upcount transaction waiting for completion
711          */
712         log->gcrtc++;
713         tblk->flag |= tblkGC_READY;
714
715         __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
716                      LOGGC_LOCK(log), LOGGC_UNLOCK(log));
717
718         /* removed from commit queue */
719         if (tblk->flag & tblkGC_ERROR)
720                 rc = -EIO;
721
722         LOGGC_UNLOCK(log);
723         return rc;
724 }
725
726 /*
727  * NAME:        lmGCwrite()
728  *
729  * FUNCTION:    group commit write
730  *      initiate write of log page, building a group of all transactions
731  *      with commit records on that page.
732  *
733  * RETURN:      None
734  *
735  * NOTE:
736  *      LOGGC_LOCK must be held by caller.
737  *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
738  */
739 static void lmGCwrite(struct jfs_log * log, int cant_write)
740 {
741         struct lbuf *bp;
742         struct logpage *lp;
743         int gcpn;               /* group commit page number */
744         struct tblock *tblk;
745         struct tblock *xtblk;
746
747         /*
748          * build the commit group of a log page
749          *
750          * scan commit queue and make a commit group of all
751          * transactions with COMMIT records on the same log page.
752          */
753         /* get the head tblk on the commit queue */
754         tblk = xtblk = log->cqueue.head;
755         gcpn = tblk->pn;
756
757         while (tblk && tblk->pn == gcpn) {
758                 xtblk = tblk;
759
760                 /* state transition: (QUEUE, READY) -> COMMIT */
761                 tblk->flag |= tblkGC_COMMIT;
762                 tblk = tblk->cqnext;
763         }
764         tblk = xtblk;           /* last tblk of the page */
765
766         /*
767          * pageout to commit transactions on the log page.
768          */
769         bp = (struct lbuf *) tblk->bp;
770         lp = (struct logpage *) bp->l_ldata;
771         /* is page already full ? */
772         if (tblk->flag & tblkGC_EOP) {
773                 /* mark page to free at end of group commit of the page */
774                 tblk->flag &= ~tblkGC_EOP;
775                 tblk->flag |= tblkGC_FREE;
776                 bp->l_ceor = bp->l_eor;
777                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
778                 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
779                          cant_write);
780                 INCREMENT(lmStat.full_page);
781         }
782         /* page is not yet full */
783         else {
784                 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
785                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
786                 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
787                 INCREMENT(lmStat.partial_page);
788         }
789 }
790
791 /*
792  * NAME:        lmPostGC()
793  *
794  * FUNCTION:    group commit post-processing
795  *      Processes transactions after their commit records have been written
796  *      to disk, redriving log I/O if necessary.
797  *
798  * RETURN:      None
799  *
800  * NOTE:
801  *      This routine is called a interrupt time by lbmIODone
802  */
803 static void lmPostGC(struct lbuf * bp)
804 {
805         unsigned long flags;
806         struct jfs_log *log = bp->l_log;
807         struct logpage *lp;
808         struct tblock *tblk;
809
810         //LOGGC_LOCK(log);
811         spin_lock_irqsave(&log->gclock, flags);
812         /*
813          * current pageout of group commit completed.
814          *
815          * remove/wakeup transactions from commit queue who were
816          * group committed with the current log page
817          */
818         while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) {
819                 /* if transaction was marked GC_COMMIT then
820                  * it has been shipped in the current pageout
821                  * and made it to disk - it is committed.
822                  */
823
824                 if (bp->l_flag & lbmERROR)
825                         tblk->flag |= tblkGC_ERROR;
826
827                 /* remove it from the commit queue */
828                 log->cqueue.head = tblk->cqnext;
829                 if (log->cqueue.head == NULL)
830                         log->cqueue.tail = NULL;
831                 tblk->flag &= ~tblkGC_QUEUE;
832                 tblk->cqnext = 0;
833
834                 if (tblk == log->flush_tblk) {
835                         /* we can stop flushing the log now */
836                         clear_bit(log_FLUSH, &log->flag);
837                         log->flush_tblk = NULL;
838                 }
839
840                 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
841                          tblk->flag);
842
843                 if (!(tblk->xflag & COMMIT_FORCE))
844                         /*
845                          * Hand tblk over to lazy commit thread
846                          */
847                         txLazyUnlock(tblk);
848                 else {
849                         /* state transition: COMMIT -> COMMITTED */
850                         tblk->flag |= tblkGC_COMMITTED;
851
852                         if (tblk->flag & tblkGC_READY)
853                                 log->gcrtc--;
854
855                         LOGGC_WAKEUP(tblk);
856                 }
857
858                 /* was page full before pageout ?
859                  * (and this is the last tblk bound with the page)
860                  */
861                 if (tblk->flag & tblkGC_FREE)
862                         lbmFree(bp);
863                 /* did page become full after pageout ?
864                  * (and this is the last tblk bound with the page)
865                  */
866                 else if (tblk->flag & tblkGC_EOP) {
867                         /* finalize the page */
868                         lp = (struct logpage *) bp->l_ldata;
869                         bp->l_ceor = bp->l_eor;
870                         lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
871                         jfs_info("lmPostGC: calling lbmWrite");
872                         lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
873                                  1);
874                 }
875
876         }
877
878         /* are there any transactions who have entered lnGroupCommit()
879          * (whose COMMITs are after that of the last log page written.
880          * They are waiting for new group commit (above at (SLEEP 1))
881          * or lazy transactions are on a full (queued) log page,
882          * select the latest ready transaction as new group leader and
883          * wake her up to lead her group.
884          */
885         if ((tblk = log->cqueue.head) &&
886             ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
887              test_bit(log_FLUSH, &log->flag)))
888                 /*
889                  * Call lmGCwrite with new group leader
890                  */
891                 lmGCwrite(log, 1);
892
893         /* no transaction are ready yet (transactions are only just
894          * queued (GC_QUEUE) and not entered for group commit yet).
895          * the first transaction entering group commit
896          * will elect herself as new group leader.
897          */
898         else
899                 log->cflag &= ~logGC_PAGEOUT;
900
901         //LOGGC_UNLOCK(log);
902         spin_unlock_irqrestore(&log->gclock, flags);
903         return;
904 }
905
/*
 * NAME:        lmLogSync()
 *
 * FUNCTION:    write a log SYNCPT record for the specified log
 *      if a new sync address is available
 *      (normally the case if sync() is executed by a back-ground
 *      process); otherwise reuse the current lsn.
 *      calculate the new value of log->nextsync, which determines
 *      when this code is called again.
 *
 *      this is called only from lmLog().
 *
 * PARAMETERS:  log        - log to write the sync point for
 *              nosyncwait - nonzero when invoked from outside a
 *                           transaction (e.g., sync()); skip the
 *                           syncbarrier processing
 *
 * RETURN:      lsn of the SYNCPT record written (or the current lsn
 *              if no new sync address was available)
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmLogSync(struct jfs_log * log, int nosyncwait)
{
        int logsize;
        int written;            /* written since last syncpt */
        int free;               /* free space left available */
        int delta;              /* additional delta to write normally */
        int more;               /* additional write granted */
        struct lrd lrd;
        int lsn;
        struct logsyncblk *lp;

        /*
         *      forward syncpt
         */
        /* if last sync is same as last syncpt,
         * invoke sync point forward processing to update sync.
         */

        if (log->sync == log->syncpt) {
                LOGSYNC_LOCK(log);
                /* ToDo: push dirty metapages out to disk */
//              bmLogSync(log);

                /* empty synclist: everything logged so far has been
                 * written back, so sync may advance to the current lsn
                 */
                if (list_empty(&log->synclist))
                        log->sync = log->lsn;
                else {
                        /* the oldest entry on the synclist bounds how far
                         * the sync point may advance
                         */
                        lp = list_entry(log->synclist.next,
                                        struct logsyncblk, synclist);
                        log->sync = lp->lsn;
                }
                LOGSYNC_UNLOCK(log);

        }

        /* if sync is different from last syncpt,
         * write a SYNCPT record with syncpt = sync.
         * reset syncpt = sync
         */
        if (log->sync != log->syncpt) {
                struct super_block *sb = log->sb;
                struct jfs_sb_info *sbi = JFS_SBI(sb);

                /*
                 * We need to make sure all of the "written" metapages
                 * actually make it to disk
                 */
                fsync_inode_data_buffers(sbi->ipbmap);
                fsync_inode_data_buffers(sbi->ipimap);
                fsync_inode_data_buffers(sb->s_bdev->bd_inode);

                lrd.logtid = 0;
                lrd.backchain = 0;
                lrd.type = cpu_to_le16(LOG_SYNCPT);
                lrd.length = 0;
                lrd.log.syncpt.sync = cpu_to_le32(log->sync);
                lsn = lmWriteRecord(log, NULL, &lrd, NULL);

                log->syncpt = log->sync;
        } else
                lsn = log->lsn;

        /*
         *      setup next syncpt trigger (SWAG)
         */
        logsize = log->logsize;

        logdiff(written, lsn, log);
        free = logsize - written;
        delta = LOGSYNC_DELTA(logsize);
        more = min(free / 2, delta);
        if (more < 2 * LOGPSIZE) {
                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
                /*
                 *      log wrapping
                 *
                 * option 1 - panic ? No.!
                 * option 2 - shutdown file systems
                 *            associated with log ?
                 * option 3 - extend log ?
                 */
                /*
                 * option 4 - second chance
                 *
                 * mark log wrapped, and continue.
                 * when all active transactions are completed,
                 * mark log valid for recovery.
                 * if crashed during invalid state, log state
                 * implies invalid log, forcing fsck().
                 */
                /* mark log state log wrap in log superblock */
                /* log->state = LOGWRAP; */

                /* reset sync point computation */
                log->syncpt = log->sync = lsn;
                log->nextsync = delta;
        } else
                /* next syncpt trigger = written + more */
                log->nextsync = written + more;

        /* return if lmLogSync() from outside of transaction, e.g., sync() */
        if (nosyncwait)
                return lsn;

        /* if number of bytes written from last sync point is more
         * than 1/4 of the log size, stop new transactions from
         * starting until all current transactions are completed
         * by setting syncbarrier flag.
         */
        if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
                set_bit(log_SYNCBARRIER, &log->flag);
                jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
                         log->syncpt);
                /*
                 * We may have to initiate group commit
                 */
                jfs_flush_journal(log, 0);
        }

        return lsn;
}
1046
1047
1048 /*
1049  * NAME:        lmLogOpen()
1050  *
1051  * FUNCTION:    open the log on first open;
1052  *      insert filesystem in the active list of the log.
1053  *
1054  * PARAMETER:   ipmnt   - file system mount inode
1055  *              iplog   - log inode (out)
1056  *
1057  * RETURN:
1058  *
1059  * serialization:
1060  */
int lmLogOpen(struct super_block *sb, struct jfs_log ** logptr)
{
        int rc;
        struct block_device *bdev;
        struct jfs_log *log;

        /* allocate and zero a log descriptor */
        if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
                return -ENOMEM;
        memset(log, 0, sizeof(struct jfs_log));
        init_waitqueue_head(&log->syncwait);

        log->sb = sb;           /* This should be a list */

        if (!(JFS_SBI(sb)->mntflag & JFS_INLINELOG))
                goto externalLog;

        /*
         *      in-line log in host file system
         *
         * file system to log have 1-to-1 relationship;
         */

        set_bit(log_INLINELOG, &log->flag);
        log->bdev = sb->s_bdev;
        log->base = addressPXD(&JFS_SBI(sb)->logpxd);
        /* log->size is counted in log pages, hence the shift by the
         * difference between log-page and fs-block size
         */
        log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
            (L2LOGPSIZE - sb->s_blocksize_bits);
        log->l2bsize = sb->s_blocksize_bits;
        ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);

        /*
         * initialize log.
         */
        if ((rc = lmLogInit(log)))
                goto free;
        goto out;

        /*
         *      external log as separate logical volume
         *
         * file systems to log may have n-to-1 relationship;
         */
      externalLog:

        /*
         * TODO: Check for already opened log devices
         */

        if (!(bdev = bdget(kdev_t_to_nr(JFS_SBI(sb)->logdev)))) {
                rc = -ENODEV;
                goto free;
        }

        /* NOTE(review): on blkdev_get() failure we jump to "free" without
         * an explicit bdput(); confirm that 2.4 blkdev_get() drops the
         * bdget() reference on its error path.
         */
        if ((rc = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS))) {
                goto free;
        }

        log->bdev = bdev;
        memcpy(log->uuid, JFS_SBI(sb)->loguuid, sizeof(log->uuid));
        
        /*
         * initialize log:
         */
        if ((rc = lmLogInit(log)))
                goto close;

        /*
         * add file system to log active file system list
         */
        if ((rc = lmLogFileSystem(log, JFS_SBI(sb)->uuid, 1)))
                goto shutdown;

      out:
        *logptr = log;
        return 0;

        /*
         *      unwind on error
         */
      shutdown:         /* unwind lbmLogInit() */
        lbmLogShutdown(log);

      close:            /* close external log device */
        blkdev_put(bdev, BDEV_FS);

      free:             /* free log descriptor */
        kfree(log);

        jfs_warn("lmLogOpen: exit(%d)", rc);
        return rc;
}
1152
1153
1154 /*
1155  * NAME:        lmLogInit()
1156  *
1157  * FUNCTION:    log initialization at first log open.
1158  *
1159  *      logredo() (or logformat()) should have been run previously.
1160  *      initialize the log inode from log superblock.
1161  *      set the log state in the superblock to LOGMOUNT and
1162  *      write SYNCPT log record.
1163  *              
1164  * PARAMETER:   log     - log structure
1165  *
1166  * RETURN:      0       - if ok
1167  *              -EINVAL - bad log magic number or superblock dirty
1168  *              error returned from logwait()
1169  *                      
1170  * serialization: single first open thread
1171  */
1172 int lmLogInit(struct jfs_log * log)
1173 {
1174         int rc = 0;
1175         struct lrd lrd;
1176         struct logsuper *logsuper;
1177         struct lbuf *bpsuper;
1178         struct lbuf *bp;
1179         struct logpage *lp;
1180         int lsn;
1181
1182         jfs_info("lmLogInit: log:0x%p", log);
1183
1184         /*
1185          * log inode is overlaid on generic inode where
1186          * dinode have been zeroed out by iRead();
1187          */
1188
1189         /*
1190          * initialize log i/o
1191          */
1192         if ((rc = lbmLogInit(log)))
1193                 return rc;
1194
1195         /*
1196          * validate log superblock
1197          */
1198
1199
1200         if (!test_bit(log_INLINELOG, &log->flag))
1201                 log->l2bsize = 12;      /* XXX kludge alert XXX */
1202         if ((rc = lbmRead(log, 1, &bpsuper)))
1203                 goto errout10;
1204
1205         logsuper = (struct logsuper *) bpsuper->l_ldata;
1206
1207         if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1208                 jfs_warn("*** Log Format Error ! ***");
1209                 rc = -EINVAL;
1210                 goto errout20;
1211         }
1212
1213         /* logredo() should have been run successfully. */
1214         if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1215                 jfs_warn("*** Log Is Dirty ! ***");
1216                 rc = -EINVAL;
1217                 goto errout20;
1218         }
1219
1220         /* initialize log inode from log superblock */
1221         if (test_bit(log_INLINELOG,&log->flag)) {
1222                 if (log->size != le32_to_cpu(logsuper->size)) {
1223                         rc = -EINVAL;
1224                         goto errout20;
1225                 }
1226                 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1227                         log, (unsigned long long) log->base, log->size);
1228         } else {
1229                 if (memcmp(logsuper->uuid, log->uuid, 16)) {
1230                         jfs_warn("wrong uuid on JFS log device");
1231                         goto errout20;
1232                 }
1233                 log->size = le32_to_cpu(logsuper->size);
1234                 log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1235                 jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1236                         log, (unsigned long long) log->base, log->size);
1237         }
1238
1239         log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1240         log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1241
1242         /* check for disabled journaling to disk */
1243         if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
1244                 log->no_integrity = 1;
1245                 log->ni_page = log->page;
1246                 log->ni_eor = log->eor;
1247         }
1248         else
1249                 log->no_integrity = 0;
1250
1251         /*
1252          * initialize for log append write mode
1253          */
1254         /* establish current/end-of-log page/buffer */
1255         if ((rc = lbmRead(log, log->page, &bp)))
1256                 goto errout20;
1257
1258         lp = (struct logpage *) bp->l_ldata;
1259
1260         jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1261                  le32_to_cpu(logsuper->end), log->page, log->eor,
1262                  le16_to_cpu(lp->h.eor));
1263
1264 //      ASSERT(log->eor == lp->h.eor);
1265
1266         log->bp = bp;
1267         bp->l_pn = log->page;
1268         bp->l_eor = log->eor;
1269
1270         /* initialize the group commit serialization lock */
1271         LOGGC_LOCK_INIT(log);
1272
1273         /* if current page is full, move on to next page */
1274         if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1275                 lmNextPage(log);
1276
1277         /* allocate/initialize the log write serialization lock */
1278         LOG_LOCK_INIT(log);
1279
1280         /*
1281          * initialize log syncpoint
1282          */
1283         /*
1284          * write the first SYNCPT record with syncpoint = 0
1285          * (i.e., log redo up to HERE !);
1286          * remove current page from lbm write queue at end of pageout
1287          * (to write log superblock update), but do not release to freelist;
1288          */
1289         lrd.logtid = 0;
1290         lrd.backchain = 0;
1291         lrd.type = cpu_to_le16(LOG_SYNCPT);
1292         lrd.length = 0;
1293         lrd.log.syncpt.sync = 0;
1294         lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1295         bp = log->bp;
1296         bp->l_ceor = bp->l_eor;
1297         lp = (struct logpage *) bp->l_ldata;
1298         lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1299         lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1300         if ((rc = lbmIOWait(bp, 0)))
1301                 goto errout30;
1302
1303         /* initialize logsync parameters */
1304         log->logsize = (log->size - 2) << L2LOGPSIZE;
1305         log->lsn = lsn;
1306         log->syncpt = lsn;
1307         log->sync = log->syncpt;
1308         log->nextsync = LOGSYNC_DELTA(log->logsize);
1309
1310         jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1311                  log->lsn, log->syncpt, log->sync);
1312
1313         LOGSYNC_LOCK_INIT(log);
1314
1315         INIT_LIST_HEAD(&log->synclist);
1316
1317         log->cqueue.head = log->cqueue.tail = NULL;
1318         log->flush_tblk = NULL;
1319
1320         log->count = 0;
1321
1322         /*
1323          * initialize for lazy/group commit
1324          */
1325         log->clsn = lsn;
1326
1327         /*
1328          * update/write superblock
1329          */
1330         logsuper->state = cpu_to_le32(LOGMOUNT);
1331         log->serial = le32_to_cpu(logsuper->serial) + 1;
1332         logsuper->serial = cpu_to_le32(log->serial);
1333         lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1334         if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1335                 goto errout30;
1336
1337         return 0;
1338
1339         /*
1340          *      unwind on error
1341          */
1342       errout30:         /* release log page */
1343         log->wqueue = NULL;
1344         bp->l_wqnext = NULL;
1345         lbmFree(bp);
1346
1347       errout20:         /* release log superblock */
1348         lbmFree(bpsuper);
1349
1350       errout10:         /* unwind lbmLogInit() */
1351         lbmLogShutdown(log);
1352
1353         jfs_warn("lmLogInit: exit(%d)", rc);
1354         return rc;
1355 }
1356
1357
1358 /*
1359  * NAME:        lmLogClose()
1360  *
1361  * FUNCTION:    remove file system <ipmnt> from active list of log <iplog>
1362  *              and close it on last close.
1363  *
1364  * PARAMETER:   sb      - superblock
1365  *              log     - log inode
1366  *
1367  * RETURN:      errors from subroutines
1368  *
1369  * serialization:
1370  */
1371 int lmLogClose(struct super_block *sb, struct jfs_log * log)
1372 {
1373         int rc;
1374
1375         jfs_info("lmLogClose: log:0x%p", log);
1376
1377         if (!test_bit(log_INLINELOG, &log->flag))
1378                 goto externalLog;
1379         
1380         /*
1381          *      in-line log in host file system
1382          */
1383         rc = lmLogShutdown(log);
1384         goto out;
1385
1386         /*
1387          *      external log as separate logical volume
1388          */
1389       externalLog:
1390         lmLogFileSystem(log, JFS_SBI(sb)->uuid, 0);
1391         rc = lmLogShutdown(log);
1392         blkdev_put(log->bdev, BDEV_FS);
1393
1394       out:
1395         kfree(log);
1396         jfs_info("lmLogClose: exit(%d)", rc);
1397         return rc;
1398 }
1399
1400
1401 /*
1402  * NAME:        jfs_flush_journal()
1403  *
1404  * FUNCTION:    initiate write of any outstanding transactions to the journal
1405  *              and optionally wait until they are all written to disk
1406  *
1407  *              wait == 0  flush until latest txn is committed, don't wait
1408  *              wait == 1  flush until latest txn is committed, wait
1409  *              wait > 1   flush until all txn's are complete, wait
1410  */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
        int i;
        struct tblock *target;

        if (!log)
                /* jfs_write_inode may call us during read-only mount */
                return;

        jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

        LOGGC_LOCK(log);

        /* most recently queued transaction on the commit queue, if any */
        target = log->cqueue.head;

        if (target) {
                /*
                 * This ensures that we will keep writing to the journal as long
                 * as there are unwritten commit records
                 */

                if (test_bit(log_FLUSH, &log->flag)) {
                        /*
                         * We're already flushing.
                         * if flush_tblk is NULL, we are flushing everything,
                         * so leave it that way.  Otherwise, update it to the
                         * latest transaction
                         */
                        if (log->flush_tblk)
                                log->flush_tblk = target;
                } else {
                        /* Only flush until latest transaction is committed */
                        log->flush_tblk = target;
                        set_bit(log_FLUSH, &log->flag);

                        /*
                         * Initiate I/O on outstanding transactions
                         */
                        if (!(log->cflag & logGC_PAGEOUT)) {
                                log->cflag |= logGC_PAGEOUT;
                                lmGCwrite(log, 0);
                        }
                }
        }
        if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
                /* Flush until all activity complete */
                set_bit(log_FLUSH, &log->flag);
                log->flush_tblk = NULL;
        }

        /* block until the target transaction's commit record is on disk;
         * the group-commit completion path marks it tblkGC_COMMITTED and
         * wakes gcwait (see lmPostGC)
         */
        if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
                DECLARE_WAITQUEUE(__wait, current);

                add_wait_queue(&target->gcwait, &__wait);
                set_current_state(TASK_UNINTERRUPTIBLE);
                /* drop the lock while sleeping so pageout can proceed */
                LOGGC_UNLOCK(log);
                schedule();
                current->state = TASK_RUNNING;
                LOGGC_LOCK(log);
                remove_wait_queue(&target->gcwait, &__wait);
        }
        LOGGC_UNLOCK(log);

        if (wait < 2)
                return;

        /*
         * If there was recent activity, we may need to wait
         * for the lazycommit thread to catch up
         */
        if (log->cqueue.head || !list_empty(&log->synclist)) {
                /* poll in HZ/4 steps (up to 800 iterations) rather than
                 * blocking on a waitqueue
                 */
                for (i = 0; i < 800; i++) {     /* Too much? */
                        current->state = TASK_INTERRUPTIBLE;
                        schedule_timeout(HZ / 4);
                        if ((log->cqueue.head == NULL) &&
                            list_empty(&log->synclist))
                                break;
                }
        }
        assert(log->cqueue.head == NULL);
        assert(list_empty(&log->synclist));
        clear_bit(log_FLUSH, &log->flag);
}
1494
1495 /*
1496  * NAME:        lmLogShutdown()
1497  *
1498  * FUNCTION:    log shutdown at last LogClose().
1499  *
1500  *              write log syncpt record.
1501  *              update super block to set redone flag to 0.
1502  *
1503  * PARAMETER:   log     - log inode
1504  *
1505  * RETURN:      0       - success
1506  *                      
1507  * serialization: single last close thread
1508  */
int lmLogShutdown(struct jfs_log * log)
{
        int rc;
        struct lrd lrd;
        int lsn;
        struct logsuper *logsuper;
        struct lbuf *bpsuper;
        struct lbuf *bp;
        struct logpage *lp;

        jfs_info("lmLogShutdown: log:0x%p", log);

        /* push all outstanding transactions and wait for completion */
        jfs_flush_journal(log, 2);

        /*
         * We need to make sure all of the "written" metapages
         * actually make it to disk
         */
        fsync_no_super(log->sb->s_dev);

        /*
         * write the last SYNCPT record with syncpoint = 0
         * (i.e., log redo up to HERE !)
         */
        lrd.logtid = 0;
        lrd.backchain = 0;
        lrd.type = cpu_to_le16(LOG_SYNCPT);
        lrd.length = 0;
        lrd.log.syncpt.sync = 0;
        
        /* check for disabled journaling to disk */
        if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
                /* re-enable integrity and restore the on-disk log
                 * position saved at lmLogInit() so the final records
                 * are written at the real end of the log
                 */
                log->no_integrity = 0;
                log->page = log->ni_page;
                log->eor = log->ni_eor;
        }

        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
        bp = log->bp;
        lp = (struct logpage *) bp->l_ldata;
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
        /* synchronously write out the final log page and release it */
        lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
        lbmIOWait(log->bp, lbmFREE);

        /*
         * synchronous update log superblock
         * mark log state as shutdown cleanly
         * (i.e., Log does not need to be replayed).
         */
        if ((rc = lbmRead(log, 1, &bpsuper)))
                goto out;

        logsuper = (struct logsuper *) bpsuper->l_ldata;
        logsuper->state = cpu_to_le32(LOGREDONE);
        logsuper->end = cpu_to_le32(lsn);
        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
        rc = lbmIOWait(bpsuper, lbmFREE);

        jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
                 lsn, log->page, log->eor);

      out:    
        /*
         * shutdown per log i/o
         */
        lbmLogShutdown(log);

        if (rc) {
                jfs_warn("lmLogShutdown: exit(%d)", rc);
        }
        return rc;
}
1581
1582
1583 /*
1584  * NAME:        lmLogFileSystem()
1585  *
1586  * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1587  *      file system into/from log active file system list.
1588  *
1589  * PARAMETE:    log     - pointer to logs inode.
1590  *              fsdev   - kdev_t of filesystem.
1591  *              serial  - pointer to returned log serial number
1592  *              activate - insert/remove device from active list.
1593  *
1594  * RETURN:      0       - success
1595  *              errors returned by vms_iowait().
1596  */
1597 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate)
1598 {
1599         int rc = 0;
1600         int i;
1601         struct logsuper *logsuper;
1602         struct lbuf *bpsuper;
1603
1604         /*
1605          * insert/remove file system device to log active file system list.
1606          */
1607         if ((rc = lbmRead(log, 1, &bpsuper)))
1608                 return rc;
1609
1610         logsuper = (struct logsuper *) bpsuper->l_ldata;
1611         if (activate) {
1612                 for (i = 0; i < MAX_ACTIVE; i++)
1613                         if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1614                                 memcpy(logsuper->active[i].uuid, uuid, 16);
1615                                 break;
1616                         }
1617                 if (i == MAX_ACTIVE) {
1618                         jfs_warn("Too many file systems sharing journal!");
1619                         lbmFree(bpsuper);
1620                         return -EMFILE; /* Is there a better rc? */
1621                 }
1622         } else {
1623                 for (i = 0; i < MAX_ACTIVE; i++)
1624                         if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1625                                 memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1626                                 break;
1627                         }
1628                 if (i == MAX_ACTIVE) {
1629                         jfs_warn("Somebody stomped on the journal!");
1630                         lbmFree(bpsuper);
1631                         return -EIO;
1632                 }
1633                 
1634         }
1635
1636         /*
1637          * synchronous write log superblock:
1638          *
1639          * write sidestream bypassing write queue:
1640          * at file system mount, log super block is updated for
1641          * activation of the file system before any log record
1642          * (MOUNT record) of the file system, and at file system
1643          * unmount, all meta data for the file system has been
1644          * flushed before log super block is updated for deactivation
1645          * of the file system.
1646          */
1647         lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1648         rc = lbmIOWait(bpsuper, lbmFREE);
1649
1650         return rc;
1651 }
1652
1653 /*
1654  *              log buffer manager (lbm)
1655  *              ------------------------
1656  *
1657  * special purpose buffer manager supporting log i/o requirements.
1658  *
 * per log write queue:
 * log pageout occurs in serial order via a fifo write queue,
 * restricted to a single i/o in progress at any one time.
 * the queue is a circular singly-linked list
 * (log->wrqueue points to the tail, and buffers are linked via
 * bp->wrqueue field), and
 * maintains log pages in pageout or waiting for pageout in serial pageout.
1666  */
1667
1668 /*
1669  *      lbmLogInit()
1670  *
1671  * initialize per log I/O setup at lmLogInit()
1672  */
1673 static int lbmLogInit(struct jfs_log * log)
1674 {                               /* log inode */
1675         int i;
1676         struct lbuf *lbuf;
1677
1678         jfs_info("lbmLogInit: log:0x%p", log);
1679
1680         /* initialize current buffer cursor */
1681         log->bp = NULL;
1682
1683         /* initialize log device write queue */
1684         log->wqueue = NULL;
1685
1686         /*
1687          * Each log has its own buffer pages allocated to it.  These are
1688          * not managed by the page cache.  This ensures that a transaction
1689          * writing to the log does not block trying to allocate a page from
1690          * the page cache (for the log).  This would be bad, since page
1691          * allocation waits on the kswapd thread that may be committing inodes
1692          * which would cause log activity.  Was that clear?  I'm trying to
1693          * avoid deadlock here.
1694          */
1695         init_waitqueue_head(&log->free_wait);
1696
1697         log->lbuf_free = NULL;
1698
1699         for (i = 0; i < LOGPAGES; i++) {
1700                 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1701                 if (lbuf == 0)
1702                         goto error;
1703                 lbuf->l_bh.b_data = lbuf->l_ldata =
1704                     (char *) get_zeroed_page(GFP_KERNEL);
1705                 if (lbuf->l_ldata == 0) {
1706                         kfree(lbuf);
1707                         goto error;
1708                 }
1709                 lbuf->l_log = log;
1710                 init_waitqueue_head(&lbuf->l_ioevent);
1711
1712                 lbuf->l_bh.b_size = LOGPSIZE;
1713                 lbuf->l_bh.b_dev = to_kdev_t(log->bdev->bd_dev);
1714                 lbuf->l_bh.b_end_io = lbmIODone;
1715                 lbuf->l_bh.b_private = lbuf;
1716                 lbuf->l_bh.b_page = virt_to_page(lbuf->l_ldata);
1717                 lbuf->l_bh.b_state = 0;
1718                 init_waitqueue_head(&lbuf->l_bh.b_wait);
1719
1720                 lbuf->l_freelist = log->lbuf_free;
1721                 log->lbuf_free = lbuf;
1722         }
1723
1724         return (0);
1725
1726       error:
1727         lbmLogShutdown(log);
1728         return -ENOMEM;
1729 }
1730
1731
1732 /*
1733  *      lbmLogShutdown()
1734  *
1735  * finalize per log I/O setup at lmLogShutdown()
1736  */
1737 static void lbmLogShutdown(struct jfs_log * log)
1738 {
1739         struct lbuf *lbuf;
1740
1741         jfs_info("lbmLogShutdown: log:0x%p", log);
1742
1743         lbuf = log->lbuf_free;
1744         while (lbuf) {
1745                 struct lbuf *next = lbuf->l_freelist;
1746                 free_page((unsigned long) lbuf->l_ldata);
1747                 kfree(lbuf);
1748                 lbuf = next;
1749         }
1750
1751         log->bp = NULL;
1752 }
1753
1754
1755 /*
1756  *      lbmAllocate()
1757  *
1758  * allocate an empty log buffer
1759  */
static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
{
        struct lbuf *bp;
        unsigned long flags;

        /*
         * recycle from log buffer freelist if any
         */
        LCACHE_LOCK(flags);
        /* sleep (lock released while waiting) until a free buffer shows up */
        LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
        log->lbuf_free = bp->l_freelist;
        LCACHE_UNLOCK(flags);

        /* reset recycled buffer state */
        bp->l_flag = 0;

        bp->l_wqnext = NULL;
        bp->l_freelist = NULL;

        /* bind the buffer to log page pn and its device block address */
        bp->l_pn = pn;
        bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
        bp->l_bh.b_blocknr = bp->l_blkno;
        bp->l_ceor = 0;

        return bp;
}
1785
1786
1787 /*
1788  *      lbmFree()
1789  *
1790  * release a log buffer to freelist
1791  */
static void lbmFree(struct lbuf * bp)
{
        unsigned long flags;

        LCACHE_LOCK(flags);

        /* lbmfree() must be called with LCACHE_LOCK held */
        lbmfree(bp);

        LCACHE_UNLOCK(flags);
}
1802
/* return a log buffer to the freelist; caller must hold LCACHE_LOCK */
static void lbmfree(struct lbuf * bp)
{
        struct jfs_log *log = bp->l_log;

        /* buffer must already have been removed from the write queue */
        assert(bp->l_wqnext == NULL);

        /*
         * return the buffer to head of freelist
         */
        bp->l_freelist = log->lbuf_free;
        log->lbuf_free = bp;

        /* wake any lbmAllocate() sleeper waiting for a free buffer */
        wake_up(&log->free_wait);
        return;
}
1818
1819
1820 /*
1821  * NAME:        lbmRedrive
1822  *
 *  FUNCTION:    add a log buffer to the log redrive list
1824  *
1825  * PARAMETER:
1826  *     bp       - log buffer
1827  *
1828  * NOTES:
1829  *      Takes log_redrive_lock.
1830  */
static inline void lbmRedrive(struct lbuf *bp)
{
        unsigned long flags;

        /* push bp onto the global redrive list (LIFO) */
        spin_lock_irqsave(&log_redrive_lock, flags);
        bp->l_redrive_next = log_redrive_list;
        log_redrive_list = bp;
        spin_unlock_irqrestore(&log_redrive_lock, flags);

        /* let the jfsIO kernel thread submit the I/O in process context */
        wake_up(&jfs_IO_thread_wait);
}
1842
1843
1844 /*
1845  *      lbmRead()
1846  */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
        struct lbuf *bp;

        /*
         * allocate a log buffer
         */
        *bpp = bp = lbmAllocate(log, pn);
        jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

        /* set up the embedded buffer_head for a synchronous read */
        bp->l_flag |= lbmREAD;
        bp->l_bh.b_reqnext = NULL;
        clear_bit(BH_Uptodate, &bp->l_bh.b_state);
        lock_buffer(&bp->l_bh);
        set_bit(BH_Mapped, &bp->l_bh.b_state);
        set_bit(BH_Req, &bp->l_bh.b_state);
        bp->l_bh.b_rdev = bp->l_bh.b_dev;
        /* convert the log block number into a 512-byte sector number */
        bp->l_bh.b_rsector = bp->l_blkno << (log->l2bsize - 9);
        generic_make_request(READ, &bp->l_bh);
        run_task_queue(&tq_disk);

        /* lbmIODone() clears lbmREAD and wakes us when the read completes */
        wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

        return 0;
}
1872
1873
1874 /*
1875  *      lbmWrite()
1876  *
1877  * buffer at head of pageout queue stays after completion of
1878  * partial-page pageout and redriven by explicit initiation of
1879  * pageout by caller until full-page pageout is completed and
1880  * released.
1881  *
1882  * device driver i/o done redrives pageout of new buffer at
1883  * head of pageout queue when current buffer at head of pageout
1884  * queue is released at the completion of its full-page pageout.
1885  *
1886  * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
1887  * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
1888  */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
                     int cant_block)
{
        struct lbuf *tail;
        unsigned long flags;

        jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

        /* map the logical block address to physical block address */
        bp->l_blkno =
            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

        LCACHE_LOCK(flags);             /* disable+lock */

        /*
         * initialize buffer for device driver
         */
        bp->l_flag = flag;

        /*
         *      insert bp at tail of write queue associated with log
         *
         * (request is either for bp already/currently at head of queue
         * or new bp to be inserted at tail)
         */
        tail = log->wqueue;

        /* is buffer not already on write queue ? */
        if (bp->l_wqnext == NULL) {
                /* insert at tail of wqueue */
                if (tail == NULL) {
                        /* empty queue: bp becomes a single-element ring */
                        log->wqueue = bp;
                        bp->l_wqnext = bp;
                } else {
                        /* circular list; log->wqueue points at the tail */
                        log->wqueue = bp;
                        bp->l_wqnext = tail->l_wqnext;
                        tail->l_wqnext = bp;
                }

                tail = bp;
        }

        /* is buffer at head of wqueue and for write ? */
        if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
                /* not at the head (or not a write): lbmIODone() of the
                 * buffer ahead of us will redrive this one later */
                LCACHE_UNLOCK(flags);   /* unlock+enable */
                return;
        }

        LCACHE_UNLOCK(flags);   /* unlock+enable */

        if (cant_block)
                /* caller may not block: defer submission to jfsIO thread */
                lbmRedrive(bp);
        else if (flag & lbmSYNC)
                /* synchronous: submit directly; caller will lbmIOWait() */
                lbmStartIO(bp);
        else {
                /* drop LOGGC_LOCK across the (possibly blocking) submit */
                LOGGC_UNLOCK(log);
                lbmStartIO(bp);
                LOGGC_LOCK(log);
        }
}
1949
1950
1951 /*
1952  *      lbmDirectWrite()
1953  *
1954  * initiate pageout bypassing write queue for sidestream
1955  * (e.g., log superblock) write;
1956  */
1957 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
1958 {
1959         jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
1960                  bp, flag, bp->l_pn);
1961
1962         /*
1963          * initialize buffer for device driver
1964          */
1965         bp->l_flag = flag | lbmDIRECT;
1966
1967         /* map the logical block address to physical block address */
1968         bp->l_blkno =
1969             log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
1970
1971         /*
1972          *      initiate pageout of the page
1973          */
1974         lbmStartIO(bp);
1975 }
1976
1977
1978 /*
1979  * NAME:        lbmStartIO()
1980  *
1981  * FUNCTION:    Interface to DD strategy routine
1982  *
1983  * RETURN:      none
1984  *
1985  * serialization: LCACHE_LOCK() is NOT held during log i/o;
1986  */
static void lbmStartIO(struct lbuf * bp)
{
        jfs_info("lbmStartIO");

        bp->l_bh.b_reqnext = NULL;
        set_bit(BH_Dirty, &bp->l_bh.b_state);
//      lock_buffer(&bp->l_bh);
        /* lock the buffer by hand; it must not already be locked */
        assert(!test_bit(BH_Lock, &bp->l_bh.b_state));
        set_bit(BH_Lock, &bp->l_bh.b_state);

        set_bit(BH_Mapped, &bp->l_bh.b_state);
        set_bit(BH_Req, &bp->l_bh.b_state);
        bp->l_bh.b_rdev = bp->l_bh.b_dev;
        /* convert the log block number into a 512-byte sector number */
        bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9);

        if (bp->l_log->no_integrity)
                /* don't really do I/O */
                lbmIODone(&bp->l_bh, 1);
         else
                generic_make_request(WRITE, &bp->l_bh);

        INCREMENT(lmStat.submitted);
        run_task_queue(&tq_disk);
}
2011
2012
2013 /*
2014  *      lbmIOWait()
2015  */
static int lbmIOWait(struct lbuf * bp, int flag)
{
        unsigned long flags;
        int rc = 0;

        jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);

        LCACHE_LOCK(flags);             /* disable+lock */

        /* sleep until lbmIODone() marks the buffer lbmDONE */
        LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);

        /* report -EIO if lbmIODone() recorded an I/O error */
        rc = (bp->l_flag & lbmERROR) ? -EIO : 0;

        /* optionally recycle the buffer while LCACHE_LOCK is still held */
        if (flag & lbmFREE)
                lbmfree(bp);

        LCACHE_UNLOCK(flags);   /* unlock+enable */

        jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
        return rc;
}
2037
2038 /*
2039  *      lbmIODone()
2040  *
2041  * executed at INTIODONE level
2042  */
static void lbmIODone(struct buffer_head *bh, int uptodate)
{
        struct lbuf *bp = bh->b_private;
        struct lbuf *nextbp, *tail;
        struct jfs_log *log;
        unsigned long flags;

        /*
         * get back jfs buffer bound to the i/o buffer
         */
        jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

        LCACHE_LOCK(flags);             /* disable+lock */

        unlock_buffer(&bp->l_bh);
        bp->l_flag |= lbmDONE;

        if (!uptodate) {
                /* record the failure; lbmIOWait() maps this to -EIO */
                bp->l_flag |= lbmERROR;

                jfs_err("lbmIODone: I/O error in JFS log");
        }

        /*
         *      pagein completion
         */
        if (bp->l_flag & lbmREAD) {
                bp->l_flag &= ~lbmREAD;

                LCACHE_UNLOCK(flags);   /* unlock+enable */

                /* wakeup I/O initiator */
                LCACHE_WAKEUP(&bp->l_ioevent);

                return;
        }

        /*
         *      pageout completion
         *
         * the bp at the head of write queue has completed pageout.
         *
         * if single-commit/full-page pageout, remove the current buffer
         * from head of pageout queue, and redrive pageout with
         * the new buffer at head of pageout queue;
         * otherwise, the partial-page pageout buffer stays at
         * the head of pageout queue to be redriven for pageout
         * by lmGroupCommit() until full-page pageout is completed.
         */
        bp->l_flag &= ~lbmWRITE;
        INCREMENT(lmStat.pagedone);

        /* update committed lsn */
        log = bp->l_log;
        log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

        /* sidestream (lbmDirectWrite) page: no write queue bookkeeping */
        if (bp->l_flag & lbmDIRECT) {
                LCACHE_WAKEUP(&bp->l_ioevent);
                LCACHE_UNLOCK(flags);
                return;
        }

        tail = log->wqueue;

        /* single element queue */
        if (bp == tail) {
                /* remove head buffer of full-page pageout
                 * from log device write queue
                 */
                if (bp->l_flag & lbmRELEASE) {
                        log->wqueue = NULL;
                        bp->l_wqnext = NULL;
                }
        }
        /* multi element queue */
        else {
                /* remove head buffer of full-page pageout
                 * from log device write queue
                 */
                if (bp->l_flag & lbmRELEASE) {
                        nextbp = tail->l_wqnext = bp->l_wqnext;
                        bp->l_wqnext = NULL;

                        /*
                         * redrive pageout of next page at head of write queue:
                         * redrive next page without any bound tblk
                         * (i.e., page w/o any COMMIT records), or
                         * first page of new group commit which has been
                         * queued after current page (subsequent pageout
                         * is performed synchronously, except page without
                         * any COMMITs) by lmGroupCommit() as indicated
                         * by lbmWRITE flag;
                         */
                        if (nextbp->l_flag & lbmWRITE) {
                                /*
                                 * We can't do the I/O at interrupt time.
                                 * The jfsIO thread can do it
                                 */
                                lbmRedrive(nextbp);
                        }
                }
        }

        /*
         *      synchronous pageout:
         *
         * buffer has not necessarily been removed from write queue
         * (e.g., synchronous write of partial-page with COMMIT):
         * leave buffer for i/o initiator to dispose
         */
        if (bp->l_flag & lbmSYNC) {
                LCACHE_UNLOCK(flags);   /* unlock+enable */

                /* wakeup I/O initiator */
                LCACHE_WAKEUP(&bp->l_ioevent);
        }

        /*
         *      Group Commit pageout:
         */
        else if (bp->l_flag & lbmGC) {
                LCACHE_UNLOCK(flags);
                lmPostGC(bp);
        }

        /*
         *      asynchronous pageout:
         *
         * buffer must have been removed from write queue:
         * insert buffer at head of freelist where it can be recycled
         */
        else {
                assert(bp->l_flag & lbmRELEASE);
                assert(bp->l_flag & lbmFREE);
                lbmfree(bp);

                LCACHE_UNLOCK(flags);   /* unlock+enable */
        }
}
2182
/* jfsIO kernel thread: submits redriven log buffer I/O in process context */
int jfsIOWait(void *arg)
{
        struct lbuf *bp;

        lock_kernel();

        /* detach from the spawning process and become a kernel daemon */
        daemonize();
        current->tty = NULL;
        strcpy(current->comm, "jfsIO");

        unlock_kernel();

        /* ignore all signals; shutdown is driven by jfs_stop_threads */
        spin_lock_irq(&current->sigmask_lock);
        sigfillset(&current->blocked);
        recalc_sigpending(current);
        spin_unlock_irq(&current->sigmask_lock);

        /* tell the spawner that initialization is complete */
        complete(&jfsIOwait);

        do {
                DECLARE_WAITQUEUE(wq, current);

                /* drain the redrive list, dropping the lock across each submit */
                spin_lock_irq(&log_redrive_lock);
                while ((bp = log_redrive_list)) {
                        log_redrive_list = bp->l_redrive_next;
                        bp->l_redrive_next = NULL;
                        spin_unlock_irq(&log_redrive_lock);
                        lbmStartIO(bp);
                        spin_lock_irq(&log_redrive_lock);
                }
                /* queue ourselves before unlocking to avoid a lost wakeup */
                add_wait_queue(&jfs_IO_thread_wait, &wq);
                set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock_irq(&log_redrive_lock);
                schedule();
                current->state = TASK_RUNNING;
                remove_wait_queue(&jfs_IO_thread_wait, &wq);
        } while (!jfs_stop_threads);

        jfs_info("jfsIOWait being killed!");
        complete_and_exit(&jfsIOwait, 0);
}
2224
2225 /*
2226  * NAME:        lmLogFormat()/jfs_logform()
2227  *
2228  * FUNCTION:    format file system log
2229  *
2230  * PARAMETERS:
2231  *      log     - volume log
2232  *      logAddress - start address of log space in FS block
2233  *      logSize - length of log space in FS block;
2234  *
2235  * RETURN:      0       - success
2236  *              -EIO    - i/o error
2237  *
2238  * XXX: We're synchronously writing one page at a time.  This needs to
2239  *      be improved by writing multiple pages at once.
2240  */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
        int rc = -EIO;
        struct jfs_sb_info *sbi = JFS_SBI(log->sb);
        struct logsuper *logsuper;
        struct logpage *lp;
        int lspn;               /* log sequence page number */
        struct lrd *lrd_ptr;
        int npages = 0;
        struct lbuf *bp;

        jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
                 (long long)logAddress, logSize);

        /* allocate a log buffer (sleeps until one is free; never NULL) */
        bp = lbmAllocate(log, 1);

        npages = logSize >> sbi->l2nbperpage;

        /*
         *      log space:
         *
         * page 0 - reserved;
         * page 1 - log superblock;
         * page 2 - log data page: A SYNC log record is written
         *          into this page at logform time;
         * pages 3-N - log data page: set to empty log data pages;
         */
        /*
         *      init log superblock: log page 1
         */
        logsuper = (struct logsuper *) bp->l_ldata;

        logsuper->magic = cpu_to_le32(LOGMAGIC);
        logsuper->version = cpu_to_le32(LOGVERSION);
        logsuper->state = cpu_to_le32(LOGREDONE);
        logsuper->flag = cpu_to_le32(sbi->mntflag);     /* ? */
        logsuper->size = cpu_to_le32(npages);
        logsuper->bsize = cpu_to_le32(sbi->bsize);
        logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
        logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

        /* write the superblock page synchronously, bypassing the wqueue */
        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
        bp->l_blkno = logAddress + sbi->nbperpage;
        lbmStartIO(bp);
        if ((rc = lbmIOWait(bp, 0)))
                goto exit;

        /*
         *      init pages 2 to npages-1 as log data pages:
         *
         * log page sequence number (lpsn) initialization:
         *
         * pn:   0     1     2     3                 n-1
         *       +-----+-----+=====+=====+===.....===+=====+
         * lspn:             N-1   0     1           N-2
         *                   <--- N page circular file ---->
         *
         * the N (= npages-2) data pages of the log is maintained as
         * a circular file for the log records;
         * lpsn grows by 1 monotonically as each log page is written
         * to the circular file of the log;
         * and setLogpage() will not reset the page number even if
         * the eor is equal to LOGPHDRSIZE. In order for binary search
         * to still work in the find-log-end process, we have to simulate the
         * log wrap situation at the log format time.
         * The 1st log page written will have the highest lpsn. Then
         * the succeeding log pages will have ascending order of
         * the lspn starting from 0, ... (N-2)
         */
        lp = (struct logpage *) bp->l_ldata;
        /*
         * initialize 1st log page to be written: lpsn = N - 1,
         * a SYNCPT log record is written to this page
         */
        lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

        lrd_ptr = (struct lrd *) &lp->data;
        lrd_ptr->logtid = 0;
        lrd_ptr->backchain = 0;
        lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
        lrd_ptr->length = 0;
        lrd_ptr->log.syncpt.sync = 0;

        /* write the first data page (reusing the same buffer) */
        bp->l_blkno += sbi->nbperpage;
        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
        lbmStartIO(bp);
        if ((rc = lbmIOWait(bp, 0)))
                goto exit;

        /*
         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
         */
        for (lspn = 0; lspn < npages - 3; lspn++) {
                lp->h.page = lp->t.page = cpu_to_le32(lspn);
                lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

                /* one synchronous page write per iteration (see XXX above) */
                bp->l_blkno += sbi->nbperpage;
                bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
                lbmStartIO(bp);
                if ((rc = lbmIOWait(bp, 0)))
                        goto exit;
        }

        rc = 0;
exit:
        /*
         *      finalize log
         */
        /* release the buffer */
        lbmFree(bp);

        return rc;
}
2356
2357 #ifdef CONFIG_JFS_STATISTICS
2358 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
2359                       int *eof, void *data)
2360 {
2361         int len = 0;
2362         off_t begin;
2363
2364         len += sprintf(buffer,
2365                        "JFS Logmgr stats\n"
2366                        "================\n"
2367                        "commits = %d\n"
2368                        "writes submitted = %d\n"
2369                        "writes completed = %d\n"
2370                        "full pages submitted = %d\n"
2371                        "partial pages submitted = %d\n",
2372                        lmStat.commit,
2373                        lmStat.submitted,
2374                        lmStat.pagedone,
2375                        lmStat.full_page,
2376                        lmStat.partial_page);
2377
2378         begin = offset;
2379         *start = buffer + begin;
2380         len -= begin;
2381
2382         if (len > length)
2383                 len = length;
2384         else
2385                 *eof = 1;
2386
2387         if (len < 0)
2388                 len = 0;
2389
2390         return len;
2391 }
2392 #endif /* CONFIG_JFS_STATISTICS */