1  /*
2   *   Copyright (C) International Business Machines Corp., 2000-2004
3   *   Portions Copyright (C) Christoph Hellwig, 2001-2002
4   *
5   *   This program is free software;  you can redistribute it and/or modify
6   *   it under the terms of the GNU General Public License as published by
7   *   the Free Software Foundation; either version 2 of the License, or
8   *   (at your option) any later version.
9   *
10   *   This program is distributed in the hope that it will be useful,
11   *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13   *   the GNU General Public License for more details.
14   *
15   *   You should have received a copy of the GNU General Public License
16   *   along with this program;  if not, write to the Free Software
17   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18   */
19  #ifndef	_H_JFS_LOGMGR
20  #define _H_JFS_LOGMGR
21  
22  #include "jfs_filsys.h"
23  #include "jfs_lock.h"
24  
25  /*
26   *	log manager configuration parameters
27   */
28  
/*
 * log page size: LOGPSIZE is expressed through its log2 so that the two
 * constants always agree.
 */
#define	L2LOGPSIZE	12			/* log2 of the log page size */
#define	LOGPSIZE	(1 << L2LOGPSIZE)	/* log page size in bytes: 4096 */

#define	LOGPAGES	16	/* log pages per mounted file system */
34  
35  /*
36   *	log logical volume
37   *
38   * a log is used to make the commit operation on journalled
39   * files within the same logical volume group atomic.
40   * a log is implemented with a logical volume.
41   * there is one log per logical volume group.
42   *
43   * block 0 of the log logical volume is not used (ipl etc).
44   * block 1 contains a log "superblock" and is used by logFormat(),
45   * lmLogInit(), lmLogShutdown(), and logRedo() to record status
46   * of the log but is not otherwise used during normal processing.
47   * blocks 2 - (N-1) are used to contain log records.
48   *
49   * when a volume group is varied-on-line, logRedo() must have
50   * been executed before the file systems (logical volumes) in
51   * the volume group can be mounted.
52   */
53  /*
54   *	log superblock (block 1 of logical volume)
55   */
#define	LOGSUPER_B	1	/* block number of the log superblock */
#define	LOGSTART_B	2	/* first block used to contain log records */

#define	LOGMAGIC	0x87654321	/* log magic number */
#define	LOGVERSION	1		/* log version number */

#define	MAX_ACTIVE	128	/* max active file systems sharing log */
63  
64  struct logsuper {
65  	__le32 magic;		/* 4: log lv identifier */
66  	__le32 version;		/* 4: version number */
67  	__le32 serial;		/* 4: log open/mount counter */
68  	__le32 size;		/* 4: size in number of LOGPSIZE blocks */
69  	__le32 bsize;		/* 4: logical block size in byte */
70  	__le32 l2bsize;		/* 4: log2 of bsize */
71  
72  	__le32 flag;		/* 4: option */
73  	__le32 state;		/* 4: state - see below */
74  
75  	__le32 end;		/* 4: addr of last log record set by logredo */
76  	char uuid[16];		/* 16: 128-bit journal uuid */
77  	char label[16];		/* 16: journal label */
78  	struct {
79  		char uuid[16];
80  	} active[MAX_ACTIVE];	/* 2048: active file systems list */
81  };
82  
/*
 * All-zero 16-byte uuid value.  This is a string literal, so
 * sizeof(NULL_UUID) is 17 (16 explicit NUL bytes plus the terminator).
 */
#define	NULL_UUID	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84  
85  /* log flag: commit option (see jfs_filsys.h) */
86  
/* log state (values of logsuper.state) */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define	LOGREDONE	1	/* log shutdown by lmLogShutdown(), or
				 * log redo completed by logredo()
				 */
#define	LOGWRAP		2	/* log wrapped */
#define	LOGREADERR	3	/* log read error detected in logredo() */
94  
95  
96  /*
97   *	log logical page
98   *
99   * (this comment should be rewritten !)
100   * the header and trailer structures (h,t) will normally have
101   * the same page and eor value.
102   * An exception to this occurs when a complete page write is not
103   * accomplished on a power failure. Since the hardware may "split write"
104   * sectors in the page, any out of order sequence may occur during powerfail
 * and needs to be recognized during log replay.  The xor value is
 * an "exclusive or" of all log words in the page up to eor.  This
 * 32 bit xor is stored with the top 16 bits in the header and the
 * bottom 16 bits in the trailer.  logredo can easily recognize pages
 * that were not completed by reconstructing this xor and checking
 * the log page.
111   *
112   * Previous versions of the operating system did not allow split
113   * writes and detected partially written records in logredo by
114   * ordering the updates to the header, trailer, and the move of data
115   * into the logdata area.  The order: (1) data is moved (2) header
116   * is updated (3) trailer is updated.  In logredo, when the header
117   * differed from the trailer, the header and trailer were reconciled
118   * as follows: if h.page != t.page they were set to the smaller of
119   * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
120   * h.eor != t.eor they were set to the smaller of their two values.
121   */
122  struct logpage {
123  	struct {		/* header */
124  		__le32 page;	/* 4: log sequence page number */
125  		__le16 rsrvd;	/* 2: */
126  		__le16 eor;	/* 2: end-of-log offset of lasrt record write */
127  	} h;
128  
129  	__le32 data[LOGPSIZE / 4 - 4];	/* log record area */
130  
131  	struct {		/* trailer */
132  		__le32 page;	/* 4: normally the same as h.page */
133  		__le16 rsrvd;	/* 2: */
134  		__le16 eor;	/* 2: normally the same as h.eor */
135  	} t;
136  };
137  
/* sizes, in bytes, of the log page header (h) and trailer (t) */
#define	LOGPHDRSIZE	8
#define	LOGPTLRSIZE	8
140  
141  
142  /*
143   *	log record
144   *
145   * (this comment should be rewritten !)
146   * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space if
 * they must be written to disk before commit, and i/o is
149   * scheduled for modified pages to their home location after
150   * the log records containing the after values and the commit
151   * record is written to the log on disk, undo discards the copy
152   * in main-memory.)
153   *
154   * a log record consists of a data area of variable length followed by
155   * a descriptor of fixed size LOGRDSIZE bytes.
156   * the data area is rounded up to an integral number of 4-bytes and
157   * must be no longer than LOGPSIZE.
158   * the descriptor is of size of multiple of 4-bytes and aligned on a
159   * 4-byte boundary.
160   * records are packed one after the other in the data area of log pages.
161   * (sometimes a DUMMY record is inserted so that at least one record ends
162   * on every page or the longest record is placed on at most two pages).
163   * the field eor in page header/trailer points to the byte following
164   * the last record on a page.
165   */
166  
/*
 * The three groups below are values for three different fields, so equal
 * numeric values in different groups (e.g. 0x0080) do not conflict.
 */

/* log record types (lrd.type) */
#define	LOG_COMMIT		0x8000	/* transaction commit */
#define	LOG_SYNCPT		0x4000	/* log sync point */
#define	LOG_MOUNT		0x2000	/* file system mount */
#define	LOG_REDOPAGE		0x0800	/* after-image */
#define	LOG_NOREDOPAGE		0x0080	/* the page is freed */
#define	LOG_NOREDOINOEXT	0x0040	/* the inode extent is freed */
#define	LOG_UPDATEMAP		0x0008	/* update block allocation map */
#define	LOG_NOREDOFILE		0x0001	/* this file is freed */

/* REDOPAGE/NOREDOPAGE log record data type */
#define	LOG_INODE		0x0001
#define	LOG_XTREE		0x0002
#define	LOG_DTREE		0x0004
#define	LOG_BTROOT		0x0010
#define	LOG_EA			0x0020
#define	LOG_ACL			0x0040
#define	LOG_DATA		0x0080
#define	LOG_NEW			0x0100
#define	LOG_EXTEND		0x0200
#define	LOG_RELOCATE		0x0400
#define	LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/* UPDATEMAP log record descriptor type */
#define	LOG_ALLOCXADLIST	0x0080
#define	LOG_ALLOCPXDLIST	0x0040
#define	LOG_ALLOCXAD		0x0020
#define	LOG_ALLOCPXD		0x0010
#define	LOG_FREEXADLIST		0x0008
#define	LOG_FREEPXDLIST		0x0004
#define	LOG_FREEXAD		0x0002
#define	LOG_FREEPXD		0x0001
199  
200  
201  struct lrd {
202  	/*
203  	 * type independent area
204  	 */
205  	__le32 logtid;		/* 4: log transaction identifier */
206  	__le32 backchain;	/* 4: ptr to prev record of same transaction */
207  	__le16 type;		/* 2: record type */
208  	__le16 length;		/* 2: length of data in record (in byte) */
209  	__le32 aggregate;	/* 4: file system lv/aggregate */
210  	/* (16) */
211  
212  	/*
213  	 * type dependent area (20)
214  	 */
215  	union {
216  
217  		/*
218  		 *	COMMIT: commit
219  		 *
220  		 * transaction commit: no type-dependent information;
221  		 */
222  
223  		/*
224  		 *	REDOPAGE: after-image
225  		 *
226  		 * apply after-image;
227  		 *
228  		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
229  		 */
230  		struct {
231  			__le32 fileset;	/* 4: fileset number */
232  			__le32 inode;	/* 4: inode number */
233  			__le16 type;	/* 2: REDOPAGE record type */
234  			__le16 l2linesize;	/* 2: log2 of line size */
235  			pxd_t pxd;	/* 8: on-disk page pxd */
236  		} redopage;	/* (20) */
237  
238  		/*
239  		 *	NOREDOPAGE: the page is freed
240  		 *
241  		 * do not apply after-image records which precede this record
242  		 * in the log with the same page block number to this page.
243  		 *
244  		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
245  		 */
246  		struct {
247  			__le32 fileset;	/* 4: fileset number */
248  			__le32 inode;	/* 4: inode number */
249  			__le16 type;	/* 2: NOREDOPAGE record type */
250  			__le16 rsrvd;	/* 2: reserved */
251  			pxd_t pxd;	/* 8: on-disk page pxd */
252  		} noredopage;	/* (20) */
253  
254  		/*
255  		 *	UPDATEMAP: update block allocation map
256  		 *
257  		 * either in-line PXD,
258  		 * or     out-of-line  XADLIST;
259  		 *
260  		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
261  		 */
262  		struct {
263  			__le32 fileset;	/* 4: fileset number */
264  			__le32 inode;	/* 4: inode number */
265  			__le16 type;	/* 2: UPDATEMAP record type */
266  			__le16 nxd;	/* 2: number of extents */
267  			pxd_t pxd;	/* 8: pxd */
268  		} updatemap;	/* (20) */
269  
270  		/*
271  		 *	NOREDOINOEXT: the inode extent is freed
272  		 *
273  		 * do not apply after-image records which precede this
274  		 * record in the log with the any of the 4 page block
275  		 * numbers in this inode extent.
276  		 *
277  		 * NOTE: The fileset and pxd fields MUST remain in
278  		 *       the same fields in the REDOPAGE record format.
279  		 *
280  		 */
281  		struct {
282  			__le32 fileset;	/* 4: fileset number */
283  			__le32 iagnum;	/* 4: IAG number     */
284  			__le32 inoext_idx;	/* 4: inode extent index */
285  			pxd_t pxd;	/* 8: on-disk page pxd */
286  		} noredoinoext;	/* (20) */
287  
288  		/*
289  		 *	SYNCPT: log sync point
290  		 *
291  		 * replay log up to syncpt address specified;
292  		 */
293  		struct {
294  			__le32 sync;	/* 4: syncpt address (0 = here) */
295  		} syncpt;
296  
297  		/*
298  		 *	MOUNT: file system mount
299  		 *
300  		 * file system mount: no type-dependent information;
301  		 */
302  
303  		/*
304  		 *	? FREEXTENT: free specified extent(s)
305  		 *
306  		 * free specified extent(s) from block allocation map
307  		 * N.B.: nextents should be length of data/sizeof(xad_t)
308  		 */
309  		struct {
310  			__le32 type;	/* 4: FREEXTENT record type */
311  			__le32 nextent;	/* 4: number of extents */
312  
313  			/* data: PXD or XAD list */
314  		} freextent;
315  
316  		/*
317  		 *	? NOREDOFILE: this file is freed
318  		 *
319  		 * do not apply records which precede this record in the log
320  		 * with the same inode number.
321  		 *
322  		 * NOREDOFILE must be the first to be written at commit
323  		 * (last to be read in logredo()) - it prevents
324  		 * replay of preceding updates of all preceding generations
325  		 * of the inumber esp. the on-disk inode itself.
326  		 */
327  		struct {
328  			__le32 fileset;	/* 4: fileset number */
329  			__le32 inode;	/* 4: inode number */
330  		} noredofile;
331  
332  		/*
333  		 *	? NEWPAGE:
334  		 *
335  		 * metadata type dependent
336  		 */
337  		struct {
338  			__le32 fileset;	/* 4: fileset number */
339  			__le32 inode;	/* 4: inode number */
340  			__le32 type;	/* 4: NEWPAGE record type */
341  			pxd_t pxd;	/* 8: on-disk page pxd */
342  		} newpage;
343  
344  		/*
345  		 *	? DUMMY: filler
346  		 *
347  		 * no type-dependent information
348  		 */
349  	} log;
350  };					/* (36) */
351  
352  #define	LOGRDSIZE	(sizeof(struct lrd))
353  
354  /*
355   *	line vector descriptor
356   */
357  struct lvd {
358  	__le16 offset;
359  	__le16 length;
360  };
361  
362  
363  /*
364   *	log logical volume
365   */
366  struct jfs_log {
367  
368  	struct list_head sb_list;/*  This is used to sync metadata
369  				 *    before writing syncpt.
370  				 */
371  	struct list_head journal_list; /* Global list */
372  	struct block_device *bdev; /* 4: log lv pointer */
373  	int serial;		/* 4: log mount serial number */
374  
375  	s64 base;		/* @8: log extent address (inline log ) */
376  	int size;		/* 4: log size in log page (in page) */
377  	int l2bsize;		/* 4: log2 of bsize */
378  
379  	unsigned long flag;	/* 4: flag */
380  
381  	struct lbuf *lbuf_free;	/* 4: free lbufs */
382  	wait_queue_head_t free_wait;	/* 4: */
383  
384  	/* log write */
385  	int logtid;		/* 4: log tid */
386  	int page;		/* 4: page number of eol page */
387  	int eor;		/* 4: eor of last record in eol page */
388  	struct lbuf *bp;	/* 4: current log page buffer */
389  
390  	struct mutex loglock;	/* 4: log write serialization lock */
391  
392  	/* syncpt */
393  	int nextsync;		/* 4: bytes to write before next syncpt */
394  	int active;		/* 4: */
395  	wait_queue_head_t syncwait;	/* 4: */
396  
397  	/* commit */
398  	uint cflag;		/* 4: */
399  	struct list_head cqueue; /* FIFO commit queue */
400  	struct tblock *flush_tblk; /* tblk we're waiting on for flush */
401  	int gcrtc;		/* 4: GC_READY transaction count */
402  	struct tblock *gclrt;	/* 4: latest GC_READY transaction */
403  	spinlock_t gclock;	/* 4: group commit lock */
404  	int logsize;		/* 4: log data area size in byte */
405  	int lsn;		/* 4: end-of-log */
406  	int clsn;		/* 4: clsn */
407  	int syncpt;		/* 4: addr of last syncpt record */
408  	int sync;		/* 4: addr from last logsync() */
409  	struct list_head synclist;	/* 8: logsynclist anchor */
410  	spinlock_t synclock;	/* 4: synclist lock */
411  	struct lbuf *wqueue;	/* 4: log pageout queue */
412  	int count;		/* 4: count */
413  	char uuid[16];		/* 16: 128-bit uuid of log device */
414  
415  	int no_integrity;	/* 3: flag to disable journaling to disk */
416  };
417  
/*
 * Log flag
 *
 * NOTE(review): the values are consecutive integers rather than bit masks,
 * so they are presumably bit numbers for jfs_log.flag - confirm at the
 * call sites.
 */
#define	log_INLINELOG	1
#define	log_SYNCBARRIER	2
#define	log_QUIESCE	3
#define	log_FLUSH	4
425  
/*
 * group commit flags (bit masks)
 */

/* jfs_log */
#define	logGC_PAGEOUT		0x00000001

/* tblock/lbuf */
#define	tblkGC_QUEUE		0x0001
#define	tblkGC_READY		0x0002
#define	tblkGC_COMMIT		0x0004
#define	tblkGC_COMMITTED	0x0008
#define	tblkGC_EOP		0x0010
#define	tblkGC_FREE		0x0020
#define	tblkGC_LEADER		0x0040
#define	tblkGC_ERROR		0x0080
#define	tblkGC_LAZY		0x0100	/* D230860 */
#define	tblkGC_UNLOCKED		0x0200	/* D230860 */
443  
444  /*
445   *		log cache buffer header
446   */
447  struct lbuf {
448  	struct jfs_log *l_log;	/* 4: log associated with buffer */
449  
450  	/*
451  	 * data buffer base area
452  	 */
453  	uint l_flag;		/* 4: pageout control flags */
454  
455  	struct lbuf *l_wqnext;	/* 4: write queue link */
456  	struct lbuf *l_freelist;	/* 4: freelistlink */
457  
458  	int l_pn;		/* 4: log page number */
459  	int l_eor;		/* 4: log record eor */
460  	int l_ceor;		/* 4: committed log record eor */
461  
462  	s64 l_blkno;		/* 8: log page block number */
463  	caddr_t l_ldata;	/* 4: data page */
464  	struct page *l_page;	/* The page itself */
465  	uint l_offset;		/* Offset of l_ldata within the page */
466  
467  	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
468  };
469  
470  /* Reuse l_freelist for redrive list */
471  #define l_redrive_next l_freelist
472  
473  /*
474   *	logsynclist block
475   *
476   * common logsyncblk prefix for jbuf_t and tblock
477   */
478  struct logsyncblk {
479  	u16 xflag;		/* flags */
480  	u16 flag;		/* only meaninful in tblock */
481  	lid_t lid;		/* lock id */
482  	s32 lsn;		/* log sequence number */
483  	struct list_head synclist;	/* log sync list link */
484  };
485  
486  /*
487   *	logsynclist serialization (per log)
488   */
489  
/*
 * Thin wrappers around the spinlock API, all operating on (log)->synclock.
 * 'flags' is handed unchanged to spin_lock_irqsave() and
 * spin_unlock_irqrestore(), so the same variable must be used for a
 * LOGSYNC_LOCK()/LOGSYNC_UNLOCK() pair.
 */
#define LOGSYNC_LOCK_INIT(log)		spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags)	spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags)	spin_unlock_irqrestore(&(log)->synclock, flags)
494  
/*
 * logdiff - compute the difference in bytes of lsn from the sync point
 *
 * @diff: signed integer lvalue that receives the result
 * @lsn:  log sequence number to measure
 * @log:  pointer to an object with 'syncpt' and 'logsize' members
 *
 * Stores (lsn - log->syncpt) in @diff; a negative difference is adjusted
 * by adding log->logsize.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and "if (x) logdiff(...); else ..." parses as intended.
 * Invoke it with a trailing semicolon.  @diff and @log are evaluated more
 * than once, so pass expressions without side effects.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
502  
503  extern int lmLogOpen(struct super_block *sb);
504  extern int lmLogClose(struct super_block *sb);
505  extern int lmLogShutdown(struct jfs_log * log);
506  extern int lmLogInit(struct jfs_log * log);
507  extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
508  extern int lmGroupCommit(struct jfs_log *, struct tblock *);
509  extern int jfsIOWait(void *);
510  extern void jfs_flush_journal(struct jfs_log * log, int wait);
511  extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
512  
513  #endif				/* _H_JFS_LOGMGR */
514