1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_trans.h"
15 #include "xfs_inode_item.h"
16 #include "xfs_trace.h"
17 #include "xfs_trans_priv.h"
18 #include "xfs_buf_item.h"
19 #include "xfs_log.h"
20 #include "xfs_error.h"
21 #include "xfs_log_priv.h"
22 #include "xfs_log_recover.h"
23 #include "xfs_icache.h"
24 #include "xfs_bmap_btree.h"
25
26 STATIC void
xlog_recover_inode_ra_pass2(struct xlog * log,struct xlog_recover_item * item)27 xlog_recover_inode_ra_pass2(
28 struct xlog *log,
29 struct xlog_recover_item *item)
30 {
31 if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
32 struct xfs_inode_log_format *ilfp = item->ri_buf[0].i_addr;
33
34 xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
35 &xfs_inode_buf_ra_ops);
36 } else {
37 struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].i_addr;
38
39 xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
40 &xfs_inode_buf_ra_ops);
41 }
42 }
43
44 /*
45 * Inode fork owner changes
46 *
47 * If we have been told that we have to reparent the inode fork, it's because an
48 * extent swap operation on a CRC enabled filesystem has been done and we are
49 * replaying it. We need to walk the BMBT of the appropriate fork and change the
50 * owners of it.
51 *
52 * The complexity here is that we don't have an inode context to work with, so
53 * after we've replayed the inode we need to instantiate one. This is where the
54 * fun begins.
55 *
56 * We are in the middle of log recovery, so we can't run transactions. That
57 * means we cannot use cache coherent inode instantiation via xfs_iget(), as
58 * that will result in the corresponding iput() running the inode through
59 * xfs_inactive(). If we've just replayed an inode core that changes the link
60 * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
61 * transactions (bad!).
62 *
63 * So, to avoid this, we instantiate an inode directly from the inode core we've
64 * just recovered. We have the buffer still locked, and all we really need to
65 * instantiate is the inode core and the forks being modified. We can do this
66 * manually, then run the inode btree owner change, and then tear down the
67 * xfs_inode without having to run any transactions at all.
68 *
69 * Also, because we don't have a transaction context available here but need to
70 * gather all the buffers we modify for writeback so we pass the buffer_list
71 * instead for the operation to use.
72 */
73
74 STATIC int
xfs_recover_inode_owner_change(struct xfs_mount * mp,struct xfs_dinode * dip,struct xfs_inode_log_format * in_f,struct list_head * buffer_list)75 xfs_recover_inode_owner_change(
76 struct xfs_mount *mp,
77 struct xfs_dinode *dip,
78 struct xfs_inode_log_format *in_f,
79 struct list_head *buffer_list)
80 {
81 struct xfs_inode *ip;
82 int error;
83
84 ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
85
86 ip = xfs_inode_alloc(mp, in_f->ilf_ino);
87 if (!ip)
88 return -ENOMEM;
89
90 /* instantiate the inode */
91 ASSERT(dip->di_version >= 3);
92
93 error = xfs_inode_from_disk(ip, dip);
94 if (error)
95 goto out_free_ip;
96
97 if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
98 ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
99 error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
100 ip->i_ino, buffer_list);
101 if (error)
102 goto out_free_ip;
103 }
104
105 if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
106 ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
107 error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
108 ip->i_ino, buffer_list);
109 if (error)
110 goto out_free_ip;
111 }
112
113 out_free_ip:
114 xfs_inode_free(ip);
115 return error;
116 }
117
xfs_log_dinode_has_bigtime(const struct xfs_log_dinode * ld)118 static inline bool xfs_log_dinode_has_bigtime(const struct xfs_log_dinode *ld)
119 {
120 return ld->di_version >= 3 &&
121 (ld->di_flags2 & XFS_DIFLAG2_BIGTIME);
122 }
123
124 /* Convert a log timestamp to an ondisk timestamp. */
125 static inline xfs_timestamp_t
xfs_log_dinode_to_disk_ts(struct xfs_log_dinode * from,const xfs_ictimestamp_t its)126 xfs_log_dinode_to_disk_ts(
127 struct xfs_log_dinode *from,
128 const xfs_ictimestamp_t its)
129 {
130 struct xfs_legacy_timestamp *lts;
131 struct xfs_legacy_ictimestamp *lits;
132 xfs_timestamp_t ts;
133
134 if (xfs_log_dinode_has_bigtime(from))
135 return cpu_to_be64(its);
136
137 lts = (struct xfs_legacy_timestamp *)&ts;
138 lits = (struct xfs_legacy_ictimestamp *)&its;
139 lts->t_sec = cpu_to_be32(lits->t_sec);
140 lts->t_nsec = cpu_to_be32(lits->t_nsec);
141
142 return ts;
143 }
144
145 STATIC void
xfs_log_dinode_to_disk(struct xfs_log_dinode * from,struct xfs_dinode * to)146 xfs_log_dinode_to_disk(
147 struct xfs_log_dinode *from,
148 struct xfs_dinode *to)
149 {
150 to->di_magic = cpu_to_be16(from->di_magic);
151 to->di_mode = cpu_to_be16(from->di_mode);
152 to->di_version = from->di_version;
153 to->di_format = from->di_format;
154 to->di_onlink = 0;
155 to->di_uid = cpu_to_be32(from->di_uid);
156 to->di_gid = cpu_to_be32(from->di_gid);
157 to->di_nlink = cpu_to_be32(from->di_nlink);
158 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
159 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
160 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
161
162 to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
163 to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
164 to->di_ctime = xfs_log_dinode_to_disk_ts(from, from->di_ctime);
165
166 to->di_size = cpu_to_be64(from->di_size);
167 to->di_nblocks = cpu_to_be64(from->di_nblocks);
168 to->di_extsize = cpu_to_be32(from->di_extsize);
169 to->di_nextents = cpu_to_be32(from->di_nextents);
170 to->di_anextents = cpu_to_be16(from->di_anextents);
171 to->di_forkoff = from->di_forkoff;
172 to->di_aformat = from->di_aformat;
173 to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
174 to->di_dmstate = cpu_to_be16(from->di_dmstate);
175 to->di_flags = cpu_to_be16(from->di_flags);
176 to->di_gen = cpu_to_be32(from->di_gen);
177
178 if (from->di_version == 3) {
179 to->di_changecount = cpu_to_be64(from->di_changecount);
180 to->di_crtime = xfs_log_dinode_to_disk_ts(from,
181 from->di_crtime);
182 to->di_flags2 = cpu_to_be64(from->di_flags2);
183 to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
184 to->di_ino = cpu_to_be64(from->di_ino);
185 to->di_lsn = cpu_to_be64(from->di_lsn);
186 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
187 uuid_copy(&to->di_uuid, &from->di_uuid);
188 to->di_flushiter = 0;
189 } else {
190 to->di_flushiter = cpu_to_be16(from->di_flushiter);
191 }
192 }
193
194 STATIC int
xlog_recover_inode_commit_pass2(struct xlog * log,struct list_head * buffer_list,struct xlog_recover_item * item,xfs_lsn_t current_lsn)195 xlog_recover_inode_commit_pass2(
196 struct xlog *log,
197 struct list_head *buffer_list,
198 struct xlog_recover_item *item,
199 xfs_lsn_t current_lsn)
200 {
201 struct xfs_inode_log_format *in_f;
202 struct xfs_mount *mp = log->l_mp;
203 struct xfs_buf *bp;
204 struct xfs_dinode *dip;
205 int len;
206 char *src;
207 char *dest;
208 int error;
209 int attr_index;
210 uint fields;
211 struct xfs_log_dinode *ldip;
212 uint isize;
213 int need_free = 0;
214
215 if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
216 in_f = item->ri_buf[0].i_addr;
217 } else {
218 in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
219 need_free = 1;
220 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
221 if (error)
222 goto error;
223 }
224
225 /*
226 * Inode buffers can be freed, look out for it,
227 * and do not replay the inode.
228 */
229 if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
230 error = 0;
231 trace_xfs_log_recover_inode_cancel(log, in_f);
232 goto error;
233 }
234 trace_xfs_log_recover_inode_recover(log, in_f);
235
236 error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
237 0, &bp, &xfs_inode_buf_ops);
238 if (error)
239 goto error;
240 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
241 dip = xfs_buf_offset(bp, in_f->ilf_boffset);
242
243 /*
244 * Make sure the place we're flushing out to really looks
245 * like an inode!
246 */
247 if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
248 xfs_alert(mp,
249 "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
250 __func__, dip, bp, in_f->ilf_ino);
251 error = -EFSCORRUPTED;
252 goto out_release;
253 }
254 ldip = item->ri_buf[1].i_addr;
255 if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
256 xfs_alert(mp,
257 "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
258 __func__, item, in_f->ilf_ino);
259 error = -EFSCORRUPTED;
260 goto out_release;
261 }
262
263 /*
264 * If the inode has an LSN in it, recover the inode only if it's less
265 * than the lsn of the transaction we are replaying. Note: we still
266 * need to replay an owner change even though the inode is more recent
267 * than the transaction as there is no guarantee that all the btree
268 * blocks are more recent than this transaction, too.
269 */
270 if (dip->di_version >= 3) {
271 xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
272
273 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
274 trace_xfs_log_recover_inode_skip(log, in_f);
275 error = 0;
276 goto out_owner_change;
277 }
278 }
279
280 /*
281 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
282 * are transactional and if ordering is necessary we can determine that
283 * more accurately by the LSN field in the V3 inode core. Don't trust
284 * the inode versions we might be changing them here - use the
285 * superblock flag to determine whether we need to look at di_flushiter
286 * to skip replay when the on disk inode is newer than the log one
287 */
288 if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
289 ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
290 /*
291 * Deal with the wrap case, DI_MAX_FLUSH is less
292 * than smaller numbers
293 */
294 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
295 ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
296 /* do nothing */
297 } else {
298 trace_xfs_log_recover_inode_skip(log, in_f);
299 error = 0;
300 goto out_release;
301 }
302 }
303
304 /* Take the opportunity to reset the flush iteration count */
305 ldip->di_flushiter = 0;
306
307 if (unlikely(S_ISREG(ldip->di_mode))) {
308 if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
309 (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
310 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
311 XFS_ERRLEVEL_LOW, mp, ldip,
312 sizeof(*ldip));
313 xfs_alert(mp,
314 "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
315 "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
316 __func__, item, dip, bp, in_f->ilf_ino);
317 error = -EFSCORRUPTED;
318 goto out_release;
319 }
320 } else if (unlikely(S_ISDIR(ldip->di_mode))) {
321 if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
322 (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
323 (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
324 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
325 XFS_ERRLEVEL_LOW, mp, ldip,
326 sizeof(*ldip));
327 xfs_alert(mp,
328 "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
329 "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
330 __func__, item, dip, bp, in_f->ilf_ino);
331 error = -EFSCORRUPTED;
332 goto out_release;
333 }
334 }
335 if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
336 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
337 XFS_ERRLEVEL_LOW, mp, ldip,
338 sizeof(*ldip));
339 xfs_alert(mp,
340 "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
341 "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
342 __func__, item, dip, bp, in_f->ilf_ino,
343 ldip->di_nextents + ldip->di_anextents,
344 ldip->di_nblocks);
345 error = -EFSCORRUPTED;
346 goto out_release;
347 }
348 if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
349 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
350 XFS_ERRLEVEL_LOW, mp, ldip,
351 sizeof(*ldip));
352 xfs_alert(mp,
353 "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
354 "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
355 item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
356 error = -EFSCORRUPTED;
357 goto out_release;
358 }
359 isize = xfs_log_dinode_size(mp);
360 if (unlikely(item->ri_buf[1].i_len > isize)) {
361 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
362 XFS_ERRLEVEL_LOW, mp, ldip,
363 sizeof(*ldip));
364 xfs_alert(mp,
365 "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
366 __func__, item->ri_buf[1].i_len, item);
367 error = -EFSCORRUPTED;
368 goto out_release;
369 }
370
371 /* recover the log dinode inode into the on disk inode */
372 xfs_log_dinode_to_disk(ldip, dip);
373
374 fields = in_f->ilf_fields;
375 if (fields & XFS_ILOG_DEV)
376 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
377
378 if (in_f->ilf_size == 2)
379 goto out_owner_change;
380 len = item->ri_buf[2].i_len;
381 src = item->ri_buf[2].i_addr;
382 ASSERT(in_f->ilf_size <= 4);
383 ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
384 ASSERT(!(fields & XFS_ILOG_DFORK) ||
385 (len == in_f->ilf_dsize));
386
387 switch (fields & XFS_ILOG_DFORK) {
388 case XFS_ILOG_DDATA:
389 case XFS_ILOG_DEXT:
390 memcpy(XFS_DFORK_DPTR(dip), src, len);
391 break;
392
393 case XFS_ILOG_DBROOT:
394 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
395 (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
396 XFS_DFORK_DSIZE(dip, mp));
397 break;
398
399 default:
400 /*
401 * There are no data fork flags set.
402 */
403 ASSERT((fields & XFS_ILOG_DFORK) == 0);
404 break;
405 }
406
407 /*
408 * If we logged any attribute data, recover it. There may or
409 * may not have been any other non-core data logged in this
410 * transaction.
411 */
412 if (in_f->ilf_fields & XFS_ILOG_AFORK) {
413 if (in_f->ilf_fields & XFS_ILOG_DFORK) {
414 attr_index = 3;
415 } else {
416 attr_index = 2;
417 }
418 len = item->ri_buf[attr_index].i_len;
419 src = item->ri_buf[attr_index].i_addr;
420 ASSERT(len == in_f->ilf_asize);
421
422 switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
423 case XFS_ILOG_ADATA:
424 case XFS_ILOG_AEXT:
425 dest = XFS_DFORK_APTR(dip);
426 ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
427 memcpy(dest, src, len);
428 break;
429
430 case XFS_ILOG_ABROOT:
431 dest = XFS_DFORK_APTR(dip);
432 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
433 len, (struct xfs_bmdr_block *)dest,
434 XFS_DFORK_ASIZE(dip, mp));
435 break;
436
437 default:
438 xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
439 ASSERT(0);
440 error = -EFSCORRUPTED;
441 goto out_release;
442 }
443 }
444
445 out_owner_change:
446 /* Recover the swapext owner change unless inode has been deleted */
447 if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
448 (dip->di_mode != 0))
449 error = xfs_recover_inode_owner_change(mp, dip, in_f,
450 buffer_list);
451 /* re-generate the checksum. */
452 xfs_dinode_calc_crc(log->l_mp, dip);
453
454 ASSERT(bp->b_mount == mp);
455 bp->b_flags |= _XBF_LOGRECOVERY;
456 xfs_buf_delwri_queue(bp, buffer_list);
457
458 out_release:
459 xfs_buf_relse(bp);
460 error:
461 if (need_free)
462 kmem_free(in_f);
463 return error;
464 }
465
466 const struct xlog_recover_item_ops xlog_inode_item_ops = {
467 .item_type = XFS_LI_INODE,
468 .ra_pass2 = xlog_recover_inode_ra_pass2,
469 .commit_pass2 = xlog_recover_inode_commit_pass2,
470 };
471