1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014 Red Hat, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_trans.h"
15 #include "xfs_alloc.h"
16 #include "xfs_btree.h"
17 #include "xfs_btree_staging.h"
18 #include "xfs_rmap.h"
19 #include "xfs_rmap_btree.h"
20 #include "xfs_trace.h"
21 #include "xfs_error.h"
22 #include "xfs_extent_busy.h"
23 #include "xfs_ag_resv.h"
24 
25 /*
26  * Reverse map btree.
27  *
28  * This is a per-ag tree used to track the owner(s) of a given extent. With
29  * reflink it is possible for there to be multiple owners, which is a departure
30  * from classic XFS. Owner records for data extents are inserted when the
31  * extent is mapped and removed when an extent is unmapped.  Owner records for
32  * all other block types (i.e. metadata) are inserted when an extent is
33  * allocated and removed when an extent is freed. There can only be one owner
34  * of a metadata extent, usually an inode or some other metadata structure like
35  * an AG btree.
36  *
37  * The rmap btree is part of the free space management, so blocks for the tree
38  * are sourced from the agfl. Hence we need transaction reservation support for
39  * this tree so that the freelist is always large enough. This also impacts on
40  * the minimum space we need to leave free in the AG.
41  *
42  * The tree is ordered by [ag block, owner, offset]. This is a large key size,
43  * but it is the only way to enforce unique keys when a block can be owned by
44  * multiple files at any offset. There's no need to order/search by extent
45  * size for online updating/management of the tree. It is intended that most
46  * reverse lookups will be to find the owner(s) of a particular block, or to
47  * try to recover tree and file data from corrupt primary metadata.
48  */
49 
50 static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(struct xfs_btree_cur * cur)51 xfs_rmapbt_dup_cursor(
52 	struct xfs_btree_cur	*cur)
53 {
54 	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
55 			cur->bc_ag.agbp, cur->bc_ag.agno);
56 }
57 
58 STATIC void
xfs_rmapbt_set_root(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int inc)59 xfs_rmapbt_set_root(
60 	struct xfs_btree_cur	*cur,
61 	union xfs_btree_ptr	*ptr,
62 	int			inc)
63 {
64 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
65 	struct xfs_agf		*agf = agbp->b_addr;
66 	int			btnum = cur->bc_btnum;
67 	struct xfs_perag	*pag = agbp->b_pag;
68 
69 	ASSERT(ptr->s != 0);
70 
71 	agf->agf_roots[btnum] = ptr->s;
72 	be32_add_cpu(&agf->agf_levels[btnum], inc);
73 	pag->pagf_levels[btnum] += inc;
74 
75 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
76 }
77 
78 STATIC int
xfs_rmapbt_alloc_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * start,union xfs_btree_ptr * new,int * stat)79 xfs_rmapbt_alloc_block(
80 	struct xfs_btree_cur	*cur,
81 	union xfs_btree_ptr	*start,
82 	union xfs_btree_ptr	*new,
83 	int			*stat)
84 {
85 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
86 	struct xfs_agf		*agf = agbp->b_addr;
87 	int			error;
88 	xfs_agblock_t		bno;
89 
90 	/* Allocate the new block from the freelist. If we can't, give up.  */
91 	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp,
92 				       &bno, 1);
93 	if (error)
94 		return error;
95 
96 	trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_ag.agno,
97 			bno, 1);
98 	if (bno == NULLAGBLOCK) {
99 		*stat = 0;
100 		return 0;
101 	}
102 
103 	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1,
104 			false);
105 
106 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
107 	new->s = cpu_to_be32(bno);
108 	be32_add_cpu(&agf->agf_rmap_blocks, 1);
109 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
110 
111 	xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_ag.agno);
112 
113 	*stat = 1;
114 	return 0;
115 }
116 
117 STATIC int
xfs_rmapbt_free_block(struct xfs_btree_cur * cur,struct xfs_buf * bp)118 xfs_rmapbt_free_block(
119 	struct xfs_btree_cur	*cur,
120 	struct xfs_buf		*bp)
121 {
122 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
123 	struct xfs_agf		*agf = agbp->b_addr;
124 	struct xfs_perag	*pag;
125 	xfs_agblock_t		bno;
126 	int			error;
127 
128 	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
129 	trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_ag.agno,
130 			bno, 1);
131 	be32_add_cpu(&agf->agf_rmap_blocks, -1);
132 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
133 	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
134 	if (error)
135 		return error;
136 
137 	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
138 			      XFS_EXTENT_BUSY_SKIP_DISCARD);
139 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
140 
141 	pag = cur->bc_ag.agbp->b_pag;
142 	xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
143 	return 0;
144 }
145 
146 STATIC int
xfs_rmapbt_get_minrecs(struct xfs_btree_cur * cur,int level)147 xfs_rmapbt_get_minrecs(
148 	struct xfs_btree_cur	*cur,
149 	int			level)
150 {
151 	return cur->bc_mp->m_rmap_mnr[level != 0];
152 }
153 
154 STATIC int
xfs_rmapbt_get_maxrecs(struct xfs_btree_cur * cur,int level)155 xfs_rmapbt_get_maxrecs(
156 	struct xfs_btree_cur	*cur,
157 	int			level)
158 {
159 	return cur->bc_mp->m_rmap_mxr[level != 0];
160 }
161 
162 STATIC void
xfs_rmapbt_init_key_from_rec(union xfs_btree_key * key,union xfs_btree_rec * rec)163 xfs_rmapbt_init_key_from_rec(
164 	union xfs_btree_key	*key,
165 	union xfs_btree_rec	*rec)
166 {
167 	key->rmap.rm_startblock = rec->rmap.rm_startblock;
168 	key->rmap.rm_owner = rec->rmap.rm_owner;
169 	key->rmap.rm_offset = rec->rmap.rm_offset;
170 }
171 
172 /*
173  * The high key for a reverse mapping record can be computed by shifting
174  * the startblock and offset to the highest value that would still map
175  * to that record.  In practice this means that we add blockcount-1 to
176  * the startblock for all records, and if the record is for a data/attr
177  * fork mapping, we add blockcount-1 to the offset too.
178  */
179 STATIC void
xfs_rmapbt_init_high_key_from_rec(union xfs_btree_key * key,union xfs_btree_rec * rec)180 xfs_rmapbt_init_high_key_from_rec(
181 	union xfs_btree_key	*key,
182 	union xfs_btree_rec	*rec)
183 {
184 	uint64_t		off;
185 	int			adj;
186 
187 	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
188 
189 	key->rmap.rm_startblock = rec->rmap.rm_startblock;
190 	be32_add_cpu(&key->rmap.rm_startblock, adj);
191 	key->rmap.rm_owner = rec->rmap.rm_owner;
192 	key->rmap.rm_offset = rec->rmap.rm_offset;
193 	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
194 	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
195 		return;
196 	off = be64_to_cpu(key->rmap.rm_offset);
197 	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
198 	key->rmap.rm_offset = cpu_to_be64(off);
199 }
200 
201 STATIC void
xfs_rmapbt_init_rec_from_cur(struct xfs_btree_cur * cur,union xfs_btree_rec * rec)202 xfs_rmapbt_init_rec_from_cur(
203 	struct xfs_btree_cur	*cur,
204 	union xfs_btree_rec	*rec)
205 {
206 	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
207 	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
208 	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
209 	rec->rmap.rm_offset = cpu_to_be64(
210 			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
211 }
212 
213 STATIC void
xfs_rmapbt_init_ptr_from_cur(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)214 xfs_rmapbt_init_ptr_from_cur(
215 	struct xfs_btree_cur	*cur,
216 	union xfs_btree_ptr	*ptr)
217 {
218 	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;
219 
220 	ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno));
221 
222 	ptr->s = agf->agf_roots[cur->bc_btnum];
223 }
224 
225 STATIC int64_t
xfs_rmapbt_key_diff(struct xfs_btree_cur * cur,union xfs_btree_key * key)226 xfs_rmapbt_key_diff(
227 	struct xfs_btree_cur	*cur,
228 	union xfs_btree_key	*key)
229 {
230 	struct xfs_rmap_irec	*rec = &cur->bc_rec.r;
231 	struct xfs_rmap_key	*kp = &key->rmap;
232 	__u64			x, y;
233 	int64_t			d;
234 
235 	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
236 	if (d)
237 		return d;
238 
239 	x = be64_to_cpu(kp->rm_owner);
240 	y = rec->rm_owner;
241 	if (x > y)
242 		return 1;
243 	else if (y > x)
244 		return -1;
245 
246 	x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
247 	y = rec->rm_offset;
248 	if (x > y)
249 		return 1;
250 	else if (y > x)
251 		return -1;
252 	return 0;
253 }
254 
255 STATIC int64_t
xfs_rmapbt_diff_two_keys(struct xfs_btree_cur * cur,union xfs_btree_key * k1,union xfs_btree_key * k2)256 xfs_rmapbt_diff_two_keys(
257 	struct xfs_btree_cur	*cur,
258 	union xfs_btree_key	*k1,
259 	union xfs_btree_key	*k2)
260 {
261 	struct xfs_rmap_key	*kp1 = &k1->rmap;
262 	struct xfs_rmap_key	*kp2 = &k2->rmap;
263 	int64_t			d;
264 	__u64			x, y;
265 
266 	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
267 		       be32_to_cpu(kp2->rm_startblock);
268 	if (d)
269 		return d;
270 
271 	x = be64_to_cpu(kp1->rm_owner);
272 	y = be64_to_cpu(kp2->rm_owner);
273 	if (x > y)
274 		return 1;
275 	else if (y > x)
276 		return -1;
277 
278 	x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
279 	y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
280 	if (x > y)
281 		return 1;
282 	else if (y > x)
283 		return -1;
284 	return 0;
285 }
286 
287 static xfs_failaddr_t
xfs_rmapbt_verify(struct xfs_buf * bp)288 xfs_rmapbt_verify(
289 	struct xfs_buf		*bp)
290 {
291 	struct xfs_mount	*mp = bp->b_mount;
292 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
293 	struct xfs_perag	*pag = bp->b_pag;
294 	xfs_failaddr_t		fa;
295 	unsigned int		level;
296 
297 	/*
298 	 * magic number and level verification
299 	 *
300 	 * During growfs operations, we can't verify the exact level or owner as
301 	 * the perag is not fully initialised and hence not attached to the
302 	 * buffer.  In this case, check against the maximum tree depth.
303 	 *
304 	 * Similarly, during log recovery we will have a perag structure
305 	 * attached, but the agf information will not yet have been initialised
306 	 * from the on disk AGF. Again, we can only check against maximum limits
307 	 * in this case.
308 	 */
309 	if (!xfs_verify_magic(bp, block->bb_magic))
310 		return __this_address;
311 
312 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
313 		return __this_address;
314 	fa = xfs_btree_sblock_v5hdr_verify(bp);
315 	if (fa)
316 		return fa;
317 
318 	level = be16_to_cpu(block->bb_level);
319 	if (pag && pag->pagf_init) {
320 		if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
321 			return __this_address;
322 	} else if (level >= mp->m_rmap_maxlevels)
323 		return __this_address;
324 
325 	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
326 }
327 
328 static void
xfs_rmapbt_read_verify(struct xfs_buf * bp)329 xfs_rmapbt_read_verify(
330 	struct xfs_buf	*bp)
331 {
332 	xfs_failaddr_t	fa;
333 
334 	if (!xfs_btree_sblock_verify_crc(bp))
335 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
336 	else {
337 		fa = xfs_rmapbt_verify(bp);
338 		if (fa)
339 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
340 	}
341 
342 	if (bp->b_error)
343 		trace_xfs_btree_corrupt(bp, _RET_IP_);
344 }
345 
346 static void
xfs_rmapbt_write_verify(struct xfs_buf * bp)347 xfs_rmapbt_write_verify(
348 	struct xfs_buf	*bp)
349 {
350 	xfs_failaddr_t	fa;
351 
352 	fa = xfs_rmapbt_verify(bp);
353 	if (fa) {
354 		trace_xfs_btree_corrupt(bp, _RET_IP_);
355 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
356 		return;
357 	}
358 	xfs_btree_sblock_calc_crc(bp);
359 
360 }
361 
362 const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
363 	.name			= "xfs_rmapbt",
364 	.magic			= { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
365 	.verify_read		= xfs_rmapbt_read_verify,
366 	.verify_write		= xfs_rmapbt_write_verify,
367 	.verify_struct		= xfs_rmapbt_verify,
368 };
369 
370 STATIC int
xfs_rmapbt_keys_inorder(struct xfs_btree_cur * cur,union xfs_btree_key * k1,union xfs_btree_key * k2)371 xfs_rmapbt_keys_inorder(
372 	struct xfs_btree_cur	*cur,
373 	union xfs_btree_key	*k1,
374 	union xfs_btree_key	*k2)
375 {
376 	uint32_t		x;
377 	uint32_t		y;
378 	uint64_t		a;
379 	uint64_t		b;
380 
381 	x = be32_to_cpu(k1->rmap.rm_startblock);
382 	y = be32_to_cpu(k2->rmap.rm_startblock);
383 	if (x < y)
384 		return 1;
385 	else if (x > y)
386 		return 0;
387 	a = be64_to_cpu(k1->rmap.rm_owner);
388 	b = be64_to_cpu(k2->rmap.rm_owner);
389 	if (a < b)
390 		return 1;
391 	else if (a > b)
392 		return 0;
393 	a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
394 	b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
395 	if (a <= b)
396 		return 1;
397 	return 0;
398 }
399 
400 STATIC int
xfs_rmapbt_recs_inorder(struct xfs_btree_cur * cur,union xfs_btree_rec * r1,union xfs_btree_rec * r2)401 xfs_rmapbt_recs_inorder(
402 	struct xfs_btree_cur	*cur,
403 	union xfs_btree_rec	*r1,
404 	union xfs_btree_rec	*r2)
405 {
406 	uint32_t		x;
407 	uint32_t		y;
408 	uint64_t		a;
409 	uint64_t		b;
410 
411 	x = be32_to_cpu(r1->rmap.rm_startblock);
412 	y = be32_to_cpu(r2->rmap.rm_startblock);
413 	if (x < y)
414 		return 1;
415 	else if (x > y)
416 		return 0;
417 	a = be64_to_cpu(r1->rmap.rm_owner);
418 	b = be64_to_cpu(r2->rmap.rm_owner);
419 	if (a < b)
420 		return 1;
421 	else if (a > b)
422 		return 0;
423 	a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
424 	b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
425 	if (a <= b)
426 		return 1;
427 	return 0;
428 }
429 
430 static const struct xfs_btree_ops xfs_rmapbt_ops = {
431 	.rec_len		= sizeof(struct xfs_rmap_rec),
432 	.key_len		= 2 * sizeof(struct xfs_rmap_key),
433 
434 	.dup_cursor		= xfs_rmapbt_dup_cursor,
435 	.set_root		= xfs_rmapbt_set_root,
436 	.alloc_block		= xfs_rmapbt_alloc_block,
437 	.free_block		= xfs_rmapbt_free_block,
438 	.get_minrecs		= xfs_rmapbt_get_minrecs,
439 	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
440 	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
441 	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
442 	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
443 	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
444 	.key_diff		= xfs_rmapbt_key_diff,
445 	.buf_ops		= &xfs_rmapbt_buf_ops,
446 	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
447 	.keys_inorder		= xfs_rmapbt_keys_inorder,
448 	.recs_inorder		= xfs_rmapbt_recs_inorder,
449 };
450 
451 static struct xfs_btree_cur *
xfs_rmapbt_init_common(struct xfs_mount * mp,struct xfs_trans * tp,xfs_agnumber_t agno)452 xfs_rmapbt_init_common(
453 	struct xfs_mount	*mp,
454 	struct xfs_trans	*tp,
455 	xfs_agnumber_t		agno)
456 {
457 	struct xfs_btree_cur	*cur;
458 
459 	cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL);
460 	cur->bc_tp = tp;
461 	cur->bc_mp = mp;
462 	/* Overlapping btree; 2 keys per pointer. */
463 	cur->bc_btnum = XFS_BTNUM_RMAP;
464 	cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
465 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
466 	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
467 	cur->bc_ag.agno = agno;
468 	cur->bc_ops = &xfs_rmapbt_ops;
469 
470 	return cur;
471 }
472 
473 /* Create a new reverse mapping btree cursor. */
474 struct xfs_btree_cur *
xfs_rmapbt_init_cursor(struct xfs_mount * mp,struct xfs_trans * tp,struct xfs_buf * agbp,xfs_agnumber_t agno)475 xfs_rmapbt_init_cursor(
476 	struct xfs_mount	*mp,
477 	struct xfs_trans	*tp,
478 	struct xfs_buf		*agbp,
479 	xfs_agnumber_t		agno)
480 {
481 	struct xfs_agf		*agf = agbp->b_addr;
482 	struct xfs_btree_cur	*cur;
483 
484 	cur = xfs_rmapbt_init_common(mp, tp, agno);
485 	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
486 	cur->bc_ag.agbp = agbp;
487 	return cur;
488 }
489 
490 /* Create a new reverse mapping btree cursor with a fake root for staging. */
491 struct xfs_btree_cur *
xfs_rmapbt_stage_cursor(struct xfs_mount * mp,struct xbtree_afakeroot * afake,xfs_agnumber_t agno)492 xfs_rmapbt_stage_cursor(
493 	struct xfs_mount	*mp,
494 	struct xbtree_afakeroot	*afake,
495 	xfs_agnumber_t		agno)
496 {
497 	struct xfs_btree_cur	*cur;
498 
499 	cur = xfs_rmapbt_init_common(mp, NULL, agno);
500 	xfs_btree_stage_afakeroot(cur, afake);
501 	return cur;
502 }
503 
504 /*
505  * Install a new reverse mapping btree root.  Caller is responsible for
506  * invalidating and freeing the old btree blocks.
507  */
508 void
xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur * cur,struct xfs_trans * tp,struct xfs_buf * agbp)509 xfs_rmapbt_commit_staged_btree(
510 	struct xfs_btree_cur	*cur,
511 	struct xfs_trans	*tp,
512 	struct xfs_buf		*agbp)
513 {
514 	struct xfs_agf		*agf = agbp->b_addr;
515 	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
516 
517 	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
518 
519 	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
520 	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
521 	agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
522 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
523 				    XFS_AGF_RMAP_BLOCKS);
524 	xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
525 }
526 
527 /*
528  * Calculate number of records in an rmap btree block.
529  */
530 int
xfs_rmapbt_maxrecs(int blocklen,int leaf)531 xfs_rmapbt_maxrecs(
532 	int			blocklen,
533 	int			leaf)
534 {
535 	blocklen -= XFS_RMAP_BLOCK_LEN;
536 
537 	if (leaf)
538 		return blocklen / sizeof(struct xfs_rmap_rec);
539 	return blocklen /
540 		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
541 }
542 
543 /* Compute the maximum height of an rmap btree. */
544 void
xfs_rmapbt_compute_maxlevels(struct xfs_mount * mp)545 xfs_rmapbt_compute_maxlevels(
546 	struct xfs_mount		*mp)
547 {
548 	/*
549 	 * On a non-reflink filesystem, the maximum number of rmap
550 	 * records is the number of blocks in the AG, hence the max
551 	 * rmapbt height is log_$maxrecs($agblocks).  However, with
552 	 * reflink each AG block can have up to 2^32 (per the refcount
553 	 * record format) owners, which means that theoretically we
554 	 * could face up to 2^64 rmap records.
555 	 *
556 	 * That effectively means that the max rmapbt height must be
557 	 * XFS_BTREE_MAXLEVELS.  "Fortunately" we'll run out of AG
558 	 * blocks to feed the rmapbt long before the rmapbt reaches
559 	 * maximum height.  The reflink code uses ag_resv_critical to
560 	 * disallow reflinking when less than 10% of the per-AG metadata
561 	 * block reservation since the fallback is a regular file copy.
562 	 */
563 	if (xfs_sb_version_hasreflink(&mp->m_sb))
564 		mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
565 	else
566 		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
567 				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
568 }
569 
570 /* Calculate the refcount btree size for some records. */
571 xfs_extlen_t
xfs_rmapbt_calc_size(struct xfs_mount * mp,unsigned long long len)572 xfs_rmapbt_calc_size(
573 	struct xfs_mount	*mp,
574 	unsigned long long	len)
575 {
576 	return xfs_btree_calc_size(mp->m_rmap_mnr, len);
577 }
578 
579 /*
580  * Calculate the maximum refcount btree size.
581  */
582 xfs_extlen_t
xfs_rmapbt_max_size(struct xfs_mount * mp,xfs_agblock_t agblocks)583 xfs_rmapbt_max_size(
584 	struct xfs_mount	*mp,
585 	xfs_agblock_t		agblocks)
586 {
587 	/* Bail out if we're uninitialized, which can happen in mkfs. */
588 	if (mp->m_rmap_mxr[0] == 0)
589 		return 0;
590 
591 	return xfs_rmapbt_calc_size(mp, agblocks);
592 }
593 
594 /*
595  * Figure out how many blocks to reserve and how many are used by this btree.
596  */
597 int
xfs_rmapbt_calc_reserves(struct xfs_mount * mp,struct xfs_trans * tp,xfs_agnumber_t agno,xfs_extlen_t * ask,xfs_extlen_t * used)598 xfs_rmapbt_calc_reserves(
599 	struct xfs_mount	*mp,
600 	struct xfs_trans	*tp,
601 	xfs_agnumber_t		agno,
602 	xfs_extlen_t		*ask,
603 	xfs_extlen_t		*used)
604 {
605 	struct xfs_buf		*agbp;
606 	struct xfs_agf		*agf;
607 	xfs_agblock_t		agblocks;
608 	xfs_extlen_t		tree_len;
609 	int			error;
610 
611 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
612 		return 0;
613 
614 	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
615 	if (error)
616 		return error;
617 
618 	agf = agbp->b_addr;
619 	agblocks = be32_to_cpu(agf->agf_length);
620 	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
621 	xfs_trans_brelse(tp, agbp);
622 
623 	/*
624 	 * The log is permanently allocated, so the space it occupies will
625 	 * never be available for the kinds of things that would require btree
626 	 * expansion.  We therefore can pretend the space isn't there.
627 	 */
628 	if (mp->m_sb.sb_logstart &&
629 	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno)
630 		agblocks -= mp->m_sb.sb_logblocks;
631 
632 	/* Reserve 1% of the AG or enough for 1 block per record. */
633 	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
634 	*used += tree_len;
635 
636 	return error;
637 }
638