/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/prefetch.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "log.h"
#include "inode.h"
#include "trace_gfs2.h"
#include "dir.h"

#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)

#if BITS_PER_LONG == 32
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif

/*
 * These routines are used by the resource group routines (rgrp.c)
 * to keep track of block allocation. Each block is represented by two
 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks.
 *
 * 0 = Free
 * 1 = Used (not metadata)
 * 2 = Unlinked (still in use) inode
 * 3 = Used (metadata)
 */
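
/*
 * A worked example (illustrative, added for clarity): the bitmap byte
 * 0xE4 (binary 11 10 01 00) describes four blocks, lowest-order bit-pair
 * first:
 *   bits 0-1 = 00 -> block 0 is free
 *   bits 2-3 = 01 -> block 1 is used (data)
 *   bits 4-5 = 10 -> block 2 is an unlinked inode
 *   bits 6-7 = 11 -> block 3 is used (metadata)
 */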

struct gfs2_extent {
        struct gfs2_rbm rbm;
        u32 len;
};

static const char valid_change[16] = {
                /* current */
        /* n */ 0, 1, 1, 1,
        /* e */ 1, 0, 0, 0,
        /* w */ 0, 0, 0, 1,
                1, 0, 0, 0
};

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
                         const struct gfs2_inode *ip, bool nowrap);


/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
                               unsigned char new_state)
{
        unsigned char *byte1, *byte2, *end, cur_state;
        struct gfs2_bitmap *bi = rbm_bi(rbm);
        unsigned int buflen = bi->bi_len;
        const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

        byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
        end = bi->bi_bh->b_data + bi->bi_offset + buflen;

        BUG_ON(byte1 >= end);

        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
                pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
                        rbm->offset, cur_state, new_state);
                pr_warn("rgrp=0x%llx bi_start=0x%x\n",
                        (unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
                pr_warn("bi_offset=0x%x bi_len=0x%x\n",
                        bi->bi_offset, bi->bi_len);
                dump_stack();
                gfs2_consist_rgrpd(rbm->rgd);
                return;
        }
        *byte1 ^= (cur_state ^ new_state) << bit;

        if (do_clone && bi->bi_clone) {
                byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
                cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
                *byte2 ^= (cur_state ^ new_state) << bit;
        }
}
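
/*
 * Illustrative note on the XOR update above: with cur_state =
 * GFS2_BLKST_DINODE (3), new_state = GFS2_BLKST_FREE (0) and bit = 4,
 * (cur_state ^ new_state) << bit == 0x30, so *byte1 ^= 0x30 flips
 * exactly the two bits describing that block while leaving the other
 * three bit-pairs in the byte untouched.
 */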

/**
 * gfs2_testbit - test a bit in the bitmaps
 * @rbm: The bit to test
 * @use_clone: If true, test the clone bitmap, not the official bitmap.
 *
 * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
 * not the "real" bitmaps, to avoid allocating recently freed blocks.
 *
 * Returns: The two bit block state of the requested bit
 */

static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
{
        struct gfs2_bitmap *bi = rbm_bi(rbm);
        const u8 *buffer;
        const u8 *byte;
        unsigned int bit;

        if (use_clone && bi->bi_clone)
                buffer = bi->bi_clone;
        else
                buffer = bi->bi_bh->b_data;
        buffer += bi->bi_offset;
        byte = buffer + (rbm->offset / GFS2_NBBY);
        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

        return (*byte >> bit) & GFS2_BIT_MASK;
}
/**
 * gfs2_bit_search
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We xor the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for. This gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then AND it with
 * the original. All the even bit positions (0, 2, 4, etc) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
        u64 tmp;
        static const u64 search[] = {
                [0] = 0xffffffffffffffffULL,
                [1] = 0xaaaaaaaaaaaaaaaaULL,
                [2] = 0x5555555555555555ULL,
                [3] = 0x0000000000000000ULL,
        };
        tmp = le64_to_cpu(*ptr) ^ search[state];
        tmp &= (tmp >> 1);
        tmp &= mask;
        return tmp;
}
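
/*
 * Worked example (illustrative): searching the single byte 0xE4 (blocks
 * 3..0 in states 3,2,1,0) for state 1. search[1] = 0xaa..aa, so
 * tmp = 0xE4 ^ 0xAA = 0x4E; tmp &= (tmp >> 1) leaves 0x06; masking with
 * 0x55 keeps only bit 2 (0x04), which gfs2_bitfit() below converts to
 * block 1 via __ffs64()/2.
 */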

/**
 * rs_cmp - multi-block reservation range compare
 * @blk: absolute file system block number of the new reservation
 * @len: number of blocks in the new reservation
 * @rs: existing reservation to compare against
 *
 * returns: 1 if the block range is beyond the reach of the reservation
 *         -1 if the block range is before the start of the reservation
 *          0 if the block range overlaps with the reservation
 */
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);

        if (blk >= startblk + rs->rs_free)
                return 1;
        if (blk + len - 1 < startblk)
                return -1;
        return 0;
}
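
/*
 * For instance (illustrative): a reservation starting at block 1000
 * with rs_free = 10 covers blocks 1000..1009, so rs_cmp(1010, 5, rs)
 * returns 1 (entirely after), rs_cmp(990, 10, rs) returns -1 (it ends
 * at block 999, before the start), and rs_cmp(995, 10, rs) returns 0
 * because blocks 995..1004 overlap the reservation.
 */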

/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem. @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
                       u32 goal, u8 state)
{
        u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
        u64 tmp;
        u64 mask = 0x5555555555555555ULL;
        u32 bit;

        /* Mask off bits we don't care about at the start of the search */
        mask <<= spoint;
        tmp = gfs2_bit_search(ptr, mask, state);
        ptr++;
        while(tmp == 0 && ptr < end) {
                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
                ptr++;
        }
        /* Mask off any bits which are more than len bytes from the start */
        if (ptr == end && (len & (sizeof(u64) - 1)))
                tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
        /* Didn't find anything, so return */
        if (tmp == 0)
                return BFITNOENT;
        ptr--;
        bit = __ffs64(tmp);
        bit /= 2;       /* two bits per entry in the bitmap */
        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
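
/*
 * Pointer arithmetic above, by example (illustrative): goal = 37 gives
 * goal >> 5 = 1, so the scan starts in the second u64 of the buffer
 * (each u64 covers 32 bit-pairs), and spoint = (37 << 1) & 63 = 10, so
 * the initial mask 0x5555... << 10 hides the first five bit-pairs
 * (blocks 32..36) of that word from the first gfs2_bit_search() call.
 */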

/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
        u64 rblock = block - rbm->rgd->rd_data0;

        if (WARN_ON_ONCE(rblock > UINT_MAX))
                return -EINVAL;
        if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
                return -E2BIG;

        rbm->bii = 0;
        rbm->offset = (u32)(rblock);
        /* Check if the block is within the first block */
        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
                return 0;

        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
        rbm->offset += (sizeof(struct gfs2_rgrp) -
                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
        return 0;
}
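
/*
 * Why the adjustment above works (explanatory note): bitmap blocks
 * after the first only lose a gfs2_meta_header to their header, while
 * the first block loses the larger gfs2_rgrp. Inflating the offset by
 * the size difference (expressed in blocks via GFS2_NBBY) makes every
 * bitmap block look uniform, so bii and the in-bitmap offset fall out
 * of a plain division by sd_blocks_per_bitmap.
 */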

/**
 * gfs2_rbm_incr - increment an rbm structure
 * @rbm: The rbm with rgd already set correctly
 *
 * This function takes an existing rbm structure and increments it to the next
 * viable block offset.
 *
 * Returns: If incrementing the offset would cause the rbm to go past the
 *          end of the rgrp, true is returned, otherwise false.
 *
 */

static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
{
        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
                rbm->offset++;
                return false;
        }
        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
                return true;

        rbm->offset = 0;
        rbm->bii++;
        return false;
}

/**
 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 * @rbm: Position to search (value/result)
 * @n_unaligned: Number of unaligned blocks to check
 * @len: Decremented for each block found (terminate on zero)
 *
 * Returns: true if a non-free block is encountered
 */

static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
        u32 n;
        u8 res;

        for (n = 0; n < n_unaligned; n++) {
                res = gfs2_testbit(rbm, true);
                if (res != GFS2_BLKST_FREE)
                        return true;
                (*len)--;
                if (*len == 0)
                        return true;
                if (gfs2_rbm_incr(rbm))
                        return true;
        }

        return false;
}

/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
        struct gfs2_rbm rbm = *rrbm;
        u32 n_unaligned = rbm.offset & 3;
        u32 size = len;
        u32 bytes;
        u32 chunk_size;
        u8 *ptr, *start, *end;
        u64 block;
        struct gfs2_bitmap *bi;

        if (n_unaligned &&
            gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
                goto out;

        n_unaligned = len & 3;
        /* Start is now byte aligned */
        while (len > 3) {
                bi = rbm_bi(&rbm);
                start = bi->bi_bh->b_data;
                if (bi->bi_clone)
                        start = bi->bi_clone;
                start += bi->bi_offset;
                end = start + bi->bi_len;
                BUG_ON(rbm.offset & 3);
                start += (rbm.offset / GFS2_NBBY);
                bytes = min_t(u32, len / GFS2_NBBY, (end - start));
                ptr = memchr_inv(start, 0, bytes);
                chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
                chunk_size *= GFS2_NBBY;
                BUG_ON(len < chunk_size);
                len -= chunk_size;
                block = gfs2_rbm_to_block(&rbm);
                if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
                        n_unaligned = 0;
                        break;
                }
                if (ptr) {
                        n_unaligned = 3;
                        break;
                }
                n_unaligned = len & 3;
        }

        /* Deal with any bits left over at the end */
        if (n_unaligned)
                gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
        return size - len;
}
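
/*
 * Example walk (illustrative): starting at rbm.offset = 6 with len = 20,
 * the two leading unaligned blocks (6 and 7) are tested one by one via
 * gfs2_unaligned_extlen(), the next 16 blocks (4 whole bytes) are
 * scanned with a single memchr_inv() call, and the final 2 blocks are
 * again tested individually.
 */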

/**
 * gfs2_bitcount - count the number of bits in a certain state
 * @rgd: the resource group descriptor
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Returns: The number of bits
 */

static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
                         unsigned int buflen, u8 state)
{
        const u8 *byte = buffer;
        const u8 *end = buffer + buflen;
        const u8 state1 = state << 2;
        const u8 state2 = state << 4;
        const u8 state3 = state << 6;
        u32 count = 0;

        for (; byte < end; byte++) {
                if (((*byte) & 0x03) == state)
                        count++;
                if (((*byte) & 0x0C) == state1)
                        count++;
                if (((*byte) & 0x30) == state2)
                        count++;
                if (((*byte) & 0xC0) == state3)
                        count++;
        }

        return count;
}
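
/*
 * Mask check, by example (illustrative): for state = GFS2_BLKST_USED (1),
 * state1/state2/state3 are 0x04, 0x10 and 0x40, and the masks 0x03,
 * 0x0C, 0x30 and 0xC0 isolate each of a byte's four bit-pairs in turn;
 * a byte of 0x55 (01 01 01 01) therefore counts as four used blocks.
 */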

/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi = NULL;
        u32 length = rgd->rd_length;
        u32 count[4], tmp;
        int buf, x;

        memset(count, 0, 4 * sizeof(u32));

        /* Count # blocks in each of 4 possible allocation states */
        for (buf = 0; buf < length; buf++) {
                bi = rgd->rd_bits + buf;
                for (x = 0; x < 4; x++)
                        count[x] += gfs2_bitcount(rgd,
                                                  bi->bi_bh->b_data +
                                                  bi->bi_offset,
                                                  bi->bi_len, x);
        }

        if (count[0] != rgd->rd_free) {
                if (gfs2_consist_rgrpd(rgd))
                        fs_err(sdp, "free data mismatch: %u != %u\n",
                               count[0], rgd->rd_free);
                return;
        }

        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
        if (count[1] != tmp) {
                if (gfs2_consist_rgrpd(rgd))
                        fs_err(sdp, "used data mismatch: %u != %u\n",
                               count[1], tmp);
                return;
        }

        if (count[2] + count[3] != rgd->rd_dinodes) {
                if (gfs2_consist_rgrpd(rgd))
                        fs_err(sdp, "used metadata mismatch: %u != %u\n",
                               count[2] + count[3], rgd->rd_dinodes);
                return;
        }
}

/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 * @exact: True if this needs to be an exact match
 *
 * The @exact argument should be set to true by most callers. The exception
 * is when we need to match blocks which are not represented by the rgrp
 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
 * there for alignment purposes. Another way of looking at it is that @exact
 * matches only valid data/metadata blocks, but with @exact false, it will
 * match any block within the extent of the rgrp.
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
{
        struct rb_node *n, *next;
        struct gfs2_rgrpd *cur;

        spin_lock(&sdp->sd_rindex_spin);
        n = sdp->sd_rindex_tree.rb_node;
        while (n) {
                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
                next = NULL;
                if (blk < cur->rd_addr)
                        next = n->rb_left;
                else if (blk >= cur->rd_data0 + cur->rd_data)
                        next = n->rb_right;
                if (next == NULL) {
                        spin_unlock(&sdp->sd_rindex_spin);
                        if (exact) {
                                if (blk < cur->rd_addr)
                                        return NULL;
                                if (blk >= cur->rd_data0 + cur->rd_data)
                                        return NULL;
                        }
                        return cur;
                }
                n = next;
        }
        spin_unlock(&sdp->sd_rindex_spin);

        return NULL;
}

/**
 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 * @sdp: The GFS2 superblock
 *
 * Returns: The first rgrp in the filesystem
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
{
        const struct rb_node *n;
        struct gfs2_rgrpd *rgd;

        spin_lock(&sdp->sd_rindex_spin);
        n = rb_first(&sdp->sd_rindex_tree);
        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
        spin_unlock(&sdp->sd_rindex_spin);

        return rgd;
}

/**
 * gfs2_rgrpd_get_next - get the next RG
 * @rgd: the resource group descriptor
 *
 * Returns: The next rgrp
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        const struct rb_node *n;

        spin_lock(&sdp->sd_rindex_spin);
        n = rb_next(&rgd->rd_node);
        if (n == NULL)
                n = rb_first(&sdp->sd_rindex_tree);

        if (unlikely(&rgd->rd_node == n)) {
                spin_unlock(&sdp->sd_rindex_spin);
                return NULL;
        }
        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
        spin_unlock(&sdp->sd_rindex_spin);
        return rgd;
}

void check_and_update_goal(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
                ip->i_goal = ip->i_no_addr;
}

void gfs2_free_clones(struct gfs2_rgrpd *rgd)
{
        int x;

        for (x = 0; x < rgd->rd_length; x++) {
                struct gfs2_bitmap *bi = rgd->rd_bits + x;
                kfree(bi->bi_clone);
                bi->bi_clone = NULL;
        }
}

/**
 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 *                   plus a quota allocation data structure, if necessary
 * @ip: the inode for this reservation
 */
int gfs2_rsqa_alloc(struct gfs2_inode *ip)
{
        return gfs2_qa_alloc(ip);
}

static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
        struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);

        gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
                       (unsigned long long)ip->i_no_addr,
                       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
                       rs->rs_rbm.offset, rs->rs_free);
}

/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
        struct gfs2_rgrpd *rgd;

        if (!gfs2_rs_active(rs))
                return;

        rgd = rs->rs_rbm.rgd;
        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
        rb_erase(&rs->rs_node, &rgd->rd_rstree);
        RB_CLEAR_NODE(&rs->rs_node);

        if (rs->rs_free) {
                struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);

                /* return reserved blocks to the rgrp */
                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
                /* The rgrp extent failure point is likely not to increase;
                   it will only do so if the freed blocks are somehow
                   contiguous with a span of free blocks that follows. Still,
                   it will force the number to be recalculated later. */
                rgd->rd_extfail_pt += rs->rs_free;
                rs->rs_free = 0;
                clear_bit(GBF_FULL, &bi->bi_flags);
        }
}

/**
 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
        struct gfs2_rgrpd *rgd;

        rgd = rs->rs_rbm.rgd;
        if (rgd) {
                spin_lock(&rgd->rd_rsspin);
                __rs_deltree(rs);
                BUG_ON(rs->rs_free);
                spin_unlock(&rgd->rd_rsspin);
        }
}

/**
 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 */
void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
        down_write(&ip->i_rw_mutex);
        if ((wcount == NULL) || (atomic_read(wcount) <= 1))
                gfs2_rs_deltree(&ip->i_res);
        up_write(&ip->i_rw_mutex);
        gfs2_qa_delete(ip, wcount);
}

/**
 * return_all_reservations - return all reserved blocks back to the rgrp.
 * @rgd: the rgrp that needs its space back
 *
 * We previously reserved a bunch of blocks for allocation. Now we need to
 * give them back. This leaves the reservation structures intact, but removes
 * all of their corresponding "no-fly zones".
 */
static void return_all_reservations(struct gfs2_rgrpd *rgd)
{
        struct rb_node *n;
        struct gfs2_blkreserv *rs;

        spin_lock(&rgd->rd_rsspin);
        while ((n = rb_first(&rgd->rd_rstree))) {
                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                __rs_deltree(rs);
        }
        spin_unlock(&rgd->rd_rsspin);
}

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
        struct rb_node *n;
        struct gfs2_rgrpd *rgd;
        struct gfs2_glock *gl;

        while ((n = rb_first(&sdp->sd_rindex_tree))) {
                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
                gl = rgd->rd_gl;

                rb_erase(n, &sdp->sd_rindex_tree);

                if (gl) {
                        glock_clear_object(gl, rgd);
                        gfs2_glock_put(gl);
                }

                gfs2_free_clones(rgd);
                kfree(rgd->rd_bits);
                rgd->rd_bits = NULL;
                return_all_reservations(rgd);
                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        }
}

static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
        pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
        pr_info("ri_length = %u\n", rgd->rd_length);
        pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
        pr_info("ri_data = %u\n", rgd->rd_data);
        pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
}

/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi;
        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
        u32 bytes_left, bytes;
        int x;

        if (!length)
                return -EINVAL;

        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
        if (!rgd->rd_bits)
                return -ENOMEM;

        bytes_left = rgd->rd_bitbytes;

        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;

                bi->bi_flags = 0;
                /* small rgrp; bitmap stored completely in header block */
                if (length == 1) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_rgrp);
                        bi->bi_start = 0;
                        bi->bi_len = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                /* header block */
                } else if (x == 0) {
                        bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
                        bi->bi_offset = sizeof(struct gfs2_rgrp);
                        bi->bi_start = 0;
                        bi->bi_len = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                /* last block */
                } else if (x + 1 == length) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                /* other blocks */
                } else {
                        bytes = sdp->sd_sb.sb_bsize -
                                sizeof(struct gfs2_meta_header);
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                }

                bytes_left -= bytes;
        }

        if (bytes_left) {
                gfs2_consist_rgrpd(rgd);
                return -EIO;
        }
        bi = rgd->rd_bits + (length - 1);
        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
                if (gfs2_consist_rgrpd(rgd)) {
                        gfs2_rindex_print(rgd);
                        fs_err(sdp, "start=%u len=%u offset=%u\n",
                               bi->bi_start, bi->bi_len, bi->bi_offset);
                }
                return -EIO;
        }

        return 0;
}
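
/*
 * Layout sketch (illustrative): for rd_length = 3, rd_bits[0] holds
 * sb_bsize - sizeof(struct gfs2_rgrp) bitmap bytes after the rgrp
 * header, rd_bits[1] holds sb_bsize - sizeof(struct gfs2_meta_header)
 * bytes, and rd_bits[2] holds whatever of rd_bitbytes remains; in each
 * case bi_blocks = bi_len * GFS2_NBBY, since every byte maps four
 * blocks.
 */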

/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
        u64 total_data = 0;
        struct inode *inode = sdp->sd_rindex;
        struct gfs2_inode *ip = GFS2_I(inode);
        char buf[sizeof(struct gfs2_rindex)];
        int error, rgrps;

        for (rgrps = 0;; rgrps++) {
                loff_t pos = rgrps * sizeof(struct gfs2_rindex);

                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
                        break;
                error = gfs2_internal_read(ip, buf, &pos,
                                           sizeof(struct gfs2_rindex));
                if (error != sizeof(struct gfs2_rindex))
                        break;
                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
        }
        return total_data;
}

static int rgd_insert(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;

        /* Figure out where to put new node */
        while (*newn) {
                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
                                                  rd_node);

                parent = *newn;
                if (rgd->rd_addr < cur->rd_addr)
                        newn = &((*newn)->rb_left);
                else if (rgd->rd_addr > cur->rd_addr)
                        newn = &((*newn)->rb_right);
                else
                        return -EEXIST;
        }

        rb_link_node(&rgd->rd_node, parent, newn);
        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
        sdp->sd_rgrps++;
        return 0;
}

/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        const unsigned bsize = sdp->sd_sb.sb_bsize;
        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
        struct gfs2_rindex buf;
        int error;
        struct gfs2_rgrpd *rgd;

        if (pos >= i_size_read(&ip->i_inode))
                return 1;

        error = gfs2_internal_read(ip, (char *)&buf, &pos,
                                   sizeof(struct gfs2_rindex));

        if (error != sizeof(struct gfs2_rindex))
                return (error == 0) ? 1 : error;

        rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
        error = -ENOMEM;
        if (!rgd)
                return error;

        rgd->rd_sbd = sdp;
        rgd->rd_addr = be64_to_cpu(buf.ri_addr);
        rgd->rd_length = be32_to_cpu(buf.ri_length);
        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
        rgd->rd_data = be32_to_cpu(buf.ri_data);
        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
        spin_lock_init(&rgd->rd_rsspin);

        error = compute_bitstructs(rgd);
        if (error)
                goto fail;

        error = gfs2_glock_get(sdp, rgd->rd_addr,
                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
        if (error)
                goto fail;

        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
        rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
        if (rgd->rd_data > sdp->sd_max_rg_data)
                sdp->sd_max_rg_data = rgd->rd_data;
        spin_lock(&sdp->sd_rindex_spin);
        error = rgd_insert(rgd);
        spin_unlock(&sdp->sd_rindex_spin);
        if (!error) {
                glock_set_object(rgd->rd_gl, rgd);
                rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
                rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
                                                    rgd->rd_length) * bsize) - 1;
                return 0;
        }

        error = 0; /* someone else read in the rgrp; free it and ignore it */
        gfs2_glock_put(rgd->rd_gl);

fail:
        kfree(rgd->rd_bits);
        rgd->rd_bits = NULL;
        kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        return error;
}

/**
 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 * @sdp: the GFS2 superblock
 *
 * The purpose of this function is to select a subset of the resource groups
 * and mark them as PREFERRED. We do it in such a way that each node prefers
 * to use a unique set of rgrps to minimize glock contention.
 */
static void set_rgrp_preferences(struct gfs2_sbd *sdp)
{
        struct gfs2_rgrpd *rgd, *first;
        int i;

        /* Skip an initial number of rgrps, based on this node's journal ID.
           That should start each node out on its own set. */
        rgd = gfs2_rgrpd_get_first(sdp);
        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
                rgd = gfs2_rgrpd_get_next(rgd);
        first = rgd;

        do {
                rgd->rd_flags |= GFS2_RDF_PREFERRED;
                for (i = 0; i < sdp->sd_journals; i++) {
                        rgd = gfs2_rgrpd_get_next(rgd);
                        if (!rgd || rgd == first)
                                break;
                }
        } while (rgd && rgd != first);
}

/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        int error;

        do {
                error = read_rindex_entry(ip);
        } while (error == 0);

        if (error < 0)
                return error;

        set_rgrp_preferences(sdp);

        sdp->sd_rindex_uptodate = 1;
        return 0;
}

/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
        struct gfs2_glock *gl = ip->i_gl;
        struct gfs2_holder ri_gh;
        int error = 0;
        int unlock_required = 0;

        /* Read new copy from disk if we don't have the latest */
        if (!sdp->sd_rindex_uptodate) {
                if (!gfs2_glock_is_locked_by_me(gl)) {
                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
                        if (error)
                                return error;
                        unlock_required = 1;
                }
                if (!sdp->sd_rindex_uptodate)
                        error = gfs2_ri_update(ip);
                if (unlock_required)
                        gfs2_glock_dq_uninit(&ri_gh);
        }

        return error;
}

static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
        const struct gfs2_rgrp *str = buf;
        u32 rg_flags;

        rg_flags = be32_to_cpu(str->rg_flags);
        rg_flags &= ~GFS2_RDF_MASK;
        rgd->rd_flags &= GFS2_RDF_MASK;
        rgd->rd_flags |= rg_flags;
        rgd->rd_free = be32_to_cpu(str->rg_free);
        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
        /* rd_data0, rd_data and rd_bitbytes already set from rindex */
}

static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
{
        const struct gfs2_rgrp *str = buf;

        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
        rgl->rl_flags = str->rg_flags;
        rgl->rl_free = str->rg_free;
        rgl->rl_dinodes = str->rg_dinodes;
        rgl->rl_igeneration = str->rg_igeneration;
        rgl->__pad = 0UL;
}

static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
        struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
        struct gfs2_rgrp *str = buf;
        u32 crc;

        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
        str->rg_free = cpu_to_be32(rgd->rd_free);
        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
        if (next == NULL)
                str->rg_skip = 0;
        else if (next->rd_addr > rgd->rd_addr)
                str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
        str->rg_data0 = cpu_to_be64(rgd->rd_data0);
        str->rg_data = cpu_to_be32(rgd->rd_data);
        str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
        str->rg_crc = 0;
        crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
        str->rg_crc = cpu_to_be32(crc);

        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
}

static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;

        if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
            rgl->rl_dinodes != str->rg_dinodes ||
            rgl->rl_igeneration != str->rg_igeneration)
                return 0;
        return 1;
}

static u32 count_unlinked(struct gfs2_rgrpd *rgd)
{
        struct gfs2_bitmap *bi;
        const u32 length = rgd->rd_length;
        const u8 *buffer = NULL;
        u32 i, goal, count = 0;

        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
                goal = 0;
                buffer = bi->bi_bh->b_data + bi->bi_offset;
                WARN_ON(!buffer_uptodate(bi->bi_bh));
                while (goal < bi->bi_len * GFS2_NBBY) {
                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
                                           GFS2_BLKST_UNLINKED);
                        if (goal == BFITNOENT)
                                break;
                        count++;
                        goal++;
                }
        }

        return count;
}


/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
 *
 * Returns: errno
 */

static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
        unsigned int length = rgd->rd_length;
        struct gfs2_bitmap *bi;
        unsigned int x, y;
        int error;

        if (rgd->rd_bits[0].bi_bh != NULL)
                return 0;

        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
                if (error)
                        goto fail;
        }

        for (y = length; y--;) {
                bi = rgd->rd_bits + y;
                error = gfs2_meta_wait(sdp, bi->bi_bh);
                if (error)
                        goto fail;
                if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
                                        GFS2_METATYPE_RG)) {
                        error = -EIO;
                        goto fail;
                }
        }

        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
                for (x = 0; x < length; x++)
                        clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
                /* max out the rgrp allocation failure point */
                rgd->rd_extfail_pt = rgd->rd_free;
        }
        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
                                     rgd->rd_bits[0].bi_bh->b_data);
        }
        else if (sdp->sd_args.ar_rgrplvb) {
                if (!gfs2_rgrp_lvb_valid(rgd)){
                        gfs2_consist_rgrpd(rgd);
                        error = -EIO;
                        goto fail;
                }
                if (rgd->rd_rgl->rl_unlinked == 0)
                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
        }
        return 0;

fail:
        while (x--) {
                bi = rgd->rd_bits + x;
                brelse(bi->bi_bh);
                bi->bi_bh = NULL;
                gfs2_assert_warn(sdp, !bi->bi_clone);
        }

        return error;
}

static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
        u32 rl_flags;

        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
                return 0;

        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
                return gfs2_rgrp_bh_get(rgd);

        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
        rl_flags &= ~GFS2_RDF_MASK;
        rgd->rd_flags &= GFS2_RDF_MASK;
        rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
        if (rgd->rd_rgl->rl_unlinked == 0)
                rgd->rd_flags &= ~GFS2_RDF_CHECK;
        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
        rgd->rd_free_clone = rgd->rd_free;
        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
        return 0;
}

int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
        struct gfs2_sbd *sdp = rgd->rd_sbd;

        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
                return 0;
        return gfs2_rgrp_bh_get(rgd);
}

/**
 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @rgd: The resource group
 *
 */

void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
{
        int x, length = rgd->rd_length;

        for (x = 0; x < length; x++) {
                struct gfs2_bitmap *bi = rgd->rd_bits + x;
                if (bi->bi_bh) {
                        brelse(bi->bi_bh);
                        bi->bi_bh = NULL;
                }
        }

}

/**
 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
 * @gh: The glock holder for the resource group
 *
 */

void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
{
        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);

        if (rgd && demote_requested)
                gfs2_rgrp_brelse(rgd);
}

int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                            struct buffer_head *bh,
                            const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
        struct super_block *sb = sdp->sd_vfs;
        u64 blk;
        sector_t start = 0;
        sector_t nr_blks = 0;
        int rv;
        unsigned int x;
        u32 trimmed = 0;
        u8 diff;

        for (x = 0; x < bi->bi_len; x++) {
                const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
                clone += bi->bi_offset;
                clone += x;
                if (bh) {
                        const u8 *orig = bh->b_data + bi->bi_offset + x;
                        diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
                } else {
                        diff = ~(*clone | (*clone >> 1));
                }
                diff &= 0x55;
                if (diff == 0)
                        continue;
                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
                while(diff) {
                        if (diff & 1) {
                                if (nr_blks == 0)
                                        goto start_new_extent;
                                if ((start + nr_blks) != blk) {
                                        if (nr_blks >= minlen) {
                                                rv = sb_issue_discard(sb,
                                                        start, nr_blks,
                                                        GFP_NOFS, 0);
                                                if (rv)
                                                        goto fail;
                                                trimmed += nr_blks;
                                        }
                                        nr_blks = 0;
start_new_extent:
                                        start = blk;
                                }
                                nr_blks++;
                        }
                        diff >>= 2;
                        blk++;
                }
        }
        if (nr_blks >= minlen) {
                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
                if (rv)
                        goto fail;
                trimmed += nr_blks;
        }
        if (ptrimmed)
                *ptrimmed = trimmed;
        return 0;

fail:
        if (sdp->sd_args.ar_discard)
                fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
        sdp->sd_args.ar_discard = 0;
        return -EIO;
}
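
/*
 * Worked example of the diff logic above (illustrative): suppose the
 * live bitmap byte is 0x00 (all four blocks now free) while the clone
 * byte is 0x05 (blocks 0 and 1 were in use when the transaction
 * started). Then diff = ~(0x00 | 0x00) & (0x05 | 0x02) & 0x55 = 0x05,
 * flagging blocks 0 and 1 as newly freed and hence candidates for
 * discard.
 */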

/**
 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
 * @filp: Any file on the filesystem
 * @argp: Pointer to the arguments (also used to pass result)
 *
 * Returns: 0 on success, otherwise error code
 */

int gfs2_fitrim(struct file *filp, void __user *argp)
{
        struct inode *inode = file_inode(filp);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
        struct buffer_head *bh;
        struct gfs2_rgrpd *rgd;
        struct gfs2_rgrpd *rgd_end;
        struct gfs2_holder gh;
        struct fstrim_range r;
        int ret = 0;
        u64 amt;
        u64 trimmed = 0;
        u64 start, end, minlen;
        unsigned int x;
        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;

        if (copy_from_user(&r, argp, sizeof(r)))
                return -EFAULT;

        ret = gfs2_rindex_update(sdp);
        if (ret)
                return ret;

        start = r.start >> bs_shift;
        end = start + (r.len >> bs_shift);
        minlen = max_t(u64, r.minlen,
                       q->limits.discard_granularity) >> bs_shift;

        if (end <= start || minlen > sdp->sd_max_rg_data)
                return -EINVAL;

        rgd = gfs2_blk2rgrpd(sdp, start, 0);
        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);

        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
                return -EINVAL; /* start is beyond the end of the fs */

        while (1) {

                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
                if (ret)
                        goto out;

                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
                        /* Trim each bitmap in the rgrp */
                        for (x = 0; x < rgd->rd_length; x++) {
                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
                                ret = gfs2_rgrp_send_discards(sdp,
                                                rgd->rd_data0, NULL, bi, minlen,
                                                &amt);
                                if (ret) {
                                        gfs2_glock_dq_uninit(&gh);
                                        goto out;
                                }
                                trimmed += amt;
                        }

                        /* Mark rgrp as having been trimmed */
                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
                        if (ret == 0) {
                                bh = rgd->rd_bits[0].bi_bh;
                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
                                gfs2_trans_add_meta(rgd->rd_gl, bh);
                                gfs2_rgrp_out(rgd, bh->b_data);
                                gfs2_trans_end(sdp);
                        }
                }
                gfs2_glock_dq_uninit(&gh);

                if (rgd == rgd_end)
                        break;

                rgd = gfs2_rgrpd_get_next(rgd);
        }

out:
        r.len = trimmed << bs_shift;
        if (copy_to_user(argp, &r, sizeof(r)))
                return -EFAULT;

        return ret;
}

/**
 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
 * @ip: the inode structure
 *
 */
static void rs_insert(struct gfs2_inode *ip)
{
        struct rb_node **newn, *parent = NULL;
        int rc;
        struct gfs2_blkreserv *rs = &ip->i_res;
        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);

        BUG_ON(gfs2_rs_active(rs));

        spin_lock(&rgd->rd_rsspin);
        newn = &rgd->rd_rstree.rb_node;
        while (*newn) {
                struct gfs2_blkreserv *cur =
                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);

                parent = *newn;
                rc = rs_cmp(fsblock, rs->rs_free, cur);
                if (rc > 0)
                        newn = &((*newn)->rb_right);
                else if (rc < 0)
                        newn = &((*newn)->rb_left);
                else {
                        spin_unlock(&rgd->rd_rsspin);
                        WARN_ON(1);
                        return;
                }
        }

        rb_link_node(&rs->rs_node, parent, newn);
        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

        /* Do our rgrp accounting for the reservation */
        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
        spin_unlock(&rgd->rd_rsspin);
        trace_gfs2_rs(rs, TRACE_RS_INSERT);
}

/**
 * rgd_free - return the number of free blocks we can allocate.
 * @rgd: the resource group
 * @rs: our own block reservation, whose blocks still count as allocatable
 *
 * This function returns the number of free blocks for an rgrp.
 * That's the clone-free blocks (blocks that are free, not including those
 * still being used for unlinked files that haven't been deleted).
 *
 * It also subtracts any blocks reserved by someone else, but does not
 * include free blocks that are still part of our current reservation,
 * because obviously we can (and will) allocate them.
 */
static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
{
        u32 tot_reserved, tot_free;

        if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
                return 0;
        tot_reserved = rgd->rd_reserved - rs->rs_free;

        if (rgd->rd_free_clone < tot_reserved)
                tot_reserved = 0;

        tot_free = rgd->rd_free_clone - tot_reserved;

        return tot_free;
}
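
/*
 * Numerical sketch (illustrative): with rd_free_clone = 100,
 * rd_reserved = 30 and our own rs_free = 10, tot_reserved = 20 (other
 * users' reservations) and tot_free = 80, since the 10 blocks of our
 * own reservation still count as allocatable by us.
 */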

/**
 * rg_mblk_search - find a group of multiple free blocks to form a reservation
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks
 * @ap: the allocation parameters
 *
 */

static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
                           const struct gfs2_alloc_parms *ap)
{
        struct gfs2_rbm rbm = { .rgd = rgd, };
        u64 goal;
        struct gfs2_blkreserv *rs = &ip->i_res;
        u32 extlen;
        u32 free_blocks = rgd_free(rgd, rs);
        int ret;
        struct inode *inode = &ip->i_inode;

        if (S_ISDIR(inode->i_mode))
                extlen = 1;
        else {
                extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
                extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
        }
        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
                return;

        /* Find bitmap block that contains bits for goal block */
        if (rgrp_contains_block(rgd, ip->i_goal))
                goal = ip->i_goal;
        else
                goal = rgd->rd_last_alloc + rgd->rd_data0;

        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
                return;

        ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
        if (ret == 0) {
                rs->rs_rbm = rbm;
                rs->rs_free = extlen;
                rs_insert(ip);
        } else {
                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
                        rgd->rd_last_alloc = 0;
        }
}

/**
 * gfs2_next_unreserved_block - Return next block that is not reserved
 * @rgd: The resource group
 * @block: The starting block
 * @length: The required length
 * @ip: Ignore any reservations for this inode
 *
 * If the block does not appear in any reservation, then return the
 * block number unchanged. If it does appear in the reservation, then
 * keep looking through the tree of reservations in order to find the
 * first block number which is not reserved.
 */

static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
                                      u32 length,
                                      const struct gfs2_inode *ip)
{
        struct gfs2_blkreserv *rs;
        struct rb_node *n;
        int rc;

        spin_lock(&rgd->rd_rsspin);
        n = rgd->rd_rstree.rb_node;
        while (n) {
                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                rc = rs_cmp(block, length, rs);
                if (rc < 0)
                        n = n->rb_left;
                else if (rc > 0)
                        n = n->rb_right;
                else
                        break;
        }

        if (n) {
                while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
                        block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
                        n = n->rb_right;
                        if (n == NULL)
                                break;
                        rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                }
        }

        spin_unlock(&rgd->rd_rsspin);
        return block;
}

/**
 * gfs2_reservation_check_and_update - Check for reservations during block alloc
 * @rbm: The current position in the resource group
 * @ip: The inode for which we are searching for blocks
 * @minext: The minimum extent length
 * @maxext: A pointer to the maximum extent structure
 *
 * This checks the current position in the rgrp to see whether there is
 * a reservation covering this block. If not then this function is a
 * no-op. If there is, then the position is moved to the end of the
 * contiguous reservation(s) so that we are pointing at the first
 * non-reserved block.
 *
 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
 */

static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
                                             const struct gfs2_inode *ip,
                                             u32 minext,
                                             struct gfs2_extent *maxext)
{
        u64 block = gfs2_rbm_to_block(rbm);
        u32 extlen = 1;
        u64 nblock;
        int ret;

        /*
         * If we have a minimum extent length, then skip over any extent
         * which is less than the min extent length in size.
         */
        if (minext) {
                extlen = gfs2_free_extlen(rbm, minext);
                if (extlen <= maxext->len)
                        goto fail;
        }

        /*
         * Check the extent which has been found against the reservations
         * and skip if parts of it are already reserved
         */
        nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
        if (nblock == block) {
                if (!minext || extlen >= minext)
                        return 0;

                if (extlen > maxext->len) {
                        maxext->len = extlen;
                        maxext->rbm = *rbm;
                }
fail:
                nblock = block + extlen;
        }
        ret = gfs2_rbm_from_block(rbm, nblock);
        if (ret < 0)
                return ret;
        return 1;
}

/**
 * gfs2_rbm_find - Look for blocks of a particular state
 * @rbm: Value/result starting position and final position
 * @state: The state which we want to find
 * @minext: Pointer to the requested extent length (NULL for a single block)
 *          This is updated to be the actual reservation size.
 * @ip: If set, check for reservations
 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
 *          around until we've reached the starting point.
 *
 * Side effects:
 * - If looking for free blocks, we set GBF_FULL on each bitmap which
 *   has no free blocks in it.
 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
 *   has come up short on a free block search.
 *
 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
 */

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
                         const struct gfs2_inode *ip, bool nowrap)
{
        struct buffer_head *bh;
        int initial_bii;
        u32 initial_offset;
        int first_bii = rbm->bii;
        u32 first_offset = rbm->offset;
        u32 offset;
        u8 *buffer;
        int n = 0;
        int iters = rbm->rgd->rd_length;
        int ret;
        struct gfs2_bitmap *bi;
        struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

        /* If we are not starting at the beginning of a bitmap, then we
         * need to add one to the bitmap count to ensure that we search
         * the starting bitmap twice.
         */
        if (rbm->offset != 0)
                iters++;

        while(1) {
                bi = rbm_bi(rbm);
                if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
                    test_bit(GBF_FULL, &bi->bi_flags) &&
                    (state == GFS2_BLKST_FREE))
                        goto next_bitmap;

                bh = bi->bi_bh;
                buffer = bh->b_data + bi->bi_offset;
                WARN_ON(!buffer_uptodate(bh));
                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
                        buffer = bi->bi_clone + bi->bi_offset;
                initial_offset = rbm->offset;
                offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
                if (offset == BFITNOENT)
                        goto bitmap_full;
                rbm->offset = offset;
                if (ip == NULL)
                        return 0;

                initial_bii = rbm->bii;
                ret = gfs2_reservation_check_and_update(rbm, ip,
                                                        minext ? *minext : 0,
                                                        &maxext);
                if (ret == 0)
                        return 0;
                if (ret > 0) {
                        n += (rbm->bii - initial_bii);
                        goto next_iter;
                }
                if (ret == -E2BIG) {
                        rbm->bii = 0;
                        rbm->offset = 0;
                        n += (rbm->bii - initial_bii);
                        goto res_covered_end_of_rgrp;
                }
                return ret;

bitmap_full:    /* Mark bitmap as full and fall through */
                if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
                        set_bit(GBF_FULL, &bi->bi_flags);

next_bitmap:    /* Find next bitmap in the rgrp */
                rbm->offset = 0;
                rbm->bii++;
                if (rbm->bii == rbm->rgd->rd_length)
                        rbm->bii = 0;
res_covered_end_of_rgrp:
                if ((rbm->bii == 0) && nowrap)
                        break;
                n++;
next_iter:
                if (n >= iters)
                        break;
        }

        if (minext == NULL || state != GFS2_BLKST_FREE)
                return -ENOSPC;

        /* If the extent was too small, and it's smaller than the smallest
           to have failed before, remember for future reference that it's
           useless to search this rgrp again for this amount or more. */
        if ((first_offset == 0) && (first_bii == 0) &&
            (*minext < rbm->rgd->rd_extfail_pt))
                rbm->rgd->rd_extfail_pt = *minext;

        /* If the maximum extent we found is big enough to fulfill the
           minimum requirements, use it anyway. */
        if (maxext.len) {
                *rbm = maxext.rbm;
                *minext = maxext.len;
                return 0;
        }

        return -ENOSPC;
}

/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: block address of the last dinode we unlinked
 * @skip: block address we should explicitly not unlink
 *
 * No return value; any unlinked inodes found are handed to the delete
 * workqueue for disposal.
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
        u64 block;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl;
        struct gfs2_inode *ip;
        int error;
        int found = 0;
        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

        while (1) {
                down_write(&sdp->sd_log_flush_lock);
                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
                                      true);
                up_write(&sdp->sd_log_flush_lock);
                if (error == -ENOSPC)
                        break;
                if (WARN_ON_ONCE(error))
                        break;

                block = gfs2_rbm_to_block(&rbm);
                if (gfs2_rbm_from_block(&rbm, block + 1))
                        break;
                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
                        continue;
                if (block == skip)
                        continue;
                *last_unlinked = block;

                error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
                if (error)
                        continue;

                /* If the inode is already in cache, we can ignore it here
                 * because the existing inode disposal code will deal with
                 * it when all refs have gone away. Accessing gl_object like
                 * this is not safe in general. Here it is ok because we do
                 * not dereference the pointer, and we only need an approx
                 * answer to whether it is NULL or not.
                 */
                ip = gl->gl_object;

                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
                        gfs2_glock_put(gl);
                else
                        found++;

                /* Limit reclaim to sensible number of tasks */
                if (found > NR_CPUS)
                        return;
        }

        rgd->rd_flags &= ~GFS2_RDF_CHECK;
        return;
}
1860
1861 /**
1862 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1863 * @rgd: The rgrp in question
1864 * @loops: An indication of how picky we can be (0=very, 1=less so)
1865 *
1866 * This function uses the recently added glock statistics in order to
1867  * figure out whether a particular resource group is suffering from
1868 * contention from multiple nodes. This is done purely on the basis
1869 * of timings, since this is the only data we have to work with and
1870 * our aim here is to reject a resource group which is highly contended
1871 * but (very important) not to do this too often in order to ensure that
1872  * we do not end up introducing fragmentation by changing resource
1873 * groups when not actually required.
1874 *
1875 * The calculation is fairly simple, we want to know whether the SRTTB
1876 * (i.e. smoothed round trip time for blocking operations) to acquire
1877 * the lock for this rgrp's glock is significantly greater than the
1878 * time taken for resource groups on average. We introduce a margin in
1879 * the form of the variable @var which is computed as the sum of the two
1880  * respective variances, and multiplied by a factor depending on @loops
1881 * and whether we have a lot of data to base the decision on. This is
1882 * then tested against the square difference of the means in order to
1883 * decide whether the result is statistically significant or not.
1884 *
1885 * Returns: A boolean verdict on the congestion status
1886 */
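/*
 * Worked example with made-up numbers: if the average SRTTB across CPUs
 * is a_srttb = 100 and this rgrp's glock reports l_srttb = 160, then
 * srttb_diff = -60 and sqr_diff = 3600. Suppose the two variances sum
 * to var = 800; var is always doubled (giving 1600), and doubled again
 * if either dcount < 8 or if @loops == 1. With srttb_diff < 0 and
 * 3600 > 1600, the verdict is "congested".
 */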
1887
1888 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1889 {
1890 const struct gfs2_glock *gl = rgd->rd_gl;
1891 const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1892 struct gfs2_lkstats *st;
1893 u64 r_dcount, l_dcount;
1894 u64 l_srttb, a_srttb = 0;
1895 s64 srttb_diff;
1896 u64 sqr_diff;
1897 u64 var;
1898 int cpu, nonzero = 0;
1899
1900 preempt_disable();
1901 for_each_present_cpu(cpu) {
1902 st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
1903 if (st->stats[GFS2_LKS_SRTTB]) {
1904 a_srttb += st->stats[GFS2_LKS_SRTTB];
1905 nonzero++;
1906 }
1907 }
1908 st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1909 if (nonzero)
1910 do_div(a_srttb, nonzero);
1911 r_dcount = st->stats[GFS2_LKS_DCOUNT];
1912 var = st->stats[GFS2_LKS_SRTTVARB] +
1913 gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1914 preempt_enable();
1915
1916 l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1917 l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1918
1919 if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
1920 return false;
1921
1922 srttb_diff = a_srttb - l_srttb;
1923 sqr_diff = srttb_diff * srttb_diff;
1924
1925 var *= 2;
1926 if (l_dcount < 8 || r_dcount < 8)
1927 var *= 2;
1928 if (loops == 1)
1929 var *= 2;
1930
1931 return ((srttb_diff < 0) && (sqr_diff > var));
1932 }
1933
1934 /**
1935 * gfs2_rgrp_used_recently
1936 * @rs: The block reservation with the rgrp to test
1937 * @msecs: The time limit in milliseconds
1938 *
1939 * Returns: True if the rgrp glock has been used within the time limit
1940 */
1941 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1942 u64 msecs)
1943 {
1944 u64 tdiff;
1945
1946 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1947 rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1948
1949 return tdiff > (msecs * 1000 * 1000);
1950 }
1951
1952 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1953 {
1954 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1955 u32 skip;
1956
1957 get_random_bytes(&skip, sizeof(skip));
1958 return skip % sdp->sd_rgrps;
1959 }
1960
1961 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1962 {
1963 struct gfs2_rgrpd *rgd = *pos;
1964 struct gfs2_sbd *sdp = rgd->rd_sbd;
1965
1966 rgd = gfs2_rgrpd_get_next(rgd);
1967 if (rgd == NULL)
1968 rgd = gfs2_rgrpd_get_first(sdp);
1969 *pos = rgd;
1970 if (rgd != begin) /* If we didn't wrap */
1971 return true;
1972 return false;
1973 }
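/*
 * A sketch of the intended iteration pattern (names illustrative): the
 * helper above advances the cursor with wrap-around and reports whether
 * the scan has come back to its starting point:
 *
 *	struct gfs2_rgrpd *rgd = begin;
 *
 *	do {
 *		// consider rgd here
 *	} while (gfs2_select_rgrp(&rgd, begin));
 */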
1974
1975 /**
1976 * fast_to_acquire - determine if a resource group will be fast to acquire
1977  * @rgd: The rgrp in question
1978 * If this is one of our preferred rgrps, it should be quicker to acquire,
1979 * because we tried to set ourselves up as dlm lock master.
1980 */
1981 static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1982 {
1983 struct gfs2_glock *gl = rgd->rd_gl;
1984
1985 if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1986 !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1987 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1988 return 1;
1989 if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1990 return 1;
1991 return 0;
1992 }
1993
1994 /**
1995 * gfs2_inplace_reserve - Reserve space in the filesystem
1996 * @ip: the inode to reserve space for
1997 * @ap: the allocation parameters
1998 *
1999 * We try our best to find an rgrp that has at least ap->target blocks
2000 * available. After a couple of passes (loops == 2), the prospects of finding
2001 * such an rgrp diminish. At this stage, we return the first rgrp that has
2002  * at least ap->min_target blocks available. Either way, we set ap->allowed to
2003 * the number of blocks available in the chosen rgrp.
2004 *
2005 * Returns: 0 on success,
2006  *          -ENOSPC if a suitable rgrp can't be found
2007 * errno otherwise
2008 */
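/*
 * A minimal caller sketch (error handling and locking context elided;
 * "requested" is an example variable, not part of this API):
 *
 *	struct gfs2_alloc_parms ap = { .target = requested };
 *
 *	error = gfs2_inplace_reserve(ip, &ap);
 *	if (error)
 *		return error;
 *	// ... start a transaction and call gfs2_alloc_blocks() ...
 *	gfs2_inplace_release(ip);
 */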
2009
2010 int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
2011 {
2012 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2013 struct gfs2_rgrpd *begin = NULL;
2014 struct gfs2_blkreserv *rs = &ip->i_res;
2015 int error = 0, rg_locked, flags = 0;
2016 u64 last_unlinked = NO_BLOCK;
2017 int loops = 0;
2018 u32 free_blocks, skip = 0;
2019
2020 if (sdp->sd_args.ar_rgrplvb)
2021 flags |= GL_SKIP;
2022 if (gfs2_assert_warn(sdp, ap->target))
2023 return -EINVAL;
2024 if (gfs2_rs_active(rs)) {
2025 begin = rs->rs_rbm.rgd;
2026 } else if (rs->rs_rbm.rgd &&
2027 rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
2028 begin = rs->rs_rbm.rgd;
2029 } else {
2030 check_and_update_goal(ip);
2031 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
2032 }
2033 if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
2034 skip = gfs2_orlov_skip(ip);
2035 if (rs->rs_rbm.rgd == NULL)
2036 return -EBADSLT;
2037
2038 while (loops < 3) {
2039 rg_locked = 1;
2040
2041 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
2042 rg_locked = 0;
2043 if (skip && skip--)
2044 goto next_rgrp;
2045 if (!gfs2_rs_active(rs)) {
2046 if (loops == 0 &&
2047 !fast_to_acquire(rs->rs_rbm.rgd))
2048 goto next_rgrp;
2049 if ((loops < 2) &&
2050 gfs2_rgrp_used_recently(rs, 1000) &&
2051 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2052 goto next_rgrp;
2053 }
2054 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
2055 LM_ST_EXCLUSIVE, flags,
2056 &rs->rs_rgd_gh);
2057 if (unlikely(error))
2058 return error;
2059 if (!gfs2_rs_active(rs) && (loops < 2) &&
2060 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2061 goto skip_rgrp;
2062 if (sdp->sd_args.ar_rgrplvb) {
2063 error = update_rgrp_lvb(rs->rs_rbm.rgd);
2064 if (unlikely(error)) {
2065 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2066 return error;
2067 }
2068 }
2069 }
2070
2071 		/* Skip unusable resource groups */
2072 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2073 GFS2_RDF_ERROR)) ||
2074 (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2075 goto skip_rgrp;
2076
2077 if (sdp->sd_args.ar_rgrplvb)
2078 gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2079
2080 /* Get a reservation if we don't already have one */
2081 if (!gfs2_rs_active(rs))
2082 rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2083
2084 /* Skip rgrps when we can't get a reservation on first pass */
2085 if (!gfs2_rs_active(rs) && (loops < 1))
2086 goto check_rgrp;
2087
2088 /* If rgrp has enough free space, use it */
2089 free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
2090 if (free_blocks >= ap->target ||
2091 (loops == 2 && ap->min_target &&
2092 free_blocks >= ap->min_target)) {
2093 ap->allowed = free_blocks;
2094 return 0;
2095 }
2096 check_rgrp:
2097 /* Check for unlinked inodes which can be reclaimed */
2098 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2099 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2100 ip->i_no_addr);
2101 skip_rgrp:
2102 /* Drop reservation, if we couldn't use reserved rgrp */
2103 if (gfs2_rs_active(rs))
2104 gfs2_rs_deltree(rs);
2105
2106 /* Unlock rgrp if required */
2107 if (!rg_locked)
2108 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2109 next_rgrp:
2110 /* Find the next rgrp, and continue looking */
2111 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2112 continue;
2113 if (skip)
2114 continue;
2115
2116 /* If we've scanned all the rgrps, but found no free blocks
2117 * then this checks for some less likely conditions before
2118 * trying again.
2119 */
2120 loops++;
2121 /* Check that fs hasn't grown if writing to rindex */
2122 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2123 error = gfs2_ri_update(ip);
2124 if (error)
2125 return error;
2126 }
2127 /* Flushing the log may release space */
2128 if (loops == 2)
2129 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
2130 GFS2_LFC_INPLACE_RESERVE);
2131 }
2132
2133 return -ENOSPC;
2134 }
2135
2136 /**
2137 * gfs2_inplace_release - release an inplace reservation
2138 * @ip: the inode the reservation was taken out on
2139 *
2140 * Release a reservation made by gfs2_inplace_reserve().
2141 */
2142
2143 void gfs2_inplace_release(struct gfs2_inode *ip)
2144 {
2145 struct gfs2_blkreserv *rs = &ip->i_res;
2146
2147 if (gfs2_holder_initialized(&rs->rs_rgd_gh))
2148 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2149 }
2150
2151 /**
2152 * gfs2_alloc_extent - allocate an extent from a given bitmap
2153  * @rbm: The position of the first block of the extent
2154 * @dinode: TRUE if the first block we allocate is for a dinode
2155 * @n: The extent length (value/result)
2156 *
2157 * Add the bitmap buffer to the transaction.
2158  * Set the found bits to the new state (used or dinode) to allocate the blocks.
2159 */
2160 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2161 unsigned int *n)
2162 {
2163 struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2164 const unsigned int elen = *n;
2165 u64 block;
2166 int ret;
2167
2168 *n = 1;
2169 block = gfs2_rbm_to_block(rbm);
2170 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2171 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2172 block++;
2173 while (*n < elen) {
2174 ret = gfs2_rbm_from_block(&pos, block);
2175 if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
2176 break;
2177 gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2178 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2179 (*n)++;
2180 block++;
2181 }
2182 }
2183
2184 /**
2185 * rgblk_free - Change alloc state of given block(s)
2186 * @sdp: the filesystem
2187 * @bstart: the start of a run of blocks to free
2188 * @blen: the length of the block run (all must lie within ONE RG!)
2189 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2190 *
2191 * Returns: Resource group containing the block(s)
2192 */
2193
2194 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2195 u32 blen, unsigned char new_state)
2196 {
2197 struct gfs2_rbm rbm;
2198 struct gfs2_bitmap *bi, *bi_prev = NULL;
2199
2200 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2201 if (!rbm.rgd) {
2202 if (gfs2_consist(sdp))
2203 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2204 return NULL;
2205 }
2206
2207 gfs2_rbm_from_block(&rbm, bstart);
2208 while (blen--) {
2209 bi = rbm_bi(&rbm);
2210 if (bi != bi_prev) {
2211 if (!bi->bi_clone) {
2212 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2213 GFP_NOFS | __GFP_NOFAIL);
2214 memcpy(bi->bi_clone + bi->bi_offset,
2215 bi->bi_bh->b_data + bi->bi_offset,
2216 bi->bi_len);
2217 }
2218 gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2219 bi_prev = bi;
2220 }
2221 gfs2_setbit(&rbm, false, new_state);
2222 gfs2_rbm_incr(&rbm);
2223 }
2224
2225 return rbm.rgd;
2226 }
2227
2228 /**
2229 * gfs2_rgrp_dump - print out an rgrp
2230 * @seq: The iterator
2231 * @gl: The glock in question
2232 *
2233 */
2234
2235 void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2236 {
2237 struct gfs2_rgrpd *rgd = gl->gl_object;
2238 struct gfs2_blkreserv *trs;
2239 const struct rb_node *n;
2240
2241 if (rgd == NULL)
2242 return;
2243 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2244 (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2245 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2246 rgd->rd_reserved, rgd->rd_extfail_pt);
2247 spin_lock(&rgd->rd_rsspin);
2248 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2249 trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2250 dump_rs(seq, trs);
2251 }
2252 spin_unlock(&rgd->rd_rsspin);
2253 }
2254
2255 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2256 {
2257 struct gfs2_sbd *sdp = rgd->rd_sbd;
2258 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2259 (unsigned long long)rgd->rd_addr);
2260 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2261 gfs2_rgrp_dump(NULL, rgd->rd_gl);
2262 rgd->rd_flags |= GFS2_RDF_ERROR;
2263 }
2264
2265 /**
2266 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2267 * @ip: The inode we have just allocated blocks for
2268 * @rbm: The start of the allocated blocks
2269 * @len: The extent length
2270 *
2271 * Adjusts a reservation after an allocation has taken place. If the
2272 * reservation does not match the allocation, or if it is now empty
2273 * then it is removed.
2274 */
2275
2276 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2277 const struct gfs2_rbm *rbm, unsigned len)
2278 {
2279 struct gfs2_blkreserv *rs = &ip->i_res;
2280 struct gfs2_rgrpd *rgd = rbm->rgd;
2281 unsigned rlen;
2282 u64 block;
2283 int ret;
2284
2285 spin_lock(&rgd->rd_rsspin);
2286 if (gfs2_rs_active(rs)) {
2287 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2288 block = gfs2_rbm_to_block(rbm);
2289 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2290 rlen = min(rs->rs_free, len);
2291 rs->rs_free -= rlen;
2292 rgd->rd_reserved -= rlen;
2293 trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2294 if (rs->rs_free && !ret)
2295 goto out;
2296 /* We used up our block reservation, so we should
2297 reserve more blocks next time. */
2298 atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
2299 }
2300 __rs_deltree(rs);
2301 }
2302 out:
2303 spin_unlock(&rgd->rd_rsspin);
2304 }
2305
2306 /**
2307 * gfs2_set_alloc_start - Set starting point for block allocation
2308 * @rbm: The rbm which will be set to the required location
2309 * @ip: The gfs2 inode
2310 * @dinode: Flag to say if allocation includes a new inode
2311 *
2312 * This sets the starting point from the reservation if one is active
2313 * otherwise it falls back to guessing a start point based on the
2314 * inode's goal block or the last allocation point in the rgrp.
2315 */
2316
2317 static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2318 const struct gfs2_inode *ip, bool dinode)
2319 {
2320 u64 goal;
2321
2322 if (gfs2_rs_active(&ip->i_res)) {
2323 *rbm = ip->i_res.rs_rbm;
2324 return;
2325 }
2326
2327 if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2328 goal = ip->i_goal;
2329 else
2330 goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2331
2332 gfs2_rbm_from_block(rbm, goal);
2333 }
2334
2335 /**
2336 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2337 * @ip: the inode to allocate the block for
2338 * @bn: Used to return the starting block number
2339 * @nblocks: requested number of blocks/extent length (value/result)
2340 * @dinode: 1 if we're allocating a dinode block, else 0
2341 * @generation: the generation number of the inode
2342 *
2343 * Returns: 0 or error
2344 */
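/*
 * Illustrative data-block allocation (sketch; assumes an active
 * reservation and an open transaction):
 *
 *	u64 bn;
 *	unsigned int n = 4;	// request up to a 4-block extent
 *	int error = gfs2_alloc_blocks(ip, &bn, &n, false, NULL);
 *
 * On success, bn holds the first allocated block and n is updated to
 * the extent length actually obtained; @generation may be NULL here
 * since no dinode is being allocated.
 */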
2345
2346 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2347 bool dinode, u64 *generation)
2348 {
2349 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2350 struct buffer_head *dibh;
2351 struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
2352 unsigned int ndata;
2353 u64 block; /* block, within the file system scope */
2354 int error;
2355
2356 gfs2_set_alloc_start(&rbm, ip, dinode);
2357 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
2358
2359 if (error == -ENOSPC) {
2360 gfs2_set_alloc_start(&rbm, ip, dinode);
2361 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
2362 }
2363
2364 /* Since all blocks are reserved in advance, this shouldn't happen */
2365 if (error) {
2366 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2367 (unsigned long long)ip->i_no_addr, error, *nblocks,
2368 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2369 rbm.rgd->rd_extfail_pt);
2370 goto rgrp_error;
2371 }
2372
2373 gfs2_alloc_extent(&rbm, dinode, nblocks);
2374 block = gfs2_rbm_to_block(&rbm);
2375 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2376 if (gfs2_rs_active(&ip->i_res))
2377 gfs2_adjust_reservation(ip, &rbm, *nblocks);
2378 ndata = *nblocks;
2379 if (dinode)
2380 ndata--;
2381
2382 if (!dinode) {
2383 ip->i_goal = block + ndata - 1;
2384 error = gfs2_meta_inode_buffer(ip, &dibh);
2385 if (error == 0) {
2386 struct gfs2_dinode *di =
2387 (struct gfs2_dinode *)dibh->b_data;
2388 gfs2_trans_add_meta(ip->i_gl, dibh);
2389 di->di_goal_meta = di->di_goal_data =
2390 cpu_to_be64(ip->i_goal);
2391 brelse(dibh);
2392 }
2393 }
2394 if (rbm.rgd->rd_free < *nblocks) {
2395 pr_warn("nblocks=%u\n", *nblocks);
2396 goto rgrp_error;
2397 }
2398
2399 rbm.rgd->rd_free -= *nblocks;
2400 if (dinode) {
2401 rbm.rgd->rd_dinodes++;
2402 *generation = rbm.rgd->rd_igeneration++;
2403 if (*generation == 0)
2404 *generation = rbm.rgd->rd_igeneration++;
2405 }
2406
2407 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2408 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2409
2410 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2411 if (dinode)
2412 gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2413
2414 gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2415
2416 rbm.rgd->rd_free_clone -= *nblocks;
2417 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2418 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2419 *bn = block;
2420 return 0;
2421
2422 rgrp_error:
2423 gfs2_rgrp_error(rbm.rgd);
2424 return -EIO;
2425 }
2426
2427 /**
2428 * __gfs2_free_blocks - free a contiguous run of block(s)
2429 * @ip: the inode these blocks are being freed from
2430 * @bstart: first block of a run of contiguous blocks
2431 * @blen: the length of the block run
2432 * @meta: 1 if the blocks represent metadata
2433 *
2434 */
2435
2436 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2437 {
2438 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2439 struct gfs2_rgrpd *rgd;
2440
2441 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2442 if (!rgd)
2443 return;
2444 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2445 rgd->rd_free += blen;
2446 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2447 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2448 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2449
2450 /* Directories keep their data in the metadata address space */
2451 if (meta || ip->i_depth)
2452 gfs2_meta_wipe(ip, bstart, blen);
2453 }
2454
2455 /**
2456  * gfs2_free_meta - free a contiguous run of metadata block(s)
2457 * @ip: the inode these blocks are being freed from
2458 * @bstart: first block of a run of contiguous blocks
2459 * @blen: the length of the block run
2460 *
2461 */
2462
2463 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2464 {
2465 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2466
2467 __gfs2_free_blocks(ip, bstart, blen, 1);
2468 gfs2_statfs_change(sdp, 0, +blen, 0);
2469 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2470 }
2471
2472 void gfs2_unlink_di(struct inode *inode)
2473 {
2474 struct gfs2_inode *ip = GFS2_I(inode);
2475 struct gfs2_sbd *sdp = GFS2_SB(inode);
2476 struct gfs2_rgrpd *rgd;
2477 u64 blkno = ip->i_no_addr;
2478
2479 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2480 if (!rgd)
2481 return;
2482 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2483 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2484 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2485 be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
2486 }
2487
2488 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2489 {
2490 struct gfs2_sbd *sdp = rgd->rd_sbd;
2491 struct gfs2_rgrpd *tmp_rgd;
2492
2493 tmp_rgd = rgblk_free(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2494 if (!tmp_rgd)
2495 return;
2496 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2497
2498 if (!rgd->rd_dinodes)
2499 gfs2_consist_rgrpd(rgd);
2500 rgd->rd_dinodes--;
2501 rgd->rd_free++;
2502
2503 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2504 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2505 be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
2506
2507 gfs2_statfs_change(sdp, 0, +1, -1);
2508 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2509 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2510 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2511 }
2512
2513 /**
2514 * gfs2_check_blk_type - Check the type of a block
2515 * @sdp: The superblock
2516 * @no_addr: The block number to check
2517 * @type: The block type we are looking for
2518 *
2519 * Returns: 0 if the block type matches the expected type
2520 * -ESTALE if it doesn't match
2521 * or -ve errno if something went wrong while checking
2522 */
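/*
 * Example (sketch): confirming that a suspected inode block is still an
 * unlinked dinode before attempting to recover it:
 *
 *	error = gfs2_check_blk_type(sdp, no_addr, GFS2_BLKST_UNLINKED);
 *	if (error == -ESTALE)
 *		// the block has since been reused or freed
 */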
2523
2524 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2525 {
2526 struct gfs2_rgrpd *rgd;
2527 struct gfs2_holder rgd_gh;
2528 struct gfs2_rbm rbm;
2529 int error = -EINVAL;
2530
2531 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2532 if (!rgd)
2533 goto fail;
2534
2535 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2536 if (error)
2537 goto fail;
2538
2539 rbm.rgd = rgd;
2540 error = gfs2_rbm_from_block(&rbm, no_addr);
2541 WARN_ON_ONCE(error != 0);
2542
2543 if (gfs2_testbit(&rbm, false) != type)
2544 error = -ESTALE;
2545
2546 gfs2_glock_dq_uninit(&rgd_gh);
2547 fail:
2548 return error;
2549 }
2550
2551 /**
2552 * gfs2_rlist_add - add a RG to a list of RGs
2553 * @ip: the inode
2554 * @rlist: the list of resource groups
2555 * @block: the block
2556 *
2557 * Figure out what RG a block belongs to and add that RG to the list
2558 *
2559 * FIXME: Don't use NOFAIL
2560 *
2561 */
2562
2563 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2564 u64 block)
2565 {
2566 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2567 struct gfs2_rgrpd *rgd;
2568 struct gfs2_rgrpd **tmp;
2569 unsigned int new_space;
2570 unsigned int x;
2571
2572 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2573 return;
2574
2575 /*
2576 * The resource group last accessed is kept in the last position.
2577 */
2578
2579 if (rlist->rl_rgrps) {
2580 rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
2581 if (rgrp_contains_block(rgd, block))
2582 return;
2583 rgd = gfs2_blk2rgrpd(sdp, block, 1);
2584 } else {
2585 rgd = ip->i_res.rs_rbm.rgd;
2586 if (!rgd || !rgrp_contains_block(rgd, block))
2587 rgd = gfs2_blk2rgrpd(sdp, block, 1);
2588 }
2589
2590 if (!rgd) {
2591 fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
2592 (unsigned long long)block);
2593 return;
2594 }
2595
2596 for (x = 0; x < rlist->rl_rgrps; x++) {
2597 if (rlist->rl_rgd[x] == rgd) {
2598 swap(rlist->rl_rgd[x],
2599 rlist->rl_rgd[rlist->rl_rgrps - 1]);
2600 return;
2601 }
2602 }
2603
2604 if (rlist->rl_rgrps == rlist->rl_space) {
2605 new_space = rlist->rl_space + 10;
2606
2607 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2608 GFP_NOFS | __GFP_NOFAIL);
2609
2610 if (rlist->rl_rgd) {
2611 memcpy(tmp, rlist->rl_rgd,
2612 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2613 kfree(rlist->rl_rgd);
2614 }
2615
2616 rlist->rl_space = new_space;
2617 rlist->rl_rgd = tmp;
2618 }
2619
2620 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2621 }
2622
2623 /**
2624 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2625 * and initialize an array of glock holders for them
2626 * @rlist: the list of resource groups
2627 * @state: the lock state to acquire the RG lock in
2628 *
2629 * FIXME: Don't use NOFAIL
2630 *
2631 */
2632
2633 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2634 {
2635 unsigned int x;
2636
2637 rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
2638 sizeof(struct gfs2_holder),
2639 GFP_NOFS | __GFP_NOFAIL);
2640 for (x = 0; x < rlist->rl_rgrps; x++)
2641 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2642 state, 0,
2643 &rlist->rl_ghs[x]);
2644 }
2645
2646 /**
2647 * gfs2_rlist_free - free a resource group list
2648 * @rlist: the list of resource groups
2649 *
2650 */
2651
2652 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2653 {
2654 unsigned int x;
2655
2656 kfree(rlist->rl_rgd);
2657
2658 if (rlist->rl_ghs) {
2659 for (x = 0; x < rlist->rl_rgrps; x++)
2660 gfs2_holder_uninit(&rlist->rl_ghs[x]);
2661 kfree(rlist->rl_ghs);
2662 rlist->rl_ghs = NULL;
2663 }
2664 }
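/*
 * Typical rlist life cycle (sketch; enqueuing of the holders and all
 * error handling elided):
 *
 *	struct gfs2_rgrp_list rlist = {};
 *
 *	gfs2_rlist_add(ip, &rlist, block);	// once per block of interest
 *	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
 *	// ... acquire the holders, do the work, release them ...
 *	gfs2_rlist_free(&rlist);
 */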
2665
2666