1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * suballoc.c
4   *
5   * metadata alloc and free
6   * Inspired by ext3 block groups.
7   *
8   * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9   */
10  
11  #include <linux/fs.h>
12  #include <linux/types.h>
13  #include <linux/slab.h>
14  #include <linux/highmem.h>
15  
16  #include <cluster/masklog.h>
17  
18  #include "ocfs2.h"
19  
20  #include "alloc.h"
21  #include "blockcheck.h"
22  #include "dlmglue.h"
23  #include "inode.h"
24  #include "journal.h"
25  #include "localalloc.h"
26  #include "suballoc.h"
27  #include "super.h"
28  #include "sysfile.h"
29  #include "uptodate.h"
30  #include "ocfs2_trace.h"
31  
32  #include "buffer_head_io.h"
33  
/*
 * Values for the "flags" argument of the reservation helpers in this file.
 *
 * ocfs2_reserve_suballoc_bits() only adds a block group to an allocator
 * that is short on free bits when ALLOC_NEW_GROUP is set; otherwise it
 * fails with -ENOSPC.
 */
#define NOT_ALLOC_NEW_GROUP		0
#define ALLOC_NEW_GROUP			0x1
/*
 * NOTE(review): forwarded to ocfs2_reserve_clusters_with_limit() through
 * ocfs2_block_group_alloc(); its exact effect is decided there.
 */
#define ALLOC_GROUPS_FROM_GLOBAL	0x2

/*
 * While a metadata steal slot is set and osb->s_num_meta_stolen is below
 * this value, ocfs2_reserve_new_metadata_blocks() goes straight to
 * stealing instead of trying the local slot first.
 */
#define OCFS2_MAX_TO_STEAL		1024
39  
/*
 * Describes the outcome of a suballocator claim: the group the bits were
 * taken from and where the claimed run starts.
 */
struct ocfs2_suballoc_result {
	u64		sr_bg_blkno;	/* The bg we allocated from.  Set
					   to 0 when a block group is
					   contiguous; ocfs2_group_from_res()
					   then derives the group from
					   sr_blkno and sr_bit_offset. */
	u64		sr_bg_stable_blkno; /*
					     * Doesn't change, always
					     * set to target block
					     * group descriptor
					     * block.
					     */
	u64		sr_blkno;	/* The first allocated block; 0 means
					   nothing was allocated */
	unsigned int	sr_bit_offset;	/* The bit in the bg */
	unsigned int	sr_bits;	/* How many bits we claimed */
};
54  
ocfs2_group_from_res(struct ocfs2_suballoc_result * res)55  static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
56  {
57  	if (res->sr_blkno == 0)
58  		return 0;
59  
60  	if (res->sr_bg_blkno)
61  		return res->sr_bg_blkno;
62  
63  	return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
64  }
65  
/*
 * Forward declarations for file-local helpers that are referenced before
 * their definitions.
 */
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
static int ocfs2_block_group_fill(handle_t *handle,
				  struct inode *alloc_inode,
				  struct buffer_head *bg_bh,
				  u64 group_blkno,
				  unsigned int group_clusters,
				  u16 my_chain,
				  struct ocfs2_chain_list *cl);
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
				   struct inode *alloc_inode,
				   struct buffer_head *bh,
				   u64 max_block,
				   u64 *last_alloc_group,
				   int flags);

/* Group search callbacks; see ac_group_search in the alloc context. */
static int ocfs2_cluster_group_search(struct inode *inode,
				      struct buffer_head *group_bh,
				      u32 bits_wanted, u32 min_bits,
				      u64 max_block,
				      struct ocfs2_suballoc_result *res);
static int ocfs2_block_group_search(struct inode *inode,
				    struct buffer_head *group_bh,
				    u32 bits_wanted, u32 min_bits,
				    u64 max_block,
				    struct ocfs2_suballoc_result *res);
static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
				     handle_t *handle,
				     u32 bits_wanted,
				     u32 min_bits,
				     struct ocfs2_suballoc_result *res);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
					 int nr);
static int ocfs2_relink_block_group(handle_t *handle,
				    struct inode *alloc_inode,
				    struct buffer_head *fe_bh,
				    struct buffer_head *bg_bh,
				    struct buffer_head *prev_bg_bh,
				    u16 chain);
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
						     u32 wanted);
static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
						   u64 bg_blkno,
						   u16 bg_bit_off);
static inline void ocfs2_block_to_cluster_group(struct inode *inode,
						u64 data_blkno,
						u64 *bg_blkno,
						u16 *bg_bit_off);
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
					     u32 bits_wanted, u64 max_block,
					     int flags,
					     struct ocfs2_alloc_context **ac);
117  
ocfs2_free_ac_resource(struct ocfs2_alloc_context * ac)118  void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
119  {
120  	struct inode *inode = ac->ac_inode;
121  
122  	if (inode) {
123  		if (ac->ac_which != OCFS2_AC_USE_LOCAL)
124  			ocfs2_inode_unlock(inode, 1);
125  
126  		inode_unlock(inode);
127  
128  		iput(inode);
129  		ac->ac_inode = NULL;
130  	}
131  	brelse(ac->ac_bh);
132  	ac->ac_bh = NULL;
133  	ac->ac_resv = NULL;
134  	kfree(ac->ac_find_loc_priv);
135  	ac->ac_find_loc_priv = NULL;
136  }
137  
/*
 * Tear down an allocation context completely: drop the resources it
 * holds, then free the context structure.  @ac must not be used
 * afterwards.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
	/* Resources first; the structure must still be valid for that. */
	ocfs2_free_ac_resource(ac);

	kfree(ac);
}
143  
ocfs2_bits_per_group(struct ocfs2_chain_list * cl)144  static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
145  {
146  	return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
147  }
148  
/*
 * Report a failed group descriptor check and return from the enclosing
 * function.  Expects "resize" and "sb" to be in scope at the point of use.
 *
 * - resize: the problem is only logged with mlog(ML_ERROR), so the
 *   filesystem is not failed, and -EINVAL is returned so that the caller
 *   can reject the descriptor.  Without the return, a failed check would
 *   fall through and the validator would report success.
 * - otherwise: the result of ocfs2_error() is returned.
 */
#define do_error(fmt, ...)						\
do {									\
	if (resize) {							\
		mlog(ML_ERROR, fmt, ##__VA_ARGS__);			\
		return -EINVAL;						\
	}								\
	return ocfs2_error(sb, fmt, ##__VA_ARGS__);			\
} while (0)
156  
ocfs2_validate_gd_self(struct super_block * sb,struct buffer_head * bh,int resize)157  static int ocfs2_validate_gd_self(struct super_block *sb,
158  				  struct buffer_head *bh,
159  				  int resize)
160  {
161  	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
162  
163  	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
164  		do_error("Group descriptor #%llu has bad signature %.*s\n",
165  			 (unsigned long long)bh->b_blocknr, 7,
166  			 gd->bg_signature);
167  	}
168  
169  	if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
170  		do_error("Group descriptor #%llu has an invalid bg_blkno of %llu\n",
171  			 (unsigned long long)bh->b_blocknr,
172  			 (unsigned long long)le64_to_cpu(gd->bg_blkno));
173  	}
174  
175  	if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
176  		do_error("Group descriptor #%llu has an invalid fs_generation of #%u\n",
177  			 (unsigned long long)bh->b_blocknr,
178  			 le32_to_cpu(gd->bg_generation));
179  	}
180  
181  	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
182  		do_error("Group descriptor #%llu has bit count %u but claims that %u are free\n",
183  			 (unsigned long long)bh->b_blocknr,
184  			 le16_to_cpu(gd->bg_bits),
185  			 le16_to_cpu(gd->bg_free_bits_count));
186  	}
187  
188  	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
189  		do_error("Group descriptor #%llu has bit count %u but max bitmap bits of %u\n",
190  			 (unsigned long long)bh->b_blocknr,
191  			 le16_to_cpu(gd->bg_bits),
192  			 8 * le16_to_cpu(gd->bg_size));
193  	}
194  
195  	return 0;
196  }
197  
ocfs2_validate_gd_parent(struct super_block * sb,struct ocfs2_dinode * di,struct buffer_head * bh,int resize)198  static int ocfs2_validate_gd_parent(struct super_block *sb,
199  				    struct ocfs2_dinode *di,
200  				    struct buffer_head *bh,
201  				    int resize)
202  {
203  	unsigned int max_bits;
204  	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
205  
206  	if (di->i_blkno != gd->bg_parent_dinode) {
207  		do_error("Group descriptor #%llu has bad parent pointer (%llu, expected %llu)\n",
208  			 (unsigned long long)bh->b_blocknr,
209  			 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
210  			 (unsigned long long)le64_to_cpu(di->i_blkno));
211  	}
212  
213  	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
214  	if (le16_to_cpu(gd->bg_bits) > max_bits) {
215  		do_error("Group descriptor #%llu has bit count of %u\n",
216  			 (unsigned long long)bh->b_blocknr,
217  			 le16_to_cpu(gd->bg_bits));
218  	}
219  
220  	/* In resize, we may meet the case bg_chain == cl_next_free_rec. */
221  	if ((le16_to_cpu(gd->bg_chain) >
222  	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
223  	    ((le16_to_cpu(gd->bg_chain) ==
224  	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
225  		do_error("Group descriptor #%llu has bad chain %u\n",
226  			 (unsigned long long)bh->b_blocknr,
227  			 le16_to_cpu(gd->bg_chain));
228  	}
229  
230  	return 0;
231  }
232  
233  #undef do_error
234  
235  /*
236   * This version only prints errors.  It does not fail the filesystem, and
237   * exists only for resize.
238   */
ocfs2_check_group_descriptor(struct super_block * sb,struct ocfs2_dinode * di,struct buffer_head * bh)239  int ocfs2_check_group_descriptor(struct super_block *sb,
240  				 struct ocfs2_dinode *di,
241  				 struct buffer_head *bh)
242  {
243  	int rc;
244  	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
245  
246  	BUG_ON(!buffer_uptodate(bh));
247  
248  	/*
249  	 * If the ecc fails, we return the error but otherwise
250  	 * leave the filesystem running.  We know any error is
251  	 * local to this block.
252  	 */
253  	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
254  	if (rc) {
255  		mlog(ML_ERROR,
256  		     "Checksum failed for group descriptor %llu\n",
257  		     (unsigned long long)bh->b_blocknr);
258  	} else
259  		rc = ocfs2_validate_gd_self(sb, bh, 1);
260  	if (!rc)
261  		rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
262  
263  	return rc;
264  }
265  
ocfs2_validate_group_descriptor(struct super_block * sb,struct buffer_head * bh)266  static int ocfs2_validate_group_descriptor(struct super_block *sb,
267  					   struct buffer_head *bh)
268  {
269  	int rc;
270  	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
271  
272  	trace_ocfs2_validate_group_descriptor(
273  					(unsigned long long)bh->b_blocknr);
274  
275  	BUG_ON(!buffer_uptodate(bh));
276  
277  	/*
278  	 * If the ecc fails, we return the error but otherwise
279  	 * leave the filesystem running.  We know any error is
280  	 * local to this block.
281  	 */
282  	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
283  	if (rc)
284  		return rc;
285  
286  	/*
287  	 * Errors after here are fatal.
288  	 */
289  
290  	return ocfs2_validate_gd_self(sb, bh, 0);
291  }
292  
/*
 * Read and validate the group descriptor at @gd_blkno.
 *
 * @inode:    allocator inode whose metadata cache is used for the read
 * @di:       the allocator's on-disk inode, used for the parent checks
 * @gd_blkno: block number of the descriptor
 * @bh:       in/out; if *bh is NULL on entry it receives the buffer that
 *            was read, otherwise the supplied buffer is used
 *
 * The block is read with ocfs2_validate_group_descriptor() as validator
 * and then checked against @di in non-resize mode.  Returns 0 on
 * success; on failure *bh is left as the caller passed it.
 */
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
				u64 gd_blkno, struct buffer_head **bh)
{
	struct buffer_head *gd_bh = *bh;
	int rc;

	rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &gd_bh,
			      ocfs2_validate_group_descriptor);
	if (rc)
		return rc;

	rc = ocfs2_validate_gd_parent(inode->i_sb, di, gd_bh, 0);
	if (rc) {
		/*
		 * NOTE(review): this also releases a buffer supplied by
		 * the caller through *bh - confirm that callers expect
		 * their reference to be dropped on this path.
		 */
		brelse(gd_bh);
		return rc;
	}

	/* Only hand the buffer up if the caller did not supply one. */
	if (*bh == NULL)
		*bh = gd_bh;

	return 0;
}
317  
/*
 * Append one extent of @clusters clusters starting at block @p_blkno to
 * the extent list of a discontiguous group descriptor.
 *
 * The new record's e_cpos is the number of clusters the group already
 * covers.  bg_bits and bg_free_bits_count both grow by the bits the
 * extent contributes.  On the first extent l_count is initialised from
 * ocfs2_extent_recs_per_gd().  The filesystem must support discontiguous
 * block groups.
 */
static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
					  struct ocfs2_group_desc *bg,
					  struct ocfs2_chain_list *cl,
					  u64 p_blkno, unsigned int clusters)
{
	struct ocfs2_extent_list *el = &bg->bg_list;
	struct ocfs2_extent_rec *rec;
	u16 bpc = le16_to_cpu(cl->cl_bpc);
	unsigned int added_bits = clusters * bpc;

	BUG_ON(!ocfs2_supports_discontig_bg(osb));

	/* First extent: the list capacity has not been set up yet. */
	if (!el->l_next_free_rec)
		el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb));

	rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)];
	rec->e_blkno = cpu_to_le64(p_blkno);
	rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / bpc);
	rec->e_leaf_clusters = cpu_to_le16(clusters);

	le16_add_cpu(&bg->bg_bits, added_bits);
	le16_add_cpu(&bg->bg_free_bits_count, added_bits);
	le16_add_cpu(&el->l_next_free_rec, 1);
}
339  
/*
 * Initialise a brand new group descriptor in @bg_bh.
 *
 * @handle:         running transaction
 * @alloc_inode:    allocator inode the group will belong to
 * @bg_bh:          buffer for the descriptor; must be block @group_blkno
 * @group_blkno:    block number of the descriptor
 * @group_clusters: clusters backing the group so far
 * @my_chain:       chain the group is placed on
 * @cl:             the allocator's chain list
 *
 * The block is zeroed and filled in under OCFS2_JOURNAL_ACCESS_CREATE.
 * The group is linked in front of the current head of chain @my_chain.
 * When @group_clusters equals cl_cpg the group gets the full
 * ocfs2_bits_per_group() bits; otherwise the clusters are recorded as the
 * first discontiguous extent.  Bit 0 is set to account for the descriptor
 * block itself.
 *
 * Returns 0 on success or a negative error.
 */
static int ocfs2_block_group_fill(handle_t *handle,
				  struct inode *alloc_inode,
				  struct buffer_head *bg_bh,
				  u64 group_blkno,
				  unsigned int group_clusters,
				  u16 my_chain,
				  struct ocfs2_chain_list *cl)
{
	struct super_block *sb = alloc_inode->i_sb;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
	int status = 0;

	if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
		status = ocfs2_error(sb,
				     "group block (%llu) != b_blocknr (%llu)\n",
				     (unsigned long long)group_blkno,
				     (unsigned long long) bg_bh->b_blocknr);
		goto bail;
	}

	status = ocfs2_journal_access_gd(handle,
					 INODE_CACHE(alloc_inode),
					 bg_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Start from a clean block, then fill in the identity fields. */
	memset(bg, 0, sb->s_blocksize);
	strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
	bg->bg_generation = cpu_to_le32(osb->fs_generation);
	bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
						osb->s_feature_incompat));
	bg->bg_blkno = cpu_to_le64(group_blkno);
	bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);

	/* Link in front of whatever currently heads the chain. */
	bg->bg_chain = cpu_to_le16(my_chain);
	bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;

	if (group_clusters != le16_to_cpu(cl->cl_cpg))
		ocfs2_bg_discontig_add_extent(osb, bg, cl, group_blkno,
					      group_clusters);
	else
		bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));

	/* The descriptor block occupies the first bit of its own group. */
	ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
	bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);

	ocfs2_journal_dirty(handle, bg_bh);

	/*
	 * The remaining blocks of the group are deliberately left alone:
	 * all valid FS metadata in a block group stores the superblock
	 * fs_generation value at allocation time.
	 */

bail:
	if (status)
		mlog_errno(status);
	return status;
}
401  
ocfs2_find_smallest_chain(struct ocfs2_chain_list * cl)402  static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
403  {
404  	u16 curr, best;
405  
406  	best = curr = 0;
407  	while (curr < le16_to_cpu(cl->cl_count)) {
408  		if (le32_to_cpu(cl->cl_recs[best].c_total) >
409  		    le32_to_cpu(cl->cl_recs[curr].c_total))
410  			best = curr;
411  		curr++;
412  	}
413  	return best;
414  }
415  
416  static struct buffer_head *
ocfs2_block_group_alloc_contig(struct ocfs2_super * osb,handle_t * handle,struct inode * alloc_inode,struct ocfs2_alloc_context * ac,struct ocfs2_chain_list * cl)417  ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
418  			       struct inode *alloc_inode,
419  			       struct ocfs2_alloc_context *ac,
420  			       struct ocfs2_chain_list *cl)
421  {
422  	int status;
423  	u32 bit_off, num_bits;
424  	u64 bg_blkno;
425  	struct buffer_head *bg_bh;
426  	unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
427  
428  	status = ocfs2_claim_clusters(handle, ac,
429  				      le16_to_cpu(cl->cl_cpg), &bit_off,
430  				      &num_bits);
431  	if (status < 0) {
432  		if (status != -ENOSPC)
433  			mlog_errno(status);
434  		goto bail;
435  	}
436  
437  	/* setup the group */
438  	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
439  	trace_ocfs2_block_group_alloc_contig(
440  	     (unsigned long long)bg_blkno, alloc_rec);
441  
442  	bg_bh = sb_getblk(osb->sb, bg_blkno);
443  	if (!bg_bh) {
444  		status = -ENOMEM;
445  		mlog_errno(status);
446  		goto bail;
447  	}
448  	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
449  
450  	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
451  					bg_blkno, num_bits, alloc_rec, cl);
452  	if (status < 0) {
453  		brelse(bg_bh);
454  		mlog_errno(status);
455  	}
456  
457  bail:
458  	return status ? ERR_PTR(status) : bg_bh;
459  }
460  
/*
 * Claim a run of clusters for a discontiguous block group, halving the
 * requested minimum each time the claim fails with -ENOSPC.
 *
 * @min_bits: number of clusters asked for on the first attempt
 * @bit_off:  receives the first claimed cluster
 * @num_bits: receives the number of clusters claimed
 *
 * Returns the result of the first ocfs2_claim_clusters() call that does
 * not fail with -ENOSPC.  Returns -ENOSPC when every attempt down to a
 * single cluster ran out of space, and also when @min_bits is 0.  In the
 * latter case nothing can be claimed, and reporting success would leave
 * @bit_off and @num_bits unset for the caller.
 */
static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac,
					unsigned int min_bits,
					u32 *bit_off, u32 *num_bits)
{
	/* Covers the case where the loop below never runs. */
	int status = -ENOSPC;

	while (min_bits) {
		status = ocfs2_claim_clusters(handle, ac, min_bits,
					      bit_off, num_bits);
		if (status != -ENOSPC)
			break;

		/* No room for a run this long: retry with half of it. */
		min_bits >>= 1;
	}

	return status;
}
480  
/*
 * Keep adding extents to a discontiguous group until it covers cl_cpg
 * clusters or its extent list is full.
 *
 * @min_bits: starting size for each claim; capped to what is still
 *            needed and lowered to the size of the previous extent
 *
 * Returns 0 once the group is complete and has been marked dirty.
 * Returns -ENOSPC if the extent list filled up first, or the error from
 * the journal or from claiming clusters.  Extents already added are left
 * in the descriptor on failure.
 */
static int ocfs2_block_group_grow_discontig(handle_t *handle,
					    struct inode *alloc_inode,
					    struct buffer_head *bg_bh,
					    struct ocfs2_alloc_context *ac,
					    struct ocfs2_chain_list *cl,
					    unsigned int min_bits)
{
	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
	struct ocfs2_group_desc *bg =
		(struct ocfs2_group_desc *)bg_bh->b_data;
	struct ocfs2_extent_list *el = &bg->bg_list;
	u32 claimed_cpos, claimed;
	unsigned int needed;
	u64 p_blkno;
	int status;

	/* Clusters still missing from a full group. */
	needed = le16_to_cpu(cl->cl_cpg) -
		 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);

	status = ocfs2_journal_access_gd(handle,
					 INODE_CACHE(alloc_inode),
					 bg_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	while (needed > 0 &&
	       le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count)) {
		/* Never ask for more than is still missing. */
		if (min_bits > needed)
			min_bits = needed;

		status = ocfs2_block_group_claim_bits(osb, handle, ac,
						      min_bits, &claimed_cpos,
						      &claimed);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			return status;
		}

		p_blkno = ocfs2_clusters_to_blocks(osb->sb, claimed_cpos);
		ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
					      claimed);

		/* The next claim starts from the size we just got. */
		min_bits = claimed;
		needed = le16_to_cpu(cl->cl_cpg) -
			 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
	}

	/*
	 * Every extent record is used but the group is still short of
	 * cl_cpg clusters.
	 */
	if (needed > 0)
		return -ENOSPC;

	ocfs2_journal_dirty(handle, bg_bh);

	return status;
}
542  
/*
 * Undo a partially built discontiguous block group.
 *
 * Frees the clusters of every extent recorded in the descriptor back to
 * the allocator held by @cluster_ac, removes @bg_bh from @alloc_inode's
 * metadata cache and drops the buffer reference.  Does nothing when
 * @bg_bh is NULL.  A failure to free one extent is logged and the
 * remaining extents are still attempted.
 */
static void ocfs2_bg_alloc_cleanup(handle_t *handle,
				   struct ocfs2_alloc_context *cluster_ac,
				   struct inode *alloc_inode,
				   struct buffer_head *bg_bh)
{
	struct ocfs2_group_desc *bg;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	int idx, err;

	if (bg_bh == NULL)
		return;

	bg = (struct ocfs2_group_desc *)bg_bh->b_data;
	el = &bg->bg_list;

	for (idx = 0; idx < le16_to_cpu(el->l_next_free_rec); idx++) {
		rec = &el->l_recs[idx];
		err = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
					  cluster_ac->ac_bh,
					  le64_to_cpu(rec->e_blkno),
					  le16_to_cpu(rec->e_leaf_clusters));
		/* Log and carry on so every extent gets a chance. */
		if (err)
			mlog_errno(err);
	}

	ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh);
	brelse(bg_bh);
}
572  
573  static struct buffer_head *
ocfs2_block_group_alloc_discontig(handle_t * handle,struct inode * alloc_inode,struct ocfs2_alloc_context * ac,struct ocfs2_chain_list * cl)574  ocfs2_block_group_alloc_discontig(handle_t *handle,
575  				  struct inode *alloc_inode,
576  				  struct ocfs2_alloc_context *ac,
577  				  struct ocfs2_chain_list *cl)
578  {
579  	int status;
580  	u32 bit_off, num_bits;
581  	u64 bg_blkno;
582  	unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
583  	struct buffer_head *bg_bh = NULL;
584  	unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
585  	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
586  
587  	if (!ocfs2_supports_discontig_bg(osb)) {
588  		status = -ENOSPC;
589  		goto bail;
590  	}
591  
592  	status = ocfs2_extend_trans(handle,
593  				    ocfs2_calc_bg_discontig_credits(osb->sb));
594  	if (status) {
595  		mlog_errno(status);
596  		goto bail;
597  	}
598  
599  	/*
600  	 * We're going to be grabbing from multiple cluster groups.
601  	 * We don't have enough credits to relink them all, and the
602  	 * cluster groups will be staying in cache for the duration of
603  	 * this operation.
604  	 */
605  	ac->ac_disable_chain_relink = 1;
606  
607  	/* Claim the first region */
608  	status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
609  					      &bit_off, &num_bits);
610  	if (status < 0) {
611  		if (status != -ENOSPC)
612  			mlog_errno(status);
613  		goto bail;
614  	}
615  	min_bits = num_bits;
616  
617  	/* setup the group */
618  	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
619  	trace_ocfs2_block_group_alloc_discontig(
620  				(unsigned long long)bg_blkno, alloc_rec);
621  
622  	bg_bh = sb_getblk(osb->sb, bg_blkno);
623  	if (!bg_bh) {
624  		status = -ENOMEM;
625  		mlog_errno(status);
626  		goto bail;
627  	}
628  	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
629  
630  	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
631  					bg_blkno, num_bits, alloc_rec, cl);
632  	if (status < 0) {
633  		mlog_errno(status);
634  		goto bail;
635  	}
636  
637  	status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
638  						  bg_bh, ac, cl, min_bits);
639  	if (status)
640  		mlog_errno(status);
641  
642  bail:
643  	if (status)
644  		ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh);
645  	return status ? ERR_PTR(status) : bg_bh;
646  }
647  
648  /*
649   * We expect the block group allocator to already be locked.
650   */
ocfs2_block_group_alloc(struct ocfs2_super * osb,struct inode * alloc_inode,struct buffer_head * bh,u64 max_block,u64 * last_alloc_group,int flags)651  static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
652  				   struct inode *alloc_inode,
653  				   struct buffer_head *bh,
654  				   u64 max_block,
655  				   u64 *last_alloc_group,
656  				   int flags)
657  {
658  	int status, credits;
659  	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
660  	struct ocfs2_chain_list *cl;
661  	struct ocfs2_alloc_context *ac = NULL;
662  	handle_t *handle = NULL;
663  	u16 alloc_rec;
664  	struct buffer_head *bg_bh = NULL;
665  	struct ocfs2_group_desc *bg;
666  
667  	BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
668  
669  	cl = &fe->id2.i_chain;
670  	status = ocfs2_reserve_clusters_with_limit(osb,
671  						   le16_to_cpu(cl->cl_cpg),
672  						   max_block, flags, &ac);
673  	if (status < 0) {
674  		if (status != -ENOSPC)
675  			mlog_errno(status);
676  		goto bail;
677  	}
678  
679  	credits = ocfs2_calc_group_alloc_credits(osb->sb,
680  						 le16_to_cpu(cl->cl_cpg));
681  	handle = ocfs2_start_trans(osb, credits);
682  	if (IS_ERR(handle)) {
683  		status = PTR_ERR(handle);
684  		handle = NULL;
685  		mlog_errno(status);
686  		goto bail;
687  	}
688  
689  	if (last_alloc_group && *last_alloc_group != 0) {
690  		trace_ocfs2_block_group_alloc(
691  				(unsigned long long)*last_alloc_group);
692  		ac->ac_last_group = *last_alloc_group;
693  	}
694  
695  	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
696  					       ac, cl);
697  	if (PTR_ERR(bg_bh) == -ENOSPC)
698  		bg_bh = ocfs2_block_group_alloc_discontig(handle,
699  							  alloc_inode,
700  							  ac, cl);
701  	if (IS_ERR(bg_bh)) {
702  		status = PTR_ERR(bg_bh);
703  		bg_bh = NULL;
704  		if (status != -ENOSPC)
705  			mlog_errno(status);
706  		goto bail;
707  	}
708  	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
709  
710  	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
711  					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
712  	if (status < 0) {
713  		mlog_errno(status);
714  		goto bail;
715  	}
716  
717  	alloc_rec = le16_to_cpu(bg->bg_chain);
718  	le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
719  		     le16_to_cpu(bg->bg_free_bits_count));
720  	le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
721  		     le16_to_cpu(bg->bg_bits));
722  	cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
723  	if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
724  		le16_add_cpu(&cl->cl_next_free_rec, 1);
725  
726  	le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
727  					le16_to_cpu(bg->bg_free_bits_count));
728  	le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
729  	le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
730  
731  	ocfs2_journal_dirty(handle, bh);
732  
733  	spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
734  	OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
735  	fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
736  					     le32_to_cpu(fe->i_clusters)));
737  	spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
738  	i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
739  	alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
740  	ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0);
741  
742  	status = 0;
743  
744  	/* save the new last alloc group so that the caller can cache it. */
745  	if (last_alloc_group)
746  		*last_alloc_group = ac->ac_last_group;
747  
748  bail:
749  	if (handle)
750  		ocfs2_commit_trans(osb, handle);
751  
752  	if (ac)
753  		ocfs2_free_alloc_context(ac);
754  
755  	brelse(bg_bh);
756  
757  	if (status)
758  		mlog_errno(status);
759  	return status;
760  }
761  
/*
 * Lock a suballocator and make sure it can satisfy ac->ac_bits_wanted.
 *
 * @type, @slot:      identify the system file to use as allocator
 * @last_alloc_group: passed on to ocfs2_block_group_alloc()
 * @flags:            a new group is only added when ALLOC_NEW_GROUP is set
 *
 * Once both locks are taken, the inode and slot are recorded in @ac.  If
 * a later step fails, the inode stays locked and attached to @ac, so the
 * caller has to release it with ocfs2_free_ac_resource().  On success
 * @ac also holds a reference to the allocator inode's buffer head.
 *
 * When the allocator is short on free bits, the cluster bitmap fails
 * with -ENOSPC because it never grows.  Other allocators get a new block
 * group if @flags allows it, and -ENOSPC otherwise.
 *
 * Returns 0 on success or a negative error.
 */
static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
				       struct ocfs2_alloc_context *ac,
				       int type,
				       u32 slot,
				       u64 *last_alloc_group,
				       int flags)
{
	struct buffer_head *bh = NULL;
	struct inode *alloc_inode;
	struct ocfs2_dinode *fe;
	u32 wanted = ac->ac_bits_wanted;
	u32 free_bits;
	int status;

	alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
	if (!alloc_inode) {
		mlog_errno(-EINVAL);
		return -EINVAL;
	}

	inode_lock(alloc_inode);

	status = ocfs2_inode_lock(alloc_inode, &bh, 1);
	if (status < 0) {
		inode_unlock(alloc_inode);
		iput(alloc_inode);

		mlog_errno(status);
		return status;
	}

	/* From here on the context owns the locked inode. */
	ac->ac_inode = alloc_inode;
	ac->ac_alloc_slot = slot;

	fe = (struct ocfs2_dinode *) bh->b_data;

	/*
	 * The inode read inside ocfs2_inode_lock() validated this bh, so
	 * an invalid dinode here can only be a code bug.
	 */
	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));

	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
		status = ocfs2_error(alloc_inode->i_sb,
				     "Invalid chain allocator %llu\n",
				     (unsigned long long)le64_to_cpu(fe->i_blkno));
		goto bail;
	}

	free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
		le32_to_cpu(fe->id1.bitmap1.i_used);

	if (wanted <= free_bits)
		goto attach_bh;

	/* The cluster bitmap never grows. */
	if (ocfs2_is_cluster_bitmap(alloc_inode)) {
		trace_ocfs2_reserve_suballoc_bits_nospc(wanted, free_bits);
		status = -ENOSPC;
		goto bail;
	}

	if (!(flags & ALLOC_NEW_GROUP)) {
		trace_ocfs2_reserve_suballoc_bits_no_new_group(
					slot, wanted, free_bits);
		status = -ENOSPC;
		goto bail;
	}

	status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
					 ac->ac_max_block,
					 last_alloc_group, flags);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}
	atomic_inc(&osb->alloc_stats.bg_extends);

	/* Nobody should ever ask for this much metadata. */
	BUG_ON(wanted >
	       (le32_to_cpu(fe->id1.bitmap1.i_total)
		- le32_to_cpu(fe->id1.bitmap1.i_used)));

attach_bh:
	/* Extra reference for the context; ours is dropped below. */
	get_bh(bh);
	ac->ac_bh = bh;
bail:
	brelse(bh);

	if (status)
		mlog_errno(status);
	return status;
}
853  
ocfs2_init_inode_steal_slot(struct ocfs2_super * osb)854  static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
855  {
856  	spin_lock(&osb->osb_lock);
857  	osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
858  	spin_unlock(&osb->osb_lock);
859  	atomic_set(&osb->s_num_inodes_stolen, 0);
860  }
861  
ocfs2_init_meta_steal_slot(struct ocfs2_super * osb)862  static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb)
863  {
864  	spin_lock(&osb->osb_lock);
865  	osb->s_meta_steal_slot = OCFS2_INVALID_SLOT;
866  	spin_unlock(&osb->osb_lock);
867  	atomic_set(&osb->s_num_meta_stolen, 0);
868  }
869  
/*
 * Reset the stealing state for both inode and metadata allocation.
 */
void ocfs2_init_steal_slots(struct ocfs2_super *osb)
{
	/* Inode stealing first, then metadata stealing. */
	ocfs2_init_inode_steal_slot(osb);
	ocfs2_init_meta_steal_slot(osb);
}
875  
__ocfs2_set_steal_slot(struct ocfs2_super * osb,int slot,int type)876  static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
877  {
878  	spin_lock(&osb->osb_lock);
879  	if (type == INODE_ALLOC_SYSTEM_INODE)
880  		osb->s_inode_steal_slot = (u16)slot;
881  	else if (type == EXTENT_ALLOC_SYSTEM_INODE)
882  		osb->s_meta_steal_slot = (u16)slot;
883  	spin_unlock(&osb->osb_lock);
884  }
885  
__ocfs2_get_steal_slot(struct ocfs2_super * osb,int type)886  static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type)
887  {
888  	int slot = OCFS2_INVALID_SLOT;
889  
890  	spin_lock(&osb->osb_lock);
891  	if (type == INODE_ALLOC_SYSTEM_INODE)
892  		slot = osb->s_inode_steal_slot;
893  	else if (type == EXTENT_ALLOC_SYSTEM_INODE)
894  		slot = osb->s_meta_steal_slot;
895  	spin_unlock(&osb->osb_lock);
896  
897  	return slot;
898  }
899  
ocfs2_get_inode_steal_slot(struct ocfs2_super * osb)900  static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
901  {
902  	return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE);
903  }
904  
ocfs2_get_meta_steal_slot(struct ocfs2_super * osb)905  static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb)
906  {
907  	return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE);
908  }
909  
ocfs2_steal_resource(struct ocfs2_super * osb,struct ocfs2_alloc_context * ac,int type)910  static int ocfs2_steal_resource(struct ocfs2_super *osb,
911  				struct ocfs2_alloc_context *ac,
912  				int type)
913  {
914  	int i, status = -ENOSPC;
915  	int slot = __ocfs2_get_steal_slot(osb, type);
916  
917  	/* Start to steal resource from the first slot after ours. */
918  	if (slot == OCFS2_INVALID_SLOT)
919  		slot = osb->slot_num + 1;
920  
921  	for (i = 0; i < osb->max_slots; i++, slot++) {
922  		if (slot == osb->max_slots)
923  			slot = 0;
924  
925  		if (slot == osb->slot_num)
926  			continue;
927  
928  		status = ocfs2_reserve_suballoc_bits(osb, ac,
929  						     type,
930  						     (u32)slot, NULL,
931  						     NOT_ALLOC_NEW_GROUP);
932  		if (status >= 0) {
933  			__ocfs2_set_steal_slot(osb, slot, type);
934  			break;
935  		}
936  
937  		ocfs2_free_ac_resource(ac);
938  	}
939  
940  	return status;
941  }
942  
ocfs2_steal_inode(struct ocfs2_super * osb,struct ocfs2_alloc_context * ac)943  static int ocfs2_steal_inode(struct ocfs2_super *osb,
944  			     struct ocfs2_alloc_context *ac)
945  {
946  	return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE);
947  }
948  
ocfs2_steal_meta(struct ocfs2_super * osb,struct ocfs2_alloc_context * ac)949  static int ocfs2_steal_meta(struct ocfs2_super *osb,
950  			    struct ocfs2_alloc_context *ac)
951  {
952  	return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE);
953  }
954  
ocfs2_reserve_new_metadata_blocks(struct ocfs2_super * osb,int blocks,struct ocfs2_alloc_context ** ac)955  int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
956  				      int blocks,
957  				      struct ocfs2_alloc_context **ac)
958  {
959  	int status;
960  	int slot = ocfs2_get_meta_steal_slot(osb);
961  
962  	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
963  	if (!(*ac)) {
964  		status = -ENOMEM;
965  		mlog_errno(status);
966  		goto bail;
967  	}
968  
969  	(*ac)->ac_bits_wanted = blocks;
970  	(*ac)->ac_which = OCFS2_AC_USE_META;
971  	(*ac)->ac_group_search = ocfs2_block_group_search;
972  
973  	if (slot != OCFS2_INVALID_SLOT &&
974  		atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL)
975  		goto extent_steal;
976  
977  	atomic_set(&osb->s_num_meta_stolen, 0);
978  	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
979  					     EXTENT_ALLOC_SYSTEM_INODE,
980  					     (u32)osb->slot_num, NULL,
981  					     ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP);
982  
983  
984  	if (status >= 0) {
985  		status = 0;
986  		if (slot != OCFS2_INVALID_SLOT)
987  			ocfs2_init_meta_steal_slot(osb);
988  		goto bail;
989  	} else if (status < 0 && status != -ENOSPC) {
990  		mlog_errno(status);
991  		goto bail;
992  	}
993  
994  	ocfs2_free_ac_resource(*ac);
995  
996  extent_steal:
997  	status = ocfs2_steal_meta(osb, *ac);
998  	atomic_inc(&osb->s_num_meta_stolen);
999  	if (status < 0) {
1000  		if (status != -ENOSPC)
1001  			mlog_errno(status);
1002  		goto bail;
1003  	}
1004  
1005  	status = 0;
1006  bail:
1007  	if ((status < 0) && *ac) {
1008  		ocfs2_free_alloc_context(*ac);
1009  		*ac = NULL;
1010  	}
1011  
1012  	if (status)
1013  		mlog_errno(status);
1014  	return status;
1015  }
1016  
/*
 * Reserve as many metadata blocks as ocfs2_extend_meta_needed()
 * reports for @root_el.  See ocfs2_reserve_new_metadata_blocks() for
 * the return value and the handling of *@ac.
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
			       struct ocfs2_extent_list *root_el,
			       struct ocfs2_alloc_context **ac)
{
	int blocks_needed = ocfs2_extend_meta_needed(root_el);

	return ocfs2_reserve_new_metadata_blocks(osb, blocks_needed, ac);
}
1025  
ocfs2_reserve_new_inode(struct ocfs2_super * osb,struct ocfs2_alloc_context ** ac)1026  int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
1027  			    struct ocfs2_alloc_context **ac)
1028  {
1029  	int status;
1030  	int slot = ocfs2_get_inode_steal_slot(osb);
1031  	u64 alloc_group;
1032  
1033  	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
1034  	if (!(*ac)) {
1035  		status = -ENOMEM;
1036  		mlog_errno(status);
1037  		goto bail;
1038  	}
1039  
1040  	(*ac)->ac_bits_wanted = 1;
1041  	(*ac)->ac_which = OCFS2_AC_USE_INODE;
1042  
1043  	(*ac)->ac_group_search = ocfs2_block_group_search;
1044  
1045  	/*
1046  	 * stat(2) can't handle i_ino > 32bits, so we tell the
1047  	 * lower levels not to allocate us a block group past that
1048  	 * limit.  The 'inode64' mount option avoids this behavior.
1049  	 */
1050  	if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
1051  		(*ac)->ac_max_block = (u32)~0U;
1052  
1053  	/*
1054  	 * slot is set when we successfully steal inode from other nodes.
1055  	 * It is reset in 3 places:
1056  	 * 1. when we flush the truncate log
1057  	 * 2. when we complete local alloc recovery.
1058  	 * 3. when we successfully allocate from our own slot.
1059  	 * After it is set, we will go on stealing inodes until we find the
1060  	 * need to check our slots to see whether there is some space for us.
1061  	 */
1062  	if (slot != OCFS2_INVALID_SLOT &&
1063  	    atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL)
1064  		goto inode_steal;
1065  
1066  	atomic_set(&osb->s_num_inodes_stolen, 0);
1067  	alloc_group = osb->osb_inode_alloc_group;
1068  	status = ocfs2_reserve_suballoc_bits(osb, *ac,
1069  					     INODE_ALLOC_SYSTEM_INODE,
1070  					     (u32)osb->slot_num,
1071  					     &alloc_group,
1072  					     ALLOC_NEW_GROUP |
1073  					     ALLOC_GROUPS_FROM_GLOBAL);
1074  	if (status >= 0) {
1075  		status = 0;
1076  
1077  		spin_lock(&osb->osb_lock);
1078  		osb->osb_inode_alloc_group = alloc_group;
1079  		spin_unlock(&osb->osb_lock);
1080  		trace_ocfs2_reserve_new_inode_new_group(
1081  			(unsigned long long)alloc_group);
1082  
1083  		/*
1084  		 * Some inodes must be freed by us, so try to allocate
1085  		 * from our own next time.
1086  		 */
1087  		if (slot != OCFS2_INVALID_SLOT)
1088  			ocfs2_init_inode_steal_slot(osb);
1089  		goto bail;
1090  	} else if (status < 0 && status != -ENOSPC) {
1091  		mlog_errno(status);
1092  		goto bail;
1093  	}
1094  
1095  	ocfs2_free_ac_resource(*ac);
1096  
1097  inode_steal:
1098  	status = ocfs2_steal_inode(osb, *ac);
1099  	atomic_inc(&osb->s_num_inodes_stolen);
1100  	if (status < 0) {
1101  		if (status != -ENOSPC)
1102  			mlog_errno(status);
1103  		goto bail;
1104  	}
1105  
1106  	status = 0;
1107  bail:
1108  	if ((status < 0) && *ac) {
1109  		ocfs2_free_alloc_context(*ac);
1110  		*ac = NULL;
1111  	}
1112  
1113  	if (status)
1114  		mlog_errno(status);
1115  	return status;
1116  }
1117  
1118  /* local alloc code has to do the same thing, so rather than do this
1119   * twice.. */
ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super * osb,struct ocfs2_alloc_context * ac)1120  int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
1121  				      struct ocfs2_alloc_context *ac)
1122  {
1123  	int status;
1124  
1125  	ac->ac_which = OCFS2_AC_USE_MAIN;
1126  	ac->ac_group_search = ocfs2_cluster_group_search;
1127  
1128  	status = ocfs2_reserve_suballoc_bits(osb, ac,
1129  					     GLOBAL_BITMAP_SYSTEM_INODE,
1130  					     OCFS2_INVALID_SLOT, NULL,
1131  					     ALLOC_NEW_GROUP);
1132  	if (status < 0 && status != -ENOSPC)
1133  		mlog_errno(status);
1134  
1135  	return status;
1136  }
1137  
1138  /* Callers don't need to care which bitmap (local alloc or main) to
1139   * use so we figure it out for them, but unfortunately this clutters
1140   * things a bit. */
ocfs2_reserve_clusters_with_limit(struct ocfs2_super * osb,u32 bits_wanted,u64 max_block,int flags,struct ocfs2_alloc_context ** ac)1141  static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
1142  					     u32 bits_wanted, u64 max_block,
1143  					     int flags,
1144  					     struct ocfs2_alloc_context **ac)
1145  {
1146  	int status, ret = 0;
1147  	int retried = 0;
1148  
1149  	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
1150  	if (!(*ac)) {
1151  		status = -ENOMEM;
1152  		mlog_errno(status);
1153  		goto bail;
1154  	}
1155  
1156  	(*ac)->ac_bits_wanted = bits_wanted;
1157  	(*ac)->ac_max_block = max_block;
1158  
1159  	status = -ENOSPC;
1160  	if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
1161  	    ocfs2_alloc_should_use_local(osb, bits_wanted)) {
1162  		status = ocfs2_reserve_local_alloc_bits(osb,
1163  							bits_wanted,
1164  							*ac);
1165  		if ((status < 0) && (status != -ENOSPC)) {
1166  			mlog_errno(status);
1167  			goto bail;
1168  		}
1169  	}
1170  
1171  	if (status == -ENOSPC) {
1172  retry:
1173  		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
1174  		/* Retry if there is sufficient space cached in truncate log */
1175  		if (status == -ENOSPC && !retried) {
1176  			retried = 1;
1177  			ocfs2_inode_unlock((*ac)->ac_inode, 1);
1178  			inode_unlock((*ac)->ac_inode);
1179  
1180  			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
1181  			if (ret == 1) {
1182  				iput((*ac)->ac_inode);
1183  				(*ac)->ac_inode = NULL;
1184  				goto retry;
1185  			}
1186  
1187  			if (ret < 0)
1188  				mlog_errno(ret);
1189  
1190  			inode_lock((*ac)->ac_inode);
1191  			ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
1192  			if (ret < 0) {
1193  				mlog_errno(ret);
1194  				inode_unlock((*ac)->ac_inode);
1195  				iput((*ac)->ac_inode);
1196  				(*ac)->ac_inode = NULL;
1197  				goto bail;
1198  			}
1199  		}
1200  		if (status < 0) {
1201  			if (status != -ENOSPC)
1202  				mlog_errno(status);
1203  			goto bail;
1204  		}
1205  	}
1206  
1207  	status = 0;
1208  bail:
1209  	if ((status < 0) && *ac) {
1210  		ocfs2_free_alloc_context(*ac);
1211  		*ac = NULL;
1212  	}
1213  
1214  	if (status)
1215  		mlog_errno(status);
1216  	return status;
1217  }
1218  
/*
 * Reserve @bits_wanted clusters with a max_block of 0, which the group
 * search routines treat as "no block limit".
 */
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
			   u32 bits_wanted,
			   struct ocfs2_alloc_context **ac)
{
	const u64 no_block_limit = 0;

	return ocfs2_reserve_clusters_with_limit(osb, bits_wanted,
						 no_block_limit,
						 ALLOC_NEW_GROUP, ac);
}
1226  
1227  /*
1228   * More or less lifted from ext3. I'll leave their description below:
1229   *
1230   * "For ext3 allocations, we must not reuse any blocks which are
1231   * allocated in the bitmap buffer's "last committed data" copy.  This
1232   * prevents deletes from freeing up the page for reuse until we have
1233   * committed the delete transaction.
1234   *
1235   * If we didn't do this, then deleting something and reallocating it as
1236   * data would allow the old block to be overwritten before the
1237   * transaction committed (because we force data to disk before commit).
1238   * This would lead to corruption if we crashed between overwriting the
1239   * data and committing the delete.
1240   *
1241   * @@@ We may want to make this allocation behaviour conditional on
1242   * data-writes at some point, and disable it for metadata allocations or
1243   * sync-data inodes."
1244   *
1245   * Note: OCFS2 already does this differently for metadata vs data
1246   * allocations, as those bitmaps are separate and undo access is never
1247   * called on a metadata group descriptor.
1248   */
ocfs2_test_bg_bit_allocatable(struct buffer_head * bg_bh,int nr)1249  static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
1250  					 int nr)
1251  {
1252  	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
1253  	struct journal_head *jh;
1254  	int ret;
1255  
1256  	if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
1257  		return 0;
1258  
1259  	jh = jbd2_journal_grab_journal_head(bg_bh);
1260  	if (!jh)
1261  		return 1;
1262  
1263  	spin_lock(&jh->b_state_lock);
1264  	bg = (struct ocfs2_group_desc *) jh->b_committed_data;
1265  	if (bg)
1266  		ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
1267  	else
1268  		ret = 1;
1269  	spin_unlock(&jh->b_state_lock);
1270  	jbd2_journal_put_journal_head(jh);
1271  
1272  	return ret;
1273  }
1274  
ocfs2_block_group_find_clear_bits(struct ocfs2_super * osb,struct buffer_head * bg_bh,unsigned int bits_wanted,unsigned int total_bits,struct ocfs2_suballoc_result * res)1275  static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1276  					     struct buffer_head *bg_bh,
1277  					     unsigned int bits_wanted,
1278  					     unsigned int total_bits,
1279  					     struct ocfs2_suballoc_result *res)
1280  {
1281  	void *bitmap;
1282  	u16 best_offset, best_size;
1283  	int offset, start, found, status = 0;
1284  	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
1285  
1286  	/* Callers got this descriptor from
1287  	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
1288  	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1289  
1290  	found = start = best_offset = best_size = 0;
1291  	bitmap = bg->bg_bitmap;
1292  
1293  	while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
1294  		if (offset == total_bits)
1295  			break;
1296  
1297  		if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
1298  			/* We found a zero, but we can't use it as it
1299  			 * hasn't been put to disk yet! */
1300  			found = 0;
1301  			start = offset + 1;
1302  		} else if (offset == start) {
1303  			/* we found a zero */
1304  			found++;
1305  			/* move start to the next bit to test */
1306  			start++;
1307  		} else {
1308  			/* got a zero after some ones */
1309  			found = 1;
1310  			start = offset + 1;
1311  		}
1312  		if (found > best_size) {
1313  			best_size = found;
1314  			best_offset = start - found;
1315  		}
1316  		/* we got everything we needed */
1317  		if (found == bits_wanted) {
1318  			/* mlog(0, "Found it all!\n"); */
1319  			break;
1320  		}
1321  	}
1322  
1323  	if (best_size) {
1324  		res->sr_bit_offset = best_offset;
1325  		res->sr_bits = best_size;
1326  	} else {
1327  		status = -ENOSPC;
1328  		/* No error log here -- see the comment above
1329  		 * ocfs2_test_bg_bit_allocatable */
1330  	}
1331  
1332  	return status;
1333  }
1334  
/*
 * Mark @num_bits bits starting at @bit_off as used in group descriptor
 * @bg (held in @group_bh) and lower its free bit count to match.
 *
 * The descriptor is journalled with undo access when @alloc_inode is
 * the cluster bitmap and with plain write access otherwise.
 *
 * Returns 0 on success, the negative status of the journal access
 * call on failure, or the result of ocfs2_error() if the reduced free
 * count exceeds the group's total bit count.
 */
int ocfs2_block_group_set_bits(handle_t *handle,
					     struct inode *alloc_inode,
					     struct ocfs2_group_desc *bg,
					     struct buffer_head *group_bh,
					     unsigned int bit_off,
					     unsigned int num_bits)
{
	int status;
	int access_type;
	unsigned int bit, end_bit;
	void *bitmap = bg->bg_bitmap;

	/* All callers get the descriptor via
	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
	BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);

	trace_ocfs2_block_group_set_bits(bit_off, num_bits);

	if (ocfs2_is_cluster_bitmap(alloc_inode))
		access_type = OCFS2_JOURNAL_ACCESS_UNDO;
	else
		access_type = OCFS2_JOURNAL_ACCESS_WRITE;

	status = ocfs2_journal_access_gd(handle,
					 INODE_CACHE(alloc_inode),
					 group_bh,
					 access_type);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
				   le16_to_cpu(bg->bg_bits),
				   le16_to_cpu(bg->bg_free_bits_count),
				   num_bits);
	}

	end_bit = bit_off + num_bits;
	for (bit = bit_off; bit != end_bit; bit++)
		ocfs2_set_bit(bit, bitmap);

	ocfs2_journal_dirty(handle, group_bh);

	return status;
}
1381  
1382  /* find the one with the most empty bits */
ocfs2_find_victim_chain(struct ocfs2_chain_list * cl)1383  static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
1384  {
1385  	u16 curr, best;
1386  
1387  	BUG_ON(!cl->cl_next_free_rec);
1388  
1389  	best = curr = 0;
1390  	while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
1391  		if (le32_to_cpu(cl->cl_recs[curr].c_free) >
1392  		    le32_to_cpu(cl->cl_recs[best].c_free))
1393  			best = curr;
1394  		curr++;
1395  	}
1396  
1397  	BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
1398  	return best;
1399  }
1400  
/*
 * Move the group descriptor in @bg_bh to the head of chain @chain.
 *
 * Three buffers are updated in order: the previous group is made to
 * skip over @bg_bh, @bg_bh is pointed at the current chain head, and
 * the chain record in the allocator dinode (@fe_bh) is pointed at
 * @bg_bh.  If a later journal access fails, the in-memory next-group
 * pointers changed so far are put back to their saved values.
 *
 * Returns 0 on success or the negative status of the failing journal
 * access call.
 */
static int ocfs2_relink_block_group(handle_t *handle,
				    struct inode *alloc_inode,
				    struct buffer_head *fe_bh,
				    struct buffer_head *bg_bh,
				    struct buffer_head *prev_bg_bh,
				    u16 chain)
{
	int status;
	/* There is a really tiny chance the journal calls could fail,
	 * but we wouldn't want inconsistent blocks in *any* case, so
	 * the original link values are kept for rollback. */
	__le64 saved_bg_next, saved_prev_next;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
	struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;

	/* The caller got these descriptors from
	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));

	trace_ocfs2_relink_block_group(
		(unsigned long long)le64_to_cpu(fe->i_blkno), chain,
		(unsigned long long)le64_to_cpu(bg->bg_blkno),
		(unsigned long long)le64_to_cpu(prev_bg->bg_blkno));

	saved_bg_next = bg->bg_next_group;
	saved_prev_next = prev_bg->bg_next_group;

	/* Step 1: unlink bg by letting its predecessor skip it. */
	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
					 prev_bg_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0)
		goto out;

	prev_bg->bg_next_group = bg->bg_next_group;
	ocfs2_journal_dirty(handle, prev_bg_bh);

	/* Step 2: make bg point at the current head of the chain. */
	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
					 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		prev_bg->bg_next_group = saved_prev_next;
		goto out;
	}

	bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
	ocfs2_journal_dirty(handle, bg_bh);

	/* Step 3: make bg the new head of the chain. */
	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
					 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		bg->bg_next_group = saved_bg_next;
		prev_bg->bg_next_group = saved_prev_next;
		goto out;
	}

	fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
	ocfs2_journal_dirty(handle, fe_bh);

out:
	if (status < 0)
		mlog_errno(status);
	return status;
}
1465  
/* Non-zero when @bg has strictly more than @wanted free bits. */
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
						     u32 wanted)
{
	u32 free_bits = le16_to_cpu(bg->bg_free_bits_count);

	return free_bits > wanted;
}
1471  
1472  /* return 0 on success, -ENOSPC to keep searching and any other < 0
1473   * value on error. */
ocfs2_cluster_group_search(struct inode * inode,struct buffer_head * group_bh,u32 bits_wanted,u32 min_bits,u64 max_block,struct ocfs2_suballoc_result * res)1474  static int ocfs2_cluster_group_search(struct inode *inode,
1475  				      struct buffer_head *group_bh,
1476  				      u32 bits_wanted, u32 min_bits,
1477  				      u64 max_block,
1478  				      struct ocfs2_suballoc_result *res)
1479  {
1480  	int search = -ENOSPC;
1481  	int ret;
1482  	u64 blkoff;
1483  	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1484  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1485  	unsigned int max_bits, gd_cluster_off;
1486  
1487  	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1488  
1489  	if (gd->bg_free_bits_count) {
1490  		max_bits = le16_to_cpu(gd->bg_bits);
1491  
1492  		/* Tail groups in cluster bitmaps which aren't cpg
1493  		 * aligned are prone to partial extension by a failed
1494  		 * fs resize. If the file system resize never got to
1495  		 * update the dinode cluster count, then we don't want
1496  		 * to trust any clusters past it, regardless of what
1497  		 * the group descriptor says. */
1498  		gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
1499  							  le64_to_cpu(gd->bg_blkno));
1500  		if ((gd_cluster_off + max_bits) >
1501  		    OCFS2_I(inode)->ip_clusters) {
1502  			max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1503  			trace_ocfs2_cluster_group_search_wrong_max_bits(
1504  				(unsigned long long)le64_to_cpu(gd->bg_blkno),
1505  				le16_to_cpu(gd->bg_bits),
1506  				OCFS2_I(inode)->ip_clusters, max_bits);
1507  		}
1508  
1509  		ret = ocfs2_block_group_find_clear_bits(osb,
1510  							group_bh, bits_wanted,
1511  							max_bits, res);
1512  		if (ret)
1513  			return ret;
1514  
1515  		if (max_block) {
1516  			blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1517  							  gd_cluster_off +
1518  							  res->sr_bit_offset +
1519  							  res->sr_bits);
1520  			trace_ocfs2_cluster_group_search_max_block(
1521  				(unsigned long long)blkoff,
1522  				(unsigned long long)max_block);
1523  			if (blkoff > max_block)
1524  				return -ENOSPC;
1525  		}
1526  
1527  		/* ocfs2_block_group_find_clear_bits() might
1528  		 * return success, but we still want to return
1529  		 * -ENOSPC unless it found the minimum number
1530  		 * of bits. */
1531  		if (min_bits <= res->sr_bits)
1532  			search = 0; /* success */
1533  		else if (res->sr_bits) {
1534  			/*
1535  			 * Don't show bits which we'll be returning
1536  			 * for allocation to the local alloc bitmap.
1537  			 */
1538  			ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
1539  		}
1540  	}
1541  
1542  	return search;
1543  }
1544  
/*
 * Group search callback for block-based (non cluster bitmap)
 * allocators.  @min_bits must be 1.
 *
 * Returns 0 with @res filled in on success, -ENOSPC when the group has
 * no free bits, no allocatable run, or the run found ends beyond a
 * non-zero @max_block.
 */
static int ocfs2_block_group_search(struct inode *inode,
				    struct buffer_head *group_bh,
				    u32 bits_wanted, u32 min_bits,
				    u64 max_block,
				    struct ocfs2_suballoc_result *res)
{
	int ret;
	u64 end_blkno;
	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;

	BUG_ON(min_bits != 1);
	BUG_ON(ocfs2_is_cluster_bitmap(inode));

	if (!bg->bg_free_bits_count)
		return -ENOSPC;

	ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
						group_bh, bits_wanted,
						le16_to_cpu(bg->bg_bits),
						res);
	if (ret || !max_block)
		return ret;

	/* A block limit was given: the run must end within it. */
	end_blkno = le64_to_cpu(bg->bg_blkno) +
		res->sr_bit_offset + res->sr_bits;
	trace_ocfs2_block_group_search_max_block(
		(unsigned long long)end_blkno,
		(unsigned long long)max_block);
	if (end_blkno > max_block)
		return -ENOSPC;

	return ret;
}
1576  
/*
 * Account for @num_bits newly allocated bits in the allocator dinode
 * held in @di_bh: the used count grows by @num_bits and the free count
 * of chain record @chain shrinks by the same amount.  The dinode is
 * journalled for write and marked dirty.
 *
 * Returns the status of ocfs2_journal_access_di(); nothing is changed
 * when that call fails.
 */
int ocfs2_alloc_dinode_update_counts(struct inode *inode,
				       handle_t *handle,
				       struct buffer_head *di_bh,
				       u32 num_bits,
				       u16 chain)
{
	int ret;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	le32_add_cpu(&di->id1.bitmap1.i_used, num_bits);
	le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
	ocfs2_journal_dirty(handle, di_bh);

	return ret;
}
1603  
/*
 * Reverse the in-memory effect of ocfs2_alloc_dinode_update_counts():
 * the used count shrinks by @num_bits and the free count of chain
 * record @chain grows by the same amount.  No journal calls are made
 * here.
 */
void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
				       struct buffer_head *di_bh,
				       u32 num_bits,
				       u16 chain)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *)&di->id2.i_chain;

	le32_add_cpu(&di->id1.bitmap1.i_used, -num_bits);
	le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits);
}
1618  
ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result * res,struct ocfs2_extent_rec * rec,struct ocfs2_chain_list * cl)1619  static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1620  					 struct ocfs2_extent_rec *rec,
1621  					 struct ocfs2_chain_list *cl)
1622  {
1623  	unsigned int bpc = le16_to_cpu(cl->cl_bpc);
1624  	unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
1625  	unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
1626  
1627  	if (res->sr_bit_offset < bitoff)
1628  		return 0;
1629  	if (res->sr_bit_offset >= (bitoff + bitcount))
1630  		return 0;
1631  	res->sr_blkno = le64_to_cpu(rec->e_blkno) +
1632  		(res->sr_bit_offset - bitoff);
1633  	if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount))
1634  		res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset;
1635  	return 1;
1636  }
1637  
ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context * ac,struct ocfs2_group_desc * bg,struct ocfs2_suballoc_result * res)1638  static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac,
1639  					  struct ocfs2_group_desc *bg,
1640  					  struct ocfs2_suballoc_result *res)
1641  {
1642  	int i;
1643  	u64 bg_blkno = res->sr_bg_blkno;  /* Save off */
1644  	struct ocfs2_extent_rec *rec;
1645  	struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1646  	struct ocfs2_chain_list *cl = &di->id2.i_chain;
1647  
1648  	if (ocfs2_is_cluster_bitmap(ac->ac_inode)) {
1649  		res->sr_blkno = 0;
1650  		return;
1651  	}
1652  
1653  	res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset;
1654  	res->sr_bg_blkno = 0;  /* Clear it for contig block groups */
1655  	if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) ||
1656  	    !bg->bg_list.l_next_free_rec)
1657  		return;
1658  
1659  	for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) {
1660  		rec = &bg->bg_list.l_recs[i];
1661  		if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) {
1662  			res->sr_bg_blkno = bg_blkno;  /* Restore */
1663  			break;
1664  		}
1665  	}
1666  }
1667  
/*
 * Search the single block group named by res->sr_bg_blkno for
 * @bits_wanted bits (at least @min_bits).
 *
 * Unless ac->ac_find_loc_only is set, the bits found are also claimed:
 * the allocator dinode counts are updated and the bits set in the
 * group bitmap.  *@bits_left receives the group's free bit count once
 * the search itself has succeeded.
 *
 * Returns 0 on success, -ENOSPC if the group cannot satisfy the
 * request, or another negative errno on failure.
 */
static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
				  handle_t *handle,
				  u32 bits_wanted,
				  u32 min_bits,
				  struct ocfs2_suballoc_result *res,
				  u16 *bits_left)
{
	int ret;
	u16 chain;
	struct buffer_head *gd_bh = NULL;
	struct ocfs2_group_desc *gd;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
	struct inode *alloc_inode = ac->ac_inode;

	ret = ocfs2_read_group_descriptor(alloc_inode, di,
					  res->sr_bg_blkno, &gd_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	gd = (struct ocfs2_group_desc *) gd_bh->b_data;

	ret = ac->ac_group_search(alloc_inode, gd_bh, bits_wanted, min_bits,
				  ac->ac_max_block, res);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

	if (ret == 0)
		ocfs2_bg_discontig_fix_result(ac, gd, res);

	/*
	 * ocfs2_bg_discontig_fix_result() may have changed sr_bg_blkno,
	 * so the descriptor's own block is recorded separately.
	 */
	res->sr_bg_stable_blkno = gd_bh->b_blocknr;

	if (!ac->ac_find_loc_only) {
		chain = le16_to_cpu(gd->bg_chain);

		ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
						       ac->ac_bh,
						       res->sr_bits, chain);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd,
						 gd_bh, res->sr_bit_offset,
						 res->sr_bits);
		if (ret < 0) {
			/* Take the dinode accounting back again. */
			ocfs2_rollback_alloc_dinode_counts(alloc_inode,
							   ac->ac_bh,
							   res->sr_bits,
							   chain);
			mlog_errno(ret);
		}
	}

	*bits_left = le16_to_cpu(gd->bg_free_bits_count);

out:
	brelse(gd_bh);

	return ret;
}
1734  
/*
 * Walk chain ac->ac_chain of the allocator, group by group, until the
 * group search callback finds @bits_wanted bits (at least @min_bits)
 * or the chain ends.
 *
 * If a group other than the chain head satisfies the request and is
 * "reasonably empty", it is relinked to the head of its chain unless
 * ac->ac_disable_chain_relink is set.  Unless ac->ac_find_loc_only is
 * set, the bits are then claimed in both the allocator dinode and the
 * group bitmap.  *@bits_left receives the free bit count of the group
 * used.
 *
 * Returns 0 on success, -ENOSPC if no group in the chain fits, or
 * another negative errno on failure.
 */
static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
			      handle_t *handle,
			      u32 bits_wanted,
			      u32 min_bits,
			      struct ocfs2_suballoc_result *res,
			      u16 *bits_left)
{
	int status;
	u16 chain = ac->ac_chain;
	u64 next_blkno;
	struct inode *alloc_inode = ac->ac_inode;
	struct buffer_head *cur_bh = NULL;
	struct buffer_head *prev_bh = NULL;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
	struct ocfs2_group_desc *bg;

	trace_ocfs2_search_chain_begin(
		(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
		bits_wanted, chain);

	status = ocfs2_read_group_descriptor(alloc_inode, fe,
					     le64_to_cpu(cl->cl_recs[chain].c_blkno),
					     &cur_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	bg = (struct ocfs2_group_desc *) cur_bh->b_data;

	/*
	 * For now the chain search is a bit simplistic: the first group
	 * the search callback accepts is used.
	 */
	for (;;) {
		status = ac->ac_group_search(alloc_inode, cur_bh,
					     bits_wanted, min_bits,
					     ac->ac_max_block,
					     res);
		if (status != -ENOSPC)
			break;

		/* End of chain: leave with -ENOSPC. */
		if (!bg->bg_next_group)
			break;

		/* Step forward, keeping only the immediate predecessor. */
		brelse(prev_bh);
		next_blkno = le64_to_cpu(bg->bg_next_group);
		prev_bh = cur_bh;
		cur_bh = NULL;
		status = ocfs2_read_group_descriptor(alloc_inode, fe,
						     next_blkno, &cur_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		bg = (struct ocfs2_group_desc *) cur_bh->b_data;
	}
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	trace_ocfs2_search_chain_succ(
		(unsigned long long)le64_to_cpu(bg->bg_blkno), res->sr_bits);

	res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);

	BUG_ON(res->sr_bits == 0);
	if (!status)
		ocfs2_bg_discontig_fix_result(ac, bg, res);

	/*
	 * ocfs2_bg_discontig_fix_result() may have changed sr_bg_blkno,
	 * so the descriptor's own block is recorded separately.
	 */
	res->sr_bg_stable_blkno = cur_bh->b_blocknr;

	/*
	 * The previously read descriptor is kept around.  When a target
	 * is found after walking past at least one group and the target
	 * is reasonably empty, it is relinked to the top of its chain.
	 *
	 * No extra blocks were read for this and only one more buffer
	 * goes into the transaction, yet the next search has a much
	 * easier time.
	 *
	 * This must happen *after* working out how many bits are being
	 * taken out of the target group.
	 */
	if (!ac->ac_disable_chain_relink &&
	    prev_bh &&
	    ocfs2_block_group_reasonably_empty(bg, res->sr_bits)) {
		status = ocfs2_relink_block_group(handle, alloc_inode,
						  ac->ac_bh, cur_bh,
						  prev_bh, chain);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	if (!ac->ac_find_loc_only) {
		status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
							  ac->ac_bh,
							  res->sr_bits,
							  chain);
		if (status) {
			mlog_errno(status);
			goto bail;
		}

		status = ocfs2_block_group_set_bits(handle,
						    alloc_inode,
						    bg,
						    cur_bh,
						    res->sr_bit_offset,
						    res->sr_bits);
		if (status < 0) {
			/* Take the dinode accounting back again. */
			ocfs2_rollback_alloc_dinode_counts(alloc_inode,
						ac->ac_bh, res->sr_bits, chain);
			mlog_errno(status);
			goto bail;
		}

		trace_ocfs2_search_chain_end(
				(unsigned long long)le64_to_cpu(fe->i_blkno),
				res->sr_bits);
	}

	*bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
	brelse(cur_bh);
	brelse(prev_bh);

	if (status)
		mlog_errno(status);
	return status;
}
1874  
1875  /* will give out up to bits_wanted contiguous bits. */
ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context * ac,handle_t * handle,u32 bits_wanted,u32 min_bits,struct ocfs2_suballoc_result * res)1876  static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1877  				     handle_t *handle,
1878  				     u32 bits_wanted,
1879  				     u32 min_bits,
1880  				     struct ocfs2_suballoc_result *res)
1881  {
1882  	int status;
1883  	u16 victim, i;
1884  	u16 bits_left = 0;
1885  	u64 hint = ac->ac_last_group;
1886  	struct ocfs2_chain_list *cl;
1887  	struct ocfs2_dinode *fe;
1888  
1889  	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1890  	BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1891  	BUG_ON(!ac->ac_bh);
1892  
1893  	fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1894  
1895  	/* The bh was validated by the inode read during
1896  	 * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
1897  	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1898  
1899  	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1900  	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
1901  		status = ocfs2_error(ac->ac_inode->i_sb,
1902  				     "Chain allocator dinode %llu has %u used bits but only %u total\n",
1903  				     (unsigned long long)le64_to_cpu(fe->i_blkno),
1904  				     le32_to_cpu(fe->id1.bitmap1.i_used),
1905  				     le32_to_cpu(fe->id1.bitmap1.i_total));
1906  		goto bail;
1907  	}
1908  
1909  	res->sr_bg_blkno = hint;
1910  	if (res->sr_bg_blkno) {
1911  		/* Attempt to short-circuit the usual search mechanism
1912  		 * by jumping straight to the most recently used
1913  		 * allocation group. This helps us maintain some
1914  		 * contiguousness across allocations. */
1915  		status = ocfs2_search_one_group(ac, handle, bits_wanted,
1916  						min_bits, res, &bits_left);
1917  		if (!status)
1918  			goto set_hint;
1919  		if (status < 0 && status != -ENOSPC) {
1920  			mlog_errno(status);
1921  			goto bail;
1922  		}
1923  	}
1924  
1925  	cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1926  
1927  	victim = ocfs2_find_victim_chain(cl);
1928  	ac->ac_chain = victim;
1929  
1930  	status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1931  				    res, &bits_left);
1932  	if (!status) {
1933  		if (ocfs2_is_cluster_bitmap(ac->ac_inode))
1934  			hint = res->sr_bg_blkno;
1935  		else
1936  			hint = ocfs2_group_from_res(res);
1937  		goto set_hint;
1938  	}
1939  	if (status < 0 && status != -ENOSPC) {
1940  		mlog_errno(status);
1941  		goto bail;
1942  	}
1943  
1944  	trace_ocfs2_claim_suballoc_bits(victim);
1945  
1946  	/* If we didn't pick a good victim, then just default to
1947  	 * searching each chain in order. Don't allow chain relinking
1948  	 * because we only calculate enough journal credits for one
1949  	 * relink per alloc. */
1950  	ac->ac_disable_chain_relink = 1;
1951  	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1952  		if (i == victim)
1953  			continue;
1954  		if (!cl->cl_recs[i].c_free)
1955  			continue;
1956  
1957  		ac->ac_chain = i;
1958  		status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1959  					    res, &bits_left);
1960  		if (!status) {
1961  			hint = ocfs2_group_from_res(res);
1962  			break;
1963  		}
1964  		if (status < 0 && status != -ENOSPC) {
1965  			mlog_errno(status);
1966  			goto bail;
1967  		}
1968  	}
1969  
1970  set_hint:
1971  	if (status != -ENOSPC) {
1972  		/* If the next search of this group is not likely to
1973  		 * yield a suitable extent, then we reset the last
1974  		 * group hint so as to not waste a disk read */
1975  		if (bits_left < min_bits)
1976  			ac->ac_last_group = 0;
1977  		else
1978  			ac->ac_last_group = hint;
1979  	}
1980  
1981  bail:
1982  	if (status)
1983  		mlog_errno(status);
1984  	return status;
1985  }
1986  
/*
 * Claim up to bits_wanted metadata bits from a metadata allocation
 * context (ac->ac_which must be OCFS2_AC_USE_META).
 *
 * On success the outputs describe the claimed range:
 *   *suballoc_loc       - res.sr_bg_blkno as reported by the allocator
 *   *suballoc_bit_start - first bit claimed within the group
 *   *blkno_start        - first allocated block
 *   *num_bits           - number of bits actually claimed
 * and ac->ac_bits_given is advanced by that count.
 *
 * Returns 0 on success or the negative status from
 * ocfs2_claim_suballoc_bits().
 */
int ocfs2_claim_metadata(handle_t *handle,
			 struct ocfs2_alloc_context *ac,
			 u32 bits_wanted,
			 u64 *suballoc_loc,
			 u16 *suballoc_bit_start,
			 unsigned int *num_bits,
			 u64 *blkno_start)
{
	int status;
	struct ocfs2_suballoc_result res = { .sr_blkno = 0, };

	BUG_ON(!ac);
	BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
	BUG_ON(ac->ac_which != OCFS2_AC_USE_META);

	/* Metadata can live in extents of any size, hence min_bits == 1. */
	status = ocfs2_claim_suballoc_bits(ac, handle, bits_wanted, 1, &res);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);

	/* Hand the claimed range back to the caller. */
	*suballoc_loc = res.sr_bg_blkno;
	*suballoc_bit_start = res.sr_bit_offset;
	*blkno_start = res.sr_blkno;
	*num_bits = res.sr_bits;

	ac->ac_bits_given += res.sr_bits;
	status = 0;

bail:
	if (status)
		mlog_errno(status);
	return status;
}
2024  
ocfs2_init_inode_ac_group(struct inode * dir,struct buffer_head * parent_di_bh,struct ocfs2_alloc_context * ac)2025  static void ocfs2_init_inode_ac_group(struct inode *dir,
2026  				      struct buffer_head *parent_di_bh,
2027  				      struct ocfs2_alloc_context *ac)
2028  {
2029  	struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data;
2030  	/*
2031  	 * Try to allocate inodes from some specific group.
2032  	 *
2033  	 * If the parent dir has recorded the last group used in allocation,
2034  	 * cool, use it. Otherwise if we try to allocate new inode from the
2035  	 * same slot the parent dir belongs to, use the same chunk.
2036  	 *
2037  	 * We are very careful here to avoid the mistake of setting
2038  	 * ac_last_group to a group descriptor from a different (unlocked) slot.
2039  	 */
2040  	if (OCFS2_I(dir)->ip_last_used_group &&
2041  	    OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
2042  		ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
2043  	else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) {
2044  		if (di->i_suballoc_loc)
2045  			ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc);
2046  		else
2047  			ac->ac_last_group = ocfs2_which_suballoc_group(
2048  					le64_to_cpu(di->i_blkno),
2049  					le16_to_cpu(di->i_suballoc_bit));
2050  	}
2051  }
2052  
ocfs2_save_inode_ac_group(struct inode * dir,struct ocfs2_alloc_context * ac)2053  static inline void ocfs2_save_inode_ac_group(struct inode *dir,
2054  					     struct ocfs2_alloc_context *ac)
2055  {
2056  	OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
2057  	OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
2058  }
2059  
/*
 * Locate, without yet claiming, the block a new inode under dir will
 * occupy.
 *
 * The search result is stored in a freshly allocated
 * ocfs2_suballoc_result which, on success, is attached to
 * ac->ac_find_loc_priv for ocfs2_claim_new_inode_at_loc() to consume;
 * *fe_blkno receives the chosen block number.  On failure the result
 * structure is freed here.
 *
 * Returns 0 on success, -ENOMEM if the result cannot be allocated, or
 * the error from starting the transaction or from the search.
 */
int ocfs2_find_new_inode_loc(struct inode *dir,
			     struct buffer_head *parent_fe_bh,
			     struct ocfs2_alloc_context *ac,
			     u64 *fe_blkno)
{
	int ret;
	handle_t *handle;
	struct ocfs2_suballoc_result *res;

	BUG_ON(!ac);
	BUG_ON(ac->ac_bits_given != 0);
	BUG_ON(ac->ac_bits_wanted != 1);
	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);

	res = kzalloc(sizeof(*res), GFP_NOFS);
	if (!res) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}

	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);

	/*
	 * This transaction exists only to cover a possible chain
	 * relink; disabling relink for these calls would be an
	 * alternative.
	 */
	handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_free;
	}

	/*
	 * Tell ocfs2_claim_suballoc_bits() and ocfs2_search_one_group()
	 * to search only; the actual allocation happens later.
	 */
	ac->ac_find_loc_only = 1;

	ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	ac->ac_find_loc_priv = res;
	*fe_blkno = res->sr_blkno;
	ocfs2_update_inode_fsync_trans(handle, dir, 0);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
out_free:
	if (ret)
		kfree(res);

	return ret;
}
2120  
/*
 * Perform the allocation that ocfs2_find_new_inode_loc() only located.
 *
 * The saved search result is taken from ac->ac_find_loc_priv; di_blkno
 * must match the block number that earlier call returned.  The group
 * descriptor is re-read, the allocator dinode counts are updated and
 * the bit is set in the group bitmap.  On success *suballoc_loc and
 * *suballoc_bit are filled in, ac->ac_bits_given is incremented and the
 * group hint is saved on dir.
 *
 * Returns 0 on success or a negative errno; if setting the bits fails
 * the dinode counts are rolled back.
 */
int ocfs2_claim_new_inode_at_loc(handle_t *handle,
				 struct inode *dir,
				 struct ocfs2_alloc_context *ac,
				 u64 *suballoc_loc,
				 u16 *suballoc_bit,
				 u64 di_blkno)
{
	int ret;
	u16 chain;
	struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
	struct buffer_head *group_bh = NULL;
	struct ocfs2_group_desc *group;
	struct ocfs2_dinode *alloc_di =
		(struct ocfs2_dinode *) ac->ac_bh->b_data;

	/*
	 * di_blkno is handed back to us by the caller, so verify it
	 * still matches what ocfs2_find_new_inode_loc() returned.  A
	 * mismatch can only be a code bug.
	 */
	BUG_ON(res->sr_blkno != di_blkno);

	ret = ocfs2_read_group_descriptor(ac->ac_inode, alloc_di,
					  res->sr_bg_stable_blkno, &group_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	group = (struct ocfs2_group_desc *) group_bh->b_data;
	chain = le16_to_cpu(group->bg_chain);

	ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
					       ac->ac_bh, res->sr_bits,
					       chain);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_block_group_set_bits(handle, ac->ac_inode, group,
					 group_bh, res->sr_bit_offset,
					 res->sr_bits);
	if (ret < 0) {
		/* Undo the count update made just above. */
		ocfs2_rollback_alloc_dinode_counts(ac->ac_inode, ac->ac_bh,
						   res->sr_bits, chain);
		mlog_errno(ret);
		goto out;
	}

	trace_ocfs2_claim_new_inode_at_loc((unsigned long long)di_blkno,
					   res->sr_bits);

	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);

	BUG_ON(res->sr_bits != 1);

	*suballoc_loc = res->sr_bg_blkno;
	*suballoc_bit = res->sr_bit_offset;
	ac->ac_bits_given++;
	ocfs2_save_inode_ac_group(dir, ac);

out:
	brelse(group_bh);

	return ret;
}
2191  
/*
 * Find and claim a single bit for a new inode in one step.
 *
 * The allocation context must be an inode context
 * (OCFS2_AC_USE_INODE) that wants exactly one bit and has given none.
 * The group hint is seeded from the parent directory before the search
 * and saved back on dir afterwards.
 *
 * On success:
 *   *suballoc_loc - res.sr_bg_blkno as reported by the allocator
 *   *suballoc_bit - bit offset of the inode within its group
 *   *fe_blkno     - block number of the new inode
 *
 * Returns 0 on success or the negative status from
 * ocfs2_claim_suballoc_bits().
 */
int ocfs2_claim_new_inode(handle_t *handle,
			  struct inode *dir,
			  struct buffer_head *parent_fe_bh,
			  struct ocfs2_alloc_context *ac,
			  u64 *suballoc_loc,
			  u16 *suballoc_bit,
			  u64 *fe_blkno)
{
	int status;
	/*
	 * Zero-initialize like the other claim paths do, so no field of
	 * the result is ever read as stack garbage.
	 */
	struct ocfs2_suballoc_result res = { .sr_blkno = 0, };

	BUG_ON(!ac);
	BUG_ON(ac->ac_bits_given != 0);
	BUG_ON(ac->ac_bits_wanted != 1);
	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);

	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);

	status = ocfs2_claim_suballoc_bits(ac,
					   handle,
					   1,
					   1,
					   &res);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);

	/* Exactly one bit was requested; anything else is a code bug. */
	BUG_ON(res.sr_bits != 1);

	*suballoc_loc = res.sr_bg_blkno;
	*suballoc_bit = res.sr_bit_offset;
	*fe_blkno = res.sr_blkno;
	ac->ac_bits_given++;
	ocfs2_save_inode_ac_group(dir, ac);
	status = 0;
bail:
	if (status)
		mlog_errno(status);
	return status;
}
2234  
2235  /* translate a group desc. blkno and it's bitmap offset into
2236   * disk cluster offset. */
ocfs2_desc_bitmap_to_cluster_off(struct inode * inode,u64 bg_blkno,u16 bg_bit_off)2237  static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
2238  						   u64 bg_blkno,
2239  						   u16 bg_bit_off)
2240  {
2241  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2242  	u32 cluster = 0;
2243  
2244  	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
2245  
2246  	if (bg_blkno != osb->first_cluster_group_blkno)
2247  		cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
2248  	cluster += (u32) bg_bit_off;
2249  	return cluster;
2250  }
2251  
2252  /* given a cluster offset, calculate which block group it belongs to
2253   * and return that block offset. */
ocfs2_which_cluster_group(struct inode * inode,u32 cluster)2254  u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
2255  {
2256  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2257  	u32 group_no;
2258  
2259  	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
2260  
2261  	group_no = cluster / osb->bitmap_cpg;
2262  	if (!group_no)
2263  		return osb->first_cluster_group_blkno;
2264  	return ocfs2_clusters_to_blocks(inode->i_sb,
2265  					group_no * osb->bitmap_cpg);
2266  }
2267  
2268  /* given the block number of a cluster start, calculate which cluster
2269   * group and descriptor bitmap offset that corresponds to. */
ocfs2_block_to_cluster_group(struct inode * inode,u64 data_blkno,u64 * bg_blkno,u16 * bg_bit_off)2270  static inline void ocfs2_block_to_cluster_group(struct inode *inode,
2271  						u64 data_blkno,
2272  						u64 *bg_blkno,
2273  						u16 *bg_bit_off)
2274  {
2275  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2276  	u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
2277  
2278  	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
2279  
2280  	*bg_blkno = ocfs2_which_cluster_group(inode,
2281  					      data_cluster);
2282  
2283  	if (*bg_blkno == osb->first_cluster_group_blkno)
2284  		*bg_bit_off = (u16) data_cluster;
2285  	else
2286  		*bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
2287  							     data_blkno - *bg_blkno);
2288  }
2289  
2290  /*
2291   * min_bits - minimum contiguous chunk from this total allocation we
2292   * can handle. set to what we asked for originally for a full
2293   * contig. allocation, set to '1' to indicate we can deal with extents
2294   * of any size.
2295   */
__ocfs2_claim_clusters(handle_t * handle,struct ocfs2_alloc_context * ac,u32 min_clusters,u32 max_clusters,u32 * cluster_start,u32 * num_clusters)2296  int __ocfs2_claim_clusters(handle_t *handle,
2297  			   struct ocfs2_alloc_context *ac,
2298  			   u32 min_clusters,
2299  			   u32 max_clusters,
2300  			   u32 *cluster_start,
2301  			   u32 *num_clusters)
2302  {
2303  	int status;
2304  	unsigned int bits_wanted = max_clusters;
2305  	struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
2306  	struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
2307  
2308  	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
2309  
2310  	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
2311  	       && ac->ac_which != OCFS2_AC_USE_MAIN);
2312  
2313  	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
2314  		WARN_ON(min_clusters > 1);
2315  
2316  		status = ocfs2_claim_local_alloc_bits(osb,
2317  						      handle,
2318  						      ac,
2319  						      bits_wanted,
2320  						      cluster_start,
2321  						      num_clusters);
2322  		if (!status)
2323  			atomic_inc(&osb->alloc_stats.local_data);
2324  	} else {
2325  		if (min_clusters > (osb->bitmap_cpg - 1)) {
2326  			/* The only paths asking for contiguousness
2327  			 * should know about this already. */
2328  			mlog(ML_ERROR, "minimum allocation requested %u exceeds "
2329  			     "group bitmap size %u!\n", min_clusters,
2330  			     osb->bitmap_cpg);
2331  			status = -ENOSPC;
2332  			goto bail;
2333  		}
2334  		/* clamp the current request down to a realistic size. */
2335  		if (bits_wanted > (osb->bitmap_cpg - 1))
2336  			bits_wanted = osb->bitmap_cpg - 1;
2337  
2338  		status = ocfs2_claim_suballoc_bits(ac,
2339  						   handle,
2340  						   bits_wanted,
2341  						   min_clusters,
2342  						   &res);
2343  		if (!status) {
2344  			BUG_ON(res.sr_blkno); /* cluster alloc can't set */
2345  			*cluster_start =
2346  				ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
2347  								 res.sr_bg_blkno,
2348  								 res.sr_bit_offset);
2349  			atomic_inc(&osb->alloc_stats.bitmap_data);
2350  			*num_clusters = res.sr_bits;
2351  		}
2352  	}
2353  	if (status < 0) {
2354  		if (status != -ENOSPC)
2355  			mlog_errno(status);
2356  		goto bail;
2357  	}
2358  
2359  	ac->ac_bits_given += *num_clusters;
2360  
2361  bail:
2362  	if (status)
2363  		mlog_errno(status);
2364  	return status;
2365  }
2366  
/*
 * Claim clusters using everything the allocation context still has
 * outstanding (ac_bits_wanted - ac_bits_given) as the upper bound.
 * See __ocfs2_claim_clusters() for the remaining parameters and the
 * return value.
 */
int ocfs2_claim_clusters(handle_t *handle,
			 struct ocfs2_alloc_context *ac,
			 u32 min_clusters,
			 u32 *cluster_start,
			 u32 *num_clusters)
{
	return __ocfs2_claim_clusters(handle, ac, min_clusters,
				      ac->ac_bits_wanted - ac->ac_bits_given,
				      cluster_start, num_clusters);
}
2378  
/*
 * Clear num_bits bits starting at bit_off in the bitmap of group
 * descriptor bg (held in group_bh) and add them to its free count.
 *
 * When undo_fn is given (cluster bitmap only) the buffer is journalled
 * with undo access and undo_fn is also applied, bit by bit, to the
 * bitmap in the journal head's b_committed_data copy while holding
 * jh->b_state_lock.
 *
 * Returns the journal access status on success, a negative errno if
 * journal access fails, or the result of ocfs2_error() when the free
 * count ends up larger than the group's total bit count.
 */
static int ocfs2_block_group_clear_bits(handle_t *handle,
					struct inode *alloc_inode,
					struct ocfs2_group_desc *bg,
					struct buffer_head *group_bh,
					unsigned int bit_off,
					unsigned int num_bits,
					void (*undo_fn)(unsigned int bit,
							unsigned long *bmap))
{
	int status;
	int access_type;
	int overfreed;
	unsigned int remaining;
	struct ocfs2_group_desc *undo_bg = NULL;
	struct journal_head *jh;

	/*
	 * The descriptor came from ocfs2_read_group_descriptor(), so
	 * an invalid one here is a code bug.
	 */
	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));

	trace_ocfs2_block_group_clear_bits(bit_off, num_bits);

	BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));

	if (undo_fn)
		access_type = OCFS2_JOURNAL_ACCESS_UNDO;
	else
		access_type = OCFS2_JOURNAL_ACCESS_WRITE;

	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
					 group_bh, access_type);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	jh = bh2jh(group_bh);
	if (undo_fn) {
		spin_lock(&jh->b_state_lock);
		undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
		BUG_ON(!undo_bg);
	}

	/* Walk the range from its highest bit down to bit_off. */
	for (remaining = num_bits; remaining > 0; remaining--) {
		unsigned int bit = bit_off + remaining - 1;

		ocfs2_clear_bit(bit, (unsigned long *) bg->bg_bitmap);
		if (undo_fn)
			undo_fn(bit, (unsigned long *) undo_bg->bg_bitmap);
	}

	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
	overfreed = le16_to_cpu(bg->bg_free_bits_count) >
		    le16_to_cpu(bg->bg_bits);

	if (undo_fn)
		spin_unlock(&jh->b_state_lock);

	/* More free bits than the group holds: report the inconsistency. */
	if (overfreed)
		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
				   le16_to_cpu(bg->bg_bits),
				   le16_to_cpu(bg->bg_free_bits_count),
				   num_bits);

	ocfs2_journal_dirty(handle, group_bh);
	return status;
}
2443  
2444  /*
2445   * expects the suballoc inode to already be locked.
2446   */
_ocfs2_free_suballoc_bits(handle_t * handle,struct inode * alloc_inode,struct buffer_head * alloc_bh,unsigned int start_bit,u64 bg_blkno,unsigned int count,void (* undo_fn)(unsigned int bit,unsigned long * bitmap))2447  static int _ocfs2_free_suballoc_bits(handle_t *handle,
2448  				     struct inode *alloc_inode,
2449  				     struct buffer_head *alloc_bh,
2450  				     unsigned int start_bit,
2451  				     u64 bg_blkno,
2452  				     unsigned int count,
2453  				     void (*undo_fn)(unsigned int bit,
2454  						     unsigned long *bitmap))
2455  {
2456  	int status = 0;
2457  	u32 tmp_used;
2458  	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
2459  	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
2460  	struct buffer_head *group_bh = NULL;
2461  	struct ocfs2_group_desc *group;
2462  
2463  	/* The alloc_bh comes from ocfs2_free_dinode() or
2464  	 * ocfs2_free_clusters().  The callers have all locked the
2465  	 * allocator and gotten alloc_bh from the lock call.  This
2466  	 * validates the dinode buffer.  Any corruption that has happened
2467  	 * is a code bug. */
2468  	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
2469  	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
2470  
2471  	trace_ocfs2_free_suballoc_bits(
2472  		(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
2473  		(unsigned long long)bg_blkno,
2474  		start_bit, count);
2475  
2476  	status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
2477  					     &group_bh);
2478  	if (status < 0) {
2479  		mlog_errno(status);
2480  		goto bail;
2481  	}
2482  	group = (struct ocfs2_group_desc *) group_bh->b_data;
2483  
2484  	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
2485  
2486  	status = ocfs2_block_group_clear_bits(handle, alloc_inode,
2487  					      group, group_bh,
2488  					      start_bit, count, undo_fn);
2489  	if (status < 0) {
2490  		mlog_errno(status);
2491  		goto bail;
2492  	}
2493  
2494  	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
2495  					 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
2496  	if (status < 0) {
2497  		mlog_errno(status);
2498  		ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
2499  				start_bit, count);
2500  		goto bail;
2501  	}
2502  
2503  	le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
2504  		     count);
2505  	tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
2506  	fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
2507  	ocfs2_journal_dirty(handle, alloc_bh);
2508  
2509  bail:
2510  	brelse(group_bh);
2511  	return status;
2512  }
2513  
/*
 * Free count bits starting at start_bit in the group at bg_blkno.
 * This is _ocfs2_free_suballoc_bits() without an undo callback.
 */
int ocfs2_free_suballoc_bits(handle_t *handle,
			     struct inode *alloc_inode,
			     struct buffer_head *alloc_bh,
			     unsigned int start_bit,
			     u64 bg_blkno,
			     unsigned int count)
{
	return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
					 start_bit, bg_blkno, count,
					 NULL /* no undo callback */);
}
2524  
/*
 * Release the single suballocator bit occupied by dinode di.
 *
 * The owning group is di->i_suballoc_loc when that field is set;
 * otherwise it is derived from the inode's block number and bit via
 * ocfs2_which_suballoc_group().
 */
int ocfs2_free_dinode(handle_t *handle,
		      struct inode *inode_alloc_inode,
		      struct buffer_head *inode_alloc_bh,
		      struct ocfs2_dinode *di)
{
	u16 bit = le16_to_cpu(di->i_suballoc_bit);
	u64 group_blkno;

	if (di->i_suballoc_loc)
		group_blkno = le64_to_cpu(di->i_suballoc_loc);
	else
		group_blkno = ocfs2_which_suballoc_group(
					le64_to_cpu(di->i_blkno), bit);

	return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
					inode_alloc_bh, bit, group_blkno, 1);
}
2539  
/*
 * Free num_clusters clusters beginning at block start_blk back to the
 * cluster bitmap.
 *
 * start_blk must sit exactly on a cluster boundary.  The block is
 * mapped to its cluster group and bit offset, the bits are freed with
 * the given undo_fn, and on success the local alloc code is told how
 * many bits were freed.
 *
 * Returns the status of _ocfs2_free_suballoc_bits().
 */
static int _ocfs2_free_clusters(handle_t *handle,
				struct inode *bitmap_inode,
				struct buffer_head *bitmap_bh,
				u64 start_blk,
				unsigned int num_clusters,
				void (*undo_fn)(unsigned int bit,
						unsigned long *bitmap))
{
	int status;
	u16 group_bit;
	u64 group_blkno;

	/*
	 * A contiguous run of clusters can never be larger than one
	 * block group bitmap, so no looping over groups is needed.
	 * The alignment check below is expensive and can be dropped
	 * once this code has been tested really well.
	 */
	BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb,
				ocfs2_blocks_to_clusters(bitmap_inode->i_sb,
							 start_blk)));

	ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &group_blkno,
				     &group_bit);

	trace_ocfs2_free_clusters((unsigned long long)group_blkno,
			(unsigned long long)start_blk,
			group_bit, num_clusters);

	status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
					   group_bit, group_blkno,
					   num_clusters, undo_fn);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
					 num_clusters);

	return status;
}
2583  
/*
 * Free num_clusters clusters starting at start_blk.
 *
 * _ocfs2_set_bit is passed as the undo callback, so
 * ocfs2_block_group_clear_bits() applies it to the journal's committed
 * copy of the group bitmap for each freed bit.
 */
int ocfs2_free_clusters(handle_t *handle,
			struct inode *bitmap_inode,
			struct buffer_head *bitmap_bh,
			u64 start_blk,
			unsigned int num_clusters)
{
	return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
				    start_blk, num_clusters, _ocfs2_set_bit);
}
2594  
2595  /*
2596   * Give never-used clusters back to the global bitmap.  We don't need
2597   * to protect these bits in the undo buffer.
2598   */
ocfs2_release_clusters(handle_t * handle,struct inode * bitmap_inode,struct buffer_head * bitmap_bh,u64 start_blk,unsigned int num_clusters)2599  int ocfs2_release_clusters(handle_t *handle,
2600  			   struct inode *bitmap_inode,
2601  			   struct buffer_head *bitmap_bh,
2602  			   u64 start_blk,
2603  			   unsigned int num_clusters)
2604  {
2605  	return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
2606  				    start_blk, num_clusters,
2607  				    _ocfs2_clear_bit);
2608  }
2609  
2610  /*
2611   * For a given allocation, determine which allocators will need to be
2612   * accessed, and lock them, reserving the appropriate number of bits.
2613   *
2614   * Sparse file systems call this from ocfs2_write_begin_nolock()
2615   * and ocfs2_allocate_unwritten_extents().
2616   *
2617   * File systems which don't support holes call this from
2618   * ocfs2_extend_allocation().
2619   */
ocfs2_lock_allocators(struct inode * inode,struct ocfs2_extent_tree * et,u32 clusters_to_add,u32 extents_to_split,struct ocfs2_alloc_context ** data_ac,struct ocfs2_alloc_context ** meta_ac)2620  int ocfs2_lock_allocators(struct inode *inode,
2621  			  struct ocfs2_extent_tree *et,
2622  			  u32 clusters_to_add, u32 extents_to_split,
2623  			  struct ocfs2_alloc_context **data_ac,
2624  			  struct ocfs2_alloc_context **meta_ac)
2625  {
2626  	int ret = 0, num_free_extents;
2627  	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
2628  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2629  
2630  	*meta_ac = NULL;
2631  	if (data_ac)
2632  		*data_ac = NULL;
2633  
2634  	BUG_ON(clusters_to_add != 0 && data_ac == NULL);
2635  
2636  	num_free_extents = ocfs2_num_free_extents(et);
2637  	if (num_free_extents < 0) {
2638  		ret = num_free_extents;
2639  		mlog_errno(ret);
2640  		goto out;
2641  	}
2642  
2643  	/*
2644  	 * Sparse allocation file systems need to be more conservative
2645  	 * with reserving room for expansion - the actual allocation
2646  	 * happens while we've got a journal handle open so re-taking
2647  	 * a cluster lock (because we ran out of room for another
2648  	 * extent) will violate ordering rules.
2649  	 *
2650  	 * Most of the time we'll only be seeing this 1 cluster at a time
2651  	 * anyway.
2652  	 *
2653  	 * Always lock for any unwritten extents - we might want to
2654  	 * add blocks during a split.
2655  	 */
2656  	if (!num_free_extents ||
2657  	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
2658  		ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
2659  		if (ret < 0) {
2660  			if (ret != -ENOSPC)
2661  				mlog_errno(ret);
2662  			goto out;
2663  		}
2664  	}
2665  
2666  	if (clusters_to_add == 0)
2667  		goto out;
2668  
2669  	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2670  	if (ret < 0) {
2671  		if (ret != -ENOSPC)
2672  			mlog_errno(ret);
2673  		goto out;
2674  	}
2675  
2676  out:
2677  	if (ret) {
2678  		if (*meta_ac) {
2679  			ocfs2_free_alloc_context(*meta_ac);
2680  			*meta_ac = NULL;
2681  		}
2682  
2683  		/*
2684  		 * We cannot have an error and a non null *data_ac.
2685  		 */
2686  	}
2687  
2688  	return ret;
2689  }
2690  
2691  /*
2692   * Read the inode specified by blkno to get suballoc_slot and
2693   * suballoc_bit.
2694   */
ocfs2_get_suballoc_slot_bit(struct ocfs2_super * osb,u64 blkno,u16 * suballoc_slot,u64 * group_blkno,u16 * suballoc_bit)2695  static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2696  				       u16 *suballoc_slot, u64 *group_blkno,
2697  				       u16 *suballoc_bit)
2698  {
2699  	int status;
2700  	struct buffer_head *inode_bh = NULL;
2701  	struct ocfs2_dinode *inode_fe;
2702  
2703  	trace_ocfs2_get_suballoc_slot_bit((unsigned long long)blkno);
2704  
2705  	/* dirty read disk */
2706  	status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
2707  	if (status < 0) {
2708  		mlog(ML_ERROR, "read block %llu failed %d\n",
2709  		     (unsigned long long)blkno, status);
2710  		goto bail;
2711  	}
2712  
2713  	inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
2714  	if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
2715  		mlog(ML_ERROR, "invalid inode %llu requested\n",
2716  		     (unsigned long long)blkno);
2717  		status = -EINVAL;
2718  		goto bail;
2719  	}
2720  
2721  	if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
2722  	    (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
2723  		mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
2724  		     (unsigned long long)blkno,
2725  		     (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
2726  		status = -EINVAL;
2727  		goto bail;
2728  	}
2729  
2730  	if (suballoc_slot)
2731  		*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
2732  	if (suballoc_bit)
2733  		*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
2734  	if (group_blkno)
2735  		*group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
2736  
2737  bail:
2738  	brelse(inode_bh);
2739  
2740  	if (status)
2741  		mlog_errno(status);
2742  	return status;
2743  }
2744  
2745  /*
2746   * test whether bit is SET in allocator bitmap or not.  on success, 0
2747   * is returned and *res is 1 for SET; 0 otherwise.  when fails, errno
2748   * is returned and *res is meaningless.  Call this after you have
2749   * cluster locked against suballoc, or you may get a result based on
2750   * non-up2date contents
2751   */
ocfs2_test_suballoc_bit(struct ocfs2_super * osb,struct inode * suballoc,struct buffer_head * alloc_bh,u64 group_blkno,u64 blkno,u16 bit,int * res)2752  static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2753  				   struct inode *suballoc,
2754  				   struct buffer_head *alloc_bh,
2755  				   u64 group_blkno, u64 blkno,
2756  				   u16 bit, int *res)
2757  {
2758  	struct ocfs2_dinode *alloc_di;
2759  	struct ocfs2_group_desc *group;
2760  	struct buffer_head *group_bh = NULL;
2761  	u64 bg_blkno;
2762  	int status;
2763  
2764  	trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
2765  				      (unsigned int)bit);
2766  
2767  	alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
2768  	if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
2769  		mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2770  		     (unsigned int)bit,
2771  		     ocfs2_bits_per_group(&alloc_di->id2.i_chain));
2772  		status = -EINVAL;
2773  		goto bail;
2774  	}
2775  
2776  	bg_blkno = group_blkno ? group_blkno :
2777  		   ocfs2_which_suballoc_group(blkno, bit);
2778  	status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
2779  					     &group_bh);
2780  	if (status < 0) {
2781  		mlog(ML_ERROR, "read group %llu failed %d\n",
2782  		     (unsigned long long)bg_blkno, status);
2783  		goto bail;
2784  	}
2785  
2786  	group = (struct ocfs2_group_desc *) group_bh->b_data;
2787  	*res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
2788  
2789  bail:
2790  	brelse(group_bh);
2791  
2792  	if (status)
2793  		mlog_errno(status);
2794  	return status;
2795  }
2796  
2797  /*
2798   * Test if the bit representing this inode (blkno) is set in the
2799   * suballocator.
2800   *
2801   * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
2802   *
2803   * In the event of failure, a negative value is returned and *res is
2804   * meaningless.
2805   *
2806   * Callers must make sure to hold nfs_sync_lock to prevent
2807   * ocfs2_delete_inode() on another node from accessing the same
2808   * suballocator concurrently.
2809   */
ocfs2_test_inode_bit(struct ocfs2_super * osb,u64 blkno,int * res)2810  int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2811  {
2812  	int status;
2813  	u64 group_blkno = 0;
2814  	u16 suballoc_bit = 0, suballoc_slot = 0;
2815  	struct inode *inode_alloc_inode;
2816  	struct buffer_head *alloc_bh = NULL;
2817  
2818  	trace_ocfs2_test_inode_bit((unsigned long long)blkno);
2819  
2820  	status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2821  					     &group_blkno, &suballoc_bit);
2822  	if (status < 0) {
2823  		mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
2824  		goto bail;
2825  	}
2826  
2827  	if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
2828  		inode_alloc_inode = ocfs2_get_system_file_inode(osb,
2829  			GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
2830  	else
2831  		inode_alloc_inode = ocfs2_get_system_file_inode(osb,
2832  			INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
2833  	if (!inode_alloc_inode) {
2834  		/* the error code could be inaccurate, but we are not able to
2835  		 * get the correct one. */
2836  		status = -EINVAL;
2837  		mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
2838  		     (u32)suballoc_slot);
2839  		goto bail;
2840  	}
2841  
2842  	inode_lock(inode_alloc_inode);
2843  	status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
2844  	if (status < 0) {
2845  		inode_unlock(inode_alloc_inode);
2846  		iput(inode_alloc_inode);
2847  		mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
2848  		     (u32)suballoc_slot, status);
2849  		goto bail;
2850  	}
2851  
2852  	status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
2853  					 group_blkno, blkno, suballoc_bit, res);
2854  	if (status < 0)
2855  		mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
2856  
2857  	ocfs2_inode_unlock(inode_alloc_inode, 0);
2858  	inode_unlock(inode_alloc_inode);
2859  
2860  	iput(inode_alloc_inode);
2861  	brelse(alloc_bh);
2862  bail:
2863  	if (status)
2864  		mlog_errno(status);
2865  	return status;
2866  }
2867