// SPDX-License-Identifier: GPL-2.0

/*
 * fs/ext4/fast_commit.c
 *
 * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
 *
 * Ext4 fast commits routines.
 */
#include "ext4.h"
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "mballoc.h"

/*
 * Ext4 Fast Commits
 * -----------------
 *
 * Ext4 fast commits implement fine grained journalling for Ext4.
 *
 * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
 * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
 * TLV during the recovery phase. For the scenarios for which we currently
 * don't have replay code, fast commit falls back to full commits.
 * Fast commits record deltas in one of the following three categories.
 *
 * (A) Directory entry updates:
 *
 * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
 * - EXT4_FC_TAG_LINK		- records directory entry link
 * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
 *
 * (B) File specific data range updates:
 *
 * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
 * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
 *
 * (C) Inode metadata (mtime / ctime etc):
 *
 * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
 *				  during recovery. Note that the iblocks field
 *				  is not replayed and is instead derived during
 *				  replay.
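 *
 * A fast commit block is thus just a sequence of TLVs. As an illustrative
 * sketch of how such a log is consumed (error handling and block boundaries
 * elided; this loosely mirrors how the replay path walks the log), a block
 * can be walked like this:
 *
 *	u8 *cur = start, *end = start + blocksize;
 *	struct ext4_fc_tl tl;
 *
 *	while (cur + EXT4_FC_TAG_BASE_LEN <= end) {
 *		ext4_fc_get_tl(&tl, cur);
 *		... dispatch on tl.fc_tag; the value starts at
 *		    cur + EXT4_FC_TAG_BASE_LEN and is tl.fc_len bytes long ...
 *		cur += EXT4_FC_TAG_BASE_LEN + tl.fc_len;
 *	}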
 *
 * Commit Operation
 * ----------------
 * With fast commits, we maintain all the directory entry operations in the
 * order in which they are issued in an in-memory queue. This queue is flushed
 * to disk during the commit operation. We also maintain a list of inodes that
 * need to be committed during a fast commit in another in-memory queue of
 * inodes. During the commit operation, we commit in the following order:
 *
 * [1] Lock inodes for any further data updates by setting COMMITTING state
 * [2] Submit data buffers of all the inodes
 * [3] Wait for [2] to complete
 * [4] Commit all the directory entry updates in the fast commit space
 * [5] Commit all the changed inode structures
 * [6] Write tail tag (this tag ensures the atomicity, please read the following
 *     section for more details).
 * [7] Wait for [4], [5] and [6] to complete.
 *
 * All the inode updates must call ext4_fc_start_update() before starting an
 * update. If such an ongoing update is present, fast commit waits for it to
 * complete. The completion of such an update is marked by
 * ext4_fc_stop_update(), as sketched below.
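 *
 * A typical inode update in the VFS call paths is therefore bracketed like
 * this (an illustrative sketch, not a specific call site):
 *
 *	ext4_fc_start_update(inode);
 *	... modify the inode and/or its data ...
 *	ext4_fc_stop_update(inode);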
 *
 * Fast Commit Ineligibility
 * -------------------------
 *
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling
 * ext4_fc_mark_ineligible(): this makes the next fast commit operation
 * fall back to a full commit.
 *
 * Atomicity of commits
 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses the "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. The
 * tail tag contains the CRC of the contents and the TID of the transaction
 * after which this fast commit should be applied. Recovery code replays fast
 * commit logs only if there's at least 1 valid tail present. For every fast
 * commit operation, there is 1 tail. This means we may end up with multiple
 * tails in the fast commit space. Here's an example:
 *
 * - Create a new file A and remove existing file B
 * - fsync()
 * - Append contents to file A
 * - Truncate file A
 * - fsync()
 *
 * The fast commit space at the end of the above operations would look like:
 *
 *	[HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
 *	       |<---  Fast Commit 1  --->| |<---   Fast Commit 2    --->|
 *
 * Replay code should thus check for all the valid tails in the FC area.
 *
 * Fast Commit Replay Idempotence
 * ------------------------------
 *
 * Fast commit tags are idempotent in nature provided the recovery code follows
 * certain rules. The guiding principle that the commit path follows while
 * committing is that it stores the result of a particular operation instead of
 * storing the procedure.
 *
 * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
 * was associated with inode 10. During fast commit, instead of storing this
 * operation as a procedure "rename a to b", we store the resulting file system
 * state as a "series" of outcomes:
 *
 * - Link dirent b to inode 10
 * - Unlink dirent a
 * - Inode <10> with valid refcount
 *
 * Now when the recovery code runs, it needs to "enforce" this state on the
 * file system. This is what guarantees idempotence of fast commit replay.
 *
 * Let's take an example of a procedure that is not idempotent and see how fast
 * commits make it idempotent. Consider the following sequence of operations:
 *
 *	rm A;    mv B A;    read A
 *	 (x)      (y)        (z)
 *
 * (x), (y) and (z) are the points at which we can crash. If we store this
 * sequence of operations as is then the replay is not idempotent. Let's say
 * that while in replay, we crash at (z). During the second replay, file A
 * (which was actually created as a result of the "mv B A" operation) would
 * get deleted. Thus, the file named A would be absent when we try to read A.
 * So, this sequence of operations is not idempotent. However, as mentioned
 * above, instead of storing the procedure fast commits store the outcome of
 * each procedure. Thus the fast commit log for the above procedure would be
 * as follows:
 *
 * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
 * inode 11 before the replay)
 *
 *	[Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
 *	    (w)              (x)                (y)          (z)
 *
 * If we crash at (z), we will have file A linked to inode 11. During the
 * second replay, we will remove file A (inode 11). But we will create it back
 * and make it point to inode 11. We won't find B, so we'll just skip that
 * step. At this point, the refcount for inode 11 is not reliable, but that
 * gets fixed by the replay of the last inode 11 tag. Crashes at points (w),
 * (x) and (y) get handled similarly. Thus, by converting a non-idempotent
 * procedure into a series of idempotent outcomes, fast commits ensure
 * idempotence during replay.
 *
 * TODOs
 * -----
 *
 * 0) Fast commit replay path hardening: Fast commit replay code should use
 *    journal handles to make sure all the updates it does during the replay
 *    path are atomic. With that, if we crash during fast commit replay and
 *    then attempt recovery again, we will find a file system where the fast
 *    commit area is invalid (because a new full commit would be found). In
 *    order to deal with that, fast commit replay code should ensure that the
 *    "FC_REPLAY" superblock state is persisted before starting the replay, so
 *    that after the crash, fast commit recovery code can look at that flag
 *    and perform fast commit recovery even if that area is invalidated by
 *    later full commits.
 *
 * 1) Fast commit's commit path locks the entire file system during fast
 *    commit. This has a significant performance penalty. Instead of that, we
 *    should use the ext4_fc_start/stop_update functions to start inode level
 *    updates from ext4_journal_start/stop. Once we do that we can drop file
 *    system locking during the commit path.
 *
 * 2) Handle more ineligible cases.
 */

#include <trace/events/ext4.h>
static struct kmem_cache *ext4_fc_dentry_cachep;

static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");
	if (uptodate) {
		ext4_debug("%s: Block %lld up-to-date",
			   __func__, bh->b_blocknr);
		set_buffer_uptodate(bh);
	} else {
		ext4_debug("%s: Block %lld not up-to-date",
			   __func__, bh->b_blocknr);
		clear_buffer_uptodate(bh);
	}

	unlock_buffer(bh);
}

static inline void ext4_fc_reset_inode(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	ei->i_fc_lblk_start = 0;
	ei->i_fc_lblk_len = 0;
}

void ext4_fc_init_inode(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	ext4_fc_reset_inode(inode);
	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
	INIT_LIST_HEAD(&ei->i_fc_list);
	INIT_LIST_HEAD(&ei->i_fc_dilist);
	init_waitqueue_head(&ei->i_fc_wait);
	atomic_set(&ei->i_fc_updates, 0);
}

/* This function must be called with sbi->s_fc_lock held. */
static void ext4_fc_wait_committing_inode(struct inode *inode)
__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
{
	wait_queue_head_t *wq;
	struct ext4_inode_info *ei = EXT4_I(inode);

#if (BITS_PER_LONG < 64)
	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
			EXT4_STATE_FC_COMMITTING);
	wq = bit_waitqueue(&ei->i_state_flags,
			   EXT4_STATE_FC_COMMITTING);
#else
	DEFINE_WAIT_BIT(wait, &ei->i_flags,
			EXT4_STATE_FC_COMMITTING);
	wq = bit_waitqueue(&ei->i_flags,
			   EXT4_STATE_FC_COMMITTING);
#endif
	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
}

static bool ext4_fc_disabled(struct super_block *sb)
{
	return (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
		(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
}

/*
 * Inform Ext4's fast commit subsystem about the start of an inode update.
 *
 * This function is called by the high-level VFS callbacks before performing
 * any inode update. This function blocks if there's an ongoing fast commit
 * on the inode in question.
 */
void ext4_fc_start_update(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (ext4_fc_disabled(inode->i_sb))
		return;

restart:
	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	if (list_empty(&ei->i_fc_list))
		goto out;

	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
		ext4_fc_wait_committing_inode(inode);
		goto restart;
	}
out:
	atomic_inc(&ei->i_fc_updates);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}

/*
 * Stop an inode update and wake up waiting fast commits if any.
 */
void ext4_fc_stop_update(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (ext4_fc_disabled(inode->i_sb))
		return;

	if (atomic_dec_and_test(&ei->i_fc_updates))
		wake_up_all(&ei->i_fc_wait);
}

/*
 * Remove inode from fast commit list. If the inode is being committed
 * we wait until inode commit is done.
 */
void ext4_fc_del(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_fc_dentry_update *fc_dentry;

	if (ext4_fc_disabled(inode->i_sb))
		return;

restart:
	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		return;
	}

	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
		ext4_fc_wait_committing_inode(inode);
		goto restart;
	}

	if (!list_empty(&ei->i_fc_list))
		list_del_init(&ei->i_fc_list);

	/*
	 * Since this inode is getting removed, let's also remove all FC
	 * dentry create references, since there is no need to log them
	 * anyway.
	 */
	if (list_empty(&ei->i_fc_dilist)) {
		spin_unlock(&sbi->s_fc_lock);
		return;
	}

	fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
	WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
	list_del_init(&fc_dentry->fcd_list);
	list_del_init(&fc_dentry->fcd_dilist);

	WARN_ON(!list_empty(&ei->i_fc_dilist));
	spin_unlock(&sbi->s_fc_lock);

	if (fc_dentry->fcd_name.name &&
	    fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
		kfree(fc_dentry->fcd_name.name);
	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);

	return;
}

/*
 * Mark the file system as fast commit ineligible, and record the latest
 * ineligible transaction tid. This means that, up to the recorded
 * transaction, a commit operation will result in a full jbd2 commit.
 */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	tid_t tid;

	if (ext4_fc_disabled(sb))
		return;

	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
	if (handle && !IS_ERR(handle))
		tid = handle->h_transaction->t_tid;
	else {
		read_lock(&sbi->s_journal->j_state_lock);
		tid = sbi->s_journal->j_running_transaction ?
			sbi->s_journal->j_running_transaction->t_tid : 0;
		read_unlock(&sbi->s_journal->j_state_lock);
	}
	spin_lock(&sbi->s_fc_lock);
	if (sbi->s_fc_ineligible_tid < tid)
		sbi->s_fc_ineligible_tid = tid;
	spin_unlock(&sbi->s_fc_lock);
	WARN_ON(reason >= EXT4_FC_REASON_MAX);
	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}

/*
 * Generic fast commit tracking function. If this is the first time we are
 * called after a full commit, we initialize fast commit fields and then call
 * __fc_track_fn() with update = 0. If we have already been called after a full
 * commit, we pass update = 1. Based on that, the track function can determine
 * if it needs to track a field for the first time or if it needs to just
 * update the previously tracked value.
 *
 * If enqueue is set, this function enqueues the inode in the fast commit list.
 */
static int ext4_fc_track_template(
	handle_t *handle, struct inode *inode,
	int (*__fc_track_fn)(struct inode *, void *, bool),
	void *args, int enqueue)
{
	bool update = false;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	tid_t tid = 0;
	int ret;

	tid = handle->h_transaction->t_tid;
	mutex_lock(&ei->i_fc_lock);
	if (tid == ei->i_sync_tid) {
		update = true;
	} else {
		ext4_fc_reset_inode(inode);
		ei->i_sync_tid = tid;
	}
	ret = __fc_track_fn(inode, args, update);
	mutex_unlock(&ei->i_fc_lock);

	if (!enqueue)
		return ret;

	spin_lock(&sbi->s_fc_lock);
	if (list_empty(&EXT4_I(inode)->i_fc_list))
		list_add_tail(&EXT4_I(inode)->i_fc_list,
				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
				&sbi->s_fc_q[FC_Q_STAGING] :
				&sbi->s_fc_q[FC_Q_MAIN]);
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}

struct __track_dentry_update_args {
	struct dentry *dentry;
	int op;
};

/* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
static int __track_dentry_update(struct inode *inode, void *arg, bool update)
{
	struct ext4_fc_dentry_update *node;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct __track_dentry_update_args *dentry_update =
		(struct __track_dentry_update_args *)arg;
	struct dentry *dentry = dentry_update->dentry;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	mutex_unlock(&ei->i_fc_lock);
	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
	if (!node) {
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
		mutex_lock(&ei->i_fc_lock);
		return -ENOMEM;
	}

	node->fcd_op = dentry_update->op;
	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
	node->fcd_ino = inode->i_ino;
	if (dentry->d_name.len > DNAME_INLINE_LEN) {
		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
		if (!node->fcd_name.name) {
			kmem_cache_free(ext4_fc_dentry_cachep, node);
			ext4_fc_mark_ineligible(inode->i_sb,
						EXT4_FC_REASON_NOMEM, NULL);
			mutex_lock(&ei->i_fc_lock);
			return -ENOMEM;
		}
		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
		       dentry->d_name.len);
	} else {
		memcpy(node->fcd_iname, dentry->d_name.name,
		       dentry->d_name.len);
		node->fcd_name.name = node->fcd_iname;
	}
	node->fcd_name.len = dentry->d_name.len;
	INIT_LIST_HEAD(&node->fcd_dilist);
	spin_lock(&sbi->s_fc_lock);
	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
	    sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
		list_add_tail(&node->fcd_list,
				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
	else
		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);

	/*
	 * This helps us keep track of all the fc_dentry updates that are part
	 * of this ext4 inode. So if the inode is getting unlinked before we
	 * even get a chance to fsync, we can remove all fc_dentry references
	 * while evicting the inode in ext4_fc_del().
	 * It also means we don't need to loop over all the inodes in
	 * sbi->s_fc_q to find the corresponding inode in
	 * ext4_fc_commit_dentry_updates().
	 */
	if (dentry_update->op == EXT4_FC_TAG_CREAT) {
		WARN_ON(!list_empty(&ei->i_fc_dilist));
		list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
	}
	spin_unlock(&sbi->s_fc_lock);
	mutex_lock(&ei->i_fc_lock);

	return 0;
}

void __ext4_fc_track_unlink(handle_t *handle,
		struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_UNLINK;

	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
}

void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	if (ext4_fc_disabled(inode->i_sb))
		return;

	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
		return;

	__ext4_fc_track_unlink(handle, inode, dentry);
}

void __ext4_fc_track_link(handle_t *handle,
		struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_LINK;

	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_link(handle, inode, dentry, ret);
}

void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	if (ext4_fc_disabled(inode->i_sb))
		return;

	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
		return;

	__ext4_fc_track_link(handle, inode, dentry);
}

void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
			    struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_CREAT;

	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_create(handle, inode, dentry, ret);
}

void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	if (ext4_fc_disabled(inode->i_sb))
		return;

	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
		return;

	__ext4_fc_track_create(handle, inode, dentry);
}

/* __track_fn for inode tracking */
static int __track_inode(struct inode *inode, void *arg, bool update)
{
	if (update)
		return -EEXIST;

	EXT4_I(inode)->i_fc_lblk_len = 0;

	return 0;
}

void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
	int ret;

	if (S_ISDIR(inode->i_mode))
		return;

	if (ext4_fc_disabled(inode->i_sb))
		return;

	if (ext4_should_journal_data(inode)) {
		ext4_fc_mark_ineligible(inode->i_sb,
					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
		return;
	}

	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
		return;

	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
	trace_ext4_fc_track_inode(handle, inode, ret);
}

struct __track_range_args {
	ext4_lblk_t start, end;
};

/* __track_fn for tracking data updates */
static int __track_range(struct inode *inode, void *arg, bool update)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_lblk_t oldstart;
	struct __track_range_args *__arg =
		(struct __track_range_args *)arg;

	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
		return -ECANCELED;
	}

	oldstart = ei->i_fc_lblk_start;

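	/*
	 * Merge the new range with the range that is already being tracked.
	 * For example, if blocks [10, 20] are already tracked and this update
	 * touches blocks [5, 12], the tracked range becomes [5, 20].
	 */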
	if (update && ei->i_fc_lblk_len > 0) {
		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
		ei->i_fc_lblk_len =
			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
				ei->i_fc_lblk_start + 1;
	} else {
		ei->i_fc_lblk_start = __arg->start;
		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
	}

	return 0;
}

void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
			 ext4_lblk_t end)
{
	struct __track_range_args args;
	int ret;

	if (S_ISDIR(inode->i_mode))
		return;

	if (ext4_fc_disabled(inode->i_sb))
		return;

	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
		return;

	args.start = start;
	args.end = end;

	ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);

	trace_ext4_fc_track_range(handle, inode, start, end, ret);
}

static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
{
	blk_opf_t write_flags = REQ_SYNC;
	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;

	/* Add REQ_FUA | REQ_PREFLUSH only if it's the tail */
	if (test_opt(sb, BARRIER) && is_tail)
		write_flags |= REQ_FUA | REQ_PREFLUSH;
	lock_buffer(bh);
	set_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	bh->b_end_io = ext4_end_buffer_io_sync;
	submit_bh(REQ_OP_WRITE | write_flags, bh);
	EXT4_SB(sb)->s_fc_bh = NULL;
}

/* Ext4 commit path routines */

/* memzero and update CRC */
static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
				u32 *crc)
{
	void *ret;

	ret = memset(dst, 0, len);
	if (crc)
		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
	return ret;
}

/*
 * Allocate len bytes on a fast commit buffer.
 *
 * During the commit time this function is used to manage fast commit
 * block space. We don't split a fast commit log onto different
 * blocks. So this function makes sure that if there's not enough space
 * on the current block, the remaining space in the current block is
 * marked as unused by adding an EXT4_FC_TAG_PAD tag. In that case, a
 * new block is requested from jbd2 and the CRC is updated to reflect
 * the padding we added.
 */
static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
{
	struct ext4_fc_tl *tl;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh;
	int bsize = sbi->s_journal->j_blocksize;
	int ret, off = sbi->s_fc_bytes % bsize;
	int pad_len;

	/*
	 * After allocating len, we should have space at least for a 0 byte
	 * padding.
	 */
	if (len + EXT4_FC_TAG_BASE_LEN > bsize)
		return NULL;

	if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
		/*
		 * Only allocate from current buffer if we have enough space for
		 * this request AND we have space to add a zero byte padding.
		 */
		if (!sbi->s_fc_bh) {
			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
			if (ret)
				return NULL;
			sbi->s_fc_bh = bh;
		}
		sbi->s_fc_bytes += len;
		return sbi->s_fc_bh->b_data + off;
	}
	/* Need to add PAD tag */
	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
	pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
	tl->fc_len = cpu_to_le16(pad_len);
	if (crc)
		*crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN);
	if (pad_len > 0)
		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
	ext4_fc_submit_bh(sb, false);

	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
	if (ret)
		return NULL;
	sbi->s_fc_bh = bh;
	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
	return sbi->s_fc_bh->b_data;
}

/* memcpy to fc reserved space and update CRC */
static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
				int len, u32 *crc)
{
	if (crc)
		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
	return memcpy(dst, src, len);
}

/*
 * Complete a fast commit by writing the tail tag.
 *
 * Writing the tail tag marks the end of a fast commit. In order to guarantee
 * atomicity, after writing the tail tag, even if there's space remaining
 * in the block, the next commit shouldn't use it. That's why the tail tag's
 * length covers the remaining space in the block.
 */
static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_tl tl;
	struct ext4_fc_tail tail;
	int off, bsize = sbi->s_journal->j_blocksize;
	u8 *dst;

	/*
	 * ext4_fc_reserve_space takes care of allocating an extra block if
	 * there's not enough space on this block to accommodate the tail.
	 */
	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc);
	if (!dst)
		return -ENOSPC;

	off = sbi->s_fc_bytes % bsize;

	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);

	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
	dst += EXT4_FC_TAG_BASE_LEN;
	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
	dst += sizeof(tail.fc_tid);
	tail.fc_crc = cpu_to_le32(crc);
	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);

	ext4_fc_submit_bh(sb, true);

	return 0;
}

/*
 * Adds tag, length, value and updates CRC. Returns true if the tlv was added.
 * Returns false if there's not enough space.
 */
static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
			    u32 *crc)
{
	struct ext4_fc_tl tl;
	u8 *dst;

	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
	if (!dst)
		return false;

	tl.fc_tag = cpu_to_le16(tag);
	tl.fc_len = cpu_to_le16(len);

	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
	ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);

	return true;
}

/* Same as above, but adds a dentry tlv. */
static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
				   struct ext4_fc_dentry_update *fc_dentry)
{
	struct ext4_fc_dentry_info fcd;
	struct ext4_fc_tl tl;
	int dlen = fc_dentry->fcd_name.len;
	u8 *dst = ext4_fc_reserve_space(sb,
			EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);

	if (!dst)
		return false;

	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
	dst += EXT4_FC_TAG_BASE_LEN;
	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
	dst += sizeof(fcd);
	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);

	return true;
}

/*
 * Writes the inode to the fast commit space as an EXT4_FC_TAG_INODE TLV.
 * Returns 0 on success, error on failure.
 */
static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
	int ret;
	struct ext4_iloc iloc;
	struct ext4_fc_inode fc_inode;
	struct ext4_fc_tl tl;
	u8 *dst;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
		inode_len = EXT4_INODE_SIZE(inode->i_sb);
	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
		inode_len += ei->i_extra_isize;

	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));

	ret = -ECANCELED;
	dst = ext4_fc_reserve_space(inode->i_sb,
			EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
	if (!dst)
		goto err;

	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
		goto err;
	dst += EXT4_FC_TAG_BASE_LEN;
	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
		goto err;
	dst += sizeof(fc_inode);
	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
					inode_len, crc))
		goto err;
	ret = 0;
err:
	brelse(iloc.bh);
	return ret;
}

/*
 * Writes updated data ranges for the inode in question. Updates CRC.
 * Returns 0 on success, error otherwise.
 */
static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
{
	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_map_blocks map;
	struct ext4_fc_add_range fc_ext;
	struct ext4_fc_del_range lrange;
	struct ext4_extent *ex;
	int ret;

	mutex_lock(&ei->i_fc_lock);
	if (ei->i_fc_lblk_len == 0) {
		mutex_unlock(&ei->i_fc_lock);
		return 0;
	}
	old_blk_size = ei->i_fc_lblk_start;
	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
	ei->i_fc_lblk_len = 0;
	mutex_unlock(&ei->i_fc_lock);

	cur_lblk_off = old_blk_size;
	ext4_debug("will try writing %d to %d for inode %ld\n",
		   cur_lblk_off, new_blk_size, inode->i_ino);

	while (cur_lblk_off <= new_blk_size) {
		map.m_lblk = cur_lblk_off;
		map.m_len = new_blk_size - cur_lblk_off + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return -ECANCELED;

		if (map.m_len == 0) {
			cur_lblk_off++;
			continue;
		}

		if (ret == 0) {
			lrange.fc_ino = cpu_to_le32(inode->i_ino);
			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
			lrange.fc_len = cpu_to_le32(map.m_len);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
					     sizeof(lrange), (u8 *)&lrange, crc))
				return -ENOSPC;
		} else {
			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;

			/* Limit the number of blocks in one extent */
			map.m_len = min(max, map.m_len);

			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
			ex = (struct ext4_extent *)&fc_ext.fc_ex;
			ex->ee_block = cpu_to_le32(map.m_lblk);
			ex->ee_len = cpu_to_le16(map.m_len);
			ext4_ext_store_pblock(ex, map.m_pblk);
			if (map.m_flags & EXT4_MAP_UNWRITTEN)
				ext4_ext_mark_unwritten(ex);
			else
				ext4_ext_mark_initialized(ex);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
					     sizeof(fc_ext), (u8 *)&fc_ext, crc))
				return -ENOSPC;
		}

		cur_lblk_off += map.m_len;
	}

	return 0;
}

/* Submit data for all the fast commit inodes */
static int ext4_fc_submit_inode_data_all(journal_t *journal)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *ei;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
		while (atomic_read(&ei->i_fc_updates)) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&ei->i_fc_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (atomic_read(&ei->i_fc_updates)) {
				spin_unlock(&sbi->s_fc_lock);
				schedule();
				spin_lock(&sbi->s_fc_lock);
			}
			finish_wait(&ei->i_fc_wait, &wait);
		}
		spin_unlock(&sbi->s_fc_lock);
		ret = jbd2_submit_inode_data(ei->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}

/* Wait for completion of data for all the fast commit inodes */
static int ext4_fc_wait_inode_data_all(journal_t *journal)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *pos, *n;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
		if (!ext4_test_inode_state(&pos->vfs_inode,
					   EXT4_STATE_FC_COMMITTING))
			continue;
		spin_unlock(&sbi->s_fc_lock);

		ret = jbd2_wait_inode_data(journal, pos->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return 0;
}

/* Commit all the directory entry updates */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
__acquires(&sbi->s_fc_lock)
__releases(&sbi->s_fc_lock)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
	struct inode *inode;
	struct ext4_inode_info *ei;
	int ret;

	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
		return 0;
	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
			spin_unlock(&sbi->s_fc_lock);
			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
				ret = -ENOSPC;
				goto lock_and_exit;
			}
			spin_lock(&sbi->s_fc_lock);
			continue;
		}
		/*
		 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
		 * corresponding inode pointer.
		 */
		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
		ei = list_first_entry(&fc_dentry->fcd_dilist,
				      struct ext4_inode_info, i_fc_dilist);
		inode = &ei->vfs_inode;
		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);

		spin_unlock(&sbi->s_fc_lock);

		/*
		 * We first write the inode and then the create dirent. This
		 * allows the recovery code to create an unnamed inode first
		 * and then link it to a directory entry. This allows us
		 * to use namei.c routines almost as is and simplifies
		 * the recovery code.
		 */
		ret = ext4_fc_write_inode(inode, crc);
		if (ret)
			goto lock_and_exit;

		ret = ext4_fc_write_inode_data(inode, crc);
		if (ret)
			goto lock_and_exit;

		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
			ret = -ENOSPC;
			goto lock_and_exit;
		}

		spin_lock(&sbi->s_fc_lock);
	}
	return 0;
lock_and_exit:
	spin_lock(&sbi->s_fc_lock);
	return ret;
}

static int ext4_fc_perform_commit(journal_t *journal)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *iter;
	struct ext4_fc_head head;
	struct inode *inode;
	struct blk_plug plug;
	int ret = 0;
	u32 crc = 0;

	ret = ext4_fc_submit_inode_data_all(journal);
	if (ret)
		return ret;

	ret = ext4_fc_wait_inode_data_all(journal);
	if (ret)
		return ret;

	/*
	 * If the file system device is different from the journal device,
	 * issue a cache flush before we start writing fast commit blocks.
	 */
	if (journal->j_fs_dev != journal->j_dev)
		blkdev_issue_flush(journal->j_fs_dev);

	blk_start_plug(&plug);
	if (sbi->s_fc_bytes == 0) {
		/*
		 * Add a head tag only if this is the first fast commit
		 * in this TID.
		 */
		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
		head.fc_tid = cpu_to_le32(
			sbi->s_journal->j_running_transaction->t_tid);
		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
			(u8 *)&head, &crc)) {
			ret = -ENOSPC;
			goto out;
		}
	}

	spin_lock(&sbi->s_fc_lock);
	ret = ext4_fc_commit_dentry_updates(journal, &crc);
	if (ret) {
		spin_unlock(&sbi->s_fc_lock);
		goto out;
	}

	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
		inode = &iter->vfs_inode;
		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
			continue;

		spin_unlock(&sbi->s_fc_lock);
		ret = ext4_fc_write_inode_data(inode, &crc);
		if (ret)
			goto out;
		ret = ext4_fc_write_inode(inode, &crc);
		if (ret)
			goto out;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	ret = ext4_fc_write_tail(sb, crc);

out:
	blk_finish_plug(&plug);
	return ret;
}

static void ext4_fc_update_stats(struct super_block *sb, int status,
				 u64 commit_time, int nblks, tid_t commit_tid)
{
	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;

	ext4_debug("Fast commit ended with status = %d for tid %u",
		   status, commit_tid);
	if (status == EXT4_FC_STATUS_OK) {
		stats->fc_num_commits++;
		stats->fc_numblks += nblks;
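		/*
		 * Track the average commit time as an exponentially weighted
		 * moving average with the history weighted 3:1, i.e.
		 * avg = (commit_time + 3 * avg) / 4.
		 */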
		if (likely(stats->s_fc_avg_commit_time))
			stats->s_fc_avg_commit_time =
				(commit_time +
				 stats->s_fc_avg_commit_time * 3) / 4;
		else
			stats->s_fc_avg_commit_time = commit_time;
	} else if (status == EXT4_FC_STATUS_FAILED ||
		   status == EXT4_FC_STATUS_INELIGIBLE) {
		if (status == EXT4_FC_STATUS_FAILED)
			stats->fc_failed_commits++;
		stats->fc_ineligible_commits++;
	} else {
		stats->fc_skipped_commits++;
	}
	trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
}

/*
 * The main commit entry point. Performs a fast commit for transaction
 * commit_tid if needed. If it's not possible to perform a fast commit
 * due to various reasons, we fall back to a full commit. Returns 0
 * on success, error otherwise.
 */
int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int nblks = 0, ret, bsize = journal->j_blocksize;
	int subtid = atomic_read(&sbi->s_fc_subtid);
	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
	ktime_t start_time, commit_time;

	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
		return jbd2_complete_transaction(journal, commit_tid);

	trace_ext4_fc_commit_start(sb, commit_tid);

	start_time = ktime_get();

restart_fc:
	ret = jbd2_fc_begin_commit(journal, commit_tid);
	if (ret == -EALREADY) {
		/* There was an ongoing commit, check if we need to restart */
		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
		    commit_tid > journal->j_commit_sequence)
			goto restart_fc;
		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
				     commit_tid);
		return 0;
	} else if (ret) {
		/*
		 * Commit couldn't start. Just update stats and perform a
		 * full commit.
		 */
		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
				     commit_tid);
		return jbd2_complete_transaction(journal, commit_tid);
	}

	/*
	 * After establishing the journal barrier via jbd2_fc_begin_commit(),
	 * check if we are fast commit ineligible.
	 */
	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
		status = EXT4_FC_STATUS_INELIGIBLE;
		goto fallback;
	}

	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
	ret = ext4_fc_perform_commit(journal);
	if (ret < 0) {
		status = EXT4_FC_STATUS_FAILED;
		goto fallback;
	}
	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
	ret = jbd2_fc_wait_bufs(journal, nblks);
	if (ret < 0) {
		status = EXT4_FC_STATUS_FAILED;
		goto fallback;
	}
	atomic_inc(&sbi->s_fc_subtid);
	ret = jbd2_fc_end_commit(journal);
	/*
	 * Weight the average time higher than the latest commit time so we
	 * don't react too strongly to vast changes in the commit time.
	 */
	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
	ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
	return ret;

fallback:
	ret = jbd2_fc_end_commit_fallback(journal);
	ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
	return ret;
}

/*
 * Fast commit cleanup routine. This is called after every fast commit and
 * full commit. "full" is true if we are called after a full commit.
 */
static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *iter, *iter_n;
	struct ext4_fc_dentry_update *fc_dentry;

	if (full && sbi->s_fc_bh)
		sbi->s_fc_bh = NULL;

	trace_ext4_fc_cleanup(journal, full, tid);
	jbd2_fc_release_bufs(journal);

	spin_lock(&sbi->s_fc_lock);
	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
				 i_fc_list) {
		list_del_init(&iter->i_fc_list);
		ext4_clear_inode_state(&iter->vfs_inode,
				       EXT4_STATE_FC_COMMITTING);
		if (iter->i_sync_tid <= tid)
			ext4_fc_reset_inode(&iter->vfs_inode);
		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
		smp_mb();
#if (BITS_PER_LONG < 64)
		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
#else
		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
#endif
	}

	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
					     struct ext4_fc_dentry_update,
					     fcd_list);
		list_del_init(&fc_dentry->fcd_list);
		list_del_init(&fc_dentry->fcd_dilist);
		spin_unlock(&sbi->s_fc_lock);

		if (fc_dentry->fcd_name.name &&
		    fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
			kfree(fc_dentry->fcd_name.name);
		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
		spin_lock(&sbi->s_fc_lock);
	}

	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
			 &sbi->s_fc_dentry_q[FC_Q_MAIN]);
	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
			 &sbi->s_fc_q[FC_Q_MAIN]);

	if (tid >= sbi->s_fc_ineligible_tid) {
		sbi->s_fc_ineligible_tid = 0;
		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
	}

	if (full)
		sbi->s_fc_bytes = 0;
	spin_unlock(&sbi->s_fc_lock);
	trace_ext4_fc_stats(sb);
}

/* Ext4 Replay Path Routines */

/* Helper struct for dentry replay routines */
struct dentry_info_args {
	int parent_ino, dname_len, ino, inode_len;
	char *dname;
};

static inline void tl_to_darg(struct dentry_info_args *darg,
			      struct ext4_fc_tl *tl, u8 *val)
{
	struct ext4_fc_dentry_info fcd;

	memcpy(&fcd, val, sizeof(fcd));

	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
	darg->ino = le32_to_cpu(fcd.fc_ino);
	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
	darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
}

static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
{
	memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
	tl->fc_len = le16_to_cpu(tl->fc_len);
	tl->fc_tag = le16_to_cpu(tl->fc_tag);
}

/* Unlink replay function */
static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
				 u8 *val)
{
	struct inode *inode, *old_parent;
	struct qstr entry;
	struct dentry_info_args darg;
	int ret = 0;

	tl_to_darg(&darg, tl, val);

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
			     darg.parent_ino, darg.dname_len);

	entry.name = darg.dname;
	entry.len = darg.dname_len;
	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);

	if (IS_ERR(inode)) {
		ext4_debug("Inode %d not found", darg.ino);
		return 0;
	}

	old_parent = ext4_iget(sb, darg.parent_ino,
			       EXT4_IGET_NORMAL);
	if (IS_ERR(old_parent)) {
		ext4_debug("Dir with inode %d not found", darg.parent_ino);
		iput(inode);
		return 0;
	}

	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
	/* -ENOENT is ok because the entry might not exist anymore. */
	if (ret == -ENOENT)
		ret = 0;
	iput(old_parent);
	iput(inode);
	return ret;
}

static int ext4_fc_replay_link_internal(struct super_block *sb,
					struct dentry_info_args *darg,
					struct inode *inode)
{
	struct inode *dir = NULL;
	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
	int ret = 0;

	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
	if (IS_ERR(dir)) {
		ext4_debug("Dir with inode %d not found.", darg->parent_ino);
		dir = NULL;
		goto out;
	}

	dentry_dir = d_obtain_alias(dir);
	if (IS_ERR(dentry_dir)) {
		ext4_debug("Failed to obtain dentry");
		dentry_dir = NULL;
		goto out;
	}

	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
	if (!dentry_inode) {
		ext4_debug("Inode dentry not created.");
		ret = -ENOMEM;
		goto out;
	}

	ret = __ext4_link(dir, inode, dentry_inode);
	/*
	 * It's possible that the link already existed since the data blocks
	 * for the dir in question got persisted before we crashed OR we
	 * replayed this tag and crashed before the entire replay could
	 * complete.
	 */
	if (ret && ret != -EEXIST) {
		ext4_debug("Failed to link\n");
		goto out;
	}

	ret = 0;
out:
	if (dentry_dir) {
		d_drop(dentry_dir);
		dput(dentry_dir);
	} else if (dir) {
		iput(dir);
	}
	if (dentry_inode) {
		d_drop(dentry_inode);
		dput(dentry_inode);
	}

	return ret;
}

/* Link replay function */
static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
			       u8 *val)
{
	struct inode *inode;
	struct dentry_info_args darg;
	int ret = 0;

	tl_to_darg(&darg, tl, val);
	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
			     darg.parent_ino, darg.dname_len);

	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("Inode not found.");
		return 0;
	}

	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
	iput(inode);
	return ret;
}

/*
 * Record all the modified inodes during replay. We use this later to set up
 * block bitmaps correctly.
 */
static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
{
	struct ext4_fc_replay_state *state;
	int i;

	state = &EXT4_SB(sb)->s_fc_replay_state;
	for (i = 0; i < state->fc_modified_inodes_used; i++)
		if (state->fc_modified_inodes[i] == ino)
			return 0;
	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
		int *fc_modified_inodes;

		fc_modified_inodes = krealloc(state->fc_modified_inodes,
				sizeof(int) * (state->fc_modified_inodes_size +
				EXT4_FC_REPLAY_REALLOC_INCREMENT),
				GFP_KERNEL);
		if (!fc_modified_inodes)
			return -ENOMEM;
		state->fc_modified_inodes = fc_modified_inodes;
		state->fc_modified_inodes_size +=
			EXT4_FC_REPLAY_REALLOC_INCREMENT;
	}
	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
	return 0;
}

/*
 * Inode replay function
 */
static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
				u8 *val)
{
	struct ext4_fc_inode fc_inode;
	struct ext4_inode *raw_inode;
	struct ext4_inode *raw_fc_inode;
	struct inode *inode = NULL;
	struct ext4_iloc iloc;
	int inode_len, ino, ret, tag = tl->fc_tag;
	struct ext4_extent_header *eh;
	size_t off_gen = offsetof(struct ext4_inode, i_generation);

	memcpy(&fc_inode, val, sizeof(fc_inode));

	ino = le32_to_cpu(fc_inode.fc_ino);
	trace_ext4_fc_replay(sb, tag, ino, 0, 0);

	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
	if (!IS_ERR(inode)) {
		ext4_ext_clear_bb(inode);
		iput(inode);
	}
	inode = NULL;

	ret = ext4_fc_record_modified_inode(sb, ino);
	if (ret)
		goto out;

	raw_fc_inode = (struct ext4_inode *)
		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
	if (ret)
		goto out;

	inode_len = tl->fc_len - sizeof(struct ext4_fc_inode);
	raw_inode = ext4_raw_inode(&iloc);

	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
	memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen,
	       inode_len - off_gen);
	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
		if (eh->eh_magic != EXT4_EXT_MAGIC) {
			memset(eh, 0, sizeof(*eh));
			eh->eh_magic = EXT4_EXT_MAGIC;
			eh->eh_max = cpu_to_le16(
				(sizeof(raw_inode->i_block) -
				 sizeof(struct ext4_extent_header))
				/ sizeof(struct ext4_extent));
		}
	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
		       sizeof(raw_inode->i_block));
	}

	/* Immediately update the inode on disk. */
	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
	if (ret)
		goto out;
	ret = sync_dirty_buffer(iloc.bh);
	if (ret)
		goto out;
	ret = ext4_mark_inode_used(sb, ino);
	if (ret)
		goto out;

	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("Inode not found.");
		return -EFSCORRUPTED;
	}

	/*
	 * Our allocator could have made different decisions than before
	 * crashing. This should be fixed but until then, we recalculate
	 * the number of blocks used by the inode during replay.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
		ext4_ext_replay_set_iblocks(inode);

	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
	ext4_reset_inode_seed(inode);

	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
	sync_dirty_buffer(iloc.bh);
	brelse(iloc.bh);
out:
	iput(inode);
	if (!ret)
		blkdev_issue_flush(sb->s_bdev);

	return 0;
}

/*
 * Dentry create replay function.
 *
 * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. This means that
 * the inode for which we are trying to create a dentry here should already
 * have been replayed before we start here.
 */
static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
				 u8 *val)
{
	int ret = 0;
	struct inode *inode = NULL;
	struct inode *dir = NULL;
	struct dentry_info_args darg;

	tl_to_darg(&darg, tl, val);

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
			     darg.parent_ino, darg.dname_len);

	/* This takes care of updating the group descriptor and other metadata */
	ret = ext4_mark_inode_used(sb, darg.ino);
	if (ret)
		goto out;

	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("inode %d not found.", darg.ino);
		inode = NULL;
		ret = -EINVAL;
		goto out;
	}

	if (S_ISDIR(inode->i_mode)) {
		/*
		 * If we are creating a directory, we need to make sure that the
		 * dot and dot dot dirents are set up properly.
		 */
		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
		if (IS_ERR(dir)) {
			ext4_debug("Dir %d not found.", darg.parent_ino);
			goto out;
		}
		ret = ext4_init_new_dir(NULL, dir, inode);
		iput(dir);
		if (ret) {
			ret = 0;
			goto out;
		}
	}
	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
	if (ret)
		goto out;
	set_nlink(inode, 1);
	ext4_mark_inode_dirty(NULL, inode);
out:
	iput(inode);
	return ret;
}
1669
1670 /*
1671 * Record physical disk regions which are in use as per fast commit area,
1672 * and used by inodes during replay phase. Our simple replay phase
1673 * allocator excludes these regions from allocation.
1674 */
ext4_fc_record_regions(struct super_block * sb,int ino,ext4_lblk_t lblk,ext4_fsblk_t pblk,int len,int replay)1675 int ext4_fc_record_regions(struct super_block *sb, int ino,
1676 ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
1677 {
1678 struct ext4_fc_replay_state *state;
1679 struct ext4_fc_alloc_region *region;
1680
1681 state = &EXT4_SB(sb)->s_fc_replay_state;
1682 /*
1683 * during replay phase, the fc_regions_valid may not same as
1684 * fc_regions_used, update it when do new additions.
1685 */
	if (replay && state->fc_regions_used != state->fc_regions_valid)
		state->fc_regions_used = state->fc_regions_valid;
	if (state->fc_regions_used == state->fc_regions_size) {
		struct ext4_fc_alloc_region *fc_regions;

		fc_regions = krealloc(state->fc_regions,
				      sizeof(struct ext4_fc_alloc_region) *
				      (state->fc_regions_size +
				       EXT4_FC_REPLAY_REALLOC_INCREMENT),
				      GFP_KERNEL);
		if (!fc_regions)
			return -ENOMEM;
		state->fc_regions_size +=
			EXT4_FC_REPLAY_REALLOC_INCREMENT;
		state->fc_regions = fc_regions;
	}
	region = &state->fc_regions[state->fc_regions_used++];
	region->ino = ino;
	region->lblk = lblk;
	region->pblk = pblk;
	region->len = len;

	if (replay)
		state->fc_regions_valid++;

	return 0;
}
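
/*
 * Usage sketch (illustrative, not built): this is how the scan phase
 * records the region covered by an ADD_RANGE tag, mirroring the call in
 * ext4_fc_replay_scan() below. "replay" is 0 during scan; the recorded
 * regions only become valid once a matching TAIL tag checks out.
 */
#if 0
static int example_record_add_range(struct super_block *sb,
				    struct ext4_fc_add_range *fc)
{
	struct ext4_extent *ex = (struct ext4_extent *)&fc->fc_ex;

	return ext4_fc_record_regions(sb, le32_to_cpu(fc->fc_ino),
				      le32_to_cpu(ex->ee_block),
				      ext4_ext_pblock(ex),
				      ext4_ext_get_actual_len(ex), 0);
}
#endif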

/* Replay add range tag */
static int ext4_fc_replay_add_range(struct super_block *sb,
				    struct ext4_fc_tl *tl, u8 *val)
{
	struct ext4_fc_add_range fc_add_ex;
	struct ext4_extent newex, *ex;
	struct inode *inode;
	ext4_lblk_t start, cur;
	int remaining, len;
	ext4_fsblk_t start_pblk;
	struct ext4_map_blocks map;
	struct ext4_ext_path *path = NULL;
	int ret;

	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
		ext4_ext_get_actual_len(ex));

	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("Inode not found.");
		return 0;
	}

	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
	if (ret)
		goto out;

	start = le32_to_cpu(ex->ee_block);
	start_pblk = ext4_ext_pblock(ex);
	len = ext4_ext_get_actual_len(ex);

	cur = start;
	remaining = len;
	ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
		  inode->i_ino);

	while (remaining > 0) {
		map.m_lblk = cur;
		map.m_len = remaining;
		map.m_pblk = 0;
		ret = ext4_map_blocks(NULL, inode, &map, 0);

		if (ret < 0)
			goto out;

		if (ret == 0) {
			/* Range is not mapped */
			path = ext4_find_extent(inode, cur, NULL, 0);
			if (IS_ERR(path))
				goto out;
			memset(&newex, 0, sizeof(newex));
			newex.ee_block = cpu_to_le32(cur);
			ext4_ext_store_pblock(
				&newex, start_pblk + cur - start);
			newex.ee_len = cpu_to_le16(map.m_len);
			if (ext4_ext_is_unwritten(ex))
				ext4_ext_mark_unwritten(&newex);
			down_write(&EXT4_I(inode)->i_data_sem);
			ret = ext4_ext_insert_extent(
				NULL, inode, &path, &newex, 0);
			up_write((&EXT4_I(inode)->i_data_sem));
			ext4_free_ext_path(path);
			if (ret)
				goto out;
			goto next;
		}

		if (start_pblk + cur - start != map.m_pblk) {
			/*
			 * Logical to physical mapping changed. This can happen
			 * if this range was removed and then reallocated to
			 * map to new physical blocks during a fast commit.
			 */
			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex),
					start_pblk + cur - start);
			if (ret)
				goto out;
			/*
			 * Mark the old blocks as free since they aren't used
			 * anymore. We maintain an array of all the modified
			 * inodes. In case these blocks are still used at either
			 * a different logical range in the same inode or in
			 * some different inode, we will mark them as allocated
			 * at the end of the FC replay using our array of
			 * modified inodes.
			 */
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
			goto next;
		}

		/* Range is mapped and needs a state change */
		ext4_debug("Converting from %ld to %d %lld",
				map.m_flags & EXT4_MAP_UNWRITTEN,
			ext4_ext_is_unwritten(ex), map.m_pblk);
		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex), map.m_pblk);
		if (ret)
			goto out;
		/*
		 * We may have split the extent tree while toggling the state.
		 * Try to shrink the extent tree now.
		 */
		ext4_ext_replay_shrink_inode(inode, start + len);
next:
		cur += map.m_len;
		remaining -= map.m_len;
	}
	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
				     sb->s_blocksize_bits);
out:
	iput(inode);
	return 0;
}
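
/*
 * Worked example (illustrative): suppose an ADD_RANGE tag says lblk 0,
 * len 8, pblk 100. For each sub-range, the loop above picks one of three
 * cases:
 *
 *  - ext4_map_blocks() returns 0: the range is a hole, so a new extent
 *    (lblk 0 -> pblk 100) is inserted directly.
 *  - the range is mapped, but to a different pblk (say 200): the mapping
 *    is rewritten to pblk 100 and the old blocks at 200 are tentatively
 *    freed in the block bitmap.
 *  - the range is mapped to the right pblk but the unwritten state
 *    differs: only the extent state is toggled.
 *
 * The numbers are hypothetical; the case split is what matters.
 */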

/* Replay DEL_RANGE tag */
static int
ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
			 u8 *val)
{
	struct inode *inode;
	struct ext4_fc_del_range lrange;
	struct ext4_map_blocks map;
	ext4_lblk_t cur, remaining;
	int ret;

	memcpy(&lrange, val, sizeof(lrange));
	cur = le32_to_cpu(lrange.fc_lblk);
	remaining = le32_to_cpu(lrange.fc_len);

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
		le32_to_cpu(lrange.fc_ino), cur, remaining);

	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino));
		return 0;
	}

	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
	if (ret)
		goto out;

	ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
			le32_to_cpu(lrange.fc_len));
	while (remaining > 0) {
		map.m_lblk = cur;
		map.m_len = remaining;

		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			remaining -= ret;
			cur += ret;
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
		} else {
			remaining -= map.m_len;
			cur += map.m_len;
		}
	}

	down_write(&EXT4_I(inode)->i_data_sem);
	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
				le32_to_cpu(lrange.fc_lblk) +
				le32_to_cpu(lrange.fc_len) - 1);
	up_write(&EXT4_I(inode)->i_data_sem);
	if (ret)
		goto out;
	ext4_ext_replay_shrink_inode(inode,
		i_size_read(inode) >> sb->s_blocksize_bits);
	ext4_mark_inode_dirty(NULL, inode);
out:
	iput(inode);
	return 0;
}
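
/*
 * Illustrative example: replaying DEL_RANGE (lblk 5, len 4) first walks
 * lblk 5..8, marking each mapped cluster free in the block bitmap, then
 * removes the corresponding entries from the extent tree and shrinks the
 * tree to match i_size. The logical range is hypothetical.
 */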

static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
{
	struct ext4_fc_replay_state *state;
	struct inode *inode;
	struct ext4_ext_path *path = NULL;
	struct ext4_map_blocks map;
	int i, ret, j;
	ext4_lblk_t cur, end;

	state = &EXT4_SB(sb)->s_fc_replay_state;
	for (i = 0; i < state->fc_modified_inodes_used; i++) {
		inode = ext4_iget(sb, state->fc_modified_inodes[i],
			EXT4_IGET_NORMAL);
		if (IS_ERR(inode)) {
			ext4_debug("Inode %d not found.",
				state->fc_modified_inodes[i]);
			continue;
		}
		cur = 0;
		end = EXT_MAX_BLOCKS;
		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
			iput(inode);
			continue;
		}
		while (cur < end) {
			map.m_lblk = cur;
			map.m_len = end - cur;

			ret = ext4_map_blocks(NULL, inode, &map, 0);
			if (ret < 0)
				break;

			if (ret > 0) {
				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
				if (!IS_ERR(path)) {
					for (j = 0; j < path->p_depth; j++)
						ext4_mb_mark_bb(inode->i_sb,
							path[j].p_block, 1, 1);
					ext4_free_ext_path(path);
				}
				cur += ret;
				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
						map.m_len, 1);
			} else {
				cur = cur + (map.m_len ? map.m_len : 1);
			}
		}
		iput(inode);
	}
}

/*
 * Check if a block is in the excluded regions for block allocation. The
 * simple allocator that runs during the replay phase calls this function
 * to see if it is okay to use a block.
 */
bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
{
	int i;
	struct ext4_fc_replay_state *state;

	state = &EXT4_SB(sb)->s_fc_replay_state;
	for (i = 0; i < state->fc_regions_valid; i++) {
		if (state->fc_regions[i].ino == 0 ||
			state->fc_regions[i].len == 0)
			continue;
		if (in_range(blk, state->fc_regions[i].pblk,
					state->fc_regions[i].len))
			return true;
	}
	return false;
}
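
/*
 * Sketch (illustrative, not built): how a simple replay-phase allocator
 * might use the predicate above to skip blocks that the fast commit log
 * says are in use. The linear probe is an assumption for illustration;
 * the real replay allocator lives in the extents/mballoc code.
 */
#if 0
static ext4_fsblk_t example_pick_free_block(struct super_block *sb,
					    ext4_fsblk_t start,
					    ext4_fsblk_t end)
{
	ext4_fsblk_t blk;

	for (blk = start; blk < end; blk++)
		if (!ext4_fc_replay_check_excluded(sb, blk))
			return blk;	/* first block not excluded */
	return 0;			/* nothing usable in this window */
}
#endif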

/* Cleanup function called after replay */
void ext4_fc_replay_cleanup(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
	kfree(sbi->s_fc_replay_state.fc_regions);
	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
}

static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl,
					   u8 *val, u8 *end)
{
	if (val + tl->fc_len > end)
		return false;

	/*
	 * Only ADD_RANGE/TAIL/HEAD are checked here: their payloads are read
	 * during the journal rescan, before the CRC check runs. Length
	 * validation for the other tags relies on the CRC check.
	 */
	switch (tl->fc_tag) {
	case EXT4_FC_TAG_ADD_RANGE:
		return (sizeof(struct ext4_fc_add_range) == tl->fc_len);
	case EXT4_FC_TAG_TAIL:
		return (sizeof(struct ext4_fc_tail) <= tl->fc_len);
	case EXT4_FC_TAG_HEAD:
		return (sizeof(struct ext4_fc_head) == tl->fc_len);
	case EXT4_FC_TAG_DEL_RANGE:
	case EXT4_FC_TAG_LINK:
	case EXT4_FC_TAG_UNLINK:
	case EXT4_FC_TAG_CREAT:
	case EXT4_FC_TAG_INODE:
	case EXT4_FC_TAG_PAD:
	default:
		return true;
	}
}
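
/*
 * Sketch (illustrative, not built): the TLV walk shared by the scan and
 * replay passes below. Each iteration decodes a tag-length header and
 * then advances past the payload; the dispatch on tl.fc_tag is elided.
 */
#if 0
static void example_walk_tlvs(journal_t *journal, struct buffer_head *bh)
{
	struct ext4_fc_tl tl;
	__u8 *start = (__u8 *)bh->b_data;
	__u8 *end = start + journal->j_blocksize - 1;
	__u8 *cur, *val;

	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
		ext4_fc_get_tl(&tl, cur);	  /* decode tag + length */
		val = cur + EXT4_FC_TAG_BASE_LEN; /* payload follows header */
		/* switch (tl.fc_tag) { ... } */
	}
}
#endif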

/*
 * Recovery Scan phase handler
 *
 * This function is called during the scan phase and is responsible
 * for doing the following things:
 * - Make sure the fast commit area has valid tags for replay
 * - Count the number of tags that need to be replayed by the replay handler
 * - Verify CRC
 * - Create a list of excluded blocks for allocation during the replay phase
 *
 * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
 * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
 * to indicate that scan has finished and JBD2 can now start the replay phase.
 * It returns a negative error to indicate that there was an error. At the end
 * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
 * to the number of tags that need to be replayed during the replay phase.
 */
static int ext4_fc_replay_scan(journal_t *journal,
			       struct buffer_head *bh, int off,
			       tid_t expected_tid)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_replay_state *state;
	int ret = JBD2_FC_REPLAY_CONTINUE;
	struct ext4_fc_add_range ext;
	struct ext4_fc_tl tl;
	struct ext4_fc_tail tail;
	__u8 *start, *end, *cur, *val;
	struct ext4_fc_head head;
	struct ext4_extent *ex;

	state = &sbi->s_fc_replay_state;

	start = (u8 *)bh->b_data;
	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;

	if (state->fc_replay_expected_off == 0) {
		state->fc_cur_tag = 0;
		state->fc_replay_num_tags = 0;
		state->fc_crc = 0;
		state->fc_regions = NULL;
		state->fc_regions_valid = state->fc_regions_used =
			state->fc_regions_size = 0;
		/* Check if we can stop early */
		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
			!= EXT4_FC_TAG_HEAD)
			return 0;
	}

	if (off != state->fc_replay_expected_off) {
		ret = -EFSCORRUPTED;
		goto out_err;
	}

	state->fc_replay_expected_off++;
	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
		ext4_fc_get_tl(&tl, cur);
		val = cur + EXT4_FC_TAG_BASE_LEN;
		if (!ext4_fc_tag_len_isvalid(&tl, val, end)) {
			ret = state->fc_replay_num_tags ?
				JBD2_FC_REPLAY_STOP : -ECANCELED;
			goto out_err;
		}
		ext4_debug("Scan phase, tag:%s, blk %lld\n",
			   tag2str(tl.fc_tag), bh->b_blocknr);
		switch (tl.fc_tag) {
		case EXT4_FC_TAG_ADD_RANGE:
			memcpy(&ext, val, sizeof(ext));
			ex = (struct ext4_extent *)&ext.fc_ex;
			ret = ext4_fc_record_regions(sb,
				le32_to_cpu(ext.fc_ino),
				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
				ext4_ext_get_actual_len(ex), 0);
			if (ret < 0)
				break;
			ret = JBD2_FC_REPLAY_CONTINUE;
			fallthrough;
		case EXT4_FC_TAG_DEL_RANGE:
		case EXT4_FC_TAG_LINK:
		case EXT4_FC_TAG_UNLINK:
		case EXT4_FC_TAG_CREAT:
		case EXT4_FC_TAG_INODE:
		case EXT4_FC_TAG_PAD:
			state->fc_cur_tag++;
			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
				EXT4_FC_TAG_BASE_LEN + tl.fc_len);
			break;
		case EXT4_FC_TAG_TAIL:
			state->fc_cur_tag++;
			memcpy(&tail, val, sizeof(tail));
			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
						EXT4_FC_TAG_BASE_LEN +
						offsetof(struct ext4_fc_tail,
						fc_crc));
			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
				state->fc_replay_num_tags = state->fc_cur_tag;
				state->fc_regions_valid =
					state->fc_regions_used;
			} else {
				ret = state->fc_replay_num_tags ?
					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
			}
			state->fc_crc = 0;
			break;
		case EXT4_FC_TAG_HEAD:
			memcpy(&head, val, sizeof(head));
			if (le32_to_cpu(head.fc_features) &
				~EXT4_FC_SUPPORTED_FEATURES) {
				ret = -EOPNOTSUPP;
				break;
			}
			if (le32_to_cpu(head.fc_tid) != expected_tid) {
				ret = JBD2_FC_REPLAY_STOP;
				break;
			}
			state->fc_cur_tag++;
			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
				EXT4_FC_TAG_BASE_LEN + tl.fc_len);
			break;
		default:
			ret = state->fc_replay_num_tags ?
				JBD2_FC_REPLAY_STOP : -ECANCELED;
		}
		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
			break;
	}

out_err:
	trace_ext4_fc_replay_scan(sb, ret, off);
	return ret;
}
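
/*
 * Worked example (illustrative): assume the fast commit area holds one
 * block with the sequence HEAD, ADD_RANGE, TAIL, and the TAIL's tid and
 * CRC both match. The scan counts fc_cur_tag up to 3 and, on the TAIL,
 * latches fc_replay_num_tags = 3 and fc_regions_valid = fc_regions_used.
 * If a later block then fails tag validation, the scan returns
 * JBD2_FC_REPLAY_STOP rather than an error, because at least one valid
 * tail was already seen, and replay proceeds with the three tags.
 */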

/*
 * Main recovery path entry point.
 * The return codes have the same meaning as for ext4_fc_replay_scan() above.
 */
static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
			  enum passtype pass, int off, tid_t expected_tid)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_tl tl;
	__u8 *start, *end, *cur, *val;
	int ret = JBD2_FC_REPLAY_CONTINUE;
	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
	struct ext4_fc_tail tail;

	if (pass == PASS_SCAN) {
		state->fc_current_pass = PASS_SCAN;
		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
	}

	if (state->fc_current_pass != pass) {
		state->fc_current_pass = pass;
		sbi->s_mount_state |= EXT4_FC_REPLAY;
	}
	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
		ext4_debug("Replay stops\n");
		ext4_fc_set_bitmaps_and_counters(sb);
		return 0;
	}

#ifdef CONFIG_EXT4_DEBUG
	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
		pr_warn("Dropping fc block %d because max_replay set\n", off);
		return JBD2_FC_REPLAY_STOP;
	}
#endif

	start = (u8 *)bh->b_data;
	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;

	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
		ext4_fc_get_tl(&tl, cur);
		val = cur + EXT4_FC_TAG_BASE_LEN;

		if (state->fc_replay_num_tags == 0) {
			ret = JBD2_FC_REPLAY_STOP;
			ext4_fc_set_bitmaps_and_counters(sb);
			break;
		}

		ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag));
		state->fc_replay_num_tags--;
		switch (tl.fc_tag) {
		case EXT4_FC_TAG_LINK:
			ret = ext4_fc_replay_link(sb, &tl, val);
			break;
		case EXT4_FC_TAG_UNLINK:
			ret = ext4_fc_replay_unlink(sb, &tl, val);
			break;
		case EXT4_FC_TAG_ADD_RANGE:
			ret = ext4_fc_replay_add_range(sb, &tl, val);
			break;
		case EXT4_FC_TAG_CREAT:
			ret = ext4_fc_replay_create(sb, &tl, val);
			break;
		case EXT4_FC_TAG_DEL_RANGE:
			ret = ext4_fc_replay_del_range(sb, &tl, val);
			break;
		case EXT4_FC_TAG_INODE:
			ret = ext4_fc_replay_inode(sb, &tl, val);
			break;
		case EXT4_FC_TAG_PAD:
			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
					     tl.fc_len, 0);
			break;
		case EXT4_FC_TAG_TAIL:
			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL,
					     0, tl.fc_len, 0);
			memcpy(&tail, val, sizeof(tail));
			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
			break;
		case EXT4_FC_TAG_HEAD:
			break;
		default:
			trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
			ret = -ECANCELED;
			break;
		}
		if (ret < 0)
			break;
		ret = JBD2_FC_REPLAY_CONTINUE;
	}
	return ret;
}

void ext4_fc_init(struct super_block *sb, journal_t *journal)
{
	/*
	 * We set the replay callback even if fast commit is disabled because
	 * we could still have fast commit blocks that need to be replayed
	 * even if fast commit has now been turned off.
	 */
	journal->j_fc_replay_callback = ext4_fc_replay;
	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
		return;
	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
}

static const char *fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};

int ext4_fc_info_show(struct seq_file *seq, void *v)
{
	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
	int i;

	if (v != SEQ_START_TOKEN)
		return 0;

	seq_printf(seq,
		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
		   stats->fc_num_commits, stats->fc_ineligible_commits,
		   stats->fc_numblks,
		   div_u64(stats->s_fc_avg_commit_time, 1000));
	seq_puts(seq, "Ineligible reasons:\n");
	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
			stats->fc_ineligible_reason_count[i]);

	return 0;
}
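
/*
 * Sample output (illustrative numbers only), assuming this is exposed as
 * the per-device fc_info file under procfs:
 *
 *   fc stats:
 *   120 commits
 *   3 ineligible
 *   240 numblks
 *   1024us avg_commit_time
 *   Ineligible reasons:
 *   "Extended attributes changed":	2
 *   "Cross rename":	0
 *   ...
 */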

int __init ext4_fc_init_dentry_cache(void)
{
	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
					   SLAB_RECLAIM_ACCOUNT);

	if (ext4_fc_dentry_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void ext4_fc_destroy_dentry_cache(void)
{
	kmem_cache_destroy(ext4_fc_dentry_cachep);
}