// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/unzip_vle.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */
#include "unzip_vle.h"
#include <linux/prefetch.h>

static struct workqueue_struct *z_erofs_workqueue __read_mostly;
static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;

void z_erofs_exit_zip_subsystem(void)
{
	BUG_ON(z_erofs_workqueue == NULL);
	BUG_ON(z_erofs_workgroup_cachep == NULL);

	destroy_workqueue(z_erofs_workqueue);
	kmem_cache_destroy(z_erofs_workgroup_cachep);
}

static inline int init_unzip_workqueue(void)
{
	const unsigned onlinecpus = num_possible_cpus();

	/*
	 * we don't need too many threads, limiting threads
	 * could improve scheduling performance.
	 */
	z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
		WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
		onlinecpus + onlinecpus / 4);

	return z_erofs_workqueue != NULL ? 0 : -ENOMEM;
}

int z_erofs_init_zip_subsystem(void)
{
	z_erofs_workgroup_cachep =
		kmem_cache_create("erofs_compress",
		Z_EROFS_WORKGROUP_SIZE, 0,
		SLAB_RECLAIM_ACCOUNT, NULL);

	if (z_erofs_workgroup_cachep != NULL) {
		if (!init_unzip_workqueue())
			return 0;

		kmem_cache_destroy(z_erofs_workgroup_cachep);
	}
	return -ENOMEM;
}

enum z_erofs_vle_work_role {
	Z_EROFS_VLE_WORK_SECONDARY,
	Z_EROFS_VLE_WORK_PRIMARY,
	/*
	 * The current work has at least been linked with the following
	 * processed chained works, which means if the processing page
	 * is the tail partial page of the work, the current work can
	 * safely use the whole page, as illustrated below:
	 * +--------------+-------------------------------------------+
	 * |  tail page   |      head page (of the previous work)     |
	 * +--------------+-------------------------------------------+
	 *   /\  which belongs to the current work
	 * [  (*) this page can be used for the current work itself.  ]
	 */
	Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
	Z_EROFS_VLE_WORK_MAX
};

struct z_erofs_vle_work_builder {
	enum z_erofs_vle_work_role role;
	/*
	 * 'hosted = false' means that the current workgroup doesn't belong to
	 * the owned chained workgroups. In other words, it is none of our
	 * business to submit this workgroup.
	 */
	bool hosted;

	struct z_erofs_vle_workgroup *grp;
	struct z_erofs_vle_work *work;
	struct z_erofs_pagevec_ctor vector;

	/* pages used for reading the compressed data */
	struct page **compressed_pages;
	unsigned compressed_deficit;
};

#define VLE_WORK_BUILDER_INIT()	\
	{ .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }

#ifdef EROFS_FS_HAS_MANAGED_CACHE

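/*
 * Try to fill the workgroup's compressed_pages[] slots from the managed
 * cache inode: already-cached blocks are grabbed directly, and missing
 * blocks are (optionally) reserved with EROFS_UNALLOCATED_CACHED_PAGE so
 * they can be added to the page cache at submission time. Returns true
 * only if every block was found in the cache (no extra I/O is needed).
 */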
static bool grab_managed_cache_pages(struct address_space *mapping,
				     erofs_blk_t start,
				     struct page **compressed_pages,
				     int clusterblks,
				     bool reserve_allocation)
{
	bool noio = true;
	unsigned int i;

	/* TODO: optimize by introducing find_get_pages_range */
	for (i = 0; i < clusterblks; ++i) {
		struct page *page, *found;

		if (READ_ONCE(compressed_pages[i]) != NULL)
			continue;

		page = found = find_get_page(mapping, start + i);
		if (found == NULL) {
			noio = false;
			if (!reserve_allocation)
				continue;
			page = EROFS_UNALLOCATED_CACHED_PAGE;
		}

		if (NULL == cmpxchg(compressed_pages + i, NULL, page))
			continue;

		if (found != NULL)
			put_page(found);
	}
	return noio;
}

/* called by erofs_shrinker to get rid of all compressed_pages */
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
				       struct erofs_workgroup *egrp)
{
	struct z_erofs_vle_workgroup *const grp =
		container_of(egrp, struct z_erofs_vle_workgroup, obj);
	struct address_space *const mapping = sbi->managed_cache->i_mapping;
	const int clusterpages = erofs_clusterpages(sbi);
	int i;

	/*
	 * refcount of the workgroup is now frozen at 1,
	 * therefore no need to worry about available decompression users.
	 */
	for (i = 0; i < clusterpages; ++i) {
		struct page *page = grp->compressed_pages[i];

		if (page == NULL || page->mapping != mapping)
			continue;

		/* block other users from reclaiming or migrating the page */
		if (!trylock_page(page))
			return -EBUSY;

		/* barrier is implied in the following 'unlock_page' */
		WRITE_ONCE(grp->compressed_pages[i], NULL);

		set_page_private(page, 0);
		ClearPagePrivate(page);

		unlock_page(page);
		put_page(page);
	}
	return 0;
}

int erofs_try_to_free_cached_page(struct address_space *mapping,
				  struct page *page)
{
	struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
	const unsigned int clusterpages = erofs_clusterpages(sbi);

	struct z_erofs_vle_workgroup *grp;
	int ret = 0;	/* 0 - busy */

	/* prevent the workgroup from being freed */
	rcu_read_lock();
	grp = (void *)page_private(page);

	if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
		unsigned int i;

		for (i = 0; i < clusterpages; ++i) {
			if (grp->compressed_pages[i] == page) {
				WRITE_ONCE(grp->compressed_pages[i], NULL);
				ret = 1;
				break;
			}
		}
		erofs_workgroup_unfreeze(&grp->obj, 1);
	}
	rcu_read_unlock();

	if (ret) {
		ClearPagePrivate(page);
		put_page(page);
	}
	return ret;
}
#endif

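/*
 * In-place I/O: an EXCLUSIVE file page that still has to be filled can be
 * lent out as a buffer for the compressed data of the same work, saving a
 * temporary page allocation. The helper below claims the next empty slot
 * in builder->compressed_pages for such a page.
 */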
/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
static inline bool try_to_reuse_as_compressed_page(
	struct z_erofs_vle_work_builder *b,
	struct page *page)
{
	while (b->compressed_deficit) {
		--b->compressed_deficit;
		if (NULL == cmpxchg(b->compressed_pages++, NULL, page))
			return true;
	}

	return false;
}

/* callers must hold work->lock */
static int z_erofs_vle_work_add_page(
	struct z_erofs_vle_work_builder *builder,
	struct page *page,
	enum z_erofs_page_type type)
{
	int ret;
	bool occupied;

	/* give priority for the compressed data storage */
	if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
		type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
		try_to_reuse_as_compressed_page(builder, page))
		return 0;

	ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
		page, type, &occupied);
	builder->work->vcnt += (unsigned)ret;

	return ret ? 0 : -EAGAIN;
}

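/*
 * Workgroups pending submission are chained together through grp->next:
 *   Z_EROFS_VLE_WORKGRP_NIL          - not linked into any chain yet;
 *   Z_EROFS_VLE_WORKGRP_TAIL         - the open tail of a chain still
 *                                      being built;
 *   Z_EROFS_VLE_WORKGRP_TAIL_CLOSED  - the chain is closed for submission.
 * Claiming either appends an unchained workgroup to our own chain (so we
 * become responsible for submitting it) or attaches our chain to the open
 * tail of another one.
 */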
static inline bool try_to_claim_workgroup(
	struct z_erofs_vle_workgroup *grp,
	z_erofs_vle_owned_workgrp_t *owned_head,
	bool *hosted)
{
	DBG_BUGON(*hosted == true);

	/* let's claim the following types of workgroups */
retry:
	if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
		/* type 1, nil workgroup */
		if (Z_EROFS_VLE_WORKGRP_NIL != cmpxchg(&grp->next,
			Z_EROFS_VLE_WORKGRP_NIL, *owned_head))
			goto retry;

		*owned_head = grp;
		*hosted = true;
	} else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
		/*
		 * type 2, link to the end of an existing open chain,
		 * be careful that its submission itself is governed
		 * by the original owned chain.
		 */
		if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next,
			Z_EROFS_VLE_WORKGRP_TAIL, *owned_head))
			goto retry;

		*owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
	} else
		return false;	/* :( better luck next time */

	return true;	/* lucky, I am the followee :) */
}

static struct z_erofs_vle_work *
z_erofs_vle_work_lookup(struct super_block *sb,
			pgoff_t idx, unsigned pageofs,
			struct z_erofs_vle_workgroup **grp_ret,
			enum z_erofs_vle_work_role *role,
			z_erofs_vle_owned_workgrp_t *owned_head,
			bool *hosted)
{
	bool tag, primary;
	struct erofs_workgroup *egrp;
	struct z_erofs_vle_workgroup *grp;
	struct z_erofs_vle_work *work;

	egrp = erofs_find_workgroup(sb, idx, &tag);
	if (egrp == NULL) {
		*grp_ret = NULL;
		return NULL;
	}

	*grp_ret = grp = container_of(egrp,
		struct z_erofs_vle_workgroup, obj);

#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
	work = z_erofs_vle_grab_work(grp, pageofs);
	primary = true;
#else
	BUG();
#endif

	DBG_BUGON(work->pageofs != pageofs);

	/*
	 * lock must be taken first to avoid grp->next == NIL between
	 * claiming workgroup and adding pages:
	 *                        grp->next != NIL
	 *   grp->next = NIL
	 *   mutex_unlock_all
	 *                        mutex_lock(&work->lock)
	 *                        add all pages to pagevec
	 *
	 * [correct locking case 1]:
	 *   mutex_lock(grp->work[a])
	 *   ...
	 *   mutex_lock(grp->work[b])     mutex_lock(grp->work[c])
	 *   ...                          *role = SECONDARY
	 *                                add all pages to pagevec
	 *                                ...
	 *                                mutex_unlock(grp->work[c])
	 *   mutex_lock(grp->work[c])
	 *   ...
	 *   grp->next = NIL
	 *   mutex_unlock_all
	 *
	 * [correct locking case 2]:
	 *   mutex_lock(grp->work[b])
	 *   ...
	 *   mutex_lock(grp->work[a])
	 *   ...
	 *   mutex_lock(grp->work[c])
	 *   ...
	 *   grp->next = NIL
	 *   mutex_unlock_all
	 *                                mutex_lock(grp->work[a])
	 *                                *role = PRIMARY_OWNER
	 *                                add all pages to pagevec
	 *                                ...
	 */
	mutex_lock(&work->lock);

	*hosted = false;
	if (!primary)
		*role = Z_EROFS_VLE_WORK_SECONDARY;
	/* claim the workgroup if possible */
	else if (try_to_claim_workgroup(grp, owned_head, hosted))
		*role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
	else
		*role = Z_EROFS_VLE_WORK_PRIMARY;

	return work;
}

static struct z_erofs_vle_work *
z_erofs_vle_work_register(struct super_block *sb,
			  struct z_erofs_vle_workgroup **grp_ret,
			  struct erofs_map_blocks *map,
			  pgoff_t index, unsigned pageofs,
			  enum z_erofs_vle_work_role *role,
			  z_erofs_vle_owned_workgrp_t *owned_head,
			  bool *hosted)
{
	bool newgrp = false;
	struct z_erofs_vle_workgroup *grp = *grp_ret;
	struct z_erofs_vle_work *work;

#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
	BUG_ON(grp != NULL);
#else
	if (grp != NULL)
		goto skip;
#endif
	/* no available workgroup, let's allocate one */
	grp = kmem_cache_zalloc(z_erofs_workgroup_cachep, GFP_NOFS);
	if (unlikely(grp == NULL))
		return ERR_PTR(-ENOMEM);

	grp->obj.index = index;
	grp->llen = map->m_llen;

	z_erofs_vle_set_workgrp_fmt(grp,
		(map->m_flags & EROFS_MAP_ZIPPED) ?
			Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
			Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
	atomic_set(&grp->obj.refcount, 1);

	/* new workgrps have been claimed as type 1 */
	WRITE_ONCE(grp->next, *owned_head);
	/* primary and followed work for all new workgrps */
	*role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
	/* it should be submitted by ourselves */
	*hosted = true;

	newgrp = true;
#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
skip:
	/* currently unimplemented */
	BUG();
#else
	work = z_erofs_vle_grab_primary_work(grp);
#endif
	work->pageofs = pageofs;

	mutex_init(&work->lock);

	if (newgrp) {
		int err = erofs_register_workgroup(sb, &grp->obj, 0);

		if (err) {
			kmem_cache_free(z_erofs_workgroup_cachep, grp);
			return ERR_PTR(-EAGAIN);
		}
	}

	*owned_head = *grp_ret = grp;

	mutex_lock(&work->lock);
	return work;
}

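/*
 * Losslessly raise grp->llen to the largest logical length seen so far;
 * concurrent updaters are handled with a cmpxchg loop.
 */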
static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp,
					 unsigned int llen)
{
	while (1) {
		unsigned int orig_llen = grp->llen;

		if (orig_llen >= llen || orig_llen ==
			cmpxchg(&grp->llen, orig_llen, llen))
			break;
	}
}

#define builder_is_followed(builder) \
	((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)

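/*
 * Attach the builder to the work covering the current extent: look the
 * workgroup up (or register a new one), take work->lock, initialize the
 * inline pagevec constructor and, for primary works, expose the slots of
 * compressed_pages for possible in-place I/O.
 */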
static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
				       struct super_block *sb,
				       struct erofs_map_blocks *map,
				       z_erofs_vle_owned_workgrp_t *owned_head)
{
	const unsigned clusterpages = erofs_clusterpages(EROFS_SB(sb));
	const erofs_blk_t index = erofs_blknr(map->m_pa);
	const unsigned pageofs = map->m_la & ~PAGE_MASK;
	struct z_erofs_vle_workgroup *grp;
	struct z_erofs_vle_work *work;

	DBG_BUGON(builder->work != NULL);

	/* must be Z_EROFS_WORK_TAIL or the next chained work */
	DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
	DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

	DBG_BUGON(erofs_blkoff(map->m_pa));

repeat:
	work = z_erofs_vle_work_lookup(sb, index,
		pageofs, &grp, &builder->role, owned_head, &builder->hosted);
	if (work != NULL) {
		__update_workgrp_llen(grp, map->m_llen);
		goto got_it;
	}

	work = z_erofs_vle_work_register(sb, &grp, map, index, pageofs,
		&builder->role, owned_head, &builder->hosted);

	if (unlikely(work == ERR_PTR(-EAGAIN)))
		goto repeat;

	if (unlikely(IS_ERR(work)))
		return PTR_ERR(work);
got_it:
	z_erofs_pagevec_ctor_init(&builder->vector,
		Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, work->vcnt);

	if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
		/* enable possibly in-place decompression */
		builder->compressed_pages = grp->compressed_pages;
		builder->compressed_deficit = clusterpages;
	} else {
		builder->compressed_pages = NULL;
		builder->compressed_deficit = 0;
	}

	builder->grp = grp;
	builder->work = work;
	return 0;
}

/*
 * keep in mind that workgroups are only freed after an RCU grace period,
 * so rcu_read_lock() can prevent a workgroup from being freed while it is
 * still referenced.
 */
static void z_erofs_rcu_callback(struct rcu_head *head)
{
	struct z_erofs_vle_work *work = container_of(head,
		struct z_erofs_vle_work, rcu);
	struct z_erofs_vle_workgroup *grp =
		z_erofs_vle_work_workgroup(work, true);

	kmem_cache_free(z_erofs_workgroup_cachep, grp);
}

void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
{
	struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
		struct z_erofs_vle_workgroup, obj);
	struct z_erofs_vle_work *const work = &vgrp->work;

	call_rcu(&work->rcu, z_erofs_rcu_callback);
}

static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
	struct z_erofs_vle_work *work __maybe_unused)
{
	erofs_workgroup_put(&grp->obj);
}

void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
{
	struct z_erofs_vle_workgroup *grp =
		z_erofs_vle_work_workgroup(work, true);

	__z_erofs_vle_work_release(grp, work);
}

static inline bool
z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
{
	struct z_erofs_vle_work *work = builder->work;

	if (work == NULL)
		return false;

	z_erofs_pagevec_ctor_exit(&builder->vector, false);
	mutex_unlock(&work->lock);

	/*
	 * once all pending pages are added, don't hold the work reference
	 * any longer if the current work isn't hosted by ourselves.
	 */
	if (!builder->hosted)
		__z_erofs_vle_work_release(builder->grp, work);

	builder->work = NULL;
	builder->grp = NULL;
	return true;
}

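/*
 * Staging pages are short-lived scratch pages (mapping set to
 * Z_EROFS_MAPPING_STAGING) drawn from the local page pool; they are
 * recycled back into the pool once decompression finishes.
 */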
static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
					       gfp_t gfp)
{
	struct page *page = erofs_allocpage(pagepool, gfp);

	if (unlikely(page == NULL))
		return NULL;

	page->mapping = Z_EROFS_MAPPING_STAGING;
	return page;
}

struct z_erofs_vle_frontend {
	struct inode *const inode;

	struct z_erofs_vle_work_builder builder;
	struct erofs_map_blocks_iter m_iter;

	z_erofs_vle_owned_workgrp_t owned_head;

	bool initial;
#if (EROFS_FS_ZIP_CACHE_LVL >= 2)
	erofs_off_t cachedzone_la;
#endif
};

#define VLE_FRONTEND_INIT(__i) { \
	.inode = __i, \
	.m_iter = { \
		{ .m_llen = 0, .m_plen = 0 }, \
		.mpage = NULL \
	}, \
	.builder = VLE_WORK_BUILDER_INIT(), \
	.owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
	.initial = true, }

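/*
 * Read one file page: walk it backwards from the end, splitting it into
 * parts along the extents returned by erofs_map_blocks_iter(); each part
 * is attached (with a suitable page type) to the work of its extent so
 * that it can be filled in during decompression.
 */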
static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
				struct page *page,
				struct list_head *page_pool)
{
	struct super_block *const sb = fe->inode->i_sb;
	struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
	struct erofs_map_blocks_iter *const m = &fe->m_iter;
	struct erofs_map_blocks *const map = &m->map;
	struct z_erofs_vle_work_builder *const builder = &fe->builder;
	const loff_t offset = page_offset(page);

	bool tight = builder_is_followed(builder);
	struct z_erofs_vle_work *work = builder->work;

#ifdef EROFS_FS_HAS_MANAGED_CACHE
	struct address_space *const mngda = sbi->managed_cache->i_mapping;
	struct z_erofs_vle_workgroup *grp;
	bool noio_outoforder;
#endif

	enum z_erofs_page_type page_type;
	unsigned cur, end, spiltted, index;
	int err;

	/* register locked file pages as online pages in pack */
	z_erofs_onlinepage_init(page);

	spiltted = 0;
	end = PAGE_SIZE;
repeat:
	cur = end - 1;

	/* lucky, within the range of the current map_blocks */
	if (offset + cur >= map->m_la &&
		offset + cur < map->m_la + map->m_llen)
		goto hitted;

	/* go ahead to the next map_blocks */
	debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);

	if (!z_erofs_vle_work_iter_end(builder))
		fe->initial = false;

	map->m_la = offset + cur;
	map->m_llen = 0;
	err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0);
	if (unlikely(err))
		goto err_out;

	/* deal with hole (FIXME! broken now) */
	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
		goto hitted;

	DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
	BUG_ON(erofs_blkoff(map->m_pa));

	err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
	if (unlikely(err))
		goto err_out;

#ifdef EROFS_FS_HAS_MANAGED_CACHE
	grp = fe->builder.grp;

	/* let's do out-of-order decompression for noio */
	noio_outoforder = grab_managed_cache_pages(mngda,
		erofs_blknr(map->m_pa),
		grp->compressed_pages, erofs_blknr(map->m_plen),
		/* compressed page caching selection strategy */
		fe->initial | (EROFS_FS_ZIP_CACHE_LVL >= 2 ?
			map->m_la < fe->cachedzone_la : 0));

	if (noio_outoforder && builder_is_followed(builder))
		builder->role = Z_EROFS_VLE_WORK_PRIMARY;
#endif

	tight &= builder_is_followed(builder);
	work = builder->work;
hitted:
	cur = end - min_t(unsigned, offset + end - map->m_la, end);
	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
		zero_user_segment(page, cur, end);
		goto next_part;
	}

	/* let's derive page type */
	page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
		(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));

retry:
	err = z_erofs_vle_work_add_page(builder, page, page_type);
	/* should allocate an additional staging page for pagevec */
	if (err == -EAGAIN) {
		struct page *const newpage =
			__stagingpage_alloc(page_pool, GFP_NOFS);

		err = z_erofs_vle_work_add_page(builder,
			newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE);
		if (!err)
			goto retry;
	}

	if (unlikely(err))
		goto err_out;

	index = page->index - map->m_la / PAGE_SIZE;

	/* FIXME! avoid the last redundant fixup & endio */
	z_erofs_onlinepage_fixup(page, index, true);
	++spiltted;

	/* also update nr_pages and increase queued_pages */
	work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
next_part:
	/* can be used for verification */
	map->m_llen = offset + cur - map->m_la;

	end = cur;
	if (end > 0)
		goto repeat;

	/* FIXME! avoid the last redundant fixup & endio */
	z_erofs_onlinepage_endio(page);

	debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
		__func__, page, spiltted, map->m_llen);
	return 0;

err_out:
	/* TODO: the missing error handling cases */
	return err;
}

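/*
 * bio->bi_private carries a tagged pointer to the unzip io descriptor
 * (tag bit: background or not). Each completed bio drops pending_bios by
 * one; when the count reaches zero, decompression is kicked off either on
 * the unzip workqueue (background) or by waking up the foreground waiter.
 */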
static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
{
	tagptr1_t t = tagptr_init(tagptr1_t, ptr);
	struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
	bool background = tagptr_unfold_tags(t);

	if (atomic_add_return(bios, &io->pending_bios))
		return;

	if (background)
		queue_work(z_erofs_workqueue, &io->u.work);
	else
		wake_up(&io->u.wait);
}

static inline void z_erofs_vle_read_endio(struct bio *bio)
{
	const blk_status_t err = bio->bi_status;
	unsigned i;
	struct bio_vec *bvec;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	struct address_space *mngda = NULL;
#endif

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
		bool cachemngd = false;

		DBG_BUGON(PageUptodate(page));
		BUG_ON(page->mapping == NULL);

#ifdef EROFS_FS_HAS_MANAGED_CACHE
		if (unlikely(mngda == NULL && !z_erofs_is_stagingpage(page))) {
			struct inode *const inode = page->mapping->host;
			struct super_block *const sb = inode->i_sb;

			mngda = EROFS_SB(sb)->managed_cache->i_mapping;
		}

		/*
		 * If mngda has not been fetched yet, it stays NULL; however,
		 * page->mapping is never NULL if everything works properly.
		 */
		cachemngd = (page->mapping == mngda);
#endif

		if (unlikely(err))
			SetPageError(page);
		else if (cachemngd)
			SetPageUptodate(page);

		if (cachemngd)
			unlock_page(page);
	}

	z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
	bio_put(bio);
}

static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
static DEFINE_MUTEX(z_pagemap_global_lock);

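/*
 * Decompress a single workgroup: build the array of destination pages from
 * the inline pagevec (plus any compressed pages reused in place), then try
 * the fast per-CPU buffer path and fall back to vmap-based decompression
 * if that path is not supported for this request.
 */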
static int z_erofs_vle_unzip(struct super_block *sb,
	struct z_erofs_vle_workgroup *grp,
	struct list_head *page_pool)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	struct address_space *const mngda = sbi->managed_cache->i_mapping;
#endif
	const unsigned clusterpages = erofs_clusterpages(sbi);

	struct z_erofs_pagevec_ctor ctor;
	unsigned nr_pages;
#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
	unsigned sparsemem_pages = 0;
#endif
	struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
	struct page **pages, **compressed_pages, *page;
	unsigned i, llen;

	enum z_erofs_page_type page_type;
	bool overlapped;
	struct z_erofs_vle_work *work;
	void *vout;
	int err;

	might_sleep();
#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
	work = z_erofs_vle_grab_primary_work(grp);
#else
	BUG();
#endif
	BUG_ON(!READ_ONCE(work->nr_pages));

	mutex_lock(&work->lock);
	nr_pages = work->nr_pages;

	if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
		pages = pages_onstack;
	else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
		mutex_trylock(&z_pagemap_global_lock))
		pages = z_pagemap_global;
	else {
repeat:
		pages = kvmalloc_array(nr_pages,
			sizeof(struct page *), GFP_KERNEL);

		/* fallback to global pagemap for the lowmem scenario */
		if (unlikely(pages == NULL)) {
			if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
				goto repeat;
			else {
				mutex_lock(&z_pagemap_global_lock);
				pages = z_pagemap_global;
			}
		}
	}

	for (i = 0; i < nr_pages; ++i)
		pages[i] = NULL;

	z_erofs_pagevec_ctor_init(&ctor,
		Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, 0);

	for (i = 0; i < work->vcnt; ++i) {
		unsigned pagenr;

		page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);

		/* all pages in pagevec ought to be valid */
		DBG_BUGON(page == NULL);
		DBG_BUGON(page->mapping == NULL);

		if (z_erofs_gather_if_stagingpage(page_pool, page))
			continue;

		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
			pagenr = 0;
		else
			pagenr = z_erofs_onlinepage_index(page);

		BUG_ON(pagenr >= nr_pages);

#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
		BUG_ON(pages[pagenr] != NULL);
		++sparsemem_pages;
#endif
		pages[pagenr] = page;
	}

	z_erofs_pagevec_ctor_exit(&ctor, true);

	overlapped = false;
	compressed_pages = grp->compressed_pages;

	for (i = 0; i < clusterpages; ++i) {
		unsigned pagenr;

		page = compressed_pages[i];

		/* all compressed pages ought to be valid */
		DBG_BUGON(page == NULL);
		DBG_BUGON(page->mapping == NULL);

		if (z_erofs_is_stagingpage(page))
			continue;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
		else if (page->mapping == mngda) {
			BUG_ON(PageLocked(page));
			BUG_ON(!PageUptodate(page));
			continue;
		}
#endif

		/* only non-head pages could be reused as compressed pages */
		pagenr = z_erofs_onlinepage_index(page);

		BUG_ON(pagenr >= nr_pages);
#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
		BUG_ON(pages[pagenr] != NULL);
		++sparsemem_pages;
#endif
		pages[pagenr] = page;

		overlapped = true;
	}

	llen = (nr_pages << PAGE_SHIFT) - work->pageofs;

	if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) {
		/* FIXME! this should be fixed in the future */
		BUG_ON(grp->llen != llen);

		err = z_erofs_vle_plain_copy(compressed_pages, clusterpages,
			pages, nr_pages, work->pageofs);
		goto out;
	}

	if (llen > grp->llen)
		llen = grp->llen;

	err = z_erofs_vle_unzip_fast_percpu(compressed_pages,
		clusterpages, pages, llen, work->pageofs,
		z_erofs_onlinepage_endio);
	if (err != -ENOTSUPP)
		goto out_percpu;

#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
	if (sparsemem_pages >= nr_pages) {
		BUG_ON(sparsemem_pages > nr_pages);
		goto skip_allocpage;
	}
#endif

	for (i = 0; i < nr_pages; ++i) {
		if (pages[i] != NULL)
			continue;

		pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS);
	}

#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
skip_allocpage:
#endif
	vout = erofs_vmap(pages, nr_pages);

	err = z_erofs_vle_unzip_vmap(compressed_pages,
		clusterpages, vout, llen, work->pageofs, overlapped);

	erofs_vunmap(vout, nr_pages);

out:
	for (i = 0; i < nr_pages; ++i) {
		page = pages[i];
		DBG_BUGON(page->mapping == NULL);

		/* recycle all individual staging pages */
		if (z_erofs_gather_if_stagingpage(page_pool, page))
			continue;

		if (unlikely(err < 0))
			SetPageError(page);

		z_erofs_onlinepage_endio(page);
	}

out_percpu:
	for (i = 0; i < clusterpages; ++i) {
		page = compressed_pages[i];

#ifdef EROFS_FS_HAS_MANAGED_CACHE
		if (page->mapping == mngda)
			continue;
#endif
		/* recycle all individual staging pages */
		(void)z_erofs_gather_if_stagingpage(page_pool, page);

		WRITE_ONCE(compressed_pages[i], NULL);
	}

	if (pages == z_pagemap_global)
		mutex_unlock(&z_pagemap_global_lock);
	else if (unlikely(pages != pages_onstack))
		kvfree(pages);

	work->nr_pages = 0;
	work->vcnt = 0;

	/* all work locks MUST be taken before the following line */

	WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);

	/* all work locks SHOULD be released right now */
	mutex_unlock(&work->lock);

	z_erofs_vle_work_release(work);
	return err;
}

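/*
 * Decompress every workgroup on a closed chain in order, stopping at the
 * TAIL_CLOSED terminator.
 */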
static void z_erofs_vle_unzip_all(struct super_block *sb,
				  struct z_erofs_vle_unzip_io *io,
				  struct list_head *page_pool)
{
	z_erofs_vle_owned_workgrp_t owned = io->head;

	while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
		struct z_erofs_vle_workgroup *grp;

		/* impossible that 'owned' equals Z_EROFS_VLE_WORKGRP_TAIL */
		DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);

		/* impossible that 'owned' equals NULL */
		DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);

		grp = owned;
		owned = READ_ONCE(grp->next);

		z_erofs_vle_unzip(sb, grp, page_pool);
	}
}

static void z_erofs_vle_unzip_wq(struct work_struct *work)
{
	struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
		struct z_erofs_vle_unzip_io_sb, io.u.work);
	LIST_HEAD(page_pool);

	BUG_ON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
	z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);

	put_pages_list(&page_pool);
	kvfree(iosb);
}

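/*
 * Set up an unzip io descriptor: foreground readers pass an on-stack io
 * and wait on io->u.wait, while background submission allocates a
 * z_erofs_vle_unzip_io_sb whose work item is run on the unzip workqueue.
 */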
static inline struct z_erofs_vle_unzip_io *
prepare_io_handler(struct super_block *sb,
		   struct z_erofs_vle_unzip_io *io,
		   bool background)
{
	struct z_erofs_vle_unzip_io_sb *iosb;

	if (!background) {
		/* waitqueue available for foreground io */
		BUG_ON(io == NULL);

		init_waitqueue_head(&io->u.wait);
		atomic_set(&io->pending_bios, 0);
		goto out;
	}

	if (io != NULL)
		BUG();
	else {
		/* allocate extra io descriptor for background io */
		iosb = kvzalloc(sizeof(struct z_erofs_vle_unzip_io_sb),
			GFP_KERNEL | __GFP_NOFAIL);
		BUG_ON(iosb == NULL);

		io = &iosb->io;
	}

	iosb->sb = sb;
	INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
out:
	io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
	return io;
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* true - unlocked (noio), false - locked (need submit io) */
static inline bool recover_managed_page(struct z_erofs_vle_workgroup *grp,
					struct page *page)
{
	wait_on_page_locked(page);
	if (PagePrivate(page) && PageUptodate(page))
		return true;

	lock_page(page);
	if (unlikely(!PagePrivate(page))) {
		set_page_private(page, (unsigned long)grp);
		SetPagePrivate(page);
	}
	if (unlikely(PageUptodate(page))) {
		unlock_page(page);
		return true;
	}
	return false;
}

#define __FSIO_1 1
#else
#define __FSIO_1 0
#endif

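/*
 * Walk the owned chain and build read bios for all compressed pages that
 * still need I/O, merging physically contiguous blocks into the same bio.
 * With the managed cache enabled, workgroups whose pages are already all
 * uptodate (noio == clusterpages) are moved onto ios[0] so that they can
 * be decompressed without waiting for any bio to complete.
 */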
static bool z_erofs_vle_submit_all(struct super_block *sb,
				   z_erofs_vle_owned_workgrp_t owned_head,
				   struct list_head *pagepool,
				   struct z_erofs_vle_unzip_io *fg_io,
				   bool force_fg)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const unsigned clusterpages = erofs_clusterpages(sbi);
	const gfp_t gfp = GFP_NOFS;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	struct address_space *const mngda = sbi->managed_cache->i_mapping;
	struct z_erofs_vle_workgroup *lstgrp_noio = NULL, *lstgrp_io = NULL;
#endif
	struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1];
	struct bio *bio;
	tagptr1_t bi_private;
	/* since bio will be NULL, no need to initialize last_index */
	pgoff_t uninitialized_var(last_index);
	bool force_submit = false;
	unsigned nr_bios;

	if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
		return false;

	/*
	 * force_fg == 1, (io, fg_io[0]) no io, (io, fg_io[1]) need submit io
	 * force_fg == 0, (io, fg_io[0]) no io; (io[1], bg_io) need submit io
	 */
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	ios[0] = prepare_io_handler(sb, fg_io + 0, false);
#endif

	if (force_fg) {
		ios[__FSIO_1] = prepare_io_handler(sb, fg_io + __FSIO_1, false);
		bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 0);
	} else {
		ios[__FSIO_1] = prepare_io_handler(sb, NULL, true);
		bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 1);
	}

	nr_bios = 0;
	force_submit = false;
	bio = NULL;

	/* by default, all need io submission */
	ios[__FSIO_1]->head = owned_head;

	do {
		struct z_erofs_vle_workgroup *grp;
		struct page **compressed_pages, *oldpage, *page;
		pgoff_t first_index;
		unsigned i = 0;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
		unsigned int noio = 0;
		bool cachemngd;
#endif
		int err;

		/* 'owned_head' can never equal either of the following */
		DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
		DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);

		grp = owned_head;

		/* close the main owned chain at first */
		owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
			Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

		first_index = grp->obj.index;
		compressed_pages = grp->compressed_pages;

		force_submit |= (first_index != last_index + 1);
repeat:
		/* fulfill all compressed pages */
		oldpage = page = READ_ONCE(compressed_pages[i]);

#ifdef EROFS_FS_HAS_MANAGED_CACHE
		cachemngd = false;

		if (page == EROFS_UNALLOCATED_CACHED_PAGE) {
			cachemngd = true;
			goto do_allocpage;
		} else if (page != NULL) {
			if (page->mapping != mngda)
				BUG_ON(PageUptodate(page));
			else if (recover_managed_page(grp, page)) {
				/* page is uptodate, skip io submission */
				force_submit = true;
				++noio;
				goto skippage;
			}
		} else {
do_allocpage:
#else
		if (page != NULL)
			BUG_ON(PageUptodate(page));
		else {
#endif
			page = __stagingpage_alloc(pagepool, gfp);

			if (oldpage != cmpxchg(compressed_pages + i,
				oldpage, page)) {
				list_add(&page->lru, pagepool);
				goto repeat;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
			} else if (cachemngd && !add_to_page_cache_lru(page,
				mngda, first_index + i, gfp)) {
				set_page_private(page, (unsigned long)grp);
				SetPagePrivate(page);
#endif
			}
		}

		if (bio != NULL && force_submit) {
submit_bio_retry:
			__submit_bio(bio, REQ_OP_READ, 0);
			bio = NULL;
		}

		if (bio == NULL) {
			bio = prepare_bio(sb, first_index + i,
				BIO_MAX_PAGES, z_erofs_vle_read_endio);
			bio->bi_private = tagptr_cast_ptr(bi_private);

			++nr_bios;
		}

		err = bio_add_page(bio, page, PAGE_SIZE, 0);
		if (err < PAGE_SIZE)
			goto submit_bio_retry;

		force_submit = false;
		last_index = first_index + i;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
skippage:
#endif
		if (++i < clusterpages)
			goto repeat;

#ifdef EROFS_FS_HAS_MANAGED_CACHE
		if (noio < clusterpages) {
			lstgrp_io = grp;
		} else {
			z_erofs_vle_owned_workgrp_t iogrp_next =
				owned_head == Z_EROFS_VLE_WORKGRP_TAIL ?
				Z_EROFS_VLE_WORKGRP_TAIL_CLOSED :
				owned_head;

			if (lstgrp_io == NULL)
				ios[1]->head = iogrp_next;
			else
				WRITE_ONCE(lstgrp_io->next, iogrp_next);

			if (lstgrp_noio == NULL)
				ios[0]->head = grp;
			else
				WRITE_ONCE(lstgrp_noio->next, grp);

			lstgrp_noio = grp;
		}
#endif
	} while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);

	if (bio != NULL)
		__submit_bio(bio, REQ_OP_READ, 0);

#ifndef EROFS_FS_HAS_MANAGED_CACHE
	BUG_ON(!nr_bios);
#else
	if (lstgrp_noio != NULL)
		WRITE_ONCE(lstgrp_noio->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

	if (!force_fg && !nr_bios) {
		kvfree(container_of(ios[1],
			struct z_erofs_vle_unzip_io_sb, io));
		return true;
	}
#endif

	z_erofs_vle_unzip_kickoff(tagptr_cast_ptr(bi_private), nr_bios);
	return true;
}

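/*
 * Submit all bios for the owned chain and then decompress: the no-io set
 * (ios[0]) is handled immediately, and for synchronous reads we wait for
 * the remaining bios to complete before decompressing the rest in the
 * caller's context.
 */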
static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
				     struct list_head *pagepool,
				     bool force_fg)
{
	struct super_block *sb = f->inode->i_sb;
	struct z_erofs_vle_unzip_io io[1 + __FSIO_1];

	if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
		return;

#ifdef EROFS_FS_HAS_MANAGED_CACHE
	z_erofs_vle_unzip_all(sb, &io[0], pagepool);
#endif
	if (!force_fg)
		return;

	/* wait until all bios are completed */
	wait_event(io[__FSIO_1].u.wait,
		!atomic_read(&io[__FSIO_1].pending_bios));

	/* let's do synchronous decompression */
	z_erofs_vle_unzip_all(sb, &io[__FSIO_1], pagepool);
}

static int z_erofs_vle_normalaccess_readpage(struct file *file,
					     struct page *page)
{
	struct inode *const inode = page->mapping->host;
	struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
	int err;
	LIST_HEAD(pagepool);

#if (EROFS_FS_ZIP_CACHE_LVL >= 2)
	f.cachedzone_la = page->index << PAGE_SHIFT;
#endif
	err = z_erofs_do_read_page(&f, page, &pagepool);
	(void)z_erofs_vle_work_iter_end(&f.builder);

	if (err) {
		errln("%s, failed to read, err [%d]", __func__, err);
		goto out;
	}

	z_erofs_submit_and_unzip(&f, &pagepool, true);
out:
	if (f.m_iter.mpage != NULL)
		put_page(f.m_iter.mpage);

	/* clean up the remaining free pages */
	put_pages_list(&pagepool);
	return 0;
}

static inline int __z_erofs_vle_normalaccess_readpages(
	struct file *filp,
	struct address_space *mapping,
	struct list_head *pages, unsigned nr_pages, bool sync)
{
	struct inode *const inode = mapping->host;

	struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
	struct page *head = NULL;
	LIST_HEAD(pagepool);

#if (EROFS_FS_ZIP_CACHE_LVL >= 2)
	f.cachedzone_la = lru_to_page(pages)->index << PAGE_SHIFT;
#endif
	for (; nr_pages; --nr_pages) {
		struct page *page = lru_to_page(pages);

		prefetchw(&page->flags);
		list_del(&page->lru);

		if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
			list_add(&page->lru, &pagepool);
			continue;
		}

		BUG_ON(PagePrivate(page));
		set_page_private(page, (unsigned long)head);
		head = page;
	}

	while (head != NULL) {
		struct page *page = head;
		int err;

		/* traversal in reverse order */
		head = (void *)page_private(page);

		err = z_erofs_do_read_page(&f, page, &pagepool);
		if (err) {
			struct erofs_vnode *vi = EROFS_V(inode);

			errln("%s, readahead error at page %lu of nid %llu",
				__func__, page->index, vi->nid);
		}

		put_page(page);
	}

	(void)z_erofs_vle_work_iter_end(&f.builder);

	z_erofs_submit_and_unzip(&f, &pagepool, sync);

	if (f.m_iter.mpage != NULL)
		put_page(f.m_iter.mpage);

	/* clean up the remaining free pages */
	put_pages_list(&pagepool);
	return 0;
}

static int z_erofs_vle_normalaccess_readpages(
	struct file *filp,
	struct address_space *mapping,
	struct list_head *pages, unsigned nr_pages)
{
	return __z_erofs_vle_normalaccess_readpages(filp,
		mapping, pages, nr_pages,
		nr_pages < 4 /* sync */);
}

const struct address_space_operations z_erofs_vle_normalaccess_aops = {
	.readpage = z_erofs_vle_normalaccess_readpage,
	.readpages = z_erofs_vle_normalaccess_readpages,
};

#define __vle_cluster_advise(x, bit, bits) \
	((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1))

#define __vle_cluster_type(advise) __vle_cluster_advise(advise, \
	Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT, Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS)

enum {
	Z_EROFS_VLE_CLUSTER_TYPE_PLAIN,
	Z_EROFS_VLE_CLUSTER_TYPE_HEAD,
	Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD,
	Z_EROFS_VLE_CLUSTER_TYPE_RESERVED,
	Z_EROFS_VLE_CLUSTER_TYPE_MAX
};

#define vle_cluster_type(di)	\
	__vle_cluster_type((di)->di_advise)

static inline unsigned
vle_compressed_index_clusterofs(unsigned clustersize,
	struct z_erofs_vle_decompressed_index *di)
{
	debugln("%s, vle=%pK, advise=%x (type %u), clusterofs=%x blkaddr=%x",
		__func__, di, di->di_advise, vle_cluster_type(di),
		di->di_clusterofs, di->di_u.blkaddr);

	switch (vle_cluster_type(di)) {
	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
		break;
	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
		return di->di_clusterofs;
	default:
		BUG_ON(1);
	}
	return clustersize;
}

static inline erofs_blk_t
vle_extent_blkaddr(struct inode *inode, pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
	struct erofs_vnode *vi = EROFS_V(inode);

	unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
		vi->xattr_isize) + sizeof(struct erofs_extent_header) +
		index * sizeof(struct z_erofs_vle_decompressed_index);

	return erofs_blknr(iloc(sbi, vi->nid) + ofs);
}

static inline unsigned int
vle_extent_blkoff(struct inode *inode, pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
	struct erofs_vnode *vi = EROFS_V(inode);

	unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
		vi->xattr_isize) + sizeof(struct erofs_extent_header) +
		index * sizeof(struct z_erofs_vle_decompressed_index);

	return erofs_blkoff(iloc(sbi, vi->nid) + ofs);
}

/*
 * Variable-sized Logical Extent (Fixed Physical Cluster) Compression Mode
 * ---
 * VLE compression mode attempts to compress a variable amount of logical
 * data into a physical cluster of fixed size.
 * VLE compression mode uses "struct z_erofs_vle_decompressed_index".
 */
static erofs_off_t vle_get_logical_extent_head(
	struct inode *inode,
	struct page **page_iter,
	void **kaddr_iter,
	unsigned lcn,	/* logical cluster number */
	erofs_blk_t *pcn,
	unsigned *flags)
{
	/* for extent meta */
	struct page *page = *page_iter;
	erofs_blk_t blkaddr = vle_extent_blkaddr(inode, lcn);
	struct z_erofs_vle_decompressed_index *di;
	unsigned long long ofs;
	const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
	const unsigned int clustersize = 1 << clusterbits;

	if (page->index != blkaddr) {
		kunmap_atomic(*kaddr_iter);
		unlock_page(page);
		put_page(page);

		*page_iter = page = erofs_get_meta_page(inode->i_sb,
			blkaddr, false);
		*kaddr_iter = kmap_atomic(page);
	}

	di = *kaddr_iter + vle_extent_blkoff(inode, lcn);
	switch (vle_cluster_type(di)) {
	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
		BUG_ON(!di->di_u.delta[0]);
		BUG_ON(lcn < di->di_u.delta[0]);

		ofs = vle_get_logical_extent_head(inode,
			page_iter, kaddr_iter,
			lcn - di->di_u.delta[0], pcn, flags);
		break;
	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
		*flags ^= EROFS_MAP_ZIPPED;
	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
		/* clustersize should be a power of two */
		ofs = ((unsigned long long)lcn << clusterbits) +
			(le16_to_cpu(di->di_clusterofs) & (clustersize - 1));
		*pcn = le32_to_cpu(di->di_u.blkaddr);
		break;
	default:
		BUG_ON(1);
	}
	return ofs;
}

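/*
 * Map a logical offset (map->m_la, plus any already-accumulated m_llen) to
 * its physical cluster: decode the decompressed index of the logical
 * cluster, and for NONHEAD clusters walk back through di_u.delta[0] to the
 * HEAD cluster that carries the physical block address.
 */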
int z_erofs_map_blocks_iter(struct inode *inode,
	struct erofs_map_blocks *map,
	struct page **mpage_ret, int flags)
{
	/* logical extent (start, end) offset */
	unsigned long long ofs, end;
	struct z_erofs_vle_decompressed_index *di;
	erofs_blk_t e_blkaddr, pcn;
	unsigned lcn, logical_cluster_ofs, cluster_type;
	u32 ofs_rem;
	struct page *mpage = *mpage_ret;
	void *kaddr;
	bool initial;
	const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
	const unsigned int clustersize = 1 << clusterbits;
	int err = 0;

	/* if both m_(l,p)len are 0, regularize l_lblk, l_lofs, etc... */
	initial = !map->m_llen;

	/* when trying to read beyond EOF, leave it unmapped */
	if (unlikely(map->m_la >= inode->i_size)) {
		BUG_ON(!initial);
		map->m_llen = map->m_la + 1 - inode->i_size;
		map->m_la = inode->i_size - 1;
		map->m_flags = 0;
		goto out;
	}

	debugln("%s, m_la %llu m_llen %llu --- start", __func__,
		map->m_la, map->m_llen);

	ofs = map->m_la + map->m_llen;

	/* clustersize should be a power of two */
	lcn = ofs >> clusterbits;
	ofs_rem = ofs & (clustersize - 1);

	e_blkaddr = vle_extent_blkaddr(inode, lcn);

	if (mpage == NULL || mpage->index != e_blkaddr) {
		if (mpage != NULL)
			put_page(mpage);

		mpage = erofs_get_meta_page(inode->i_sb, e_blkaddr, false);
		*mpage_ret = mpage;
	} else {
		lock_page(mpage);
		DBG_BUGON(!PageUptodate(mpage));
	}

	kaddr = kmap_atomic(mpage);
	di = kaddr + vle_extent_blkoff(inode, lcn);

	debugln("%s, lcn %u e_blkaddr %u e_blkoff %u", __func__, lcn,
		e_blkaddr, vle_extent_blkoff(inode, lcn));

	logical_cluster_ofs = vle_compressed_index_clusterofs(clustersize, di);
	if (!initial) {
		/* [walking mode] 'map' has already been initialized */
		map->m_llen += logical_cluster_ofs;
		goto unmap_out;
	}

	/* by default, compressed */
	map->m_flags |= EROFS_MAP_ZIPPED;

	end = (u64)(lcn + 1) * clustersize;

	cluster_type = vle_cluster_type(di);

	switch (cluster_type) {
	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
		if (ofs_rem >= logical_cluster_ofs)
			map->m_flags ^= EROFS_MAP_ZIPPED;
		/* fallthrough */
	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
		if (ofs_rem == logical_cluster_ofs) {
			pcn = le32_to_cpu(di->di_u.blkaddr);
			goto exact_hitted;
		}

		if (ofs_rem > logical_cluster_ofs) {
			ofs = lcn * clustersize | logical_cluster_ofs;
			pcn = le32_to_cpu(di->di_u.blkaddr);
			break;
		}

		/* logical cluster number should be >= 1 */
		if (unlikely(!lcn)) {
			errln("invalid logical cluster 0 at nid %llu",
				EROFS_V(inode)->nid);
			err = -EIO;
			goto unmap_out;
		}
		end = (lcn-- * clustersize) | logical_cluster_ofs;
		/* fallthrough */
	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
		/* get the corresponding first chunk */
		ofs = vle_get_logical_extent_head(inode, mpage_ret,
			&kaddr, lcn, &pcn, &map->m_flags);
		mpage = *mpage_ret;
		break;
	default:
		errln("unknown cluster type %u at offset %llu of nid %llu",
			cluster_type, ofs, EROFS_V(inode)->nid);
		err = -EIO;
		goto unmap_out;
	}

	map->m_la = ofs;
exact_hitted:
	map->m_llen = end - ofs;
	map->m_plen = clustersize;
	map->m_pa = blknr_to_addr(pcn);
	map->m_flags |= EROFS_MAP_MAPPED;
unmap_out:
	kunmap_atomic(kaddr);
	unlock_page(mpage);
out:
	debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
		__func__, map->m_la, map->m_pa,
		map->m_llen, map->m_plen, map->m_flags);

	/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
	DBG_BUGON(err < 0);
	return err;
}