1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/drivers/staging/erofs/unzip_vle.c
4 *
5 * Copyright (C) 2018 HUAWEI, Inc.
6 * http://www.huawei.com/
7 * Created by Gao Xiang <gaoxiang25@huawei.com>
8 *
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file COPYING in the main directory of the Linux
11 * distribution for more details.
12 */
13 #include "unzip_vle.h"
14 #include <linux/prefetch.h>
15
16 static struct workqueue_struct *z_erofs_workqueue __read_mostly;
17 static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
18
19 void z_erofs_exit_zip_subsystem(void)
20 {
21 BUG_ON(z_erofs_workqueue == NULL);
22 BUG_ON(z_erofs_workgroup_cachep == NULL);
23
24 destroy_workqueue(z_erofs_workqueue);
25 kmem_cache_destroy(z_erofs_workgroup_cachep);
26 }
27
28 static inline int init_unzip_workqueue(void)
29 {
30 const unsigned onlinecpus = num_possible_cpus();
31
32 /*
33 * we don't need too many threads, limiting threads
34 * could improve scheduling performance.
35 */
36 z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
37 WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
38 onlinecpus + onlinecpus / 4);
39
40 return z_erofs_workqueue != NULL ? 0 : -ENOMEM;
41 }
42
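/*
 * Bring-up order: the workgroup slab cache is created first, then the
 * decompression workqueue; if the workqueue cannot be allocated, the
 * cache is destroyed again so that a failed init leaves no state behind.
 */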
43 int z_erofs_init_zip_subsystem(void)
44 {
45 z_erofs_workgroup_cachep =
46 kmem_cache_create("erofs_compress",
47 Z_EROFS_WORKGROUP_SIZE, 0,
48 SLAB_RECLAIM_ACCOUNT, NULL);
49
50 if (z_erofs_workgroup_cachep != NULL) {
51 if (!init_unzip_workqueue())
52 return 0;
53
54 kmem_cache_destroy(z_erofs_workgroup_cachep);
55 }
56 return -ENOMEM;
57 }
58
59 enum z_erofs_vle_work_role {
60 Z_EROFS_VLE_WORK_SECONDARY,
61 Z_EROFS_VLE_WORK_PRIMARY,
62 /*
63 * The current work has at least been linked with the following
64 * processed chained works, which means if the processing page
65 * is the tail partial page of the work, the current work can
66 * safely use the whole page, as illustrated below:
67 * +--------------+-------------------------------------------+
68 * | tail page | head page (of the previous work) |
69 * +--------------+-------------------------------------------+
70 * /\ which belongs to the current work
71 * [ (*) this page can be used for the current work itself. ]
72 */
73 Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
74 Z_EROFS_VLE_WORK_MAX
75 };
76
77 struct z_erofs_vle_work_builder {
78 enum z_erofs_vle_work_role role;
79 /*
80 * 'hosted = false' means that the current workgroup doesn't belong to
81 * the owned chained workgroups. In other words, it is none of our
82 * business to submit this workgroup.
83 */
84 bool hosted;
85
86 struct z_erofs_vle_workgroup *grp;
87 struct z_erofs_vle_work *work;
88 struct z_erofs_pagevec_ctor vector;
89
90 /* pages used for reading the compressed data */
91 struct page **compressed_pages;
92 unsigned compressed_deficit;
93 };
94
95 #define VLE_WORK_BUILDER_INIT() \
96 { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }
97
98 #ifdef EROFS_FS_HAS_MANAGED_CACHE
99
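/*
 * Try to fill the compressed_pages[] slots from the managed cache.
 * Returns true only if every block is already cached (no extra I/O is
 * needed). When a block is missing and 'reserve_allocation' is set,
 * the slot is marked with EROFS_UNALLOCATED_CACHED_PAGE so that a
 * cache page can be allocated for it at submission time.
 */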
100 static bool grab_managed_cache_pages(struct address_space *mapping,
101 erofs_blk_t start,
102 struct page **compressed_pages,
103 int clusterblks,
104 bool reserve_allocation)
105 {
106 bool noio = true;
107 unsigned int i;
108
109 /* TODO: optimize by introducing find_get_pages_range */
110 for (i = 0; i < clusterblks; ++i) {
111 struct page *page, *found;
112
113 if (READ_ONCE(compressed_pages[i]) != NULL)
114 continue;
115
116 page = found = find_get_page(mapping, start + i);
117 if (found == NULL) {
118 noio = false;
119 if (!reserve_allocation)
120 continue;
121 page = EROFS_UNALLOCATED_CACHED_PAGE;
122 }
123
124 if (NULL == cmpxchg(compressed_pages + i, NULL, page))
125 continue;
126
127 if (found != NULL)
128 put_page(found);
129 }
130 return noio;
131 }
132
133 /* called by erofs_shrinker to get rid of all compressed_pages */
134 int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
135 struct erofs_workgroup *egrp)
136 {
137 struct z_erofs_vle_workgroup *const grp =
138 container_of(egrp, struct z_erofs_vle_workgroup, obj);
139 struct address_space *const mapping = sbi->managed_cache->i_mapping;
140 const int clusterpages = erofs_clusterpages(sbi);
141 int i;
142
143 /*
144 * the refcount of the workgroup is now frozen at 1,
145 * therefore there is no need to worry about concurrent decompression users.
146 */
147 for (i = 0; i < clusterpages; ++i) {
148 struct page *page = grp->compressed_pages[i];
149
150 if (page == NULL || page->mapping != mapping)
151 continue;
152
153 /* block other users from reclaiming or migrating the page */
154 if (!trylock_page(page))
155 return -EBUSY;
156
157 /* barrier is implied in the following 'unlock_page' */
158 WRITE_ONCE(grp->compressed_pages[i], NULL);
159
160 set_page_private(page, 0);
161 ClearPagePrivate(page);
162
163 unlock_page(page);
164 put_page(page);
165 }
166 return 0;
167 }
168
169 int erofs_try_to_free_cached_page(struct address_space *mapping,
170 struct page *page)
171 {
172 struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
173 const unsigned int clusterpages = erofs_clusterpages(sbi);
174
175 struct z_erofs_vle_workgroup *grp;
176 int ret = 0; /* 0 - busy */
177
178 /* prevent the workgroup from being freed */
179 rcu_read_lock();
180 grp = (void *)page_private(page);
181
182 if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
183 unsigned int i;
184
185 for (i = 0; i < clusterpages; ++i) {
186 if (grp->compressed_pages[i] == page) {
187 WRITE_ONCE(grp->compressed_pages[i], NULL);
188 ret = 1;
189 break;
190 }
191 }
192 erofs_workgroup_unfreeze(&grp->obj, 1);
193 }
194 rcu_read_unlock();
195
196 if (ret) {
197 ClearPagePrivate(page);
198 put_page(page);
199 }
200 return ret;
201 }
202 #endif
203
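/*
 * Try to record the file page in one of the remaining compressed page
 * slots so that decompression can later be performed in place
 * (the page temporarily holds compressed data).
 */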
204 /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
205 static inline bool try_to_reuse_as_compressed_page(
206 struct z_erofs_vle_work_builder *b,
207 struct page *page)
208 {
209 while (b->compressed_deficit) {
210 --b->compressed_deficit;
211 if (NULL == cmpxchg(b->compressed_pages++, NULL, page))
212 return true;
213 }
214
215 return false;
216 }
217
218 /* callers must be with work->lock held */
219 static int z_erofs_vle_work_add_page(
220 struct z_erofs_vle_work_builder *builder,
221 struct page *page,
222 enum z_erofs_page_type type)
223 {
224 int ret;
225 bool occupied;
226
227 /* give priority for the compressed data storage */
228 if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
229 type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
230 try_to_reuse_as_compressed_page(builder, page))
231 return 0;
232
233 ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
234 page, type, &occupied);
235 builder->work->vcnt += (unsigned)ret;
236
237 return ret ? 0 : -EAGAIN;
238 }
239
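/*
 * Workgroups pending submission are chained together through grp->next,
 * with *owned_head pointing at the head of the chain owned by the
 * current thread. A workgroup can be claimed if its 'next' field is
 * still NIL (type 1) or TAIL (type 2, the open end of another chain);
 * otherwise it is already part of a closed chain and cannot be claimed.
 */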
240 static inline bool try_to_claim_workgroup(
241 struct z_erofs_vle_workgroup *grp,
242 z_erofs_vle_owned_workgrp_t *owned_head,
243 bool *hosted)
244 {
245 DBG_BUGON(*hosted == true);
246
247 /* let's claim the following types of workgroups */
248 retry:
249 if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
250 /* type 1, nil workgroup */
251 if (Z_EROFS_VLE_WORKGRP_NIL != cmpxchg(&grp->next,
252 Z_EROFS_VLE_WORKGRP_NIL, *owned_head))
253 goto retry;
254
255 *owned_head = grp;
256 *hosted = true;
257 } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
258 /*
259 * type 2, link to the end of an existing open chain,
260 * be careful that its submission itself is governed
261 * by the original owned chain.
262 */
263 if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next,
264 Z_EROFS_VLE_WORKGRP_TAIL, *owned_head))
265 goto retry;
266
267 *owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
268 } else
269 return false; /* :( better luck next time */
270
271 return true; /* lucky, I am the followee :) */
272 }
273
274 static struct z_erofs_vle_work *
275 z_erofs_vle_work_lookup(struct super_block *sb,
276 pgoff_t idx, unsigned pageofs,
277 struct z_erofs_vle_workgroup **grp_ret,
278 enum z_erofs_vle_work_role *role,
279 z_erofs_vle_owned_workgrp_t *owned_head,
280 bool *hosted)
281 {
282 bool tag, primary;
283 struct erofs_workgroup *egrp;
284 struct z_erofs_vle_workgroup *grp;
285 struct z_erofs_vle_work *work;
286
287 egrp = erofs_find_workgroup(sb, idx, &tag);
288 if (egrp == NULL) {
289 *grp_ret = NULL;
290 return NULL;
291 }
292
293 *grp_ret = grp = container_of(egrp,
294 struct z_erofs_vle_workgroup, obj);
295
296 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
297 work = z_erofs_vle_grab_work(grp, pageofs);
298 primary = true;
299 #else
300 BUG();
301 #endif
302
303 DBG_BUGON(work->pageofs != pageofs);
304
305 /*
306 * lock must be taken first to avoid grp->next == NIL between
307 * claiming workgroup and adding pages:
308 * grp->next != NIL
309 * grp->next = NIL
310 * mutex_unlock_all
311 * mutex_lock(&work->lock)
312 * add all pages to pagevec
313 *
314 * [correct locking case 1]:
315 * mutex_lock(grp->work[a])
316 * ...
317 * mutex_lock(grp->work[b]) mutex_lock(grp->work[c])
318 * ... *role = SECONDARY
319 * add all pages to pagevec
320 * ...
321 * mutex_unlock(grp->work[c])
322 * mutex_lock(grp->work[c])
323 * ...
324 * grp->next = NIL
325 * mutex_unlock_all
326 *
327 * [correct locking case 2]:
328 * mutex_lock(grp->work[b])
329 * ...
330 * mutex_lock(grp->work[a])
331 * ...
332 * mutex_lock(grp->work[c])
333 * ...
334 * grp->next = NIL
335 * mutex_unlock_all
336 * mutex_lock(grp->work[a])
337 * *role = PRIMARY_OWNER
338 * add all pages to pagevec
339 * ...
340 */
341 mutex_lock(&work->lock);
342
343 *hosted = false;
344 if (!primary)
345 *role = Z_EROFS_VLE_WORK_SECONDARY;
346 /* claim the workgroup if possible */
347 else if (try_to_claim_workgroup(grp, owned_head, hosted))
348 *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
349 else
350 *role = Z_EROFS_VLE_WORK_PRIMARY;
351
352 return work;
353 }
354
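/*
 * Set up a brand-new workgroup (and its primary work) for the given
 * physical cluster: new workgroups are always claimed as type 1 and
 * hosted by the caller. If erofs_register_workgroup() fails, -EAGAIN
 * is returned so that the caller retries the lookup.
 */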
355 static struct z_erofs_vle_work *
356 z_erofs_vle_work_register(struct super_block *sb,
357 struct z_erofs_vle_workgroup **grp_ret,
358 struct erofs_map_blocks *map,
359 pgoff_t index, unsigned pageofs,
360 enum z_erofs_vle_work_role *role,
361 z_erofs_vle_owned_workgrp_t *owned_head,
362 bool *hosted)
363 {
364 bool newgrp = false;
365 struct z_erofs_vle_workgroup *grp = *grp_ret;
366 struct z_erofs_vle_work *work;
367
368 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
369 BUG_ON(grp != NULL);
370 #else
371 if (grp != NULL)
372 goto skip;
373 #endif
374 /* no available workgroup, let's allocate one */
375 grp = kmem_cache_zalloc(z_erofs_workgroup_cachep, GFP_NOFS);
376 if (unlikely(grp == NULL))
377 return ERR_PTR(-ENOMEM);
378
379 grp->obj.index = index;
380 grp->llen = map->m_llen;
381
382 z_erofs_vle_set_workgrp_fmt(grp,
383 (map->m_flags & EROFS_MAP_ZIPPED) ?
384 Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
385 Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
386 atomic_set(&grp->obj.refcount, 1);
387
388 /* new workgrps have been claimed as type 1 */
389 WRITE_ONCE(grp->next, *owned_head);
390 /* primary and followed work for all new workgrps */
391 *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
392 /* it should be submitted by ourselves */
393 *hosted = true;
394
395 newgrp = true;
396 #ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
397 skip:
398 /* currently unimplemented */
399 BUG();
400 #else
401 work = z_erofs_vle_grab_primary_work(grp);
402 #endif
403 work->pageofs = pageofs;
404
405 mutex_init(&work->lock);
406
407 if (newgrp) {
408 int err = erofs_register_workgroup(sb, &grp->obj, 0);
409
410 if (err) {
411 kmem_cache_free(z_erofs_workgroup_cachep, grp);
412 return ERR_PTR(-EAGAIN);
413 }
414 }
415
416 *owned_head = *grp_ret = grp;
417
418 mutex_lock(&work->lock);
419 return work;
420 }
421
422 static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp,
423 unsigned int llen)
424 {
425 while (1) {
426 unsigned int orig_llen = grp->llen;
427
428 if (orig_llen >= llen || orig_llen ==
429 cmpxchg(&grp->llen, orig_llen, llen))
430 break;
431 }
432 }
433
434 #define builder_is_followed(builder) \
435 ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
436
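/*
 * Attach the builder to the work covering this physical cluster: look
 * up an existing workgroup first and fall back to registering a new
 * one, then wire up the pagevec and (for primary works) the compressed
 * page array used for in-place decompression.
 */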
437 static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
438 struct super_block *sb,
439 struct erofs_map_blocks *map,
440 z_erofs_vle_owned_workgrp_t *owned_head)
441 {
442 const unsigned clusterpages = erofs_clusterpages(EROFS_SB(sb));
443 const erofs_blk_t index = erofs_blknr(map->m_pa);
444 const unsigned pageofs = map->m_la & ~PAGE_MASK;
445 struct z_erofs_vle_workgroup *grp;
446 struct z_erofs_vle_work *work;
447
448 DBG_BUGON(builder->work != NULL);
449
450 /* must be Z_EROFS_VLE_WORKGRP_TAIL or a previously claimed workgroup */
451 DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
452 DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
453
454 DBG_BUGON(erofs_blkoff(map->m_pa));
455
456 repeat:
457 work = z_erofs_vle_work_lookup(sb, index,
458 pageofs, &grp, &builder->role, owned_head, &builder->hosted);
459 if (work != NULL) {
460 __update_workgrp_llen(grp, map->m_llen);
461 goto got_it;
462 }
463
464 work = z_erofs_vle_work_register(sb, &grp, map, index, pageofs,
465 &builder->role, owned_head, &builder->hosted);
466
467 if (unlikely(work == ERR_PTR(-EAGAIN)))
468 goto repeat;
469
470 if (unlikely(IS_ERR(work)))
471 return PTR_ERR(work);
472 got_it:
473 z_erofs_pagevec_ctor_init(&builder->vector,
474 Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, work->vcnt);
475
476 if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
477 /* enable possibly in-place decompression */
478 builder->compressed_pages = grp->compressed_pages;
479 builder->compressed_deficit = clusterpages;
480 } else {
481 builder->compressed_pages = NULL;
482 builder->compressed_deficit = 0;
483 }
484
485 builder->grp = grp;
486 builder->work = work;
487 return 0;
488 }
489
490 /*
491 * keep in mind that referenced workgroups will only be freed
492 * after an RCU grace period, so rcu_read_lock() can
493 * prevent a workgroup from being freed.
494 */
495 static void z_erofs_rcu_callback(struct rcu_head *head)
496 {
497 struct z_erofs_vle_work *work = container_of(head,
498 struct z_erofs_vle_work, rcu);
499 struct z_erofs_vle_workgroup *grp =
500 z_erofs_vle_work_workgroup(work, true);
501
502 kmem_cache_free(z_erofs_workgroup_cachep, grp);
503 }
504
505 void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
506 {
507 struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
508 struct z_erofs_vle_workgroup, obj);
509 struct z_erofs_vle_work *const work = &vgrp->work;
510
511 call_rcu(&work->rcu, z_erofs_rcu_callback);
512 }
513
514 static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
515 struct z_erofs_vle_work *work __maybe_unused)
516 {
517 erofs_workgroup_put(&grp->obj);
518 }
519
520 void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
521 {
522 struct z_erofs_vle_workgroup *grp =
523 z_erofs_vle_work_workgroup(work, true);
524
525 __z_erofs_vle_work_release(grp, work);
526 }
527
528 static inline bool
529 z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
530 {
531 struct z_erofs_vle_work *work = builder->work;
532
533 if (work == NULL)
534 return false;
535
536 z_erofs_pagevec_ctor_exit(&builder->vector, false);
537 mutex_unlock(&work->lock);
538
539 /*
540 * if all pending pages are added, don't hold work reference
541 * any longer if the current work isn't hosted by ourselves.
542 */
543 if (!builder->hosted)
544 __z_erofs_vle_work_release(builder->grp, work);
545
546 builder->work = NULL;
547 builder->grp = NULL;
548 return true;
549 }
550
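/*
 * Staging pages are short-lived pages (tagged with
 * Z_EROFS_MAPPING_STAGING) that only exist to hold intermediate data;
 * they are recycled through the caller's page pool once decompression
 * is done.
 */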
551 static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
552 gfp_t gfp)
553 {
554 struct page *page = erofs_allocpage(pagepool, gfp);
555
556 if (unlikely(page == NULL))
557 return NULL;
558
559 page->mapping = Z_EROFS_MAPPING_STAGING;
560 return page;
561 }
562
563 struct z_erofs_vle_frontend {
564 struct inode *const inode;
565
566 struct z_erofs_vle_work_builder builder;
567 struct erofs_map_blocks_iter m_iter;
568
569 z_erofs_vle_owned_workgrp_t owned_head;
570
571 bool initial;
572 #if (EROFS_FS_ZIP_CACHE_LVL >= 2)
573 erofs_off_t cachedzone_la;
574 #endif
575 };
576
577 #define VLE_FRONTEND_INIT(__i) { \
578 .inode = __i, \
579 .m_iter = { \
580 { .m_llen = 0, .m_plen = 0 }, \
581 .mpage = NULL \
582 }, \
583 .builder = VLE_WORK_BUILDER_INIT(), \
584 .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
585 .initial = true, }
586
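/*
 * Feed one locked file page into the current decompression frontend:
 * walk the page from the end towards the beginning, extending or
 * restarting the map/work iteration whenever the position falls
 * outside the current map_blocks extent, and register each covered
 * part with the work's pagevec.
 */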
587 static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
588 struct page *page,
589 struct list_head *page_pool)
590 {
591 struct super_block *const sb = fe->inode->i_sb;
592 struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
593 struct erofs_map_blocks_iter *const m = &fe->m_iter;
594 struct erofs_map_blocks *const map = &m->map;
595 struct z_erofs_vle_work_builder *const builder = &fe->builder;
596 const loff_t offset = page_offset(page);
597
598 bool tight = builder_is_followed(builder);
599 struct z_erofs_vle_work *work = builder->work;
600
601 #ifdef EROFS_FS_HAS_MANAGED_CACHE
602 struct address_space *const mngda = sbi->managed_cache->i_mapping;
603 struct z_erofs_vle_workgroup *grp;
604 bool noio_outoforder;
605 #endif
606
607 enum z_erofs_page_type page_type;
608 unsigned cur, end, spiltted, index;
609 int err;
610
611 /* register locked file pages as online pages in pack */
612 z_erofs_onlinepage_init(page);
613
614 spiltted = 0;
615 end = PAGE_SIZE;
616 repeat:
617 cur = end - 1;
618
619 /* lucky, within the range of the current map_blocks */
620 if (offset + cur >= map->m_la &&
621 offset + cur < map->m_la + map->m_llen)
622 goto hitted;
623
624 /* go on to the next map_blocks */
625 debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
626
627 if (!z_erofs_vle_work_iter_end(builder))
628 fe->initial = false;
629
630 map->m_la = offset + cur;
631 map->m_llen = 0;
632 err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0);
633 if (unlikely(err))
634 goto err_out;
635
636 /* deal with hole (FIXME! broken now) */
637 if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
638 goto hitted;
639
640 DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
641 BUG_ON(erofs_blkoff(map->m_pa));
642
643 err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
644 if (unlikely(err))
645 goto err_out;
646
647 #ifdef EROFS_FS_HAS_MANAGED_CACHE
648 grp = fe->builder.grp;
649
650 /* let's do out-of-order decompression for noio */
651 noio_outoforder = grab_managed_cache_pages(mngda,
652 erofs_blknr(map->m_pa),
653 grp->compressed_pages, erofs_blknr(map->m_plen),
654 /* compressed page caching selection strategy */
655 fe->initial | (EROFS_FS_ZIP_CACHE_LVL >= 2 ?
656 map->m_la < fe->cachedzone_la : 0));
657
658 if (noio_outoforder && builder_is_followed(builder))
659 builder->role = Z_EROFS_VLE_WORK_PRIMARY;
660 #endif
661
662 tight &= builder_is_followed(builder);
663 work = builder->work;
664 hitted:
665 cur = end - min_t(unsigned, offset + end - map->m_la, end);
666 if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
667 zero_user_segment(page, cur, end);
668 goto next_part;
669 }
670
671 /* let's derive page type */
672 page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
673 (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
674 (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
675 Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
676
677 retry:
678 err = z_erofs_vle_work_add_page(builder, page, page_type);
679 /* should allocate an additional staging page for pagevec */
680 if (err == -EAGAIN) {
681 struct page *const newpage =
682 __stagingpage_alloc(page_pool, GFP_NOFS);
683
684 err = z_erofs_vle_work_add_page(builder,
685 newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE);
686 if (!err)
687 goto retry;
688 }
689
690 if (unlikely(err))
691 goto err_out;
692
693 index = page->index - map->m_la / PAGE_SIZE;
694
695 /* FIXME! avoid the last redundant fixup & endio */
696 z_erofs_onlinepage_fixup(page, index, true);
697 ++spiltted;
698
699 /* also update nr_pages and increase queued_pages */
700 work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
701 next_part:
702 /* can be used for verification */
703 map->m_llen = offset + cur - map->m_la;
704
705 end = cur;
706 if (end > 0)
707 goto repeat;
708
709 /* FIXME! avoid the last redundant fixup & endio */
710 z_erofs_onlinepage_endio(page);
711
712 debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
713 __func__, page, spiltted, map->m_llen);
714 return 0;
715
716 err_out:
717 /* TODO: the missing error handling cases */
718 return err;
719 }
720
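/*
 * Each completed bio subtracts one from 'pending_bios' and the
 * submitter adds the total bio count once everything is issued;
 * whichever call brings the counter back to zero kicks off
 * decompression, either via the workqueue (background, tag bit set)
 * or by waking up the foreground waiter.
 */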
721 static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
722 {
723 tagptr1_t t = tagptr_init(tagptr1_t, ptr);
724 struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
725 bool background = tagptr_unfold_tags(t);
726
727 if (atomic_add_return(bios, &io->pending_bios))
728 return;
729
730 if (background)
731 queue_work(z_erofs_workqueue, &io->u.work);
732 else
733 wake_up(&io->u.wait);
734 }
735
736 static inline void z_erofs_vle_read_endio(struct bio *bio)
737 {
738 const blk_status_t err = bio->bi_status;
739 unsigned i;
740 struct bio_vec *bvec;
741 #ifdef EROFS_FS_HAS_MANAGED_CACHE
742 struct address_space *mngda = NULL;
743 #endif
744
745 bio_for_each_segment_all(bvec, bio, i) {
746 struct page *page = bvec->bv_page;
747 bool cachemngd = false;
748
749 DBG_BUGON(PageUptodate(page));
750 BUG_ON(page->mapping == NULL);
751
752 #ifdef EROFS_FS_HAS_MANAGED_CACHE
753 if (unlikely(mngda == NULL && !z_erofs_is_stagingpage(page))) {
754 struct inode *const inode = page->mapping->host;
755 struct super_block *const sb = inode->i_sb;
756
757 mngda = EROFS_SB(sb)->managed_cache->i_mapping;
758 }
759
760 /*
761 * If mngda has not been obtained yet, it equals NULL;
762 * however, page->mapping is never NULL if everything works properly.
763 */
764 cachemngd = (page->mapping == mngda);
765 #endif
766
767 if (unlikely(err))
768 SetPageError(page);
769 else if (cachemngd)
770 SetPageUptodate(page);
771
772 if (cachemngd)
773 unlock_page(page);
774 }
775
776 z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
777 bio_put(bio);
778 }
779
780 static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
781 static DEFINE_MUTEX(z_pagemap_global_lock);
782
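/*
 * Decompress a single workgroup: collect the file pages recorded in
 * the pagevec (plus any file pages reused as compressed storage),
 * then either copy the plain cluster, try the fast per-CPU
 * decompressor, or fall back to vmap-based decompression with staging
 * pages filling the holes.
 */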
783 static int z_erofs_vle_unzip(struct super_block *sb,
784 struct z_erofs_vle_workgroup *grp,
785 struct list_head *page_pool)
786 {
787 struct erofs_sb_info *const sbi = EROFS_SB(sb);
788 #ifdef EROFS_FS_HAS_MANAGED_CACHE
789 struct address_space *const mngda = sbi->managed_cache->i_mapping;
790 #endif
791 const unsigned clusterpages = erofs_clusterpages(sbi);
792
793 struct z_erofs_pagevec_ctor ctor;
794 unsigned nr_pages;
795 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
796 unsigned sparsemem_pages = 0;
797 #endif
798 struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
799 struct page **pages, **compressed_pages, *page;
800 unsigned i, llen;
801
802 enum z_erofs_page_type page_type;
803 bool overlapped;
804 struct z_erofs_vle_work *work;
805 void *vout;
806 int err;
807
808 might_sleep();
809 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
810 work = z_erofs_vle_grab_primary_work(grp);
811 #else
812 BUG();
813 #endif
814 BUG_ON(!READ_ONCE(work->nr_pages));
815
816 mutex_lock(&work->lock);
817 nr_pages = work->nr_pages;
818
819 if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
820 pages = pages_onstack;
821 else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
822 mutex_trylock(&z_pagemap_global_lock))
823 pages = z_pagemap_global;
824 else {
825 repeat:
826 pages = kvmalloc_array(nr_pages,
827 sizeof(struct page *), GFP_KERNEL);
828
829 /* fallback to global pagemap for the lowmem scenario */
830 if (unlikely(pages == NULL)) {
831 if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
832 goto repeat;
833 else {
834 mutex_lock(&z_pagemap_global_lock);
835 pages = z_pagemap_global;
836 }
837 }
838 }
839
840 for (i = 0; i < nr_pages; ++i)
841 pages[i] = NULL;
842
843 z_erofs_pagevec_ctor_init(&ctor,
844 Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, 0);
845
846 for (i = 0; i < work->vcnt; ++i) {
847 unsigned pagenr;
848
849 page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);
850
851 /* all pages in pagevec ought to be valid */
852 DBG_BUGON(page == NULL);
853 DBG_BUGON(page->mapping == NULL);
854
855 if (z_erofs_gather_if_stagingpage(page_pool, page))
856 continue;
857
858 if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
859 pagenr = 0;
860 else
861 pagenr = z_erofs_onlinepage_index(page);
862
863 BUG_ON(pagenr >= nr_pages);
864
865 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
866 BUG_ON(pages[pagenr] != NULL);
867 ++sparsemem_pages;
868 #endif
869 pages[pagenr] = page;
870 }
871
872 z_erofs_pagevec_ctor_exit(&ctor, true);
873
874 overlapped = false;
875 compressed_pages = grp->compressed_pages;
876
877 for (i = 0; i < clusterpages; ++i) {
878 unsigned pagenr;
879
880 page = compressed_pages[i];
881
882 /* all compressed pages ought to be valid */
883 DBG_BUGON(page == NULL);
884 DBG_BUGON(page->mapping == NULL);
885
886 if (z_erofs_is_stagingpage(page))
887 continue;
888 #ifdef EROFS_FS_HAS_MANAGED_CACHE
889 else if (page->mapping == mngda) {
890 BUG_ON(PageLocked(page));
891 BUG_ON(!PageUptodate(page));
892 continue;
893 }
894 #endif
895
896 /* only non-head pages can be reused as compressed pages */
897 pagenr = z_erofs_onlinepage_index(page);
898
899 BUG_ON(pagenr >= nr_pages);
900 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
901 BUG_ON(pages[pagenr] != NULL);
902 ++sparsemem_pages;
903 #endif
904 pages[pagenr] = page;
905
906 overlapped = true;
907 }
908
909 llen = (nr_pages << PAGE_SHIFT) - work->pageofs;
910
911 if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) {
912 /* FIXME! this should be fixed in the future */
913 BUG_ON(grp->llen != llen);
914
915 err = z_erofs_vle_plain_copy(compressed_pages, clusterpages,
916 pages, nr_pages, work->pageofs);
917 goto out;
918 }
919
920 if (llen > grp->llen)
921 llen = grp->llen;
922
923 err = z_erofs_vle_unzip_fast_percpu(compressed_pages,
924 clusterpages, pages, llen, work->pageofs,
925 z_erofs_onlinepage_endio);
926 if (err != -ENOTSUPP)
927 goto out_percpu;
928
929 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
930 if (sparsemem_pages >= nr_pages) {
931 BUG_ON(sparsemem_pages > nr_pages);
932 goto skip_allocpage;
933 }
934 #endif
935
936 for (i = 0; i < nr_pages; ++i) {
937 if (pages[i] != NULL)
938 continue;
939
940 pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS);
941 }
942
943 #ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
944 skip_allocpage:
945 #endif
946 vout = erofs_vmap(pages, nr_pages);
947
948 err = z_erofs_vle_unzip_vmap(compressed_pages,
949 clusterpages, vout, llen, work->pageofs, overlapped);
950
951 erofs_vunmap(vout, nr_pages);
952
953 out:
954 for (i = 0; i < nr_pages; ++i) {
955 page = pages[i];
956 DBG_BUGON(page->mapping == NULL);
957
958 /* recycle all individual staging pages */
959 if (z_erofs_gather_if_stagingpage(page_pool, page))
960 continue;
961
962 if (unlikely(err < 0))
963 SetPageError(page);
964
965 z_erofs_onlinepage_endio(page);
966 }
967
968 out_percpu:
969 for (i = 0; i < clusterpages; ++i) {
970 page = compressed_pages[i];
971
972 #ifdef EROFS_FS_HAS_MANAGED_CACHE
973 if (page->mapping == mngda)
974 continue;
975 #endif
976 /* recycle all individual staging pages */
977 (void)z_erofs_gather_if_stagingpage(page_pool, page);
978
979 WRITE_ONCE(compressed_pages[i], NULL);
980 }
981
982 if (pages == z_pagemap_global)
983 mutex_unlock(&z_pagemap_global_lock);
984 else if (unlikely(pages != pages_onstack))
985 kvfree(pages);
986
987 work->nr_pages = 0;
988 work->vcnt = 0;
989
990 /* all work locks MUST be taken before the following line */
991
992 WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);
993
994 /* all work locks SHOULD be released right now */
995 mutex_unlock(&work->lock);
996
997 z_erofs_vle_work_release(work);
998 return err;
999 }
1000
1001 static void z_erofs_vle_unzip_all(struct super_block *sb,
1002 struct z_erofs_vle_unzip_io *io,
1003 struct list_head *page_pool)
1004 {
1005 z_erofs_vle_owned_workgrp_t owned = io->head;
1006
1007 while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
1008 struct z_erofs_vle_workgroup *grp;
1009
1010 /* 'owned' can never equal Z_EROFS_VLE_WORKGRP_TAIL */
1011 DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);
1012
1013 /* 'owned' can never equal Z_EROFS_VLE_WORKGRP_NIL */
1014 DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);
1015
1016 grp = owned;
1017 owned = READ_ONCE(grp->next);
1018
1019 z_erofs_vle_unzip(sb, grp, page_pool);
1020 }
1021 }
1022
1023 static void z_erofs_vle_unzip_wq(struct work_struct *work)
1024 {
1025 struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
1026 struct z_erofs_vle_unzip_io_sb, io.u.work);
1027 LIST_HEAD(page_pool);
1028
1029 BUG_ON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
1030 z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);
1031
1032 put_pages_list(&page_pool);
1033 kvfree(iosb);
1034 }
1035
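/*
 * Foreground I/O reuses the on-stack descriptor passed in by the
 * caller and completes by waking its waitqueue; background I/O
 * allocates a z_erofs_vle_unzip_io_sb of its own and is finished from
 * the decompression workqueue.
 */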
1036 static inline struct z_erofs_vle_unzip_io *
1037 prepare_io_handler(struct super_block *sb,
1038 struct z_erofs_vle_unzip_io *io,
1039 bool background)
1040 {
1041 struct z_erofs_vle_unzip_io_sb *iosb;
1042
1043 if (!background) {
1044 /* waitqueue available for foreground io */
1045 BUG_ON(io == NULL);
1046
1047 init_waitqueue_head(&io->u.wait);
1048 atomic_set(&io->pending_bios, 0);
1049 goto out;
1050 }
1051
1052 if (io != NULL)
1053 BUG();
1054 else {
1055 /* allocate extra io descriptor for background io */
1056 iosb = kvzalloc(sizeof(struct z_erofs_vle_unzip_io_sb),
1057 GFP_KERNEL | __GFP_NOFAIL);
1058 BUG_ON(iosb == NULL);
1059
1060 io = &iosb->io;
1061 }
1062
1063 iosb->sb = sb;
1064 INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
1065 out:
1066 io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
1067 return io;
1068 }
1069
1070 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1071 /* true - unlocked (noio), false - locked (need submit io) */
1072 static inline bool recover_managed_page(struct z_erofs_vle_workgroup *grp,
1073 struct page *page)
1074 {
1075 wait_on_page_locked(page);
1076 if (PagePrivate(page) && PageUptodate(page))
1077 return true;
1078
1079 lock_page(page);
1080 if (unlikely(!PagePrivate(page))) {
1081 set_page_private(page, (unsigned long)grp);
1082 SetPagePrivate(page);
1083 }
1084 if (unlikely(PageUptodate(page))) {
1085 unlock_page(page);
1086 return true;
1087 }
1088 return false;
1089 }
1090
1091 #define __FSIO_1 1
1092 #else
1093 #define __FSIO_1 0
1094 #endif
1095
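/*
 * Walk the owned workgroup chain, closing it along the way, and build
 * read bios for every compressed block that is not already uptodate in
 * the managed cache. With the managed cache enabled, workgroups whose
 * blocks are all cached are moved onto a separate "noio" chain
 * (ios[0]) so they can be decompressed without waiting for any I/O.
 */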
1096 static bool z_erofs_vle_submit_all(struct super_block *sb,
1097 z_erofs_vle_owned_workgrp_t owned_head,
1098 struct list_head *pagepool,
1099 struct z_erofs_vle_unzip_io *fg_io,
1100 bool force_fg)
1101 {
1102 struct erofs_sb_info *const sbi = EROFS_SB(sb);
1103 const unsigned clusterpages = erofs_clusterpages(sbi);
1104 const gfp_t gfp = GFP_NOFS;
1105 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1106 struct address_space *const mngda = sbi->managed_cache->i_mapping;
1107 struct z_erofs_vle_workgroup *lstgrp_noio = NULL, *lstgrp_io = NULL;
1108 #endif
1109 struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1];
1110 struct bio *bio;
1111 tagptr1_t bi_private;
1112 /* since bio will be NULL, no need to initialize last_index */
1113 pgoff_t uninitialized_var(last_index);
1114 bool force_submit = false;
1115 unsigned nr_bios;
1116
1117 if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
1118 return false;
1119
1120 /*
1121 * force_fg == 1, (io, fg_io[0]) no io, (io, fg_io[1]) need submit io
1122 * force_fg == 0, (io, fg_io[0]) no io; (io[1], bg_io) need submit io
1123 */
1124 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1125 ios[0] = prepare_io_handler(sb, fg_io + 0, false);
1126 #endif
1127
1128 if (force_fg) {
1129 ios[__FSIO_1] = prepare_io_handler(sb, fg_io + __FSIO_1, false);
1130 bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 0);
1131 } else {
1132 ios[__FSIO_1] = prepare_io_handler(sb, NULL, true);
1133 bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 1);
1134 }
1135
1136 nr_bios = 0;
1137 force_submit = false;
1138 bio = NULL;
1139
1140 /* by default, all need io submission */
1141 ios[__FSIO_1]->head = owned_head;
1142
1143 do {
1144 struct z_erofs_vle_workgroup *grp;
1145 struct page **compressed_pages, *oldpage, *page;
1146 pgoff_t first_index;
1147 unsigned i = 0;
1148 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1149 unsigned int noio = 0;
1150 bool cachemngd;
1151 #endif
1152 int err;
1153
1154 /* 'owned_head' can never equal either of the following */
1155 DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
1156 DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);
1157
1158 grp = owned_head;
1159
1160 /* close the main owned chain at first */
1161 owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
1162 Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
1163
1164 first_index = grp->obj.index;
1165 compressed_pages = grp->compressed_pages;
1166
1167 force_submit |= (first_index != last_index + 1);
1168 repeat:
1169 /* fill in all compressed page slots */
1170 oldpage = page = READ_ONCE(compressed_pages[i]);
1171
1172 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1173 cachemngd = false;
1174
1175 if (page == EROFS_UNALLOCATED_CACHED_PAGE) {
1176 cachemngd = true;
1177 goto do_allocpage;
1178 } else if (page != NULL) {
1179 if (page->mapping != mngda)
1180 BUG_ON(PageUptodate(page));
1181 else if (recover_managed_page(grp, page)) {
1182 /* page is uptodate, skip io submission */
1183 force_submit = true;
1184 ++noio;
1185 goto skippage;
1186 }
1187 } else {
1188 do_allocpage:
1189 #else
1190 if (page != NULL)
1191 BUG_ON(PageUptodate(page));
1192 else {
1193 #endif
1194 page = __stagingpage_alloc(pagepool, gfp);
1195
1196 if (oldpage != cmpxchg(compressed_pages + i,
1197 oldpage, page)) {
1198 list_add(&page->lru, pagepool);
1199 goto repeat;
1200 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1201 } else if (cachemngd && !add_to_page_cache_lru(page,
1202 mngda, first_index + i, gfp)) {
1203 set_page_private(page, (unsigned long)grp);
1204 SetPagePrivate(page);
1205 #endif
1206 }
1207 }
1208
1209 if (bio != NULL && force_submit) {
1210 submit_bio_retry:
1211 __submit_bio(bio, REQ_OP_READ, 0);
1212 bio = NULL;
1213 }
1214
1215 if (bio == NULL) {
1216 bio = prepare_bio(sb, first_index + i,
1217 BIO_MAX_PAGES, z_erofs_vle_read_endio);
1218 bio->bi_private = tagptr_cast_ptr(bi_private);
1219
1220 ++nr_bios;
1221 }
1222
1223 err = bio_add_page(bio, page, PAGE_SIZE, 0);
1224 if (err < PAGE_SIZE)
1225 goto submit_bio_retry;
1226
1227 force_submit = false;
1228 last_index = first_index + i;
1229 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1230 skippage:
1231 #endif
1232 if (++i < clusterpages)
1233 goto repeat;
1234
1235 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1236 if (noio < clusterpages) {
1237 lstgrp_io = grp;
1238 } else {
1239 z_erofs_vle_owned_workgrp_t iogrp_next =
1240 owned_head == Z_EROFS_VLE_WORKGRP_TAIL ?
1241 Z_EROFS_VLE_WORKGRP_TAIL_CLOSED :
1242 owned_head;
1243
1244 if (lstgrp_io == NULL)
1245 ios[1]->head = iogrp_next;
1246 else
1247 WRITE_ONCE(lstgrp_io->next, iogrp_next);
1248
1249 if (lstgrp_noio == NULL)
1250 ios[0]->head = grp;
1251 else
1252 WRITE_ONCE(lstgrp_noio->next, grp);
1253
1254 lstgrp_noio = grp;
1255 }
1256 #endif
1257 } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);
1258
1259 if (bio != NULL)
1260 __submit_bio(bio, REQ_OP_READ, 0);
1261
1262 #ifndef EROFS_FS_HAS_MANAGED_CACHE
1263 BUG_ON(!nr_bios);
1264 #else
1265 if (lstgrp_noio != NULL)
1266 WRITE_ONCE(lstgrp_noio->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
1267
1268 if (!force_fg && !nr_bios) {
1269 kvfree(container_of(ios[1],
1270 struct z_erofs_vle_unzip_io_sb, io));
1271 return true;
1272 }
1273 #endif
1274
1275 z_erofs_vle_unzip_kickoff(tagptr_cast_ptr(bi_private), nr_bios);
1276 return true;
1277 }
1278
1279 static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
1280 struct list_head *pagepool,
1281 bool force_fg)
1282 {
1283 struct super_block *sb = f->inode->i_sb;
1284 struct z_erofs_vle_unzip_io io[1 + __FSIO_1];
1285
1286 if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
1287 return;
1288
1289 #ifdef EROFS_FS_HAS_MANAGED_CACHE
1290 z_erofs_vle_unzip_all(sb, &io[0], pagepool);
1291 #endif
1292 if (!force_fg)
1293 return;
1294
1295 /* wait until all bios are completed */
1296 wait_event(io[__FSIO_1].u.wait,
1297 !atomic_read(&io[__FSIO_1].pending_bios));
1298
1299 /* let's do synchronous decompression */
1300 z_erofs_vle_unzip_all(sb, &io[__FSIO_1], pagepool);
1301 }
1302
1303 static int z_erofs_vle_normalaccess_readpage(struct file *file,
1304 struct page *page)
1305 {
1306 struct inode *const inode = page->mapping->host;
1307 struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
1308 int err;
1309 LIST_HEAD(pagepool);
1310
1311 #if (EROFS_FS_ZIP_CACHE_LVL >= 2)
1312 f.cachedzone_la = page->index << PAGE_SHIFT;
1313 #endif
1314 err = z_erofs_do_read_page(&f, page, &pagepool);
1315 (void)z_erofs_vle_work_iter_end(&f.builder);
1316
1317 if (err) {
1318 errln("%s, failed to read, err [%d]", __func__, err);
1319 goto out;
1320 }
1321
1322 z_erofs_submit_and_unzip(&f, &pagepool, true);
1323 out:
1324 if (f.m_iter.mpage != NULL)
1325 put_page(f.m_iter.mpage);
1326
1327 /* clean up the remaining free pages */
1328 put_pages_list(&pagepool);
1329 return 0;
1330 }
1331
1332 static inline int __z_erofs_vle_normalaccess_readpages(
1333 struct file *filp,
1334 struct address_space *mapping,
1335 struct list_head *pages, unsigned nr_pages, bool sync)
1336 {
1337 struct inode *const inode = mapping->host;
1338
1339 struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
1340 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
1341 struct page *head = NULL;
1342 LIST_HEAD(pagepool);
1343
1344 #if (EROFS_FS_ZIP_CACHE_LVL >= 2)
1345 f.cachedzone_la = lru_to_page(pages)->index << PAGE_SHIFT;
1346 #endif
1347 for (; nr_pages; --nr_pages) {
1348 struct page *page = lru_to_page(pages);
1349
1350 prefetchw(&page->flags);
1351 list_del(&page->lru);
1352
1353 if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
1354 list_add(&page->lru, &pagepool);
1355 continue;
1356 }
1357
1358 BUG_ON(PagePrivate(page));
1359 set_page_private(page, (unsigned long)head);
1360 head = page;
1361 }
1362
1363 while (head != NULL) {
1364 struct page *page = head;
1365 int err;
1366
1367 /* traversal in reverse order */
1368 head = (void *)page_private(page);
1369
1370 err = z_erofs_do_read_page(&f, page, &pagepool);
1371 if (err) {
1372 struct erofs_vnode *vi = EROFS_V(inode);
1373
1374 errln("%s, readahead error at page %lu of nid %llu",
1375 __func__, page->index, vi->nid);
1376 }
1377
1378 put_page(page);
1379 }
1380
1381 (void)z_erofs_vle_work_iter_end(&f.builder);
1382
1383 z_erofs_submit_and_unzip(&f, &pagepool, sync);
1384
1385 if (f.m_iter.mpage != NULL)
1386 put_page(f.m_iter.mpage);
1387
1388 /* clean up the remaining free pages */
1389 put_pages_list(&pagepool);
1390 return 0;
1391 }
1392
1393 static int z_erofs_vle_normalaccess_readpages(
1394 struct file *filp,
1395 struct address_space *mapping,
1396 struct list_head *pages, unsigned nr_pages)
1397 {
1398 return __z_erofs_vle_normalaccess_readpages(filp,
1399 mapping, pages, nr_pages,
1400 nr_pages < 4 /* sync */);
1401 }
1402
1403 const struct address_space_operations z_erofs_vle_normalaccess_aops = {
1404 .readpage = z_erofs_vle_normalaccess_readpage,
1405 .readpages = z_erofs_vle_normalaccess_readpages,
1406 };
1407
1408 #define __vle_cluster_advise(x, bit, bits) \
1409 ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1))
1410
1411 #define __vle_cluster_type(advise) __vle_cluster_advise(advise, \
1412 Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT, Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS)
1413
1414 enum {
1415 Z_EROFS_VLE_CLUSTER_TYPE_PLAIN,
1416 Z_EROFS_VLE_CLUSTER_TYPE_HEAD,
1417 Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD,
1418 Z_EROFS_VLE_CLUSTER_TYPE_RESERVED,
1419 Z_EROFS_VLE_CLUSTER_TYPE_MAX
1420 };
1421
1422 #define vle_cluster_type(di) \
1423 __vle_cluster_type((di)->di_advise)
1424
1425 static inline unsigned
1426 vle_compressed_index_clusterofs(unsigned clustersize,
1427 struct z_erofs_vle_decompressed_index *di)
1428 {
1429 debugln("%s, vle=%pK, advise=%x (type %u), clusterofs=%x blkaddr=%x",
1430 __func__, di, di->di_advise, vle_cluster_type(di),
1431 di->di_clusterofs, di->di_u.blkaddr);
1432
1433 switch (vle_cluster_type(di)) {
1434 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
1435 break;
1436 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
1437 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
1438 return di->di_clusterofs;
1439 default:
1440 BUG_ON(1);
1441 }
1442 return clustersize;
1443 }
1444
1445 static inline erofs_blk_t
1446 vle_extent_blkaddr(struct inode *inode, pgoff_t index)
1447 {
1448 struct erofs_sb_info *sbi = EROFS_I_SB(inode);
1449 struct erofs_vnode *vi = EROFS_V(inode);
1450
1451 unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
1452 vi->xattr_isize) + sizeof(struct erofs_extent_header) +
1453 index * sizeof(struct z_erofs_vle_decompressed_index);
1454
1455 return erofs_blknr(iloc(sbi, vi->nid) + ofs);
1456 }
1457
1458 static inline unsigned int
1459 vle_extent_blkoff(struct inode *inode, pgoff_t index)
1460 {
1461 struct erofs_sb_info *sbi = EROFS_I_SB(inode);
1462 struct erofs_vnode *vi = EROFS_V(inode);
1463
1464 unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
1465 vi->xattr_isize) + sizeof(struct erofs_extent_header) +
1466 index * sizeof(struct z_erofs_vle_decompressed_index);
1467
1468 return erofs_blkoff(iloc(sbi, vi->nid) + ofs);
1469 }
1470
1471 /*
1472 * Variable-sized Logical Extent (Fixed Physical Cluster) Compression Mode
1473 * ---
1474 * VLE compression mode attempts to compress a variable amount of logical
1475 * data into a physical cluster of fixed size.
1476 * VLE compression mode uses "struct z_erofs_vle_decompressed_index".
1477 */
1478 static erofs_off_t vle_get_logical_extent_head(
1479 struct inode *inode,
1480 struct page **page_iter,
1481 void **kaddr_iter,
1482 unsigned lcn, /* logical cluster number */
1483 erofs_blk_t *pcn,
1484 unsigned *flags)
1485 {
1486 /* for extent meta */
1487 struct page *page = *page_iter;
1488 erofs_blk_t blkaddr = vle_extent_blkaddr(inode, lcn);
1489 struct z_erofs_vle_decompressed_index *di;
1490 unsigned long long ofs;
1491 const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
1492 const unsigned int clustersize = 1 << clusterbits;
1493
1494 if (page->index != blkaddr) {
1495 kunmap_atomic(*kaddr_iter);
1496 unlock_page(page);
1497 put_page(page);
1498
1499 *page_iter = page = erofs_get_meta_page(inode->i_sb,
1500 blkaddr, false);
1501 *kaddr_iter = kmap_atomic(page);
1502 }
1503
1504 di = *kaddr_iter + vle_extent_blkoff(inode, lcn);
1505 switch (vle_cluster_type(di)) {
1506 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
1507 BUG_ON(!di->di_u.delta[0]);
1508 BUG_ON(lcn < di->di_u.delta[0]);
1509
1510 ofs = vle_get_logical_extent_head(inode,
1511 page_iter, kaddr_iter,
1512 lcn - di->di_u.delta[0], pcn, flags);
1513 break;
1514 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
1515 *flags ^= EROFS_MAP_ZIPPED;
1516 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
1517 /* clustersize should be a power of two */
1518 ofs = ((unsigned long long)lcn << clusterbits) +
1519 (le16_to_cpu(di->di_clusterofs) & (clustersize - 1));
1520 *pcn = le32_to_cpu(di->di_u.blkaddr);
1521 break;
1522 default:
1523 BUG_ON(1);
1524 }
1525 return ofs;
1526 }
1527
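/*
 * Translate a logical offset into the extent that covers it: locate
 * the logical cluster index, read its decompressed index entry and,
 * depending on the cluster type (plain/head/non-head), derive the
 * logical extent start, its length and the backing physical cluster
 * address, walking back through non-head clusters to the extent head
 * when necessary.
 */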
1528 int z_erofs_map_blocks_iter(struct inode *inode,
1529 struct erofs_map_blocks *map,
1530 struct page **mpage_ret, int flags)
1531 {
1532 /* logical extent (start, end) offset */
1533 unsigned long long ofs, end;
1534 struct z_erofs_vle_decompressed_index *di;
1535 erofs_blk_t e_blkaddr, pcn;
1536 unsigned lcn, logical_cluster_ofs, cluster_type;
1537 u32 ofs_rem;
1538 struct page *mpage = *mpage_ret;
1539 void *kaddr;
1540 bool initial;
1541 const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
1542 const unsigned int clustersize = 1 << clusterbits;
1543 int err = 0;
1544
1545 /* if both m_(l,p)len are 0, regularize l_lblk, l_lofs, etc... */
1546 initial = !map->m_llen;
1547
1548 /* when trying to read beyond EOF, leave it unmapped */
1549 if (unlikely(map->m_la >= inode->i_size)) {
1550 BUG_ON(!initial);
1551 map->m_llen = map->m_la + 1 - inode->i_size;
1552 map->m_la = inode->i_size - 1;
1553 map->m_flags = 0;
1554 goto out;
1555 }
1556
1557 debugln("%s, m_la %llu m_llen %llu --- start", __func__,
1558 map->m_la, map->m_llen);
1559
1560 ofs = map->m_la + map->m_llen;
1561
1562 /* clustersize should be a power of two */
1563 lcn = ofs >> clusterbits;
1564 ofs_rem = ofs & (clustersize - 1);
1565
1566 e_blkaddr = vle_extent_blkaddr(inode, lcn);
1567
1568 if (mpage == NULL || mpage->index != e_blkaddr) {
1569 if (mpage != NULL)
1570 put_page(mpage);
1571
1572 mpage = erofs_get_meta_page(inode->i_sb, e_blkaddr, false);
1573 *mpage_ret = mpage;
1574 } else {
1575 lock_page(mpage);
1576 DBG_BUGON(!PageUptodate(mpage));
1577 }
1578
1579 kaddr = kmap_atomic(mpage);
1580 di = kaddr + vle_extent_blkoff(inode, lcn);
1581
1582 debugln("%s, lcn %u e_blkaddr %u e_blkoff %u", __func__, lcn,
1583 e_blkaddr, vle_extent_blkoff(inode, lcn));
1584
1585 logical_cluster_ofs = vle_compressed_index_clusterofs(clustersize, di);
1586 if (!initial) {
1587 /* [walking mode] 'map' has been already initialized */
1588 map->m_llen += logical_cluster_ofs;
1589 goto unmap_out;
1590 }
1591
1592 /* by default, compressed */
1593 map->m_flags |= EROFS_MAP_ZIPPED;
1594
1595 end = (u64)(lcn + 1) * clustersize;
1596
1597 cluster_type = vle_cluster_type(di);
1598
1599 switch (cluster_type) {
1600 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
1601 if (ofs_rem >= logical_cluster_ofs)
1602 map->m_flags ^= EROFS_MAP_ZIPPED;
1603 /* fallthrough */
1604 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
1605 if (ofs_rem == logical_cluster_ofs) {
1606 pcn = le32_to_cpu(di->di_u.blkaddr);
1607 goto exact_hitted;
1608 }
1609
1610 if (ofs_rem > logical_cluster_ofs) {
1611 ofs = lcn * clustersize | logical_cluster_ofs;
1612 pcn = le32_to_cpu(di->di_u.blkaddr);
1613 break;
1614 }
1615
1616 /* logical cluster number should be >= 1 */
1617 if (unlikely(!lcn)) {
1618 errln("invalid logical cluster 0 at nid %llu",
1619 EROFS_V(inode)->nid);
1620 err = -EIO;
1621 goto unmap_out;
1622 }
1623 end = (lcn-- * clustersize) | logical_cluster_ofs;
1624 /* fallthrough */
1625 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
1626 /* get the corresponding first chunk */
1627 ofs = vle_get_logical_extent_head(inode, mpage_ret,
1628 &kaddr, lcn, &pcn, &map->m_flags);
1629 mpage = *mpage_ret;
1630 break;
1631 default:
1632 errln("unknown cluster type %u at offset %llu of nid %llu",
1633 cluster_type, ofs, EROFS_V(inode)->nid);
1634 err = -EIO;
1635 goto unmap_out;
1636 }
1637
1638 map->m_la = ofs;
1639 exact_hitted:
1640 map->m_llen = end - ofs;
1641 map->m_plen = clustersize;
1642 map->m_pa = blknr_to_addr(pcn);
1643 map->m_flags |= EROFS_MAP_MAPPED;
1644 unmap_out:
1645 kunmap_atomic(kaddr);
1646 unlock_page(mpage);
1647 out:
1648 debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
1649 __func__, map->m_la, map->m_pa,
1650 map->m_llen, map->m_plen, map->m_flags);
1651
1652 /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
1653 DBG_BUGON(err < 0);
1654 return err;
1655 }
1656
1657