Lines Matching +full:page +full:- +full:offset
1 // SPDX-License-Identifier: GPL-2.0-only
22 #include <linux/blk-cgroup.h>
31 #include <linux/backing-dev.h>
65 static int least_priority = -1;
73 static const char Bad_offset[] = "Bad swap offset entry ";
74 static const char Unused_offset[] = "Unused swap offset entry ";
89 * swap_info_struct changes between not-full/full, it needs to
90 * add/remove itself to/from this list, but the swap_info_struct->lock
92 * before any swap_info_struct->lock.
124 * corresponding page
132 unsigned long offset, unsigned long flags) in __try_to_reclaim_swap() argument
134 swp_entry_t entry = swp_entry(si->type, offset); in __try_to_reclaim_swap()
138 folio = filemap_get_folio(swap_address_space(entry), offset); in __try_to_reclaim_swap()
161 struct rb_node *rb = rb_first(&sis->swap_extent_root); in first_se()
167 struct rb_node *rb = rb_next(&se->rb_node); in next_se()
173 * to allow the swap device to optimize its wear-levelling.
182 /* Do not discard the swap header page! */ in discard_swap()
184 start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); in discard_swap()
185 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); in discard_swap()
187 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
195 start_block = se->start_block << (PAGE_SHIFT - 9); in discard_swap()
196 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); in discard_swap()
198 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
205 return err; /* That will often be -EOPNOTSUPP */ in discard_swap()
209 offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) in offset_to_swap_extent() argument
214 rb = sis->swap_extent_root.rb_node; in offset_to_swap_extent()
217 if (offset < se->start_page) in offset_to_swap_extent()
218 rb = rb->rb_left; in offset_to_swap_extent()
219 else if (offset >= se->start_page + se->nr_pages) in offset_to_swap_extent()
220 rb = rb->rb_right; in offset_to_swap_extent()
228 sector_t swap_page_sector(struct page *page) in swap_page_sector() argument
230 struct swap_info_struct *sis = page_swap_info(page); in swap_page_sector()
233 pgoff_t offset; in swap_page_sector() local
235 offset = __page_file_index(page); in swap_page_sector()
236 se = offset_to_swap_extent(sis, offset); in swap_page_sector()
237 sector = se->start_block + (offset - se->start_page); in swap_page_sector()
238 return sector << (PAGE_SHIFT - 9); in swap_page_sector()
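offset_to_swap_extent() above walks the extent rbtree for the extent covering a swap offset, and swap_page_sector() turns the result into a 512-byte sector with the << (PAGE_SHIFT - 9) shift. Below is a minimal userspace sketch of the same arithmetic, using a plain sorted array instead of an rbtree and assuming 4 KiB pages; the struct and table values are made up for illustration.

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */

/* Simplified stand-in for struct swap_extent. */
struct extent {
	uint64_t start_page;		/* first swap page offset covered */
	uint64_t nr_pages;		/* pages covered by this extent */
	uint64_t start_block;		/* first page-sized block on disk */
};

/* Linear lookup over a sorted table; the kernel uses an rbtree instead. */
static const struct extent *lookup(const struct extent *tab, int n, uint64_t off)
{
	for (int i = 0; i < n; i++)
		if (off >= tab[i].start_page &&
		    off < tab[i].start_page + tab[i].nr_pages)
			return &tab[i];
	return NULL;
}

int main(void)
{
	const struct extent tab[] = {
		{ .start_page = 0,   .nr_pages = 100, .start_block = 5000 },
		{ .start_page = 100, .nr_pages = 50,  .start_block = 9000 },
	};
	uint64_t off = 120;
	const struct extent *se = lookup(tab, 2, off);

	/* Same arithmetic as swap_page_sector(): page-sized block first,
	 * then 512-byte sectors via the PAGE_SHIFT - 9 shift. */
	uint64_t block = se->start_block + (off - se->start_page);
	printf("offset %lu -> block %lu, sector %lu\n",
	       (unsigned long)off, (unsigned long)block,
	       (unsigned long)(block << (PAGE_SHIFT - 9)));
	return 0;
}
```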
243 * to allow the swap device to optimize its wear-levelling.
251 pgoff_t offset = start_page - se->start_page; in discard_swap_cluster() local
252 sector_t start_block = se->start_block + offset; in discard_swap_cluster()
253 sector_t nr_blocks = se->nr_pages - offset; in discard_swap_cluster()
258 nr_pages -= nr_blocks; in discard_swap_cluster()
260 start_block <<= PAGE_SHIFT - 9; in discard_swap_cluster()
261 nr_blocks <<= PAGE_SHIFT - 9; in discard_swap_cluster()
262 if (blkdev_issue_discard(si->bdev, start_block, in discard_swap_cluster()
288 info->flags = flag; in cluster_set_flag()
293 return info->data; in cluster_count()
299 info->data = c; in cluster_set_count()
305 info->flags = f; in cluster_set_count_flag()
306 info->data = c; in cluster_set_count_flag()
311 return info->data; in cluster_next()
317 info->data = n; in cluster_set_next()
323 info->flags = f; in cluster_set_next_flag()
324 info->data = n; in cluster_set_next_flag()
329 return info->flags & CLUSTER_FLAG_FREE; in cluster_is_free()
334 return info->flags & CLUSTER_FLAG_NEXT_NULL; in cluster_is_null()
339 info->flags = CLUSTER_FLAG_NEXT_NULL; in cluster_set_null()
340 info->data = 0; in cluster_set_null()
346 return info->flags & CLUSTER_FLAG_HUGE; in cluster_is_huge()
352 info->flags &= ~CLUSTER_FLAG_HUGE; in cluster_clear_huge()
356 unsigned long offset) in lock_cluster() argument
360 ci = si->cluster_info; in lock_cluster()
362 ci += offset / SWAPFILE_CLUSTER; in lock_cluster()
363 spin_lock(&ci->lock); in lock_cluster()
371 spin_unlock(&ci->lock); in unlock_cluster()
376 * swap_cluster_info if SSD-style cluster-based locking is in place.
379 struct swap_info_struct *si, unsigned long offset) in lock_cluster_or_swap_info() argument
383 /* Try to use fine-grained SSD-style locking if available: */ in lock_cluster_or_swap_info()
384 ci = lock_cluster(si, offset); in lock_cluster_or_swap_info()
387 spin_lock(&si->lock); in lock_cluster_or_swap_info()
398 spin_unlock(&si->lock); in unlock_cluster_or_swap_info()
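lock_cluster() indexes the cluster_info array with offset / SWAPFILE_CLUSTER, and lock_cluster_or_swap_info() falls back to the device-wide si->lock when no cluster_info exists (rotational devices). A rough userspace sketch of that decision, with pthread mutexes standing in for spinlocks and SWAPFILE_CLUSTER taken as 256 as in the kernel; the struct is hypothetical.

```c
#include <pthread.h>
#include <stdlib.h>

#define SWAPFILE_CLUSTER 256		/* slots per cluster */

/* Hypothetical, simplified device state. */
struct swap_dev {
	pthread_mutex_t lock;		/* coarse, device-wide lock */
	pthread_mutex_t *cluster_lock;	/* per-cluster locks, NULL if none */
	size_t nr_clusters;
};

/* Like lock_cluster_or_swap_info(): prefer the fine-grained per-cluster
 * lock when it exists, otherwise take the big device lock. */
static pthread_mutex_t *lock_for_offset(struct swap_dev *d, size_t offset)
{
	pthread_mutex_t *m = d->cluster_lock ?
		&d->cluster_lock[offset / SWAPFILE_CLUSTER] : &d->lock;

	pthread_mutex_lock(m);
	return m;
}

static void unlock_for_offset(pthread_mutex_t *m)
{
	pthread_mutex_unlock(m);
}

int main(void)
{
	struct swap_dev d = { .lock = PTHREAD_MUTEX_INITIALIZER };
	size_t nr_slots = 4096;

	/* SSD-style setup: one lock per cluster of 256 slots. */
	d.nr_clusters = nr_slots / SWAPFILE_CLUSTER;
	d.cluster_lock = calloc(d.nr_clusters, sizeof(*d.cluster_lock));
	for (size_t i = 0; i < d.nr_clusters; i++)
		pthread_mutex_init(&d.cluster_lock[i], NULL);

	pthread_mutex_t *m = lock_for_offset(&d, 1000);	/* cluster 3 */
	unlock_for_offset(m);
	free(d.cluster_lock);
	return 0;
}
```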
403 return cluster_is_null(&list->head); in cluster_list_empty()
408 return cluster_next(&list->head); in cluster_list_first()
413 cluster_set_null(&list->head); in cluster_list_init()
414 cluster_set_null(&list->tail); in cluster_list_init()
422 cluster_set_next_flag(&list->head, idx, 0); in cluster_list_add_tail()
423 cluster_set_next_flag(&list->tail, idx, 0); in cluster_list_add_tail()
426 unsigned int tail = cluster_next(&list->tail); in cluster_list_add_tail()
430 * only acquired when we held swap_info_struct->lock in cluster_list_add_tail()
433 spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); in cluster_list_add_tail()
435 spin_unlock(&ci_tail->lock); in cluster_list_add_tail()
436 cluster_set_next_flag(&list->tail, idx, 0); in cluster_list_add_tail()
445 idx = cluster_next(&list->head); in cluster_list_del_first()
446 if (cluster_next(&list->tail) == idx) { in cluster_list_del_first()
447 cluster_set_null(&list->head); in cluster_list_del_first()
448 cluster_set_null(&list->tail); in cluster_list_del_first()
450 cluster_set_next_flag(&list->head, in cluster_list_del_first()
462 * si->swap_map directly. To make sure the discarding cluster isn't in swap_cluster_schedule_discard()
466 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_cluster_schedule_discard()
469 cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx); in swap_cluster_schedule_discard()
471 schedule_work(&si->discard_work); in swap_cluster_schedule_discard()
476 struct swap_cluster_info *ci = si->cluster_info; in __free_cluster()
479 cluster_list_add_tail(&si->free_clusters, ci, idx); in __free_cluster()
484 * will be added to the free cluster list. The caller should hold si->lock.
491 info = si->cluster_info; in swap_do_scheduled_discard()
493 while (!cluster_list_empty(&si->discard_clusters)) { in swap_do_scheduled_discard()
494 idx = cluster_list_del_first(&si->discard_clusters, info); in swap_do_scheduled_discard()
495 spin_unlock(&si->lock); in swap_do_scheduled_discard()
500 spin_lock(&si->lock); in swap_do_scheduled_discard()
503 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_do_scheduled_discard()
515 spin_lock(&si->lock); in swap_discard_work()
517 spin_unlock(&si->lock); in swap_discard_work()
525 complete(&si->comp); in swap_users_ref_free()
530 struct swap_cluster_info *ci = si->cluster_info; in alloc_cluster()
532 VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx); in alloc_cluster()
533 cluster_list_del_first(&si->free_clusters, ci); in alloc_cluster()
539 struct swap_cluster_info *ci = si->cluster_info + idx; in free_cluster()
547 if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == in free_cluster()
577 * counter becomes 0, which means no page in the cluster is in use, we can
590 cluster_count(&cluster_info[idx]) - 1); in dec_cluster_info_page()
602 unsigned long offset) in scan_swap_map_ssd_cluster_conflict() argument
607 offset /= SWAPFILE_CLUSTER; in scan_swap_map_ssd_cluster_conflict()
608 conflict = !cluster_list_empty(&si->free_clusters) && in scan_swap_map_ssd_cluster_conflict()
609 offset != cluster_list_first(&si->free_clusters) && in scan_swap_map_ssd_cluster_conflict()
610 cluster_is_free(&si->cluster_info[offset]); in scan_swap_map_ssd_cluster_conflict()
615 percpu_cluster = this_cpu_ptr(si->percpu_cluster); in scan_swap_map_ssd_cluster_conflict()
616 cluster_set_null(&percpu_cluster->index); in scan_swap_map_ssd_cluster_conflict()
625 unsigned long *offset, unsigned long *scan_base) in scan_swap_map_try_ssd_cluster() argument
632 cluster = this_cpu_ptr(si->percpu_cluster); in scan_swap_map_try_ssd_cluster()
633 if (cluster_is_null(&cluster->index)) { in scan_swap_map_try_ssd_cluster()
634 if (!cluster_list_empty(&si->free_clusters)) { in scan_swap_map_try_ssd_cluster()
635 cluster->index = si->free_clusters.head; in scan_swap_map_try_ssd_cluster()
636 cluster->next = cluster_next(&cluster->index) * in scan_swap_map_try_ssd_cluster()
638 } else if (!cluster_list_empty(&si->discard_clusters)) { in scan_swap_map_try_ssd_cluster()
642 * reread cluster_next_cpu since we dropped si->lock in scan_swap_map_try_ssd_cluster()
645 *scan_base = this_cpu_read(*si->cluster_next_cpu); in scan_swap_map_try_ssd_cluster()
646 *offset = *scan_base; in scan_swap_map_try_ssd_cluster()
656 tmp = cluster->next; in scan_swap_map_try_ssd_cluster()
657 max = min_t(unsigned long, si->max, in scan_swap_map_try_ssd_cluster()
658 (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); in scan_swap_map_try_ssd_cluster()
662 if (!si->swap_map[tmp]) in scan_swap_map_try_ssd_cluster()
669 cluster_set_null(&cluster->index); in scan_swap_map_try_ssd_cluster()
672 cluster->next = tmp + 1; in scan_swap_map_try_ssd_cluster()
673 *offset = tmp; in scan_swap_map_try_ssd_cluster()
683 plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); in __del_from_avail_list()
693 static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, in swap_range_alloc() argument
696 unsigned int end = offset + nr_entries - 1; in swap_range_alloc()
698 if (offset == si->lowest_bit) in swap_range_alloc()
699 si->lowest_bit += nr_entries; in swap_range_alloc()
700 if (end == si->highest_bit) in swap_range_alloc()
701 WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); in swap_range_alloc()
702 WRITE_ONCE(si->inuse_pages, si->inuse_pages + nr_entries); in swap_range_alloc()
703 if (si->inuse_pages == si->pages) { in swap_range_alloc()
704 si->lowest_bit = si->max; in swap_range_alloc()
705 si->highest_bit = 0; in swap_range_alloc()
716 WARN_ON(!plist_node_empty(&p->avail_lists[nid])); in add_to_avail_list()
717 plist_add(&p->avail_lists[nid], &swap_avail_heads[nid]); in add_to_avail_list()
722 static void swap_range_free(struct swap_info_struct *si, unsigned long offset, in swap_range_free() argument
725 unsigned long begin = offset; in swap_range_free()
726 unsigned long end = offset + nr_entries - 1; in swap_range_free()
729 if (offset < si->lowest_bit) in swap_range_free()
730 si->lowest_bit = offset; in swap_range_free()
731 if (end > si->highest_bit) { in swap_range_free()
732 bool was_full = !si->highest_bit; in swap_range_free()
734 WRITE_ONCE(si->highest_bit, end); in swap_range_free()
735 if (was_full && (si->flags & SWP_WRITEOK)) in swap_range_free()
739 WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries); in swap_range_free()
740 if (si->flags & SWP_BLKDEV) in swap_range_free()
742 si->bdev->bd_disk->fops->swap_slot_free_notify; in swap_range_free()
745 while (offset <= end) { in swap_range_free()
746 arch_swap_invalidate_page(si->type, offset); in swap_range_free()
747 frontswap_invalidate_page(si->type, offset); in swap_range_free()
749 swap_slot_free_notify(si->bdev, offset); in swap_range_free()
750 offset++; in swap_range_free()
752 clear_shadow_from_swap_cache(si->type, begin, end); in swap_range_free()
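swap_range_alloc() and swap_range_free() maintain the lowest_bit/highest_bit window that later scans use to stop early. A toy userspace model of that bookkeeping (struct and values are illustrative, and all locking, notifiers and cache invalidation are omitted):

```c
#include <stdio.h>

/* Toy model of the lowest_bit/highest_bit hints kept per swap device:
 * they bracket the region that may still hold free slots. */
struct dev_hints {
	unsigned long lowest_bit;
	unsigned long highest_bit;
	unsigned long max;		/* number of slots */
	unsigned long inuse;
	unsigned long pages;		/* usable slots */
};

/* Mirror of swap_range_alloc(): shrink the window from whichever end the
 * allocation touched; collapse it entirely once the device is full. */
static void range_alloc(struct dev_hints *d, unsigned long off, unsigned long nr)
{
	unsigned long end = off + nr - 1;

	if (off == d->lowest_bit)
		d->lowest_bit += nr;
	if (end == d->highest_bit)
		d->highest_bit -= nr;
	d->inuse += nr;
	if (d->inuse == d->pages) {
		d->lowest_bit = d->max;	/* cuts further scans short */
		d->highest_bit = 0;
	}
}

/* Mirror of swap_range_free(): widen the window to cover the freed range. */
static void range_free(struct dev_hints *d, unsigned long off, unsigned long nr)
{
	unsigned long end = off + nr - 1;

	if (off < d->lowest_bit)
		d->lowest_bit = off;
	if (end > d->highest_bit)
		d->highest_bit = end;
	d->inuse -= nr;
}

int main(void)
{
	struct dev_hints d = { .lowest_bit = 1, .highest_bit = 1023,
			       .max = 1024, .pages = 1023 };

	range_alloc(&d, 1, 64);
	range_free(&d, 1, 64);
	printf("window: [%lu, %lu]\n", d.lowest_bit, d.highest_bit);
	return 0;
}
```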
759 if (!(si->flags & SWP_SOLIDSTATE)) { in set_cluster_next()
760 si->cluster_next = next; in set_cluster_next()
764 prev = this_cpu_read(*si->cluster_next_cpu); in set_cluster_next()
773 if (si->highest_bit <= si->lowest_bit) in set_cluster_next()
775 next = si->lowest_bit + in set_cluster_next()
776 prandom_u32_max(si->highest_bit - si->lowest_bit + 1); in set_cluster_next()
778 next = max_t(unsigned int, next, si->lowest_bit); in set_cluster_next()
780 this_cpu_write(*si->cluster_next_cpu, next); in set_cluster_next()
784 unsigned long offset) in swap_offset_available_and_locked() argument
786 if (data_race(!si->swap_map[offset])) { in swap_offset_available_and_locked()
787 spin_lock(&si->lock); in swap_offset_available_and_locked()
791 if (vm_swap_full() && READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { in swap_offset_available_and_locked()
792 spin_lock(&si->lock); in swap_offset_available_and_locked()
804 unsigned long offset; in scan_swap_map_slots() local
814 * way, however, we resort to first-free allocation, starting in scan_swap_map_slots()
817 * overall disk seek times between swap pages. -- sct in scan_swap_map_slots()
818 * But we do now try to find an empty cluster. -Andrea in scan_swap_map_slots()
822 si->flags += SWP_SCANNING; in scan_swap_map_slots()
828 if (si->flags & SWP_SOLIDSTATE) in scan_swap_map_slots()
829 scan_base = this_cpu_read(*si->cluster_next_cpu); in scan_swap_map_slots()
831 scan_base = si->cluster_next; in scan_swap_map_slots()
832 offset = scan_base; in scan_swap_map_slots()
835 if (si->cluster_info) { in scan_swap_map_slots()
836 if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) in scan_swap_map_slots()
838 } else if (unlikely(!si->cluster_nr--)) { in scan_swap_map_slots()
839 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { in scan_swap_map_slots()
840 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
844 spin_unlock(&si->lock); in scan_swap_map_slots()
849 * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info in scan_swap_map_slots()
852 scan_base = offset = si->lowest_bit; in scan_swap_map_slots()
853 last_in_cluster = offset + SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
856 for (; last_in_cluster <= si->highest_bit; offset++) { in scan_swap_map_slots()
857 if (si->swap_map[offset]) in scan_swap_map_slots()
858 last_in_cluster = offset + SWAPFILE_CLUSTER; in scan_swap_map_slots()
859 else if (offset == last_in_cluster) { in scan_swap_map_slots()
860 spin_lock(&si->lock); in scan_swap_map_slots()
861 offset -= SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
862 si->cluster_next = offset; in scan_swap_map_slots()
863 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
866 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
872 offset = scan_base; in scan_swap_map_slots()
873 spin_lock(&si->lock); in scan_swap_map_slots()
874 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
878 if (si->cluster_info) { in scan_swap_map_slots()
879 while (scan_swap_map_ssd_cluster_conflict(si, offset)) { in scan_swap_map_slots()
883 if (!scan_swap_map_try_ssd_cluster(si, &offset, in scan_swap_map_slots()
888 if (!(si->flags & SWP_WRITEOK)) in scan_swap_map_slots()
890 if (!si->highest_bit) in scan_swap_map_slots()
892 if (offset > si->highest_bit) in scan_swap_map_slots()
893 scan_base = offset = si->lowest_bit; in scan_swap_map_slots()
895 ci = lock_cluster(si, offset); in scan_swap_map_slots()
896 /* reuse swap entry of cache-only swap if not busy. */ in scan_swap_map_slots()
897 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { in scan_swap_map_slots()
900 spin_unlock(&si->lock); in scan_swap_map_slots()
901 swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); in scan_swap_map_slots()
902 spin_lock(&si->lock); in scan_swap_map_slots()
909 if (si->swap_map[offset]) { in scan_swap_map_slots()
916 WRITE_ONCE(si->swap_map[offset], usage); in scan_swap_map_slots()
917 inc_cluster_info_page(si, si->cluster_info, offset); in scan_swap_map_slots()
920 swap_range_alloc(si, offset, 1); in scan_swap_map_slots()
921 slots[n_ret++] = swp_entry(si->type, offset); in scan_swap_map_slots()
924 if ((n_ret == nr) || (offset >= si->highest_bit)) in scan_swap_map_slots()
930 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
933 spin_unlock(&si->lock); in scan_swap_map_slots()
935 spin_lock(&si->lock); in scan_swap_map_slots()
940 if (si->cluster_info) { in scan_swap_map_slots()
941 if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) in scan_swap_map_slots()
943 } else if (si->cluster_nr && !si->swap_map[++offset]) { in scan_swap_map_slots()
944 /* non-ssd case, still more slots in cluster? */ in scan_swap_map_slots()
945 --si->cluster_nr; in scan_swap_map_slots()
957 if (offset < scan_base) in scan_swap_map_slots()
960 scan_limit = si->highest_bit; in scan_swap_map_slots()
961 for (; offset <= scan_limit && --latency_ration > 0; in scan_swap_map_slots()
962 offset++) { in scan_swap_map_slots()
963 if (!si->swap_map[offset]) in scan_swap_map_slots()
969 set_cluster_next(si, offset + 1); in scan_swap_map_slots()
970 si->flags -= SWP_SCANNING; in scan_swap_map_slots()
974 spin_unlock(&si->lock); in scan_swap_map_slots()
975 while (++offset <= READ_ONCE(si->highest_bit)) { in scan_swap_map_slots()
976 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
981 if (swap_offset_available_and_locked(si, offset)) in scan_swap_map_slots()
984 offset = si->lowest_bit; in scan_swap_map_slots()
985 while (offset < scan_base) { in scan_swap_map_slots()
986 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
991 if (swap_offset_available_and_locked(si, offset)) in scan_swap_map_slots()
993 offset++; in scan_swap_map_slots()
995 spin_lock(&si->lock); in scan_swap_map_slots()
998 si->flags -= SWP_SCANNING; in scan_swap_map_slots()
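Stripped of locking, clusters and cache reclaim, the core of scan_swap_map_slots() is a bounded first-free scan from a hint offset that wraps around once and gives up when its latency budget is spent. A toy version (names and limits made up):

```c
#include <stdio.h>
#include <string.h>

#define NSLOTS 32

/* Toy first-free scan: start at 'hint', wrap once around the map, claim up
 * to 'nr' free slots, and stop early when the scan budget runs out. */
static int scan_slots(unsigned char *map, int hint, int nr, int *out)
{
	int budget = 256;		/* stand-in for the latency ration */
	int found = 0;

	for (int step = 0; step < NSLOTS && found < nr; step++) {
		int off = (hint + step) % NSLOTS;

		if (--budget < 0)
			break;		/* be kind to other allocators */
		if (!map[off]) {
			map[off] = 1;	/* claim the slot */
			out[found++] = off;
		}
	}
	return found;
}

int main(void)
{
	unsigned char map[NSLOTS] = { 0 };
	int slots[4];

	memset(map, 1, 10);		/* slots 0..9 already in use */
	int n = scan_slots(map, 8, 4, slots);
	for (int i = 0; i < n; i++)
		printf("allocated slot %d\n", slots[i]);
	return 0;
}
```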
1006 unsigned long offset; in swap_alloc_cluster() local
1010 * page swap is disabled. Warn and fail the allocation. in swap_alloc_cluster()
1017 if (cluster_list_empty(&si->free_clusters)) in swap_alloc_cluster()
1020 idx = cluster_list_first(&si->free_clusters); in swap_alloc_cluster()
1021 offset = idx * SWAPFILE_CLUSTER; in swap_alloc_cluster()
1022 ci = lock_cluster(si, offset); in swap_alloc_cluster()
1026 memset(si->swap_map + offset, SWAP_HAS_CACHE, SWAPFILE_CLUSTER); in swap_alloc_cluster()
1028 swap_range_alloc(si, offset, SWAPFILE_CLUSTER); in swap_alloc_cluster()
1029 *slot = swp_entry(si->type, offset); in swap_alloc_cluster()
1036 unsigned long offset = idx * SWAPFILE_CLUSTER; in swap_free_cluster() local
1039 ci = lock_cluster(si, offset); in swap_free_cluster()
1040 memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); in swap_free_cluster()
1044 swap_range_free(si, offset, SWAPFILE_CLUSTER); in swap_free_cluster()
1073 /* requeue si to after same-priority siblings */ in get_swap_pages()
1074 plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); in get_swap_pages()
1076 spin_lock(&si->lock); in get_swap_pages()
1077 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) { in get_swap_pages()
1079 if (plist_node_empty(&si->avail_lists[node])) { in get_swap_pages()
1080 spin_unlock(&si->lock); in get_swap_pages()
1083 WARN(!si->highest_bit, in get_swap_pages()
1085 si->type); in get_swap_pages()
1086 WARN(!(si->flags & SWP_WRITEOK), in get_swap_pages()
1088 si->type); in get_swap_pages()
1090 spin_unlock(&si->lock); in get_swap_pages()
1094 if (si->flags & SWP_BLKDEV) in get_swap_pages()
1099 spin_unlock(&si->lock); in get_swap_pages()
1102 pr_debug("scan_swap_map of si %d failed to find offset\n", in get_swap_pages()
1103 si->type); in get_swap_pages()
1109 * and since scan_swap_map_slots() can drop the si->lock, in get_swap_pages()
1110 * multiple callers probably all tried to get a page from the in get_swap_pages()
1113 * si->lock. Since we dropped the swap_avail_lock, the in get_swap_pages()
1118 if (plist_node_empty(&next->avail_lists[node])) in get_swap_pages()
1126 atomic_long_add((long)(n_goal - n_ret) * size, in get_swap_pages()
1135 unsigned long offset; in _swap_info_get() local
1142 if (data_race(!(p->flags & SWP_USED))) in _swap_info_get()
1144 offset = swp_offset(entry); in _swap_info_get()
1145 if (offset >= p->max) in _swap_info_get()
1147 if (data_race(!p->swap_map[swp_offset(entry)])) in _swap_info_get()
1175 spin_unlock(&q->lock); in swap_info_get_cont()
1177 spin_lock(&p->lock); in swap_info_get_cont()
1183 unsigned long offset, in __swap_entry_free_locked() argument
1189 count = p->swap_map[offset]; in __swap_entry_free_locked()
1205 if (swap_count_continued(p, offset, count)) in __swap_entry_free_locked()
1210 count--; in __swap_entry_free_locked()
1215 WRITE_ONCE(p->swap_map[offset], usage); in __swap_entry_free_locked()
1217 WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); in __swap_entry_free_locked()
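Each swap_map entry is a single byte: the low bits hold the map count that __swap_entry_free_locked() decrements, SWAP_HAS_CACHE marks that a swap-cache page exists, and COUNT_CONTINUED says the real count continues on a continuation page. A small sketch of decoding such a byte; the flag values below are quoted from include/linux/swap.h from memory, so treat them as assumptions.

```c
#include <stdio.h>

/* Flag layout of a swap_map byte (values as in include/linux/swap.h,
 * quoted from memory here). */
#define SWAP_HAS_CACHE	0x40	/* a swap cache page exists for this slot */
#define COUNT_CONTINUED	0x80	/* count continues on a continuation page */
#define SWAP_MAP_MAX	0x3e	/* largest in-place map count */
#define SWAP_MAP_BAD	0x3f	/* slot is unusable (bad block) */

static void describe(unsigned char ent)
{
	unsigned char count = ent & ~(SWAP_HAS_CACHE | COUNT_CONTINUED);

	if (count == SWAP_MAP_BAD) {
		printf("0x%02x: bad slot\n", ent);
		return;
	}
	printf("0x%02x: count=%u%s%s\n", ent, count,
	       (ent & SWAP_HAS_CACHE) ? " +cache" : "",
	       (ent & COUNT_CONTINUED) ? " +continued" : "");
}

int main(void)
{
	describe(0x00);			/* free slot */
	describe(0x01);			/* one pte references it */
	describe(0x41);			/* one pte plus the swap cache */
	describe(0xbe);			/* saturated count, see continuation */
	return 0;
}
```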
1231 * to prevent swapoff, such as page lock, page table lock, etc. The
1247 * the page is read from the swap device, the PTE is verified not
1248 * changed with the page table locked to check whether the swap device
1254 unsigned long offset; in get_swap_device() local
1261 if (!percpu_ref_tryget_live(&si->users)) in get_swap_device()
1264 * Guarantee the si->users are checked before accessing other in get_swap_device()
1271 offset = swp_offset(entry); in get_swap_device()
1272 if (offset >= si->max) in get_swap_device()
1282 percpu_ref_put(&si->users); in get_swap_device()
1290 unsigned long offset = swp_offset(entry); in __swap_entry_free() local
1293 ci = lock_cluster_or_swap_info(p, offset); in __swap_entry_free()
1294 usage = __swap_entry_free_locked(p, offset, 1); in __swap_entry_free()
1305 unsigned long offset = swp_offset(entry); in swap_entry_free() local
1308 ci = lock_cluster(p, offset); in swap_entry_free()
1309 count = p->swap_map[offset]; in swap_entry_free()
1311 p->swap_map[offset] = 0; in swap_entry_free()
1312 dec_cluster_info_page(p, p->cluster_info, offset); in swap_entry_free()
1316 swap_range_free(p, offset, 1); in swap_entry_free()
1337 unsigned long offset = swp_offset(entry); in put_swap_folio() local
1338 unsigned long idx = offset / SWAPFILE_CLUSTER; in put_swap_folio()
1350 ci = lock_cluster_or_swap_info(si, offset); in put_swap_folio()
1353 map = si->swap_map + offset; in put_swap_folio()
1363 spin_lock(&si->lock); in put_swap_folio()
1366 spin_unlock(&si->lock); in put_swap_folio()
1371 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) { in put_swap_folio()
1374 if (i == size - 1) in put_swap_folio()
1376 lock_cluster_or_swap_info(si, offset); in put_swap_folio()
1387 unsigned long offset = swp_offset(entry); in split_swap_cluster() local
1391 return -EBUSY; in split_swap_cluster()
1392 ci = lock_cluster(si, offset); in split_swap_cluster()
1403 return (int)swp_type(*e1) - (int)swp_type(*e2); in swp_entry_cmp()
1431 spin_unlock(&p->lock); in swapcache_free_entries()
1437 pgoff_t offset = swp_offset(entry); in __swap_count() local
1442 count = swap_count(si->swap_map[offset]); in __swap_count()
1455 pgoff_t offset = swp_offset(entry); in swap_swapcount() local
1459 ci = lock_cluster_or_swap_info(si, offset); in swap_swapcount()
1460 count = swap_count(si->swap_map[offset]); in swap_swapcount()
1492 struct page *page; in swp_swapcount() local
1493 pgoff_t offset; in swp_swapcount() local
1500 offset = swp_offset(entry); in swp_swapcount()
1502 ci = lock_cluster_or_swap_info(p, offset); in swp_swapcount()
1504 count = swap_count(p->swap_map[offset]); in swp_swapcount()
1511 page = vmalloc_to_page(p->swap_map + offset); in swp_swapcount()
1512 offset &= ~PAGE_MASK; in swp_swapcount()
1513 VM_BUG_ON(page_private(page) != SWP_CONTINUED); in swp_swapcount()
1516 page = list_next_entry(page, lru); in swp_swapcount()
1517 map = kmap_atomic(page); in swp_swapcount()
1518 tmp_count = map[offset]; in swp_swapcount()
1533 unsigned char *map = si->swap_map; in swap_page_trans_huge_swapped()
1535 unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER); in swap_page_trans_huge_swapped() local
1539 ci = lock_cluster_or_swap_info(si, offset); in swap_page_trans_huge_swapped()
1546 if (swap_count(map[offset + i])) { in swap_page_trans_huge_swapped()
1571 * folio_free_swap() - Free the swap space used for this folio.
1593 * - most probably a call from __try_to_reclaim_swap() while in folio_free_swap()
1595 * but conceivably even a call from memory reclaim - will free in folio_free_swap()
1598 * another page of the image. On waking from hibernation, the in folio_free_swap()
1615 * free the page cache entry if it is the last user.
1647 spin_lock(&si->lock); in get_swap_page_of_type()
1648 if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry)) in get_swap_page_of_type()
1650 spin_unlock(&si->lock); in get_swap_page_of_type()
1658 * @offset - number of the PAGE_SIZE-sized block of the device, starting
1663 int swap_type_of(dev_t device, sector_t offset) in swap_type_of() argument
1668 return -1; in swap_type_of()
1674 if (!(sis->flags & SWP_WRITEOK)) in swap_type_of()
1677 if (device == sis->bdev->bd_dev) { in swap_type_of()
1680 if (se->start_block == offset) { in swap_type_of()
1687 return -ENODEV; in swap_type_of()
1698 if (!(sis->flags & SWP_WRITEOK)) in find_first_swap()
1700 *device = sis->bdev->bd_dev; in find_first_swap()
1705 return -ENODEV; in find_first_swap()
1709 * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
1712 sector_t swapdev_block(int type, pgoff_t offset) in swapdev_block() argument
1717 if (!si || !(si->flags & SWP_WRITEOK)) in swapdev_block()
1719 se = offset_to_swap_extent(si, offset); in swapdev_block()
1720 return se->start_block + (offset - se->start_page); in swapdev_block()
1737 spin_lock(&sis->lock); in count_swap_pages()
1738 if (sis->flags & SWP_WRITEOK) { in count_swap_pages()
1739 n = sis->pages; in count_swap_pages()
1741 n -= sis->inuse_pages; in count_swap_pages()
1743 spin_unlock(&sis->lock); in count_swap_pages()
1757 * just let do_wp_page work it out if a write is requested later - to
1763 struct page *page = folio_file_page(folio, swp_offset(entry)); in unuse_pte() local
1764 struct page *swapcache; in unuse_pte()
1769 swapcache = page; in unuse_pte()
1770 page = ksm_might_need_to_copy(page, vma, addr); in unuse_pte()
1771 if (unlikely(!page)) in unuse_pte()
1772 return -ENOMEM; in unuse_pte()
1774 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in unuse_pte()
1780 if (unlikely(!PageUptodate(page))) { in unuse_pte()
1783 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
1784 pteval = swp_entry_to_pte(make_swapin_error_entry(page)); in unuse_pte()
1785 set_pte_at(vma->vm_mm, addr, pte, pteval); in unuse_pte()
1792 BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); in unuse_pte()
1793 BUG_ON(PageAnon(page) && PageAnonExclusive(page)); in unuse_pte()
1795 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
1796 inc_mm_counter(vma->vm_mm, MM_ANONPAGES); in unuse_pte()
1797 get_page(page); in unuse_pte()
1798 if (page == swapcache) { in unuse_pte()
1804 * call and have the page locked. in unuse_pte()
1806 VM_BUG_ON_PAGE(PageWriteback(page), page); in unuse_pte()
1810 page_add_anon_rmap(page, vma, addr, rmap_flags); in unuse_pte()
1812 page_add_new_anon_rmap(page, vma, addr); in unuse_pte()
1813 lru_cache_add_inactive_or_unevictable(page, vma); in unuse_pte()
1815 new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); in unuse_pte()
1820 set_pte_at(vma->vm_mm, addr, pte, new_pte); in unuse_pte()
1824 if (page != swapcache) { in unuse_pte()
1825 unlock_page(page); in unuse_pte()
1826 put_page(page); in unuse_pte()
1845 unsigned long offset; in unuse_pte_range() local
1854 offset = swp_offset(entry); in unuse_pte_range()
1856 swap_map = &si->swap_map[offset]; in unuse_pte_range()
1859 struct page *page; in unuse_pte_range() local
1867 page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, in unuse_pte_range()
1869 if (page) in unuse_pte_range()
1870 folio = page_folio(page); in unuse_pte_range()
1875 return -ENOMEM; in unuse_pte_range()
1893 pte_unmap(pte - 1); in unuse_pte_range()
1967 addr = vma->vm_start; in unuse_vma()
1968 end = vma->vm_end; in unuse_vma()
1970 pgd = pgd_offset(vma->vm_mm, addr); in unuse_vma()
1990 if (vma->anon_vma) { in unuse_mm()
2019 for (i = prev + 1; i < si->max; i++) { in find_next_to_unuse()
2020 count = READ_ONCE(si->swap_map[i]); in find_next_to_unuse()
2027 if (i == si->max) in find_next_to_unuse()
2044 if (!READ_ONCE(si->inuse_pages)) in try_to_unuse()
2057 while (READ_ONCE(si->inuse_pages) && in try_to_unuse()
2059 (p = p->next) != &init_mm.mmlist) { in try_to_unuse()
2085 while (READ_ONCE(si->inuse_pages) && in try_to_unuse()
2096 * swap cache just before we acquired the page lock. The folio in try_to_unuse()
2118 * and robust (though cpu-intensive) just to keep retrying. in try_to_unuse()
2120 if (READ_ONCE(si->inuse_pages)) { in try_to_unuse()
2123 return -EINTR; in try_to_unuse()
2133 * added to the mmlist just after page_duplicate - before would be racy.
2141 if (swap_info[type]->inuse_pages) in drain_mmlist()
2154 while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { in destroy_swap_extents()
2155 struct rb_node *rb = sis->swap_extent_root.rb_node; in destroy_swap_extents()
2158 rb_erase(rb, &sis->swap_extent_root); in destroy_swap_extents()
2162 if (sis->flags & SWP_ACTIVATED) { in destroy_swap_extents()
2163 struct file *swap_file = sis->swap_file; in destroy_swap_extents()
2164 struct address_space *mapping = swap_file->f_mapping; in destroy_swap_extents()
2166 sis->flags &= ~SWP_ACTIVATED; in destroy_swap_extents()
2167 if (mapping->a_ops->swap_deactivate) in destroy_swap_extents()
2168 mapping->a_ops->swap_deactivate(swap_file); in destroy_swap_extents()
2173 * Add a block range (and the corresponding page range) into this swapdev's
2176 * This function rather assumes that it is called in ascending page order.
2182 struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; in add_swap_extent()
2188 * function is called in ascending page order. in add_swap_extent()
2192 link = &parent->rb_right; in add_swap_extent()
2197 BUG_ON(se->start_page + se->nr_pages != start_page); in add_swap_extent()
2198 if (se->start_block + se->nr_pages == start_block) { in add_swap_extent()
2200 se->nr_pages += nr_pages; in add_swap_extent()
2208 return -ENOMEM; in add_swap_extent()
2209 new_se->start_page = start_page; in add_swap_extent()
2210 new_se->nr_pages = nr_pages; in add_swap_extent()
2211 new_se->start_block = start_block; in add_swap_extent()
2213 rb_link_node(&new_se->rb_node, parent, link); in add_swap_extent()
2214 rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); in add_swap_extent()
2223 * time for locating where on disk a page belongs.
2236 * requirements, they are simply tossed out - we will never use those blocks
2243 * Typically it is in the 1-4 megabyte range. So we can have hundreds of
2244 * extents in the rbtree. - akpm.
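For a regular swap file the extents are built in ascending page order, and add_swap_extent() merges a new run into the previous extent when it continues both the page range and the block range. A userspace sketch of that merge rule over a fixed array (no rbtree, names made up):

```c
#include <stdint.h>
#include <stdio.h>

struct extent {
	uint64_t start_page, nr_pages, start_block;
};

/* Append a run of pages that map to contiguous disk blocks, merging with
 * the last extent when it is contiguous on disk too, as add_swap_extent()
 * does.  Assumes calls come in ascending page order. */
static int add_extent(struct extent *tab, int n,
		      uint64_t start_page, uint64_t nr_pages, uint64_t start_block)
{
	if (n > 0) {
		struct extent *last = &tab[n - 1];

		if (last->start_page + last->nr_pages == start_page &&
		    last->start_block + last->nr_pages == start_block) {
			last->nr_pages += nr_pages;	/* merge */
			return n;
		}
	}
	tab[n] = (struct extent){ start_page, nr_pages, start_block };
	return n + 1;
}

int main(void)
{
	struct extent tab[8];
	int n = 0;

	n = add_extent(tab, n, 0, 16, 1000);
	n = add_extent(tab, n, 16, 16, 1016);	/* contiguous: merged */
	n = add_extent(tab, n, 32, 16, 4000);	/* gap on disk: new extent */
	printf("%d extents, first covers %lu pages\n",
	       n, (unsigned long)tab[0].nr_pages);
	return 0;
}
```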
2248 struct file *swap_file = sis->swap_file; in setup_swap_extents()
2249 struct address_space *mapping = swap_file->f_mapping; in setup_swap_extents()
2250 struct inode *inode = mapping->host; in setup_swap_extents()
2253 if (S_ISBLK(inode->i_mode)) { in setup_swap_extents()
2254 ret = add_swap_extent(sis, 0, sis->max, 0); in setup_swap_extents()
2255 *span = sis->pages; in setup_swap_extents()
2259 if (mapping->a_ops->swap_activate) { in setup_swap_extents()
2260 ret = mapping->a_ops->swap_activate(sis, swap_file, span); in setup_swap_extents()
2263 sis->flags |= SWP_ACTIVATED; in setup_swap_extents()
2264 if ((sis->flags & SWP_FS_OPS) && in setup_swap_extents()
2267 return -ENOMEM; in setup_swap_extents()
2279 if (p->bdev) in swap_node()
2280 bdev = p->bdev; in swap_node()
2282 bdev = p->swap_file->f_inode->i_sb->s_bdev; in swap_node()
2284 return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; in swap_node()
2294 p->prio = prio; in setup_swap_info()
2296 p->prio = --least_priority; in setup_swap_info()
2299 * low-to-high, while swap ordering is high-to-low in setup_swap_info()
2301 p->list.prio = -p->prio; in setup_swap_info()
2303 if (p->prio >= 0) in setup_swap_info()
2304 p->avail_lists[i].prio = -p->prio; in setup_swap_info()
2307 p->avail_lists[i].prio = 1; in setup_swap_info()
2309 p->avail_lists[i].prio = -p->prio; in setup_swap_info()
2312 p->swap_map = swap_map; in setup_swap_info()
2313 p->cluster_info = cluster_info; in setup_swap_info()
2318 p->flags |= SWP_WRITEOK; in _enable_swap_info()
2319 atomic_long_add(p->pages, &nr_swap_pages); in _enable_swap_info()
2320 total_swap_pages += p->pages; in _enable_swap_info()
2326 * which on removal of any swap_info_struct with an auto-assigned in _enable_swap_info()
2327 * (i.e. negative) priority increments the auto-assigned priority in _enable_swap_info()
2328 * of any lower-priority swap_info_structs. in _enable_swap_info()
2333 plist_add(&p->list, &swap_active_head); in _enable_swap_info()
2343 frontswap_init(p->type, frontswap_map); in enable_swap_info()
2345 spin_lock(&p->lock); in enable_swap_info()
2347 spin_unlock(&p->lock); in enable_swap_info()
2352 percpu_ref_resurrect(&p->users); in enable_swap_info()
2354 spin_lock(&p->lock); in enable_swap_info()
2356 spin_unlock(&p->lock); in enable_swap_info()
2363 spin_lock(&p->lock); in reinsert_swap_info()
2364 setup_swap_info(p, p->prio, p->swap_map, p->cluster_info); in reinsert_swap_info()
2366 spin_unlock(&p->lock); in reinsert_swap_info()
2395 return -EPERM; in SYSCALL_DEFINE1()
2397 BUG_ON(!current->mm); in SYSCALL_DEFINE1()
2408 mapping = victim->f_mapping; in SYSCALL_DEFINE1()
2411 if (p->flags & SWP_WRITEOK) { in SYSCALL_DEFINE1()
2412 if (p->swap_file->f_mapping == mapping) { in SYSCALL_DEFINE1()
2419 err = -EINVAL; in SYSCALL_DEFINE1()
2423 if (!security_vm_enough_memory_mm(current->mm, p->pages)) in SYSCALL_DEFINE1()
2424 vm_unacct_memory(p->pages); in SYSCALL_DEFINE1()
2426 err = -ENOMEM; in SYSCALL_DEFINE1()
2431 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2432 if (p->prio < 0) { in SYSCALL_DEFINE1()
2437 si->prio++; in SYSCALL_DEFINE1()
2438 si->list.prio--; in SYSCALL_DEFINE1()
2440 if (si->avail_lists[nid].prio != 1) in SYSCALL_DEFINE1()
2441 si->avail_lists[nid].prio--; in SYSCALL_DEFINE1()
2446 plist_del(&p->list, &swap_active_head); in SYSCALL_DEFINE1()
2447 atomic_long_sub(p->pages, &nr_swap_pages); in SYSCALL_DEFINE1()
2448 total_swap_pages -= p->pages; in SYSCALL_DEFINE1()
2449 p->flags &= ~SWP_WRITEOK; in SYSCALL_DEFINE1()
2450 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2456 err = try_to_unuse(p->type); in SYSCALL_DEFINE1()
2460 /* re-insert swap space back into swap_list */ in SYSCALL_DEFINE1()
2475 percpu_ref_kill(&p->users); in SYSCALL_DEFINE1()
2477 wait_for_completion(&p->comp); in SYSCALL_DEFINE1()
2479 flush_work(&p->discard_work); in SYSCALL_DEFINE1()
2482 if (p->flags & SWP_CONTINUED) in SYSCALL_DEFINE1()
2485 if (!p->bdev || !bdev_nonrot(p->bdev)) in SYSCALL_DEFINE1()
2490 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2494 p->highest_bit = 0; /* cuts scans short */ in SYSCALL_DEFINE1()
2495 while (p->flags >= SWP_SCANNING) { in SYSCALL_DEFINE1()
2496 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2500 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2503 swap_file = p->swap_file; in SYSCALL_DEFINE1()
2504 old_block_size = p->old_block_size; in SYSCALL_DEFINE1()
2505 p->swap_file = NULL; in SYSCALL_DEFINE1()
2506 p->max = 0; in SYSCALL_DEFINE1()
2507 swap_map = p->swap_map; in SYSCALL_DEFINE1()
2508 p->swap_map = NULL; in SYSCALL_DEFINE1()
2509 cluster_info = p->cluster_info; in SYSCALL_DEFINE1()
2510 p->cluster_info = NULL; in SYSCALL_DEFINE1()
2512 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2514 arch_swap_invalidate_area(p->type); in SYSCALL_DEFINE1()
2515 frontswap_invalidate_area(p->type); in SYSCALL_DEFINE1()
2518 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE1()
2519 p->percpu_cluster = NULL; in SYSCALL_DEFINE1()
2520 free_percpu(p->cluster_next_cpu); in SYSCALL_DEFINE1()
2521 p->cluster_next_cpu = NULL; in SYSCALL_DEFINE1()
2526 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE1()
2527 exit_swap_address_space(p->type); in SYSCALL_DEFINE1()
2529 inode = mapping->host; in SYSCALL_DEFINE1()
2530 if (S_ISBLK(inode->i_mode)) { in SYSCALL_DEFINE1()
2538 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE1()
2545 * not hold p->lock after we cleared its SWP_WRITEOK. in SYSCALL_DEFINE1()
2548 p->flags = 0; in SYSCALL_DEFINE1()
2565 struct seq_file *seq = file->private_data; in swaps_poll()
2569 if (seq->poll_event != atomic_read(&proc_poll_event)) { in swaps_poll()
2570 seq->poll_event = atomic_read(&proc_poll_event); in swaps_poll()
2590 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_start()
2592 if (!--l) in swap_start()
2607 type = si->type + 1; in swap_next()
2611 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_next()
2636 bytes = si->pages << (PAGE_SHIFT - 10); in swap_show()
2637 inuse = READ_ONCE(si->inuse_pages) << (PAGE_SHIFT - 10); in swap_show()
2639 file = si->swap_file; in swap_show()
2642 len < 40 ? 40 - len : 1, " ", in swap_show()
2643 S_ISBLK(file_inode(file)->i_mode) ? in swap_show()
2647 si->prio); in swap_show()
2667 seq = file->private_data; in swaps_open()
2668 seq->poll_event = atomic_read(&proc_poll_event); in swaps_open()
2707 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2709 if (percpu_ref_init(&p->users, swap_users_ref_free, in alloc_swap_info()
2712 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2717 if (!(swap_info[type]->flags & SWP_USED)) in alloc_swap_info()
2722 percpu_ref_exit(&p->users); in alloc_swap_info()
2724 return ERR_PTR(-EPERM); in alloc_swap_info()
2727 p->type = type; in alloc_swap_info()
2739 * would be relying on p->type to remain valid. in alloc_swap_info()
2742 p->swap_extent_root = RB_ROOT; in alloc_swap_info()
2743 plist_node_init(&p->list, 0); in alloc_swap_info()
2745 plist_node_init(&p->avail_lists[i], 0); in alloc_swap_info()
2746 p->flags = SWP_USED; in alloc_swap_info()
2749 percpu_ref_exit(&defer->users); in alloc_swap_info()
2752 spin_lock_init(&p->lock); in alloc_swap_info()
2753 spin_lock_init(&p->cont_lock); in alloc_swap_info()
2754 init_completion(&p->comp); in alloc_swap_info()
2763 if (S_ISBLK(inode->i_mode)) { in claim_swapfile()
2764 p->bdev = blkdev_get_by_dev(inode->i_rdev, in claim_swapfile()
2766 if (IS_ERR(p->bdev)) { in claim_swapfile()
2767 error = PTR_ERR(p->bdev); in claim_swapfile()
2768 p->bdev = NULL; in claim_swapfile()
2771 p->old_block_size = block_size(p->bdev); in claim_swapfile()
2772 error = set_blocksize(p->bdev, PAGE_SIZE); in claim_swapfile()
2780 if (bdev_is_zoned(p->bdev)) in claim_swapfile()
2781 return -EINVAL; in claim_swapfile()
2782 p->flags |= SWP_BLKDEV; in claim_swapfile()
2783 } else if (S_ISREG(inode->i_mode)) { in claim_swapfile()
2784 p->bdev = inode->i_sb->s_bdev; in claim_swapfile()
2794 * 1) the number of bits for the swap offset in the swp_entry_t type, and
2799 * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
2800 * decoded to a swp_entry_t again, and finally the swap offset is
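The maxpages limit described in this comment comes from round-tripping an artificial entry of swap type 0 and offset ~0UL through the architecture's swap pte encoding and seeing how many offset bits survive (the kernel does this in generic_max_swapfile_size()). A sketch with a made-up pte layout:

```c
#include <stdint.h>
#include <stdio.h>

/* Made-up pte layout, purely for illustration: bit 0 = present,
 * bits 1-5 = swap type, remaining bits = swap offset. */
#define SWP_TYPE_BITS	 5
#define SWP_OFFSET_SHIFT (1 + SWP_TYPE_BITS)

static uint64_t swp_entry_to_pte(unsigned type, uint64_t offset)
{
	return (offset << SWP_OFFSET_SHIFT) | ((uint64_t)type << 1);
}

static uint64_t pte_to_swp_offset(uint64_t pte)
{
	return pte >> SWP_OFFSET_SHIFT;
}

int main(void)
{
	/* Encode offset ~0, decode it again: the bits that fell off the top
	 * of the pte are gone, and what remains is the maximum offset. */
	uint64_t max_offset = pte_to_swp_offset(swp_entry_to_pte(0, ~(uint64_t)0));

	printf("max swap offset = %#llx, so maxpages = %#llx\n",
	       (unsigned long long)max_offset,
	       (unsigned long long)(max_offset + 1));
	return 0;
}
```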
2828 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { in read_swap_header()
2829 pr_err("Unable to find swap-space signature\n"); in read_swap_header()
2834 if (swab32(swap_header->info.version) == 1) { in read_swap_header()
2835 swab32s(&swap_header->info.version); in read_swap_header()
2836 swab32s(&swap_header->info.last_page); in read_swap_header()
2837 swab32s(&swap_header->info.nr_badpages); in read_swap_header()
2838 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
2840 for (i = 0; i < swap_header->info.nr_badpages; i++) in read_swap_header()
2841 swab32s(&swap_header->info.badpages[i]); in read_swap_header()
2843 /* Check the swap header's sub-version */ in read_swap_header()
2844 if (swap_header->info.version != 1) { in read_swap_header()
2846 swap_header->info.version); in read_swap_header()
2850 p->lowest_bit = 1; in read_swap_header()
2851 p->cluster_next = 1; in read_swap_header()
2852 p->cluster_nr = 0; in read_swap_header()
2855 last_page = swap_header->info.last_page; in read_swap_header()
2857 pr_warn("Empty swap-file\n"); in read_swap_header()
2862 maxpages << (PAGE_SHIFT - 10), in read_swap_header()
2863 last_page << (PAGE_SHIFT - 10)); in read_swap_header()
2867 /* p->max is an unsigned int: don't overflow it */ in read_swap_header()
2871 p->highest_bit = maxpages - 1; in read_swap_header()
2880 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) in read_swap_header()
2882 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
2906 unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; in setup_swap_map_and_extents()
2909 nr_good_pages = maxpages - 1; /* omit header page */ in setup_swap_map_and_extents()
2911 cluster_list_init(&p->free_clusters); in setup_swap_map_and_extents()
2912 cluster_list_init(&p->discard_clusters); in setup_swap_map_and_extents()
2914 for (i = 0; i < swap_header->info.nr_badpages; i++) { in setup_swap_map_and_extents()
2915 unsigned int page_nr = swap_header->info.badpages[i]; in setup_swap_map_and_extents()
2916 if (page_nr == 0 || page_nr > swap_header->info.last_page) in setup_swap_map_and_extents()
2917 return -EINVAL; in setup_swap_map_and_extents()
2920 nr_good_pages--; in setup_swap_map_and_extents()
2940 p->max = maxpages; in setup_swap_map_and_extents()
2941 p->pages = nr_good_pages; in setup_swap_map_and_extents()
2945 nr_good_pages = p->pages; in setup_swap_map_and_extents()
2948 pr_warn("Empty swap-file\n"); in setup_swap_map_and_extents()
2949 return -EINVAL; in setup_swap_map_and_extents()
2969 cluster_list_add_tail(&p->free_clusters, cluster_info, in setup_swap_map_and_extents()
2992 struct page *page = NULL; in SYSCALL_DEFINE2() local
2997 return -EINVAL; in SYSCALL_DEFINE2()
3000 return -EPERM; in SYSCALL_DEFINE2()
3003 return -ENOMEM; in SYSCALL_DEFINE2()
3009 INIT_WORK(&p->discard_work, swap_discard_work); in SYSCALL_DEFINE2()
3024 p->swap_file = swap_file; in SYSCALL_DEFINE2()
3025 mapping = swap_file->f_mapping; in SYSCALL_DEFINE2()
3026 dentry = swap_file->f_path.dentry; in SYSCALL_DEFINE2()
3027 inode = mapping->host; in SYSCALL_DEFINE2()
3035 error = -ENOENT; in SYSCALL_DEFINE2()
3039 error = -EBUSY; in SYSCALL_DEFINE2()
3046 if (!mapping->a_ops->read_folio) { in SYSCALL_DEFINE2()
3047 error = -EINVAL; in SYSCALL_DEFINE2()
3050 page = read_mapping_page(mapping, 0, swap_file); in SYSCALL_DEFINE2()
3051 if (IS_ERR(page)) { in SYSCALL_DEFINE2()
3052 error = PTR_ERR(page); in SYSCALL_DEFINE2()
3055 swap_header = kmap(page); in SYSCALL_DEFINE2()
3059 error = -EINVAL; in SYSCALL_DEFINE2()
3066 error = -ENOMEM; in SYSCALL_DEFINE2()
3070 if (p->bdev && bdev_stable_writes(p->bdev)) in SYSCALL_DEFINE2()
3071 p->flags |= SWP_STABLE_WRITES; in SYSCALL_DEFINE2()
3073 if (p->bdev && p->bdev->bd_disk->fops->rw_page) in SYSCALL_DEFINE2()
3074 p->flags |= SWP_SYNCHRONOUS_IO; in SYSCALL_DEFINE2()
3076 if (p->bdev && bdev_nonrot(p->bdev)) { in SYSCALL_DEFINE2()
3080 p->flags |= SWP_SOLIDSTATE; in SYSCALL_DEFINE2()
3081 p->cluster_next_cpu = alloc_percpu(unsigned int); in SYSCALL_DEFINE2()
3082 if (!p->cluster_next_cpu) { in SYSCALL_DEFINE2()
3083 error = -ENOMEM; in SYSCALL_DEFINE2()
3091 per_cpu(*p->cluster_next_cpu, cpu) = in SYSCALL_DEFINE2()
3092 1 + prandom_u32_max(p->highest_bit); in SYSCALL_DEFINE2()
3099 error = -ENOMEM; in SYSCALL_DEFINE2()
3104 spin_lock_init(&((cluster_info + ci)->lock)); in SYSCALL_DEFINE2()
3106 p->percpu_cluster = alloc_percpu(struct percpu_cluster); in SYSCALL_DEFINE2()
3107 if (!p->percpu_cluster) { in SYSCALL_DEFINE2()
3108 error = -ENOMEM; in SYSCALL_DEFINE2()
3113 cluster = per_cpu_ptr(p->percpu_cluster, cpu); in SYSCALL_DEFINE2()
3114 cluster_set_null(&cluster->index); in SYSCALL_DEFINE2()
3121 error = swap_cgroup_swapon(p->type, maxpages); in SYSCALL_DEFINE2()
3131 /* frontswap enabled? set up bit-per-page map for frontswap */ in SYSCALL_DEFINE2()
3138 p->bdev && bdev_max_discard_sectors(p->bdev)) { in SYSCALL_DEFINE2()
3145 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | in SYSCALL_DEFINE2()
3150 * either do single-time area discards only, or to just in SYSCALL_DEFINE2()
3151 * perform discards for released swap page-clusters. in SYSCALL_DEFINE2()
3152 * Now it's time to adjust the p->flags accordingly. in SYSCALL_DEFINE2()
3155 p->flags &= ~SWP_PAGE_DISCARD; in SYSCALL_DEFINE2()
3157 p->flags &= ~SWP_AREA_DISCARD; in SYSCALL_DEFINE2()
3159 /* issue a swapon-time discard if it's still required */ in SYSCALL_DEFINE2()
3160 if (p->flags & SWP_AREA_DISCARD) { in SYSCALL_DEFINE2()
3168 error = init_swap_address_space(p->type, maxpages); in SYSCALL_DEFINE2()
3176 inode->i_flags |= S_SWAPFILE; in SYSCALL_DEFINE2()
3179 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE2()
3184 prio = -1; in SYSCALL_DEFINE2()
3191 p->pages<<(PAGE_SHIFT-10), name->name, p->prio, in SYSCALL_DEFINE2()
3192 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), in SYSCALL_DEFINE2()
3193 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", in SYSCALL_DEFINE2()
3194 (p->flags & SWP_DISCARDABLE) ? "D" : "", in SYSCALL_DEFINE2()
3195 (p->flags & SWP_AREA_DISCARD) ? "s" : "", in SYSCALL_DEFINE2()
3196 (p->flags & SWP_PAGE_DISCARD) ? "c" : "", in SYSCALL_DEFINE2()
3206 exit_swap_address_space(p->type); in SYSCALL_DEFINE2()
3210 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE2()
3211 p->percpu_cluster = NULL; in SYSCALL_DEFINE2()
3212 free_percpu(p->cluster_next_cpu); in SYSCALL_DEFINE2()
3213 p->cluster_next_cpu = NULL; in SYSCALL_DEFINE2()
3214 if (inode && S_ISBLK(inode->i_mode) && p->bdev) { in SYSCALL_DEFINE2()
3215 set_blocksize(p->bdev, p->old_block_size); in SYSCALL_DEFINE2()
3216 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); in SYSCALL_DEFINE2()
3220 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE2()
3222 p->swap_file = NULL; in SYSCALL_DEFINE2()
3223 p->flags = 0; in SYSCALL_DEFINE2()
3233 if (page && !IS_ERR(page)) { in SYSCALL_DEFINE2()
3234 kunmap(page); in SYSCALL_DEFINE2()
3235 put_page(page); in SYSCALL_DEFINE2()
3255 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) in si_swapinfo()
3256 nr_to_be_unused += READ_ONCE(si->inuse_pages); in si_swapinfo()
3258 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; in si_swapinfo()
3259 val->totalswap = total_swap_pages + nr_to_be_unused; in si_swapinfo()
3267 * - success -> 0
3268 * - swp_entry is invalid -> EINVAL
3269 * - swp_entry is migration entry -> EINVAL
3270 * - swap-cache reference is requested but there is already one. -> EEXIST
3271 * - swap-cache reference is requested but the entry is not used. -> ENOENT
3272 * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3278 unsigned long offset; in __swap_duplicate() local
3285 return -EINVAL; in __swap_duplicate()
3287 offset = swp_offset(entry); in __swap_duplicate()
3288 ci = lock_cluster_or_swap_info(p, offset); in __swap_duplicate()
3290 count = p->swap_map[offset]; in __swap_duplicate()
3297 err = -ENOENT; in __swap_duplicate()
3311 err = -EEXIST; in __swap_duplicate()
3313 err = -ENOENT; in __swap_duplicate()
3320 err = -EINVAL; in __swap_duplicate()
3321 else if (swap_count_continued(p, offset, count)) in __swap_duplicate()
3324 err = -ENOMEM; in __swap_duplicate()
3326 err = -ENOENT; /* unused swap entry */ in __swap_duplicate()
3328 WRITE_ONCE(p->swap_map[offset], count | has_cache); in __swap_duplicate()
3347 * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3349 * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3350 * might occur if a page table entry has got corrupted.
3356 while (!err && __swap_duplicate(entry, 1) == -ENOMEM) in swap_duplicate()
3366 * -EEXIST means there is a swap cache.
3379 struct swap_info_struct *page_swap_info(struct page *page) in page_swap_info() argument
3381 swp_entry_t entry = { .val = page_private(page) }; in page_swap_info()
3386 * out-of-line methods to avoid include hell.
3390 return page_swap_info(&folio->page)->swap_file->f_mapping; in swapcache_mapping()
3394 pgoff_t __page_file_index(struct page *page) in __page_file_index() argument
3396 swp_entry_t swap = { .val = page_private(page) }; in __page_file_index()
3402 * add_swap_count_continuation - called when a swap count is duplicated
3403 * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
3404 * page of the original vmalloc'ed swap_map, to hold the continuation count
3409 * on the original swap_map, only referring to a continuation page when the
3413 * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
3420 struct page *head; in add_swap_count_continuation()
3421 struct page *page; in add_swap_count_continuation() local
3422 struct page *list_page; in add_swap_count_continuation()
3423 pgoff_t offset; in add_swap_count_continuation() local
3429 * for latency not to zero a page while GFP_ATOMIC and holding locks. in add_swap_count_continuation()
3431 page = alloc_page(gfp_mask | __GFP_HIGHMEM); in add_swap_count_continuation()
3441 spin_lock(&si->lock); in add_swap_count_continuation()
3443 offset = swp_offset(entry); in add_swap_count_continuation()
3445 ci = lock_cluster(si, offset); in add_swap_count_continuation()
3447 count = swap_count(si->swap_map[offset]); in add_swap_count_continuation()
3453 * over-provisioning. in add_swap_count_continuation()
3458 if (!page) { in add_swap_count_continuation()
3459 ret = -ENOMEM; in add_swap_count_continuation()
3465 * no architecture is using highmem pages for kernel page tables: so it in add_swap_count_continuation()
3466 * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps. in add_swap_count_continuation()
3468 head = vmalloc_to_page(si->swap_map + offset); in add_swap_count_continuation()
3469 offset &= ~PAGE_MASK; in add_swap_count_continuation()
3471 spin_lock(&si->cont_lock); in add_swap_count_continuation()
3473 * Page allocation does not initialize the page's lru field, in add_swap_count_continuation()
3478 INIT_LIST_HEAD(&head->lru); in add_swap_count_continuation()
3480 si->flags |= SWP_CONTINUED; in add_swap_count_continuation()
3483 list_for_each_entry(list_page, &head->lru, lru) { in add_swap_count_continuation()
3488 * a continuation page, free our allocation and use this one. in add_swap_count_continuation()
3493 map = kmap_atomic(list_page) + offset; in add_swap_count_continuation()
3505 list_add_tail(&page->lru, &head->lru); in add_swap_count_continuation()
3506 page = NULL; /* now it's attached, don't free it */ in add_swap_count_continuation()
3508 spin_unlock(&si->cont_lock); in add_swap_count_continuation()
3511 spin_unlock(&si->lock); in add_swap_count_continuation()
3514 if (page) in add_swap_count_continuation()
3515 __free_page(page); in add_swap_count_continuation()
3520 * swap_count_continued - when the original swap_map count is incremented
3521 * from SWAP_MAP_MAX, check if there is already a continuation page to carry
3522 * into, carry if so, or else fail until a new continuation page is allocated;
3529 pgoff_t offset, unsigned char count) in swap_count_continued() argument
3531 struct page *head; in swap_count_continued()
3532 struct page *page; in swap_count_continued() local
3536 head = vmalloc_to_page(si->swap_map + offset); in swap_count_continued()
3542 spin_lock(&si->cont_lock); in swap_count_continued()
3543 offset &= ~PAGE_MASK; in swap_count_continued()
3544 page = list_next_entry(head, lru); in swap_count_continued()
3545 map = kmap_atomic(page) + offset; in swap_count_continued()
3556 page = list_next_entry(page, lru); in swap_count_continued()
3557 BUG_ON(page == head); in swap_count_continued()
3558 map = kmap_atomic(page) + offset; in swap_count_continued()
3562 page = list_next_entry(page, lru); in swap_count_continued()
3563 if (page == head) { in swap_count_continued()
3567 map = kmap_atomic(page) + offset; in swap_count_continued()
3568 init_map: *map = 0; /* we didn't zero the page */ in swap_count_continued()
3572 while ((page = list_prev_entry(page, lru)) != head) { in swap_count_continued()
3573 map = kmap_atomic(page) + offset; in swap_count_continued()
3586 page = list_next_entry(page, lru); in swap_count_continued()
3587 BUG_ON(page == head); in swap_count_continued()
3588 map = kmap_atomic(page) + offset; in swap_count_continued()
3591 *map -= 1; in swap_count_continued()
3595 while ((page = list_prev_entry(page, lru)) != head) { in swap_count_continued()
3596 map = kmap_atomic(page) + offset; in swap_count_continued()
3604 spin_unlock(&si->cont_lock); in swap_count_continued()
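add_swap_count_continuation() and swap_count_continued() implement, in effect, a multi-digit counter: the swap_map byte is the low digit and each continuation page contributes one extra digit per offset, touched only when the low digit overflows or underflows. A loose userspace model of the carry; the digit limits here are illustrative, not the kernel's exact encoding.

```c
#include <stdio.h>

#define LOW_MAX		0x3e	/* like SWAP_MAP_MAX: low digit saturates here */
#define CONT_MAX	0x7f	/* like SWAP_CONT_MAX: continuation digit limit */
#define NDIGITS		4

/* One swap slot's count: the low digit lives in swap_map, the higher
 * digits live in continuation pages.  Purely a model, not kernel code. */
struct swap_count {
	unsigned char low;
	unsigned char cont[NDIGITS];
};

/* Increment with ripple carry into the continuation digits, the way
 * swap_count_continued() carries when the low digit passes its maximum. */
static void count_inc(struct swap_count *c)
{
	if (c->low < LOW_MAX) {
		c->low++;
		return;
	}
	c->low = 0;
	for (int i = 0; i < NDIGITS; i++) {
		if (c->cont[i] < CONT_MAX) {
			c->cont[i]++;
			return;
		}
		c->cont[i] = 0;		/* this digit overflows, carry on */
	}
}

int main(void)
{
	struct swap_count c = { 0 };

	for (int i = 0; i < 200; i++)
		count_inc(&c);
	printf("low=%u cont[0]=%u\n", c.low, c.cont[0]);
	return 0;
}
```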
3609 * free_swap_count_continuations - swapoff free all the continuation pages
3614 pgoff_t offset; in free_swap_count_continuations() local
3616 for (offset = 0; offset < si->max; offset += PAGE_SIZE) { in free_swap_count_continuations()
3617 struct page *head; in free_swap_count_continuations()
3618 head = vmalloc_to_page(si->swap_map + offset); in free_swap_count_continuations()
3620 struct page *page, *next; in free_swap_count_continuations() local
3622 list_for_each_entry_safe(page, next, &head->lru, lru) { in free_swap_count_continuations()
3623 list_del(&page->lru); in free_swap_count_continuations()
3624 __free_page(page); in free_swap_count_continuations()
3631 void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) in __cgroup_throttle_swaprate() argument
3634 int nid = page_to_nid(page); in __cgroup_throttle_swaprate()
3646 if (current->throttle_queue) in __cgroup_throttle_swaprate()
3652 if (si->bdev) { in __cgroup_throttle_swaprate()
3653 blkcg_schedule_throttle(si->bdev->bd_disk, true); in __cgroup_throttle_swaprate()
3669 return -ENOMEM; in swapfile_init()