Lines matching +full:processor +full:-intensive in mm/vmscan.c

1 // SPDX-License-Identifier: GPL-2.0
32 #include <linux/backing-dev.h>
163 if ((_page)->lru.prev != _base) { \
166 prev = lru_to_page(&(_page->lru)); \
167 prefetchw(&prev->_field); \
183 WARN_ON_ONCE(rs && task->reclaim_state); in set_task_reclaim_state()
185 /* Check for the nulling of an already-nulled member */ in set_task_reclaim_state()
186 WARN_ON_ONCE(!rs && !task->reclaim_state); in set_task_reclaim_state()
188 task->reclaim_state = rs; in set_task_reclaim_state()
211 return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, in shrinker_info_protected()
225 pn = memcg->nodeinfo[nid]; in expand_one_shrinker_info()
233 return -ENOMEM; in expand_one_shrinker_info()
235 new->nr_deferred = (atomic_long_t *)(new + 1); in expand_one_shrinker_info()
236 new->map = (void *)new->nr_deferred + defer_size; in expand_one_shrinker_info()
239 memset(new->map, (int)0xff, old_map_size); in expand_one_shrinker_info()
240 memset((void *)new->map + old_map_size, 0, map_size - old_map_size); in expand_one_shrinker_info()
242 memcpy(new->nr_deferred, old->nr_deferred, old_defer_size); in expand_one_shrinker_info()
243 memset((void *)new->nr_deferred + old_defer_size, 0, in expand_one_shrinker_info()
244 defer_size - old_defer_size); in expand_one_shrinker_info()
246 rcu_assign_pointer(pn->shrinker_info, new); in expand_one_shrinker_info()
260 pn = memcg->nodeinfo[nid]; in free_shrinker_info()
261 info = rcu_dereference_protected(pn->shrinker_info, true); in free_shrinker_info()
263 rcu_assign_pointer(pn->shrinker_info, NULL); in free_shrinker_info()
281 ret = -ENOMEM; in alloc_shrinker_info()
284 info->nr_deferred = (atomic_long_t *)(info + 1); in alloc_shrinker_info()
285 info->map = (void *)info->nr_deferred + defer_size; in alloc_shrinker_info()
286 rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); in alloc_shrinker_info()
342 info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); in set_shrinker_bit()
345 set_bit(shrinker_id, info->map); in set_shrinker_bit()
354 int id, ret = -ENOMEM; in prealloc_memcg_shrinker()
357 return -ENOSYS; in prealloc_memcg_shrinker()
371 shrinker->id = id; in prealloc_memcg_shrinker()
380 int id = shrinker->id; in unregister_memcg_shrinker()
395 return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); in xchg_nr_deferred_memcg()
404 return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); in add_nr_deferred_memcg()
424 nr = atomic_long_read(&child_info->nr_deferred[i]); in reparent_shrinker_deferred()
425 atomic_long_add(nr, &parent_info->nr_deferred[i]); in reparent_shrinker_deferred()
433 return sc->target_mem_cgroup; in cgroup_reclaim()
437 * writeback_throttling_sane - is the usual dirty throttling mechanism available?
462 return -ENOSYS; in prealloc_memcg_shrinker()
495 int nid = sc->nid; in xchg_nr_deferred()
497 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) in xchg_nr_deferred()
500 if (sc->memcg && in xchg_nr_deferred()
501 (shrinker->flags & SHRINKER_MEMCG_AWARE)) in xchg_nr_deferred()
503 sc->memcg); in xchg_nr_deferred()
505 return atomic_long_xchg(&shrinker->nr_deferred[nid], 0); in xchg_nr_deferred()
512 int nid = sc->nid; in add_nr_deferred()
514 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) in add_nr_deferred()
517 if (sc->memcg && in add_nr_deferred()
518 (shrinker->flags & SHRINKER_MEMCG_AWARE)) in add_nr_deferred()
520 sc->memcg); in add_nr_deferred()
522 return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); in add_nr_deferred()
530 if (sc->no_demotion) in can_demote()
548 * For non-memcg reclaim, is there in can_reclaim_anon_pages()
586 * lruvec_lru_size - Returns the number of pages on the given LRU list.
598 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
619 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in prealloc_shrinker()
621 if (err != -ENOSYS) in prealloc_shrinker()
624 shrinker->flags &= ~SHRINKER_MEMCG_AWARE; in prealloc_shrinker()
627 size = sizeof(*shrinker->nr_deferred); in prealloc_shrinker()
628 if (shrinker->flags & SHRINKER_NUMA_AWARE) in prealloc_shrinker()
631 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); in prealloc_shrinker()
632 if (!shrinker->nr_deferred) in prealloc_shrinker()
633 return -ENOMEM; in prealloc_shrinker()
640 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in free_prealloced_shrinker()
647 kfree(shrinker->nr_deferred); in free_prealloced_shrinker()
648 shrinker->nr_deferred = NULL; in free_prealloced_shrinker()
654 list_add_tail(&shrinker->list, &shrinker_list); in register_shrinker_prepared()
655 shrinker->flags |= SHRINKER_REGISTERED; in register_shrinker_prepared()
675 if (!(shrinker->flags & SHRINKER_REGISTERED)) in unregister_shrinker()
679 list_del(&shrinker->list); in unregister_shrinker()
680 shrinker->flags &= ~SHRINKER_REGISTERED; in unregister_shrinker()
681 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in unregister_shrinker()
685 kfree(shrinker->nr_deferred); in unregister_shrinker()
686 shrinker->nr_deferred = NULL; in unregister_shrinker()
701 long batch_size = shrinker->batch ? shrinker->batch in do_shrink_slab()
705 freeable = shrinker->count_objects(shrinker, shrinkctl); in do_shrink_slab()
716 if (shrinker->seeks) { in do_shrink_slab()
719 do_div(delta, shrinker->seeks); in do_shrink_slab()
756 shrinkctl->nr_to_scan = nr_to_scan; in do_shrink_slab()
757 shrinkctl->nr_scanned = nr_to_scan; in do_shrink_slab()
758 ret = shrinker->scan_objects(shrinker, shrinkctl); in do_shrink_slab()
763 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); in do_shrink_slab()
764 total_scan -= shrinkctl->nr_scanned; in do_shrink_slab()
765 scanned += shrinkctl->nr_scanned; in do_shrink_slab()
776 next_deferred = max_t(long, (nr + delta - scanned), 0); in do_shrink_slab()
785 trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan); in do_shrink_slab()
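The next_deferred arithmetic a few lines above implements work deferral: scanning that could not be completed in this pass (for example because the GFP context did not allow it) is carried over and added to the next pass's target via the nr_deferred counter. A minimal user-space model of that bookkeeping, ignoring the NUMA/memcg indirection and concurrent shrinkers; the numbers are illustrative.

#include <stdio.h>

static long deferred;	/* stands in for shrinker->nr_deferred[nid] */

static void shrink_pass(long delta, long scanned)
{
	long nr = deferred;			/* like xchg_nr_deferred(): fetch and clear */
	long total_scan = nr + delta;
	long next = total_scan - scanned;

	deferred = next > 0 ? next : 0;		/* like add_nr_deferred() after a full xchg */
}

int main(void)
{
	shrink_pass(512, 128);				/* only a quarter of the work got done */
	printf("deferred to next pass: %ld\n", deferred);	/* 384 */
	shrink_pass(512, 896);				/* catch up: 384 deferred + 512 new */
	printf("deferred now: %ld\n", deferred);		/* 0 */
	return 0;
}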
807 for_each_set_bit(i, info->map, shrinker_nr_max) { in shrink_slab_memcg()
816 if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { in shrink_slab_memcg()
818 clear_bit(i, info->map); in shrink_slab_memcg()
822 /* Call non-slab shrinkers even though kmem is disabled */ in shrink_slab_memcg()
824 !(shrinker->flags & SHRINKER_NONSLAB)) in shrink_slab_memcg()
829 clear_bit(i, info->map); in shrink_slab_memcg()
872 * shrink_slab - shrink slab caches
886 * @priority is sc->priority, we take the number of objects and >> by priority
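As the kernel-doc line above says, the scan pressure applied to each shrinker is its freeable object count shifted right by the reclaim priority; do_shrink_slab() then scales that by 4 and divides by shrinker->seeks. A minimal user-space model of that calculation, assuming DEF_PRIORITY (12) and DEFAULT_SEEKS (2); the freeable count is made up.

#include <stdio.h>

static unsigned long long shrink_delta(unsigned long long freeable,
				       int priority, int seeks)
{
	unsigned long long delta = freeable >> priority;	/* less pressure at higher priority values */

	delta *= 4;			/* historical scaling factor */
	delta /= seeks;			/* cheap-to-recreate objects are scanned harder */
	return delta;
}

int main(void)
{
	/* e.g. 1M freeable dentries, DEF_PRIORITY = 12, DEFAULT_SEEKS = 2 */
	printf("%llu objects to scan\n", shrink_delta(1ULL << 20, 12, 2));	/* 512 */
	return 0;
}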
971 * heads at page->private. in is_page_cache_freeable()
974 return page_count(page) - page_has_private(page) == 1 + page_cache_pins; in is_page_cache_freeable()
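The check above is pure reference counting: an isolated, clean page cache page is freeable only when the caller's reference, one reference per page cache slot (thp_nr_pages()), and an optional buffer-head reference account for every reference it holds. A toy model of that test; all values are illustrative.

#include <stdbool.h>
#include <stdio.h>

static bool cache_page_freeable(int refcount, bool has_private, int nr_subpages)
{
	/* caller (1) + page cache (nr_subpages) + optional private buffers */
	return refcount - (has_private ? 1 : 0) == 1 + nr_subpages;
}

int main(void)
{
	printf("%d\n", cache_page_freeable(3, true, 1));	/* 1: order-0 page with buffers, freeable */
	printf("%d\n", cache_page_freeable(4, true, 1));	/* 0: extra reference, e.g. a transient pin */
	return 0;
}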
979 if (current->flags & PF_SWAPWRITE) in may_write_to_inode()
983 if (inode_to_bdi(inode) == current->backing_dev_info) in may_write_to_inode()
990 * -ENOSPC. We need to propagate that into the address_space for a subsequent
1023 * Calls ->writepage().
1029 * will be non-blocking. To prevent this allocation from being in pageout()
1048 * page->mapping == NULL while being dirty with clean buffers. in pageout()
1059 if (mapping->a_ops->writepage == NULL) in pageout()
1061 if (!may_write_to_inode(mapping->host)) in pageout()
1075 res = mapping->a_ops->writepage(page, &wbc); in pageout()
1108 xa_lock_irq(&mapping->i_pages); in __remove_mapping()
1128 * escape unnoticed. The smp_rmb is needed to ensure the page->flags in __remove_mapping()
1129 * load is not satisfied before that of page->_refcount. in __remove_mapping()
1149 xa_unlock_irq(&mapping->i_pages); in __remove_mapping()
1154 freepage = mapping->a_ops->freepage; in __remove_mapping()
1175 xa_unlock_irq(&mapping->i_pages); in __remove_mapping()
1184 xa_unlock_irq(&mapping->i_pages); in __remove_mapping()
1189 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
1209 * putback_lru_page - put previously isolated page onto appropriate LRU list
1236 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, in page_check_references()
1268 * Activate file-backed executable pages after first usage. in page_check_references()
1309 if (mapping && mapping->a_ops->is_dirty_writeback) in page_check_dirty_writeback()
1310 mapping->a_ops->is_dirty_writeback(page, dirty, writeback); in page_check_dirty_writeback()
1338 int target_nid = next_demotion_node(pgdat->node_id); in demote_page_list()
1379 do_demote_pass = can_demote(pgdat->node_id, sc); in shrink_page_list()
1392 list_del(&page->lru); in shrink_page_list()
1402 sc->nr_scanned += nr_pages; in shrink_page_list()
1407 if (!sc->may_unmap && page_mapped(page)) in shrink_page_list()
1410 may_enter_fs = (sc->gfp_mask & __GFP_FS) || in shrink_page_list()
1411 (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); in shrink_page_list()
1421 stat->nr_dirty++; in shrink_page_list()
1424 stat->nr_unqueued_dirty++; in shrink_page_list()
1434 inode_write_congested(mapping->host)) || in shrink_page_list()
1436 stat->nr_congested++; in shrink_page_list()
1484 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { in shrink_page_list()
1485 stat->nr_immediate++; in shrink_page_list()
1492 * This is slightly racy - end_page_writeback() in shrink_page_list()
1495 * as PageReadahead - but that does not matter in shrink_page_list()
1503 stat->nr_writeback++; in shrink_page_list()
1511 list_add_tail(&page->lru, page_list); in shrink_page_list()
1523 stat->nr_ref_keep += nr_pages; in shrink_page_list()
1536 list_add(&page->lru, &demote_pages); in shrink_page_list()
1548 if (!(sc->gfp_mask & __GFP_IO)) in shrink_page_list()
1599 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1616 stat->nr_unmap_fail += nr_pages; in shrink_page_list()
1618 stat->nr_lazyfree_fail += nr_pages; in shrink_page_list()
1627 * injecting inefficient single-page IO into in shrink_page_list()
1636 !test_bit(PGDAT_DIRTY, &pgdat->flags))) { in shrink_page_list()
1653 if (!sc->may_writepage) in shrink_page_list()
1668 stat->nr_pageout += thp_nr_pages(page); in shrink_page_list()
1676 * A synchronous write - probably a ramdisk. Go in shrink_page_list()
1702 * drop the buffers and mark the page clean - it can be freed. in shrink_page_list()
1704 * Rarely, pages can have buffers and no ->mapping. These are in shrink_page_list()
1712 if (!try_to_release_page(page, sc->gfp_mask)) in shrink_page_list()
1747 sc->target_mem_cgroup)) in shrink_page_list()
1765 list_add(&page->lru, &free_pages); in shrink_page_list()
1774 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1786 stat->nr_activate[type] += nr_pages; in shrink_page_list()
1792 list_add(&page->lru, &ret_pages); in shrink_page_list()
1807 pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; in shrink_page_list()
1837 list_move(&page->lru, &clean_pages); in reclaim_clean_pages_from_list()
1848 nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, in reclaim_clean_pages_from_list()
1853 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1854 -(long)nr_reclaimed); in reclaim_clean_pages_from_list()
1861 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, in reclaim_clean_pages_from_list()
1863 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1864 -(long)stat.nr_lazyfree_fail); in reclaim_clean_pages_from_list()
1891 * blocking - clean pages for the most part. in __isolate_lru_page_prepare()
1907 * ->migratepage callback are possible to migrate in __isolate_lru_page_prepare()
1918 migrate_dirty = !mapping || mapping->a_ops->migratepage; in __isolate_lru_page_prepare()
1944 update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); in update_lru_sizes()
1952 * lruvec->lru_lock is heavily contended. Some of the functions that
1956 * For pagecache intensive workloads, this function is the hottest
1975 struct list_head *src = &lruvec->lists[lru]; in isolate_lru_pages()
1982 isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); in isolate_lru_pages()
1995 if (page_zonenum(page) > sc->reclaim_idx) { in isolate_lru_pages()
1996 list_move(&page->lru, &pages_skipped); in isolate_lru_pages()
2008 * premature OOM since __isolate_lru_page() returns -EBUSY in isolate_lru_pages()
2014 list_move(&page->lru, src); in isolate_lru_pages()
2019 * sure the page is not being freed elsewhere -- the in isolate_lru_pages()
2023 list_move(&page->lru, src); in isolate_lru_pages()
2030 list_move(&page->lru, src); in isolate_lru_pages()
2036 list_move(&page->lru, dst); in isolate_lru_pages()
2059 trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, in isolate_lru_pages()
2066 * isolate_lru_page - tries to isolate a page from its LRU list
2073 * Returns -EBUSY if the page was not on an LRU list.
2093 int ret = -EBUSY; in isolate_lru_page()
2139 * won't get blocked by normal direct-reclaimers, forming a circular in too_many_isolated()
2142 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) in too_many_isolated()
2164 list_del(&page->lru); in move_pages_to_lru()
2166 spin_unlock_irq(&lruvec->lru_lock); in move_pages_to_lru()
2168 spin_lock_irq(&lruvec->lru_lock); in move_pages_to_lru()
2180 * list_add(&page->lru,) in move_pages_to_lru()
2181 * list_add(&page->lru,) in move_pages_to_lru()
2189 spin_unlock_irq(&lruvec->lru_lock); in move_pages_to_lru()
2191 spin_lock_irq(&lruvec->lru_lock); in move_pages_to_lru()
2193 list_add(&page->lru, &pages_to_free); in move_pages_to_lru()
2219 * If a kernel thread (such as nfsd for loop-back mounts) services
2226 return !(current->flags & PF_LOCAL_THROTTLE) || in current_may_throttle()
2227 current->backing_dev_info == NULL || in current_may_throttle()
2228 bdi_write_congested(current->backing_dev_info); in current_may_throttle()
2264 spin_lock_irq(&lruvec->lru_lock); in shrink_inactive_list()
2276 spin_unlock_irq(&lruvec->lru_lock); in shrink_inactive_list()
2283 spin_lock_irq(&lruvec->lru_lock); in shrink_inactive_list()
2286 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_inactive_list()
2292 spin_unlock_irq(&lruvec->lru_lock); in shrink_inactive_list()
2312 sc->nr.dirty += stat.nr_dirty; in shrink_inactive_list()
2313 sc->nr.congested += stat.nr_congested; in shrink_inactive_list()
2314 sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; in shrink_inactive_list()
2315 sc->nr.writeback += stat.nr_writeback; in shrink_inactive_list()
2316 sc->nr.immediate += stat.nr_immediate; in shrink_inactive_list()
2317 sc->nr.taken += nr_taken; in shrink_inactive_list()
2319 sc->nr.file_taken += nr_taken; in shrink_inactive_list()
2321 trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, in shrink_inactive_list()
2322 nr_scanned, nr_reclaimed, &stat, sc->priority, file); in shrink_inactive_list()
2337 * It is safe to rely on PG_active against the non-LRU pages in here because
2338 * nobody will play with that bit on a non-LRU page.
2340 * The downside is that we have to touch page->_refcount against each page.
2341 * But we had to alter page->flags anyway.
2362 spin_lock_irq(&lruvec->lru_lock); in shrink_active_list()
2373 spin_unlock_irq(&lruvec->lru_lock); in shrink_active_list()
2378 list_del(&page->lru); in shrink_active_list()
2393 if (page_referenced(page, 0, sc->target_mem_cgroup, in shrink_active_list()
2396 * Identify referenced, file-backed active pages and in shrink_active_list()
2400 * are not likely to be evicted by use-once streaming in shrink_active_list()
2406 list_add(&page->lru, &l_active); in shrink_active_list()
2411 ClearPageActive(page); /* we are de-activating */ in shrink_active_list()
2413 list_add(&page->lru, &l_inactive); in shrink_active_list()
2419 spin_lock_irq(&lruvec->lru_lock); in shrink_active_list()
2429 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_active_list()
2430 spin_unlock_irq(&lruvec->lru_lock); in shrink_active_list()
2434 trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, in shrink_active_list()
2435 nr_deactivate, nr_rotated, sc->priority, file); in shrink_active_list()
2465 list_move(&page->lru, &node_page_list); in reclaim_pages()
2474 list_del(&page->lru); in reclaim_pages()
2487 list_del(&page->lru); in reclaim_pages()
2501 if (sc->may_deactivate & (1 << is_file_lru(lru))) in shrink_list()
2504 sc->skipped_deactivate = 1; in shrink_list()
2516 * to the established workingset on the scan-resistant active list,
2530 * -------------------------------------
2549 gb = (inactive + active) >> (30 - PAGE_SHIFT); in inactive_is_low()
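The gb computation above feeds the target ratio from the comment table: for LRUs of a gigabyte and up, the ratio is roughly int_sqrt(10 * gb), and the inactive list counts as low when inactive * ratio < active. A standalone sketch, assuming 4K pages and a floor integer square root; the sizes are illustrative.

#include <stdbool.h>
#include <stdio.h>

static unsigned long isqrt(unsigned long x)	/* floor(sqrt(x)), like the kernel's int_sqrt() */
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

static bool inactive_is_low(unsigned long inactive, unsigned long active)
{
	unsigned long gb = (inactive + active) >> (30 - 12);	/* pages -> GB with PAGE_SHIFT = 12 */
	unsigned long ratio = gb ? isqrt(10 * gb) : 1;

	return inactive * ratio < active;
}

int main(void)
{
	unsigned long gb_pages = 1UL << (30 - 12);

	/* 12GB LRU split 1GB inactive / 11GB active: ratio isqrt(120) = 10, and 10 < 11, so low */
	printf("%d\n", inactive_is_low(1 * gb_pages, 11 * gb_pages));
	return 0;
}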
2588 if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) { in get_scan_count()
2610 if (!sc->priority && swappiness) { in get_scan_count()
2616 * If the system is almost out of file pages, force-scan anon. in get_scan_count()
2618 if (sc->file_is_tiny) { in get_scan_count()
2627 if (sc->cache_trim_mode) { in get_scan_count()
2648 total_cost = sc->anon_cost + sc->file_cost; in get_scan_count()
2649 anon_cost = total_cost + sc->anon_cost; in get_scan_count()
2650 file_cost = total_cost + sc->file_cost; in get_scan_count()
2656 fp = (200 - swappiness) * (total_cost + 1); in get_scan_count()
2669 lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); in get_scan_count()
2670 mem_cgroup_protection(sc->target_mem_cgroup, memcg, in get_scan_count()
2680 * becomes extremely binary -- from nothing as we in get_scan_count()
2695 * the best-effort low protection. However, we still in get_scan_count()
2696 * ideally want to honor how well-behaved groups are in in get_scan_count()
2707 if (!sc->memcg_low_reclaim && low > min) { in get_scan_count()
2709 sc->memcg_low_skipped = 1; in get_scan_count()
2717 scan = lruvec_size - lruvec_size * protection / in get_scan_count()
2723 * sc->priority further than desirable. in get_scan_count()
2730 scan >>= sc->priority; in get_scan_count()
2749 * round-off error. in get_scan_count()
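The fractions computed a few lines above split the per-priority scan target between the anon and file LRUs according to swappiness and the relative reclaim cost recorded for each type. A user-space model of that split, leaving out the memcg protection term; the costs, list sizes, and swappiness below are made up, and DEF_PRIORITY is 12.

#include <stdio.h>

int main(void)
{
	unsigned long anon_cost = 1000, file_cost = 3000;	/* recent rotation/IO cost per type */
	unsigned long swappiness = 60, priority = 12;		/* vm.swappiness default, DEF_PRIORITY */
	unsigned long anon_lru = 1UL << 20, file_lru = 1UL << 22;

	unsigned long total = anon_cost + file_cost;
	unsigned long long ap = swappiness * (total + 1) / (total + anon_cost + 1);
	unsigned long long fp = (200 - swappiness) * (total + 1) / (total + file_cost + 1);

	/* lruvec_lru_size() >> priority, then proportioned by the fractions */
	unsigned long long anon_scan = ((unsigned long long)anon_lru >> priority) * ap / (ap + fp);
	unsigned long long file_scan = ((unsigned long long)file_lru >> priority) * fp / (ap + fp);

	printf("scan %llu anon, %llu file pages this pass\n", anon_scan, file_scan);
	return 0;
}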
2783 return can_demote(pgdat->node_id, sc); in can_age_anon_pages()
2793 unsigned long nr_to_reclaim = sc->nr_to_reclaim; in shrink_lruvec()
2814 sc->priority == DEF_PRIORITY); in shrink_lruvec()
2825 nr[lru] -= nr_to_scan; in shrink_lruvec()
2877 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2878 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2879 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2882 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2883 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2884 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2889 sc->nr_reclaimed += nr_reclaimed; in shrink_lruvec()
2904 if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && in in_reclaim_compaction()
2905 (sc->order > PAGE_ALLOC_COSTLY_ORDER || in in_reclaim_compaction()
2906 sc->priority < DEF_PRIORITY - 2)) in in_reclaim_compaction()
2913 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2914 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2937 * first, by assuming that zero delta of sc->nr_scanned means full LRU in should_continue_reclaim()
2939 * where always a non-zero amount of pages were scanned. in should_continue_reclaim()
2945 for (z = 0; z <= sc->reclaim_idx; z++) { in should_continue_reclaim()
2946 struct zone *zone = &pgdat->node_zones[z]; in should_continue_reclaim()
2950 switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { in should_continue_reclaim()
2964 pages_for_compaction = compact_gap(sc->order); in should_continue_reclaim()
2966 if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) in should_continue_reclaim()
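compact_gap(), used above to size pages_for_compaction, is simply 2UL << order in this era of the kernel: reclaim keeps going until roughly twice the requested allocation is free, so the compaction free scanner has headroom to migrate pages into. A small worked example, assuming 4K pages.

#include <stdio.h>

static unsigned long compact_gap(unsigned int order)
{
	return 2UL << order;	/* twice the allocation size, in pages */
}

int main(void)
{
	/* order-9 request (a 2MB THP with 4K pages): keep reclaiming until
	 * 1024 pages, i.e. 4MB, could be freed before bailing out. */
	printf("%lu pages\n", compact_gap(9));
	return 0;
}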
2974 struct mem_cgroup *target_memcg = sc->target_mem_cgroup; in shrink_node_memcgs()
2984 * This loop can become CPU-bound when target memcgs in shrink_node_memcgs()
2985 * aren't eligible for reclaim - either because they in shrink_node_memcgs()
3006 if (!sc->memcg_low_reclaim) { in shrink_node_memcgs()
3007 sc->memcg_low_skipped = 1; in shrink_node_memcgs()
3013 reclaimed = sc->nr_reclaimed; in shrink_node_memcgs()
3014 scanned = sc->nr_scanned; in shrink_node_memcgs()
3018 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, in shrink_node_memcgs()
3019 sc->priority); in shrink_node_memcgs()
3022 vmpressure(sc->gfp_mask, memcg, false, in shrink_node_memcgs()
3023 sc->nr_scanned - scanned, in shrink_node_memcgs()
3024 sc->nr_reclaimed - reclaimed); in shrink_node_memcgs()
3031 struct reclaim_state *reclaim_state = current->reclaim_state; in shrink_node()
3037 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); in shrink_node()
3041 * Flush the memory cgroup stats, so that we read accurate per-memcg in shrink_node()
3046 memset(&sc->nr, 0, sizeof(sc->nr)); in shrink_node()
3048 nr_reclaimed = sc->nr_reclaimed; in shrink_node()
3049 nr_scanned = sc->nr_scanned; in shrink_node()
3054 spin_lock_irq(&target_lruvec->lru_lock); in shrink_node()
3055 sc->anon_cost = target_lruvec->anon_cost; in shrink_node()
3056 sc->file_cost = target_lruvec->file_cost; in shrink_node()
3057 spin_unlock_irq(&target_lruvec->lru_lock); in shrink_node()
3063 if (!sc->force_deactivate) { in shrink_node()
3068 if (refaults != target_lruvec->refaults[0] || in shrink_node()
3070 sc->may_deactivate |= DEACTIVATE_ANON; in shrink_node()
3072 sc->may_deactivate &= ~DEACTIVATE_ANON; in shrink_node()
3081 if (refaults != target_lruvec->refaults[1] || in shrink_node()
3083 sc->may_deactivate |= DEACTIVATE_FILE; in shrink_node()
3085 sc->may_deactivate &= ~DEACTIVATE_FILE; in shrink_node()
3087 sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; in shrink_node()
3095 if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) in shrink_node()
3096 sc->cache_trim_mode = 1; in shrink_node()
3098 sc->cache_trim_mode = 0; in shrink_node()
3114 free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); in shrink_node()
3119 struct zone *zone = &pgdat->node_zones[z]; in shrink_node()
3133 sc->file_is_tiny = in shrink_node()
3135 !(sc->may_deactivate & DEACTIVATE_ANON) && in shrink_node()
3136 anon >> sc->priority; in shrink_node()
3142 sc->nr_reclaimed += reclaim_state->reclaimed_slab; in shrink_node()
3143 reclaim_state->reclaimed_slab = 0; in shrink_node()
3147 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, in shrink_node()
3148 sc->nr_scanned - nr_scanned, in shrink_node()
3149 sc->nr_reclaimed - nr_reclaimed); in shrink_node()
3151 if (sc->nr_reclaimed - nr_reclaimed) in shrink_node()
3157 * it implies that the long-lived page allocation rate in shrink_node()
3172 if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) in shrink_node()
3173 set_bit(PGDAT_WRITEBACK, &pgdat->flags); in shrink_node()
3176 if (sc->nr.unqueued_dirty == sc->nr.file_taken) in shrink_node()
3177 set_bit(PGDAT_DIRTY, &pgdat->flags); in shrink_node()
3185 if (sc->nr.immediate) in shrink_node()
3199 sc->nr.dirty && sc->nr.dirty == sc->nr.congested) in shrink_node()
3200 set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); in shrink_node()
3209 !sc->hibernation_mode && in shrink_node()
3210 test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) in shrink_node()
3213 if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, in shrink_node()
3224 pgdat->kswapd_failures = 0; in shrink_node()
3228 * Returns true if compaction should go ahead for a costly-order request, or
3237 suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); in compaction_ready()
3254 watermark = high_wmark_pages(zone) + compact_gap(sc->order); in compaction_ready()
3256 return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); in compaction_ready()
3260 * This is the direct reclaim path, for page-allocating processes. We only
3281 orig_mask = sc->gfp_mask; in shrink_zones()
3283 sc->gfp_mask |= __GFP_HIGHMEM; in shrink_zones()
3284 sc->reclaim_idx = gfp_zone(sc->gfp_mask); in shrink_zones()
3288 sc->reclaim_idx, sc->nodemask) { in shrink_zones()
3302 * non-zero order, only frequent costly order in shrink_zones()
3308 sc->order > PAGE_ALLOC_COSTLY_ORDER && in shrink_zones()
3310 sc->compaction_ready = true; in shrink_zones()
3320 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
3330 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat, in shrink_zones()
3331 sc->order, sc->gfp_mask, in shrink_zones()
3333 sc->nr_reclaimed += nr_soft_reclaimed; in shrink_zones()
3334 sc->nr_scanned += nr_soft_scanned; in shrink_zones()
3339 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
3341 last_pgdat = zone->zone_pgdat; in shrink_zones()
3342 shrink_node(zone->zone_pgdat, sc); in shrink_zones()
3349 sc->gfp_mask = orig_mask; in shrink_zones()
3359 target_lruvec->refaults[0] = refaults; in snapshot_refaults()
3361 target_lruvec->refaults[1] = refaults; in snapshot_refaults()
3371 * high - the zone may be full of dirty or under-writeback pages, which this
3383 int initial_priority = sc->priority; in do_try_to_free_pages()
3391 __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); in do_try_to_free_pages()
3394 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, in do_try_to_free_pages()
3395 sc->priority); in do_try_to_free_pages()
3396 sc->nr_scanned = 0; in do_try_to_free_pages()
3399 if (sc->nr_reclaimed >= sc->nr_to_reclaim) in do_try_to_free_pages()
3402 if (sc->compaction_ready) in do_try_to_free_pages()
3409 if (sc->priority < DEF_PRIORITY - 2) in do_try_to_free_pages()
3410 sc->may_writepage = 1; in do_try_to_free_pages()
3411 } while (--sc->priority >= 0); in do_try_to_free_pages()
3414 for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, in do_try_to_free_pages()
3415 sc->nodemask) { in do_try_to_free_pages()
3416 if (zone->zone_pgdat == last_pgdat) in do_try_to_free_pages()
3418 last_pgdat = zone->zone_pgdat; in do_try_to_free_pages()
3420 snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); in do_try_to_free_pages()
3425 lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, in do_try_to_free_pages()
3426 zone->zone_pgdat); in do_try_to_free_pages()
3427 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in do_try_to_free_pages()
3433 if (sc->nr_reclaimed) in do_try_to_free_pages()
3434 return sc->nr_reclaimed; in do_try_to_free_pages()
3437 if (sc->compaction_ready) in do_try_to_free_pages()
3449 if (sc->skipped_deactivate) { in do_try_to_free_pages()
3450 sc->priority = initial_priority; in do_try_to_free_pages()
3451 sc->force_deactivate = 1; in do_try_to_free_pages()
3452 sc->skipped_deactivate = 0; in do_try_to_free_pages()
3457 if (sc->memcg_low_skipped) { in do_try_to_free_pages()
3458 sc->priority = initial_priority; in do_try_to_free_pages()
3459 sc->force_deactivate = 0; in do_try_to_free_pages()
3460 sc->memcg_low_reclaim = 1; in do_try_to_free_pages()
3461 sc->memcg_low_skipped = 0; in do_try_to_free_pages()
3476 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in allow_direct_reclaim()
3480 zone = &pgdat->node_zones[i]; in allow_direct_reclaim()
3498 if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { in allow_direct_reclaim()
3499 if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) in allow_direct_reclaim()
3500 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL); in allow_direct_reclaim()
3502 wake_up_interruptible(&pgdat->kswapd_wait); in allow_direct_reclaim()
3531 if (current->flags & PF_KTHREAD) in throttle_direct_reclaim()
3561 pgdat = zone->zone_pgdat; in throttle_direct_reclaim()
3583 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3587 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3654 .reclaim_idx = MAX_NR_ZONES - 1, in mem_cgroup_shrink_node()
3658 WARN_ON_ONCE(!current->reclaim_state); in mem_cgroup_shrink_node()
3693 .reclaim_idx = MAX_NR_ZONES - 1, in try_to_free_mem_cgroup_pages()
3749 * Check for watermark boosts top-down as the higher zones in pgdat_watermark_boosted()
3755 for (i = highest_zoneidx; i >= 0; i--) { in pgdat_watermark_boosted()
3756 zone = pgdat->node_zones + i; in pgdat_watermark_boosted()
3760 if (zone->watermark_boost) in pgdat_watermark_boosted()
3774 unsigned long mark = -1; in pgdat_balanced()
3778 * Check watermarks bottom-up as lower zones are more likely to in pgdat_balanced()
3782 zone = pgdat->node_zones + i; in pgdat_balanced()
3794 * need balancing by definition. This can happen if a zone-restricted in pgdat_balanced()
3797 if (mark == -1) in pgdat_balanced()
3808 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in clear_pgdat_congested()
3809 clear_bit(PGDAT_DIRTY, &pgdat->flags); in clear_pgdat_congested()
3810 clear_bit(PGDAT_WRITEBACK, &pgdat->flags); in clear_pgdat_congested()
3835 if (waitqueue_active(&pgdat->pfmemalloc_wait)) in prepare_kswapd_sleep()
3836 wake_up_all(&pgdat->pfmemalloc_wait); in prepare_kswapd_sleep()
3839 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in prepare_kswapd_sleep()
3865 sc->nr_to_reclaim = 0; in kswapd_shrink_node()
3866 for (z = 0; z <= sc->reclaim_idx; z++) { in kswapd_shrink_node()
3867 zone = pgdat->node_zones + z; in kswapd_shrink_node()
3871 sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); in kswapd_shrink_node()
3882 * high-order allocations. If twice the allocation size has been in kswapd_shrink_node()
3883 * reclaimed then recheck watermarks only at order-0 to prevent in kswapd_shrink_node()
3884 * excessive reclaim. Assume that a process requested a high-order in kswapd_shrink_node()
3887 if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order)) in kswapd_shrink_node()
3888 sc->order = 0; in kswapd_shrink_node()
3890 return sc->nr_scanned >= sc->nr_to_reclaim; in kswapd_shrink_node()
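The nr_to_reclaim accumulation a few lines above gives kswapd its per-pass target: the sum, over the zones being balanced, of each zone's high watermark, floored at SWAP_CLUSTER_MAX (32) pages per zone. A minimal model with made-up watermarks.

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL

int main(void)
{
	unsigned long high_wmark[] = { 12, 4096, 65536 };	/* e.g. DMA, DMA32, Normal, in pages */
	unsigned long nr_to_reclaim = 0;
	unsigned int i;

	for (i = 0; i < sizeof(high_wmark) / sizeof(high_wmark[0]); i++)
		nr_to_reclaim += high_wmark[i] > SWAP_CLUSTER_MAX ?
				 high_wmark[i] : SWAP_CLUSTER_MAX;

	printf("kswapd per-pass target: %lu pages\n", nr_to_reclaim);	/* 32 + 4096 + 65536 */
	return 0;
}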
3901 zone = pgdat->node_zones + i; in update_reclaim_active()
3907 set_bit(ZONE_RECLAIM_ACTIVE, &zone->flags); in update_reclaim_active()
3909 clear_bit(ZONE_RECLAIM_ACTIVE, &zone->flags); in update_reclaim_active()
3932 * kswapd scans the zones in the highmem->normal->dma direction. It skips
3967 zone = pgdat->node_zones + i; in balance_pgdat()
3971 nr_boost_reclaim += zone->watermark_boost; in balance_pgdat()
3972 zone_boosts[i] = zone->watermark_boost; in balance_pgdat()
3990 * purpose -- on 64-bit systems it is expected that in balance_pgdat()
3991 * buffer_heads are stripped during active rotation. On 32-bit in balance_pgdat()
3998 for (i = MAX_NR_ZONES - 1; i >= 0; i--) { in balance_pgdat()
3999 zone = pgdat->node_zones + i; in balance_pgdat()
4013 * re-evaluate if boosting is required when kswapd next wakes. in balance_pgdat()
4030 if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) in balance_pgdat()
4035 * intent is to relieve pressure not issue sub-optimal IO in balance_pgdat()
4054 if (sc.priority < DEF_PRIORITY - 2) in balance_pgdat()
4077 if (waitqueue_active(&pgdat->pfmemalloc_wait) && in balance_pgdat()
4079 wake_up_all(&pgdat->pfmemalloc_wait); in balance_pgdat()
4092 nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; in balance_pgdat()
4093 nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); in balance_pgdat()
4104 sc.priority--; in balance_pgdat()
4108 pgdat->kswapd_failures++; in balance_pgdat()
4122 zone = pgdat->node_zones + i; in balance_pgdat()
4123 spin_lock_irqsave(&zone->lock, flags); in balance_pgdat()
4124 zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); in balance_pgdat()
4125 spin_unlock_irqrestore(&zone->lock, flags); in balance_pgdat()
4150 * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to
4159 enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in kswapd_highest_zoneidx()
4173 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
4205 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, in kswapd_try_to_sleep()
4209 if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) in kswapd_try_to_sleep()
4210 WRITE_ONCE(pgdat->kswapd_order, reclaim_order); in kswapd_try_to_sleep()
4213 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
4214 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
4223 trace_mm_vmscan_kswapd_sleep(pgdat->node_id); in kswapd_try_to_sleep()
4230 * per-cpu vmstat threshold while kswapd is awake and restore in kswapd_try_to_sleep()
4245 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
4258 * If there are applications that are active memory-allocators
4264 unsigned int highest_zoneidx = MAX_NR_ZONES - 1; in kswapd()
4267 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); in kswapd()
4284 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; in kswapd()
4287 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
4288 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
4292 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
4301 alloc_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
4304 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
4305 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
4319 * Reclaim begins at the requested order but if a high-order in kswapd()
4321 * order-0. If that happens, kswapd will consider sleeping in kswapd()
4326 trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, in kswapd()
4334 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); in kswapd()
4340 * A zone is low on free memory or too fragmented for high-order memory. If
4358 pgdat = zone->zone_pgdat; in wakeup_kswapd()
4359 curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in wakeup_kswapd()
4362 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx); in wakeup_kswapd()
4364 if (READ_ONCE(pgdat->kswapd_order) < order) in wakeup_kswapd()
4365 WRITE_ONCE(pgdat->kswapd_order, order); in wakeup_kswapd()
4367 if (!waitqueue_active(&pgdat->kswapd_wait)) in wakeup_kswapd()
4371 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || in wakeup_kswapd()
4376 * fragmented for high-order allocations. Wake up kcompactd in wakeup_kswapd()
4386 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order, in wakeup_kswapd()
4388 wake_up_interruptible(&pgdat->kswapd_wait); in wakeup_kswapd()
4393 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
4405 .reclaim_idx = MAX_NR_ZONES - 1, in shrink_all_memory()
4431 * This kswapd start function will be called by init and node-hot-add.
4432 * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
4438 if (pgdat->kswapd) in kswapd_run()
4441 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); in kswapd_run()
4442 if (IS_ERR(pgdat->kswapd)) { in kswapd_run()
4446 pgdat->kswapd = NULL; in kswapd_run()
4456 struct task_struct *kswapd = NODE_DATA(nid)->kswapd; in kswapd_stop()
4460 NODE_DATA(nid)->kswapd = NULL; in kswapd_stop()
4480 * If non-zero call node_reclaim when the number of free pages falls below
4515 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0; in node_unmapped_file_pages()
4543 return nr_pagecache_reclaimable - delta; in node_pagecache_reclaimable()
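The return above ends node_pagecache_reclaimable(), which estimates how much page cache node_reclaim can realistically free: only unmapped file pages unless RECLAIM_UNMAP is set, and with dirty pages excluded unless RECLAIM_WRITE permits writeout. A standalone sketch of that estimate; the flag values follow the kernel's node_reclaim_mode bits, while the counters are invented.

#include <stdio.h>

#define RECLAIM_WRITE (1 << 1)	/* allow writeout during node reclaim */
#define RECLAIM_UNMAP (1 << 2)	/* allow unmapping during node reclaim */

static unsigned long pagecache_reclaimable(unsigned int mode,
					   unsigned long file_pages,
					   unsigned long file_mapped,
					   unsigned long file_dirty)
{
	unsigned long reclaimable, delta = 0;

	reclaimable = (mode & RECLAIM_UNMAP) ? file_pages :
		      (file_pages > file_mapped ? file_pages - file_mapped : 0);
	if (!(mode & RECLAIM_WRITE))
		delta += file_dirty;		/* can't clean them, so don't count them */
	if (delta > reclaimable)		/* watch for underflow */
		delta = reclaimable;
	return reclaimable - delta;
}

int main(void)
{
	/* 100k file pages, 30k mapped, 10k dirty, conservative mode bits */
	printf("%lu reclaimable pages\n",
	       pagecache_reclaimable(0, 100000, 30000, 10000));	/* 60000 */
	return 0;
}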
4567 trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, in __node_reclaim()
4579 p->flags |= PF_SWAPWRITE; in __node_reclaim()
4582 if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { in __node_reclaim()
4589 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); in __node_reclaim()
4593 current->flags &= ~PF_SWAPWRITE; in __node_reclaim()
4617 if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && in node_reclaim()
4619 pgdat->min_slab_pages) in node_reclaim()
4625 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) in node_reclaim()
4630 * have associated processors. This will favor the local processor in node_reclaim()
4634 if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) in node_reclaim()
4637 if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) in node_reclaim()
4641 clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); in node_reclaim()
4651 * check_move_unevictable_pages - check pages for evictability and move to
4666 for (i = 0; i < pvec->nr; i++) { in check_move_unevictable_pages()
4667 struct page *page = pvec->pages[i]; in check_move_unevictable_pages()