Lines matching the full-text query +full:processor +full:- +full:intensive (a search for "processor-intensive"). Each match below shows its mm/vmscan.c line number, the matched source line, and the enclosing function where there is one.
1 // SPDX-License-Identifier: GPL-2.0
34 #include <linux/backing-dev.h>
158 if ((_page)->lru.prev != _base) { \
161 prev = lru_to_page(&(_page->lru)); \
162 prefetchw(&prev->_field); \
178 WARN_ON_ONCE(rs && task->reclaim_state); in set_task_reclaim_state()
180 /* Check for the nulling of an already-nulled member */ in set_task_reclaim_state()
181 WARN_ON_ONCE(!rs && !task->reclaim_state); in set_task_reclaim_state()
183 task->reclaim_state = rs; in set_task_reclaim_state()
191 * We allow subsystems to populate their shrinker-related
208 int id, ret = -ENOMEM; in prealloc_memcg_shrinker()
224 shrinker->id = id; in prealloc_memcg_shrinker()
233 int id = shrinker->id; in unregister_memcg_shrinker()
244 return sc->target_mem_cgroup; in cgroup_reclaim()
248 * writeback_throttling_sane - is the usual dirty throttling mechanism available?
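The line above is the kerneldoc for writeback_throttling_sane(). A minimal sketch of the predicate it documents, reconstructed from this era of mm/vmscan.c (best effort, not verbatim; cgroup_subsys_on_dfl() and memory_cgrp_subsys are the usual cgroup helpers, not part of the matches): global reclaim always has real dirty throttling, while legacy-hierarchy cgroup reclaim does not.

static bool writeback_throttling_sane(struct scan_control *sc)
{
        /* Global reclaim can rely on the real dirty throttling. */
        if (!cgroup_reclaim(sc))
                return true;
#ifdef CONFIG_CGROUP_WRITEBACK
        /* On cgroup v2, memcg is wired into the writeback machinery. */
        if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
                return true;
#endif
        return false;
}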
310 * lruvec_lru_size - Returns the number of pages on the given LRU list.
321 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
339 unsigned int size = sizeof(*shrinker->nr_deferred); in prealloc_shrinker()
341 if (shrinker->flags & SHRINKER_NUMA_AWARE) in prealloc_shrinker()
344 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); in prealloc_shrinker()
345 if (!shrinker->nr_deferred) in prealloc_shrinker()
346 return -ENOMEM; in prealloc_shrinker()
348 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in prealloc_shrinker()
356 kfree(shrinker->nr_deferred); in prealloc_shrinker()
357 shrinker->nr_deferred = NULL; in prealloc_shrinker()
358 return -ENOMEM; in prealloc_shrinker()
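The search elided the per-node scaling line that sits between the fragments at 339-341 and 344 above. Putting the matched pieces back together, a hedged reconstruction of prealloc_shrinker() from this era (not verbatim):

int prealloc_shrinker(struct shrinker *shrinker)
{
        unsigned int size = sizeof(*shrinker->nr_deferred);

        /* NUMA-aware shrinkers keep one deferred count per node. */
        if (shrinker->flags & SHRINKER_NUMA_AWARE)
                size *= nr_node_ids;

        shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
        if (!shrinker->nr_deferred)
                return -ENOMEM;

        /* Memcg-aware shrinkers additionally need an IDR slot. */
        if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
                if (prealloc_memcg_shrinker(shrinker))
                        goto free_deferred;
        }

        return 0;

free_deferred:
        kfree(shrinker->nr_deferred);
        shrinker->nr_deferred = NULL;
        return -ENOMEM;
}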
363 if (!shrinker->nr_deferred) in free_prealloced_shrinker()
366 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in free_prealloced_shrinker()
369 kfree(shrinker->nr_deferred); in free_prealloced_shrinker()
370 shrinker->nr_deferred = NULL; in free_prealloced_shrinker()
376 list_add_tail(&shrinker->list, &shrinker_list); in register_shrinker_prepared()
378 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in register_shrinker_prepared()
379 idr_replace(&shrinker_idr, shrinker, shrinker->id); in register_shrinker_prepared()
400 if (!shrinker->nr_deferred) in unregister_shrinker()
402 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in unregister_shrinker()
405 list_del(&shrinker->list); in unregister_shrinker()
407 kfree(shrinker->nr_deferred); in unregister_shrinker()
408 shrinker->nr_deferred = NULL; in unregister_shrinker()
423 int nid = shrinkctl->nid; in do_shrink_slab()
424 long batch_size = shrinker->batch ? shrinker->batch in do_shrink_slab()
428 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) in do_shrink_slab()
431 freeable = shrinker->count_objects(shrinker, shrinkctl); in do_shrink_slab()
440 nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); in do_shrink_slab()
443 if (shrinker->seeks) { in do_shrink_slab()
446 do_div(delta, shrinker->seeks); in do_shrink_slab()
459 shrinker->scan_objects, total_scan); in do_shrink_slab()
468 * shrinkers to return -1 all the time. This results in a large in do_shrink_slab()
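The do_shrink_slab() fragments above (lines 440-459) belong to the scan-target computation. A sketch of how they fit together (reconstructed, not verbatim; the deferred-work bookkeeping is simplified): nr picks up work deferred by earlier GFP_NOFS-constrained callers, and delta is the new work for this pass.

        /* Collect work deferred by earlier, GFP_NOFS-limited passes. */
        nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);

        total_scan = nr;
        if (shrinker->seeks) {
                /* Base target: freeable >> priority, scaled by seek cost. */
                delta = freeable >> priority;
                delta *= 4;
                do_div(delta, shrinker->seeks);
        } else {
                /* seeks == 0: cheap-to-recreate objects, trim aggressively. */
                delta = freeable / 2;
        }
        total_scan += delta;

        if (total_scan < 0) {
                pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n",
                       shrinker->scan_objects, total_scan);
                total_scan = freeable;
        }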
511 shrinkctl->nr_to_scan = nr_to_scan; in do_shrink_slab()
512 shrinkctl->nr_scanned = nr_to_scan; in do_shrink_slab()
513 ret = shrinker->scan_objects(shrinker, shrinkctl); in do_shrink_slab()
518 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); in do_shrink_slab()
519 total_scan -= shrinkctl->nr_scanned; in do_shrink_slab()
520 scanned += shrinkctl->nr_scanned; in do_shrink_slab()
526 next_deferred -= scanned; in do_shrink_slab()
536 &shrinker->nr_deferred[nid]); in do_shrink_slab()
538 new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); in do_shrink_slab()
558 map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map, in shrink_slab_memcg()
563 for_each_set_bit(i, map->map, shrinker_nr_max) { in shrink_slab_memcg()
574 clear_bit(i, map->map); in shrink_slab_memcg()
578 /* Call non-slab shrinkers even though kmem is disabled */ in shrink_slab_memcg()
580 !(shrinker->flags & SHRINKER_NONSLAB)) in shrink_slab_memcg()
585 clear_bit(i, map->map); in shrink_slab_memcg()
628 * shrink_slab - shrink slab caches
642 * @priority is sc->priority, we take the number of objects and >> by priority
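As an illustrative calculation of that shift (numbers invented for the example): at DEF_PRIORITY (12), a cache reporting freeable = 1,048,576 objects gives 1048576 >> 12 = 256; do_shrink_slab() then multiplies by 4 and divides by shrinker->seeks (DEFAULT_SEEKS is 2), for a scan target of 512 objects. As priority falls toward 0 under mounting pressure, the target grows toward 2 * freeable.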
726 * heads at page->private. in is_page_cache_freeable()
729 return page_count(page) - page_has_private(page) == 1 + page_cache_pins; in is_page_cache_freeable()
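Unpacking the matched return statement: the expected references are one from the isolating caller, page_cache_pins from the page cache (thp_nr_pages(page) in this era, so one per subpage of a compound page), and optionally one more, counted via page_has_private(), for buffer heads. With illustrative numbers: an order-0 page with buffers is freeable iff page_count(page) == 3; without buffers, == 2.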
734 if (current->flags & PF_SWAPWRITE) in may_write_to_inode()
738 if (inode_to_bdi(inode) == current->backing_dev_info) in may_write_to_inode()
745 * -ENOSPC. We need to propagate that into the address_space for a subsequent
778 * Calls ->writepage().
784 * will be non-blocking. To prevent this allocation from being in pageout()
803 * page->mapping == NULL while being dirty with clean buffers. in pageout()
814 if (mapping->a_ops->writepage == NULL) in pageout()
816 if (!may_write_to_inode(mapping->host)) in pageout()
830 res = mapping->a_ops->writepage(page, &wbc); in pageout()
864 xa_lock_irqsave(&mapping->i_pages, flags); in __remove_mapping()
884 * escape unnoticed. The smp_rmb is needed to ensure the page->flags in __remove_mapping()
885 * load is not satisfied before that of page->_refcount. in __remove_mapping()
905 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
910 freepage = mapping->a_ops->freepage; in __remove_mapping()
931 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
940 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
945 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
965 * putback_lru_page - put previously isolated page onto appropriate LRU list
992 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, in page_check_references()
1024 * Activate file-backed executable pages after first usage. in page_check_references()
1065 if (mapping && mapping->a_ops->is_dirty_writeback) in page_check_dirty_writeback()
1066 mapping->a_ops->is_dirty_writeback(page, dirty, writeback); in page_check_dirty_writeback()
1097 list_del(&page->lru); in shrink_page_list()
1107 sc->nr_scanned += nr_pages; in shrink_page_list()
1112 if (!sc->may_unmap && page_mapped(page)) in shrink_page_list()
1115 may_enter_fs = (sc->gfp_mask & __GFP_FS) || in shrink_page_list()
1116 (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); in shrink_page_list()
1126 stat->nr_dirty++; in shrink_page_list()
1129 stat->nr_unqueued_dirty++; in shrink_page_list()
1139 inode_write_congested(mapping->host)) || in shrink_page_list()
1141 stat->nr_congested++; in shrink_page_list()
1189 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { in shrink_page_list()
1190 stat->nr_immediate++; in shrink_page_list()
1197 * This is slightly racy - end_page_writeback() in shrink_page_list()
1200 * as PageReadahead - but that does not matter in shrink_page_list()
1208 stat->nr_writeback++; in shrink_page_list()
1216 list_add_tail(&page->lru, page_list); in shrink_page_list()
1228 stat->nr_ref_keep += nr_pages; in shrink_page_list()
1242 if (!(sc->gfp_mask & __GFP_IO)) in shrink_page_list()
1291 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1307 stat->nr_unmap_fail += nr_pages; in shrink_page_list()
1309 stat->nr_lazyfree_fail += nr_pages; in shrink_page_list()
1318 * injecting inefficient single-page IO into in shrink_page_list()
1327 !test_bit(PGDAT_DIRTY, &pgdat->flags))) { in shrink_page_list()
1344 if (!sc->may_writepage) in shrink_page_list()
1359 stat->nr_pageout += thp_nr_pages(page); in shrink_page_list()
1367 * A synchronous write - probably a ramdisk. Go in shrink_page_list()
1392 * drop the buffers and mark the page clean - it can be freed. in shrink_page_list()
1394 * Rarely, pages can have buffers and no ->mapping. These are in shrink_page_list()
1402 if (!try_to_release_page(page, sc->gfp_mask)) in shrink_page_list()
1434 sc->target_mem_cgroup)) in shrink_page_list()
1452 list_add(&page->lru, &free_pages); in shrink_page_list()
1461 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1473 stat->nr_activate[type] += nr_pages; in shrink_page_list()
1479 list_add(&page->lru, &ret_pages); in shrink_page_list()
1483 pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; in shrink_page_list()
1512 list_move(&page->lru, &clean_pages); in reclaim_clean_pages_from_list()
1516 nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, in reclaim_clean_pages_from_list()
1519 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1520 -(long)nr_reclaimed); in reclaim_clean_pages_from_list()
1527 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, in reclaim_clean_pages_from_list()
1529 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1530 -(long)stat.nr_lazyfree_fail); in reclaim_clean_pages_from_list()
1542 * returns 0 on success, -ve errno on failure.
1546 int ret = -EINVAL; in __isolate_lru_page()
1556 ret = -EBUSY; in __isolate_lru_page()
1561 * blocking - clean pages for the most part. in __isolate_lru_page()
1577 * ->migratepage callback are possible to migrate in __isolate_lru_page()
1588 migrate_dirty = !mapping || mapping->a_ops->migratepage; in __isolate_lru_page()
1601 * sure the page is not being freed elsewhere -- the in __isolate_lru_page()
1625 update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); in update_lru_sizes()
1631 * pgdat->lru_lock is heavily contended. Some of the functions that
1635 * For pagecache intensive workloads, this function is the hottest
1654 struct list_head *src = &lruvec->lists[lru]; in isolate_lru_pages()
1661 isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); in isolate_lru_pages()
1676 if (page_zonenum(page) > sc->reclaim_idx) { in isolate_lru_pages()
1677 list_move(&page->lru, &pages_skipped); in isolate_lru_pages()
1689 * premature OOM since __isolate_lru_page() returns -EBUSY in isolate_lru_pages()
1697 list_move(&page->lru, dst); in isolate_lru_pages()
1700 case -EBUSY: in isolate_lru_pages()
1702 list_move(&page->lru, src); in isolate_lru_pages()
1730 trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, in isolate_lru_pages()
1737 * isolate_lru_page - tries to isolate a page from its LRU list
1744 * Returns -EBUSY if the page was not on an LRU list.
1764 int ret = -EBUSY; in isolate_lru_page()
1773 spin_lock_irq(&pgdat->lru_lock); in isolate_lru_page()
1782 spin_unlock_irq(&pgdat->lru_lock); in isolate_lru_page()
1815 * won't get blocked by normal direct-reclaimers, forming a circular in too_many_isolated()
1818 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) in too_many_isolated()
1835 * It is safe to rely on PG_active against the non-LRU pages in here because
1836 * nobody will play with that bit on a non-LRU page.
1838 * The downside is that we have to touch page->_refcount against each page.
1839 * But we had to alter page->flags anyway.
1857 list_del(&page->lru); in move_pages_to_lru()
1858 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1860 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1870 list_move(&page->lru, &lruvec->lists[lru]); in move_pages_to_lru()
1878 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1880 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1882 list_add(&page->lru, &pages_to_free); in move_pages_to_lru()
1899 * If a kernel thread (such as nfsd for loop-back mounts) services
1906 return !(current->flags & PF_LOCAL_THROTTLE) || in current_may_throttle()
1907 current->backing_dev_info == NULL || in current_may_throttle()
1908 bdi_write_congested(current->backing_dev_info); in current_may_throttle()
1944 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1956 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1964 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1968 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_inactive_list()
1976 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1995 sc->nr.dirty += stat.nr_dirty; in shrink_inactive_list()
1996 sc->nr.congested += stat.nr_congested; in shrink_inactive_list()
1997 sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; in shrink_inactive_list()
1998 sc->nr.writeback += stat.nr_writeback; in shrink_inactive_list()
1999 sc->nr.immediate += stat.nr_immediate; in shrink_inactive_list()
2000 sc->nr.taken += nr_taken; in shrink_inactive_list()
2002 sc->nr.file_taken += nr_taken; in shrink_inactive_list()
2004 trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, in shrink_inactive_list()
2005 nr_scanned, nr_reclaimed, &stat, sc->priority, file); in shrink_inactive_list()
2028 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2039 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2044 list_del(&page->lru); in shrink_active_list()
2059 if (page_referenced(page, 0, sc->target_mem_cgroup, in shrink_active_list()
2062 * Identify referenced, file-backed active pages and in shrink_active_list()
2066 * are not likely to be evicted by use-once streaming in shrink_active_list()
2072 list_add(&page->lru, &l_active); in shrink_active_list()
2077 ClearPageActive(page); /* we are de-activating */ in shrink_active_list()
2079 list_add(&page->lru, &l_inactive); in shrink_active_list()
2085 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2095 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_active_list()
2096 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2100 trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, in shrink_active_list()
2101 nr_deactivate, nr_rotated, sc->priority, file); in shrink_active_list()
2128 list_move(&page->lru, &node_page_list); in reclaim_pages()
2138 list_del(&page->lru); in reclaim_pages()
2152 list_del(&page->lru); in reclaim_pages()
2164 if (sc->may_deactivate & (1 << is_file_lru(lru))) in shrink_list()
2167 sc->skipped_deactivate = 1; in shrink_list()
2179 * to the established workingset on the scan-resistant active list,
2193 * -------------------------------------
2212 gb = (inactive + active) >> (30 - PAGE_SHIFT); in inactive_is_low()
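The gb line above feeds the ratio column of the table in this comment block. A hedged sketch of the rest of inactive_is_low() from this era (reconstructed, not verbatim; int_sqrt() is the kernel's integer square root). Sanity check against the table: 1GB gives int_sqrt(10) = 3 and 10GB gives int_sqrt(100) = 10.

        gb = (inactive + active) >> (30 - PAGE_SHIFT);  /* pages -> GiB */
        if (gb)
                inactive_ratio = int_sqrt(10 * gb);
        else
                inactive_ratio = 1;

        return inactive * inactive_ratio < active;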
2250 if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { in get_scan_count()
2272 if (!sc->priority && swappiness) { in get_scan_count()
2278 * If the system is almost out of file pages, force-scan anon. in get_scan_count()
2280 if (sc->file_is_tiny) { in get_scan_count()
2289 if (sc->cache_trim_mode) { in get_scan_count()
2310 total_cost = sc->anon_cost + sc->file_cost; in get_scan_count()
2311 anon_cost = total_cost + sc->anon_cost; in get_scan_count()
2312 file_cost = total_cost + sc->file_cost; in get_scan_count()
2318 fp = (200 - swappiness) * (total_cost + 1); in get_scan_count()
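The three matched lines above set up get_scan_count()'s relative-cost model. A sketch of the full fraction computation (reconstructed, not verbatim; fraction[] and denominator are the local accumulators of that function). Adding a side's own cost into its divisor steers pressure toward whichever LRU has recently been cheaper to reclaim:

        total_cost = sc->anon_cost + sc->file_cost;
        anon_cost = total_cost + sc->anon_cost;
        file_cost = total_cost + sc->file_cost;

        ap = swappiness * (total_cost + 1);
        ap /= anon_cost + 1;

        fp = (200 - swappiness) * (total_cost + 1);
        fp /= file_cost + 1;

        fraction[0] = ap;       /* anon share */
        fraction[1] = fp;       /* file share */
        denominator = ap + fp;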
2331 lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); in get_scan_count()
2332 protection = mem_cgroup_protection(sc->target_mem_cgroup, in get_scan_count()
2334 sc->memcg_low_reclaim); in get_scan_count()
2343 * becomes extremely binary -- from nothing as we in get_scan_count()
2358 * the best-effort low protection. However, we still in get_scan_count()
2359 * ideally want to honor how well-behaved groups are in in get_scan_count()
2371 scan = lruvec_size - lruvec_size * protection / in get_scan_count()
2377 * sc->priority further than desirable. in get_scan_count()
2384 scan >>= sc->priority; in get_scan_count()
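Reading the matched lines together: scan starts at the lruvec size, is reduced by the memcg protection taken as a fraction of the cgroup's total size, and is then right-shifted by the reclaim priority. Illustratively (invented numbers): a cgroup of 1,000,000 pages with protection 500,000 and an LRU of 100,000 pages gets scan ~ 100000 - 100000 * 500000 / 1000001 ~ 50,000, and at DEF_PRIORITY (12) the per-pass target becomes 50000 >> 12 ~ 12 pages.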
2403 * round-off error. in get_scan_count()
2432 unsigned long nr_to_reclaim = sc->nr_to_reclaim; in shrink_lruvec()
2453 sc->priority == DEF_PRIORITY); in shrink_lruvec()
2464 nr[lru] -= nr_to_scan; in shrink_lruvec()
2516 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2517 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2518 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2521 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2522 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2523 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
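The duplicated pair of matched stanzas rescales the inactive and then the active list of whichever LRU type keeps scanning after the other type is stopped; percentage is the fraction of the stopped type's target still unscanned. With illustrative numbers: if anon's combined target was 200 and 50 remained when it was stopped, percentage = 25; a file list with target 1000 and 300 already scanned is reset to 1000 * 75 / 100 - 300 = 450 remaining, so it stops after 750 scanned, matching anon's 75% completion.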
2528 sc->nr_reclaimed += nr_reclaimed; in shrink_lruvec()
2542 if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && in in_reclaim_compaction()
2543 (sc->order > PAGE_ALLOC_COSTLY_ORDER || in in_reclaim_compaction()
2544 sc->priority < DEF_PRIORITY - 2)) in in_reclaim_compaction()
2551 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2552 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2575 * first, by assuming that zero delta of sc->nr_scanned means full LRU in should_continue_reclaim()
2577 * where always a non-zero amount of pages were scanned. in should_continue_reclaim()
2583 for (z = 0; z <= sc->reclaim_idx; z++) { in should_continue_reclaim()
2584 struct zone *zone = &pgdat->node_zones[z]; in should_continue_reclaim()
2588 switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { in should_continue_reclaim()
2602 pages_for_compaction = compact_gap(sc->order); in should_continue_reclaim()
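compact_gap() in the matched line converts the allocation order into a reclaim budget. A sketch of its definition from include/linux/compaction.h in this era (reconstructed, not verbatim):

static inline unsigned long compact_gap(unsigned int order)
{
        /*
         * The migration and free scanners each need roughly (1 << order)
         * free pages to make progress, so budget twice the allocation size.
         */
        return 2UL << order;
}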
2612 struct mem_cgroup *target_memcg = sc->target_mem_cgroup; in shrink_node_memcgs()
2622 * This loop can become CPU-bound when target memcgs in shrink_node_memcgs()
2623 * aren't eligible for reclaim - either because they in shrink_node_memcgs()
2644 if (!sc->memcg_low_reclaim) { in shrink_node_memcgs()
2645 sc->memcg_low_skipped = 1; in shrink_node_memcgs()
2651 reclaimed = sc->nr_reclaimed; in shrink_node_memcgs()
2652 scanned = sc->nr_scanned; in shrink_node_memcgs()
2656 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, in shrink_node_memcgs()
2657 sc->priority); in shrink_node_memcgs()
2660 vmpressure(sc->gfp_mask, memcg, false, in shrink_node_memcgs()
2661 sc->nr_scanned - scanned, in shrink_node_memcgs()
2662 sc->nr_reclaimed - reclaimed); in shrink_node_memcgs()
2669 struct reclaim_state *reclaim_state = current->reclaim_state; in shrink_node()
2675 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); in shrink_node()
2678 memset(&sc->nr, 0, sizeof(sc->nr)); in shrink_node()
2680 nr_reclaimed = sc->nr_reclaimed; in shrink_node()
2681 nr_scanned = sc->nr_scanned; in shrink_node()
2686 spin_lock_irq(&pgdat->lru_lock); in shrink_node()
2687 sc->anon_cost = target_lruvec->anon_cost; in shrink_node()
2688 sc->file_cost = target_lruvec->file_cost; in shrink_node()
2689 spin_unlock_irq(&pgdat->lru_lock); in shrink_node()
2695 if (!sc->force_deactivate) { in shrink_node()
2700 if (refaults != target_lruvec->refaults[0] || in shrink_node()
2702 sc->may_deactivate |= DEACTIVATE_ANON; in shrink_node()
2704 sc->may_deactivate &= ~DEACTIVATE_ANON; in shrink_node()
2713 if (refaults != target_lruvec->refaults[1] || in shrink_node()
2715 sc->may_deactivate |= DEACTIVATE_FILE; in shrink_node()
2717 sc->may_deactivate &= ~DEACTIVATE_FILE; in shrink_node()
2719 sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; in shrink_node()
2727 if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) in shrink_node()
2728 sc->cache_trim_mode = 1; in shrink_node()
2730 sc->cache_trim_mode = 0; in shrink_node()
2746 free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); in shrink_node()
2751 struct zone *zone = &pgdat->node_zones[z]; in shrink_node()
2765 sc->file_is_tiny = in shrink_node()
2767 !(sc->may_deactivate & DEACTIVATE_ANON) && in shrink_node()
2768 anon >> sc->priority; in shrink_node()
2774 sc->nr_reclaimed += reclaim_state->reclaimed_slab; in shrink_node()
2775 reclaim_state->reclaimed_slab = 0; in shrink_node()
2779 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, in shrink_node()
2780 sc->nr_scanned - nr_scanned, in shrink_node()
2781 sc->nr_reclaimed - nr_reclaimed); in shrink_node()
2783 if (sc->nr_reclaimed - nr_reclaimed) in shrink_node()
2789 * it implies that the long-lived page allocation rate in shrink_node()
2804 if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) in shrink_node()
2805 set_bit(PGDAT_WRITEBACK, &pgdat->flags); in shrink_node()
2808 if (sc->nr.unqueued_dirty == sc->nr.file_taken) in shrink_node()
2809 set_bit(PGDAT_DIRTY, &pgdat->flags); in shrink_node()
2817 if (sc->nr.immediate) in shrink_node()
2831 sc->nr.dirty && sc->nr.dirty == sc->nr.congested) in shrink_node()
2832 set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); in shrink_node()
2841 !sc->hibernation_mode && in shrink_node()
2842 test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) in shrink_node()
2845 if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, in shrink_node()
2856 pgdat->kswapd_failures = 0; in shrink_node()
2860 * Returns true if compaction should go ahead for a costly-order request, or
2869 suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); in compaction_ready()
2886 watermark = high_wmark_pages(zone) + compact_gap(sc->order); in compaction_ready()
2888 return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); in compaction_ready()
2892 * This is the direct reclaim path, for page-allocating processes. We only
2913 orig_mask = sc->gfp_mask; in shrink_zones()
2915 sc->gfp_mask |= __GFP_HIGHMEM; in shrink_zones()
2916 sc->reclaim_idx = gfp_zone(sc->gfp_mask); in shrink_zones()
2920 sc->reclaim_idx, sc->nodemask) { in shrink_zones()
2934 * non-zero order, only frequent costly order in shrink_zones()
2940 sc->order > PAGE_ALLOC_COSTLY_ORDER && in shrink_zones()
2942 sc->compaction_ready = true; in shrink_zones()
2952 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
2962 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat, in shrink_zones()
2963 sc->order, sc->gfp_mask, in shrink_zones()
2965 sc->nr_reclaimed += nr_soft_reclaimed; in shrink_zones()
2966 sc->nr_scanned += nr_soft_scanned; in shrink_zones()
2971 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
2973 last_pgdat = zone->zone_pgdat; in shrink_zones()
2974 shrink_node(zone->zone_pgdat, sc); in shrink_zones()
2981 sc->gfp_mask = orig_mask; in shrink_zones()
2991 target_lruvec->refaults[0] = refaults; in snapshot_refaults()
2993 target_lruvec->refaults[1] = refaults; in snapshot_refaults()
3003 * high - the zone may be full of dirty or under-writeback pages, which this
3015 int initial_priority = sc->priority; in do_try_to_free_pages()
3023 __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); in do_try_to_free_pages()
3026 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, in do_try_to_free_pages()
3027 sc->priority); in do_try_to_free_pages()
3028 sc->nr_scanned = 0; in do_try_to_free_pages()
3031 if (sc->nr_reclaimed >= sc->nr_to_reclaim) in do_try_to_free_pages()
3034 if (sc->compaction_ready) in do_try_to_free_pages()
3041 if (sc->priority < DEF_PRIORITY - 2) in do_try_to_free_pages()
3042 sc->may_writepage = 1; in do_try_to_free_pages()
3043 } while (--sc->priority >= 0); in do_try_to_free_pages()
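The matched lines above are the spine of the main reclaim loop in do_try_to_free_pages(); a sketch of how they fit together (reconstructed from these fragments, not verbatim):

        do {
                vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
                                sc->priority);
                sc->nr_scanned = 0;
                shrink_zones(zonelist, sc);

                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        break;

                if (sc->compaction_ready)
                        break;

                /* Under sustained pressure, allow writing dirty pages. */
                if (sc->priority < DEF_PRIORITY - 2)
                        sc->may_writepage = 1;
        } while (--sc->priority >= 0);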
3046 for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, in do_try_to_free_pages()
3047 sc->nodemask) { in do_try_to_free_pages()
3048 if (zone->zone_pgdat == last_pgdat) in do_try_to_free_pages()
3050 last_pgdat = zone->zone_pgdat; in do_try_to_free_pages()
3052 snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); in do_try_to_free_pages()
3057 lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, in do_try_to_free_pages()
3058 zone->zone_pgdat); in do_try_to_free_pages()
3059 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in do_try_to_free_pages()
3065 if (sc->nr_reclaimed) in do_try_to_free_pages()
3066 return sc->nr_reclaimed; in do_try_to_free_pages()
3069 if (sc->compaction_ready) in do_try_to_free_pages()
3081 if (sc->skipped_deactivate) { in do_try_to_free_pages()
3082 sc->priority = initial_priority; in do_try_to_free_pages()
3083 sc->force_deactivate = 1; in do_try_to_free_pages()
3084 sc->skipped_deactivate = 0; in do_try_to_free_pages()
3089 if (sc->memcg_low_skipped) { in do_try_to_free_pages()
3090 sc->priority = initial_priority; in do_try_to_free_pages()
3091 sc->force_deactivate = 0; in do_try_to_free_pages()
3092 sc->memcg_low_reclaim = 1; in do_try_to_free_pages()
3093 sc->memcg_low_skipped = 0; in do_try_to_free_pages()
3108 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in allow_direct_reclaim()
3112 zone = &pgdat->node_zones[i]; in allow_direct_reclaim()
3130 if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { in allow_direct_reclaim()
3131 if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) in allow_direct_reclaim()
3132 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL); in allow_direct_reclaim()
3134 wake_up_interruptible(&pgdat->kswapd_wait); in allow_direct_reclaim()
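The fragments above come from allow_direct_reclaim(). A sketch of the throttling test they implement (reconstructed, not verbatim): direct reclaimers are throttled once free pages across the node's lower zones fall below half the summed min watermarks, and kswapd is woken to restore them.

        for (i = 0; i <= ZONE_NORMAL; i++) {
                zone = &pgdat->node_zones[i];
                if (!managed_zone(zone))
                        continue;
                if (!zone_reclaimable_pages(zone))
                        continue;

                free_pages += zone_page_state(zone, NR_FREE_PAGES);
                pfmemalloc_reserve += min_wmark_pages(zone);
        }

        /* No reserves configured at all: never throttle. */
        if (!pfmemalloc_reserve)
                return true;

        wmark_ok = free_pages > pfmemalloc_reserve / 2;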
3163 if (current->flags & PF_KTHREAD) in throttle_direct_reclaim()
3193 pgdat = zone->zone_pgdat; in throttle_direct_reclaim()
3215 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3222 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3290 .reclaim_idx = MAX_NR_ZONES - 1, in mem_cgroup_shrink_node()
3294 WARN_ON_ONCE(!current->reclaim_state); in mem_cgroup_shrink_node()
3329 .reclaim_idx = MAX_NR_ZONES - 1, in try_to_free_mem_cgroup_pages()
3385 * Check for watermark boosts top-down as the higher zones in pgdat_watermark_boosted()
3391 for (i = highest_zoneidx; i >= 0; i--) { in pgdat_watermark_boosted()
3392 zone = pgdat->node_zones + i; in pgdat_watermark_boosted()
3396 if (zone->watermark_boost) in pgdat_watermark_boosted()
3410 unsigned long mark = -1; in pgdat_balanced()
3414 * Check watermarks bottom-up as lower zones are more likely to in pgdat_balanced()
3418 zone = pgdat->node_zones + i; in pgdat_balanced()
3430 * need balancing by definition. This can happen if a zone-restricted in pgdat_balanced()
3433 if (mark == -1) in pgdat_balanced()
3444 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in clear_pgdat_congested()
3445 clear_bit(PGDAT_DIRTY, &pgdat->flags); in clear_pgdat_congested()
3446 clear_bit(PGDAT_WRITEBACK, &pgdat->flags); in clear_pgdat_congested()
3471 if (waitqueue_active(&pgdat->pfmemalloc_wait)) in prepare_kswapd_sleep()
3472 wake_up_all(&pgdat->pfmemalloc_wait); in prepare_kswapd_sleep()
3475 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in prepare_kswapd_sleep()
3501 sc->nr_to_reclaim = 0; in kswapd_shrink_node()
3502 for (z = 0; z <= sc->reclaim_idx; z++) { in kswapd_shrink_node()
3503 zone = pgdat->node_zones + z; in kswapd_shrink_node()
3507 sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); in kswapd_shrink_node()
3518 * high-order allocations. If twice the allocation size has been in kswapd_shrink_node()
3519 * reclaimed then recheck watermarks only at order-0 to prevent in kswapd_shrink_node()
3520 * excessive reclaim. Assume that a process requested a high-order in kswapd_shrink_node()
3523 if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order)) in kswapd_shrink_node()
3524 sc->order = 0; in kswapd_shrink_node()
3526 return sc->nr_scanned >= sc->nr_to_reclaim; in kswapd_shrink_node()
3536 * kswapd scans the zones in the highmem->normal->dma direction. It skips
3571 zone = pgdat->node_zones + i; in balance_pgdat()
3575 nr_boost_reclaim += zone->watermark_boost; in balance_pgdat()
3576 zone_boosts[i] = zone->watermark_boost; in balance_pgdat()
3593 * purpose -- on 64-bit systems it is expected that in balance_pgdat()
3594 * buffer_heads are stripped during active rotation. On 32-bit in balance_pgdat()
3601 for (i = MAX_NR_ZONES - 1; i >= 0; i--) { in balance_pgdat()
3602 zone = pgdat->node_zones + i; in balance_pgdat()
3616 * re-evaluate if boosting is required when kswapd next wakes. in balance_pgdat()
3633 if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) in balance_pgdat()
3638 * intent is to relieve pressure not issue sub-optimal IO in balance_pgdat()
3657 if (sc.priority < DEF_PRIORITY - 2) in balance_pgdat()
3680 if (waitqueue_active(&pgdat->pfmemalloc_wait) && in balance_pgdat()
3682 wake_up_all(&pgdat->pfmemalloc_wait); in balance_pgdat()
3695 nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; in balance_pgdat()
3696 nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); in balance_pgdat()
3707 sc.priority--; in balance_pgdat()
3711 pgdat->kswapd_failures++; in balance_pgdat()
3723 zone = pgdat->node_zones + i; in balance_pgdat()
3724 spin_lock_irqsave(&zone->lock, flags); in balance_pgdat()
3725 zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); in balance_pgdat()
3726 spin_unlock_irqrestore(&zone->lock, flags); in balance_pgdat()
3751 * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to
3760 enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in kswapd_highest_zoneidx()
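The READ_ONCE above pairs with the WRITE_ONCE updates visible in kswapd_try_to_sleep(), kswapd() and wakeup_kswapd() below; MAX_NR_ZONES doubles as the "no pending request" sentinel. A sketch of the whole helper (reconstructed, not verbatim):

static enum zone_type kswapd_highest_zoneidx(pg_data_t *pgdat,
                                             enum zone_type prev_highest_zoneidx)
{
        enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);

        /* MAX_NR_ZONES means no request pending: keep the previous value. */
        return curr_idx == MAX_NR_ZONES ? prev_highest_zoneidx : curr_idx;
}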
3774 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3806 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, in kswapd_try_to_sleep()
3810 if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) in kswapd_try_to_sleep()
3811 WRITE_ONCE(pgdat->kswapd_order, reclaim_order); in kswapd_try_to_sleep()
3814 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3815 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3824 trace_mm_vmscan_kswapd_sleep(pgdat->node_id); in kswapd_try_to_sleep()
3831 * per-cpu vmstat threshold while kswapd is awake and restore in kswapd_try_to_sleep()
3846 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3859 * If there are applications that are active memory-allocators
3865 unsigned int highest_zoneidx = MAX_NR_ZONES - 1; in kswapd()
3868 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); in kswapd()
3885 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; in kswapd()
3888 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3889 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3893 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3902 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3905 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3906 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3920 * Reclaim begins at the requested order but if a high-order in kswapd()
3922 * order-0. If that happens, kswapd will consider sleeping in kswapd()
3927 trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, in kswapd()
3935 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); in kswapd()
3941 * A zone is low on free memory or too fragmented for high-order memory. If
3959 pgdat = zone->zone_pgdat; in wakeup_kswapd()
3960 curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in wakeup_kswapd()
3963 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx); in wakeup_kswapd()
3965 if (READ_ONCE(pgdat->kswapd_order) < order) in wakeup_kswapd()
3966 WRITE_ONCE(pgdat->kswapd_order, order); in wakeup_kswapd()
3968 if (!waitqueue_active(&pgdat->kswapd_wait)) in wakeup_kswapd()
3972 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || in wakeup_kswapd()
3977 * fragmented for high-order allocations. Wake up kcompactd in wakeup_kswapd()
3987 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order, in wakeup_kswapd()
3989 wake_up_interruptible(&pgdat->kswapd_wait); in wakeup_kswapd()
3994 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
4006 .reclaim_idx = MAX_NR_ZONES - 1, in shrink_all_memory()
4032 * This kswapd start function will be called by init and node-hot-add.
4033 * On node-hot-add, kswapd will be moved to the proper cpus if cpus are hot-added.
4040 if (pgdat->kswapd) in kswapd_run()
4043 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); in kswapd_run()
4044 if (IS_ERR(pgdat->kswapd)) { in kswapd_run()
4048 ret = PTR_ERR(pgdat->kswapd); in kswapd_run()
4049 pgdat->kswapd = NULL; in kswapd_run()
4060 struct task_struct *kswapd = NODE_DATA(nid)->kswapd; in kswapd_stop()
4064 NODE_DATA(nid)->kswapd = NULL; in kswapd_stop()
4084 * If non-zero call node_reclaim when the number of free pages falls below
4122 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0; in node_unmapped_file_pages()
4150 return nr_pagecache_reclaimable - delta; in node_pagecache_reclaimable()
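The subtraction above removes pagecache that the configured node_reclaim_mode is not allowed to touch. A sketch of the surrounding helper from this era (reconstructed, not verbatim; RECLAIM_WRITE and RECLAIM_UNMAP are the mode bits permitting writeback and unmapping, respectively):

static unsigned long node_pagecache_reclaimable(struct pglist_data *pgdat)
{
        unsigned long nr_pagecache_reclaimable;
        unsigned long delta = 0;

        /* With RECLAIM_UNMAP, mapped file pages count as reclaimable too. */
        if (node_reclaim_mode & RECLAIM_UNMAP)
                nr_pagecache_reclaimable = node_page_state(pgdat, NR_FILE_PAGES);
        else
                nr_pagecache_reclaimable = node_unmapped_file_pages(pgdat);

        /* Without RECLAIM_WRITE, dirty pages cannot be written back here. */
        if (!(node_reclaim_mode & RECLAIM_WRITE))
                delta += node_page_state(pgdat, NR_FILE_DIRTY);

        /* Guard against races making delta exceed the total. */
        if (delta > nr_pagecache_reclaimable)
                delta = nr_pagecache_reclaimable;

        return nr_pagecache_reclaimable - delta;
}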
4173 trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, in __node_reclaim()
4184 p->flags |= PF_SWAPWRITE; in __node_reclaim()
4187 if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { in __node_reclaim()
4194 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); in __node_reclaim()
4198 current->flags &= ~PF_SWAPWRITE; in __node_reclaim()
4221 if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && in node_reclaim()
4223 pgdat->min_slab_pages) in node_reclaim()
4229 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) in node_reclaim()
4234 * have associated processors. This will favor the local processor in node_reclaim()
4238 if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) in node_reclaim()
4241 if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) in node_reclaim()
4245 clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); in node_reclaim()
4255 * check_move_unevictable_pages - check pages for evictability and move to
4271 for (i = 0; i < pvec->nr; i++) { in check_move_unevictable_pages()
4272 struct page *page = pvec->pages[i]; in check_move_unevictable_pages()
4284 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4286 spin_lock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4307 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()