Lines matching "+full:scrubber +full:-done" in mm/memory-failure.c
1 // SPDX-License-Identifier: GPL-2.0-only
7 * hardware as being corrupted, usually due to a multi-bit ECC memory or cache
11 * not-yet-corrupted but suspicious pages without killing anything.
23 * - You know how to test it.
24 * - You have a test that can be added to mce-test:
25 * https://git.kernel.org/cgit/utils/cpu/mce/mce-test.git/
26 * - The case actually shows up as a frequent (top 10) page state in
27 * tools/vm/page-types when running a real workload.
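For reference, the mce-test suite mentioned above exercises these paths from userspace through madvise(MADV_HWPOISON), which needs CAP_SYS_ADMIN and CONFIG_MEMORY_FAILURE. A minimal injection sketch (illustrative, not code from this file):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long pagesz = sysconf(_SC_PAGESIZE);
            char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            memset(p, 0xaa, pagesz);        /* fault the page in */
            /* Ask the kernel to treat the backing page as hw-corrupted;
             * this funnels into memory_failure() below. */
            if (madvise(p, pagesz, MADV_HWPOISON)) {
                    perror("madvise(MADV_HWPOISON)");
                    return 1;
            }
            /* Touching *p now would normally raise SIGBUS (BUS_MCEERR_AR). */
            return 0;
    }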
41 #include <linux/page-flags.h>
42 #include <linux/kernel-page-flags.h>
51 #include <linux/backing-dev.h>
62 #include <linux/page-isolation.h>
101 * returns 0 for non-hugetlb pages as well. in page_handle_poison()
107 * acceptable because a soft-offlined page is not broken in page_handle_poison()
146 if (mapping == NULL || mapping->host == NULL) in hwpoison_filter_dev()
147 return -EINVAL; in hwpoison_filter_dev()
149 dev = mapping->host->i_sb->s_dev; in hwpoison_filter_dev()
152 return -EINVAL; in hwpoison_filter_dev()
155 return -EINVAL; in hwpoison_filter_dev()
169 return -EINVAL; in hwpoison_filter_flags()
191 return -EINVAL; in hwpoison_filter_task()
205 return -EINVAL; in hwpoison_filter()
208 return -EINVAL; in hwpoison_filter()
211 return -EINVAL; in hwpoison_filter()
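These filter hooks are tuned from userspace when CONFIG_HWPOISON_INJECT is enabled. The debugfs knob paths below are assumptions taken from mm/hwpoison-inject.c, and write_knob() is a hypothetical helper; a sketch restricting injection to pages backed by block device 8:0:

    #include <stdio.h>

    static int write_knob(const char *path, unsigned long val)
    {
            FILE *f = fopen(path, "w");

            if (!f)
                    return -1;
            fprintf(f, "%lu\n", val);
            return fclose(f);
    }

    int main(void)
    {
            /* Only pages whose mapping belongs to dev 8:0 pass
             * hwpoison_filter_dev() once filtering is enabled. */
            write_knob("/sys/kernel/debug/hwpoison/corrupt-filter-dev-major", 8);
            write_knob("/sys/kernel/debug/hwpoison/corrupt-filter-dev-minor", 0);
            write_knob("/sys/kernel/debug/hwpoison/corrupt-filter-enable", 1);
            return 0;
    }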
235 * from the VMAs. So do a brute-force search over all
260 struct task_struct *t = tk->tsk; in kill_proc()
261 short addr_lsb = tk->size_shift; in kill_proc()
265 pfn, t->comm, t->pid); in kill_proc()
269 (void __user *)tk->addr, addr_lsb); in kill_proc()
279 ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr, in kill_proc()
283 t->comm, t->pid, ret); in kill_proc()
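On the receiving end, a process can inspect the signal that kill_proc() raises: si_code distinguishes BUS_MCEERR_AR (the poison was consumed) from BUS_MCEERR_AO (advisory), and si_addr_lsb carries tk->size_shift. A minimal handler sketch; fprintf is used for brevity even though it is not async-signal-safe:

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void sigbus_handler(int sig, siginfo_t *si, void *ucontext)
    {
            const char *kind = si->si_code == BUS_MCEERR_AR ? "AR" :
                               si->si_code == BUS_MCEERR_AO ? "AO" : "other";

            /* si_addr is the poisoned user address; si_addr_lsb is the
             * log2 of the mapping size (tk->size_shift above). */
            fprintf(stderr, "SIGBUS (%s) at %p, lsb=%d\n",
                    kind, si->si_addr, (int)si->si_addr_lsb);
            _exit(1);
    }

    int main(void)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_sigaction = sigbus_handler;
            sa.sa_flags = SA_SIGINFO;
            sigaction(SIGBUS, &sa, NULL);
            pause();                        /* wait for a memory error */
            return 0;
    }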
303 * TODO: Could shrink slab caches here if a lightweight range-based in shake_page()
319 VM_BUG_ON_VMA(address == -EFAULT, vma); in dev_pagemap_mapping_shift()
320 pgd = pgd_offset(vma->vm_mm, address); in dev_pagemap_mapping_shift()
356 * memory_failure event. In all other cases, page->index and
357 * page->mapping are sufficient for mapping the page back to its
372 tk->addr = page_address_in_vma(p, vma); in add_to_kill()
375 tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma); in add_to_kill()
376 tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr); in add_to_kill()
378 tk->size_shift = page_shift(compound_head(p)); in add_to_kill()
381 * Send SIGKILL if "tk->addr == -EFAULT". Also, since in add_to_kill()
382 * "tk->size_shift" is always non-zero for !is_zone_device_page(), in add_to_kill()
383 * "tk->size_shift == 0" effectively checks for no mapping on in add_to_kill()
390 if (tk->addr == -EFAULT) { in add_to_kill()
392 page_to_pfn(p), tsk->comm); in add_to_kill()
393 } else if (tk->size_shift == 0) { in add_to_kill()
399 tk->tsk = tsk; in add_to_kill()
400 list_add_tail(&tk->nd, to_kill); in add_to_kill()
423 if (fail || tk->addr == -EFAULT) { in kill_procs()
425 pfn, tk->tsk->comm, tk->tsk->pid); in kill_procs()
427 tk->tsk, PIDTYPE_PID); in kill_procs()
432 * something else on the address in-between. We could in kill_procs()
438 pfn, tk->tsk->comm, tk->tsk->pid); in kill_procs()
440 list_del(&tk->nd); in kill_procs()
441 put_task_struct(tk->tsk); in kill_procs()
459 if (t->flags & PF_MCE_PROCESS) { in find_early_kill_thread()
460 if (t->flags & PF_MCE_EARLY) in find_early_kill_thread()
485 if (!tsk->mm) in task_early_kill()
488 * Comparing ->mm here because current task might represent in task_early_kill()
491 if (force_early && tsk->mm == current->mm) in task_early_kill()
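The PF_MCE_PROCESS and PF_MCE_EARLY flags tested above are set from userspace with prctl(PR_MCE_KILL); a process that wants early BUS_MCEERR_AO delivery opts in like this (sketch):

    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
            /* Opt in to early kill: deliver BUS_MCEERR_AO as soon as a
             * poisoned page we map is found, instead of on access. */
            if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
                    perror("prctl(PR_MCE_KILL)");
            printf("policy=%d\n", prctl(PR_MCE_KILL_GET, 0, 0, 0, 0));
            return 0;
    }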
521 anon_vma_interval_tree_foreach(vmac, &av->rb_root, in collect_procs_anon()
523 vma = vmac->vma; in collect_procs_anon()
524 if (vma->vm_mm != t->mm) in collect_procs_anon()
543 struct address_space *mapping = page->mapping; in collect_procs_file()
554 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, in collect_procs_file()
563 if (vma->vm_mm == t->mm) in collect_procs_file()
590 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { in collect_procs_fsdax()
591 if (vma->vm_mm == t->mm) in collect_procs_fsdax()
606 if (!page->mapping) in collect_procs()
623 tk->addr = addr; in set_to_kill()
624 tk->size_shift = shift; in set_to_kill()
659 if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) { in check_hwpoisoned_pmd_entry()
660 hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT); in check_hwpoisoned_pmd_entry()
661 set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT); in check_hwpoisoned_pmd_entry()
677 struct hwp_walk *hwp = walk->private; in hwpoison_pte_range()
682 ptl = pmd_trans_huge_lock(pmdp, walk->vma); in hwpoison_pte_range()
692 mapped_pte = ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, in hwpoison_pte_range()
696 hwp->pfn, &hwp->tk); in hwpoison_pte_range()
711 struct hwp_walk *hwp = walk->private; in hwpoison_hugetlb_range()
713 struct hstate *h = hstate_vma(walk->vma); in hwpoison_hugetlb_range()
716 hwp->pfn, &hwp->tk); in hwpoison_hugetlb_range()
749 if (!p->mm) in kill_accessing_process()
750 return -EFAULT; in kill_accessing_process()
752 mmap_read_lock(p->mm); in kill_accessing_process()
753 ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, in kill_accessing_process()
759 mmap_read_unlock(p->mm); in kill_accessing_process()
760 return ret > 0 ? -EHWPOISON : -EFAULT; in kill_accessing_process()
772 [MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
804 * complain when the page is unpoisoned and freed. in delete_from_lru_cache()
821 return -EIO; in delete_from_lru_cache()
829 if (mapping->a_ops->error_remove_page) { in truncate_error_page()
830 int err = mapping->a_ops->error_remove_page(mapping, p); in truncate_error_page()
859 /* Callback ->action() has to unlock the relevant page inside it. */
872 int count = page_count(p) - 1; in has_extra_refcount()
875 count -= 1; in has_extra_refcount()
879 page_to_pfn(p), action_page_types[ps->type], count); in has_extra_refcount()
919 * For anonymous pages we're done; the only reference left in me_pagecache_clean()
945 * so is expected to have an extra refcount after error-handling. in me_pagecache_clean()
1010 mapping_set_error(mapping, -EIO); in me_pagecache_dirty()
1025 * - clear dirty bit to prevent IO
1026 * - remove from LRU
1027 * - but keep in the swap cache, so that when we return to it on
1075 * - Error on hugepage is contained in hugepage unit (not in raw page unit).
1119 * A page state is defined by its current page->flags bits.
1200 /* page p should be unlocked after returning from ps->action(). */ in page_action()
1201 result = ps->action(ps, p); in page_action()
1203 action_result(pfn, ps->type, result); in page_action()
1210 return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; in page_action()
1237 /* Soft offline could migrate non-LRU movable pages */ in HWPoisonHandlable()
1260 return -EBUSY; in __get_hwpoison_page()
1289 ret = -EBUSY; in get_any_page()
1294 ret = -EIO; in get_any_page()
1297 } else if (ret == -EBUSY) { in get_any_page()
1306 ret = -EIO; in get_any_page()
1325 ret = -EIO; in get_any_page()
1328 if (ret == -EIO) in get_any_page()
1350 return -EHWPOISON; in __get_unpoison_page()
1356 * get_hwpoison_page() - Get refcount for memory error handling
1361 * error on it, after checking that the error page is in a well-defined state
1362 * (defined as a page type for which we can successfully handle the memory error,
1368 * extra care for the error page's state (as done in __get_hwpoison_page()),
1376 * 1 on success for in-use pages in a well-defined state,
1377 * -EIO for pages on which we cannot handle memory errors,
1378 * -EBUSY when get_hwpoison_page() has raced with page lifecycle
1380 * -EHWPOISON when the page is hwpoisoned and taken off from buddy.
1412 * Here we are interested only in user-mapped pages, so skip any in hwpoison_user_mappings()
1457 * mapped in dirty form. This has to be done before try_to_unmap, in hwpoison_user_mappings()
1494 * struct page and all unmaps done we can decide if in hwpoison_user_mappings()
1499 * use a more forceful, uncatchable kill to prevent in hwpoison_user_mappings()
1520 if ((p->flags & ps->mask) == ps->res) in identify_page_state()
1523 page_flags |= (p->flags & (1UL << PG_dirty)); in identify_page_state()
1525 if (!ps->mask) in identify_page_state()
1527 if ((page_flags & ps->mask) == ps->res) in identify_page_state()
1553 if (tk->size_shift) in unmap_and_kill()
1554 size = max(size, 1UL << tk->size_shift); in unmap_and_kill()
1558 * Unmap the largest mapping to avoid breaking up device-dax in unmap_and_kill()
1563 loff_t start = (index << PAGE_SHIFT) & ~(size - 1); in unmap_and_kill()
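The masking above rounds the error's byte offset down to a size-aligned boundary so one unmap covers the whole huge mapping without splitting it. A standalone demo of the arithmetic, assuming PAGE_SHIFT is 12 and size is a power of two:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long index = 0x12345;     /* page index in the file */
            unsigned long long size  = 1ULL << 21;  /* 2 MiB PMD mapping */
            unsigned long long start = (index << 12) & ~(size - 1);

            printf("start = %#llx\n", start);       /* 2 MiB-aligned offset */
            return 0;
    }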
1580 * Pages instantiated by device-dax (not filesystem-dax) in mf_generic_kill_procs()
1594 return -EBUSY; in mf_generic_kill_procs()
1597 rc = -EOPNOTSUPP; in mf_generic_kill_procs()
1601 switch (pgmap->type) { in mf_generic_kill_procs()
1606 * with device-side memory. in mf_generic_kill_procs()
1608 rc = -ENXIO; in mf_generic_kill_procs()
1621 * Unlike System-RAM, there is no possibility to swap in a in mf_generic_kill_procs()
1629 unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags); in mf_generic_kill_procs()
1637 * mf_dax_kill_procs - Collect and kill processes who are using this file range
1657 return -EBUSY; in mf_dax_kill_procs()
1677 * constructing a singly linked list originating from the ->private field of
1678 * the SUBPAGE_INDEX_HWPOISON-th tail page.
1697 llist_for_each_safe(tnode, t, head->first) { in __free_raw_hwp_pages()
1701 SetPageHWPoison(p->page); in __free_raw_hwp_pages()
1714 int ret = TestSetPageHWPoison(hpage) ? -EHWPOISON : 0; in hugetlb_set_page_hwpoison()
1722 return -EHWPOISON; in hugetlb_set_page_hwpoison()
1724 llist_for_each_safe(tnode, t, head->first) { in hugetlb_set_page_hwpoison()
1727 if (p->page == page) in hugetlb_set_page_hwpoison()
1728 return -EHWPOISON; in hugetlb_set_page_hwpoison()
1733 raw_hwp->page = page; in hugetlb_set_page_hwpoison()
1734 llist_add(&raw_hwp->node, head); in hugetlb_set_page_hwpoison()
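A userspace analog of this bookkeeping, with a plain pointer standing in for the tail page's ->private slot and malloc for the raw_hwp_page allocation (illustrative only; record_raw_hwp() is a hypothetical name):

    #include <stdio.h>
    #include <stdlib.h>

    struct raw_hwp_page {
            struct raw_hwp_page *next;
            unsigned long pfn;              /* stands in for ->page */
    };

    static struct raw_hwp_page *head;       /* the ->private slot */

    static int record_raw_hwp(unsigned long pfn)
    {
            struct raw_hwp_page *p;

            for (p = head; p; p = p->next)
                    if (p->pfn == pfn)
                            return -1;      /* already recorded: -EHWPOISON */
            p = malloc(sizeof(*p));
            if (!p)
                    return -1;
            p->pfn = pfn;
            p->next = head;                 /* llist_add() */
            head = p;
            return 0;
    }

    int main(void)
    {
            record_raw_hwp(0x100);
            record_raw_hwp(0x101);
            for (struct raw_hwp_page *p = head; p; ) {
                    struct raw_hwp_page *n = p->next;

                    /* walk and free, as in __free_raw_hwp_pages() */
                    printf("poisoned raw pfn %#lx\n", p->pfn);
                    free(p);
                    p = n;
            }
            return 0;
    }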
1785 * 0 - free hugepage
1786 * 1 - in-use hugepage
1787 * 2 - not a hugepage
1788 * -EBUSY - the hugepage is busy (try to retry)
1789 * -EHWPOISON - the hugepage is already hwpoisoned
1811 ret = -EBUSY; in __get_huge_page_for_hwpoison()
1817 ret = -EHWPOISON; in __get_huge_page_for_hwpoison()
1832 * PageHWPoison) should be done in a single hugetlb_lock range.
1847 } else if (res == -EHWPOISON) { in try_memory_failure_hugetlb()
1854 } else if (res == -EBUSY) { in try_memory_failure_hugetlb()
1871 return -EOPNOTSUPP; in try_memory_failure_hugetlb()
1887 return res == MF_RECOVERED ? 0 : -EBUSY; in try_memory_failure_hugetlb()
1890 page_flags = head->flags; in try_memory_failure_hugetlb()
1894 res = -EBUSY; in try_memory_failure_hugetlb()
1920 int rc = -ENXIO; in memory_failure_dev_pagemap()
1937 rc = pgmap->ops->memory_failure(pgmap, pfn, 1, flags); in memory_failure_dev_pagemap()
1942 if (rc != -EOPNOTSUPP) in memory_failure_dev_pagemap()
1957 * memory_failure - Handle memory failure of a page.
1968 * detected by a background scrubber)
1974 * -EOPNOTSUPP if hwpoison_filter() filtered the error event,
1975 * < 0 (except -EOPNOTSUPP) on failure.
2010 res = -ENXIO; in memory_failure()
2021 res = -EHWPOISON; in memory_failure()
2035 * 2) it's part of a non-compound high order page. in memory_failure()
2059 res = res == MF_RECOVERED ? 0 : -EBUSY; in memory_failure()
2062 res = -EBUSY; in memory_failure()
2067 res = -EBUSY; in memory_failure()
2089 res = -EBUSY; in memory_failure()
2096 * We ignore non-LRU pages for good reasons: in memory_failure()
2097 * - PG_locked is only well defined for LRU pages and a few others in memory_failure()
2098 * - to avoid races with __SetPageLocked() in memory_failure()
2099 * - to avoid races with __SetPageSlab*() (and more non-atomic ops) in memory_failure()
2108 * We only intend to deal with non-compound pages here. in memory_failure()
2123 res = -EBUSY; in memory_failure()
2134 page_flags = p->flags; in memory_failure()
2140 res = -EOPNOTSUPP; in memory_failure()
2164 res = -EBUSY; in memory_failure()
2171 if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { in memory_failure()
2173 res = -EBUSY; in memory_failure()
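Besides real machine checks, memory_failure() can be driven for testing by writing a physical address to the hard_offline_page sysfs file (CONFIG_MEMORY_FAILURE, root only; the path and example address below are assumptions). A sketch:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long paddr = 0x100000000ULL;  /* example phys addr */
            FILE *f = fopen("/sys/devices/system/memory/hard_offline_page", "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fprintf(f, "%#llx\n", paddr);
            if (fclose(f))
                    perror("hard offline");     /* -EBUSY, -EOPNOTSUPP, ... */
            return 0;
    }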
2207 * memory_failure_queue - Schedule handling memory failure of a page.
2218 * detected by a background scrubber)
2232 spin_lock_irqsave(&mf_cpu->lock, proc_flags); in memory_failure_queue()
2233 if (kfifo_put(&mf_cpu->fifo, entry)) in memory_failure_queue()
2234 schedule_work_on(smp_processor_id(), &mf_cpu->work); in memory_failure_queue()
2238 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); in memory_failure_queue()
2252 spin_lock_irqsave(&mf_cpu->lock, proc_flags); in memory_failure_work_func()
2253 gotten = kfifo_get(&mf_cpu->fifo, &entry); in memory_failure_work_func()
2254 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); in memory_failure_work_func()
2266 * Used to avoid return-to-userspace racing with the memory_failure workqueue.
2273 cancel_work_sync(&mf_cpu->work); in memory_failure_queue_kick()
2274 memory_failure_work_func(&mf_cpu->work); in memory_failure_queue_kick()
2284 spin_lock_init(&mf_cpu->lock); in memory_failure_init()
2285 INIT_KFIFO(mf_cpu->fifo); in memory_failure_init()
2286 INIT_WORK(&mf_cpu->work, memory_failure_work_func); in memory_failure_init()
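The queueing above defers the heavy handling out of machine-check context: memory_failure_queue() only pushes an entry into a per-CPU kfifo and schedules a work item that later calls memory_failure(). A userspace analog of the same producer/consumer shape, with a mutex-guarded ring and a worker thread standing in for kfifo and work_struct (illustrative only):

    #include <pthread.h>
    #include <stdio.h>

    #define FIFO_SZ 16

    static unsigned long fifo[FIFO_SZ];
    static unsigned int head, tail;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static int fifo_put(unsigned long pfn)      /* ~memory_failure_queue() */
    {
            int ok = 0;

            pthread_mutex_lock(&lock);
            if (head - tail < FIFO_SZ) {
                    fifo[head++ % FIFO_SZ] = pfn;
                    ok = 1;
            }                                   /* else: report overflow */
            pthread_mutex_unlock(&lock);
            return ok;
    }

    static void *work_func(void *arg)           /* ~memory_failure_work_func() */
    {
            pthread_mutex_lock(&lock);
            while (tail != head) {
                    unsigned long pfn = fifo[tail++ % FIFO_SZ];

                    pthread_mutex_unlock(&lock);
                    printf("handling pfn %#lx\n", pfn);  /* ~memory_failure() */
                    pthread_mutex_lock(&lock);
            }
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t worker;

            fifo_put(0x1234);                   /* "interrupt" context */
            fifo_put(0x5678);
            pthread_create(&worker, NULL, work_func, NULL); /* ~schedule_work_on() */
            pthread_join(worker, NULL);
            return 0;
    }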
2302 * unpoison_memory - Unpoison a previously poisoned page
2305 * Software-unpoison a page that has been poisoned by
2308 * This is done only at the software level, so it only works
2311 * Returns 0 for success, otherwise -errno.
2317 int ret = -EBUSY; in unpoison_memory()
2324 return -ENXIO; in unpoison_memory()
2334 ret = -EOPNOTSUPP; in unpoison_memory()
2357 unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n", in unpoison_memory()
2370 ret = -EBUSY; in unpoison_memory()
2374 ret = TestClearPageHWPoison(page) ? 0 : -EBUSY; in unpoison_memory()
2376 if (ret == -EHWPOISON) { in unpoison_memory()
2377 ret = put_page_back_buddy(p) ? 0 : -EBUSY; in unpoison_memory()
2385 ret = -EBUSY; in unpoison_memory()
2403 unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", in unpoison_memory()
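For testing, unpoison_memory() is reachable through the hwpoison injector's debugfs knob (CONFIG_HWPOISON_INJECT); the path is assumed from mm/hwpoison-inject.c, and the value written is a pfn, not a physical address. A sketch:

    #include <stdio.h>

    int main(void)
    {
            unsigned long pfn = 0x12345;        /* previously poisoned pfn */
            FILE *f = fopen("/sys/kernel/debug/hwpoison/unpoison-pfn", "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fprintf(f, "%#lx\n", pfn);
            if (fclose(f))
                    perror("unpoison");         /* -EBUSY etc. surface here */
            return 0;
    }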
2426 list_add(&page->lru, pagelist); in isolate_page()
2445 * soft_offline_in_use_page handles hugetlb pages and non-hugetlb pages.
2446 * If the page is a non-dirty unmapped page-cache page, it is simply invalidated.
2465 return -EBUSY; in soft_offline_in_use_page()
2501 ret = -EBUSY; in soft_offline_in_use_page()
2507 pfn, msg_page[huge], ret, &page->flags); in soft_offline_in_use_page()
2509 ret = -EBUSY; in soft_offline_in_use_page()
2513 pfn, msg_page[huge], page_count(page), &page->flags); in soft_offline_in_use_page()
2514 ret = -EBUSY; in soft_offline_in_use_page()
2526 * soft_offline_page - Soft offline a page.
2527 * @pfn: pfn to soft-offline
2531 * -EOPNOTSUPP if hwpoison_filter() filtered the error event
2558 return -ENXIO; in soft_offline_page()
2562 /* Only online pages can be soft-offlined (in particular, not ZONE_DEVICE). */ in soft_offline_page()
2566 return -EIO; in soft_offline_page()
2588 return -EOPNOTSUPP; in soft_offline_page()
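A userspace sketch of the soft-offline path via madvise(MADV_SOFT_OFFLINE) (CAP_SYS_ADMIN required; unlike MADV_HWPOISON the contents are migrated and nothing is killed):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long pagesz = sysconf(_SC_PAGESIZE);
            char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            memset(p, 0x5a, pagesz);
            if (madvise(p, pagesz, MADV_SOFT_OFFLINE))
                    perror("madvise(MADV_SOFT_OFFLINE)");
            /* The mapping still works; data was migrated to a new page. */
            printf("first byte: %#x\n", p[0] & 0xff);
            return 0;
    }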