mm/memory.c: lines matching +full:parallel +full:- +full:memories
1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
70 #include <linux/memory-tiers.h>
90 #include "pgalloc-track.h"
95 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
177 if (current->rss_stat.count[i]) { in sync_mm_rss()
178 add_mm_counter(mm, i, current->rss_stat.count[i]); in sync_mm_rss()
179 current->rss_stat.count[i] = 0; in sync_mm_rss()
182 current->rss_stat.events = 0; in sync_mm_rss()
189 if (likely(task->mm == mm)) in add_mm_counter_fast()
190 task->rss_stat.count[member] += val; in add_mm_counter_fast()
195 #define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
203 if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) in check_sync_rss_stat()
204 sync_mm_rss(task->mm); in check_sync_rss_stat()
227 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
255 if (end - 1 > ceiling - 1) in free_pmd_range()
261 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
289 if (end - 1 > ceiling - 1) in free_pud_range()
295 mm_dec_nr_puds(tlb->mm); in free_pud_range()
323 if (end - 1 > ceiling - 1) in free_p4d_range()
332 * This function frees user-level page tables of a process.
348 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
349 * of the address space and the top of it (using -1 for the in free_pgd_range()
353 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
364 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
378 if (end - 1 > ceiling - 1) in free_pgd_range()
379 end -= PMD_SIZE; in free_pgd_range()
380 if (addr > end - 1) in free_pgd_range()
387 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
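The comments above lean on unsigned wrap-around: a ceiling of 0 stands for the top of the address space, and "ceiling - 1" then wraps to ULONG_MAX, so the trimming comparison can never fire against it. A small illustration of just that idiom, an editor's sketch rather than code from this file:

/* Sketch: how "end - 1 > ceiling - 1" behaves for ceiling == 0. */
static unsigned long trim_against_ceiling(unsigned long end, unsigned long ceiling)
{
	/*
	 * With ceiling == 0 ("top of the address space"), ceiling - 1 is
	 * ULONG_MAX, the test is false, and end is left alone; any other
	 * ceiling trims end exactly as free_pgd_range() does above.
	 */
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	return end;
}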
400 MA_STATE(mas, mt, vma->vm_end, vma->vm_end); in free_pgtables()
403 unsigned long addr = vma->vm_start; in free_pgtables()
410 next = mas_find(&mas, ceiling - 1); in free_pgtables()
420 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
421 floor, next ? next->vm_start : ceiling); in free_pgtables()
426 while (next && next->vm_start <= vma->vm_end + PMD_SIZE in free_pgtables()
429 next = mas_find(&mas, ceiling - 1); in free_pgtables()
433 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
434 floor, next ? next->vm_start : ceiling); in free_pgtables()
454 * of a chain of data-dependent loads, meaning most CPUs (alpha in pmd_install()
456 * seen in-order. See the alpha page table accessors for the in pmd_install()
470 return -ENOMEM; in __pte_alloc()
482 return -ENOMEM; in __pte_alloc_kernel()
505 if (current->mm == mm) in add_mm_rss_vec()
514 * is found. For example, we might have a PFN-mapped pte in
522 pgd_t *pgd = pgd_offset(vma->vm_mm, addr); in print_bad_pte()
551 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_pte()
555 current->comm, in print_bad_pte()
560 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_pte()
562 vma->vm_file, in print_bad_pte()
563 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_pte()
564 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_pte()
565 mapping ? mapping->a_ops->read_folio : NULL); in print_bad_pte()
571 * vm_normal_page -- This function gets the "struct page" associated with a pte.
591 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
620 if (vma->vm_ops && vma->vm_ops->find_special_page) in vm_normal_page()
621 return vma->vm_ops->find_special_page(vma, addr); in vm_normal_page()
622 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vm_normal_page()
643 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page()
644 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page()
650 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page()
651 if (pfn == vma->vm_pgoff + off) in vm_normal_page()
653 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page()
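The vm_normal_page() comments above explain which mappings have a usable struct page behind a pte and which are raw-PFN "special" mappings. A sketch of the usual caller pattern follows; the helper name and the walk context are assumptions, not code from this file:

static void handle_one_pte(struct vm_area_struct *vma, unsigned long addr,
			   pte_t *ptep)
{
	/* Sketch only: the caller is assumed to hold the pte lock. */
	struct page *page = vm_normal_page(vma, addr, *ptep);

	if (!page)
		return;		/* special (PFN-only) mapping: nothing to do */

	/* ... operate on the page here ... */
}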
683 * in a direct-access (dax) mapping, so let's just replicate the in vm_normal_page_pmd()
686 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page_pmd()
687 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page_pmd()
693 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page_pmd()
694 if (pfn == vma->vm_pgoff + off) in vm_normal_page_pmd()
696 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page_pmd()
724 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); in restore_exclusive_pte()
749 set_pte_at(vma->vm_mm, address, ptep, pte); in restore_exclusive_pte()
752 * No need to invalidate - it was non-present before. However in restore_exclusive_pte()
775 return -EBUSY; in try_restore_exclusive_pte()
789 unsigned long vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
796 return -EIO; in copy_nonpresent_pte()
799 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
801 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
802 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
803 &src_mm->mmlist); in copy_nonpresent_pte()
851 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
873 VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); in copy_nonpresent_pte()
875 return -EBUSY; in copy_nonpresent_pte()
876 return -ENOENT; in copy_nonpresent_pte()
880 * uffd-wp enabled, do sanity check. in copy_nonpresent_pte()
897 * and re-use the pte the traditional way.
899 * And if we need a pre-allocated page but don't yet have
914 return -EAGAIN; in copy_present_page()
928 pte = mk_pte(new_page, dst_vma->vm_page_prot); in copy_present_page()
931 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
933 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
938 * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
946 struct mm_struct *src_mm = src_vma->vm_mm; in copy_present_pte()
947 unsigned long vm_flags = src_vma->vm_flags; in copy_present_pte()
994 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_pte()
1022 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
1023 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
1038 ret = -ENOMEM; in copy_pte_range()
1050 * We are holding two locks at this point - either of them in copy_pte_range()
1068 if (ret == -EIO) { in copy_pte_range()
1071 } else if (ret == -EBUSY) { in copy_pte_range()
1082 WARN_ON_ONCE(ret != -ENOENT); in copy_pte_range()
1088 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
1091 if (unlikely(ret == -EAGAIN)) in copy_pte_range()
1095 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1113 if (ret == -EIO) { in copy_pte_range()
1116 ret = -ENOMEM; in copy_pte_range()
1120 } else if (ret == -EBUSY) { in copy_pte_range()
1122 } else if (ret == -EAGAIN) { in copy_pte_range()
1125 return -ENOMEM; in copy_pte_range()
1146 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1147 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1153 return -ENOMEM; in copy_pmd_range()
1160 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1163 if (err == -ENOMEM) in copy_pmd_range()
1164 return -ENOMEM; in copy_pmd_range()
1173 return -ENOMEM; in copy_pmd_range()
1183 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1184 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1190 return -ENOMEM; in copy_pud_range()
1197 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1200 if (err == -ENOMEM) in copy_pud_range()
1201 return -ENOMEM; in copy_pud_range()
1210 return -ENOMEM; in copy_pud_range()
1220 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1226 return -ENOMEM; in copy_p4d_range()
1234 return -ENOMEM; in copy_p4d_range()
1248 * Always copy pgtables when dst_vma has uffd-wp enabled even if it's in vma_needs_copy()
1249 * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable in vma_needs_copy()
1250 * contains uffd-wp protection information, that's something we can't in vma_needs_copy()
1256 if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vma_needs_copy()
1259 if (src_vma->anon_vma) in vma_needs_copy()
1276 unsigned long addr = src_vma->vm_start; in copy_page_range()
1277 unsigned long end = src_vma->vm_end; in copy_page_range()
1278 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1279 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1290 if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { in copy_page_range()
1306 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1320 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1332 ret = -ENOMEM; in copy_page_range()
1338 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1352 return details->even_cows; in should_zap_cows()
1366 /* Otherwise we should only zap non-anon pages */ in should_zap_page()
1375 return details->zap_flags & ZAP_FLAG_DROP_MARKER; in zap_drop_file_uffd_wp()
1379 * This function makes sure that we'll replace the none pte with an uffd-wp
1400 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1430 tlb->fullmm); in zap_pte_range()
1443 likely(!(vma->vm_flags & VM_SEQ_READ))) in zap_pte_range()
1446 rss[mm_counter(page)]--; in zap_pte_range()
1467 * consider uffd-wp bit when zap. For more information, in zap_pte_range()
1471 rss[mm_counter(page)]--; in zap_pte_range()
1479 rss[MM_SWAPENTS]--; in zap_pte_range()
1486 rss[mm_counter(page)]--; in zap_pte_range()
1488 /* Only drop the uffd-wp marker if explicitly requested */ in zap_pte_range()
1499 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in zap_pte_range()
1542 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1547 } else if (details && details->single_folio && in zap_pmd_range()
1548 folio_test_pmd_mappable(details->single_folio) && in zap_pmd_range()
1549 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1550 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1588 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1589 mmap_assert_locked(tlb->mm); in zap_pud_range()
1634 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
1650 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
1653 if (start >= vma->vm_end) in unmap_single_vma()
1655 end = min(vma->vm_end, end_addr); in unmap_single_vma()
1656 if (end <= vma->vm_start) in unmap_single_vma()
1659 if (vma->vm_file) in unmap_single_vma()
1662 if (unlikely(vma->vm_flags & VM_PFNMAP)) in unmap_single_vma()
1668 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
1669 * should be non-null for valid hugetlb area. in unmap_single_vma()
1672 * hugetlbfs ->mmap method fails, in unmap_single_vma()
1673 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
1678 if (vma->vm_file) { in unmap_single_vma()
1680 details->zap_flags : 0; in unmap_single_vma()
1690 * unmap_vmas - unmap a range of memory covered by a list of vma's
1705 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
1715 /* Careful - we need to zap private pages too! */ in unmap_vmas()
1718 MA_STATE(mas, mt, vma->vm_end, vma->vm_end); in unmap_vmas()
1720 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, in unmap_vmas()
1725 } while ((vma = mas_find(&mas, end_addr - 1)) != NULL); in unmap_vmas()
1730 * zap_page_range - remove user pages in a given range
1740 struct maple_tree *mt = &vma->vm_mm->mm_mt; in zap_page_range()
1744 MA_STATE(mas, mt, vma->vm_end, vma->vm_end); in zap_page_range()
1747 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, in zap_page_range()
1749 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range()
1750 update_hiwater_rss(vma->vm_mm); in zap_page_range()
1754 } while ((vma = mas_find(&mas, end - 1)) != NULL); in zap_page_range()
1760 * zap_page_range_single - remove user pages in a given range
1776 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, in zap_page_range_single()
1781 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range_single()
1782 update_hiwater_rss(vma->vm_mm); in zap_page_range_single()
1785 * unmap 'address-end' not 'range.start-range.end' as range in zap_page_range_single()
1794 * zap_vma_ptes - remove ptes mapping the vma
1808 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
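zap_vma_ptes() is the helper drivers call to tear down ptes they installed in their own VM_PFNMAP mapping, e.g. when revoking userspace access to a device region. A minimal sketch of such a caller; the surrounding driver is hypothetical:

static void my_dev_revoke_mapping(struct vm_area_struct *vma)
{
	/* Sketch: drop every pte previously inserted into this vma. */
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}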
1850 return -EINVAL; in validate_page_before_insert()
1859 return -EBUSY; in insert_page_into_pte_locked()
1862 inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); in insert_page_into_pte_locked()
1864 set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot)); in insert_page_into_pte_locked()
1885 retval = -ENOMEM; in insert_page()
1886 pte = get_locked_pte(vma->vm_mm, addr, &ptl); in insert_page()
1902 return -EINVAL; in insert_page_in_batch_locked()
1918 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
1924 ret = -EFAULT; in insert_pages()
1930 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
1933 ret = -ENOMEM; in insert_pages()
1948 remaining_pages_total -= pte_idx; in insert_pages()
1955 pages_to_write_in_pmd -= batch_size; in insert_pages()
1956 remaining_pages_total -= batch_size; in insert_pages()
1968 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
1986 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
1988 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
1989 return -EFAULT; in vm_insert_pages()
1990 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
1991 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
1992 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
1993 vma->vm_flags |= VM_MIXEDMAP; in vm_insert_pages()
1996 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
1999 int err = -EINVAL; in vm_insert_pages()
2006 *num = pgcount - idx; in vm_insert_pages()
2013 * vm_insert_page - insert single page into user vma
2034 * Usually this function is called from f_op->mmap() handler
2035 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
2037 * function from other places, for example from page-fault handler.
2044 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
2045 return -EFAULT; in vm_insert_page()
2047 return -EINVAL; in vm_insert_page()
2048 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
2049 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
2050 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
2051 vma->vm_flags |= VM_MIXEDMAP; in vm_insert_page()
2053 return insert_page(vma, addr, page, vma->vm_page_prot); in vm_insert_page()
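A minimal sketch of the typical vm_insert_page() caller: a driver ->mmap() handler mapping a single kernel-allocated page, running under the mmap_lock write-lock as the comment above requires. struct one_page_dev and its fields are hypothetical:

struct one_page_dev {
	struct page *page;		/* hypothetical: one shared page */
};

static int one_page_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct one_page_dev *dev = file->private_data;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	/* vm_insert_page() takes its own page reference and sets VM_MIXEDMAP. */
	return vm_insert_page(vma, vma->vm_start, dev->page);
}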
2058 * __vm_map_pages - maps range of kernel pages into user vma
2072 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
2077 return -ENXIO; in __vm_map_pages()
2080 if (count > num - offset) in __vm_map_pages()
2081 return -ENXIO; in __vm_map_pages()
2094 * vm_map_pages - maps range of kernel pages starts with non zero offset
2114 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
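vm_map_pages() is the array variant: it maps pages starting at vma->vm_start and uses vma->vm_pgoff as an offset into the page array, which suits drivers that let userspace map part of a larger buffer. A sketch of such a caller; struct my_buf is hypothetical:

struct my_buf {
	struct page **pages;		/* hypothetical buffer pages */
	unsigned long nr_pages;
};

static int my_buf_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_buf *buf = file->private_data;

	/* vma->vm_pgoff picks where in buf->pages the mapping starts. */
	return vm_map_pages(vma, buf->pages, buf->nr_pages);
}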
2119 * vm_map_pages_zero - map range of kernel pages starts with zero offset
2141 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2192 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2199 * to override pgprot on a per-page basis.
2207 * a value of @pgprot different from that of @vma->vm_page_prot.
2221 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2222 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2224 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2225 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2227 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2241 * vmf_insert_pfn - insert single pfn into user vma
2249 * This function should only be called from a vm_ops->fault handler, and
2263 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
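As the comment above notes, vmf_insert_pfn() is meant to be called from a vm_ops->fault handler of a VM_PFNMAP mapping. A minimal sketch of such a handler; struct pfnmap_dev and the vm_private_data wiring are assumptions:

struct pfnmap_dev {
	phys_addr_t phys_base;		/* hypothetical window base */
	unsigned long nr_pages;
};

static vm_fault_t pfnmap_dev_fault(struct vm_fault *vmf)
{
	struct pfnmap_dev *dev = vmf->vma->vm_private_data;

	if (vmf->pgoff >= dev->nr_pages)
		return VM_FAULT_SIGBUS;
	/* Returns VM_FAULT_NOPAGE on success, which the fault path expects. */
	return vmf_insert_pfn(vmf->vma, vmf->address,
			      PHYS_PFN(dev->phys_base) + vmf->pgoff);
}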
2270 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2289 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2319 if (err == -ENOMEM) in __vm_insert_mixed()
2321 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2328 * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot
2335 * to override pgprot on a per-page basis.
2337 * Typically this function should be used by drivers to set caching- and
2338 * encryption bits different than those of @vma->vm_page_prot, because
2339 * the caching- or encryption mode may not be known at mmap() time.
2340 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2343 * functions that don't touch caching- or encryption bits, using pte_modify()
2345 * Also when new page-table entries are created, this is only done using the
2346 * fault() callback, and never using the value of vma->vm_page_prot,
2347 * except for page-table entries that point to anonymous pages as the result
2363 return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false); in vmf_insert_mixed()
2375 return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true); in vmf_insert_mixed_mkwrite()
2382 * in null mappings (currently treated as "copy-on-access")
2394 return -ENOMEM; in remap_pte_range()
2399 err = -EACCES; in remap_pte_range()
2418 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2421 return -ENOMEM; in remap_pmd_range()
2441 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2444 return -ENOMEM; in remap_pud_range()
2463 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2466 return -ENOMEM; in remap_p4d_range()
2479 * must have pre-validated the caching bits of the pgprot_t.
2487 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range_notrack()
2491 return -EINVAL; in remap_pfn_range_notrack()
2506 * There's a horrible special case to handle copy-on-write in remap_pfn_range_notrack()
2508 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in remap_pfn_range_notrack()
2511 if (is_cow_mapping(vma->vm_flags)) { in remap_pfn_range_notrack()
2512 if (addr != vma->vm_start || end != vma->vm_end) in remap_pfn_range_notrack()
2513 return -EINVAL; in remap_pfn_range_notrack()
2514 vma->vm_pgoff = pfn; in remap_pfn_range_notrack()
2517 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; in remap_pfn_range_notrack()
2520 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range_notrack()
2535 * remap_pfn_range - remap kernel memory to userspace
2553 return -EINVAL; in remap_pfn_range()
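The classic remap_pfn_range() caller is an mmap() handler that maps a physically contiguous region in one go, honouring the file offset through vma->vm_pgoff. A sketch assuming a hypothetical device exposing a contiguous region of dev->size bytes at dev->phys_base:

struct mmio_dev {
	phys_addr_t phys_base;		/* hypothetical region base */
	unsigned long size;		/* region size in bytes */
};

static int mmio_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct mmio_dev *dev = file->private_data;
	unsigned long size = vma->vm_end - vma->vm_start;

	if ((vma->vm_pgoff << PAGE_SHIFT) + size > dev->size)
		return -EINVAL;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	/* Sets VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP on the vma. */
	return remap_pfn_range(vma, vma->vm_start,
			       PHYS_PFN(dev->phys_base) + vma->vm_pgoff,
			       size, vma->vm_page_prot);
}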
2563 * vm_iomap_memory - remap memory to userspace
2572 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
2573 * whatever write-combining details or similar.
2583 return -EINVAL; in vm_iomap_memory()
2585 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
2593 return -EINVAL; in vm_iomap_memory()
2596 if (vma->vm_pgoff > pages) in vm_iomap_memory()
2597 return -EINVAL; in vm_iomap_memory()
2598 pfn += vma->vm_pgoff; in vm_iomap_memory()
2599 pages -= vma->vm_pgoff; in vm_iomap_memory()
2602 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
2604 return -EINVAL; in vm_iomap_memory()
2607 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
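vm_iomap_memory() is the friendlier wrapper for the same job: it derives the pfn and performs the vm_pgoff and length checks shown above, so a driver only supplies the physical base and total size of the region. A sketch, reusing the hypothetical mmio_dev from the previous sketch:

static int mmio_dev_mmap_simple(struct file *file, struct vm_area_struct *vma)
{
	struct mmio_dev *dev = file->private_data;	/* hypothetical */

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	return vm_iomap_memory(vma, dev->phys_base, dev->size);
}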
2625 return -ENOMEM; in apply_to_pte_range()
2668 return -ENOMEM; in apply_to_pmd_range()
2677 return -EINVAL; in apply_to_pmd_range()
2704 return -ENOMEM; in apply_to_pud_range()
2713 return -EINVAL; in apply_to_pud_range()
2740 return -ENOMEM; in apply_to_p4d_range()
2749 return -EINVAL; in apply_to_p4d_range()
2775 return -EINVAL; in __apply_to_page_range()
2783 return -EINVAL; in __apply_to_page_range()
2828 * read non-atomically. Before making any commitment, on those architectures
2839 spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); in pte_unmap_same()
2841 same = pte_same(*vmf->pte, vmf->orig_pte); in pte_unmap_same()
2845 pte_unmap(vmf->pte); in pte_unmap_same()
2846 vmf->pte = NULL; in pte_unmap_same()
2857 struct vm_area_struct *vma = vmf->vma; in __wp_page_copy_user()
2858 struct mm_struct *mm = vma->vm_mm; in __wp_page_copy_user()
2859 unsigned long addr = vmf->address; in __wp_page_copy_user()
2868 * a "struct page" for it. We do a best-effort copy by in __wp_page_copy_user()
2870 * fails, we just zero-fill it. Live with it. in __wp_page_copy_user()
2879 if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { in __wp_page_copy_user()
2882 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
2884 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { in __wp_page_copy_user()
2889 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
2894 entry = pte_mkyoung(vmf->orig_pte); in __wp_page_copy_user()
2895 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in __wp_page_copy_user()
2896 update_mmu_cache(vma, addr, vmf->pte); in __wp_page_copy_user()
2909 /* Re-validate under PTL if the page is still mapped */ in __wp_page_copy_user()
2910 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
2912 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { in __wp_page_copy_user()
2914 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
2926 * use-case in __wp_page_copy_user()
2938 pte_unmap_unlock(vmf->pte, vmf->ptl); in __wp_page_copy_user()
2947 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
2950 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
2968 struct page *page = vmf->page; in do_page_mkwrite()
2969 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
2971 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
2973 if (vmf->vma->vm_file && in do_page_mkwrite()
2974 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
2977 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
2979 vmf->flags = old_flags; in do_page_mkwrite()
2984 if (!page->mapping) { in do_page_mkwrite()
3001 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
3003 struct page *page = vmf->page; in fault_dirty_shared_page()
3005 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
3010 * Take a local copy of the address_space - page.mapping may be zeroed in fault_dirty_shared_page()
3012 * pinned by vma->vm_file's reference. We rely on unlock_page()'s in fault_dirty_shared_page()
3019 file_update_time(vma->vm_file); in fault_dirty_shared_page()
3050 * any related book-keeping.
3053 __releases(vmf->ptl) in wp_page_reuse()
3055 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
3056 struct page *page = vmf->page; in wp_page_reuse()
3059 VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); in wp_page_reuse()
3068 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
3070 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
3071 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
3073 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
3074 update_mmu_cache(vma, vmf->address, vmf->pte); in wp_page_reuse()
3075 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
3088 * - Allocate a page, copy the content of the old page to the new one.
3089 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
3090 * - Take the PTL. If the pte changed, bail out and release the allocated page
3091 * - If the pte is still the way we remember it, update the page table and all
3092 * relevant references. This includes dropping the reference the page-table
3094 * - In any case, unlock the PTL and drop the reference we took to the old page.
3098 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_page_copy()
3099 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
3100 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
3101 struct page *old_page = vmf->page; in wp_page_copy()
3112 if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { in wp_page_copy()
3114 vmf->address); in wp_page_copy()
3119 vmf->address); in wp_page_copy()
3126 * it's fine. If not, userspace would re-fault on in wp_page_copy()
3147 vmf->address & PAGE_MASK, in wp_page_copy()
3148 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
3152 * Re-check the pte - we dropped the lock in wp_page_copy()
3154 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
3155 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { in wp_page_copy()
3165 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
3166 entry = mk_pte(new_page, vma->vm_page_prot); in wp_page_copy()
3169 if (pte_soft_dirty(vmf->orig_pte)) in wp_page_copy()
3171 if (pte_uffd_wp(vmf->orig_pte)) in wp_page_copy()
3184 ptep_clear_flush_notify(vma, vmf->address, vmf->pte); in wp_page_copy()
3185 page_add_new_anon_rmap(new_page, vma, vmf->address); in wp_page_copy()
3193 set_pte_at_notify(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
3194 update_mmu_cache(vma, vmf->address, vmf->pte); in wp_page_copy()
3225 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
3231 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3233 * No need to double call mmu_notifier->invalidate_range() callback as in wp_page_copy()
3256 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3262 * shared mapping due to PTE being read-only once the mapped page is prepared.
3273 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3274 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3275 &vmf->ptl); in finish_mkwrite_fault()
3280 if (!pte_same(*vmf->pte, vmf->orig_pte)) { in finish_mkwrite_fault()
3281 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3282 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3295 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3297 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3300 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3301 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3302 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3312 __releases(vmf->ptl) in wp_page_shared()
3314 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3317 get_page(vmf->page); in wp_page_shared()
3319 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3322 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3326 put_page(vmf->page); in wp_page_shared()
3331 unlock_page(vmf->page); in wp_page_shared()
3332 put_page(vmf->page); in wp_page_shared()
3337 lock_page(vmf->page); in wp_page_shared()
3340 put_page(vmf->page); in wp_page_shared()
3352 * shared-page counter for the old page.
3355 * done by the caller (the low-level page fault routine in most cases).
3363 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3368 __releases(vmf->ptl) in do_wp_page()
3370 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_wp_page()
3371 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
3374 VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE)); in do_wp_page()
3375 VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE)); in do_wp_page()
3378 if (userfaultfd_pte_wp(vma, *vmf->pte)) { in do_wp_page()
3379 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3384 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
3387 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
3388 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
3389 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
3392 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
3393 if (!vmf->page) { in do_wp_page()
3395 /* No anonymous page -> nothing to do. */ in do_wp_page()
3396 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3405 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
3407 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == in do_wp_page()
3411 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3419 folio = page_folio(vmf->page); in do_wp_page()
3425 if (PageAnonExclusive(vmf->page)) in do_wp_page()
3458 page_move_anon_rmap(vmf->page, vma); in do_wp_page()
3462 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3468 /* No anonymous page -> nothing to do. */ in do_wp_page()
3469 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3471 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == in do_wp_page()
3479 get_page(vmf->page); in do_wp_page()
3481 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3483 if (PageKsm(vmf->page)) in do_wp_page()
3493 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
3505 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
3506 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
3511 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3512 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3518 * unmap_mapping_folio() - Unmap single folio from processes.
3530 struct address_space *mapping = folio->mapping; in unmap_mapping_folio()
3537 first_index = folio->index; in unmap_mapping_folio()
3538 last_index = folio->index + folio_nr_pages(folio) - 1; in unmap_mapping_folio()
3545 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_folio()
3546 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_folio()
3552 * unmap_mapping_pages() - Unmap pages from processes.
3568 pgoff_t last_index = start + nr - 1; in unmap_mapping_pages()
3575 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
3576 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_pages()
3583 * unmap_mapping_range - unmap the portion of all mmaps in the specified
3603 pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3608 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3610 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
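The typical unmap_mapping_range() caller is the truncation path: unmap the affected tail of the file from every process before and after dropping the page cache, passing even_cows == 1 so private COW copies disappear as well. A sketch of that idiom (compare truncate_pagecache() in mm/truncate.c):

static void shrink_file(struct inode *inode, loff_t newsize)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t holebegin = round_up(newsize, PAGE_SIZE);

	/* unmap, drop the cache, then catch anything COWed in between */
	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, newsize);
	unmap_mapping_range(mapping, holebegin, 0, 1);
}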
3622 struct folio *folio = page_folio(vmf->page); in remove_device_exclusive_entry()
3623 struct vm_area_struct *vma = vmf->vma; in remove_device_exclusive_entry()
3626 if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) in remove_device_exclusive_entry()
3629 vma->vm_mm, vmf->address & PAGE_MASK, in remove_device_exclusive_entry()
3630 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); in remove_device_exclusive_entry()
3633 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in remove_device_exclusive_entry()
3634 &vmf->ptl); in remove_device_exclusive_entry()
3635 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) in remove_device_exclusive_entry()
3636 restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte); in remove_device_exclusive_entry()
3638 pte_unmap_unlock(vmf->pte, vmf->ptl); in remove_device_exclusive_entry()
3651 if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || in should_try_to_free_swap()
3666 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in pte_marker_clear()
3667 vmf->address, &vmf->ptl); in pte_marker_clear()
3669 * Be careful so that we will only recover a special uffd-wp pte into a in pte_marker_clear()
3672 if (is_pte_marker(*vmf->pte)) in pte_marker_clear()
3673 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); in pte_marker_clear()
3674 pte_unmap_unlock(vmf->pte, vmf->ptl); in pte_marker_clear()
3679 * This is actually a page-missing access, but with uffd-wp special pte
3680 * installed. It means this pte was wr-protected before being unmapped.
3686 * got unregistered - we can simply clear them. We can also do that in pte_marker_handle_uffd_wp()
3687 * proactively when e.g. when we do UFFDIO_UNREGISTER upon some uffd-wp in pte_marker_handle_uffd_wp()
3690 if (unlikely(!userfaultfd_wp(vmf->vma) || vma_is_anonymous(vmf->vma))) in pte_marker_handle_uffd_wp()
3699 swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); in handle_pte_marker()
3703 * PTE markers should always be with file-backed memories, and the in handle_pte_marker()
3707 if (WARN_ON_ONCE(vma_is_anonymous(vmf->vma) || !marker)) in handle_pte_marker()
3718 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3727 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
3742 entry = pte_to_swp_entry(vmf->orig_pte); in do_swap_page()
3745 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
3746 vmf->address); in do_swap_page()
3748 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
3751 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
3752 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3753 vmf->address, &vmf->ptl); in do_swap_page()
3754 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_swap_page()
3755 spin_unlock(vmf->ptl); in do_swap_page()
3763 get_page(vmf->page); in do_swap_page()
3764 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3765 ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
3766 put_page(vmf->page); in do_swap_page()
3774 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
3785 folio = swap_cache_get_folio(entry, vma, vmf->address); in do_swap_page()
3791 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
3795 vma, vmf->address, false); in do_swap_page()
3796 page = &folio->page; in do_swap_page()
3802 vma->vm_mm, GFP_KERNEL, in do_swap_page()
3818 folio->private = NULL; in do_swap_page()
3833 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3834 vmf->address, &vmf->ptl); in do_swap_page()
3835 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) in do_swap_page()
3843 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
3853 locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags); in do_swap_page()
3874 * page->index of !PageKSM() pages would be nonlinear inside the in do_swap_page()
3875 * anon VMA -- PageKSM() is lost on actual swapout. in do_swap_page()
3877 page = ksm_might_need_to_copy(page, vma, vmf->address); in do_swap_page()
3890 if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && in do_swap_page()
3900 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
3901 &vmf->ptl); in do_swap_page()
3902 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) in do_swap_page()
3930 exclusive = pte_swp_exclusive(vmf->orig_pte); in do_swap_page()
3934 * swapcache -> certainly exclusive. in do_swap_page()
3938 data_race(si->flags & SWP_STABLE_WRITES)) { in do_swap_page()
3967 if (should_try_to_free_swap(folio, vma, vmf->flags)) in do_swap_page()
3970 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_swap_page()
3971 dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); in do_swap_page()
3972 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
3982 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
3984 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
3990 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
3992 if (pte_swp_uffd_wp(vmf->orig_pte)) { in do_swap_page()
3996 vmf->orig_pte = pte; in do_swap_page()
4000 page_add_new_anon_rmap(page, vma, vmf->address); in do_swap_page()
4003 page_add_anon_rmap(page, vma, vmf->address, rmap_flags); in do_swap_page()
4008 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_swap_page()
4009 arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); in do_swap_page()
4019 * parallel locked swapcache. in do_swap_page()
4025 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4032 /* No need to invalidate - it was non-present before */ in do_swap_page()
4033 update_mmu_cache(vma, vmf->address, vmf->pte); in do_swap_page()
4035 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4041 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4056 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4062 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
4067 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
4068 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
4077 * parallel threads are excluded by other means. in do_anonymous_page()
4081 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
4085 if (unlikely(pmd_trans_unstable(vmf->pmd))) in do_anonymous_page()
4088 /* Use the zero-page for reads */ in do_anonymous_page()
4089 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
4090 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
4091 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
4092 vma->vm_page_prot)); in do_anonymous_page()
4093 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
4094 vmf->address, &vmf->ptl); in do_anonymous_page()
4095 if (!pte_none(*vmf->pte)) { in do_anonymous_page()
4096 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
4099 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4104 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4113 page = alloc_zeroed_user_highpage_movable(vma, vmf->address); in do_anonymous_page()
4117 if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) in do_anonymous_page()
4128 entry = mk_pte(page, vma->vm_page_prot); in do_anonymous_page()
4130 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
4133 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_anonymous_page()
4134 &vmf->ptl); in do_anonymous_page()
4135 if (!pte_none(*vmf->pte)) { in do_anonymous_page()
4136 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
4140 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4146 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4151 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_anonymous_page()
4152 page_add_new_anon_rmap(page, vma, vmf->address); in do_anonymous_page()
4155 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); in do_anonymous_page()
4157 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
4158 update_mmu_cache(vma, vmf->address, vmf->pte); in do_anonymous_page()
4160 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4173 * released depending on flags and vma->vm_ops->fault() return value.
4178 struct vm_area_struct *vma = vmf->vma; in __do_fault()
4196 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
4197 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
4198 if (!vmf->prealloc_pte) in __do_fault()
4202 ret = vma->vm_ops->fault(vmf); in __do_fault()
4207 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
4208 struct page *page = vmf->page; in __do_fault()
4213 page->index, 1, false); in __do_fault()
4220 vmf->page = NULL; in __do_fault()
4225 lock_page(vmf->page); in __do_fault()
4227 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); in __do_fault()
4235 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
4237 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
4242 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
4243 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
4248 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
4249 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
4250 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
4275 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
4276 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
4277 if (!vmf->prealloc_pte) in do_set_pmd()
4281 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
4282 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
4288 entry = mk_huge_pmd(page, vma->vm_page_prot); in do_set_pmd()
4292 add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); in do_set_pmd()
4301 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
4303 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
4309 spin_unlock(vmf->ptl); in do_set_pmd()
4321 struct vm_area_struct *vma = vmf->vma; in do_set_pte()
4322 bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte); in do_set_pte()
4323 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pte()
4324 bool prefault = vmf->address != addr; in do_set_pte()
4328 entry = mk_pte(page, vma->vm_page_prot); in do_set_pte()
4339 /* copy-on-write page */ in do_set_pte()
4340 if (write && !(vma->vm_flags & VM_SHARED)) { in do_set_pte()
4341 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_set_pte()
4345 inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); in do_set_pte()
4348 set_pte_at(vma->vm_mm, addr, vmf->pte, entry); in do_set_pte()
4353 if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) in vmf_pte_changed()
4354 return !pte_same(*vmf->pte, vmf->orig_pte); in vmf_pte_changed()
4356 return !pte_none(*vmf->pte); in vmf_pte_changed()
4360 * finish_fault - finish page fault once we have prepared the page to fault
4376 struct vm_area_struct *vma = vmf->vma; in finish_fault()
4381 if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) in finish_fault()
4382 page = vmf->cow_page; in finish_fault()
4384 page = vmf->page; in finish_fault()
4390 if (!(vma->vm_flags & VM_SHARED)) { in finish_fault()
4391 ret = check_stable_address_space(vma->vm_mm); in finish_fault()
4396 if (pmd_none(*vmf->pmd)) { in finish_fault()
4403 if (vmf->prealloc_pte) in finish_fault()
4404 pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); in finish_fault()
4405 else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) in finish_fault()
4413 if (pmd_devmap_trans_unstable(vmf->pmd)) in finish_fault()
4416 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in finish_fault()
4417 vmf->address, &vmf->ptl); in finish_fault()
4419 /* Re-check under ptl */ in finish_fault()
4421 do_set_pte(vmf, page, vmf->address); in finish_fault()
4423 /* no need to invalidate: a not-present page won't be cached */ in finish_fault()
4424 update_mmu_cache(vma, vmf->address, vmf->pte); in finish_fault()
4428 update_mmu_tlb(vma, vmf->address, vmf->pte); in finish_fault()
4432 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
4453 return -EINVAL; in fault_around_bytes_set()
4477 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
4478 * not ready to be mapped: not up-to-date, locked, etc.
4494 unsigned long address = vmf->address, nr_pages, mask; in do_fault_around()
4495 pgoff_t start_pgoff = vmf->pgoff; in do_fault_around()
4500 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; in do_fault_around()
4502 address = max(address & mask, vmf->vma->vm_start); in do_fault_around()
4503 off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); in do_fault_around()
4504 start_pgoff -= off; in do_fault_around()
4510 end_pgoff = start_pgoff - in do_fault_around()
4511 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + in do_fault_around()
4512 PTRS_PER_PTE - 1; in do_fault_around()
4513 end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, in do_fault_around()
4514 start_pgoff + nr_pages - 1); in do_fault_around()
4516 if (pmd_none(*vmf->pmd)) { in do_fault_around()
4517 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
4518 if (!vmf->prealloc_pte) in do_fault_around()
4522 return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); in do_fault_around()
4525 /* Return true if we should do read fault-around, false otherwise */
4528 /* No ->map_pages? No way to fault around... */ in should_fault_around()
4529 if (!vmf->vma->vm_ops->map_pages) in should_fault_around()
4532 if (uffd_disable_fault_around(vmf->vma)) in should_fault_around()
4543 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
4558 unlock_page(vmf->page); in do_read_fault()
4560 put_page(vmf->page); in do_read_fault()
4566 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
4572 vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); in do_cow_fault()
4573 if (!vmf->cow_page) in do_cow_fault()
4576 if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm, in do_cow_fault()
4578 put_page(vmf->cow_page); in do_cow_fault()
4581 cgroup_throttle_swaprate(vmf->cow_page, GFP_KERNEL); in do_cow_fault()
4589 copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); in do_cow_fault()
4590 __SetPageUptodate(vmf->cow_page); in do_cow_fault()
4593 unlock_page(vmf->page); in do_cow_fault()
4594 put_page(vmf->page); in do_cow_fault()
4599 put_page(vmf->cow_page); in do_cow_fault()
4605 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
4616 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
4617 unlock_page(vmf->page); in do_shared_fault()
4621 put_page(vmf->page); in do_shared_fault()
4629 unlock_page(vmf->page); in do_shared_fault()
4630 put_page(vmf->page); in do_shared_fault()
4639 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4648 struct vm_area_struct *vma = vmf->vma; in do_fault()
4649 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
4655 if (!vma->vm_ops->fault) { in do_fault()
4660 if (unlikely(!pmd_present(*vmf->pmd))) in do_fault()
4663 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, in do_fault()
4664 vmf->pmd, in do_fault()
4665 vmf->address, in do_fault()
4666 &vmf->ptl); in do_fault()
4674 if (unlikely(pte_none(*vmf->pte))) in do_fault()
4679 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
4681 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
4683 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
4689 if (vmf->prealloc_pte) { in do_fault()
4690 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
4691 vmf->prealloc_pte = NULL; in do_fault()
4712 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
4718 bool was_writable = pte_savedwrite(vmf->orig_pte); in do_numa_page()
4726 vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); in do_numa_page()
4727 spin_lock(vmf->ptl); in do_numa_page()
4728 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_numa_page()
4729 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4734 old_pte = ptep_get(vmf->pte); in do_numa_page()
4735 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
4737 page = vm_normal_page(vma, vmf->address, pte); in do_numa_page()
4741 /* TODO: handle PTE-mapped THP */ in do_numa_page()
4760 if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) in do_numa_page()
4770 last_cpupid = (-1 & LAST_CPUPID_MASK); in do_numa_page()
4773 target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, in do_numa_page()
4779 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4787 vmf->pte = pte_offset_map(vmf->pmd, vmf->address); in do_numa_page()
4788 spin_lock(vmf->ptl); in do_numa_page()
4789 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_numa_page()
4790 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4803 * non-accessible ptes, some can allow access by kernel mode. in do_numa_page()
4805 old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); in do_numa_page()
4806 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
4810 ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); in do_numa_page()
4811 update_mmu_cache(vma, vmf->address, vmf->pte); in do_numa_page()
4812 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4818 if (vma_is_anonymous(vmf->vma)) in create_huge_pmd()
4820 if (vmf->vma->vm_ops->huge_fault) in create_huge_pmd()
4821 return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); in create_huge_pmd()
4828 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_huge_pmd()
4830 if (vma_is_anonymous(vmf->vma)) { in wp_huge_pmd()
4832 userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd)) in wp_huge_pmd()
4836 if (vmf->vma->vm_ops->huge_fault) { in wp_huge_pmd()
4837 vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); in wp_huge_pmd()
4843 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
4844 __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); in wp_huge_pmd()
4854 if (vma_is_anonymous(vmf->vma)) in create_huge_pud()
4856 if (vmf->vma->vm_ops->huge_fault) in create_huge_pud()
4857 return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); in create_huge_pud()
4867 if (vma_is_anonymous(vmf->vma)) in wp_huge_pud()
4869 if (vmf->vma->vm_ops->huge_fault) { in wp_huge_pud()
4870 vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); in wp_huge_pud()
4876 /* COW or write-notify not handled on PUD level: split pud.*/ in wp_huge_pud()
4877 __split_huge_pud(vmf->vma, vmf->pud, vmf->address); in wp_huge_pud()
4891 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
4901 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
4903 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
4908 vmf->pte = NULL; in handle_pte_fault()
4909 vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
4923 if (pmd_devmap_trans_unstable(vmf->pmd)) in handle_pte_fault()
4931 vmf->pte = pte_offset_map(vmf->pmd, vmf->address); in handle_pte_fault()
4932 vmf->orig_pte = *vmf->pte; in handle_pte_fault()
4933 vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
4937 * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and in handle_pte_fault()
4944 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
4945 pte_unmap(vmf->pte); in handle_pte_fault()
4946 vmf->pte = NULL; in handle_pte_fault()
4950 if (!vmf->pte) { in handle_pte_fault()
4951 if (vma_is_anonymous(vmf->vma)) in handle_pte_fault()
4957 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
4960 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
4963 vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); in handle_pte_fault()
4964 spin_lock(vmf->ptl); in handle_pte_fault()
4965 entry = vmf->orig_pte; in handle_pte_fault()
4966 if (unlikely(!pte_same(*vmf->pte, entry))) { in handle_pte_fault()
4967 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
4970 if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { in handle_pte_fault()
4973 else if (likely(vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
4977 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
4978 vmf->flags & FAULT_FLAG_WRITE)) { in handle_pte_fault()
4979 update_mmu_cache(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
4982 if (vmf->flags & FAULT_FLAG_TRIED) in handle_pte_fault()
4990 if (vmf->flags & FAULT_FLAG_WRITE) in handle_pte_fault()
4991 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); in handle_pte_fault()
4994 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
5015 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
5016 unsigned long vm_flags = vma->vm_flags; in __handle_mm_fault()
5100 * mm_account_fault - Do page fault accounting
5103 * of perf event counters, but we'll still do the per-task accounting to
5112 * still be in per-arch page fault handlers at the entry of page fault.
5123 * - Unsuccessful faults (e.g. when the address wasn't valid). That in mm_account_fault()
5128 * - Incomplete faults (VM_FAULT_RETRY). They will only be counted in mm_account_fault()
5142 current->maj_flt++; in mm_account_fault()
5144 current->min_flt++; in mm_account_fault()
5164 current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)); in lru_gen_enter_fault()
5169 current->in_lru_fault = false; in lru_gen_exit_fault()
5195 count_memcg_event_mm(vma->vm_mm, PGFAULT); in handle_mm_fault()
5215 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
5242 * We've already handled the fast-path in-line.
5248 return -ENOMEM; in __p4d_alloc()
5250 spin_lock(&mm->page_table_lock); in __p4d_alloc()
5257 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
5265 * We've already handled the fast-path in-line.
5271 return -ENOMEM; in __pud_alloc()
5273 spin_lock(&mm->page_table_lock); in __pud_alloc()
5280 spin_unlock(&mm->page_table_lock); in __pud_alloc()
5288 * We've already handled the fast-path in-line.
5295 return -ENOMEM; in __pmd_alloc()
5311 * follow_pte - look up PTE at a user virtual address
5327 * it is not a good general-purpose API.
5329 * Return: zero on success, -ve otherwise.
5366 return -EINVAL; in follow_pte()
5371 * follow_pfn - look up PFN at a user virtual address
5381 * Return: zero and the pfn at @pfn on success, -ve otherwise.
5386 int ret = -EINVAL; in follow_pfn()
5390 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfn()
5393 ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); in follow_pfn()
5407 int ret = -EINVAL; in follow_phys()
5411 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_phys()
5414 if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) in follow_phys()
5432 * generic_access_phys - generic implementation for iomem mmap access
5452 int ret = -EINVAL; in generic_access_phys()
5454 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in generic_access_phys()
5455 return -EINVAL; in generic_access_phys()
5458 if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) in generic_access_phys()
5459 return -EINVAL; in generic_access_phys()
5467 return -EINVAL; in generic_access_phys()
5471 return -ENOMEM; in generic_access_phys()
5473 if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) in generic_access_phys()
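generic_access_phys() is rarely called directly; drivers that map raw physical memory plug it into their vm_operations_struct so that ptrace()-style access via __access_remote_vm() below can reach the mapping through vma->vm_ops->access. A sketch of that hookup, with the ops table name being a placeholder:

static const struct vm_operations_struct phys_vm_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
	.access = generic_access_phys,
#endif
};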
5529 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
5530 ret = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
5538 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
5539 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
5540 bytes = PAGE_SIZE-offset; in __access_remote_vm()
5554 len -= bytes; in __access_remote_vm()
5560 return buf - old_buf; in __access_remote_vm()
5564 * access_remote_vm - access another process' address space
5609 struct mm_struct *mm = current->mm; in print_vma_addr()
5619 if (vma && vma->vm_file) { in print_vma_addr()
5620 struct file *f = vma->vm_file; in print_vma_addr()
5629 vma->vm_start, in print_vma_addr()
5630 vma->vm_end - vma->vm_start); in print_vma_addr()
5644 if (current->mm) in __might_fault()
5645 might_lock_read(&current->mm->mmap_lock); in __might_fault()
5664 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in process_huge_page()
5668 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
5674 for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { in process_huge_page()
5680 base = pages_per_huge_page - 2 * (pages_per_huge_page - n); in process_huge_page()
5681 l = pages_per_huge_page - n; in process_huge_page()
5689 * Process remaining subpages in left-right-left-right pattern in process_huge_page()
5694 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
5729 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in clear_huge_page()
5767 copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, in copy_subpage()
5768 addr, copy_arg->vma); in copy_subpage()
5776 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in copy_user_huge_page()
5815 ret_val -= (PAGE_SIZE - rc); in copy_huge_page_from_user()
5833 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
5844 page->ptl = ptl; in ptlock_alloc()
5850 kmem_cache_free(page_ptl_cachep, page->ptl); in ptlock_free()