Lines Matching +full:vm +full:- +full:map
1 // SPDX-License-Identifier: GPL-2.0-only
5 * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
50 #include "pgalloc-track.h"
53 static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
104 return -ENOMEM; in vmap_pte_range()
136 if ((end - addr) != PMD_SIZE) in vmap_try_huge_pmd()
160 return -ENOMEM; in vmap_pmd_range()
171 return -ENOMEM; in vmap_pmd_range()
172 } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pmd_range()
186 if ((end - addr) != PUD_SIZE) in vmap_try_huge_pud()
210 return -ENOMEM; in vmap_pud_range()
222 return -ENOMEM; in vmap_pud_range()
223 } while (pud++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pud_range()
237 if ((end - addr) != P4D_SIZE) in vmap_try_huge_p4d()
261 return -ENOMEM; in vmap_p4d_range()
273 return -ENOMEM; in vmap_p4d_range()
274 } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); in vmap_p4d_range()
299 } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_range_noflush()
409 * or be re-mapped for something else, if TLB flushes are being delayed or
443 * vunmap_range - unmap kernel virtual addresses
444 * @addr: start of the VM area to unmap
445 * @end: end of the VM area to unmap (non-inclusive)
448 * caches. Any subsequent access to the address before it has been re-mapped
471 return -ENOMEM; in vmap_pages_pte_range()
476 return -EBUSY; in vmap_pages_pte_range()
478 return -ENOMEM; in vmap_pages_pte_range()
480 return -EINVAL; in vmap_pages_pte_range()
498 return -ENOMEM; in vmap_pages_pmd_range()
502 return -ENOMEM; in vmap_pages_pmd_range()
516 return -ENOMEM; in vmap_pages_pud_range()
520 return -ENOMEM; in vmap_pages_pud_range()
534 return -ENOMEM; in vmap_pages_p4d_range()
538 return -ENOMEM; in vmap_pages_p4d_range()
582 unsigned int i, nr = (end - addr) >> PAGE_SHIFT; in __vmap_pages_range_noflush()
590 for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) { in __vmap_pages_range_noflush()
617 * vmap_pages_range - map pages to a kernel virtual address
618 * @addr: start of the VM area to map
619 * @end: end of the VM area to map (non-inclusive)
621 * @pages: pages to map (always PAGE_SIZE pages)
626 * 0 on success, -errno on failure.
641 * ARM, x86-64 and sparc64 put modules in a special place, in is_vmalloc_or_module_addr()
716 * Map a vmalloc()-space virtual address to the physical page frame number.
757 * This augment red-black tree represents the free vmap space.
758 * All vmap_area objects in this tree are sorted by va->va_start
763 * of its sub-tree, right or left. Therefore it is possible to
778 return (va->va_end - va->va_start); in va_size()
787 return va ? va->subtree_max_size : 0; in get_subtree_max_size()
817 if (tmp->va_end > addr) { in find_vmap_area_exceed_addr()
819 if (tmp->va_start <= addr) in find_vmap_area_exceed_addr()
822 n = n->rb_left; in find_vmap_area_exceed_addr()
824 n = n->rb_right; in find_vmap_area_exceed_addr()
832 struct rb_node *n = root->rb_node; in __find_vmap_area()
840 if (addr < va->va_start) in __find_vmap_area()
841 n = n->rb_left; in __find_vmap_area()
842 else if (addr >= va->va_end) in __find_vmap_area()
843 n = n->rb_right; in __find_vmap_area()
868 link = &root->rb_node; in find_va_links()
880 * it link, where the new va->rb_node will be attached to. in find_va_links()
890 if (va->va_end <= tmp_va->va_start) in find_va_links()
891 link = &(*link)->rb_left; in find_va_links()
892 else if (va->va_start >= tmp_va->va_end) in find_va_links()
893 link = &(*link)->rb_right; in find_va_links()
895 WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n", in find_va_links()
896 va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end); in find_va_links()
902 *parent = &tmp_va->rb_node; in find_va_links()
913 * The red-black tree where we try to find VA neighbors in get_va_next_sibling()
920 list = &rb_entry(parent, struct vmap_area, rb_node)->list; in get_va_next_sibling()
921 return (&parent->rb_right == link ? list->next : list); in get_va_next_sibling()
934 head = &rb_entry(parent, struct vmap_area, rb_node)->list; in __link_va()
935 if (&parent->rb_right != link) in __link_va()
936 head = head->prev; in __link_va()
939 /* Insert to the rb-tree */ in __link_va()
940 rb_link_node(&va->rb_node, parent, link); in __link_va()
944 * to the tree. We do not set va->subtree_max_size to in __link_va()
953 rb_insert_augmented(&va->rb_node, in __link_va()
955 va->subtree_max_size = 0; in __link_va()
957 rb_insert_color(&va->rb_node, root); in __link_va()
960 /* Address-sort this list */ in __link_va()
961 list_add(&va->list, head); in __link_va()
983 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node))) in __unlink_va()
987 rb_erase_augmented(&va->rb_node, in __unlink_va()
990 rb_erase(&va->rb_node, root); in __unlink_va()
992 list_del_init(&va->list); in __unlink_va()
993 RB_CLEAR_NODE(&va->rb_node); in __unlink_va()
1016 get_subtree_max_size(va->rb_node.rb_left), in compute_subtree_max_size()
1017 get_subtree_max_size(va->rb_node.rb_right)); in compute_subtree_max_size()
1028 if (computed_size != va->subtree_max_size) in augment_tree_propagate_check()
1030 va_size(va), va->subtree_max_size); in augment_tree_propagate_check()
1042 * - After VA has been inserted to the tree(free path);
1043 * - After VA has been shrunk(allocation path);
1044 * - After VA has been increased(merging path).
1050 * 4--8
1054 * 2--2 8--8
1060 * node becomes 4--6.
1070 free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL); in augment_tree_propagate_from()
1109 * Merge de-allocated chunk of VA memory with previous
1147 * |<------VA------>|<-----Next----->| in __merge_or_add_vmap_area()
1153 if (sibling->va_start == va->va_end) { in __merge_or_add_vmap_area()
1154 sibling->va_start = va->va_start; in __merge_or_add_vmap_area()
1168 * |<-----Prev----->|<------VA------>| in __merge_or_add_vmap_area()
1172 if (next->prev != head) { in __merge_or_add_vmap_area()
1173 sibling = list_entry(next->prev, struct vmap_area, list); in __merge_or_add_vmap_area()
1174 if (sibling->va_end == va->va_start) { in __merge_or_add_vmap_area()
1185 sibling->va_end = va->va_end; in __merge_or_add_vmap_area()
1227 if (va->va_start > vstart) in is_within_this_va()
1228 nva_start_addr = ALIGN(va->va_start, align); in is_within_this_va()
1237 return (nva_start_addr + size <= va->va_end); in is_within_this_va()
1256 node = root->rb_node; in find_vmap_lowest_match()
1259 length = adjust_search_size ? size + align - 1 : size; in find_vmap_lowest_match()
1264 if (get_subtree_max_size(node->rb_left) >= length && in find_vmap_lowest_match()
1265 vstart < va->va_start) { in find_vmap_lowest_match()
1266 node = node->rb_left; in find_vmap_lowest_match()
1273 * sub-tree if it does not have a free block that is in find_vmap_lowest_match()
1276 if (get_subtree_max_size(node->rb_right) >= length) { in find_vmap_lowest_match()
1277 node = node->rb_right; in find_vmap_lowest_match()
1282 * OK. We roll back and find the first right sub-tree, in find_vmap_lowest_match()
1292 if (get_subtree_max_size(node->rb_right) >= length && in find_vmap_lowest_match()
1293 vstart <= va->va_start) { in find_vmap_lowest_match()
1297 * to enter same sub-tree after it has already been checked in find_vmap_lowest_match()
1300 vstart = va->va_start + 1; in find_vmap_lowest_match()
1301 node = node->rb_right; in find_vmap_lowest_match()
1365 if (nva_start_addr < va->va_start || in classify_va_fit_type()
1366 nva_start_addr + size > va->va_end) in classify_va_fit_type()
1370 if (va->va_start == nva_start_addr) { in classify_va_fit_type()
1371 if (va->va_end == nva_start_addr + size) in classify_va_fit_type()
1375 } else if (va->va_end == nva_start_addr + size) { in classify_va_fit_type()
1398 * |---------------| in adjust_va_to_fit_type()
1408 * |-------|-------| in adjust_va_to_fit_type()
1410 va->va_start += size; in adjust_va_to_fit_type()
1417 * |-------|-------| in adjust_va_to_fit_type()
1419 va->va_end = nva_start_addr; in adjust_va_to_fit_type()
1426 * |---|-------|---| in adjust_va_to_fit_type()
1431 * For percpu allocator we do not do any pre-allocation in adjust_va_to_fit_type()
1457 return -1; in adjust_va_to_fit_type()
1463 lva->va_start = va->va_start; in adjust_va_to_fit_type()
1464 lva->va_end = nva_start_addr; in adjust_va_to_fit_type()
1469 va->va_start = nva_start_addr + size; in adjust_va_to_fit_type()
1471 return -1; in adjust_va_to_fit_type()
1478 insert_vmap_area_augment(lva, &va->rb_node, root, head); in adjust_va_to_fit_type()
1507 if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size)) in __alloc_vmap_area()
1514 if (va->va_start > vstart) in __alloc_vmap_area()
1515 nva_start_addr = ALIGN(va->va_start, align); in __alloc_vmap_area()
1565 * We do it in non-atomic context, thus it allows us to use more in preload_this_cpu_lock()
1595 return ERR_PTR(-EINVAL); in alloc_vmap_area()
1598 return ERR_PTR(-EBUSY); in alloc_vmap_area()
1605 return ERR_PTR(-ENOMEM); in alloc_vmap_area()
1611 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); in alloc_vmap_area()
1628 va->va_start = addr; in alloc_vmap_area()
1629 va->va_end = addr + size; in alloc_vmap_area()
1630 va->vm = NULL; in alloc_vmap_area()
1631 va->flags = va_flags; in alloc_vmap_area()
1637 BUG_ON(!IS_ALIGNED(va->va_start, align)); in alloc_vmap_area()
1638 BUG_ON(va->va_start < vstart); in alloc_vmap_area()
1639 BUG_ON(va->va_end > vend); in alloc_vmap_area()
1669 return ERR_PTR(-EBUSY); in alloc_vmap_area()
1718 /* for per-CPU blocks */
1722 * Purges all lazily-freed vmap areas.
1743 struct vmap_area, list)->va_start); in __purge_vmap_area_lazy()
1747 struct vmap_area, list)->va_end); in __purge_vmap_area_lazy()
1754 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; in __purge_vmap_area_lazy()
1755 unsigned long orig_start = va->va_start; in __purge_vmap_area_lazy()
1756 unsigned long orig_end = va->va_end; in __purge_vmap_area_lazy()
1759 * Finally insert or merge lazily-freed area. It is in __purge_vmap_area_lazy()
1771 va->va_start, va->va_end); in __purge_vmap_area_lazy()
1820 unsigned long va_start = va->va_start; in free_vmap_area_noflush()
1823 if (WARN_ON_ONCE(!list_empty(&va->list))) in free_vmap_area_noflush()
1826 nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >> in free_vmap_area_noflush()
1849 flush_cache_vunmap(va->va_start, va->va_end); in free_unmap_vmap_area()
1850 vunmap_range_noflush(va->va_start, va->va_end); in free_unmap_vmap_area()
1852 flush_tlb_kernel_range(va->va_start, va->va_end); in free_unmap_vmap_area()
1889 * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess
1913 * regular operations: Purge if vb->free is less than 1/4 of the capacity.
1918 #define VMAP_BLOCK 0x2 /* mark out the vmap_block sub-type*/
1927 * be allocated. If it is an issue, we can use rb-tree
1951 * A per-cpu vmap_block_queue is used in both ways, to serialize
1954 * overload it, since we already have the per-cpu array which is
1968 * |------|------|------|------|------|------|...<vmap address space>
1971 * - CPU_1 invokes vm_unmap_ram(6), 6 belongs to CPU0 zone, thus
1972 * it access: CPU0/INDEX0 -> vmap_blocks -> xa_lock;
1974 * - CPU_2 invokes vm_unmap_ram(11), 11 belongs to CPU1 zone, thus
1975 * it access: CPU1/INDEX1 -> vmap_blocks -> xa_lock;
1977 * - CPU_0 invokes vm_unmap_ram(20), 20 belongs to CPU2 zone, thus
1978 * it access: CPU2/INDEX2 -> vmap_blocks -> xa_lock.
2000 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); in addr_to_vb_idx()
2015 * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
2020 * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
2037 return ERR_PTR(-ENOMEM); in new_vmap_block()
2048 vaddr = vmap_block_vaddr(va->va_start, 0); in new_vmap_block()
2049 spin_lock_init(&vb->lock); in new_vmap_block()
2050 vb->va = va; in new_vmap_block()
2053 bitmap_zero(vb->used_map, VMAP_BBMAP_BITS); in new_vmap_block()
2054 vb->free = VMAP_BBMAP_BITS - (1UL << order); in new_vmap_block()
2055 vb->dirty = 0; in new_vmap_block()
2056 vb->dirty_min = VMAP_BBMAP_BITS; in new_vmap_block()
2057 vb->dirty_max = 0; in new_vmap_block()
2058 bitmap_set(vb->used_map, 0, (1UL << order)); in new_vmap_block()
2059 INIT_LIST_HEAD(&vb->free_list); in new_vmap_block()
2061 xa = addr_to_vb_xa(va->va_start); in new_vmap_block()
2062 vb_idx = addr_to_vb_idx(va->va_start); in new_vmap_block()
2071 spin_lock(&vbq->lock); in new_vmap_block()
2072 list_add_tail_rcu(&vb->free_list, &vbq->free); in new_vmap_block()
2073 spin_unlock(&vbq->lock); in new_vmap_block()
2083 xa = addr_to_vb_xa(vb->va->va_start); in free_vmap_block()
2084 tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start)); in free_vmap_block()
2088 unlink_va(vb->va, &vmap_area_root); in free_vmap_block()
2091 free_vmap_area_noflush(vb->va); in free_vmap_block()
2099 if (vb->free + vb->dirty != VMAP_BBMAP_BITS || in purge_fragmented_block()
2100 vb->dirty == VMAP_BBMAP_BITS) in purge_fragmented_block()
2104 if (!(force_purge || vb->free < VMAP_PURGE_THRESHOLD)) in purge_fragmented_block()
2108 WRITE_ONCE(vb->free, 0); in purge_fragmented_block()
2110 WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS); in purge_fragmented_block()
2111 vb->dirty_min = 0; in purge_fragmented_block()
2112 vb->dirty_max = VMAP_BBMAP_BITS; in purge_fragmented_block()
2113 spin_lock(&vbq->lock); in purge_fragmented_block()
2114 list_del_rcu(&vb->free_list); in purge_fragmented_block()
2115 spin_unlock(&vbq->lock); in purge_fragmented_block()
2116 list_add_tail(&vb->purge, purge_list); in purge_fragmented_block()
2125 list_del(&vb->purge); in free_purged_blocks()
2137 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in purge_fragmented_blocks()
2138 unsigned long free = READ_ONCE(vb->free); in purge_fragmented_blocks()
2139 unsigned long dirty = READ_ONCE(vb->dirty); in purge_fragmented_blocks()
2145 spin_lock(&vb->lock); in purge_fragmented_blocks()
2147 spin_unlock(&vb->lock); in purge_fragmented_blocks()
2182 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in vb_alloc()
2185 if (READ_ONCE(vb->free) < (1UL << order)) in vb_alloc()
2188 spin_lock(&vb->lock); in vb_alloc()
2189 if (vb->free < (1UL << order)) { in vb_alloc()
2190 spin_unlock(&vb->lock); in vb_alloc()
2194 pages_off = VMAP_BBMAP_BITS - vb->free; in vb_alloc()
2195 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); in vb_alloc()
2196 WRITE_ONCE(vb->free, vb->free - (1UL << order)); in vb_alloc()
2197 bitmap_set(vb->used_map, pages_off, (1UL << order)); in vb_alloc()
2198 if (vb->free == 0) { in vb_alloc()
2199 spin_lock(&vbq->lock); in vb_alloc()
2200 list_del_rcu(&vb->free_list); in vb_alloc()
2201 spin_unlock(&vbq->lock); in vb_alloc()
2204 spin_unlock(&vb->lock); in vb_alloc()
2230 offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; in vb_free()
2235 spin_lock(&vb->lock); in vb_free()
2236 bitmap_clear(vb->used_map, offset, (1UL << order)); in vb_free()
2237 spin_unlock(&vb->lock); in vb_free()
2244 spin_lock(&vb->lock); in vb_free()
2247 vb->dirty_min = min(vb->dirty_min, offset); in vb_free()
2248 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); in vb_free()
2250 WRITE_ONCE(vb->dirty, vb->dirty + (1UL << order)); in vb_free()
2251 if (vb->dirty == VMAP_BBMAP_BITS) { in vb_free()
2252 BUG_ON(vb->free); in vb_free()
2253 spin_unlock(&vb->lock); in vb_free()
2256 spin_unlock(&vb->lock); in vb_free()
2275 xa_for_each(&vbq->vmap_blocks, idx, vb) { in _vm_unmap_aliases()
2276 spin_lock(&vb->lock); in _vm_unmap_aliases()
2284 vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { in _vm_unmap_aliases()
2285 unsigned long va_start = vb->va->va_start; in _vm_unmap_aliases()
2288 s = va_start + (vb->dirty_min << PAGE_SHIFT); in _vm_unmap_aliases()
2289 e = va_start + (vb->dirty_max << PAGE_SHIFT); in _vm_unmap_aliases()
2295 vb->dirty_min = VMAP_BBMAP_BITS; in _vm_unmap_aliases()
2296 vb->dirty_max = 0; in _vm_unmap_aliases()
2300 spin_unlock(&vb->lock); in _vm_unmap_aliases()
2312 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
2334 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
2362 debug_check_no_locks_freed((void *)va->va_start, in vm_unmap_ram()
2363 (va->va_end - va->va_start)); in vm_unmap_ram()
2369 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
2375 * faster than vmap so it's good. But if you mix long-life and short-life
2378 * the end. Please use this function for short-lived objects.
2401 addr = va->va_start; in vm_map_ram()
2413 * With hardware tag-based KASAN, marking is skipped for in vm_map_ram()
2414 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in vm_map_ram()
2424 static inline unsigned int vm_area_page_order(struct vm_struct *vm) in vm_area_page_order() argument
2427 return vm->page_order; in vm_area_page_order()
2433 static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order) in set_vm_area_page_order() argument
2436 vm->page_order = order; in set_vm_area_page_order()
2443 * vm_area_add_early - add vmap area early during boot
2444 * @vm: vm_struct to add
2446 * This function is used to add fixed kernel vm area to vmlist before
2447 * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags
2452 void __init vm_area_add_early(struct vm_struct *vm) in vm_area_add_early() argument
2457 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { in vm_area_add_early()
2458 if (tmp->addr >= vm->addr) { in vm_area_add_early()
2459 BUG_ON(tmp->addr < vm->addr + vm->size); in vm_area_add_early()
2462 BUG_ON(tmp->addr + tmp->size > vm->addr); in vm_area_add_early()
2464 vm->next = *p; in vm_area_add_early()
2465 *p = vm; in vm_area_add_early()
2469 * vm_area_register_early - register vmap area early during boot
2470 * @vm: vm_struct to register
2473 * This function is used to register kernel vm area before
2474 * vmalloc_init() is called. @vm->size and @vm->flags should contain
2476 * vm->addr contains the allocated address.
2480 void __init vm_area_register_early(struct vm_struct *vm, size_t align) in vm_area_register_early() argument
2487 for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { in vm_area_register_early()
2488 if ((unsigned long)cur->addr - addr >= vm->size) in vm_area_register_early()
2490 addr = ALIGN((unsigned long)cur->addr + cur->size, align); in vm_area_register_early()
2493 BUG_ON(addr > VMALLOC_END - vm->size); in vm_area_register_early()
2494 vm->addr = (void *)addr; in vm_area_register_early()
2495 vm->next = *p; in vm_area_register_early()
2496 *p = vm; in vm_area_register_early()
2497 kasan_populate_early_vm_area_shadow(vm->addr, vm->size); in vm_area_register_early()
2508 * -|-----|.....|-----|-----|-----|.....|- in vmap_init_free_space()
2510 * |<--------------------------------->| in vmap_init_free_space()
2513 if (busy->va_start - vmap_start > 0) { in vmap_init_free_space()
2516 free->va_start = vmap_start; in vmap_init_free_space()
2517 free->va_end = busy->va_start; in vmap_init_free_space()
2525 vmap_start = busy->va_end; in vmap_init_free_space()
2528 if (vmap_end - vmap_start > 0) { in vmap_init_free_space()
2531 free->va_start = vmap_start; in vmap_init_free_space()
2532 free->va_end = vmap_end; in vmap_init_free_space()
2541 static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, in setup_vmalloc_vm_locked() argument
2544 vm->flags = flags; in setup_vmalloc_vm_locked()
2545 vm->addr = (void *)va->va_start; in setup_vmalloc_vm_locked()
2546 vm->size = va->va_end - va->va_start; in setup_vmalloc_vm_locked()
2547 vm->caller = caller; in setup_vmalloc_vm_locked()
2548 va->vm = vm; in setup_vmalloc_vm_locked()
2551 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, in setup_vmalloc_vm() argument
2555 setup_vmalloc_vm_locked(vm, va, flags, caller); in setup_vmalloc_vm()
2559 static void clear_vm_uninitialized_flag(struct vm_struct *vm) in clear_vm_uninitialized_flag() argument
2563 * we should make sure that vm has proper values. in clear_vm_uninitialized_flag()
2567 vm->flags &= ~VM_UNINITIALIZED; in clear_vm_uninitialized_flag()
2604 * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a in __get_vm_area_node()
2605 * best-effort approach, as they can be mapped outside of vmalloc code. in __get_vm_area_node()
2608 * With hardware tag-based KASAN, marking is skipped for in __get_vm_area_node()
2609 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in __get_vm_area_node()
2612 area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, in __get_vm_area_node()
2627 * get_vm_area - reserve a contiguous kernel virtual area
2654 * find_vm_area - find a continuous kernel virtual area
2657 * Search for the kernel VM area starting at @addr, and return it.
2671 return va->vm; in find_vm_area()
2675 * remove_vm_area - find and remove a continuous kernel virtual area
2678 * Search for the kernel VM area starting at @addr, and remove it.
2679 * This function returns the found VM area, but using it is NOT safe
2687 struct vm_struct *vm; in remove_vm_area() local
2696 if (!va || !va->vm) in remove_vm_area()
2698 vm = va->vm; in remove_vm_area()
2700 debug_check_no_locks_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
2701 debug_check_no_obj_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
2702 kasan_free_module_shadow(vm); in remove_vm_area()
2703 kasan_poison_vmalloc(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
2706 return vm; in remove_vm_area()
2715 for (i = 0; i < area->nr_pages; i++) in set_area_direct_map()
2716 if (page_address(area->pages[i])) in set_area_direct_map()
2717 set_direct_map(area->pages[i]); in set_area_direct_map()
2721 * Flush the vm mapping and reset the direct map.
2732 * the vm_unmap_aliases() flush includes the direct map. in vm_reset_perms()
2734 for (i = 0; i < area->nr_pages; i += 1U << page_order) { in vm_reset_perms()
2735 unsigned long addr = (unsigned long)page_address(area->pages[i]); in vm_reset_perms()
2748 * Set direct map to something invalid so that it won't be cached if in vm_reset_perms()
2750 * reset the direct map permissions to the default. in vm_reset_perms()
2762 llist_for_each_safe(llnode, t, llist_del_all(&p->list)) in delayed_vfree_work()
2767 * vfree_atomic - release memory allocated by vmalloc()
2786 if (addr && llist_add((struct llist_node *)addr, &p->list)) in vfree_atomic()
2787 schedule_work(&p->wq); in vfree_atomic()
2791 * vfree - Release memory allocated by vmalloc()
2805 * conventions for vfree() arch-dependent would be a really bad idea).
2809 struct vm_struct *vm; in vfree() local
2824 vm = remove_vm_area(addr); in vfree()
2825 if (unlikely(!vm)) { in vfree()
2826 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", in vfree()
2831 if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS)) in vfree()
2832 vm_reset_perms(vm); in vfree()
2833 for (i = 0; i < vm->nr_pages; i++) { in vfree()
2834 struct page *page = vm->pages[i]; in vfree()
2837 mod_memcg_page_state(page, MEMCG_VMALLOC, -1); in vfree()
2839 * High-order allocs for huge vmallocs are split, so in vfree()
2840 * can be freed as an array of order-0 allocations in vfree()
2845 atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages); in vfree()
2846 kvfree(vm->pages); in vfree()
2847 kfree(vm); in vfree()
2852 * vunmap - release virtual mapping obtained by vmap()
2862 struct vm_struct *vm; in vunmap() local
2869 vm = remove_vm_area(addr); in vunmap()
2870 if (unlikely(!vm)) { in vunmap()
2871 WARN(1, KERN_ERR "Trying to vunmap() nonexistent vm area (%p)\n", in vunmap()
2875 kfree(vm); in vunmap()
2880 * vmap - map an array of pages into virtually contiguous space
2882 * @count: number of pages to map
2883 * @flags: vm_area->flags
2921 addr = (unsigned long)area->addr; in vmap()
2924 vunmap(area->addr); in vmap()
2929 area->pages = pages; in vmap()
2930 area->nr_pages = count; in vmap()
2932 return area->addr; in vmap()
2946 unsigned long pfn = data->pfns[data->idx]; in vmap_pfn_apply()
2950 return -EINVAL; in vmap_pfn_apply()
2952 ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); in vmap_pfn_apply()
2955 data->idx++; in vmap_pfn_apply()
2960 * vmap_pfn - map an array of PFNs into virtually contiguous space
2962 * @count: number of pages to map
2977 if (apply_to_page_range(&init_mm, (unsigned long)area->addr, in vmap_pfn()
2983 flush_cache_vmap((unsigned long)area->addr, in vmap_pfn()
2984 (unsigned long)area->addr + count * PAGE_SIZE); in vmap_pfn()
2986 return area->addr; in vmap_pfn()
3002 * For order-0 pages we make use of bulk allocator, if in vm_area_alloc_pages()
3015 * A maximum allowed request is hard-coded and is 100 in vm_area_alloc_pages()
3017 * long preemption off scenario in the bulk-allocator in vm_area_alloc_pages()
3020 nr_pages_request = min(100U, nr_pages - nr_allocated); in vm_area_alloc_pages()
3050 * potentially dangerous (pre-mature OOM, disruptive reclaim in vm_area_alloc_pages()
3057 /* High-order pages or fallback path if "bulk" fails. */ in vm_area_alloc_pages()
3079 * small-page vmallocs). Some drivers do their own refcounting in vm_area_alloc_pages()
3080 * on vmalloc_to_page() pages, some use page->mapping, in vm_area_alloc_pages()
3081 * page->lru, etc. in vm_area_alloc_pages()
3087 * Careful, we allocate and map page-order pages, but in vm_area_alloc_pages()
3107 unsigned long addr = (unsigned long)area->addr; in __vmalloc_area_node()
3122 area->pages = __vmalloc_node(array_size, 1, nested_gfp, node, in __vmalloc_area_node()
3123 area->caller); in __vmalloc_area_node()
3125 area->pages = kmalloc_node(array_size, nested_gfp, node); in __vmalloc_area_node()
3128 if (!area->pages) { in __vmalloc_area_node()
3136 set_vm_area_page_order(area, page_shift - PAGE_SHIFT); in __vmalloc_area_node()
3139 area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN, in __vmalloc_area_node()
3140 node, page_order, nr_small_pages, area->pages); in __vmalloc_area_node()
3142 atomic_long_add(area->nr_pages, &nr_vmalloc_pages); in __vmalloc_area_node()
3146 for (i = 0; i < area->nr_pages; i++) in __vmalloc_area_node()
3147 mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1); in __vmalloc_area_node()
3154 if (area->nr_pages != nr_small_pages) { in __vmalloc_area_node()
3157 * also:- in __vmalloc_area_node()
3159 * - a pending fatal signal in __vmalloc_area_node()
3160 * - insufficient huge page-order pages in __vmalloc_area_node()
3162 * Since we always retry allocations at order-0 in the huge page in __vmalloc_area_node()
3168 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3182 ret = vmap_pages_range(addr, addr + size, prot, area->pages, in __vmalloc_area_node()
3195 "vmalloc error: size %lu, failed to map pages", in __vmalloc_area_node()
3196 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3200 return area->addr; in __vmalloc_area_node()
3203 vfree(area->addr); in __vmalloc_area_node()
3208 * __vmalloc_node_range - allocate virtually contiguous memory
3211 * @start: vm area range start
3212 * @end: vm area range end
3215 * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD)
3230 * Map them into contiguous kernel virtual space, using a pagetable
3319 /* Allocate physical pages and map them into vmalloc space. */ in __vmalloc_node_range()
3329 * Tag-based KASAN modes only assign tags to normal non-executable in __vmalloc_node_range()
3337 area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); in __vmalloc_node_range()
3350 return area->addr; in __vmalloc_node_range()
3364 * __vmalloc_node - allocate virtually contiguous memory
3372 * @gfp_mask flags. Map them into contiguous kernel virtual space.
3374 * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
3405 * vmalloc - allocate virtually contiguous memory
3409 * allocator and map them into contiguous kernel virtual space.
3424 * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
3429 * allocator and map them into contiguous kernel virtual space.
3444 * vzalloc - allocate virtually contiguous memory with zero fill
3448 * allocator and map them into contiguous kernel virtual space.
3464 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
3482 * vmalloc_node - allocate memory on a specific node
3487 * allocator and map them into contiguous kernel virtual space.
3502 * vzalloc_node - allocate memory on a specific node with zero fill
3507 * allocator and map them into contiguous kernel virtual space.
3532 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
3536 * page level allocator and map them into contiguous kernel virtual space.
3548 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
3579 remains -= copied; in zero_iter()
3585 return count - remains; in zero_iter()
3605 length = PAGE_SIZE - offset; in aligned_vread_iter()
3624 remains -= copied; in aligned_vread_iter()
3630 return count - remains; in aligned_vread_iter()
3667 spin_lock(&vb->lock); in vmap_ram_vread_iter()
3668 if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) { in vmap_ram_vread_iter()
3669 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
3673 for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) { in vmap_ram_vread_iter()
3679 start = vmap_block_vaddr(vb->va->va_start, rs); in vmap_ram_vread_iter()
3682 size_t to_zero = min_t(size_t, start - addr, remains); in vmap_ram_vread_iter()
3686 remains -= zeroed; in vmap_ram_vread_iter()
3694 n = ((re - rs + 1) << PAGE_SHIFT) - offset; in vmap_ram_vread_iter()
3701 remains -= copied; in vmap_ram_vread_iter()
3707 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
3710 /* zero-fill the left dirty or free regions */ in vmap_ram_vread_iter()
3711 return count - remains + zero_iter(iter, remains); in vmap_ram_vread_iter()
3714 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
3715 return count - remains; in vmap_ram_vread_iter()
3719 * vread_iter() - read vmalloc area in a safe way to an iterator.
3721 * @addr: vm address.
3727 * proper area of @buf. If there are memory holes, they'll be zero-filled.
3745 struct vm_struct *vm; in vread_iter() local
3753 count = -(unsigned long) addr; in vread_iter()
3763 if ((unsigned long)addr + remains <= va->va_start) in vread_iter()
3772 vm = va->vm; in vread_iter()
3773 flags = va->flags & VMAP_FLAGS_MASK; in vread_iter()
3775 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need in vread_iter()
3780 if (!vm && !flags) in vread_iter()
3783 if (vm && (vm->flags & VM_UNINITIALIZED)) in vread_iter()
3789 vaddr = (char *) va->va_start; in vread_iter()
3790 size = vm ? get_vm_area_size(vm) : va_size(va); in vread_iter()
3796 size_t to_zero = min_t(size_t, vaddr - addr, remains); in vread_iter()
3800 remains -= zeroed; in vread_iter()
3806 n = vaddr + size - addr; in vread_iter()
3812 else if (!(vm->flags & VM_IOREMAP)) in vread_iter()
3818 remains -= copied; in vread_iter()
3826 /* zero-fill memory holes */ in vread_iter()
3827 return count - remains + zero_iter(iter, remains); in vread_iter()
3832 return count - remains; in vread_iter()
3836 * remap_vmalloc_range_partial - map vmalloc pages to userspace
3841 * @size: size of map area
3843 * Returns: 0 for success, -Exxx on failure
3861 return -EINVAL; in remap_vmalloc_range_partial()
3866 return -EINVAL; in remap_vmalloc_range_partial()
3870 return -EINVAL; in remap_vmalloc_range_partial()
3872 if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) in remap_vmalloc_range_partial()
3873 return -EINVAL; in remap_vmalloc_range_partial()
3877 return -EINVAL; in remap_vmalloc_range_partial()
3890 size -= PAGE_SIZE; in remap_vmalloc_range_partial()
3899 * remap_vmalloc_range - map vmalloc pages to userspace
3900 * @vma: vma to cover (map full range of vma)
3902 * @pgoff: number of pages into addr before first page to map
3904 * Returns: 0 for success, -Exxx on failure
3915 return remap_vmalloc_range_partial(vma, vma->vm_start, in remap_vmalloc_range()
3917 vma->vm_end - vma->vm_start); in remap_vmalloc_range()
3924 ret = remove_vm_area(area->addr); in free_vm_area()
3937 * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
3942 * i.e. va->va_start < addr && va->va_end < addr or NULL
3956 if (tmp->va_start <= addr) { in pvm_find_va_enclose_addr()
3958 if (tmp->va_end >= addr) in pvm_find_va_enclose_addr()
3961 n = n->rb_right; in pvm_find_va_enclose_addr()
3963 n = n->rb_left; in pvm_find_va_enclose_addr()
3971 * pvm_determine_end_from_reverse - find the highest aligned address
3974 * in - the VA we start the search(reverse order);
3975 * out - the VA with the highest aligned end address.
3983 unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pvm_determine_end_from_reverse()
3989 addr = min((*va)->va_end & ~(align - 1), vmalloc_end); in pvm_determine_end_from_reverse()
3990 if ((*va)->va_start < addr) in pvm_determine_end_from_reverse()
3999 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
4008 * Percpu allocator wants to use congruent vm areas so that it can
4016 * does everything top-down and scans free blocks from the end looking
4027 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pcpu_get_vm_areas()
4057 if (vmalloc_end - vmalloc_start < last_end) { in pcpu_get_vm_areas()
4076 /* start scanning - we scan from the top, begin with the last area */ in pcpu_get_vm_areas()
4082 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4102 if (base + end > va->va_end) { in pcpu_get_vm_areas()
4103 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4111 if (base + start < va->va_start) { in pcpu_get_vm_areas()
4112 va = node_to_va(rb_prev(&va->rb_node)); in pcpu_get_vm_areas()
4113 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4122 area = (area + nr_vms - 1) % nr_vms; in pcpu_get_vm_areas()
4152 va->va_start = start; in pcpu_get_vm_areas()
4153 va->va_end = start + size; in pcpu_get_vm_areas()
4160 if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) in pcpu_get_vm_areas()
4164 /* insert all vm's */ in pcpu_get_vm_areas()
4175 * Mark allocated areas as accessible. Do it now as a best-effort in pcpu_get_vm_areas()
4177 * With hardware tag-based KASAN, marking is skipped for in pcpu_get_vm_areas()
4178 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in pcpu_get_vm_areas()
4181 vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, in pcpu_get_vm_areas()
4182 vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); in pcpu_get_vm_areas()
4194 while (area--) { in pcpu_get_vm_areas()
4195 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4196 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4201 va->va_start, va->va_end); in pcpu_get_vm_areas()
4245 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4246 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4251 va->va_start, va->va_end); in pcpu_get_vm_areas()
4262 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
4283 struct vm_struct *vm; in vmalloc_dump_obj() local
4296 vm = va->vm; in vmalloc_dump_obj()
4297 if (!vm) { in vmalloc_dump_obj()
4301 addr = (unsigned long)vm->addr; in vmalloc_dump_obj()
4302 caller = vm->caller; in vmalloc_dump_obj()
4303 nr_pages = vm->nr_pages; in vmalloc_dump_obj()
4305 pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", in vmalloc_dump_obj()
4338 unsigned int nr, *counters = m->private; in show_numa_info()
4344 if (v->flags & VM_UNINITIALIZED) in show_numa_info()
4351 for (nr = 0; nr < v->nr_pages; nr += step) in show_numa_info()
4352 counters[page_to_nid(v->pages[nr])] += step; in show_numa_info()
4365 seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", in show_purge_info()
4366 (void *)va->va_start, (void *)va->va_end, in show_purge_info()
4367 va->va_end - va->va_start); in show_purge_info()
4379 if (!va->vm) { in s_show()
4380 if (va->flags & VMAP_RAM) in s_show()
4381 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", in s_show()
4382 (void *)va->va_start, (void *)va->va_end, in s_show()
4383 va->va_end - va->va_start); in s_show()
4388 v = va->vm; in s_show()
4390 seq_printf(m, "0x%pK-0x%pK %7ld", in s_show()
4391 v->addr, v->addr + v->size, v->size); in s_show()
4393 if (v->caller) in s_show()
4394 seq_printf(m, " %pS", v->caller); in s_show()
4396 if (v->nr_pages) in s_show()
4397 seq_printf(m, " pages=%d", v->nr_pages); in s_show()
4399 if (v->phys_addr) in s_show()
4400 seq_printf(m, " phys=%pa", &v->phys_addr); in s_show()
4402 if (v->flags & VM_IOREMAP) in s_show()
4405 if (v->flags & VM_ALLOC) in s_show()
4408 if (v->flags & VM_MAP) in s_show()
4411 if (v->flags & VM_USERMAP) in s_show()
4414 if (v->flags & VM_DMA_COHERENT) in s_show()
4415 seq_puts(m, " dma-coherent"); in s_show()
4417 if (is_vmalloc_addr(v->pages)) in s_show()
4427 if (list_is_last(&va->list, &vmap_area_list)) in s_show()
4470 spin_lock_init(&vbq->lock); in vmalloc_init()
4471 INIT_LIST_HEAD(&vbq->free); in vmalloc_init()
4473 init_llist_head(&p->list); in vmalloc_init()
4474 INIT_WORK(&p->wq, delayed_vfree_work); in vmalloc_init()
4475 xa_init(&vbq->vmap_blocks); in vmalloc_init()
4479 for (tmp = vmlist; tmp; tmp = tmp->next) { in vmalloc_init()
4484 va->va_start = (unsigned long)tmp->addr; in vmalloc_init()
4485 va->va_end = va->va_start + tmp->size; in vmalloc_init()
4486 va->vm = tmp; in vmalloc_init()