Lines matching: charge, current, limit, mapping (fork.c)

1 // SPDX-License-Identifier: GPL-2.0-only
9 * 'fork.c' contains the helper routines for the 'fork' system call
83 #include <linux/posix-timers.h>
84 #include <linux/user-return-notifier.h>
127 static int max_threads; /* tunable limit on nr_threads */
206 vfree(vm_stack->addr); in free_vm_stack_cache()
229 kasan_unpoison_shadow(s->addr, THREAD_SIZE); in alloc_thread_stack_node()
232 memset(s->addr, 0, THREAD_SIZE); in alloc_thread_stack_node()
234 tsk->stack_vm_area = s; in alloc_thread_stack_node()
235 tsk->stack = s->addr; in alloc_thread_stack_node()
236 return s->addr; in alloc_thread_stack_node()
256 tsk->stack_vm_area = find_vm_area(stack); in alloc_thread_stack_node()
257 tsk->stack = stack; in alloc_thread_stack_node()
265 tsk->stack = kasan_reset_tag(page_address(page)); in alloc_thread_stack_node()
266 return tsk->stack; in alloc_thread_stack_node()
281 memcg_kmem_uncharge_page(vm->pages[i], 0); in free_thread_stack()
285 NULL, tsk->stack_vm_area) != NULL) in free_thread_stack()
291 vfree_atomic(tsk->stack); in free_thread_stack()
296 __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); in free_thread_stack()
307 tsk->stack = stack; in alloc_thread_stack_node()
313 kmem_cache_free(thread_stack_cache, tsk->stack); in free_thread_stack()
326 /* SLAB cache for signal_struct structures (tsk->signal) */
329 /* SLAB cache for sighand_struct structures (tsk->sighand) */
332 /* SLAB cache for files_struct structures (tsk->files) */
335 /* SLAB cache for fs_struct structures (tsk->fs) */
341 /* SLAB cache for mm_struct structures (tsk->mm) */
359 ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); in vm_area_dup()
360 ASSERT_EXCLUSIVE_WRITER(orig->vm_file); in vm_area_dup()
362 * orig->shared.rb may be modified concurrently, but the clone in vm_area_dup()
366 INIT_LIST_HEAD(&new->anon_vma_chain); in vm_area_dup()
367 new->vm_next = new->vm_prev = NULL; in vm_area_dup()
385 mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB, in account_kernel_stack()
403 BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); in memcg_charge_kernel_stack()
407 * If memcg_kmem_charge_page() fails, page->mem_cgroup in memcg_charge_kernel_stack()
411 ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, in memcg_charge_kernel_stack()
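The NR_KERNEL_STACK_KB counter that account_kernel_stack() adjusts above surfaces as the "KernelStack:" field of /proc/meminfo. A minimal userspace sketch to read it back, assuming the usual procfs layout:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/meminfo", "r");

        if (!f)
                return 1;
        while (fgets(line, sizeof(line), f))
                if (!strncmp(line, "KernelStack:", 12)) {
                        fputs(line, stdout); /* e.g. "KernelStack:   12288 kB" */
                        break;
                }
        fclose(f);
        return 0;
}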
423 if (WARN_ON(tsk->state != TASK_DEAD)) in release_task_stack()
426 account_kernel_stack(tsk, -1); in release_task_stack()
428 tsk->stack = NULL; in release_task_stack()
430 tsk->stack_vm_area = NULL; in release_task_stack()
437 if (refcount_dec_and_test(&tsk->stack_refcount)) in put_task_stack()
457 WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); in free_task()
462 if (tsk->flags & PF_KTHREAD) in free_task()
475 unsigned long charge; in dup_mmap() local
480 retval = -EINTR; in dup_mmap()
486 * Not linked in yet - no deadlock potential: in dup_mmap()
491 RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); in dup_mmap()
493 mm->total_vm = oldmm->total_vm; in dup_mmap()
494 mm->data_vm = oldmm->data_vm; in dup_mmap()
495 mm->exec_vm = oldmm->exec_vm; in dup_mmap()
496 mm->stack_vm = oldmm->stack_vm; in dup_mmap()
498 rb_link = &mm->mm_rb.rb_node; in dup_mmap()
500 pprev = &mm->mmap; in dup_mmap()
509 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { in dup_mmap()
512 if (mpnt->vm_flags & VM_DONTCOPY) { in dup_mmap()
513 vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); in dup_mmap()
516 charge = 0; in dup_mmap()
518 * Don't duplicate many vmas if we've been oom-killed (for in dup_mmap()
521 if (fatal_signal_pending(current)) { in dup_mmap()
522 retval = -EINTR; in dup_mmap()
525 if (mpnt->vm_flags & VM_ACCOUNT) { in dup_mmap()
530 charge = len; in dup_mmap()
538 tmp->vm_mm = mm; in dup_mmap()
542 if (tmp->vm_flags & VM_WIPEONFORK) { in dup_mmap()
546 * copy page for current vma. in dup_mmap()
548 tmp->anon_vma = NULL; in dup_mmap()
551 tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); in dup_mmap()
552 file = tmp->vm_file; in dup_mmap()
555 struct address_space *mapping = file->f_mapping; in dup_mmap() local
558 if (tmp->vm_flags & VM_DENYWRITE) in dup_mmap()
560 i_mmap_lock_write(mapping); in dup_mmap()
561 if (tmp->vm_flags & VM_SHARED) in dup_mmap()
562 mapping_allow_writable(mapping); in dup_mmap()
563 flush_dcache_mmap_lock(mapping); in dup_mmap()
566 &mapping->i_mmap); in dup_mmap()
567 flush_dcache_mmap_unlock(mapping); in dup_mmap()
568 i_mmap_unlock_write(mapping); in dup_mmap()
572 * Clear hugetlb-related page reserves for children. This only in dup_mmap()
574 * are not guaranteed to succeed, even if read-only in dup_mmap()
583 pprev = &tmp->vm_next; in dup_mmap()
584 tmp->vm_prev = prev; in dup_mmap()
588 rb_link = &tmp->vm_rb.rb_right; in dup_mmap()
589 rb_parent = &tmp->vm_rb; in dup_mmap()
591 mm->map_count++; in dup_mmap()
592 if (!(tmp->vm_flags & VM_WIPEONFORK)) in dup_mmap()
595 if (tmp->vm_ops && tmp->vm_ops->open) in dup_mmap()
596 tmp->vm_ops->open(tmp); in dup_mmap()
616 retval = -ENOMEM; in dup_mmap()
617 vm_unacct_memory(charge); in dup_mmap()
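The VM_DONTCOPY and VM_WIPEONFORK branches of dup_mmap() above are reachable from userspace via madvise(): MADV_DONTFORK sets VM_DONTCOPY (the mapping is skipped entirely in the child), while MADV_WIPEONFORK sets VM_WIPEONFORK (the child keeps the mapping but, via the tmp->anon_vma = NULL path, sees fresh zero pages). A hedged sketch, assuming a libc that defines MADV_WIPEONFORK (Linux 4.14+):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        strcpy(p, "parent data");
        if (madvise(p, 4096, MADV_WIPEONFORK))  /* -> VM_WIPEONFORK */
                perror("madvise");

        if (fork() == 0) {
                /* dup_mmap() dropped the anon pages for this vma, so the
                 * child faults in zero-filled memory: prints "". */
                printf("child sees: \"%s\"\n", p);
                _exit(0);
        }
        wait(NULL);
        printf("parent sees: \"%s\"\n", p);     /* unchanged */
        return 0;
}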
623 mm->pgd = pgd_alloc(mm); in mm_alloc_pgd()
624 if (unlikely(!mm->pgd)) in mm_alloc_pgd()
625 return -ENOMEM; in mm_alloc_pgd()
631 pgd_free(mm, mm->pgd); in mm_free_pgd()
637 RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); in dup_mmap()
653 long x = atomic_long_read(&mm->rss_stat.count[i]); in check_mm()
656 pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", in check_mm()
661 pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", in check_mm()
665 VM_BUG_ON_MM(mm->pmd_huge_pte, mm); in check_mm()
680 WARN_ON_ONCE(mm == current->mm); in __mmdrop()
681 WARN_ON_ONCE(mm == current->active_mm); in __mmdrop()
686 put_user_ns(mm->user_ns); in __mmdrop()
701 if (unlikely(atomic_dec_and_test(&mm->mm_count))) { in mmdrop_async()
702 INIT_WORK(&mm->async_put_work, mmdrop_async_fn); in mmdrop_async()
703 schedule_work(&mm->async_put_work); in mmdrop_async()
715 if (sig->oom_mm) in free_signal_struct()
716 mmdrop_async(sig->oom_mm); in free_signal_struct()
722 if (refcount_dec_and_test(&sig->sigcnt)) in put_signal_struct()
728 WARN_ON(!tsk->exit_state); in __put_task_struct()
729 WARN_ON(refcount_read(&tsk->usage)); in __put_task_struct()
730 WARN_ON(tsk == current); in __put_task_struct()
738 put_signal_struct(tsk->signal); in __put_task_struct()
783 * Handle zero-sized whitelist or empty thread_struct, otherwise in task_struct_whitelist()
816 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; in fork_init()
817 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; in fork_init()
818 init_task.signal->rlim[RLIMIT_SIGPENDING] = in fork_init()
819 init_task.signal->rlim[RLIMIT_NPROC]; in fork_init()
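fork_init() above seeds init's RLIMIT_NPROC from max_threads/2, and the value is inherited down the tree; it can be read back with getrlimit(), while max_threads itself is /proc/sys/kernel/threads-max. A minimal sketch:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_NPROC, &rl))
                return 1;
        /* RLIM_INFINITY prints as a very large number */
        printf("RLIMIT_NPROC: cur=%llu max=%llu\n",
               (unsigned long long)rl.rlim_cur,
               (unsigned long long)rl.rlim_max);
        return 0;
}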
876 * arch_dup_task_struct() clobbers the stack-related fields. Make in dup_task_struct()
877 * sure they're properly initialized before using any stack-related in dup_task_struct()
880 tsk->stack = stack; in dup_task_struct()
882 tsk->stack_vm_area = stack_vm_area; in dup_task_struct()
885 refcount_set(&tsk->stack_refcount, 1); in dup_task_struct()
902 tsk->seccomp.filter = NULL; in dup_task_struct()
911 tsk->stack_canary = get_random_canary(); in dup_task_struct()
913 if (orig->cpus_ptr == &orig->cpus_mask) in dup_task_struct()
914 tsk->cpus_ptr = &tsk->cpus_mask; in dup_task_struct()
920 refcount_set(&tsk->rcu_users, 2); in dup_task_struct()
922 refcount_set(&tsk->usage, 1); in dup_task_struct()
924 tsk->btrace_seq = 0; in dup_task_struct()
926 tsk->splice_pipe = NULL; in dup_task_struct()
927 tsk->task_frag.page = NULL; in dup_task_struct()
928 tsk->wake_q.next = NULL; in dup_task_struct()
935 tsk->fail_nth = 0; in dup_task_struct()
939 tsk->throttle_queue = NULL; in dup_task_struct()
940 tsk->use_memdelay = 0; in dup_task_struct()
944 tsk->active_memcg = NULL; in dup_task_struct()
974 spin_lock_init(&mm->ioctx_lock); in mm_init_aio()
975 mm->ioctx_table = NULL; in mm_init_aio()
983 if (mm->owner == p) in mm_clear_owner()
984 WRITE_ONCE(mm->owner, NULL); in mm_clear_owner()
991 mm->owner = p; in mm_init_owner()
998 mm->uprobes_state.xol_area = NULL; in mm_init_uprobes_state()
1005 mm->mmap = NULL; in mm_init()
1006 mm->mm_rb = RB_ROOT; in mm_init()
1007 mm->vmacache_seqnum = 0; in mm_init()
1008 atomic_set(&mm->mm_users, 1); in mm_init()
1009 atomic_set(&mm->mm_count, 1); in mm_init()
1011 INIT_LIST_HEAD(&mm->mmlist); in mm_init()
1012 mm->core_state = NULL; in mm_init()
1014 mm->map_count = 0; in mm_init()
1015 mm->locked_vm = 0; in mm_init()
1016 atomic_set(&mm->has_pinned, 0); in mm_init()
1017 atomic64_set(&mm->pinned_vm, 0); in mm_init()
1018 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); in mm_init()
1019 spin_lock_init(&mm->page_table_lock); in mm_init()
1020 spin_lock_init(&mm->arg_lock); in mm_init()
1024 RCU_INIT_POINTER(mm->exe_file, NULL); in mm_init()
1028 mm->pmd_huge_pte = NULL; in mm_init()
1032 if (current->mm) { in mm_init()
1033 mm->flags = current->mm->flags & MMF_INIT_MASK; in mm_init()
1034 mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; in mm_init()
1036 mm->flags = default_dump_filter; in mm_init()
1037 mm->def_flags = 0; in mm_init()
1046 mm->user_ns = get_user_ns(user_ns); in mm_init()
1068 return mm_init(mm, current, current_user_ns()); in mm_alloc()
1073 VM_BUG_ON(atomic_read(&mm->mm_users)); in __mmput()
1082 if (!list_empty(&mm->mmlist)) { in __mmput()
1084 list_del(&mm->mmlist); in __mmput()
1087 if (mm->binfmt) in __mmput()
1088 module_put(mm->binfmt->module); in __mmput()
1099 if (atomic_dec_and_test(&mm->mm_users)) in mmput()
1115 if (atomic_dec_and_test(&mm->mm_users)) { in mmput_async()
1116 INIT_WORK(&mm->async_put_work, mmput_async_fn); in mmput_async()
1117 schedule_work(&mm->async_put_work); in mmput_async()
1123 * set_mm_exe_file - change a reference to the mm's executable file
1130 * mm->exe_file, but does so without using set_mm_exe_file() in order
1140 * this mm -- see comment above for justification. in set_mm_exe_file()
1142 old_exe_file = rcu_dereference_raw(mm->exe_file); in set_mm_exe_file()
1146 rcu_assign_pointer(mm->exe_file, new_exe_file); in set_mm_exe_file()
1152 * get_mm_exe_file - acquire a reference to the mm's executable file
1162 exe_file = rcu_dereference(mm->exe_file); in get_mm_exe_file()
1171 * get_task_exe_file - acquire a reference to the task's executable file
1183 mm = task->mm; in get_task_exe_file()
1185 if (!(task->flags & PF_KTHREAD)) in get_task_exe_file()
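get_mm_exe_file()/get_task_exe_file() above hand out the reference that backs the /proc/<pid>/exe symlink; from userspace the same information is a readlink() away:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n = readlink("/proc/self/exe", buf, sizeof(buf) - 1);

        if (n < 0)
                return 1;
        buf[n] = '\0';          /* readlink() does not NUL-terminate */
        printf("exe: %s\n", buf);
        return 0;
}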
1194 * get_task_mm - acquire a reference to the task's mm
1207 mm = task->mm; in get_task_mm()
1209 if (task->flags & PF_KTHREAD) in get_task_mm()
1224 err = mutex_lock_killable(&task->signal->exec_update_mutex); in mm_access()
1229 if (mm && mm != current->mm && in mm_access()
1232 mm = ERR_PTR(-EACCES); in mm_access()
1234 mutex_unlock(&task->signal->exec_update_mutex); in mm_access()
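mm_access() above is the permission gate behind, among others, process_vm_readv(); a self-read always passes because of the mm == current->mm short-circuit. A minimal sketch (glibc 2.15+):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        char src[] = "hello", dst[sizeof(src)];
        struct iovec local  = { .iov_base = dst, .iov_len = sizeof(dst) };
        struct iovec remote = { .iov_base = src, .iov_len = sizeof(src) };

        /* reading our own address space: mm == current->mm */
        if (process_vm_readv(getpid(), &local, 1, &remote, 1, 0) < 0)
                return 1;
        printf("read back: %s\n", dst);
        return 0;
}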
1244 vfork = tsk->vfork_done; in complete_vfork_done()
1246 tsk->vfork_done = NULL; in complete_vfork_done()
1265 child->vfork_done = NULL; in wait_for_vfork_done()
1278 * from the current process.
1298 if (tsk->clear_child_tid) { in mm_release()
1299 if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && in mm_release()
1300 atomic_read(&mm->mm_users) > 1) { in mm_release()
1302 * We don't check the error code - if userspace has in mm_release()
1305 put_user(0, tsk->clear_child_tid); in mm_release()
1306 do_futex(tsk->clear_child_tid, FUTEX_WAKE, in mm_release()
1309 tsk->clear_child_tid = NULL; in mm_release()
1316 if (tsk->vfork_done) in mm_release()
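The tsk->vfork_done completion that mm_release() fires above is what unblocks a vfork() parent: the parent stays suspended until the child execs or exits. A minimal sketch of the userspace-visible ordering:

#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid = vfork();

        if (pid == 0)
                _exit(0);       /* mm_release() -> complete_vfork_done() */
        /* only reached after the child has exited or exec'd */
        waitpid(pid, NULL, 0);
        puts("parent resumed");
        return 0;
}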
1333 * dup_mm() - duplicates an existing mm structure
1354 if (!mm_init(mm, tsk, mm->user_ns)) in dup_mm()
1361 mm->hiwater_rss = get_mm_rss(mm); in dup_mm()
1362 mm->hiwater_vm = mm->total_vm; in dup_mm()
1364 if (mm->binfmt && !try_module_get(mm->binfmt->module)) in dup_mm()
1371 mm->binfmt = NULL; in dup_mm()
1384 tsk->min_flt = tsk->maj_flt = 0; in copy_mm()
1385 tsk->nvcsw = tsk->nivcsw = 0; in copy_mm()
1387 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; in copy_mm()
1388 tsk->last_switch_time = 0; in copy_mm()
1391 tsk->mm = NULL; in copy_mm()
1392 tsk->active_mm = NULL; in copy_mm()
1399 oldmm = current->mm; in copy_mm()
1412 retval = -ENOMEM; in copy_mm()
1413 mm = dup_mm(tsk, current->mm); in copy_mm()
1418 tsk->mm = mm; in copy_mm()
1419 tsk->active_mm = mm; in copy_mm()
1428 struct fs_struct *fs = current->fs; in copy_fs()
1430 /* tsk->fs is already what we want */ in copy_fs()
1431 spin_lock(&fs->lock); in copy_fs()
1432 if (fs->in_exec) { in copy_fs()
1433 spin_unlock(&fs->lock); in copy_fs()
1434 return -EAGAIN; in copy_fs()
1436 fs->users++; in copy_fs()
1437 spin_unlock(&fs->lock); in copy_fs()
1440 tsk->fs = copy_fs_struct(fs); in copy_fs()
1441 if (!tsk->fs) in copy_fs()
1442 return -ENOMEM; in copy_fs()
1454 oldf = current->files; in copy_files()
1459 atomic_inc(&oldf->count); in copy_files()
1467 tsk->files = newf; in copy_files()
1476 struct io_context *ioc = current->io_context; in copy_io()
1486 tsk->io_context = ioc; in copy_io()
1487 } else if (ioprio_valid(ioc->ioprio)) { in copy_io()
1490 return -ENOMEM; in copy_io()
1492 new_ioc->ioprio = ioc->ioprio; in copy_io()
1504 refcount_inc(&current->sighand->count); in copy_sighand()
1508 RCU_INIT_POINTER(tsk->sighand, sig); in copy_sighand()
1510 return -ENOMEM; in copy_sighand()
1512 refcount_set(&sig->count, 1); in copy_sighand()
1513 spin_lock_irq(&current->sighand->siglock); in copy_sighand()
1514 memcpy(sig->action, current->sighand->action, sizeof(sig->action)); in copy_sighand()
1515 spin_unlock_irq(&current->sighand->siglock); in copy_sighand()
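copy_sighand() above memcpy()s the parent's action table when CLONE_SIGHAND is not set, so a plain fork() child starts with identical signal dispositions. A minimal sketch:

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static void handler(int sig) { (void)sig; }

int main(void)
{
        struct sigaction sa = { 0 }, got;

        sa.sa_handler = handler;
        sigaction(SIGUSR1, &sa, NULL);
        if (fork() == 0) {
                sigaction(SIGUSR1, NULL, &got); /* query, don't change */
                printf("child inherited handler: %s\n",
                       got.sa_handler == handler ? "yes" : "no");
                _exit(0);
        }
        wait(NULL);
        return 0;
}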
1526 if (refcount_dec_and_test(&sighand->count)) { in __cleanup_sighand()
1541 struct posix_cputimers *pct = &sig->posix_cputimers; in posix_cpu_timers_init_group()
1544 cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); in posix_cpu_timers_init_group()
1556 tsk->signal = sig; in copy_signal()
1558 return -ENOMEM; in copy_signal()
1560 sig->nr_threads = 1; in copy_signal()
1561 atomic_set(&sig->live, 1); in copy_signal()
1562 refcount_set(&sig->sigcnt, 1); in copy_signal()
1565 sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); in copy_signal()
1566 tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); in copy_signal()
1568 init_waitqueue_head(&sig->wait_chldexit); in copy_signal()
1569 sig->curr_target = tsk; in copy_signal()
1570 init_sigpending(&sig->shared_pending); in copy_signal()
1571 INIT_HLIST_HEAD(&sig->multiprocess); in copy_signal()
1572 seqlock_init(&sig->stats_lock); in copy_signal()
1573 prev_cputime_init(&sig->prev_cputime); in copy_signal()
1576 INIT_LIST_HEAD(&sig->posix_timers); in copy_signal()
1577 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); in copy_signal()
1578 sig->real_timer.function = it_real_fn; in copy_signal()
1581 task_lock(current->group_leader); in copy_signal()
1582 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); in copy_signal()
1583 task_unlock(current->group_leader); in copy_signal()
1590 sig->oom_score_adj = current->signal->oom_score_adj; in copy_signal()
1591 sig->oom_score_adj_min = current->signal->oom_score_adj_min; in copy_signal()
1593 mutex_init(&sig->cred_guard_mutex); in copy_signal()
1594 mutex_init(&sig->exec_update_mutex); in copy_signal()
1603 * Must be called with sighand->lock held, which is common to in copy_seccomp()
1608 assert_spin_locked(&current->sighand->siglock); in copy_seccomp()
1610 /* Ref-count the new filter user, and assign it. */ in copy_seccomp()
1611 get_seccomp_filter(current); in copy_seccomp()
1612 p->seccomp = current->seccomp; in copy_seccomp()
1619 if (task_no_new_privs(current)) in copy_seccomp()
1627 if (p->seccomp.mode != SECCOMP_MODE_DISABLED) in copy_seccomp()
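copy_seccomp() runs under the parent's siglock and carries both the filter reference and, via the task_no_new_privs() check above, the no_new_privs bit into the child. The bit's inheritance is easy to observe from userspace:

#include <stdio.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        if (fork() == 0) {
                /* prints 1: the bit survived copy_process() */
                printf("child no_new_privs = %d\n",
                       prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0));
                _exit(0);
        }
        wait(NULL);
        return 0;
}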
1634 current->clear_child_tid = tidptr; in SYSCALL_DEFINE1()
1636 return task_pid_vnr(current); in SYSCALL_DEFINE1()
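sys_set_tid_address() only records the pointer later consumed by the mm_release() futex wake shown earlier, and returns task_pid_vnr(), i.e. the caller's TID. A minimal sketch:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        int tidptr = 0;
        /* kernel will write 0 to &tidptr and futex-wake it at exit */
        long tid = syscall(SYS_set_tid_address, &tidptr);

        printf("tid=%ld gettid=%ld\n", tid, (long)syscall(SYS_gettid));
        return 0;
}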
1641 raw_spin_lock_init(&p->pi_lock); in rt_mutex_init_task()
1643 p->pi_waiters = RB_ROOT_CACHED; in rt_mutex_init_task()
1644 p->pi_top_task = NULL; in rt_mutex_init_task()
1645 p->pi_blocked_on = NULL; in rt_mutex_init_task()
1654 INIT_HLIST_NODE(&task->pid_links[type]); in init_task_pid_links()
1662 task->thread_pid = pid; in init_task_pid()
1664 task->signal->pids[type] = pid; in init_task_pid()
1670 p->rcu_read_lock_nesting = 0; in rcu_copy_process()
1671 p->rcu_read_unlock_special.s = 0; in rcu_copy_process()
1672 p->rcu_blocked_node = NULL; in rcu_copy_process()
1673 INIT_LIST_HEAD(&p->rcu_node_entry); in rcu_copy_process()
1676 p->rcu_tasks_holdout = false; in rcu_copy_process()
1677 INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); in rcu_copy_process()
1678 p->rcu_tasks_idle_cpu = -1; in rcu_copy_process()
1681 p->trc_reader_nesting = 0; in rcu_copy_process()
1682 p->trc_reader_special.s = 0; in rcu_copy_process()
1683 INIT_LIST_HEAD(&p->trc_holdout_list); in rcu_copy_process()
1689 if (file->f_op == &pidfd_fops) in pidfd_pid()
1690 return file->private_data; in pidfd_pid()
1692 return ERR_PTR(-EBADF); in pidfd_pid()
1697 struct pid *pid = file->private_data; in pidfd_release()
1699 file->private_data = NULL; in pidfd_release()
1706 * pidfd_show_fdinfo - print information about a pidfd
1721 * starting from the current pid namespace of the instance, i.e. the
1732 * - create two new pid namespaces ns1 and ns2 in the initial pid
1735 * - create a process with a pidfd in ns1
1736 * - send pidfd from ns1 to ns2
1737 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
1742 struct pid *pid = f->private_data; in pidfd_show_fdinfo()
1744 pid_t nr = -1; in pidfd_show_fdinfo()
1747 ns = proc_pid_ns(file_inode(m->file)->i_sb); in pidfd_show_fdinfo()
1758 /* If nr is non-zero it means that 'pid' is valid and that in pidfd_show_fdinfo()
1763 for (i = ns->level + 1; i <= pid->level; i++) in pidfd_show_fdinfo()
1764 seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); in pidfd_show_fdinfo()
1776 struct pid *pid = file->private_data; in pidfd_poll()
1779 poll_wait(file, &pid->wait_pidfd, pts); in pidfd_poll()
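pidfd_poll() above reports readiness once the process has exited, and pidfd_show_fdinfo() is what you read under /proc/<pid>/fdinfo/<fd>. A hedged sketch using pidfd_open (Linux 5.3+; raw syscall in case the libc lacks a wrapper):

#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid = fork();

        if (pid == 0) {
                sleep(1);
                _exit(0);
        }

        int pidfd = syscall(SYS_pidfd_open, pid, 0);
        if (pidfd < 0)
                return 1;

        struct pollfd pfd = { .fd = pidfd, .events = POLLIN };
        poll(&pfd, 1, -1);      /* blocks until the child exits */
        printf("child %d exited (revents=0x%x)\n", pid, pfd.revents);
        waitpid(pid, NULL, 0);
        return 0;
}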
1810 call_rcu(&tsk->rcu, __delayed_free_task); in delayed_free_task()
1818 if (!tsk->mm) in copy_oom_score_adj()
1827 set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); in copy_oom_score_adj()
1829 tsk->signal->oom_score_adj = current->signal->oom_score_adj; in copy_oom_score_adj()
1830 tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; in copy_oom_score_adj()
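The oom_score_adj/oom_score_adj_min pair that copy_oom_score_adj() propagates above is the same value exposed at /proc/<pid>/oom_score_adj:

#include <stdio.h>

int main(void)
{
        char buf[32];
        FILE *f = fopen("/proc/self/oom_score_adj", "r");

        if (!f)
                return 1;
        if (fgets(buf, sizeof(buf), f))
                printf("oom_score_adj: %s", buf);
        fclose(f);
        return 0;
}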
1840 * flags). The actual kick-off is left to the caller.
1848 int pidfd = -1, retval; in copy_process()
1852 u64 clone_flags = args->flags; in copy_process()
1853 struct nsproxy *nsp = current->nsproxy; in copy_process()
1860 return ERR_PTR(-EINVAL); in copy_process()
1863 return ERR_PTR(-EINVAL); in copy_process()
1870 return ERR_PTR(-EINVAL); in copy_process()
1878 return ERR_PTR(-EINVAL); in copy_process()
1883 * multi-rooted process trees, prevent global and container-inits in copy_process()
1887 current->signal->flags & SIGNAL_UNKILLABLE) in copy_process()
1888 return ERR_PTR(-EINVAL); in copy_process()
1896 (task_active_pid_ns(current) != nsp->pid_ns_for_children)) in copy_process()
1897 return ERR_PTR(-EINVAL); in copy_process()
1905 if (nsp->time_ns != nsp->time_ns_for_children) in copy_process()
1906 return ERR_PTR(-EINVAL); in copy_process()
1911 * - CLONE_DETACHED is blocked so that we can potentially in copy_process()
1913 * - CLONE_THREAD is blocked until someone really needs it. in copy_process()
1916 return ERR_PTR(-EINVAL); in copy_process()
1928 spin_lock_irq(&current->sighand->siglock); in copy_process()
1930 hlist_add_head(&delayed.node, &current->signal->multiprocess); in copy_process()
1932 spin_unlock_irq(&current->sighand->siglock); in copy_process()
1933 retval = -ERESTARTNOINTR; in copy_process()
1934 if (signal_pending(current)) in copy_process()
1937 retval = -ENOMEM; in copy_process()
1938 p = dup_task_struct(current, node); in copy_process()
1945 * p->set_child_tid which is (ab)used as a kthread's data pointer for in copy_process()
1948 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; in copy_process()
1952 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; in copy_process()
1960 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); in copy_process()
1962 retval = -EAGAIN; in copy_process()
1963 if (atomic_read(&p->real_cred->user->processes) >= in copy_process()
1965 if (p->real_cred->user != INIT_USER && in copy_process()
1969 current->flags &= ~PF_NPROC_EXCEEDED; in copy_process()
1980 retval = -EAGAIN; in copy_process()
1985 p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); in copy_process()
1986 p->flags |= PF_FORKNOEXEC; in copy_process()
1987 INIT_LIST_HEAD(&p->children); in copy_process()
1988 INIT_LIST_HEAD(&p->sibling); in copy_process()
1990 p->vfork_done = NULL; in copy_process()
1991 spin_lock_init(&p->alloc_lock); in copy_process()
1993 init_sigpending(&p->pending); in copy_process()
1995 p->utime = p->stime = p->gtime = 0; in copy_process()
1997 p->utimescaled = p->stimescaled = 0; in copy_process()
1999 prev_cputime_init(&p->prev_cputime); in copy_process()
2002 seqcount_init(&p->vtime.seqcount); in copy_process()
2003 p->vtime.starttime = 0; in copy_process()
2004 p->vtime.state = VTIME_INACTIVE; in copy_process()
2008 p->io_uring = NULL; in copy_process()
2012 memset(&p->rss_stat, 0, sizeof(p->rss_stat)); in copy_process()
2015 p->default_timer_slack_ns = current->timer_slack_ns; in copy_process()
2018 p->psi_flags = 0; in copy_process()
2021 task_io_accounting_init(&p->ioac); in copy_process()
2024 posix_cputimers_init(&p->posix_cputimers); in copy_process()
2026 p->io_context = NULL; in copy_process()
2030 p->mempolicy = mpol_dup(p->mempolicy); in copy_process()
2031 if (IS_ERR(p->mempolicy)) { in copy_process()
2032 retval = PTR_ERR(p->mempolicy); in copy_process()
2033 p->mempolicy = NULL; in copy_process()
2038 p->cpuset_mem_spread_rotor = NUMA_NO_NODE; in copy_process()
2039 p->cpuset_slab_spread_rotor = NUMA_NO_NODE; in copy_process()
2040 seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock); in copy_process()
2043 memset(&p->irqtrace, 0, sizeof(p->irqtrace)); in copy_process()
2044 p->irqtrace.hardirq_disable_ip = _THIS_IP_; in copy_process()
2045 p->irqtrace.softirq_enable_ip = _THIS_IP_; in copy_process()
2046 p->softirqs_enabled = 1; in copy_process()
2047 p->softirq_context = 0; in copy_process()
2050 p->pagefault_disabled = 0; in copy_process()
2057 p->blocked_on = NULL; /* not blocked yet */ in copy_process()
2060 p->sequential_io = 0; in copy_process()
2061 p->sequential_io_avg = 0; in copy_process()
2104 retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls); in copy_process()
2111 pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, in copy_process()
2112 args->set_tid_size); in copy_process()
2140 retval = put_user(pidfd, args->pidfd); in copy_process()
2146 p->plug = NULL; in copy_process()
2168 p->pid = pid_nr(pid); in copy_process()
2170 p->group_leader = current->group_leader; in copy_process()
2171 p->tgid = current->tgid; in copy_process()
2173 p->group_leader = p; in copy_process()
2174 p->tgid = p->pid; in copy_process()
2177 p->nr_dirtied = 0; in copy_process()
2178 p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); in copy_process()
2179 p->dirty_paused_when = 0; in copy_process()
2181 p->pdeath_signal = 0; in copy_process()
2182 INIT_LIST_HEAD(&p->thread_group); in copy_process()
2183 p->task_works = NULL; in copy_process()
2196 * From this point on we must avoid any synchronous user-space in copy_process()
2197 * communication until we take the tasklist-lock. In particular, we do in copy_process()
2198 * not want user-space to be able to predict the process start-time by in copy_process()
2203 p->start_time = ktime_get_ns(); in copy_process()
2204 p->start_boottime = ktime_get_boottime_ns(); in copy_process()
2212 /* CLONE_PARENT re-uses the old parent */ in copy_process()
2214 p->real_parent = current->real_parent; in copy_process()
2215 p->parent_exec_id = current->parent_exec_id; in copy_process()
2217 p->exit_signal = -1; in copy_process()
2219 p->exit_signal = current->group_leader->exit_signal; in copy_process()
2221 p->real_parent = current; in copy_process()
2222 p->parent_exec_id = current->self_exec_id; in copy_process()
2223 p->exit_signal = args->exit_signal; in copy_process()
2228 spin_lock(&current->sighand->siglock); in copy_process()
2239 if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { in copy_process()
2240 retval = -ENOMEM; in copy_process()
2245 if (fatal_signal_pending(current)) { in copy_process()
2246 retval = -EINTR; in copy_process()
2255 if (likely(p->pid)) { in copy_process()
2261 init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); in copy_process()
2262 init_task_pid(p, PIDTYPE_SID, task_session(current)); in copy_process()
2265 ns_of_pid(pid)->child_reaper = p; in copy_process()
2266 p->signal->flags |= SIGNAL_UNKILLABLE; in copy_process()
2268 p->signal->shared_pending.signal = delayed.signal; in copy_process()
2269 p->signal->tty = tty_kref_get(current->signal->tty); in copy_process()
2275 p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || in copy_process()
2276 p->real_parent->signal->is_child_subreaper; in copy_process()
2277 list_add_tail(&p->sibling, &p->real_parent->children); in copy_process()
2278 list_add_tail_rcu(&p->tasks, &init_task.tasks); in copy_process()
2284 current->signal->nr_threads++; in copy_process()
2285 atomic_inc(&current->signal->live); in copy_process()
2286 refcount_inc(&current->signal->sigcnt); in copy_process()
2288 list_add_tail_rcu(&p->thread_group, in copy_process()
2289 &p->group_leader->thread_group); in copy_process()
2290 list_add_tail_rcu(&p->thread_node, in copy_process()
2291 &p->signal->thread_head); in copy_process()
2298 spin_unlock(&current->sighand->siglock); in copy_process()
2315 spin_unlock(&current->sighand->siglock); in copy_process()
2329 if (p->io_context) in copy_process()
2334 if (p->mm) { in copy_process()
2335 mm_clear_owner(p->mm, p); in copy_process()
2336 mmput(p->mm); in copy_process()
2340 free_signal_struct(p->signal); in copy_process()
2342 __cleanup_sighand(p->sighand); in copy_process()
2358 mpol_put(p->mempolicy); in copy_process()
2363 atomic_dec(&p->cred->user->processes); in copy_process()
2366 p->state = TASK_DEAD; in copy_process()
2370 spin_lock_irq(&current->sighand->siglock); in copy_process()
2372 spin_unlock_irq(&current->sighand->siglock); in copy_process()
2381 INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ in init_idle_pids()
2408 * Ok, this is the main fork-routine.
2410 * It copies the process, and if successful kick-starts
2413 * args->exit_signal is expected to be checked for sanity by the caller.
2417 u64 clone_flags = args->flags; in kernel_clone()
2433 if ((args->flags & CLONE_PIDFD) && in kernel_clone()
2434 (args->flags & CLONE_PARENT_SETTID) && in kernel_clone()
2435 (args->pidfd == args->parent_tid)) in kernel_clone()
2436 return -EINVAL; in kernel_clone()
2447 else if (args->exit_signal != SIGCHLD) in kernel_clone()
2452 if (likely(!ptrace_event_enabled(current, trace))) in kernel_clone()
2463 * Do this prior to waking up the new thread - the thread pointer in kernel_clone()
2466 trace_sched_process_fork(current, p); in kernel_clone()
2472 put_user(nr, args->parent_tid); in kernel_clone()
2475 p->vfork_done = &vfork; in kernel_clone()
2522 return -EINVAL; in SYSCALL_DEFINE0()
2585 pid_t *kset_tid = kargs->set_tid; in copy_clone_args_from_user()
2596 return -E2BIG; in copy_clone_args_from_user()
2598 return -EINVAL; in copy_clone_args_from_user()
2605 return -EINVAL; in copy_clone_args_from_user()
2608 return -EINVAL; in copy_clone_args_from_user()
2611 return -EINVAL; in copy_clone_args_from_user()
2619 return -EINVAL; in copy_clone_args_from_user()
2623 return -EINVAL; in copy_clone_args_from_user()
2640 (kargs->set_tid_size * sizeof(pid_t)))) in copy_clone_args_from_user()
2641 return -EFAULT; in copy_clone_args_from_user()
2643 kargs->set_tid = kset_tid; in copy_clone_args_from_user()
2649 * clone3_stack_valid - check and prepare stack
2658 if (kargs->stack == 0) { in clone3_stack_valid()
2659 if (kargs->stack_size > 0) in clone3_stack_valid()
2662 if (kargs->stack_size == 0) in clone3_stack_valid()
2665 if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) in clone3_stack_valid()
2669 kargs->stack += kargs->stack_size; in clone3_stack_valid()
2679 if (kargs->flags & in clone3_args_valid()
2684 * - make the CLONE_DETACHED bit reusable for clone3 in clone3_args_valid()
2685 * - make the CSIGNAL bits reusable for clone3 in clone3_args_valid()
2687 if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) in clone3_args_valid()
2690 if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == in clone3_args_valid()
2694 if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && in clone3_args_valid()
2695 kargs->exit_signal) in clone3_args_valid()
2705 * clone3 - create a new process with specific properties
2729 return -EINVAL; in SYSCALL_DEFINE2()
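A hedged sketch of invoking clone3() directly (assumes kernel headers providing struct clone_args, Linux 5.3+, and a libc that defines SYS_clone3). CLONE_PIDFD makes copy_process() install a pollable pidfd, and exit_signal replaces the legacy CSIGNAL bits that clone3_args_valid() rejects:

#define _GNU_SOURCE
#include <linux/sched.h>        /* struct clone_args, CLONE_PIDFD */
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int pidfd = -1;
        struct clone_args args = {
                .flags       = CLONE_PIDFD,
                .pidfd       = (uint64_t)(uintptr_t)&pidfd,
                .exit_signal = SIGCHLD,
        };
        long pid = syscall(SYS_clone3, &args, sizeof(args));

        if (pid < 0)
                return 1;
        if (pid == 0)           /* child */
                _exit(0);
        printf("child=%ld pidfd=%d\n", pid, pidfd);
        waitpid((pid_t)pid, NULL, 0);
        return 0;
}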
2741 leader = top = top->group_leader; in walk_process_tree()
2744 list_for_each_entry(child, &parent->children, sibling) { in walk_process_tree()
2759 parent = child->real_parent; in walk_process_tree()
2760 leader = parent->group_leader; in walk_process_tree()
2775 spin_lock_init(&sighand->siglock); in sighand_ctor()
2776 init_waitqueue_head(&sighand->signalfd_wqh); in sighand_ctor()
2828 return -EINVAL; in check_unshare_flags()
2836 if (!thread_group_empty(current)) in check_unshare_flags()
2837 return -EINVAL; in check_unshare_flags()
2840 if (refcount_read(&current->sighand->count) > 1) in check_unshare_flags()
2841 return -EINVAL; in check_unshare_flags()
2845 return -EINVAL; in check_unshare_flags()
2856 struct fs_struct *fs = current->fs; in unshare_fs()
2862 if (fs->users == 1) in unshare_fs()
2867 return -ENOMEM; in unshare_fs()
2878 struct files_struct *fd = current->files; in unshare_fd()
2882 (fd && atomic_read(&fd->count) > 1)) { in unshare_fd()
2896 * constructed. Here we are modifying the current, active,
2959 exit_sem(current); in ksys_unshare()
2963 exit_shm(current); in ksys_unshare()
2964 shm_init_task(current); in ksys_unshare()
2968 switch_task_namespaces(current, new_nsproxy); in ksys_unshare()
2970 task_lock(current); in ksys_unshare()
2973 fs = current->fs; in ksys_unshare()
2974 spin_lock(&fs->lock); in ksys_unshare()
2975 current->fs = new_fs; in ksys_unshare()
2976 if (--fs->users) in ksys_unshare()
2980 spin_unlock(&fs->lock); in ksys_unshare()
2984 fd = current->files; in ksys_unshare()
2985 current->files = new_fd; in ksys_unshare()
2989 task_unlock(current); in ksys_unshare()
2998 perf_event_namespaces(current); in ksys_unshare()
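ksys_unshare() above is the backend of unshare(2); the new_fs swap under task_lock corresponds to e.g. unshare(CLONE_FS), which gives the caller a private fs_struct (cwd/root) no longer shared with any clone sibling. A minimal sketch:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        if (unshare(CLONE_FS)) {
                perror("unshare");
                return 1;
        }
        puts("fs_struct now private to this task");
        return 0;
}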
3021 * Helper to unshare the files of the current task.
3028 struct task_struct *task = current; in unshare_files()
3037 *displaced = task->files; in unshare_files()
3039 task->files = copy; in unshare_files()