Lines Matching +full:per +full:- +full:hart

1 // SPDX-License-Identifier: GPL-2.0-or-later
16 * PI-futex support started by Ingo Molnar and Thomas Gleixner
23 * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
29 * Kirkwood for proof-of-concept implementation.
40 #include <linux/fault-inject.h>
101 * smp_mb(); (A) <-- paired with -.
110 * `--------> smp_mb(); (B)
117 * waiters--; (b) unlock(hash_bucket(futex));
141 * acquiring the lock. It then decrements them again after releasing it -
161 * NOMMU does not have per process address space. Let the compiler optimize
174 * list of 'owned' pi_state instances - these have to be
191 * struct futex_q - The hashed futex queue entry, one per waiting task
192 * @list: priority-sorted list of tasks waiting on this futex
207 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
247 * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE
248 * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT
251 * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS
252 * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED
253 * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed)
254 * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED
255 * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed)
352 debugfs_create_bool("ignore-private", mode, dir, in fail_futex_debugfs()
378 atomic_inc(&hb->waiters); in hb_waiters_inc()
393 atomic_dec(&hb->waiters); in hb_waiters_dec()
404 return atomic_read(&hb->waiters); in hb_waiters_pending()
411 * hash_futex - Return the hash bucket in the global hash
420 key->both.offset); in hash_futex()
422 return &futex_queues[hash & (futex_hashsize - 1)]; in hash_futex()
427 * match_futex - Check whether two futex keys are equal
436 && key1->both.word == key2->both.word in match_futex()
437 && key1->both.ptr == key2->both.ptr in match_futex()
438 && key1->both.offset == key2->both.offset); in match_futex()
447 * futex_setup_timer - set up the sleeping hrtimer.
470 hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); in futex_setup_timer()
478 * This relies on u64 not wrapping in the life-time of the machine; which with
489 * It is important that match_futex() will never have a false-positive, esp.
490 * for PI futexes that can mess up the state. The above argues that false-negatives
499 old = atomic64_read(&inode->i_sequence); in get_inode_sequence_number()
508 old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); in get_inode_sequence_number()
516 * get_futex_key() - Get parameters which are the keys for a futex
529 * ( inode->i_sequence, page->index, offset_within_page )
535 * ( current->mm, address, 0 )
546 struct mm_struct *mm = current->mm; in get_futex_key()
554 key->both.offset = address % PAGE_SIZE; in get_futex_key()
556 return -EINVAL; in get_futex_key()
557 address -= key->both.offset; in get_futex_key()
560 return -EFAULT; in get_futex_key()
563 return -EFAULT; in get_futex_key()
573 key->private.mm = mm; in get_futex_key()
574 key->private.address = address; in get_futex_key()
581 return -EFAULT; in get_futex_key()
586 * and get read-only access. in get_futex_key()
588 if (err == -EFAULT && rw == FUTEX_READ) { in get_futex_key()
601 * file-backed region case and guards against movement to swap cache. in get_futex_key()
605 * From this point on, mapping will be re-verified if necessary and in get_futex_key()
611 * based on the address. For filesystem-backed pages, the tail is in get_futex_key()
617 mapping = READ_ONCE(page->mapping); in get_futex_key()
620 * If page->mapping is NULL, then it cannot be a PageAnon in get_futex_key()
632 * an unlikely race, but we do need to retry for page->mapping. in get_futex_key()
643 shmem_swizzled = PageSwapCache(page) || page->mapping; in get_futex_key()
650 return -EFAULT; in get_futex_key()
660 * it's a read-only handle, it's expected that futexes attach to in get_futex_key()
669 err = -EFAULT; in get_futex_key()
673 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ in get_futex_key()
674 key->private.mm = mm; in get_futex_key()
675 key->private.address = address; in get_futex_key()
682 * the page->mapping must be traversed. Ordinarily this should in get_futex_key()
689 * mapping->host can be safely accessed as being a valid inode. in get_futex_key()
693 if (READ_ONCE(page->mapping) != mapping) { in get_futex_key()
700 inode = READ_ONCE(mapping->host); in get_futex_key()
708 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ in get_futex_key()
709 key->shared.i_seq = get_inode_sequence_number(inode); in get_futex_key()
710 key->shared.pgoff = page_to_pgoff(tail); in get_futex_key()
720 * fault_in_user_writeable() - Fault in user address and verify RW access
726 * We have no generic implementation of a non-destructive write to the
733 struct mm_struct *mm = current->mm; in fault_in_user_writeable()
745 * futex_top_waiter() - Return the highest priority waiter on a futex
756 plist_for_each_entry(this, &hb->chain, list) { in futex_top_waiter()
757 if (match_futex(&this->key, key)) in futex_top_waiter()
783 return ret ? -EFAULT : 0; in get_futex_value_locked()
794 if (likely(current->pi_state_cache)) in refill_pi_state_cache()
800 return -ENOMEM; in refill_pi_state_cache()
802 INIT_LIST_HEAD(&pi_state->list); in refill_pi_state_cache()
804 pi_state->owner = NULL; in refill_pi_state_cache()
805 refcount_set(&pi_state->refcount, 1); in refill_pi_state_cache()
806 pi_state->key = FUTEX_KEY_INIT; in refill_pi_state_cache()
808 current->pi_state_cache = pi_state; in refill_pi_state_cache()
815 struct futex_pi_state *pi_state = current->pi_state_cache; in alloc_pi_state()
818 current->pi_state_cache = NULL; in alloc_pi_state()
826 struct task_struct *old_owner = pi_state->owner; in pi_state_update_owner()
828 lockdep_assert_held(&pi_state->pi_mutex.wait_lock); in pi_state_update_owner()
831 raw_spin_lock(&old_owner->pi_lock); in pi_state_update_owner()
832 WARN_ON(list_empty(&pi_state->list)); in pi_state_update_owner()
833 list_del_init(&pi_state->list); in pi_state_update_owner()
834 raw_spin_unlock(&old_owner->pi_lock); in pi_state_update_owner()
838 raw_spin_lock(&new_owner->pi_lock); in pi_state_update_owner()
839 WARN_ON(!list_empty(&pi_state->list)); in pi_state_update_owner()
840 list_add(&pi_state->list, &new_owner->pi_state_list); in pi_state_update_owner()
841 pi_state->owner = new_owner; in pi_state_update_owner()
842 raw_spin_unlock(&new_owner->pi_lock); in pi_state_update_owner()
848 WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); in get_pi_state()
860 if (!refcount_dec_and_test(&pi_state->refcount)) in put_pi_state()
864 * If pi_state->owner is NULL, the owner is most probably dying in put_pi_state()
867 if (pi_state->owner) { in put_pi_state()
870 raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); in put_pi_state()
872 rt_mutex_proxy_unlock(&pi_state->pi_mutex); in put_pi_state()
873 raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); in put_pi_state()
876 if (current->pi_state_cache) { in put_pi_state()
880 * pi_state->list is already empty. in put_pi_state()
881 * clear pi_state->owner. in put_pi_state()
882 * refcount is at 0 - put it back to 1. in put_pi_state()
884 pi_state->owner = NULL; in put_pi_state()
885 refcount_set(&pi_state->refcount, 1); in put_pi_state()
886 current->pi_state_cache = pi_state; in put_pi_state()
894 * Kernel cleans up PI-state, but userspace is likely hosed.
895 * (Robust-futex cleanup is separate and might save the day for userspace.)
899 struct list_head *next, *head = &curr->pi_state_list; in exit_pi_state_list()
911 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
913 next = head->next; in exit_pi_state_list()
915 key = pi_state->key; in exit_pi_state_list()
928 if (!refcount_inc_not_zero(&pi_state->refcount)) { in exit_pi_state_list()
929 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
931 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
934 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
936 spin_lock(&hb->lock); in exit_pi_state_list()
937 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
938 raw_spin_lock(&curr->pi_lock); in exit_pi_state_list()
940 * We dropped the pi-lock, so re-check whether this in exit_pi_state_list()
941 * task still owns the PI-state: in exit_pi_state_list()
943 if (head->next != next) { in exit_pi_state_list()
944 /* retain curr->pi_lock for the loop invariant */ in exit_pi_state_list()
945 raw_spin_unlock(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
946 spin_unlock(&hb->lock); in exit_pi_state_list()
951 WARN_ON(pi_state->owner != curr); in exit_pi_state_list()
952 WARN_ON(list_empty(&pi_state->list)); in exit_pi_state_list()
953 list_del_init(&pi_state->list); in exit_pi_state_list()
954 pi_state->owner = NULL; in exit_pi_state_list()
956 raw_spin_unlock(&curr->pi_lock); in exit_pi_state_list()
957 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
958 spin_unlock(&hb->lock); in exit_pi_state_list()
960 rt_mutex_futex_unlock(&pi_state->pi_mutex); in exit_pi_state_list()
963 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
965 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
974 * Waiter | pi_state | pi->owner | uTID | uODIED | ?
976 * [1] NULL | --- | --- | 0 | 0/1 | Valid
977 * [2] NULL | --- | --- | >0 | 0/1 | Valid
979 * [3] Found | NULL | -- | Any | 0/1 | Invalid
1009 * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
1024 * hb->lock:
1026 * hb -> futex_q, relation
1027 * futex_q -> pi_state, relation
1032 * pi_mutex->wait_lock:
1038 * p->pi_lock:
1040 * p->pi_state_list -> pi_state->list, relation
1041 * pi_mutex->owner -> pi_state->owner, relation
1043 * pi_state->refcount:
1050 * hb->lock
1051 * pi_mutex->wait_lock
1052 * p->pi_lock
1070 * Userspace might have messed up non-PI and PI futexes [3] in attach_to_pi_state()
1073 return -EINVAL; in attach_to_pi_state()
1076 * We get here with hb->lock held, and having found a in attach_to_pi_state()
1078 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), in attach_to_pi_state()
1087 WARN_ON(!refcount_read(&pi_state->refcount)); in attach_to_pi_state()
1093 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1114 * pi_state->rt_mutex will fixup owner. in attach_to_pi_state()
1116 if (!pi_state->owner) { in attach_to_pi_state()
1144 if (!pi_state->owner) in attach_to_pi_state()
1153 if (pid != task_pid_vnr(pi_state->owner)) in attach_to_pi_state()
1158 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1163 ret = -EINVAL; in attach_to_pi_state()
1167 ret = -EAGAIN; in attach_to_pi_state()
1171 ret = -EFAULT; in attach_to_pi_state()
1175 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1180 * wait_for_owner_exiting - Block until the owner has exited
1188 if (ret != -EBUSY) { in wait_for_owner_exiting()
1193 if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) in wait_for_owner_exiting()
1196 mutex_lock(&exiting->futex_exit_mutex); in wait_for_owner_exiting()
1199 * while the task was in exec()->exec_futex_release() then it can in wait_for_owner_exiting()
1205 mutex_unlock(&exiting->futex_exit_mutex); in wait_for_owner_exiting()
1219 if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) in handle_exit_race()
1220 return -EBUSY; in handle_exit_race()
1231 * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID in handle_exit_race()
1236 * } if (!tsk->flags & PF_EXITING) { in handle_exit_race()
1238 * tsk->futex_state = } else { in handle_exit_race()
1239 * FUTEX_STATE_DEAD; if (tsk->futex_state != in handle_exit_race()
1241 * return -EAGAIN; in handle_exit_race()
1242 * return -ESRCH; <--- FAIL in handle_exit_race()
1252 return -EFAULT; in handle_exit_race()
1256 return -EAGAIN; in handle_exit_race()
1263 return -ESRCH; in handle_exit_race()
1272 * This creates pi_state, we have hb->lock held, this means nothing can in __attach_to_pi_owner()
1281 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); in __attach_to_pi_owner()
1284 pi_state->key = *key; in __attach_to_pi_owner()
1286 WARN_ON(!list_empty(&pi_state->list)); in __attach_to_pi_owner()
1287 list_add(&pi_state->list, &p->pi_state_list); in __attach_to_pi_owner()
1289 * Assignment without holding pi_state->pi_mutex.wait_lock is safe in __attach_to_pi_owner()
1292 pi_state->owner = p; in __attach_to_pi_owner()
1308 * We are the first waiter - try to look up the real owner and attach in attach_to_pi_owner()
1315 return -EAGAIN; in attach_to_pi_owner()
1320 if (unlikely(p->flags & PF_KTHREAD)) { in attach_to_pi_owner()
1322 return -EPERM; in attach_to_pi_owner()
1328 * in futex_exit_release(), we do this protected by p->pi_lock: in attach_to_pi_owner()
1330 raw_spin_lock_irq(&p->pi_lock); in attach_to_pi_owner()
1331 if (unlikely(p->futex_state != FUTEX_STATE_OK)) { in attach_to_pi_owner()
1339 raw_spin_unlock_irq(&p->pi_lock); in attach_to_pi_owner()
1349 if (ret == -EBUSY) in attach_to_pi_owner()
1357 raw_spin_unlock_irq(&p->pi_lock); in attach_to_pi_owner()
1370 return -EFAULT; in lock_pi_update_atomic()
1377 return curval != uval ? -EAGAIN : 0; in lock_pi_update_atomic()
1381 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
1394 * - 0 - ready to wait;
1395 * - 1 - acquired the lock;
1396 * - <0 - error
1398 * The hb->lock must be held by the caller.
1400 * @exiting is only set when the return value is -EBUSY. If so, this holds
1420 return -EFAULT; in futex_lock_pi_atomic()
1423 return -EFAULT; in futex_lock_pi_atomic()
1429 return -EDEADLK; in futex_lock_pi_atomic()
1432 return -EDEADLK; in futex_lock_pi_atomic()
1440 return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); in futex_lock_pi_atomic()
1476 raw_spin_lock_irq(&task->pi_lock); in futex_lock_pi_atomic()
1478 raw_spin_unlock_irq(&task->pi_lock); in futex_lock_pi_atomic()
1486 * the kernel and blocked on hb->lock. in futex_lock_pi_atomic()
1501 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
1504 * The q->lock_ptr must not be NULL and must be held by the caller.
1510 if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) in __unqueue_futex()
1512 lockdep_assert_held(q->lock_ptr); in __unqueue_futex()
1514 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); in __unqueue_futex()
1515 plist_del(&q->list, &hb->chain); in __unqueue_futex()
1527 struct task_struct *p = q->task; in mark_wake_futex()
1529 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) in mark_wake_futex()
1535 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL in mark_wake_futex()
1541 smp_store_release(&q->lock_ptr, NULL); in mark_wake_futex()
1545 * the hb->lock. in mark_wake_futex()
1562 top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); in wake_futex_pi()
1565 * As per the comment in futex_unlock_pi() this should not happen. in wake_futex_pi()
1572 ret = -EAGAIN; in wake_futex_pi()
1576 new_owner = top_waiter->task; in wake_futex_pi()
1586 ret = -EFAULT; in wake_futex_pi()
1594 * try the TID->0 transition) raced with a waiter setting the in wake_futex_pi()
1599 ret = -EAGAIN; in wake_futex_pi()
1601 ret = -EINVAL; in wake_futex_pi()
1611 postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); in wake_futex_pi()
1615 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in wake_futex_pi()
1630 spin_lock(&hb1->lock); in double_lock_hb()
1632 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); in double_lock_hb()
1634 spin_lock(&hb2->lock); in double_lock_hb()
1635 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); in double_lock_hb()
1642 spin_unlock(&hb1->lock); in double_unlock_hb()
1644 spin_unlock(&hb2->lock); in double_unlock_hb()
1660 return -EINVAL; in futex_wake()
1672 spin_lock(&hb->lock); in futex_wake()
1674 plist_for_each_entry_safe(this, next, &hb->chain, list) { in futex_wake()
1675 if (match_futex (&this->key, &key)) { in futex_wake()
1676 if (this->pi_state || this->rt_waiter) { in futex_wake()
1677 ret = -EINVAL; in futex_wake()
1682 if (!(this->bitset & bitset)) in futex_wake()
1691 spin_unlock(&hb->lock); in futex_wake()
1706 char comm[sizeof(current->comm)]; in futex_atomic_op_inuser()
1708 * kill this print and return -EINVAL when userspace in futex_atomic_op_inuser()
1738 return -ENOSYS; in futex_atomic_op_inuser()
1774 unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { in futex_wake_op()
1783 if (op_ret == -EFAULT) { in futex_wake_op()
1795 plist_for_each_entry_safe(this, next, &hb1->chain, list) { in futex_wake_op()
1796 if (match_futex (&this->key, &key1)) { in futex_wake_op()
1797 if (this->pi_state || this->rt_waiter) { in futex_wake_op()
1798 ret = -EINVAL; in futex_wake_op()
1809 plist_for_each_entry_safe(this, next, &hb2->chain, list) { in futex_wake_op()
1810 if (match_futex (&this->key, &key2)) { in futex_wake_op()
1811 if (this->pi_state || this->rt_waiter) { in futex_wake_op()
1812 ret = -EINVAL; in futex_wake_op()
1830 * requeue_futex() - Requeue a futex_q from one hb to another
1845 if (likely(&hb1->chain != &hb2->chain)) { in requeue_futex()
1846 plist_del(&q->list, &hb1->chain); in requeue_futex()
1849 plist_add(&q->list, &hb2->chain); in requeue_futex()
1850 q->lock_ptr = &hb2->lock; in requeue_futex()
1852 q->key = *key2; in requeue_futex()
1865 old = atomic_read_acquire(&q->requeue_state); in futex_requeue_pi_prepare()
1882 } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); in futex_requeue_pi_prepare()
1884 q->pi_state = pi_state; in futex_requeue_pi_prepare()
1892 old = atomic_read_acquire(&q->requeue_state); in futex_requeue_pi_complete()
1910 } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); in futex_requeue_pi_complete()
1915 rcuwait_wake_up(&q->requeue_wait); in futex_requeue_pi_complete()
1923 old = atomic_read_acquire(&q->requeue_state); in futex_requeue_pi_wakeup_sync()
1936 } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); in futex_requeue_pi_wakeup_sync()
1941 rcuwait_wait_event(&q->requeue_wait, in futex_requeue_pi_wakeup_sync()
1942 atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT, in futex_requeue_pi_wakeup_sync()
1945 (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT); in futex_requeue_pi_wakeup_sync()
1952 * will modify q->requeue_state after this point. in futex_requeue_pi_wakeup_sync()
1954 return atomic_read(&q->requeue_state); in futex_requeue_pi_wakeup_sync()
1958 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1974 * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
1983 * Must be called with both q->lock_ptr and hb->lock held.
1989 q->key = *key; in requeue_pi_wake_futex()
1993 WARN_ON(!q->rt_waiter); in requeue_pi_wake_futex()
1994 q->rt_waiter = NULL; in requeue_pi_wake_futex()
1996 q->lock_ptr = &hb->lock; in requeue_pi_wake_futex()
2000 wake_up_state(q->task, TASK_NORMAL); in requeue_pi_wake_futex()
2004 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
2020 * @exiting is only set when the return value is -EBUSY. If so, this holds
2025 * - 0 - failed to acquire the lock atomically;
2026 * - >0 - acquired the lock, return value is vpid of the top_waiter
2027 * - <0 - error
2040 return -EFAULT; in futex_proxy_trylock_atomic()
2043 return -EFAULT; in futex_proxy_trylock_atomic()
2063 if (!top_waiter->rt_waiter || top_waiter->pi_state) in futex_proxy_trylock_atomic()
2064 return -EINVAL; in futex_proxy_trylock_atomic()
2067 if (!match_futex(top_waiter->requeue_pi_key, key2)) in futex_proxy_trylock_atomic()
2068 return -EINVAL; in futex_proxy_trylock_atomic()
2072 return -EAGAIN; in futex_proxy_trylock_atomic()
2080 * the new owner (@top_waiter->task) when @set_waiters is true. in futex_proxy_trylock_atomic()
2082 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, in futex_proxy_trylock_atomic()
2087 * attached to @top_waiter->task. That means state is fully in futex_proxy_trylock_atomic()
2110 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
2115 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
2117 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
2124 * - >=0 - on success, the number of tasks requeued or woken;
2125 * - <0 - on error
2139 return -EINVAL; in futex_requeue()
2142 * When PI not supported: return -ENOSYS if requeue_pi is true, in futex_requeue()
2148 return -ENOSYS; in futex_requeue()
2156 return -EINVAL; in futex_requeue()
2180 return -EINVAL; in futex_requeue()
2187 return -ENOMEM; in futex_requeue()
2204 return -EINVAL; in futex_requeue()
2232 ret = -EAGAIN; in futex_requeue()
2260 * - If the lock was acquired atomically (ret == 1), then in futex_requeue()
2273 * - If the trylock failed with an error (ret < 0) then in futex_requeue()
2278 * - If the trylock did not succeed (ret == 0) then the in futex_requeue()
2304 case -EFAULT: in futex_requeue()
2311 case -EBUSY: in futex_requeue()
2312 case -EAGAIN: in futex_requeue()
2315 * - EBUSY: Owner is exiting and we just wait for the in futex_requeue()
2317 * - EAGAIN: The user space value changed. in futex_requeue()
2334 plist_for_each_entry_safe(this, next, &hb1->chain, list) { in futex_requeue()
2335 if (task_count - nr_wake >= nr_requeue) in futex_requeue()
2338 if (!match_futex(&this->key, &key1)) in futex_requeue()
2348 if ((requeue_pi && !this->rt_waiter) || in futex_requeue()
2349 (!requeue_pi && this->rt_waiter) || in futex_requeue()
2350 this->pi_state) { in futex_requeue()
2351 ret = -EINVAL; in futex_requeue()
2365 if (!match_futex(this->requeue_pi_key, &key2)) { in futex_requeue()
2366 ret = -EINVAL; in futex_requeue()
2385 * next waiter. @this->pi_state is still NULL. in futex_requeue()
2391 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, in futex_requeue()
2392 this->rt_waiter, in futex_requeue()
2393 this->task); in futex_requeue()
2398 * on pi_state nor clear this->pi_state because the in futex_requeue()
2419 this->pi_state = NULL; in futex_requeue()
2443 /* The key must be already stored in q->key. */
2445 __acquires(&hb->lock) in queue_lock()
2449 hb = hash_futex(&q->key); in queue_lock()
2453 * a potential waker won't miss a to-be-slept task that is in queue_lock()
2461 q->lock_ptr = &hb->lock; in queue_lock()
2463 spin_lock(&hb->lock); in queue_lock()
2469 __releases(&hb->lock) in queue_unlock()
2471 spin_unlock(&hb->lock); in queue_unlock()
2481 * - either the real thread-priority for the real-time threads in __queue_me()
2483 * - or MAX_RT_PRIO for non-RT threads. in __queue_me()
2484 * Thus, all RT-threads are woken first in priority order, and in __queue_me()
2487 prio = min(current->normal_prio, MAX_RT_PRIO); in __queue_me()
2489 plist_node_init(&q->list, prio); in __queue_me()
2490 plist_add(&q->list, &hb->chain); in __queue_me()
2491 q->task = current; in __queue_me()
2495 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
2499 * The hb->lock must be held by the caller, and is released here. A call to
2507 __releases(&hb->lock) in queue_me()
2510 spin_unlock(&hb->lock); in queue_me()
2514 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
2517 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
2521 * - 1 - if the futex_q was still queued (and we removed unqueued it);
2522 * - 0 - if the futex_q was already removed by the waking thread
2532 * q->lock_ptr can change between this read and the following spin_lock. in unqueue_me()
2533 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and in unqueue_me()
2536 lock_ptr = READ_ONCE(q->lock_ptr); in unqueue_me()
2540 * q->lock_ptr can change between reading it and in unqueue_me()
2545 * q->lock_ptr must have changed (maybe several times) in unqueue_me()
2552 if (unlikely(lock_ptr != q->lock_ptr)) { in unqueue_me()
2558 BUG_ON(q->pi_state); in unqueue_me()
2575 BUG_ON(!q->pi_state); in unqueue_me_pi()
2576 put_pi_state(q->pi_state); in unqueue_me_pi()
2577 q->pi_state = NULL; in unqueue_me_pi()
2583 struct futex_pi_state *pi_state = q->pi_state; in __fixup_pi_state_owner()
2588 oldowner = pi_state->owner; in __fixup_pi_state_owner()
2593 * - we stole the lock and pi_state->owner needs updating to reflect in __fixup_pi_state_owner()
2598 * - someone stole our lock and we need to fix things to point to the in __fixup_pi_state_owner()
2623 if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { in __fixup_pi_state_owner()
2632 newowner = rt_mutex_owner(&pi_state->pi_mutex); in __fixup_pi_state_owner()
2642 err = -EAGAIN; in __fixup_pi_state_owner()
2659 if (!pi_state->owner) in __fixup_pi_state_owner()
2695 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely in __fixup_pi_state_owner()
2696 * drop hb->lock since the caller owns the hb -> futex_q relation. in __fixup_pi_state_owner()
2697 * Dropping the pi_mutex->wait_lock requires the state revalidate. in __fixup_pi_state_owner()
2700 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in __fixup_pi_state_owner()
2701 spin_unlock(q->lock_ptr); in __fixup_pi_state_owner()
2704 case -EFAULT: in __fixup_pi_state_owner()
2708 case -EAGAIN: in __fixup_pi_state_owner()
2718 spin_lock(q->lock_ptr); in __fixup_pi_state_owner()
2719 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in __fixup_pi_state_owner()
2724 if (pi_state->owner != oldowner) in __fixup_pi_state_owner()
2727 /* Retry if err was -EAGAIN or the fault in succeeded */ in __fixup_pi_state_owner()
2744 * The rtmutex has an owner - either current or some other in __fixup_pi_state_owner()
2747 pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); in __fixup_pi_state_owner()
2755 struct futex_pi_state *pi_state = q->pi_state; in fixup_pi_state_owner()
2758 lockdep_assert_held(q->lock_ptr); in fixup_pi_state_owner()
2760 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2762 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2769 * fixup_owner() - Post lock pi_state and corner case management
2779 * - 1 - success, lock taken;
2780 * - 0 - success, lock not taken;
2781 * - <0 - on error (-EFAULT)
2788 * did a lock-steal - fix up the PI-state in that case: in fixup_owner()
2790 * Speculative pi_state->owner read (we don't hold wait_lock); in fixup_owner()
2791 * since we own the lock pi_state->owner == current is the in fixup_owner()
2794 if (q->pi_state->owner != current) in fixup_owner()
2804 * Another speculative read; pi_state->owner == current is unstable in fixup_owner()
2807 if (q->pi_state->owner == current) in fixup_owner()
2814 if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) in fixup_owner()
2821 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
2846 if (likely(!plist_node_empty(&q->list))) { in futex_wait_queue_me()
2852 if (!timeout || timeout->task) in futex_wait_queue_me()
2859 * futex_wait_setup() - Prepare to wait on a futex
2871 * - 0 - uaddr contains val and hb has been locked;
2872 * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
2881 * Access the page AFTER the hash-bucket is locked. in futex_wait_setup()
2889 * any cond. If we locked the hash-bucket after testing *uaddr, that in futex_wait_setup()
2893 * On the other hand, we insert q and release the hash-bucket only in futex_wait_setup()
2899 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); in futex_wait_setup()
2923 ret = -EWOULDBLOCK; in futex_wait_setup()
2939 return -EINVAL; in futex_wait()
2943 current->timer_slack_ns); in futex_wait()
2946 * Prepare to wait on uaddr. On success, it holds hb->lock and q in futex_wait()
2960 ret = -ETIMEDOUT; in futex_wait()
2961 if (to && !to->task) in futex_wait()
2971 ret = -ERESTARTSYS; in futex_wait()
2975 restart = &current->restart_block; in futex_wait()
2976 restart->futex.uaddr = uaddr; in futex_wait()
2977 restart->futex.val = val; in futex_wait()
2978 restart->futex.time = *abs_time; in futex_wait()
2979 restart->futex.bitset = bitset; in futex_wait()
2980 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; in futex_wait()
2986 hrtimer_cancel(&to->timer); in futex_wait()
2987 destroy_hrtimer_on_stack(&to->timer); in futex_wait()
2995 u32 __user *uaddr = restart->futex.uaddr; in futex_wait_restart()
2998 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { in futex_wait_restart()
2999 t = restart->futex.time; in futex_wait_restart()
3002 restart->fn = do_no_restart_syscall; in futex_wait_restart()
3004 return (long)futex_wait(uaddr, restart->futex.flags, in futex_wait_restart()
3005 restart->futex.val, tp, restart->futex.bitset); in futex_wait_restart()
3010 * Userspace tried a 0 -> TID atomic transition of the futex value
3013 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
3029 return -ENOSYS; in futex_lock_pi()
3032 return -ENOMEM; in futex_lock_pi()
3056 case -EFAULT: in futex_lock_pi()
3058 case -EBUSY: in futex_lock_pi()
3059 case -EAGAIN: in futex_lock_pi()
3062 * - EBUSY: Task is exiting and we just wait for the in futex_lock_pi()
3064 * - EAGAIN: The user space value changed. in futex_lock_pi()
3088 ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); in futex_lock_pi()
3090 ret = ret ? 0 : -EWOULDBLOCK; in futex_lock_pi()
3097 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not in futex_lock_pi()
3099 * include hb->lock in the blocking chain, even through we'll not in in futex_lock_pi()
3100 * fact hold it while blocking. This will lead it to report -EDEADLK in futex_lock_pi()
3103 * Therefore acquire wait_lock while holding hb->lock, but drop the in futex_lock_pi()
3105 * interleaves with futex_unlock_pi() -- which does a similar lock in futex_lock_pi()
3106 * handoff -- such that the latter can observe the futex_q::pi_state in futex_lock_pi()
3109 raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); in futex_lock_pi()
3116 ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); in futex_lock_pi()
3117 raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); in futex_lock_pi()
3128 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); in futex_lock_pi()
3134 * first acquire the hb->lock before removing the lock from the in futex_lock_pi()
3141 if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) in futex_lock_pi()
3152 * the lock, clear our -ETIMEDOUT or -EINTR. in futex_lock_pi()
3166 hrtimer_cancel(&to->timer); in futex_lock_pi()
3167 destroy_hrtimer_on_stack(&to->timer); in futex_lock_pi()
3169 return ret != -EINTR ? ret : -ERESTARTNOINTR; in futex_lock_pi()
3185 * Userspace attempted a TID -> 0 atomic transition, and failed.
3186 * This is the in-kernel slowpath: we look up the PI state (if any),
3187 * and do the rt-mutex unlock.
3198 return -ENOSYS; in futex_unlock_pi()
3202 return -EFAULT; in futex_unlock_pi()
3207 return -EPERM; in futex_unlock_pi()
3214 spin_lock(&hb->lock); in futex_unlock_pi()
3223 struct futex_pi_state *pi_state = top_waiter->pi_state; in futex_unlock_pi()
3225 ret = -EINVAL; in futex_unlock_pi()
3233 if (pi_state->owner != current) in futex_unlock_pi()
3238 * By taking wait_lock while still holding hb->lock, we ensure in futex_unlock_pi()
3247 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in futex_unlock_pi()
3248 spin_unlock(&hb->lock); in futex_unlock_pi()
3250 /* drops pi_state->pi_mutex.wait_lock */ in futex_unlock_pi()
3262 * pagefault, so retry the user-access and the wakeup: in futex_unlock_pi()
3264 if (ret == -EFAULT) in futex_unlock_pi()
3270 if (ret == -EAGAIN) in futex_unlock_pi()
3282 * on hb->lock. So we can safely ignore them. We do neither in futex_unlock_pi()
3287 spin_unlock(&hb->lock); in futex_unlock_pi()
3289 case -EFAULT: in futex_unlock_pi()
3292 case -EAGAIN: in futex_unlock_pi()
3304 ret = (curval == uval) ? 0 : -EAGAIN; in futex_unlock_pi()
3307 spin_unlock(&hb->lock); in futex_unlock_pi()
3324 * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
3332 * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
3348 WARN_ON_ONCE(&hb->lock != q->lock_ptr); in handle_early_requeue_pi_wakeup()
3354 plist_del(&q->list, &hb->chain); in handle_early_requeue_pi_wakeup()
3358 ret = -EWOULDBLOCK; in handle_early_requeue_pi_wakeup()
3359 if (timeout && !timeout->task) in handle_early_requeue_pi_wakeup()
3360 ret = -ETIMEDOUT; in handle_early_requeue_pi_wakeup()
3362 ret = -ERESTARTNOINTR; in handle_early_requeue_pi_wakeup()
3367 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
3368 * @uaddr: the futex we initially wait on (non-pi)
3374 * @uaddr2: the pi futex we will take prior to returning to user-space
3384 * via the following--
3390 * If 3, cleanup and return -ERESTARTNOINTR.
3398 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
3400 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
3403 * - 0 - On success;
3404 * - <0 - On error
3419 return -ENOSYS; in futex_wait_requeue_pi()
3422 return -EINVAL; in futex_wait_requeue_pi()
3425 return -EINVAL; in futex_wait_requeue_pi()
3428 current->timer_slack_ns); in futex_wait_requeue_pi()
3445 * Prepare to wait on uaddr. On success, it holds hb->lock and q in futex_wait_requeue_pi()
3458 ret = -EINVAL; in futex_wait_requeue_pi()
3468 spin_lock(&hb->lock); in futex_wait_requeue_pi()
3470 spin_unlock(&hb->lock); in futex_wait_requeue_pi()
3475 if (q.pi_state && (q.pi_state->owner != current)) { in futex_wait_requeue_pi()
3485 * Adjust the return value. It's either -EFAULT or in futex_wait_requeue_pi()
3494 pi_mutex = &q.pi_state->pi_mutex; in futex_wait_requeue_pi()
3510 * acquired the lock, clear -ETIMEDOUT or -EINTR. in futex_wait_requeue_pi()
3518 if (ret == -EINTR) { in futex_wait_requeue_pi()
3524 * -EWOULDBLOCK. Save the overhead of the restart in futex_wait_requeue_pi()
3525 * and return -EWOULDBLOCK directly. in futex_wait_requeue_pi()
3527 ret = -EWOULDBLOCK; in futex_wait_requeue_pi()
3536 hrtimer_cancel(&to->timer); in futex_wait_requeue_pi()
3537 destroy_hrtimer_on_stack(&to->timer); in futex_wait_requeue_pi()
3546 * Implementation: user-space maintains a per-thread list of locks it
3551 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
3558 * sys_set_robust_list() - Set the robust-futex list head of a task
3559 * @head: pointer to the list-head
3560 * @len: length of the list-head, as userspace expects
3566 return -ENOSYS; in SYSCALL_DEFINE2()
3571 return -EINVAL; in SYSCALL_DEFINE2()
3573 current->robust_list = head; in SYSCALL_DEFINE2()
3579 * sys_get_robust_list() - Get the robust-futex list head of a task
3581 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
3593 return -ENOSYS; in SYSCALL_DEFINE3()
3597 ret = -ESRCH; in SYSCALL_DEFINE3()
3606 ret = -EPERM; in SYSCALL_DEFINE3()
3610 head = p->robust_list; in SYSCALL_DEFINE3()
3614 return -EFAULT; in SYSCALL_DEFINE3()
3628 * Process a futex-list entry, check whether it's owned by the
3639 return -1; in handle_futex_death()
3643 return -1; in handle_futex_death()
3662 * 1) task->robust_list->list_op_pending != NULL in handle_futex_death()
3689 * futex_wake() even if OWNER_DIED is already set - in handle_futex_death()
3691 * thread-death.) The rest of the cleanup is done in in handle_futex_death()
3707 case -EFAULT: in handle_futex_death()
3709 return -1; in handle_futex_death()
3712 case -EAGAIN: in handle_futex_death()
3726 * Wake robust non-PI futexes here. The wakeup of in handle_futex_death()
3736 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3745 return -EFAULT; in fetch_robust_entry()
3754 * Walk curr->robust_list (very carefully, it's a userspace list!)
3757 * We silently return on any sign of list-walking problem.
3761 struct robust_list_head __user *head = curr->robust_list; in exit_robust_list()
3775 if (fetch_robust_entry(&entry, &head->list.next, &pi)) in exit_robust_list()
3780 if (get_user(futex_offset, &head->futex_offset)) in exit_robust_list()
3783 * Fetch any possibly pending lock-add first, and handle it in exit_robust_list()
3786 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) in exit_robust_list()
3790 while (entry != &head->list) { in exit_robust_list()
3795 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); in exit_robust_list()
3812 if (!--limit) in exit_robust_list()
3826 if (unlikely(tsk->robust_list)) { in futex_cleanup()
3828 tsk->robust_list = NULL; in futex_cleanup()
3832 if (unlikely(tsk->compat_robust_list)) { in futex_cleanup()
3834 tsk->compat_robust_list = NULL; in futex_cleanup()
3838 if (unlikely(!list_empty(&tsk->pi_state_list))) in futex_cleanup()
3843 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
3862 if (tsk->futex_state == FUTEX_STATE_EXITING) in futex_exit_recursive()
3863 mutex_unlock(&tsk->futex_exit_mutex); in futex_exit_recursive()
3864 tsk->futex_state = FUTEX_STATE_DEAD; in futex_exit_recursive()
3872 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in in futex_cleanup_begin()
3875 mutex_lock(&tsk->futex_exit_mutex); in futex_cleanup_begin()
3878 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. in futex_cleanup_begin()
3880 * This ensures that all subsequent checks of tsk->futex_state in in futex_cleanup_begin()
3882 * tsk->pi_lock held. in futex_cleanup_begin()
3885 * the state change under tsk->pi_lock by a concurrent waiter must in futex_cleanup_begin()
3888 raw_spin_lock_irq(&tsk->pi_lock); in futex_cleanup_begin()
3889 tsk->futex_state = FUTEX_STATE_EXITING; in futex_cleanup_begin()
3890 raw_spin_unlock_irq(&tsk->pi_lock); in futex_cleanup_begin()
3899 tsk->futex_state = state; in futex_cleanup_end()
3904 mutex_unlock(&tsk->futex_exit_mutex); in futex_cleanup_end()
3945 return -ENOSYS; in do_futex()
3956 return -ENOSYS; in do_futex()
3992 return -ENOSYS; in do_futex()
4012 return -EINVAL; in futex_init_timeout()
4032 return -EFAULT; in SYSCALL_DEFINE6()
4034 return -EFAULT; in SYSCALL_DEFINE6()
4046 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
4053 return -EFAULT; in compat_fetch_robust_entry()
4071 * Walk curr->compat_robust_list (very carefully, it's a userspace list!)
4074 * We silently return on any sign of list-walking problem.
4078 struct compat_robust_list_head __user *head = curr->compat_robust_list; in compat_exit_robust_list()
4093 if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) in compat_exit_robust_list()
4098 if (get_user(futex_offset, &head->futex_offset)) in compat_exit_robust_list()
4101 * Fetch any possibly pending lock-add first, and handle it in compat_exit_robust_list()
4105 &head->list_op_pending, &pip)) in compat_exit_robust_list()
4109 while (entry != (struct robust_list __user *) &head->list) { in compat_exit_robust_list()
4115 (compat_uptr_t __user *)&entry->next, &next_pi); in compat_exit_robust_list()
4135 if (!--limit) in compat_exit_robust_list()
4152 return -ENOSYS; in COMPAT_SYSCALL_DEFINE2()
4155 return -EINVAL; in COMPAT_SYSCALL_DEFINE2()
4157 current->compat_robust_list = head; in COMPAT_SYSCALL_DEFINE2()
4171 return -ENOSYS; in COMPAT_SYSCALL_DEFINE3()
4175 ret = -ESRCH; in COMPAT_SYSCALL_DEFINE3()
4184 ret = -EPERM; in COMPAT_SYSCALL_DEFINE3()
4188 head = p->compat_robust_list; in COMPAT_SYSCALL_DEFINE3()
4192 return -EFAULT; in COMPAT_SYSCALL_DEFINE3()
4213 return -EFAULT; in SYSCALL_DEFINE6()
4235 * guaranteed to fault and we get -EFAULT on functional in futex_detect_cmpxchg()
4236 * implementation, the non-functional ones will return in futex_detect_cmpxchg()
4237 * -ENOSYS. in futex_detect_cmpxchg()
4239 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) in futex_detect_cmpxchg()