Lines Matching +full:assigned +full:- +full:resolution +full:- +full:bits
1 /* SPDX-License-Identifier: GPL-2.0 */
124 * Helpers for converting nanosecond timing to jiffy resolution
129 * Increase resolution of nice-level calculations for 64-bit architectures.
130 * The extra resolution improves shares distribution and load balancing of
131 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
132 * hierarchies, especially on larger systems. This is not a user-visible change
133 * and does not change the user-interface for setting shares/weights.
135 * We increase resolution only if we have enough bits to allow this increased
136 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
140 * increase coverage and consistency always enable it on 64-bit platforms.
160 * independent resolution, but they should be well calibrated. We use
164 * scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
171 * 10 -> just above 1us
172 * 9 -> just above 0.5us
207 return idle_policy(p->policy); in task_has_idle_policy()
212 return rt_policy(p->policy); in task_has_rt_policy()
217 return dl_policy(p->policy); in task_has_dl_policy()
224 s64 diff = sample - *avg; in update_avg()
230 * is UB; cap at size-1.
233 (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
254 return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); in dl_entity_is_special()
267 dl_time_before(a->deadline, b->deadline); in dl_entity_preempt()
271 * This is the priority-queue data structure of the RT scheduling class:
301 * To keep the bandwidth of -deadline tasks under control
303 * - store the maximum -deadline bandwidth of each cpu;
304 * - cache the fraction of bandwidth that is currently allocated in
308 * one used for RT-throttling (rt_bandwidth), with the main difference
315 * - bw (< 100%) is the deadline bandwidth of each CPU;
316 * - total_bw is the currently allocated bandwidth in each root domain;
413 /* The two decimal precision [%] value requested from user-space */
431 * (The default weight is 1024 - so there's no practical
514 * applicable for 32-bit architectures.
549 /* CFS-related fields in a runqueue */
607 * Where f(tg) is the recursive weight fraction assigned to
621 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
662 /* Real-Time classes' related field in a runqueue: */
700 return rt_rq->rt_queued && rt_rq->rt_nr_running; in rt_rq_is_runnable()
727 * an rb-tree, ordered by tasks' deadlines, with caching
742 * Utilization of the tasks "assigned" to this runqueue (including
748 * runqueue (inactive utilization = this_bw - running_bw).
762 #define entity_is_task(se) (!se->my_q)
767 se->runnable_weight = se->my_q->h_nr_running; in se_update_runnable()
773 return !!se->on_rq; in se_runnable()
775 return se->runnable_weight; in se_runnable()
785 return !!se->on_rq; in se_runnable()
791 * XXX we want to get rid of these helpers and use the full load resolution.
795 return scale_load_down(se->load.weight); in se_weight()
812 #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
815 * We add the notion of a root-domain which will be used to define per-domain
818 * exclusive cpuset is created, we also create and attach a new root-domain
831 * - More than one runnable task
832 * - Running task is misfit
836 /* Indicate one or more cpus over-utilized (tipping point) */
841 * than one runnable -deadline task (as it is below for RT tasks).
880 * NULL-terminated list of performance domains intersecting with the
899 * struct uclamp_bucket - Utilization clamp bucket
908 unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
912 * struct uclamp_rq - rq's utilization clamp
924 * - for util_min: we want to run the CPU at least at the max of the minimum
926 * - for util_max: we want to allow the CPU to run up to the max of the
931 * the metrics required to compute all the per-rq utilization clamp values.
948 * This is the main, per-CPU runqueue data structure.
1111 /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
1144 /* shared state -- careful with sched_core_cpu_deactivate() */
1160 return cfs_rq->rq; in rq_of()
1174 return rq->cpu; in cpu_of()
1185 return p->migration_disabled; in is_migration_disabled()
1196 #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
1207 return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; in sched_core_enabled()
1217 * stable unless you actually hold a relevant rq->__lock.
1222 return &rq->core->__lock; in rq_lockp()
1224 return &rq->__lock; in rq_lockp()
1229 if (rq->core_enabled) in __rq_lockp()
1230 return &rq->core->__lock; in __rq_lockp()
1232 return &rq->__lock; in __rq_lockp()
1249 return rq->core->core_cookie == p->core_cookie; in sched_cpu_cookie_match()
1272 return idle_core || rq->core->core_cookie == p->core_cookie; in sched_core_cookie_match()
1285 for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) { in sched_group_cookie_match()
1294 return !RB_EMPTY_NODE(&p->core_node); in sched_core_enqueued()
1317 return &rq->__lock; in rq_lockp()
1322 return &rq->__lock; in __rq_lockp()
1410 return p->se.cfs_rq; in task_cfs_rq()
1416 return se->cfs_rq; in cfs_rq_of()
1422 return grp->my_q; in group_cfs_rq()
1434 return &task_rq(p)->cfs; in task_cfs_rq()
1442 return &rq->cfs; in cfs_rq_of()
1455 * rq::clock_update_flags bits
1457 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
1461 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
1464 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
1471 * if (rq->clock_update_flags >= RQCF_UPDATED)
1487 SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP); in assert_clock_updated()
1495 return rq->clock; in rq_clock()
1503 return rq->clock_task; in rq_clock_task()
1527 rq->clock_update_flags |= RQCF_REQ_SKIP; in rq_clock_skip_update()
1537 rq->clock_update_flags &= ~RQCF_REQ_SKIP; in rq_clock_cancel_skipupdate()
1561 * copy of the (on-stack) 'struct rq_flags rf'.
1563 * Also see Documentation/locking/lockdep-design.rst.
1567 rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); in rq_pin_lock()
1570 rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in rq_pin_lock()
1571 rf->clock_update_flags = 0; in rq_pin_lock()
1573 SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); in rq_pin_lock()
1581 if (rq->clock_update_flags > RQCF_ACT_SKIP) in rq_unpin_lock()
1582 rf->clock_update_flags = RQCF_UPDATED; in rq_unpin_lock()
1585 lockdep_unpin_lock(__rq_lockp(rq), rf->cookie); in rq_unpin_lock()
1590 lockdep_repin_lock(__rq_lockp(rq), rf->cookie); in rq_repin_lock()
1596 rq->clock_update_flags |= rf->clock_update_flags; in rq_repin_lock()
1601 __acquires(rq->lock);
1604 __acquires(p->pi_lock)
1605 __acquires(rq->lock);
1608 __releases(rq->lock) in __task_rq_unlock()
1616 __releases(rq->lock) in task_rq_unlock()
1617 __releases(p->pi_lock) in task_rq_unlock()
1621 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); in task_rq_unlock()
1626 __acquires(rq->lock) in rq_lock_irqsave()
1628 raw_spin_rq_lock_irqsave(rq, rf->flags); in rq_lock_irqsave()
1634 __acquires(rq->lock) in rq_lock_irq()
1642 __acquires(rq->lock) in rq_lock()
1650 __releases(rq->lock) in rq_unlock_irqrestore()
1653 raw_spin_rq_unlock_irqrestore(rq, rf->flags); in rq_unlock_irqrestore()
1658 __releases(rq->lock) in rq_unlock_irq()
1666 __releases(rq->lock) in rq_unlock()
1674 __acquires(rq->lock) in this_rq_lock_irq()
1743 if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) in queue_balance_callback()
1746 head->func = func; in queue_balance_callback()
1747 head->next = rq->balance_callback; in queue_balance_callback()
1748 rq->balance_callback = head; in queue_balance_callback()
1756 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
1760 * preempt-disabled sections.
1763 for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
1764 __sd; __sd = __sd->parent)
1767 * highest_flag_domain - Return highest sched_domain containing flag.
1780 if (!(sd->flags & flag)) in highest_flag_domain()
1793 if (sd->flags & flag) in lowest_flag_domain()
1821 unsigned long min_capacity; /* Min per-CPU capacity in group */
1822 unsigned long max_capacity; /* Max per-CPU capacity in group */
1854 return to_cpumask(sg->cpumask); in sched_group_span()
1862 return to_cpumask(sg->sgc->cpumask); in group_balance_mask()
1919 * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
1927 return p->sched_task_group; in task_group()
1938 set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); in set_task_rq()
1939 p->se.cfs_rq = tg->cfs_rq[cpu]; in set_task_rq()
1940 p->se.parent = tg->se[cpu]; in set_task_rq()
1941 p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0; in set_task_rq()
1945 p->rt.rt_rq = tg->rt_rq[cpu]; in set_task_rq()
1946 p->rt.parent = tg->rt_se[cpu]; in set_task_rq()
1965 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be in __set_task_cpu()
1967 * per-task data have been completed by this moment. in __set_task_cpu()
1970 WRITE_ONCE(task_thread_info(p)->cpu, cpu); in __set_task_cpu()
1971 p->wake_cpu = cpu; in __set_task_cpu()
1997 * To support run-time toggling of sched features, all the translation units
2057 return rq->curr == p; in task_current()
2063 return p->on_cpu; in task_on_cpu()
2071 return p->on_rq == TASK_ON_RQ_QUEUED; in task_on_rq_queued()
2076 return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; in task_on_rq_migrating()
2111 * DEQUEUE_SLEEP - task is no longer runnable
2112 * ENQUEUE_WAKEUP - task just became runnable
2114 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
2118 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
2121 * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
2122 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
2123 * ENQUEUE_MIGRATED - the task was migrated during wakeup
2145 #define RETRY_TASK ((void *)-1UL)
2190 * The switched_from() call is allowed to drop rq->lock, therefore we
2192 * rq->lock. They are however serialized by p->pi_lock.
2211 WARN_ON_ONCE(rq->curr != prev); in put_prev_task()
2212 prev->sched_class->put_prev_task(rq, prev); in put_prev_task()
2217 next->sched_class->set_next_task(rq, next, false); in set_next_task()
2225 * include/asm-generic/vmlinux.lds.h
2236 /* Defined in include/asm-generic/vmlinux.lds.h */
2256 return rq->stop && task_on_rq_queued(rq->stop); in sched_stop_runnable()
2261 return rq->dl.dl_nr_running > 0; in sched_dl_runnable()
2266 return rq->rt.rt_queued > 0; in sched_rt_runnable()
2271 return rq->cfs.nr_running > 0; in sched_fair_runnable()
2292 struct task_struct *p = rq->curr; in get_push_task()
2296 if (rq->push_busy) in get_push_task()
2299 if (p->nr_cpus_allowed == 1) in get_push_task()
2302 if (p->migration_disabled) in get_push_task()
2305 rq->push_busy = true; in get_push_task()
2317 rq->idle_state = idle_state; in idle_set_state()
2324 return rq->idle_state; in idle_get_state()
2364 #define MAX_BW_BITS (64 - BW_SHIFT)
2365 #define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
2399 unsigned prev_nr = rq->nr_running; in add_nr_running()
2401 rq->nr_running = prev_nr + count; in add_nr_running()
2407 if (prev_nr < 2 && rq->nr_running >= 2) { in add_nr_running()
2408 if (!READ_ONCE(rq->rd->overload)) in add_nr_running()
2409 WRITE_ONCE(rq->rd->overload, 1); in add_nr_running()
2418 rq->nr_running -= count; in sub_nr_running()
2420 call_trace_sched_update_nr_running(rq, -count); in sub_nr_running()
2462 * - enabled by features
2463 * - hrtimer is actually high res
2469 return hrtimer_is_hres_active(&rq->hrtick_timer); in hrtick_enabled()
2516 * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
2522 * ------ * SCHED_CAPACITY_SCALE
2537 * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
2541 rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in double_rq_clock_clear_update()
2544 rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in double_rq_clock_clear_update()
2557 * In order to not have {0,2},{1,3} turn into an AB-BA, in rq_order_less()
2558 * order by core-id first and cpu-id second. in rq_order_less()
2562 * double_rq_lock(0,3); will take core-0, core-1 lock in rq_order_less()
2563 * double_rq_lock(1,2); will take core-1, core-0 lock in rq_order_less()
2565 * when only cpu-id is considered. in rq_order_less()
2567 if (rq1->core->cpu < rq2->core->cpu) in rq_order_less()
2569 if (rq1->core->cpu > rq2->core->cpu) in rq_order_less()
2573 * __sched_core_flip() relies on SMT having cpu-id lock order. in rq_order_less()
2576 return rq1->cpu < rq2->cpu; in rq_order_less()
2584 * fair double_lock_balance: Safely acquires both rq->locks in a fair
2592 __releases(this_rq->lock) in _double_lock_balance()
2593 __acquires(busiest->lock) in _double_lock_balance()
2594 __acquires(this_rq->lock) in _double_lock_balance()
2606 * already in proper order on entry. This favors lower CPU-ids and will
2611 __releases(this_rq->lock) in _double_lock_balance()
2612 __acquires(busiest->lock) in _double_lock_balance()
2613 __acquires(this_rq->lock) in _double_lock_balance()
2636 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
2646 __releases(busiest->lock) in double_unlock_balance()
2650 lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_); in double_unlock_balance()
2681 * double_rq_unlock - safely unlock two runqueues
2687 __releases(rq1->lock) in double_rq_unlock()
2688 __releases(rq2->lock) in double_rq_unlock()
2693 __release(rq2->lock); in double_rq_unlock()
2704 * double_rq_lock - safely lock two runqueues
2710 __acquires(rq1->lock) in double_rq_lock()
2711 __acquires(rq2->lock) in double_rq_lock()
2716 __acquire(rq2->lock); /* Fake it out ;) */ in double_rq_lock()
2721 * double_rq_unlock - safely unlock two runqueues
2727 __releases(rq1->lock) in double_rq_unlock()
2728 __releases(rq2->lock) in double_rq_unlock()
2732 __release(rq2->lock); in double_rq_unlock()
2786 #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
2821 seq = __u64_stats_fetch_begin(&irqtime->sync); in irq_time_read()
2822 total = irqtime->total; in irq_time_read()
2823 } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); in irq_time_read()
2833 * cpufreq_update_util - Take a note about CPU utilization changes.
2840 * It can only be called from RCU-sched read-side critical sections.
2851 * but that really is a band-aid. Going forward it should be replaced with
2861 data->func(data, rq_clock(rq), flags); in cpufreq_update_util()
2878 return cpu_rq(cpu)->cpu_capacity_orig; in capacity_orig_of()
2882 * enum cpu_util_type - CPU utilization type
2906 * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
2912 return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT); in dl_task_fits_capacity()
2917 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; in cpu_bw_dl()
2922 return READ_ONCE(rq->avg_dl.util_avg); in cpu_util_dl()
2926 * cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks.
2933 * recent utilization of currently non-runnable tasks on that CPU.
2941 * previously-executed tasks, which helps better deduce how busy a CPU will
2942 * be when a long-sleeping task wakes up. The contribution to CPU utilization
2950 * could be seen as over-utilized even though CPU1 has 20% of spare CPU
2953 * after task migrations (scheduler-driven DVFS).
2962 cfs_rq = &cpu_rq(cpu)->cfs; in cpu_util_cfs()
2963 util = READ_ONCE(cfs_rq->avg.util_avg); in cpu_util_cfs()
2967 READ_ONCE(cfs_rq->avg.util_est.enqueued)); in cpu_util_cfs()
2975 return READ_ONCE(rq->avg_rt.util_avg); in cpu_util_rt()
2983 * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
3017 if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) in uclamp_rq_util_with()
3021 min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); in uclamp_rq_util_with()
3022 max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); in uclamp_rq_util_with()
3045 max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); in uclamp_rq_is_capped()
3055 * Returns true if userspace opted-in to use uclamp and aggregation at rq level
3081 return rq->avg_irq.util_avg; in cpu_util_irq()
3087 util *= (max - irq); in scale_irq_capacity()
3108 #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
3127 * - prior user-space memory accesses and store to rq->membarrier_state,
3128 * - store to rq->membarrier_state and following user-space memory accesses.
3129 * In the same way it provides those guarantees around store to rq->curr.
3140 membarrier_state = atomic_read(&next_mm->membarrier_state); in membarrier_switch_mm()
3141 if (READ_ONCE(rq->membarrier_state) == membarrier_state) in membarrier_switch_mm()
3144 WRITE_ONCE(rq->membarrier_state, membarrier_state); in membarrier_switch_mm()
3157 if (!(p->flags & PF_KTHREAD)) in is_per_cpu_kthread()
3160 if (p->nr_cpus_allowed != 1) in is_per_cpu_kthread()
3179 curr->se.sum_exec_runtime += delta_exec; in update_current_exec_runtime()
3182 curr->se.exec_start = now; in update_current_exec_runtime()