Lines Matching +full:event +full:-
1 // SPDX-License-Identifier: GPL-2.0
6 * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
7 * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
75 struct task_struct *p = tfc->p; in remote_function()
78 /* -EAGAIN */ in remote_function()
87 tfc->ret = -ESRCH; /* No such (running) process */ in remote_function()
92 tfc->ret = tfc->func(tfc->info); in remote_function()
96 * task_function_call - call a function on the cpu on which a task runs
106 * returns @func return value or -ESRCH or -ENXIO when the process isn't running
115 .ret = -EAGAIN, in task_function_call()
125 if (ret != -EAGAIN) in task_function_call()
135 * cpu_function_call - call a function on the cpu
142 * returns: @func return value or -ENXIO when the cpu is offline
150 .ret = -ENXIO, /* No such CPU */ in cpu_function_call()
161 raw_spin_lock(&cpuctx->ctx.lock); in perf_ctx_lock()
163 raw_spin_lock(&ctx->lock); in perf_ctx_lock()
170 raw_spin_unlock(&ctx->lock); in perf_ctx_unlock()
171 raw_spin_unlock(&cpuctx->ctx.lock); in perf_ctx_unlock()
174 #define TASK_TOMBSTONE ((void *)-1L)
176 static bool is_kernel_event(struct perf_event *event) in is_kernel_event() argument
178 return READ_ONCE(event->owner) == TASK_TOMBSTONE; in is_kernel_event()
186 return this_cpu_ptr(&perf_cpu_context)->task_ctx; in perf_cpu_task_ctx()
192 * When !ctx->nr_events a task context will not be scheduled. This means
198 * - removing the last event from a task ctx; this is relatively straight
201 * - adding the first event to a task ctx; this is tricky because we cannot
202 * rely on ctx->is_active and therefore cannot use event_function_call().
205 * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
212 struct perf_event *event; member
220 struct perf_event *event = efs->event; in event_function() local
221 struct perf_event_context *ctx = event->ctx; in event_function()
223 struct perf_event_context *task_ctx = cpuctx->task_ctx; in event_function()
230 * Since we do the IPI call without holding ctx->lock things can have in event_function()
233 if (ctx->task) { in event_function()
234 if (ctx->task != current) { in event_function()
235 ret = -ESRCH; in event_function()
243 * above ctx->task != current test), therefore we must have in event_function()
244 * ctx->is_active here. in event_function()
246 WARN_ON_ONCE(!ctx->is_active); in event_function()
248 * And since we have ctx->is_active, cpuctx->task_ctx must in event_function()
253 WARN_ON_ONCE(&cpuctx->ctx != ctx); in event_function()
256 efs->func(event, cpuctx, ctx, efs->data); in event_function()
263 static void event_function_call(struct perf_event *event, event_f func, void *data) in event_function_call() argument
265 struct perf_event_context *ctx = event->ctx; in event_function_call()
266 struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */ in event_function_call()
268 .event = event, in event_function_call()
273 if (!event->parent) { in event_function_call()
275 * If this is a !child event, we must hold ctx::mutex to in event_function_call()
276 * stabilize the event->ctx relation. See in event_function_call()
279 lockdep_assert_held(&ctx->mutex); in event_function_call()
283 cpu_function_call(event->cpu, event_function, &efs); in event_function_call()
294 raw_spin_lock_irq(&ctx->lock); in event_function_call()
299 task = ctx->task; in event_function_call()
301 raw_spin_unlock_irq(&ctx->lock); in event_function_call()
304 if (ctx->is_active) { in event_function_call()
305 raw_spin_unlock_irq(&ctx->lock); in event_function_call()
308 func(event, NULL, ctx, data); in event_function_call()
309 raw_spin_unlock_irq(&ctx->lock); in event_function_call()
316 static void event_function_local(struct perf_event *event, event_f func, void *data) in event_function_local() argument
318 struct perf_event_context *ctx = event->ctx; in event_function_local()
320 struct task_struct *task = READ_ONCE(ctx->task); in event_function_local()
334 task = ctx->task; in event_function_local()
344 if (ctx->is_active) { in event_function_local()
348 if (WARN_ON_ONCE(cpuctx->task_ctx != ctx)) in event_function_local()
352 WARN_ON_ONCE(&cpuctx->ctx != ctx); in event_function_local()
355 func(event, cpuctx, ctx, data); in event_function_local()
412 * perf event paranoia level:
413 * -1 - not paranoid at all
414 * 0 - disallow raw tracepoint access for unpriv
415 * 1 - disallow cpu events for unpriv
416 * 2 - disallow kernel profiling for unpriv
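The paranoia level listed above is exposed to userspace as the kernel.perf_event_paranoid sysctl. A minimal userspace sketch (not part of core.c) that reads it; the fallback value of 2 used when the file is unreadable is an assumption, not something the listing guarantees:

#include <stdio.h>

/* Sketch: read /proc/sys/kernel/perf_event_paranoid; returns the level,
 * or an assumed default of 2 if the file cannot be read. */
static int read_paranoid_level(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
	int level = 2;

	if (f) {
		if (fscanf(f, "%d", &level) != 1)
			level = 2;
		fclose(f);
	}
	return level;
}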
424 * max perf event sample rate
461 return -EINVAL; in perf_proc_update_handler()
531 running_len -= running_len/NR_ACCUMULATED_SAMPLES; in perf_sample_event_took()
574 static u64 perf_event_time(struct perf_event *event);
583 static inline u64 perf_event_clock(struct perf_event *event) in perf_event_clock() argument
585 return event->clock(); in perf_event_clock()
589 * State based event timekeeping...
591 * The basic idea is to use event->state to determine which (if any) time
596 * Event groups make things a little more complicated, but not terribly so. The
611 __perf_effective_state(struct perf_event *event) in __perf_effective_state() argument
613 struct perf_event *leader = event->group_leader; in __perf_effective_state()
615 if (leader->state <= PERF_EVENT_STATE_OFF) in __perf_effective_state()
616 return leader->state; in __perf_effective_state()
618 return event->state; in __perf_effective_state()
622 __perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running) in __perf_update_times() argument
624 enum perf_event_state state = __perf_effective_state(event); in __perf_update_times()
625 u64 delta = now - event->tstamp; in __perf_update_times()
627 *enabled = event->total_time_enabled; in __perf_update_times()
631 *running = event->total_time_running; in __perf_update_times()
636 static void perf_event_update_time(struct perf_event *event) in perf_event_update_time() argument
638 u64 now = perf_event_time(event); in perf_event_update_time()
640 __perf_update_times(event, now, &event->total_time_enabled, in perf_event_update_time()
641 &event->total_time_running); in perf_event_update_time()
642 event->tstamp = now; in perf_event_update_time()
654 perf_event_set_state(struct perf_event *event, enum perf_event_state state) in perf_event_set_state() argument
656 if (event->state == state) in perf_event_set_state()
659 perf_event_update_time(event); in perf_event_set_state()
664 if ((event->state < 0) ^ (state < 0)) in perf_event_set_state()
665 perf_event_update_sibling_time(event); in perf_event_set_state()
667 WRITE_ONCE(event->state, state); in perf_event_set_state()
671 * UP store-release, load-acquire
691 list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) in perf_ctx_disable()
692 perf_pmu_disable(pmu_ctx->pmu); in perf_ctx_disable()
699 list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) in perf_ctx_enable()
700 perf_pmu_enable(pmu_ctx->pmu); in perf_ctx_enable()
709 perf_cgroup_match(struct perf_event *event) in perf_cgroup_match() argument
713 /* @event doesn't care about cgroup */ in perf_cgroup_match()
714 if (!event->cgrp) in perf_cgroup_match()
718 if (!cpuctx->cgrp) in perf_cgroup_match()
722 * Cgroup scoping is recursive. An event enabled for a cgroup is in perf_cgroup_match()
724 * cgroup is a descendant of @event's (the test covers identity in perf_cgroup_match()
727 return cgroup_is_descendant(cpuctx->cgrp->css.cgroup, in perf_cgroup_match()
728 event->cgrp->css.cgroup); in perf_cgroup_match()
731 static inline void perf_detach_cgroup(struct perf_event *event) in perf_detach_cgroup() argument
733 css_put(&event->cgrp->css); in perf_detach_cgroup()
734 event->cgrp = NULL; in perf_detach_cgroup()
737 static inline int is_cgroup_event(struct perf_event *event) in is_cgroup_event() argument
739 return event->cgrp != NULL; in is_cgroup_event()
742 static inline u64 perf_cgroup_event_time(struct perf_event *event) in perf_cgroup_event_time() argument
746 t = per_cpu_ptr(event->cgrp->info, event->cpu); in perf_cgroup_event_time()
747 return t->time; in perf_cgroup_event_time()
750 static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) in perf_cgroup_event_time_now() argument
754 t = per_cpu_ptr(event->cgrp->info, event->cpu); in perf_cgroup_event_time_now()
755 if (!__load_acquire(&t->active)) in perf_cgroup_event_time_now()
756 return t->time; in perf_cgroup_event_time_now()
757 now += READ_ONCE(t->timeoffset); in perf_cgroup_event_time_now()
764 info->time += now - info->timestamp; in __update_cgrp_time()
765 info->timestamp = now; in __update_cgrp_time()
769 WRITE_ONCE(info->timeoffset, info->time - info->timestamp); in __update_cgrp_time()
774 struct perf_cgroup *cgrp = cpuctx->cgrp; in update_cgrp_time_from_cpuctx()
781 for (css = &cgrp->css; css; css = css->parent) { in update_cgrp_time_from_cpuctx()
783 info = this_cpu_ptr(cgrp->info); in update_cgrp_time_from_cpuctx()
787 __store_release(&info->active, 0); in update_cgrp_time_from_cpuctx()
792 static inline void update_cgrp_time_from_event(struct perf_event *event) in update_cgrp_time_from_event() argument
800 if (!is_cgroup_event(event)) in update_cgrp_time_from_event()
803 info = this_cpu_ptr(event->cgrp->info); in update_cgrp_time_from_event()
807 if (info->active) in update_cgrp_time_from_event()
814 struct perf_event_context *ctx = &cpuctx->ctx; in perf_cgroup_set_timestamp()
815 struct perf_cgroup *cgrp = cpuctx->cgrp; in perf_cgroup_set_timestamp()
820 * ctx->lock held by caller in perf_cgroup_set_timestamp()
827 WARN_ON_ONCE(!ctx->nr_cgroups); in perf_cgroup_set_timestamp()
829 for (css = &cgrp->css; css; css = css->parent) { in perf_cgroup_set_timestamp()
831 info = this_cpu_ptr(cgrp->info); in perf_cgroup_set_timestamp()
832 __update_cgrp_time(info, ctx->timestamp, false); in perf_cgroup_set_timestamp()
833 __store_release(&info->active, 1); in perf_cgroup_set_timestamp()
846 * cpuctx->cgrp is set when the first cgroup event is enabled, in perf_cgroup_switch()
847 * and is cleared when the last cgroup event is disabled. in perf_cgroup_switch()
849 if (READ_ONCE(cpuctx->cgrp) == NULL) in perf_cgroup_switch()
852 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); in perf_cgroup_switch()
855 if (READ_ONCE(cpuctx->cgrp) == cgrp) in perf_cgroup_switch()
858 perf_ctx_lock(cpuctx, cpuctx->task_ctx); in perf_cgroup_switch()
859 perf_ctx_disable(&cpuctx->ctx); in perf_cgroup_switch()
861 ctx_sched_out(&cpuctx->ctx, EVENT_ALL); in perf_cgroup_switch()
867 cpuctx->cgrp = cgrp; in perf_cgroup_switch()
873 ctx_sched_in(&cpuctx->ctx, EVENT_ALL); in perf_cgroup_switch()
875 perf_ctx_enable(&cpuctx->ctx); in perf_cgroup_switch()
876 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); in perf_cgroup_switch()
879 static int perf_cgroup_ensure_storage(struct perf_event *event, in perf_cgroup_ensure_storage() argument
890 for (heap_size = 1; css; css = css->parent) in perf_cgroup_ensure_storage()
895 if (heap_size <= cpuctx->heap_size) in perf_cgroup_ensure_storage()
901 ret = -ENOMEM; in perf_cgroup_ensure_storage()
905 raw_spin_lock_irq(&cpuctx->ctx.lock); in perf_cgroup_ensure_storage()
906 if (cpuctx->heap_size < heap_size) { in perf_cgroup_ensure_storage()
907 swap(cpuctx->heap, storage); in perf_cgroup_ensure_storage()
908 if (storage == cpuctx->heap_default) in perf_cgroup_ensure_storage()
910 cpuctx->heap_size = heap_size; in perf_cgroup_ensure_storage()
912 raw_spin_unlock_irq(&cpuctx->ctx.lock); in perf_cgroup_ensure_storage()
920 static inline int perf_cgroup_connect(int fd, struct perf_event *event, in perf_cgroup_connect() argument
930 return -EBADF; in perf_cgroup_connect()
932 css = css_tryget_online_from_dir(f.file->f_path.dentry, in perf_cgroup_connect()
939 ret = perf_cgroup_ensure_storage(event, css); in perf_cgroup_connect()
944 event->cgrp = cgrp; in perf_cgroup_connect()
951 if (group_leader && group_leader->cgrp != cgrp) { in perf_cgroup_connect()
952 perf_detach_cgroup(event); in perf_cgroup_connect()
953 ret = -EINVAL; in perf_cgroup_connect()
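On the userspace side, the fd that reaches perf_cgroup_connect() comes from perf_event_open() called with PERF_FLAG_PID_CGROUP, where the pid argument is an open fd of a cgroup directory and cpu must name a real CPU (cgroup events are per-cpu, as the comments below note). A hedged sketch; the cgroup path is a placeholder:

#include <fcntl.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch: open a per-cpu event scoped to a cgroup. The cgroup path is
 * hypothetical and error handling is minimal. */
static int open_cgroup_event(struct perf_event_attr *attr, int cpu)
{
	int cgrp_fd, fd;

	cgrp_fd = open("/sys/fs/cgroup/mygroup", O_RDONLY); /* hypothetical group */
	if (cgrp_fd < 0)
		return -1;

	fd = syscall(__NR_perf_event_open, attr, cgrp_fd, cpu, -1,
		     PERF_FLAG_PID_CGROUP);
	close(cgrp_fd);
	return fd;
}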
961 perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) in perf_cgroup_event_enable() argument
965 if (!is_cgroup_event(event)) in perf_cgroup_event_enable()
969 * Because cgroup events are always per-cpu events, in perf_cgroup_event_enable()
970 * @ctx == &cpuctx->ctx. in perf_cgroup_event_enable()
974 if (ctx->nr_cgroups++) in perf_cgroup_event_enable()
977 cpuctx->cgrp = perf_cgroup_from_task(current, ctx); in perf_cgroup_event_enable()
981 perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx) in perf_cgroup_event_disable() argument
985 if (!is_cgroup_event(event)) in perf_cgroup_event_disable()
989 * Because cgroup events are always per-cpu events, in perf_cgroup_event_disable()
990 * @ctx == &cpuctx->ctx. in perf_cgroup_event_disable()
994 if (--ctx->nr_cgroups) in perf_cgroup_event_disable()
997 cpuctx->cgrp = NULL; in perf_cgroup_event_disable()
1003 perf_cgroup_match(struct perf_event *event) in perf_cgroup_match() argument
1008 static inline void perf_detach_cgroup(struct perf_event *event) in perf_detach_cgroup() argument
1011 static inline int is_cgroup_event(struct perf_event *event) in is_cgroup_event() argument
1016 static inline void update_cgrp_time_from_event(struct perf_event *event) in update_cgrp_time_from_event() argument
1025 static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, in perf_cgroup_connect() argument
1029 return -EINVAL; in perf_cgroup_connect()
1037 static inline u64 perf_cgroup_event_time(struct perf_event *event) in perf_cgroup_event_time() argument
1042 static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) in perf_cgroup_event_time_now() argument
1048 perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) in perf_cgroup_event_enable() argument
1053 perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx) in perf_cgroup_event_disable() argument
1080 raw_spin_lock(&cpc->hrtimer_lock); in perf_mux_hrtimer_handler()
1082 hrtimer_forward_now(hr, cpc->hrtimer_interval); in perf_mux_hrtimer_handler()
1084 cpc->hrtimer_active = 0; in perf_mux_hrtimer_handler()
1085 raw_spin_unlock(&cpc->hrtimer_lock); in perf_mux_hrtimer_handler()
1092 struct hrtimer *timer = &cpc->hrtimer; in __perf_mux_hrtimer_init()
1093 struct pmu *pmu = cpc->epc.pmu; in __perf_mux_hrtimer_init()
1100 interval = pmu->hrtimer_interval_ms; in __perf_mux_hrtimer_init()
1102 interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; in __perf_mux_hrtimer_init()
1104 cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); in __perf_mux_hrtimer_init()
1106 raw_spin_lock_init(&cpc->hrtimer_lock); in __perf_mux_hrtimer_init()
1108 timer->function = perf_mux_hrtimer_handler; in __perf_mux_hrtimer_init()
1113 struct hrtimer *timer = &cpc->hrtimer; in perf_mux_hrtimer_restart()
1116 raw_spin_lock_irqsave(&cpc->hrtimer_lock, flags); in perf_mux_hrtimer_restart()
1117 if (!cpc->hrtimer_active) { in perf_mux_hrtimer_restart()
1118 cpc->hrtimer_active = 1; in perf_mux_hrtimer_restart()
1119 hrtimer_forward_now(timer, cpc->hrtimer_interval); in perf_mux_hrtimer_restart()
1122 raw_spin_unlock_irqrestore(&cpc->hrtimer_lock, flags); in perf_mux_hrtimer_restart()
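The pmu->hrtimer_interval_ms value used by the multiplexing timer above is also adjustable from userspace through each PMU's perf_event_mux_interval_ms sysfs attribute. A sketch that reads it; the exact sysfs path for the "cpu" PMU is an assumption:

#include <stdio.h>

/* Sketch: read the multiplexing interval (in ms) of the "cpu" PMU.
 * Path assumed; other PMUs live under /sys/bus/event_source/devices/. */
static int read_mux_interval_ms(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/cpu/perf_event_mux_interval_ms", "r");
	int ms = -1;

	if (f) {
		if (fscanf(f, "%d", &ms) != 1)
			ms = -1;
		fclose(f);
	}
	return ms;
}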
1134 int *count = this_cpu_ptr(pmu->pmu_disable_count); in perf_pmu_disable()
1136 pmu->pmu_disable(pmu); in perf_pmu_disable()
1141 int *count = this_cpu_ptr(pmu->pmu_disable_count); in perf_pmu_enable()
1142 if (!--(*count)) in perf_pmu_enable()
1143 pmu->pmu_enable(pmu); in perf_pmu_enable()
1148 WARN_ON_ONCE(*this_cpu_ptr(pmu->pmu_disable_count) == 0); in perf_assert_pmu_disabled()
1153 refcount_inc(&ctx->refcount); in get_ctx()
1158 if (pmu->task_ctx_cache) in alloc_task_ctx_data()
1159 return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL); in alloc_task_ctx_data()
1166 if (pmu->task_ctx_cache && task_ctx_data) in free_task_ctx_data()
1167 kmem_cache_free(pmu->task_ctx_cache, task_ctx_data); in free_task_ctx_data()
1180 if (refcount_dec_and_test(&ctx->refcount)) { in put_ctx()
1181 if (ctx->parent_ctx) in put_ctx()
1182 put_ctx(ctx->parent_ctx); in put_ctx()
1183 if (ctx->task && ctx->task != TASK_TOMBSTONE) in put_ctx()
1184 put_task_struct(ctx->task); in put_ctx()
1185 call_rcu(&ctx->rcu_head, free_ctx); in put_ctx()
1199 * - perf_event_exit_task_context() [ child , 0 ]
1203 * - perf_event_init_context() [ parent, 0 ]
1211 * While it appears there is an obvious deadlock here -- the parent and child
1213 * life-time rules separate them. That is an exiting task cannot fork, and a
1216 * But remember that these are parent<->child context relations, and
1221 * because the sys_perf_event_open() case will install a new event and break
1222 * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
1232 * quiesce the event, after which we can install it in the new location. This
1233 * means that only external vectors (perf_fops, prctl) can perturb the event
1237 * However; because event->ctx can change while we're waiting to acquire
1238 * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
1253 * cpuctx->mutex / perf_event_context::mutex
1256 perf_event_ctx_lock_nested(struct perf_event *event, int nesting) in perf_event_ctx_lock_nested() argument
1262 ctx = READ_ONCE(event->ctx); in perf_event_ctx_lock_nested()
1263 if (!refcount_inc_not_zero(&ctx->refcount)) { in perf_event_ctx_lock_nested()
1269 mutex_lock_nested(&ctx->mutex, nesting); in perf_event_ctx_lock_nested()
1270 if (event->ctx != ctx) { in perf_event_ctx_lock_nested()
1271 mutex_unlock(&ctx->mutex); in perf_event_ctx_lock_nested()
1280 perf_event_ctx_lock(struct perf_event *event) in perf_event_ctx_lock() argument
1282 return perf_event_ctx_lock_nested(event, 0); in perf_event_ctx_lock()
1285 static void perf_event_ctx_unlock(struct perf_event *event, in perf_event_ctx_unlock() argument
1288 mutex_unlock(&ctx->mutex); in perf_event_ctx_unlock()
1293 * This must be done under the ctx->lock, such as to serialize against
1295 * calling scheduler related locks and ctx->lock nests inside those.
1300 struct perf_event_context *parent_ctx = ctx->parent_ctx; in unclone_ctx()
1302 lockdep_assert_held(&ctx->lock); in unclone_ctx()
1305 ctx->parent_ctx = NULL; in unclone_ctx()
1306 ctx->generation++; in unclone_ctx()
1311 static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p, in perf_event_pid_type() argument
1318 if (event->parent) in perf_event_pid_type()
1319 event = event->parent; in perf_event_pid_type()
1321 nr = __task_pid_nr_ns(p, type, event->ns); in perf_event_pid_type()
1322 /* avoid -1 if it is idle thread or runs in another ns */ in perf_event_pid_type()
1324 nr = -1; in perf_event_pid_type()
1328 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) in perf_event_pid() argument
1330 return perf_event_pid_type(event, p, PIDTYPE_TGID); in perf_event_pid()
1333 static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) in perf_event_tid() argument
1335 return perf_event_pid_type(event, p, PIDTYPE_PID); in perf_event_tid()
1339 * If we inherit events we want to return the parent event id
1342 static u64 primary_event_id(struct perf_event *event) in primary_event_id() argument
1344 u64 id = event->id; in primary_event_id()
1346 if (event->parent) in primary_event_id()
1347 id = event->parent->id; in primary_event_id()
1367 * part of the read side critical section was irqs-enabled -- see in perf_lock_task_context()
1370 * Since ctx->lock nests under rq->lock we must ensure the entire read in perf_lock_task_context()
1375 ctx = rcu_dereference(task->perf_event_ctxp); in perf_lock_task_context()
1387 raw_spin_lock(&ctx->lock); in perf_lock_task_context()
1388 if (ctx != rcu_dereference(task->perf_event_ctxp)) { in perf_lock_task_context()
1389 raw_spin_unlock(&ctx->lock); in perf_lock_task_context()
1395 if (ctx->task == TASK_TOMBSTONE || in perf_lock_task_context()
1396 !refcount_inc_not_zero(&ctx->refcount)) { in perf_lock_task_context()
1397 raw_spin_unlock(&ctx->lock); in perf_lock_task_context()
1400 WARN_ON_ONCE(ctx->task != task); in perf_lock_task_context()
1422 ++ctx->pin_count; in perf_pin_task_context()
1423 raw_spin_unlock_irqrestore(&ctx->lock, flags); in perf_pin_task_context()
1432 raw_spin_lock_irqsave(&ctx->lock, flags); in perf_unpin_context()
1433 --ctx->pin_count; in perf_unpin_context()
1434 raw_spin_unlock_irqrestore(&ctx->lock, flags); in perf_unpin_context()
1444 lockdep_assert_held(&ctx->lock); in __update_context_time()
1447 ctx->time += now - ctx->timestamp; in __update_context_time()
1448 ctx->timestamp = now; in __update_context_time()
1451 * The above: time' = time + (now - timestamp), can be re-arranged in __update_context_time()
1452 * into: time' = now + (time - timestamp), which gives a single value in __update_context_time()
1456 * it's (obviously) not possible to acquire ctx->lock in order to read in __update_context_time()
1459 WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); in __update_context_time()
1467 static u64 perf_event_time(struct perf_event *event) in perf_event_time() argument
1469 struct perf_event_context *ctx = event->ctx; in perf_event_time()
1474 if (is_cgroup_event(event)) in perf_event_time()
1475 return perf_cgroup_event_time(event); in perf_event_time()
1477 return ctx->time; in perf_event_time()
1480 static u64 perf_event_time_now(struct perf_event *event, u64 now) in perf_event_time_now() argument
1482 struct perf_event_context *ctx = event->ctx; in perf_event_time_now()
1487 if (is_cgroup_event(event)) in perf_event_time_now()
1488 return perf_cgroup_event_time_now(event, now); in perf_event_time_now()
1490 if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) in perf_event_time_now()
1491 return ctx->time; in perf_event_time_now()
1493 now += READ_ONCE(ctx->timeoffset); in perf_event_time_now()
1497 static enum event_type_t get_event_type(struct perf_event *event) in get_event_type() argument
1499 struct perf_event_context *ctx = event->ctx; in get_event_type()
1502 lockdep_assert_held(&ctx->lock); in get_event_type()
1508 if (event->group_leader != event) in get_event_type()
1509 event = event->group_leader; in get_event_type()
1511 event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE; in get_event_type()
1512 if (!ctx->task) in get_event_type()
1519 * Helper function to initialize event group nodes.
1521 static void init_event_group(struct perf_event *event) in init_event_group() argument
1523 RB_CLEAR_NODE(&event->group_node); in init_event_group()
1524 event->group_index = 0; in init_event_group()
1529 * based on event attrs bits.
1532 get_event_groups(struct perf_event *event, struct perf_event_context *ctx) in get_event_groups() argument
1534 if (event->attr.pinned) in get_event_groups()
1535 return &ctx->pinned_groups; in get_event_groups()
1537 return &ctx->flexible_groups; in get_event_groups()
1545 groups->tree = RB_ROOT; in perf_event_groups_init()
1546 groups->index = 0; in perf_event_groups_init()
1549 static inline struct cgroup *event_cgroup(const struct perf_event *event) in event_cgroup() argument
1554 if (event->cgrp) in event_cgroup()
1555 cgroup = event->cgrp->css.cgroup; in event_cgroup()
1562 * Compare function for event groups;
1572 if (left_cpu < right->cpu) in perf_event_groups_cmp()
1573 return -1; in perf_event_groups_cmp()
1574 if (left_cpu > right->cpu) in perf_event_groups_cmp()
1578 if (left_pmu < right->pmu_ctx->pmu) in perf_event_groups_cmp()
1579 return -1; in perf_event_groups_cmp()
1580 if (left_pmu > right->pmu_ctx->pmu) in perf_event_groups_cmp()
1594 return -1; in perf_event_groups_cmp()
1605 return -1; in perf_event_groups_cmp()
1612 if (left_group_index < right->group_index) in perf_event_groups_cmp()
1613 return -1; in perf_event_groups_cmp()
1614 if (left_group_index > right->group_index) in perf_event_groups_cmp()
1626 return perf_event_groups_cmp(e->cpu, e->pmu_ctx->pmu, event_cgroup(e), in __group_less()
1627 e->group_index, __node_2_pe(b)) < 0; in __group_less()
1642 return perf_event_groups_cmp(a->cpu, a->pmu, a->cgroup, b->group_index, b); in __group_cmp()
1652 return perf_event_groups_cmp(a->cpu, a->pmu, event_cgroup(b), in __group_cmp_ignore_cgroup()
1653 b->group_index, b); in __group_cmp_ignore_cgroup()
1657 * Insert @event into @groups' tree; using
1658 * {@event->cpu, @event->pmu_ctx->pmu, event_cgroup(@event), ++@groups->index}
1663 struct perf_event *event) in perf_event_groups_insert() argument
1665 event->group_index = ++groups->index; in perf_event_groups_insert()
1667 rb_add(&event->group_node, &groups->tree, __group_less); in perf_event_groups_insert()
1671 * Helper function to insert event into the pinned or flexible groups.
1674 add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx) in add_event_to_groups() argument
1678 groups = get_event_groups(event, ctx); in add_event_to_groups()
1679 perf_event_groups_insert(groups, event); in add_event_to_groups()
1687 struct perf_event *event) in perf_event_groups_delete() argument
1689 WARN_ON_ONCE(RB_EMPTY_NODE(&event->group_node) || in perf_event_groups_delete()
1690 RB_EMPTY_ROOT(&groups->tree)); in perf_event_groups_delete()
1692 rb_erase(&event->group_node, &groups->tree); in perf_event_groups_delete()
1693 init_event_group(event); in perf_event_groups_delete()
1697 * Helper function to delete event from its groups.
1700 del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx) in del_event_from_groups() argument
1704 groups = get_event_groups(event, ctx); in del_event_from_groups()
1705 perf_event_groups_delete(groups, event); in del_event_from_groups()
1709 * Get the leftmost event in the {cpu,pmu,cgroup} subtree.
1722 node = rb_find_first(&key, &groups->tree, __group_cmp); in perf_event_groups_first()
1730 perf_event_groups_next(struct perf_event *event, struct pmu *pmu) in perf_event_groups_next() argument
1733 .cpu = event->cpu, in perf_event_groups_next()
1735 .cgroup = event_cgroup(event), in perf_event_groups_next()
1739 next = rb_next_match(&key, &event->group_node, __group_cmp); in perf_event_groups_next()
1746 #define perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) \ argument
1747 for (event = perf_event_groups_first(groups, cpu, pmu, NULL); \
1748 event; event = perf_event_groups_next(event, pmu))
1753 #define perf_event_groups_for_each(event, groups) \ argument
1754 for (event = rb_entry_safe(rb_first(&((groups)->tree)), \
1755 typeof(*event), group_node); event; \
1756 event = rb_entry_safe(rb_next(&event->group_node), \
1757 typeof(*event), group_node))
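A short in-kernel usage sketch for the iteration macro defined above, written as if it lived inside core.c; the function name and the pr_debug() message are illustrative only:

/* Sketch: walk every group leader in the pinned tree of a context.
 * Assumes ctx->lock is held by the caller, as the helpers above require. */
static void debug_dump_pinned(struct perf_event_context *ctx)
{
	struct perf_event *event;

	perf_event_groups_for_each(event, &ctx->pinned_groups)
		pr_debug("pinned leader id=%llu cpu=%d\n", event->id, event->cpu);
}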
1760 * Add an event to the lists for its context.
1761 * Must be called with ctx->mutex and ctx->lock held.
1764 list_add_event(struct perf_event *event, struct perf_event_context *ctx) in list_add_event() argument
1766 lockdep_assert_held(&ctx->lock); in list_add_event()
1768 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); in list_add_event()
1769 event->attach_state |= PERF_ATTACH_CONTEXT; in list_add_event()
1771 event->tstamp = perf_event_time(event); in list_add_event()
1774 * If we're a stand alone event or group leader, we go to the context in list_add_event()
1778 if (event->group_leader == event) { in list_add_event()
1779 event->group_caps = event->event_caps; in list_add_event()
1780 add_event_to_groups(event, ctx); in list_add_event()
1783 list_add_rcu(&event->event_entry, &ctx->event_list); in list_add_event()
1784 ctx->nr_events++; in list_add_event()
1785 if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) in list_add_event()
1786 ctx->nr_user++; in list_add_event()
1787 if (event->attr.inherit_stat) in list_add_event()
1788 ctx->nr_stat++; in list_add_event()
1790 if (event->state > PERF_EVENT_STATE_OFF) in list_add_event()
1791 perf_cgroup_event_enable(event, ctx); in list_add_event()
1793 ctx->generation++; in list_add_event()
1794 event->pmu_ctx->nr_events++; in list_add_event()
1798 * Initialize event state based on the perf_event_attr::disabled.
1800 static inline void perf_event__state_init(struct perf_event *event) in perf_event__state_init() argument
1802 event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF : in perf_event__state_init()
1806 static void __perf_event_read_size(struct perf_event *event, int nr_siblings) in __perf_event_read_size() argument
1812 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) in __perf_event_read_size()
1815 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) in __perf_event_read_size()
1818 if (event->attr.read_format & PERF_FORMAT_ID) in __perf_event_read_size()
1821 if (event->attr.read_format & PERF_FORMAT_LOST) in __perf_event_read_size()
1824 if (event->attr.read_format & PERF_FORMAT_GROUP) { in __perf_event_read_size()
1830 event->read_size = size; in __perf_event_read_size()
1833 static void __perf_event_header_size(struct perf_event *event, u64 sample_type) in __perf_event_header_size() argument
1839 size += sizeof(data->ip); in __perf_event_header_size()
1842 size += sizeof(data->addr); in __perf_event_header_size()
1845 size += sizeof(data->period); in __perf_event_header_size()
1848 size += sizeof(data->weight.full); in __perf_event_header_size()
1851 size += event->read_size; in __perf_event_header_size()
1854 size += sizeof(data->data_src.val); in __perf_event_header_size()
1857 size += sizeof(data->txn); in __perf_event_header_size()
1860 size += sizeof(data->phys_addr); in __perf_event_header_size()
1863 size += sizeof(data->cgroup); in __perf_event_header_size()
1866 size += sizeof(data->data_page_size); in __perf_event_header_size()
1869 size += sizeof(data->code_page_size); in __perf_event_header_size()
1871 event->header_size = size; in __perf_event_header_size()
1878 static void perf_event__header_size(struct perf_event *event) in perf_event__header_size() argument
1880 __perf_event_read_size(event, in perf_event__header_size()
1881 event->group_leader->nr_siblings); in perf_event__header_size()
1882 __perf_event_header_size(event, event->attr.sample_type); in perf_event__header_size()
1885 static void perf_event__id_header_size(struct perf_event *event) in perf_event__id_header_size() argument
1888 u64 sample_type = event->attr.sample_type; in perf_event__id_header_size()
1892 size += sizeof(data->tid_entry); in perf_event__id_header_size()
1895 size += sizeof(data->time); in perf_event__id_header_size()
1898 size += sizeof(data->id); in perf_event__id_header_size()
1901 size += sizeof(data->id); in perf_event__id_header_size()
1904 size += sizeof(data->stream_id); in perf_event__id_header_size()
1907 size += sizeof(data->cpu_entry); in perf_event__id_header_size()
1909 event->id_header_size = size; in perf_event__id_header_size()
1912 static bool perf_event_validate_size(struct perf_event *event) in perf_event_validate_size() argument
1915 * The values computed here will be over-written when we actually in perf_event_validate_size()
1916 * attach the event. in perf_event_validate_size()
1918 __perf_event_read_size(event, event->group_leader->nr_siblings + 1); in perf_event_validate_size()
1919 __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); in perf_event_validate_size()
1920 perf_event__id_header_size(event); in perf_event_validate_size()
1926 if (event->read_size + event->header_size + in perf_event_validate_size()
1927 event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) in perf_event_validate_size()
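For reference, __perf_event_read_size() above mirrors what userspace sees from read() on the event fd. A sketch of the maximal non-group layout, where each optional field is present only if the matching PERF_FORMAT_* bit is set in attr.read_format:

#include <linux/perf_event.h>
#include <stdint.h>

/* Sketch: layout of a read() from a non-group event with every read_format
 * bit set; with PERF_FORMAT_GROUP a count and per-member {value,id,lost}
 * entries are returned instead. */
struct single_event_read {
	uint64_t value;        /* always present */
	uint64_t time_enabled; /* PERF_FORMAT_TOTAL_TIME_ENABLED */
	uint64_t time_running; /* PERF_FORMAT_TOTAL_TIME_RUNNING */
	uint64_t id;           /* PERF_FORMAT_ID */
	uint64_t lost;         /* PERF_FORMAT_LOST */
};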
1933 static void perf_group_attach(struct perf_event *event) in perf_group_attach() argument
1935 struct perf_event *group_leader = event->group_leader, *pos; in perf_group_attach()
1937 lockdep_assert_held(&event->ctx->lock); in perf_group_attach()
1943 if (event->attach_state & PERF_ATTACH_GROUP) in perf_group_attach()
1946 event->attach_state |= PERF_ATTACH_GROUP; in perf_group_attach()
1948 if (group_leader == event) in perf_group_attach()
1951 WARN_ON_ONCE(group_leader->ctx != event->ctx); in perf_group_attach()
1953 group_leader->group_caps &= event->event_caps; in perf_group_attach()
1955 list_add_tail(&event->sibling_list, &group_leader->sibling_list); in perf_group_attach()
1956 group_leader->nr_siblings++; in perf_group_attach()
1957 group_leader->group_generation++; in perf_group_attach()
1966 * Remove an event from the lists for its context.
1967 * Must be called with ctx->mutex and ctx->lock held.
1970 list_del_event(struct perf_event *event, struct perf_event_context *ctx) in list_del_event() argument
1972 WARN_ON_ONCE(event->ctx != ctx); in list_del_event()
1973 lockdep_assert_held(&ctx->lock); in list_del_event()
1976 * We can have double detach due to exit/hot-unplug + close. in list_del_event()
1978 if (!(event->attach_state & PERF_ATTACH_CONTEXT)) in list_del_event()
1981 event->attach_state &= ~PERF_ATTACH_CONTEXT; in list_del_event()
1983 ctx->nr_events--; in list_del_event()
1984 if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) in list_del_event()
1985 ctx->nr_user--; in list_del_event()
1986 if (event->attr.inherit_stat) in list_del_event()
1987 ctx->nr_stat--; in list_del_event()
1989 list_del_rcu(&event->event_entry); in list_del_event()
1991 if (event->group_leader == event) in list_del_event()
1992 del_event_from_groups(event, ctx); in list_del_event()
1995 * If event was in error state, then keep it in list_del_event()
1998 * of error state is by explicit re-enabling in list_del_event()
1999 * of the event in list_del_event()
2001 if (event->state > PERF_EVENT_STATE_OFF) { in list_del_event()
2002 perf_cgroup_event_disable(event, ctx); in list_del_event()
2003 perf_event_set_state(event, PERF_EVENT_STATE_OFF); in list_del_event()
2006 ctx->generation++; in list_del_event()
2007 event->pmu_ctx->nr_events--; in list_del_event()
2011 perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event) in perf_aux_output_match() argument
2016 if (!event->pmu->aux_output_match) in perf_aux_output_match()
2019 return event->pmu->aux_output_match(aux_event); in perf_aux_output_match()
2022 static void put_event(struct perf_event *event);
2023 static void event_sched_out(struct perf_event *event,
2026 static void perf_put_aux_event(struct perf_event *event) in perf_put_aux_event() argument
2028 struct perf_event_context *ctx = event->ctx; in perf_put_aux_event()
2032 * If event uses aux_event tear down the link in perf_put_aux_event()
2034 if (event->aux_event) { in perf_put_aux_event()
2035 iter = event->aux_event; in perf_put_aux_event()
2036 event->aux_event = NULL; in perf_put_aux_event()
2042 * If the event is an aux_event, tear down all links to in perf_put_aux_event()
2045 for_each_sibling_event(iter, event->group_leader) { in perf_put_aux_event()
2046 if (iter->aux_event != event) in perf_put_aux_event()
2049 iter->aux_event = NULL; in perf_put_aux_event()
2050 put_event(event); in perf_put_aux_event()
2058 perf_event_set_state(event, PERF_EVENT_STATE_ERROR); in perf_put_aux_event()
2062 static bool perf_need_aux_event(struct perf_event *event) in perf_need_aux_event() argument
2064 return !!event->attr.aux_output || !!event->attr.aux_sample_size; in perf_need_aux_event()
2067 static int perf_get_aux_event(struct perf_event *event, in perf_get_aux_event() argument
2071 * Our group leader must be an aux event if we want to be in perf_get_aux_event()
2072 * an aux_output. This way, the aux event will precede its in perf_get_aux_event()
2082 if (event->attr.aux_output && event->attr.aux_sample_size) in perf_get_aux_event()
2085 if (event->attr.aux_output && in perf_get_aux_event()
2086 !perf_aux_output_match(event, group_leader)) in perf_get_aux_event()
2089 if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux) in perf_get_aux_event()
2092 if (!atomic_long_inc_not_zero(&group_leader->refcount)) in perf_get_aux_event()
2096 * Link aux_outputs to their aux event; this is undone in in perf_get_aux_event()
2101 event->aux_event = group_leader; in perf_get_aux_event()
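The aux_output linkage established above is requested from userspace by setting perf_event_attr.aux_output on the sampled event and opening it with group_fd pointing at an AUX-capable leader (for example an intel_pt event). A minimal sketch; attr is assumed to be otherwise fully initialized by the caller:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch: open an event whose samples are redirected into the AUX buffer
 * of an already-open, AUX-capable group leader (pt_fd). */
static int open_aux_output_event(struct perf_event_attr *attr, int pt_fd)
{
	attr->aux_output = 1;
	return syscall(__NR_perf_event_open, attr, 0, -1, pt_fd, 0);
}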
2106 static inline struct list_head *get_event_list(struct perf_event *event) in get_event_list() argument
2108 return event->attr.pinned ? &event->pmu_ctx->pinned_active : in get_event_list()
2109 &event->pmu_ctx->flexible_active; in get_event_list()
2118 static inline void perf_remove_sibling_event(struct perf_event *event) in perf_remove_sibling_event() argument
2120 event_sched_out(event, event->ctx); in perf_remove_sibling_event()
2121 perf_event_set_state(event, PERF_EVENT_STATE_ERROR); in perf_remove_sibling_event()
2124 static void perf_group_detach(struct perf_event *event) in perf_group_detach() argument
2126 struct perf_event *leader = event->group_leader; in perf_group_detach()
2128 struct perf_event_context *ctx = event->ctx; in perf_group_detach()
2130 lockdep_assert_held(&ctx->lock); in perf_group_detach()
2133 * We can have double detach due to exit/hot-unplug + close. in perf_group_detach()
2135 if (!(event->attach_state & PERF_ATTACH_GROUP)) in perf_group_detach()
2138 event->attach_state &= ~PERF_ATTACH_GROUP; in perf_group_detach()
2140 perf_put_aux_event(event); in perf_group_detach()
2145 if (leader != event) { in perf_group_detach()
2146 list_del_init(&event->sibling_list); in perf_group_detach()
2147 event->group_leader->nr_siblings--; in perf_group_detach()
2148 event->group_leader->group_generation++; in perf_group_detach()
2153 * If this was a group event with sibling events then in perf_group_detach()
2157 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { in perf_group_detach()
2159 if (sibling->event_caps & PERF_EV_CAP_SIBLING) in perf_group_detach()
2162 sibling->group_leader = sibling; in perf_group_detach()
2163 list_del_init(&sibling->sibling_list); in perf_group_detach()
2166 sibling->group_caps = event->group_caps; in perf_group_detach()
2168 if (sibling->attach_state & PERF_ATTACH_CONTEXT) { in perf_group_detach()
2169 add_event_to_groups(sibling, event->ctx); in perf_group_detach()
2171 if (sibling->state == PERF_EVENT_STATE_ACTIVE) in perf_group_detach()
2172 list_add_tail(&sibling->active_list, get_event_list(sibling)); in perf_group_detach()
2175 WARN_ON_ONCE(sibling->ctx != event->ctx); in perf_group_detach()
2187 static void perf_child_detach(struct perf_event *event) in perf_child_detach() argument
2189 struct perf_event *parent_event = event->parent; in perf_child_detach()
2191 if (!(event->attach_state & PERF_ATTACH_CHILD)) in perf_child_detach()
2194 event->attach_state &= ~PERF_ATTACH_CHILD; in perf_child_detach()
2199 lockdep_assert_held(&parent_event->child_mutex); in perf_child_detach()
2201 sync_child_event(event); in perf_child_detach()
2202 list_del_init(&event->child_list); in perf_child_detach()
2205 static bool is_orphaned_event(struct perf_event *event) in is_orphaned_event() argument
2207 return event->state == PERF_EVENT_STATE_DEAD; in is_orphaned_event()
2211 event_filter_match(struct perf_event *event) in event_filter_match() argument
2213 return (event->cpu == -1 || event->cpu == smp_processor_id()) && in event_filter_match()
2214 perf_cgroup_match(event); in event_filter_match()
2218 event_sched_out(struct perf_event *event, struct perf_event_context *ctx) in event_sched_out() argument
2220 struct perf_event_pmu_context *epc = event->pmu_ctx; in event_sched_out()
2221 struct perf_cpu_pmu_context *cpc = this_cpu_ptr(epc->pmu->cpu_pmu_context); in event_sched_out()
2224 // XXX cpc serialization, probably per-cpu IRQ disabled in event_sched_out()
2226 WARN_ON_ONCE(event->ctx != ctx); in event_sched_out()
2227 lockdep_assert_held(&ctx->lock); in event_sched_out()
2229 if (event->state != PERF_EVENT_STATE_ACTIVE) in event_sched_out()
2237 list_del_init(&event->active_list); in event_sched_out()
2239 perf_pmu_disable(event->pmu); in event_sched_out()
2241 event->pmu->del(event, 0); in event_sched_out()
2242 event->oncpu = -1; in event_sched_out()
2244 if (event->pending_disable) { in event_sched_out()
2245 event->pending_disable = 0; in event_sched_out()
2246 perf_cgroup_event_disable(event, ctx); in event_sched_out()
2250 if (event->pending_sigtrap) { in event_sched_out()
2253 event->pending_sigtrap = 0; in event_sched_out()
2255 !event->pending_work) { in event_sched_out()
2256 event->pending_work = 1; in event_sched_out()
2258 WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); in event_sched_out()
2259 task_work_add(current, &event->pending_task, TWA_RESUME); in event_sched_out()
2262 local_dec(&event->ctx->nr_pending); in event_sched_out()
2265 perf_event_set_state(event, state); in event_sched_out()
2267 if (!is_software_event(event)) in event_sched_out()
2268 cpc->active_oncpu--; in event_sched_out()
2269 if (event->attr.freq && event->attr.sample_freq) in event_sched_out()
2270 ctx->nr_freq--; in event_sched_out()
2271 if (event->attr.exclusive || !cpc->active_oncpu) in event_sched_out()
2272 cpc->exclusive = 0; in event_sched_out()
2274 perf_pmu_enable(event->pmu); in event_sched_out()
2280 struct perf_event *event; in group_sched_out() local
2282 if (group_event->state != PERF_EVENT_STATE_ACTIVE) in group_sched_out()
2285 perf_assert_pmu_disabled(group_event->pmu_ctx->pmu); in group_sched_out()
2292 for_each_sibling_event(event, group_event) in group_sched_out()
2293 event_sched_out(event, ctx); in group_sched_out()
2301 * Cross CPU call to remove a performance event
2303 * We disable the event on the hardware level first. After that we
2307 __perf_remove_from_context(struct perf_event *event, in __perf_remove_from_context() argument
2312 struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx; in __perf_remove_from_context()
2315 if (ctx->is_active & EVENT_TIME) { in __perf_remove_from_context()
2325 event->pending_disable = 1; in __perf_remove_from_context()
2326 event_sched_out(event, ctx); in __perf_remove_from_context()
2328 perf_group_detach(event); in __perf_remove_from_context()
2330 perf_child_detach(event); in __perf_remove_from_context()
2331 list_del_event(event, ctx); in __perf_remove_from_context()
2333 event->state = PERF_EVENT_STATE_DEAD; in __perf_remove_from_context()
2335 if (!pmu_ctx->nr_events) { in __perf_remove_from_context()
2336 pmu_ctx->rotate_necessary = 0; in __perf_remove_from_context()
2338 if (ctx->task && ctx->is_active) { in __perf_remove_from_context()
2341 cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context); in __perf_remove_from_context()
2342 WARN_ON_ONCE(cpc->task_epc && cpc->task_epc != pmu_ctx); in __perf_remove_from_context()
2343 cpc->task_epc = NULL; in __perf_remove_from_context()
2347 if (!ctx->nr_events && ctx->is_active) { in __perf_remove_from_context()
2348 if (ctx == &cpuctx->ctx) in __perf_remove_from_context()
2351 ctx->is_active = 0; in __perf_remove_from_context()
2352 if (ctx->task) { in __perf_remove_from_context()
2353 WARN_ON_ONCE(cpuctx->task_ctx != ctx); in __perf_remove_from_context()
2354 cpuctx->task_ctx = NULL; in __perf_remove_from_context()
2360 * Remove the event from a task's (or a CPU's) list of events.
2362 * If event->ctx is a cloned context, callers must make sure that
2363 * every task struct that event->ctx->task could possibly point to
2365 * that only calls us on the top-level context, which can't be a clone.
2369 static void perf_remove_from_context(struct perf_event *event, unsigned long flags) in perf_remove_from_context() argument
2371 struct perf_event_context *ctx = event->ctx; in perf_remove_from_context()
2373 lockdep_assert_held(&ctx->mutex); in perf_remove_from_context()
2380 raw_spin_lock_irq(&ctx->lock); in perf_remove_from_context()
2381 if (!ctx->is_active) { in perf_remove_from_context()
2382 __perf_remove_from_context(event, this_cpu_ptr(&perf_cpu_context), in perf_remove_from_context()
2384 raw_spin_unlock_irq(&ctx->lock); in perf_remove_from_context()
2387 raw_spin_unlock_irq(&ctx->lock); in perf_remove_from_context()
2389 event_function_call(event, __perf_remove_from_context, (void *)flags); in perf_remove_from_context()
2393 * Cross CPU call to disable a performance event
2395 static void __perf_event_disable(struct perf_event *event, in __perf_event_disable() argument
2400 if (event->state < PERF_EVENT_STATE_INACTIVE) in __perf_event_disable()
2403 if (ctx->is_active & EVENT_TIME) { in __perf_event_disable()
2405 update_cgrp_time_from_event(event); in __perf_event_disable()
2408 perf_pmu_disable(event->pmu_ctx->pmu); in __perf_event_disable()
2410 if (event == event->group_leader) in __perf_event_disable()
2411 group_sched_out(event, ctx); in __perf_event_disable()
2413 event_sched_out(event, ctx); in __perf_event_disable()
2415 perf_event_set_state(event, PERF_EVENT_STATE_OFF); in __perf_event_disable()
2416 perf_cgroup_event_disable(event, ctx); in __perf_event_disable()
2418 perf_pmu_enable(event->pmu_ctx->pmu); in __perf_event_disable()
2422 * Disable an event.
2424 * If event->ctx is a cloned context, callers must make sure that
2425 * every task struct that event->ctx->task could possibly point to
2428 * hold the top-level event's child_mutex, so any descendant that
2431 * When called from perf_pending_irq it's OK because event->ctx
2435 static void _perf_event_disable(struct perf_event *event) in _perf_event_disable() argument
2437 struct perf_event_context *ctx = event->ctx; in _perf_event_disable()
2439 raw_spin_lock_irq(&ctx->lock); in _perf_event_disable()
2440 if (event->state <= PERF_EVENT_STATE_OFF) { in _perf_event_disable()
2441 raw_spin_unlock_irq(&ctx->lock); in _perf_event_disable()
2444 raw_spin_unlock_irq(&ctx->lock); in _perf_event_disable()
2446 event_function_call(event, __perf_event_disable, NULL); in _perf_event_disable()
2449 void perf_event_disable_local(struct perf_event *event) in perf_event_disable_local() argument
2451 event_function_local(event, __perf_event_disable, NULL); in perf_event_disable_local()
2458 void perf_event_disable(struct perf_event *event) in perf_event_disable() argument
2462 ctx = perf_event_ctx_lock(event); in perf_event_disable()
2463 _perf_event_disable(event); in perf_event_disable()
2464 perf_event_ctx_unlock(event, ctx); in perf_event_disable()
2468 void perf_event_disable_inatomic(struct perf_event *event) in perf_event_disable_inatomic() argument
2470 event->pending_disable = 1; in perf_event_disable_inatomic()
2471 irq_work_queue(&event->pending_irq); in perf_event_disable_inatomic()
2476 static void perf_log_throttle(struct perf_event *event, int enable);
2477 static void perf_log_itrace_start(struct perf_event *event);
2480 event_sched_in(struct perf_event *event, struct perf_event_context *ctx) in event_sched_in() argument
2482 struct perf_event_pmu_context *epc = event->pmu_ctx; in event_sched_in()
2483 struct perf_cpu_pmu_context *cpc = this_cpu_ptr(epc->pmu->cpu_pmu_context); in event_sched_in()
2486 WARN_ON_ONCE(event->ctx != ctx); in event_sched_in()
2488 lockdep_assert_held(&ctx->lock); in event_sched_in()
2490 if (event->state <= PERF_EVENT_STATE_OFF) in event_sched_in()
2493 WRITE_ONCE(event->oncpu, smp_processor_id()); in event_sched_in()
2495 * Order event::oncpu write to happen before the ACTIVE state is in event_sched_in()
2497 * ->oncpu if it sees ACTIVE. in event_sched_in()
2500 perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE); in event_sched_in()
2507 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { in event_sched_in()
2508 perf_log_throttle(event, 1); in event_sched_in()
2509 event->hw.interrupts = 0; in event_sched_in()
2512 perf_pmu_disable(event->pmu); in event_sched_in()
2514 perf_log_itrace_start(event); in event_sched_in()
2516 if (event->pmu->add(event, PERF_EF_START)) { in event_sched_in()
2517 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE); in event_sched_in()
2518 event->oncpu = -1; in event_sched_in()
2519 ret = -EAGAIN; in event_sched_in()
2523 if (!is_software_event(event)) in event_sched_in()
2524 cpc->active_oncpu++; in event_sched_in()
2525 if (event->attr.freq && event->attr.sample_freq) in event_sched_in()
2526 ctx->nr_freq++; in event_sched_in()
2528 if (event->attr.exclusive) in event_sched_in()
2529 cpc->exclusive = 1; in event_sched_in()
2532 perf_pmu_enable(event->pmu); in event_sched_in()
2540 struct perf_event *event, *partial_group = NULL; in group_sched_in() local
2541 struct pmu *pmu = group_event->pmu_ctx->pmu; in group_sched_in()
2543 if (group_event->state == PERF_EVENT_STATE_OFF) in group_sched_in()
2546 pmu->start_txn(pmu, PERF_PMU_TXN_ADD); in group_sched_in()
2554 for_each_sibling_event(event, group_event) { in group_sched_in()
2555 if (event_sched_in(event, ctx)) { in group_sched_in()
2556 partial_group = event; in group_sched_in()
2561 if (!pmu->commit_txn(pmu)) in group_sched_in()
2568 * The events up to the failed event are scheduled out normally. in group_sched_in()
2570 for_each_sibling_event(event, group_event) { in group_sched_in()
2571 if (event == partial_group) in group_sched_in()
2574 event_sched_out(event, ctx); in group_sched_in()
2579 pmu->cancel_txn(pmu); in group_sched_in()
2580 return -EAGAIN; in group_sched_in()
2584 * Work out whether we can put this event group on the CPU now.
2586 static int group_can_go_on(struct perf_event *event, int can_add_hw) in group_can_go_on() argument
2588 struct perf_event_pmu_context *epc = event->pmu_ctx; in group_can_go_on()
2589 struct perf_cpu_pmu_context *cpc = this_cpu_ptr(epc->pmu->cpu_pmu_context); in group_can_go_on()
2594 if (event->group_caps & PERF_EV_CAP_SOFTWARE) in group_can_go_on()
2600 if (cpc->exclusive) in group_can_go_on()
2606 if (event->attr.exclusive && !list_empty(get_event_list(event))) in group_can_go_on()
2615 static void add_event_to_ctx(struct perf_event *event, in add_event_to_ctx() argument
2618 list_add_event(event, ctx); in add_event_to_ctx()
2619 perf_group_attach(event); in add_event_to_ctx()
2627 if (!cpuctx->task_ctx) in task_ctx_sched_out()
2630 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) in task_ctx_sched_out()
2639 ctx_sched_in(&cpuctx->ctx, EVENT_PINNED); in perf_event_sched_in()
2642 ctx_sched_in(&cpuctx->ctx, EVENT_FLEXIBLE); in perf_event_sched_in()
2649 * - CPU pinned (EVENT_CPU | EVENT_PINNED)
2650 * - task pinned (EVENT_PINNED)
2651 * - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
2652 * - task flexible (EVENT_FLEXIBLE).
2655 * time an event is added, only do it for the groups of equal priority and
2664 * event to the context or enabling existing event in the context. We can
2682 perf_ctx_disable(&cpuctx->ctx); in ctx_resched()
2691 * - EVENT_CPU: schedule out corresponding groups; in ctx_resched()
2692 * - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups; in ctx_resched()
2693 * - otherwise, do nothing more. in ctx_resched()
2696 ctx_sched_out(&cpuctx->ctx, event_type); in ctx_resched()
2698 ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE); in ctx_resched()
2702 perf_ctx_enable(&cpuctx->ctx); in ctx_resched()
2710 struct perf_event_context *task_ctx = cpuctx->task_ctx; in perf_pmu_resched()
2718 * Cross CPU call to install and enable a performance event
2721 * things like ctx->is_active and cpuctx->task_ctx are set.
2725 struct perf_event *event = info; in __perf_install_in_context() local
2726 struct perf_event_context *ctx = event->ctx; in __perf_install_in_context()
2728 struct perf_event_context *task_ctx = cpuctx->task_ctx; in __perf_install_in_context()
2732 raw_spin_lock(&cpuctx->ctx.lock); in __perf_install_in_context()
2733 if (ctx->task) { in __perf_install_in_context()
2734 raw_spin_lock(&ctx->lock); in __perf_install_in_context()
2737 reprogram = (ctx->task == current); in __perf_install_in_context()
2743 * If its not running, we don't care, ctx->lock will in __perf_install_in_context()
2746 if (task_curr(ctx->task) && !reprogram) { in __perf_install_in_context()
2747 ret = -ESRCH; in __perf_install_in_context()
2751 WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx); in __perf_install_in_context()
2753 raw_spin_lock(&task_ctx->lock); in __perf_install_in_context()
2757 if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) { in __perf_install_in_context()
2759 * If the current cgroup doesn't match the event's in __perf_install_in_context()
2763 reprogram = cgroup_is_descendant(cgrp->css.cgroup, in __perf_install_in_context()
2764 event->cgrp->css.cgroup); in __perf_install_in_context()
2770 add_event_to_ctx(event, ctx); in __perf_install_in_context()
2771 ctx_resched(cpuctx, task_ctx, get_event_type(event)); in __perf_install_in_context()
2773 add_event_to_ctx(event, ctx); in __perf_install_in_context()
2782 static bool exclusive_event_installable(struct perf_event *event,
2786 * Attach a performance event to a context.
2792 struct perf_event *event, in perf_install_in_context() argument
2795 struct task_struct *task = READ_ONCE(ctx->task); in perf_install_in_context()
2797 lockdep_assert_held(&ctx->mutex); in perf_install_in_context()
2799 WARN_ON_ONCE(!exclusive_event_installable(event, ctx)); in perf_install_in_context()
2801 if (event->cpu != -1) in perf_install_in_context()
2802 WARN_ON_ONCE(event->cpu != cpu); in perf_install_in_context()
2805 * Ensures that if we can observe event->ctx, both the event and ctx in perf_install_in_context()
2808 smp_store_release(&event->ctx, ctx); in perf_install_in_context()
2812 * without IPI. Except when this is the first event for the context, in in perf_install_in_context()
2813 * that case we need the magic of the IPI to set ctx->is_active. in perf_install_in_context()
2816 * event will issue the IPI and reprogram the hardware. in perf_install_in_context()
2818 if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && in perf_install_in_context()
2819 ctx->nr_events && !is_cgroup_event(event)) { in perf_install_in_context()
2820 raw_spin_lock_irq(&ctx->lock); in perf_install_in_context()
2821 if (ctx->task == TASK_TOMBSTONE) { in perf_install_in_context()
2822 raw_spin_unlock_irq(&ctx->lock); in perf_install_in_context()
2825 add_event_to_ctx(event, ctx); in perf_install_in_context()
2826 raw_spin_unlock_irq(&ctx->lock); in perf_install_in_context()
2831 cpu_function_call(cpu, __perf_install_in_context, event); in perf_install_in_context()
2842 * Installing events is tricky because we cannot rely on ctx->is_active in perf_install_in_context()
2843 * to be set in case this is the nr_events 0 -> 1 transition. in perf_install_in_context()
2857 * our task->perf_event_ctxp[] store, such that it will in fact take in perf_install_in_context()
2866 * This smp_mb() orders the task->perf_event_ctxp[] store with the in perf_install_in_context()
2873 if (!task_function_call(task, __perf_install_in_context, event)) in perf_install_in_context()
2876 raw_spin_lock_irq(&ctx->lock); in perf_install_in_context()
2877 task = ctx->task; in perf_install_in_context()
2881 * cannot happen), and we hold ctx->mutex, which serializes us in perf_install_in_context()
2884 raw_spin_unlock_irq(&ctx->lock); in perf_install_in_context()
2888 * If the task is not running, ctx->lock will avoid it becoming so, in perf_install_in_context()
2889 * thus we can safely install the event. in perf_install_in_context()
2892 raw_spin_unlock_irq(&ctx->lock); in perf_install_in_context()
2895 add_event_to_ctx(event, ctx); in perf_install_in_context()
2896 raw_spin_unlock_irq(&ctx->lock); in perf_install_in_context()
2900 * Cross CPU call to enable a performance event
2902 static void __perf_event_enable(struct perf_event *event, in __perf_event_enable() argument
2907 struct perf_event *leader = event->group_leader; in __perf_event_enable()
2910 if (event->state >= PERF_EVENT_STATE_INACTIVE || in __perf_event_enable()
2911 event->state <= PERF_EVENT_STATE_ERROR) in __perf_event_enable()
2914 if (ctx->is_active) in __perf_event_enable()
2917 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE); in __perf_event_enable()
2918 perf_cgroup_event_enable(event, ctx); in __perf_event_enable()
2920 if (!ctx->is_active) in __perf_event_enable()
2923 if (!event_filter_match(event)) { in __perf_event_enable()
2929 * If the event is in a group and isn't the group leader, in __perf_event_enable()
2932 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) { in __perf_event_enable()
2937 task_ctx = cpuctx->task_ctx; in __perf_event_enable()
2938 if (ctx->task) in __perf_event_enable()
2941 ctx_resched(cpuctx, task_ctx, get_event_type(event)); in __perf_event_enable()
2945 * Enable an event.
2947 * If event->ctx is a cloned context, callers must make sure that
2948 * every task struct that event->ctx->task could possibly point to
2953 static void _perf_event_enable(struct perf_event *event) in _perf_event_enable() argument
2955 struct perf_event_context *ctx = event->ctx; in _perf_event_enable()
2957 raw_spin_lock_irq(&ctx->lock); in _perf_event_enable()
2958 if (event->state >= PERF_EVENT_STATE_INACTIVE || in _perf_event_enable()
2959 event->state < PERF_EVENT_STATE_ERROR) { in _perf_event_enable()
2961 raw_spin_unlock_irq(&ctx->lock); in _perf_event_enable()
2966 * If the event is in error state, clear that first. in _perf_event_enable()
2968 * That way, if we see the event in error state below, we know that it in _perf_event_enable()
2970 * been scheduled away before the cross-call arrived. in _perf_event_enable()
2972 if (event->state == PERF_EVENT_STATE_ERROR) { in _perf_event_enable()
2976 if (event->event_caps & PERF_EV_CAP_SIBLING && in _perf_event_enable()
2977 event->group_leader == event) in _perf_event_enable()
2980 event->state = PERF_EVENT_STATE_OFF; in _perf_event_enable()
2982 raw_spin_unlock_irq(&ctx->lock); in _perf_event_enable()
2984 event_function_call(event, __perf_event_enable, NULL); in _perf_event_enable()
2990 void perf_event_enable(struct perf_event *event) in perf_event_enable() argument
2994 ctx = perf_event_ctx_lock(event); in perf_event_enable()
2995 _perf_event_enable(event); in perf_event_enable()
2996 perf_event_ctx_unlock(event, ctx); in perf_event_enable()
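/*
 * Illustrative sketch: from user space the enable/disable paths above are
 * reached through the event file descriptor. Assumes "fd" came from
 * perf_event_open(); kernel-created events call perf_event_enable()/
 * perf_event_disable() directly.
 */
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static void toggle_event(int fd)
{
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);			/* this event only */
	ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);	/* leader and all siblings */
}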
3001 struct perf_event *event; member
3008 struct perf_event *event = sd->event; in __perf_event_stop() local
3011 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) in __perf_event_stop()
3019 * so we need to check again lest we try to stop another CPU's event. in __perf_event_stop()
3021 if (READ_ONCE(event->oncpu) != smp_processor_id()) in __perf_event_stop()
3022 return -EAGAIN; in __perf_event_stop()
3024 event->pmu->stop(event, PERF_EF_UPDATE); in __perf_event_stop()
3032 * Since this is happening on an event-local CPU, no trace is lost in __perf_event_stop()
3035 if (sd->restart) in __perf_event_stop()
3036 event->pmu->start(event, 0); in __perf_event_stop()
3041 static int perf_event_stop(struct perf_event *event, int restart) in perf_event_stop() argument
3044 .event = event, in perf_event_stop()
3050 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) in perf_event_stop()
3057 * We only want to restart ACTIVE events, so if the event goes in perf_event_stop()
3058 * inactive here (event->oncpu==-1), there's nothing more to do; in perf_event_stop()
3059 * fall through with ret==-ENXIO. in perf_event_stop()
3061 ret = cpu_function_call(READ_ONCE(event->oncpu), in perf_event_stop()
3063 } while (ret == -EAGAIN); in perf_event_stop()
3074 * event::addr_filter_ranges array and bump the event::addr_filters_gen;
3075 * (p2) when an event is scheduled in (pmu::add), it calls
3079 * If (p1) happens while the event is active, we restart it to force (p2).
3082 * pre-existing mappings, called once when new filters arrive via SET_FILTER
3090 void perf_event_addr_filters_sync(struct perf_event *event) in perf_event_addr_filters_sync() argument
3092 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); in perf_event_addr_filters_sync()
3094 if (!has_addr_filter(event)) in perf_event_addr_filters_sync()
3097 raw_spin_lock(&ifh->lock); in perf_event_addr_filters_sync()
3098 if (event->addr_filters_gen != event->hw.addr_filters_gen) { in perf_event_addr_filters_sync()
3099 event->pmu->addr_filters_sync(event); in perf_event_addr_filters_sync()
3100 event->hw.addr_filters_gen = event->addr_filters_gen; in perf_event_addr_filters_sync()
3102 raw_spin_unlock(&ifh->lock); in perf_event_addr_filters_sync()
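/*
 * Illustrative sketch: address filters arrive as a text string via the
 * SET_FILTER ioctl; the sync helper above later pushes the resolved ranges to
 * the PMU. The binary path and range below are made up, and the event must be
 * on a PMU that supports address filtering (e.g. intel_pt).
 */
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int trace_only_this_range(int fd)
{
	const char *filter = "filter 0x401000/0x2000@/usr/bin/example";

	return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
}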
3106 static int _perf_event_refresh(struct perf_event *event, int refresh) in _perf_event_refresh() argument
3111 if (event->attr.inherit || !is_sampling_event(event)) in _perf_event_refresh()
3112 return -EINVAL; in _perf_event_refresh()
3114 atomic_add(refresh, &event->event_limit); in _perf_event_refresh()
3115 _perf_event_enable(event); in _perf_event_refresh()
3123 int perf_event_refresh(struct perf_event *event, int refresh) in perf_event_refresh() argument
3128 ctx = perf_event_ctx_lock(event); in perf_event_refresh()
3129 ret = _perf_event_refresh(event, refresh); in perf_event_refresh()
3130 perf_event_ctx_unlock(event, ctx); in perf_event_refresh()
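/*
 * Illustrative sketch: event_limit, which the refresh above bumps, is the
 * number of further overflows the event is allowed before the kernel disables
 * it and signals POLL_HUP. Assumes "fd" is a sampling event opened disabled.
 */
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static void run_for_n_overflows(int fd, int n)
{
	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_REFRESH, n);	/* enable; auto-disable after n overflows */
	/* ... run the workload; POLL_HUP arrives once the budget is used up ... */
}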
3145 if (!bp->attr.disabled) in perf_event_modify_breakpoint()
3152 * Copy event-type-independent attributes that may be modified.
3157 to->sig_data = from->sig_data; in perf_event_modify_copy_attr()
3160 static int perf_event_modify_attr(struct perf_event *event, in perf_event_modify_attr() argument
3167 if (event->attr.type != attr->type) in perf_event_modify_attr()
3168 return -EINVAL; in perf_event_modify_attr()
3170 switch (event->attr.type) { in perf_event_modify_attr()
3176 return -EOPNOTSUPP; in perf_event_modify_attr()
3179 WARN_ON_ONCE(event->ctx->parent_ctx); in perf_event_modify_attr()
3181 mutex_lock(&event->child_mutex); in perf_event_modify_attr()
3183 * Event-type-independent attributes must be copied before event-type in perf_event_modify_attr()
3187 perf_event_modify_copy_attr(&event->attr, attr); in perf_event_modify_attr()
3188 err = func(event, attr); in perf_event_modify_attr()
3191 list_for_each_entry(child, &event->child_list, child_list) { in perf_event_modify_attr()
3192 perf_event_modify_copy_attr(&child->attr, attr); in perf_event_modify_attr()
3198 mutex_unlock(&event->child_mutex); in perf_event_modify_attr()
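/*
 * Illustrative sketch: per the switch above, only PERF_TYPE_BREAKPOINT events
 * currently accept PERF_EVENT_IOC_MODIFY_ATTRIBUTES. Assumes "fd" is an
 * existing hardware watchpoint; "new_addr" is whatever should be watched next.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int retarget_watchpoint(int fd, void *new_addr)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type	= PERF_TYPE_BREAKPOINT;	/* must match the event's type */
	attr.size	= sizeof(attr);
	attr.bp_type	= HW_BREAKPOINT_W;
	attr.bp_addr	= (unsigned long)new_addr;
	attr.bp_len	= HW_BREAKPOINT_LEN_8;

	return ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr);
}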
3205 struct perf_event_context *ctx = pmu_ctx->ctx; in __pmu_ctx_sched_out()
3206 struct perf_event *event, *tmp; in __pmu_ctx_sched_out() local
3207 struct pmu *pmu = pmu_ctx->pmu; in __pmu_ctx_sched_out()
3209 if (ctx->task && !ctx->is_active) { in __pmu_ctx_sched_out()
3212 cpc = this_cpu_ptr(pmu->cpu_pmu_context); in __pmu_ctx_sched_out()
3213 WARN_ON_ONCE(cpc->task_epc && cpc->task_epc != pmu_ctx); in __pmu_ctx_sched_out()
3214 cpc->task_epc = NULL; in __pmu_ctx_sched_out()
3222 list_for_each_entry_safe(event, tmp, in __pmu_ctx_sched_out()
3223 &pmu_ctx->pinned_active, in __pmu_ctx_sched_out()
3225 group_sched_out(event, ctx); in __pmu_ctx_sched_out()
3229 list_for_each_entry_safe(event, tmp, in __pmu_ctx_sched_out()
3230 &pmu_ctx->flexible_active, in __pmu_ctx_sched_out()
3232 group_sched_out(event, ctx); in __pmu_ctx_sched_out()
3238 pmu_ctx->rotate_necessary = 0; in __pmu_ctx_sched_out()
3248 int is_active = ctx->is_active; in ctx_sched_out()
3250 lockdep_assert_held(&ctx->lock); in ctx_sched_out()
3252 if (likely(!ctx->nr_events)) { in ctx_sched_out()
3256 WARN_ON_ONCE(ctx->is_active); in ctx_sched_out()
3257 if (ctx->task) in ctx_sched_out()
3258 WARN_ON_ONCE(cpuctx->task_ctx); in ctx_sched_out()
3275 update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); in ctx_sched_out()
3277 * CPU-release for the below ->is_active store, in ctx_sched_out()
3283 ctx->is_active &= ~event_type; in ctx_sched_out()
3284 if (!(ctx->is_active & EVENT_ALL)) in ctx_sched_out()
3285 ctx->is_active = 0; in ctx_sched_out()
3287 if (ctx->task) { in ctx_sched_out()
3288 WARN_ON_ONCE(cpuctx->task_ctx != ctx); in ctx_sched_out()
3289 if (!ctx->is_active) in ctx_sched_out()
3290 cpuctx->task_ctx = NULL; in ctx_sched_out()
3293 is_active ^= ctx->is_active; /* changed bits */ in ctx_sched_out()
3295 list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) in ctx_sched_out()
3310 lockdep_assert_held(&ctx1->lock); in context_equiv()
3311 lockdep_assert_held(&ctx2->lock); in context_equiv()
3314 if (ctx1->pin_count || ctx2->pin_count) in context_equiv()
3318 if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen) in context_equiv()
3322 if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation) in context_equiv()
3329 if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && in context_equiv()
3330 ctx1->parent_gen == ctx2->parent_gen) in context_equiv()
3337 static void __perf_event_sync_stat(struct perf_event *event, in __perf_event_sync_stat() argument
3342 if (!event->attr.inherit_stat) in __perf_event_sync_stat()
3346 * Update the event value, we cannot use perf_event_read() in __perf_event_sync_stat()
3349 * we know the event must be on the current CPU, therefore we in __perf_event_sync_stat()
3352 if (event->state == PERF_EVENT_STATE_ACTIVE) in __perf_event_sync_stat()
3353 event->pmu->read(event); in __perf_event_sync_stat()
3355 perf_event_update_time(event); in __perf_event_sync_stat()
3358 * In order to keep per-task stats reliable we need to flip the event in __perf_event_sync_stat()
3361 value = local64_read(&next_event->count); in __perf_event_sync_stat()
3362 value = local64_xchg(&event->count, value); in __perf_event_sync_stat()
3363 local64_set(&next_event->count, value); in __perf_event_sync_stat()
3365 swap(event->total_time_enabled, next_event->total_time_enabled); in __perf_event_sync_stat()
3366 swap(event->total_time_running, next_event->total_time_running); in __perf_event_sync_stat()
3371 perf_event_update_userpage(event); in __perf_event_sync_stat()
3378 struct perf_event *event, *next_event; in perf_event_sync_stat() local
3380 if (!ctx->nr_stat) in perf_event_sync_stat()
3385 event = list_first_entry(&ctx->event_list, in perf_event_sync_stat()
3388 next_event = list_first_entry(&next_ctx->event_list, in perf_event_sync_stat()
3391 while (&event->event_entry != &ctx->event_list && in perf_event_sync_stat()
3392 &next_event->event_entry != &next_ctx->event_list) { in perf_event_sync_stat()
3394 __perf_event_sync_stat(event, next_event); in perf_event_sync_stat()
3396 event = list_next_entry(event, event_entry); in perf_event_sync_stat()
3414 if (!prev_ctx->nr_task_data) in perf_event_swap_task_ctx_data()
3418 &prev_ctx->pmu_ctx_list, &next_ctx->pmu_ctx_list, in perf_event_swap_task_ctx_data()
3421 if (WARN_ON_ONCE(prev_epc->pmu != next_epc->pmu)) in perf_event_swap_task_ctx_data()
3430 if (prev_epc->pmu->swap_task_ctx) in perf_event_swap_task_ctx_data()
3431 prev_epc->pmu->swap_task_ctx(prev_epc, next_epc); in perf_event_swap_task_ctx_data()
3433 swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); in perf_event_swap_task_ctx_data()
3442 list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { in perf_ctx_sched_task_cb()
3443 cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context); in perf_ctx_sched_task_cb()
3445 if (cpc->sched_cb_usage && pmu_ctx->pmu->sched_task) in perf_ctx_sched_task_cb()
3446 pmu_ctx->pmu->sched_task(pmu_ctx, sched_in); in perf_ctx_sched_task_cb()
3453 struct perf_event_context *ctx = task->perf_event_ctxp; in perf_event_context_sched_out()
3462 next_ctx = rcu_dereference(next->perf_event_ctxp); in perf_event_context_sched_out()
3466 parent = rcu_dereference(ctx->parent_ctx); in perf_event_context_sched_out()
3467 next_parent = rcu_dereference(next_ctx->parent_ctx); in perf_event_context_sched_out()
3478 * lock (including re-checking that neither has been in perf_event_context_sched_out()
3483 raw_spin_lock(&ctx->lock); in perf_event_context_sched_out()
3484 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); in perf_event_context_sched_out()
3489 /* PMIs are disabled; ctx->nr_pending is stable. */ in perf_event_context_sched_out()
3490 if (local_read(&ctx->nr_pending) || in perf_event_context_sched_out()
3491 local_read(&next_ctx->nr_pending)) { in perf_event_context_sched_out()
3494 * events that rely on the ctx->task relation. in perf_event_context_sched_out()
3496 raw_spin_unlock(&next_ctx->lock); in perf_event_context_sched_out()
3501 WRITE_ONCE(ctx->task, next); in perf_event_context_sched_out()
3502 WRITE_ONCE(next_ctx->task, task); in perf_event_context_sched_out()
3512 * ctx->task and ctx->task_ctx_data are immaterial in perf_event_context_sched_out()
3514 * ctx->lock which we're now holding. in perf_event_context_sched_out()
3516 RCU_INIT_POINTER(task->perf_event_ctxp, next_ctx); in perf_event_context_sched_out()
3517 RCU_INIT_POINTER(next->perf_event_ctxp, ctx); in perf_event_context_sched_out()
3523 raw_spin_unlock(&next_ctx->lock); in perf_event_context_sched_out()
3524 raw_spin_unlock(&ctx->lock); in perf_event_context_sched_out()
3530 raw_spin_lock(&ctx->lock); in perf_event_context_sched_out()
3538 raw_spin_unlock(&ctx->lock); in perf_event_context_sched_out()
3547 struct perf_cpu_pmu_context *cpc = this_cpu_ptr(pmu->cpu_pmu_context); in perf_sched_cb_dec()
3552 if (!--cpc->sched_cb_usage) in perf_sched_cb_dec()
3553 list_del(&cpc->sched_cb_entry); in perf_sched_cb_dec()
3559 struct perf_cpu_pmu_context *cpc = this_cpu_ptr(pmu->cpu_pmu_context); in perf_sched_cb_inc()
3561 if (!cpc->sched_cb_usage++) in perf_sched_cb_inc()
3562 list_add(&cpc->sched_cb_entry, this_cpu_ptr(&sched_cb_list)); in perf_sched_cb_inc()
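/*
 * Illustrative sketch: a PMU driver that wants the sched_task() callback
 * (for example to flush per-task hardware buffers) brackets it with the
 * refcounts above from its add()/del() methods. my_pmu_add/my_pmu_del and
 * the needs_ctx_switch_callback() predicate are made up for this example.
 */
#include <linux/perf_event.h>

static int my_pmu_add(struct perf_event *event, int flags)
{
	/* ... program the counter ... */
	if (needs_ctx_switch_callback(event))	/* hypothetical driver predicate */
		perf_sched_cb_inc(event->pmu);
	return 0;
}

static void my_pmu_del(struct perf_event *event, int flags)
{
	if (needs_ctx_switch_callback(event))
		perf_sched_cb_dec(event->pmu);
	/* ... stop the counter ... */
}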
3572 * This callback is relevant even to per-cpu events; for example multi-event PEBS needs it to provide PID/TID information and to flush queued records at context switch.
3581 pmu = cpc->epc.pmu; in __perf_pmu_sched_task()
3584 if (WARN_ON_ONCE(!pmu->sched_task)) in __perf_pmu_sched_task()
3587 perf_ctx_lock(cpuctx, cpuctx->task_ctx); in __perf_pmu_sched_task()
3590 pmu->sched_task(cpc->task_epc, sched_in); in __perf_pmu_sched_task()
3593 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); in __perf_pmu_sched_task()
3603 /* cpuctx->task_ctx will be handled in perf_event_context_sched_in/out */ in perf_pmu_sched_task()
3604 if (prev == next || cpuctx->task_ctx) in perf_pmu_sched_task()
3618 * We stop each event and update the event value in event->count.
3621 * This does not protect us against NMI, but disable() sets the disabled bit in
3622 * the control field of the event _before_ accessing the event control register.
3623 * If an NMI hits, then it will not restart the event.
3639 * cgroup events are system-wide mode only in __perf_event_task_sched_out()
3649 return le->group_index < re->group_index; in perf_less_group_idx()
3665 static void __heap_add(struct min_heap *heap, struct perf_event *event) in __heap_add() argument
3667 struct perf_event **itrs = heap->data; in __heap_add()
3669 if (event) { in __heap_add()
3670 itrs[heap->nr] = event; in __heap_add()
3671 heap->nr++; in __heap_add()
3679 if (!pmu_ctx->ctx->task) in __link_epc()
3682 cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context); in __link_epc()
3683 WARN_ON_ONCE(cpc->task_epc && cpc->task_epc != pmu_ctx); in __link_epc()
3684 cpc->task_epc = pmu_ctx; in __link_epc()
3697 /* Space for per CPU and/or any CPU event iterators. */ in visit_groups_merge()
3703 if (pmu->filter && pmu->filter(pmu, cpu)) in visit_groups_merge()
3706 if (!ctx->task) { in visit_groups_merge()
3709 .data = cpuctx->heap, in visit_groups_merge()
3711 .size = cpuctx->heap_size, in visit_groups_merge()
3714 lockdep_assert_held(&cpuctx->ctx.lock); in visit_groups_merge()
3717 if (cpuctx->cgrp) in visit_groups_merge()
3718 css = &cpuctx->cgrp->css; in visit_groups_merge()
3727 __heap_add(&event_heap, perf_event_groups_first(groups, -1, pmu, NULL)); in visit_groups_merge()
3734 for (; css; css = css->parent) in visit_groups_merge()
3735 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, css->cgroup)); in visit_groups_merge()
3739 __link_epc((*evt)->pmu_ctx); in visit_groups_merge()
3740 perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu); in visit_groups_merge()
3761 * Because the userpage is strictly per-event (there is no concept of context,
3763 * so no context indirection), every userpage would need updating when context time starts :-(
3767 static inline bool event_update_userpage(struct perf_event *event) in event_update_userpage() argument
3769 if (likely(!atomic_read(&event->mmap_count))) in event_update_userpage()
3772 perf_event_update_time(event); in event_update_userpage()
3773 perf_event_update_userpage(event); in event_update_userpage()
3780 struct perf_event *event; in group_update_userpage() local
3785 for_each_sibling_event(event, group_event) in group_update_userpage()
3786 event_update_userpage(event); in group_update_userpage()
3789 static int merge_sched_in(struct perf_event *event, void *data) in merge_sched_in() argument
3791 struct perf_event_context *ctx = event->ctx; in merge_sched_in()
3794 if (event->state <= PERF_EVENT_STATE_OFF) in merge_sched_in()
3797 if (!event_filter_match(event)) in merge_sched_in()
3800 if (group_can_go_on(event, *can_add_hw)) { in merge_sched_in()
3801 if (!group_sched_in(event, ctx)) in merge_sched_in()
3802 list_add_tail(&event->active_list, get_event_list(event)); in merge_sched_in()
3805 if (event->state == PERF_EVENT_STATE_INACTIVE) { in merge_sched_in()
3807 if (event->attr.pinned) { in merge_sched_in()
3808 perf_cgroup_event_disable(event, ctx); in merge_sched_in()
3809 perf_event_set_state(event, PERF_EVENT_STATE_ERROR); in merge_sched_in()
3813 event->pmu_ctx->rotate_necessary = 1; in merge_sched_in()
3814 cpc = this_cpu_ptr(event->pmu_ctx->pmu->cpu_pmu_context); in merge_sched_in()
3816 group_update_userpage(event); in merge_sched_in()
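/*
 * Illustrative sketch: a pinned event that loses the scheduling race above is
 * put into ERROR state and disabled for good; user space notices because
 * read() on its fd returns 0 (see __perf_read() further down). Assumes "fd"
 * is a counting event opened with attr.pinned = 1 and the default read_format.
 */
#include <stdio.h>
#include <unistd.h>

static int read_pinned(int fd, unsigned long long *val)
{
	ssize_t n = read(fd, val, sizeof(*val));

	if (n == 0) {
		fprintf(stderr, "pinned event could not be kept on the PMU\n");
		return -1;
	}
	return n == sizeof(*val) ? 0 : -1;
}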
3829 visit_groups_merge(ctx, &ctx->pinned_groups, in ctx_pinned_sched_in()
3833 list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { in ctx_pinned_sched_in()
3835 visit_groups_merge(ctx, &ctx->pinned_groups, in ctx_pinned_sched_in()
3836 smp_processor_id(), pmu_ctx->pmu, in ctx_pinned_sched_in()
3848 visit_groups_merge(ctx, &ctx->flexible_groups, in ctx_flexible_sched_in()
3852 list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { in ctx_flexible_sched_in()
3854 visit_groups_merge(ctx, &ctx->flexible_groups, in ctx_flexible_sched_in()
3855 smp_processor_id(), pmu_ctx->pmu, in ctx_flexible_sched_in()
3870 int is_active = ctx->is_active; in ctx_sched_in()
3872 lockdep_assert_held(&ctx->lock); in ctx_sched_in()
3874 if (likely(!ctx->nr_events)) in ctx_sched_in()
3882 * CPU-release for the below ->is_active store, in ctx_sched_in()
3888 ctx->is_active |= (event_type | EVENT_TIME); in ctx_sched_in()
3889 if (ctx->task) { in ctx_sched_in()
3891 cpuctx->task_ctx = ctx; in ctx_sched_in()
3893 WARN_ON_ONCE(cpuctx->task_ctx != ctx); in ctx_sched_in()
3896 is_active ^= ctx->is_active; /* changed bits */ in ctx_sched_in()
3916 ctx = rcu_dereference(task->perf_event_ctxp); in perf_event_context_sched_in()
3920 if (cpuctx->task_ctx == ctx) { in perf_event_context_sched_in()
3933 * We must check ctx->nr_events while holding ctx->lock, such in perf_event_context_sched_in()
3936 if (!ctx->nr_events) in perf_event_context_sched_in()
3948 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) { in perf_event_context_sched_in()
3949 perf_ctx_disable(&cpuctx->ctx); in perf_event_context_sched_in()
3950 ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE); in perf_event_context_sched_in()
3955 perf_ctx_sched_task_cb(cpuctx->task_ctx, true); in perf_event_context_sched_in()
3957 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) in perf_event_context_sched_in()
3958 perf_ctx_enable(&cpuctx->ctx); in perf_event_context_sched_in()
3972 * We restore the event value and then enable it.
3975 * This does not protect us against NMI, but enable() sets the enabled bit in
3976 * the control field of the event _before_ accessing the event control register.
3977 * If an NMI hits, then it will keep the event running.
3991 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) in perf_calculate_period() argument
3993 u64 frequency = event->attr.sample_freq; in perf_calculate_period()
4009 * period = ------------------- in perf_calculate_period()
4022 a##_fls--; \ in perf_calculate_period()
4025 b##_fls--; \ in perf_calculate_period()
4067 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable) in perf_adjust_period() argument
4069 struct hw_perf_event *hwc = &event->hw; in perf_adjust_period()
4073 period = perf_calculate_period(event, nsec, count); in perf_adjust_period()
4075 delta = (s64)(period - hwc->sample_period); in perf_adjust_period()
4078 sample_period = hwc->sample_period + delta; in perf_adjust_period()
4083 hwc->sample_period = sample_period; in perf_adjust_period()
4085 if (local64_read(&hwc->period_left) > 8*sample_period) { in perf_adjust_period()
4087 event->pmu->stop(event, PERF_EF_UPDATE); in perf_adjust_period()
4089 local64_set(&hwc->period_left, 0); in perf_adjust_period()
4092 event->pmu->start(event, PERF_EF_RELOAD); in perf_adjust_period()
4104 struct perf_event *event; in perf_adjust_freq_unthr_context() local
4111 * - the context has events in frequency mode (needs freq adjust) in perf_adjust_freq_unthr_context()
4112 * - there are events to unthrottle on this cpu in perf_adjust_freq_unthr_context()
4114 if (!(ctx->nr_freq || unthrottle)) in perf_adjust_freq_unthr_context()
4117 raw_spin_lock(&ctx->lock); in perf_adjust_freq_unthr_context()
4119 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { in perf_adjust_freq_unthr_context()
4120 if (event->state != PERF_EVENT_STATE_ACTIVE) in perf_adjust_freq_unthr_context()
4123 // XXX use visit thingy to avoid the -1,cpu match in perf_adjust_freq_unthr_context()
4124 if (!event_filter_match(event)) in perf_adjust_freq_unthr_context()
4127 perf_pmu_disable(event->pmu); in perf_adjust_freq_unthr_context()
4129 hwc = &event->hw; in perf_adjust_freq_unthr_context()
4131 if (hwc->interrupts == MAX_INTERRUPTS) { in perf_adjust_freq_unthr_context()
4132 hwc->interrupts = 0; in perf_adjust_freq_unthr_context()
4133 perf_log_throttle(event, 1); in perf_adjust_freq_unthr_context()
4134 event->pmu->start(event, 0); in perf_adjust_freq_unthr_context()
4137 if (!event->attr.freq || !event->attr.sample_freq) in perf_adjust_freq_unthr_context()
4141 * stop the event and update event->count in perf_adjust_freq_unthr_context()
4143 event->pmu->stop(event, PERF_EF_UPDATE); in perf_adjust_freq_unthr_context()
4145 now = local64_read(&event->count); in perf_adjust_freq_unthr_context()
4146 delta = now - hwc->freq_count_stamp; in perf_adjust_freq_unthr_context()
4147 hwc->freq_count_stamp = now; in perf_adjust_freq_unthr_context()
4150 * restart the event in perf_adjust_freq_unthr_context()
4152 * we have stopped the event so tell that in perf_adjust_freq_unthr_context()
4157 perf_adjust_period(event, period, delta, false); in perf_adjust_freq_unthr_context()
4159 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); in perf_adjust_freq_unthr_context()
4161 perf_pmu_enable(event->pmu); in perf_adjust_freq_unthr_context()
4164 raw_spin_unlock(&ctx->lock); in perf_adjust_freq_unthr_context()
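/*
 * Illustrative sketch: the adjustment pass above only has work to do for
 * events opened in frequency mode, where the kernel keeps re-computing
 * sample_period to hit the requested rate.
 */
#include <linux/perf_event.h>

static void init_freq_mode_attr(struct perf_event_attr *attr)
{
	attr->type		= PERF_TYPE_HARDWARE;
	attr->config		= PERF_COUNT_HW_CPU_CYCLES;
	attr->size		= sizeof(*attr);
	attr->freq		= 1;	/* sample_freq is valid, not sample_period */
	attr->sample_freq	= 4000;	/* aim for roughly 4000 samples per second */
	attr->disabled		= 1;
}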
4168 * Move @event to the tail of @ctx's eligible events.
4170 static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event) in rotate_ctx() argument
4173 * Rotate the first entry to last among the non-pinned groups; rotation might be disabled (ctx->rotate_disable). in rotate_ctx()
4176 if (ctx->rotate_disable) in rotate_ctx()
4179 perf_event_groups_delete(&ctx->flexible_groups, event); in rotate_ctx()
4180 perf_event_groups_insert(&ctx->flexible_groups, event); in rotate_ctx()
4183 /* pick an event from the flexible_groups to rotate */
4187 struct perf_event *event; in ctx_event_to_rotate() local
4191 .pmu = pmu_ctx->pmu, in ctx_event_to_rotate()
4194 /* pick the first active flexible event */ in ctx_event_to_rotate()
4195 event = list_first_entry_or_null(&pmu_ctx->flexible_active, in ctx_event_to_rotate()
4197 if (event) in ctx_event_to_rotate()
4200 /* if no active flexible event, pick the first event */ in ctx_event_to_rotate()
4201 tree = &pmu_ctx->ctx->flexible_groups.tree; in ctx_event_to_rotate()
4203 if (!pmu_ctx->ctx->task) { in ctx_event_to_rotate()
4208 event = __node_2_pe(node); in ctx_event_to_rotate()
4212 key.cpu = -1; in ctx_event_to_rotate()
4215 event = __node_2_pe(node); in ctx_event_to_rotate()
4222 event = __node_2_pe(node); in ctx_event_to_rotate()
4229 pmu_ctx->rotate_necessary = 0; in ctx_event_to_rotate()
4231 return event; in ctx_event_to_rotate()
4244 * events, thus the event count values are stable. in perf_rotate_context()
4247 cpu_epc = &cpc->epc; in perf_rotate_context()
4248 pmu = cpu_epc->pmu; in perf_rotate_context()
4249 task_epc = cpc->task_epc; in perf_rotate_context()
4251 cpu_rotate = cpu_epc->rotate_necessary; in perf_rotate_context()
4252 task_rotate = task_epc ? task_epc->rotate_necessary : 0; in perf_rotate_context()
4257 perf_ctx_lock(cpuctx, cpuctx->task_ctx); in perf_rotate_context()
4270 update_context_time(task_epc->ctx); in perf_rotate_context()
4275 update_context_time(&cpuctx->ctx); in perf_rotate_context()
4277 rotate_ctx(&cpuctx->ctx, cpu_event); in perf_rotate_context()
4278 __pmu_ctx_sched_in(&cpuctx->ctx, pmu); in perf_rotate_context()
4282 rotate_ctx(task_epc->ctx, task_event); in perf_rotate_context()
4285 __pmu_ctx_sched_in(task_epc->ctx, pmu); in perf_rotate_context()
4288 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); in perf_rotate_context()
4305 perf_adjust_freq_unthr_context(&cpuctx->ctx, !!throttled); in perf_event_task_tick()
4308 ctx = rcu_dereference(current->perf_event_ctxp); in perf_event_task_tick()
4314 static int event_enable_on_exec(struct perf_event *event, in event_enable_on_exec() argument
4317 if (!event->attr.enable_on_exec) in event_enable_on_exec()
4320 event->attr.enable_on_exec = 0; in event_enable_on_exec()
4321 if (event->state >= PERF_EVENT_STATE_INACTIVE) in event_enable_on_exec()
4324 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE); in event_enable_on_exec()
4330 * Enable all of a task's events that have been marked enable-on-exec.
4338 struct perf_event *event; in perf_event_enable_on_exec() local
4343 if (WARN_ON_ONCE(current->perf_event_ctxp != ctx)) in perf_event_enable_on_exec()
4346 if (!ctx->nr_events) in perf_event_enable_on_exec()
4353 list_for_each_entry(event, &ctx->event_list, event_entry) { in perf_event_enable_on_exec()
4354 enabled |= event_enable_on_exec(event, ctx); in perf_event_enable_on_exec()
4355 event_type |= get_event_type(event); in perf_event_enable_on_exec()
4359 * Unclone and reschedule this context if we enabled any event. in perf_event_enable_on_exec()
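/*
 * Illustrative sketch: enable_on_exec is set by the opener so a freshly
 * forked child is only counted from its exec() onwards; the typical pattern
 * is to open the event on the child's pid while it is stopped between fork()
 * and exec(). remove_on_exec (handled just below) is the opposite knob.
 */
#include <linux/perf_event.h>

static void init_enable_on_exec_attr(struct perf_event_attr *attr)
{
	attr->type		= PERF_TYPE_HARDWARE;
	attr->config		= PERF_COUNT_HW_INSTRUCTIONS;
	attr->size		= sizeof(*attr);
	attr->disabled		= 1;	/* start OFF ... */
	attr->enable_on_exec	= 1;	/* ... switch on when the task execs */
}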
4376 static void perf_remove_from_owner(struct perf_event *event);
4377 static void perf_event_exit_event(struct perf_event *event,
4382 * remove-on-exec, and feeds their values back to parent events.
4387 struct perf_event *event, *next; in perf_event_remove_on_exec() local
4391 mutex_lock(&ctx->mutex); in perf_event_remove_on_exec()
4393 if (WARN_ON_ONCE(ctx->task != current)) in perf_event_remove_on_exec()
4396 list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { in perf_event_remove_on_exec()
4397 if (!event->attr.remove_on_exec) in perf_event_remove_on_exec()
4400 if (!is_kernel_event(event)) in perf_event_remove_on_exec()
4401 perf_remove_from_owner(event); in perf_event_remove_on_exec()
4405 perf_event_exit_event(event, ctx); in perf_event_remove_on_exec()
4408 raw_spin_lock_irqsave(&ctx->lock, flags); in perf_event_remove_on_exec()
4411 raw_spin_unlock_irqrestore(&ctx->lock, flags); in perf_event_remove_on_exec()
4414 mutex_unlock(&ctx->mutex); in perf_event_remove_on_exec()
4421 struct perf_event *event; member
4426 static int __perf_event_read_cpu(struct perf_event *event, int event_cpu) in __perf_event_read_cpu() argument
4430 if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) { in __perf_event_read_cpu()
4444 * Cross CPU call to read the hardware event
4449 struct perf_event *sub, *event = data->event; in __perf_event_read() local
4450 struct perf_event_context *ctx = event->ctx; in __perf_event_read()
4452 struct pmu *pmu = event->pmu; in __perf_event_read()
4458 * event->count would have been updated to a recent sample in __perf_event_read()
4459 * when the event was scheduled out. in __perf_event_read()
4461 if (ctx->task && cpuctx->task_ctx != ctx) in __perf_event_read()
4464 raw_spin_lock(&ctx->lock); in __perf_event_read()
4465 if (ctx->is_active & EVENT_TIME) { in __perf_event_read()
4467 update_cgrp_time_from_event(event); in __perf_event_read()
4470 perf_event_update_time(event); in __perf_event_read()
4471 if (data->group) in __perf_event_read()
4472 perf_event_update_sibling_time(event); in __perf_event_read()
4474 if (event->state != PERF_EVENT_STATE_ACTIVE) in __perf_event_read()
4477 if (!data->group) { in __perf_event_read()
4478 pmu->read(event); in __perf_event_read()
4479 data->ret = 0; in __perf_event_read()
4483 pmu->start_txn(pmu, PERF_PMU_TXN_READ); in __perf_event_read()
4485 pmu->read(event); in __perf_event_read()
4487 for_each_sibling_event(sub, event) { in __perf_event_read()
4488 if (sub->state == PERF_EVENT_STATE_ACTIVE) { in __perf_event_read()
4490 * Use sibling's PMU rather than @event's since in __perf_event_read()
4493 sub->pmu->read(sub); in __perf_event_read()
4497 data->ret = pmu->commit_txn(pmu); in __perf_event_read()
4500 raw_spin_unlock(&ctx->lock); in __perf_event_read()
4503 static inline u64 perf_event_count(struct perf_event *event) in perf_event_count() argument
4505 return local64_read(&event->count) + atomic64_read(&event->child_count); in perf_event_count()
4508 static void calc_timer_values(struct perf_event *event, in calc_timer_values() argument
4516 ctx_time = perf_event_time_now(event, *now); in calc_timer_values()
4517 __perf_update_times(event, ctx_time, enabled, running); in calc_timer_values()
4521 * NMI-safe method to read a local event, that is an event that
4523 * - either for the current task, or for this CPU
4524 * - does not have inherit set, for inherited task events
4526 * - must not have a pmu::count method
4528 int perf_event_read_local(struct perf_event *event, u64 *value, in perf_event_read_local() argument
4541 * It must not be an event with inherit set; we cannot read all child counters from atomic context. in perf_event_read_local()
4544 if (event->attr.inherit) { in perf_event_read_local()
4545 ret = -EOPNOTSUPP; in perf_event_read_local()
4549 /* If this is a per-task event, it must be for current */ in perf_event_read_local()
4550 if ((event->attach_state & PERF_ATTACH_TASK) && in perf_event_read_local()
4551 event->hw.target != current) { in perf_event_read_local()
4552 ret = -EINVAL; in perf_event_read_local()
4556 /* If this is a per-CPU event, it must be for this CPU */ in perf_event_read_local()
4557 if (!(event->attach_state & PERF_ATTACH_TASK) && in perf_event_read_local()
4558 event->cpu != smp_processor_id()) { in perf_event_read_local()
4559 ret = -EINVAL; in perf_event_read_local()
4563 /* If this is a pinned event it must be running on this CPU */ in perf_event_read_local()
4564 if (event->attr.pinned && event->oncpu != smp_processor_id()) { in perf_event_read_local()
4565 ret = -EBUSY; in perf_event_read_local()
4570 * If the event is currently on this CPU, it's either a per-task event or in perf_event_read_local()
4572 * a per-CPU event local to this CPU, and it must be ACTIVE (otherwise oncpu == -1). in perf_event_read_local()
4574 if (event->oncpu == smp_processor_id()) in perf_event_read_local()
4575 event->pmu->read(event); in perf_event_read_local()
4577 *value = local64_read(&event->count); in perf_event_read_local()
4581 calc_timer_values(event, &__now, &__enabled, &__running); in perf_event_read_local()
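/*
 * Illustrative sketch: perf_event_read_local() is the NMI-safe reader used,
 * for instance, by BPF programs; the caller has to satisfy the constraints
 * checked above (current-task or this-CPU event, no inherit). The helper name
 * is made up for this example.
 */
#include <linux/perf_event.h>

static u64 snapshot_local_count(struct perf_event *event)
{
	u64 value, enabled, running;

	if (perf_event_read_local(event, &value, &enabled, &running))
		return 0;	/* wrong CPU/task, inherit set, or not readable here */

	return value;		/* enabled/running allow scaling for multiplexing */
}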
4593 static int perf_event_read(struct perf_event *event, bool group) in perf_event_read() argument
4595 enum perf_event_state state = READ_ONCE(event->state); in perf_event_read()
4599 * If event is enabled and currently active on a CPU, update the in perf_event_read()
4600 * value in the event structure: in perf_event_read()
4607 * Orders the ->state and ->oncpu loads such that if we see in perf_event_read()
4608 * ACTIVE we must also see the right ->oncpu. in perf_event_read()
4614 event_cpu = READ_ONCE(event->oncpu); in perf_event_read()
4619 .event = event, in perf_event_read()
4625 event_cpu = __perf_event_read_cpu(event, event_cpu); in perf_event_read()
4631 * If event_cpu isn't a valid CPU it means the event got in perf_event_read()
4632 * scheduled out and that will have updated the event count. in perf_event_read()
4634 * Therefore, either way, we'll have an up-to-date event count in perf_event_read()
4642 struct perf_event_context *ctx = event->ctx; in perf_event_read()
4645 raw_spin_lock_irqsave(&ctx->lock, flags); in perf_event_read()
4646 state = event->state; in perf_event_read()
4648 raw_spin_unlock_irqrestore(&ctx->lock, flags); in perf_event_read()
4656 if (ctx->is_active & EVENT_TIME) { in perf_event_read()
4658 update_cgrp_time_from_event(event); in perf_event_read()
4661 perf_event_update_time(event); in perf_event_read()
4663 perf_event_update_sibling_time(event); in perf_event_read()
4664 raw_spin_unlock_irqrestore(&ctx->lock, flags); in perf_event_read()
4675 raw_spin_lock_init(&ctx->lock); in __perf_event_init_context()
4676 mutex_init(&ctx->mutex); in __perf_event_init_context()
4677 INIT_LIST_HEAD(&ctx->pmu_ctx_list); in __perf_event_init_context()
4678 perf_event_groups_init(&ctx->pinned_groups); in __perf_event_init_context()
4679 perf_event_groups_init(&ctx->flexible_groups); in __perf_event_init_context()
4680 INIT_LIST_HEAD(&ctx->event_list); in __perf_event_init_context()
4681 refcount_set(&ctx->refcount, 1); in __perf_event_init_context()
4687 epc->pmu = pmu; in __perf_init_event_pmu_context()
4688 INIT_LIST_HEAD(&epc->pmu_ctx_entry); in __perf_init_event_pmu_context()
4689 INIT_LIST_HEAD(&epc->pinned_active); in __perf_init_event_pmu_context()
4690 INIT_LIST_HEAD(&epc->flexible_active); in __perf_init_event_pmu_context()
4691 atomic_set(&epc->refcount, 1); in __perf_init_event_pmu_context()
4705 ctx->task = get_task_struct(task); in alloc_perf_context()
4725 return ERR_PTR(-ESRCH); in find_lively_task_by_vpid()
4734 find_get_context(struct task_struct *task, struct perf_event *event) in find_get_context() argument
4742 /* Must be root to operate on a CPU event: */ in find_get_context()
4743 err = perf_allow_cpu(&event->attr); in find_get_context()
4747 cpuctx = per_cpu_ptr(&perf_cpu_context, event->cpu); in find_get_context()
4748 ctx = &cpuctx->ctx; in find_get_context()
4750 raw_spin_lock_irqsave(&ctx->lock, flags); in find_get_context()
4751 ++ctx->pin_count; in find_get_context()
4752 raw_spin_unlock_irqrestore(&ctx->lock, flags); in find_get_context()
4757 err = -EINVAL; in find_get_context()
4762 ++ctx->pin_count; in find_get_context()
4764 raw_spin_unlock_irqrestore(&ctx->lock, flags); in find_get_context()
4770 err = -ENOMEM; in find_get_context()
4775 mutex_lock(&task->perf_event_mutex); in find_get_context()
4780 if (task->flags & PF_EXITING) in find_get_context()
4781 err = -ESRCH; in find_get_context()
4782 else if (task->perf_event_ctxp) in find_get_context()
4783 err = -EAGAIN; in find_get_context()
4786 ++ctx->pin_count; in find_get_context()
4787 rcu_assign_pointer(task->perf_event_ctxp, ctx); in find_get_context()
4789 mutex_unlock(&task->perf_event_mutex); in find_get_context()
4794 if (err == -EAGAIN) in find_get_context()
4808 struct perf_event *event) in find_get_pmu_context() argument
4813 if (!ctx->task) { in find_get_pmu_context()
4816 cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); in find_get_pmu_context()
4817 epc = &cpc->epc; in find_get_pmu_context()
4818 raw_spin_lock_irq(&ctx->lock); in find_get_pmu_context()
4819 if (!epc->ctx) { in find_get_pmu_context()
4820 atomic_set(&epc->refcount, 1); in find_get_pmu_context()
4821 epc->embedded = 1; in find_get_pmu_context()
4822 list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); in find_get_pmu_context()
4823 epc->ctx = ctx; in find_get_pmu_context()
4825 WARN_ON_ONCE(epc->ctx != ctx); in find_get_pmu_context()
4826 atomic_inc(&epc->refcount); in find_get_pmu_context()
4828 raw_spin_unlock_irq(&ctx->lock); in find_get_pmu_context()
4834 return ERR_PTR(-ENOMEM); in find_get_pmu_context()
4836 if (event->attach_state & PERF_ATTACH_TASK_DATA) { in find_get_pmu_context()
4840 return ERR_PTR(-ENOMEM); in find_get_pmu_context()
4849 * lockdep_assert_held(&ctx->mutex); in find_get_pmu_context()
4852 * child_ctx->mutex. in find_get_pmu_context()
4855 raw_spin_lock_irq(&ctx->lock); in find_get_pmu_context()
4856 list_for_each_entry(epc, &ctx->pmu_ctx_list, pmu_ctx_entry) { in find_get_pmu_context()
4857 if (epc->pmu == pmu) { in find_get_pmu_context()
4858 WARN_ON_ONCE(epc->ctx != ctx); in find_get_pmu_context()
4859 atomic_inc(&epc->refcount); in find_get_pmu_context()
4867 list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); in find_get_pmu_context()
4868 epc->ctx = ctx; in find_get_pmu_context()
4871 if (task_ctx_data && !epc->task_ctx_data) { in find_get_pmu_context()
4872 epc->task_ctx_data = task_ctx_data; in find_get_pmu_context()
4874 ctx->nr_task_data++; in find_get_pmu_context()
4876 raw_spin_unlock_irq(&ctx->lock); in find_get_pmu_context()
4886 WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount)); in get_pmu_ctx()
4893 kfree(epc->task_ctx_data); in free_epc_rcu()
4899 struct perf_event_context *ctx = epc->ctx; in put_pmu_ctx()
4905 * lockdep_assert_held(&ctx->mutex); in put_pmu_ctx()
4907 * can't because of the call-site in _free_event()/put_event() in put_pmu_ctx()
4908 * which isn't always called under ctx->mutex. in put_pmu_ctx()
4910 if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags)) in put_pmu_ctx()
4913 WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry)); in put_pmu_ctx()
4915 list_del_init(&epc->pmu_ctx_entry); in put_pmu_ctx()
4916 epc->ctx = NULL; in put_pmu_ctx()
4918 WARN_ON_ONCE(!list_empty(&epc->pinned_active)); in put_pmu_ctx()
4919 WARN_ON_ONCE(!list_empty(&epc->flexible_active)); in put_pmu_ctx()
4921 raw_spin_unlock_irqrestore(&ctx->lock, flags); in put_pmu_ctx()
4923 if (epc->embedded) in put_pmu_ctx()
4926 call_rcu(&epc->rcu_head, free_epc_rcu); in put_pmu_ctx()
4929 static void perf_event_free_filter(struct perf_event *event);
4933 struct perf_event *event = container_of(head, typeof(*event), rcu_head); in free_event_rcu() local
4935 if (event->ns) in free_event_rcu()
4936 put_pid_ns(event->ns); in free_event_rcu()
4937 perf_event_free_filter(event); in free_event_rcu()
4938 kmem_cache_free(perf_event_cache, event); in free_event_rcu()
4941 static void ring_buffer_attach(struct perf_event *event,
4944 static void detach_sb_event(struct perf_event *event) in detach_sb_event() argument
4946 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); in detach_sb_event()
4948 raw_spin_lock(&pel->lock); in detach_sb_event()
4949 list_del_rcu(&event->sb_list); in detach_sb_event()
4950 raw_spin_unlock(&pel->lock); in detach_sb_event()
4953 static bool is_sb_event(struct perf_event *event) in is_sb_event() argument
4955 struct perf_event_attr *attr = &event->attr; in is_sb_event()
4957 if (event->parent) in is_sb_event()
4960 if (event->attach_state & PERF_ATTACH_TASK) in is_sb_event()
4963 if (attr->mmap || attr->mmap_data || attr->mmap2 || in is_sb_event()
4964 attr->comm || attr->comm_exec || in is_sb_event()
4965 attr->task || attr->ksymbol || in is_sb_event()
4966 attr->context_switch || attr->text_poke || in is_sb_event()
4967 attr->bpf_event) in is_sb_event()
4972 static void unaccount_pmu_sb_event(struct perf_event *event) in unaccount_pmu_sb_event() argument
4974 if (is_sb_event(event)) in unaccount_pmu_sb_event()
4975 detach_sb_event(event); in unaccount_pmu_sb_event()
5000 static void unaccount_event(struct perf_event *event) in unaccount_event() argument
5004 if (event->parent) in unaccount_event()
5007 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) in unaccount_event()
5009 if (event->attr.mmap || event->attr.mmap_data) in unaccount_event()
5011 if (event->attr.build_id) in unaccount_event()
5013 if (event->attr.comm) in unaccount_event()
5015 if (event->attr.namespaces) in unaccount_event()
5017 if (event->attr.cgroup) in unaccount_event()
5019 if (event->attr.task) in unaccount_event()
5021 if (event->attr.freq) in unaccount_event()
5023 if (event->attr.context_switch) { in unaccount_event()
5027 if (is_cgroup_event(event)) in unaccount_event()
5029 if (has_branch_stack(event)) in unaccount_event()
5031 if (event->attr.ksymbol) in unaccount_event()
5033 if (event->attr.bpf_event) in unaccount_event()
5035 if (event->attr.text_poke) in unaccount_event()
5039 if (!atomic_add_unless(&perf_sched_count, -1, 1)) in unaccount_event()
5043 unaccount_pmu_sb_event(event); in unaccount_event()
5056 * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
5059 * 1) cpu-wide events in the presence of per-task events,
5060 * 2) per-task events in the presence of cpu-wide events,
5064 * _free_event()), the latter -- before the first perf_install_in_context().
5066 static int exclusive_event_init(struct perf_event *event) in exclusive_event_init() argument
5068 struct pmu *pmu = event->pmu; in exclusive_event_init()
5074 * Prevent co-existence of per-task and cpu-wide events on the in exclusive_event_init()
5077 * Negative pmu::exclusive_cnt means there are cpu-wide in exclusive_event_init()
5079 * per-task events. in exclusive_event_init()
5081 * Since this is called in perf_event_alloc() path, event::ctx in exclusive_event_init()
5083 * to mean "per-task event", because unlike other attach states it in exclusive_event_init()
5086 if (event->attach_state & PERF_ATTACH_TASK) { in exclusive_event_init()
5087 if (!atomic_inc_unless_negative(&pmu->exclusive_cnt)) in exclusive_event_init()
5088 return -EBUSY; in exclusive_event_init()
5090 if (!atomic_dec_unless_positive(&pmu->exclusive_cnt)) in exclusive_event_init()
5091 return -EBUSY; in exclusive_event_init()
5097 static void exclusive_event_destroy(struct perf_event *event) in exclusive_event_destroy() argument
5099 struct pmu *pmu = event->pmu; in exclusive_event_destroy()
5105 if (event->attach_state & PERF_ATTACH_TASK) in exclusive_event_destroy()
5106 atomic_dec(&pmu->exclusive_cnt); in exclusive_event_destroy()
5108 atomic_inc(&pmu->exclusive_cnt); in exclusive_event_destroy()
5113 if ((e1->pmu == e2->pmu) && in exclusive_event_match()
5114 (e1->cpu == e2->cpu || in exclusive_event_match()
5115 e1->cpu == -1 || in exclusive_event_match()
5116 e2->cpu == -1)) in exclusive_event_match()
5121 static bool exclusive_event_installable(struct perf_event *event, in exclusive_event_installable() argument
5125 struct pmu *pmu = event->pmu; in exclusive_event_installable()
5127 lockdep_assert_held(&ctx->mutex); in exclusive_event_installable()
5132 list_for_each_entry(iter_event, &ctx->event_list, event_entry) { in exclusive_event_installable()
5133 if (exclusive_event_match(iter_event, event)) in exclusive_event_installable()
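/*
 * Illustrative sketch: a PMU opts in to the exclusive accounting above by
 * setting PERF_PMU_CAP_EXCLUSIVE when it registers; "my_exclusive_pmu" is a
 * made-up example, with the usual callbacks elided.
 */
static struct pmu my_exclusive_pmu = {
	.capabilities	= PERF_PMU_CAP_EXCLUSIVE,
	/* .event_init, .add, .del, .start, .stop, .read, ... */
};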
5140 static void perf_addr_filters_splice(struct perf_event *event,
5143 static void _free_event(struct perf_event *event) in _free_event() argument
5145 irq_work_sync(&event->pending_irq); in _free_event()
5147 unaccount_event(event); in _free_event()
5149 security_perf_event_free(event); in _free_event()
5151 if (event->rb) { in _free_event()
5153 * Can happen when we close an event with re-directed output. in _free_event()
5158 mutex_lock(&event->mmap_mutex); in _free_event()
5159 ring_buffer_attach(event, NULL); in _free_event()
5160 mutex_unlock(&event->mmap_mutex); in _free_event()
5163 if (is_cgroup_event(event)) in _free_event()
5164 perf_detach_cgroup(event); in _free_event()
5166 if (!event->parent) { in _free_event()
5167 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) in _free_event()
5171 perf_event_free_bpf_prog(event); in _free_event()
5172 perf_addr_filters_splice(event, NULL); in _free_event()
5173 kfree(event->addr_filter_ranges); in _free_event()
5175 if (event->destroy) in _free_event()
5176 event->destroy(event); in _free_event()
5179 * Must be after ->destroy(), due to uprobe_perf_close() using in _free_event()
5182 if (event->hw.target) in _free_event()
5183 put_task_struct(event->hw.target); in _free_event()
5185 if (event->pmu_ctx) in _free_event()
5186 put_pmu_ctx(event->pmu_ctx); in _free_event()
5192 if (event->ctx) in _free_event()
5193 put_ctx(event->ctx); in _free_event()
5195 exclusive_event_destroy(event); in _free_event()
5196 module_put(event->pmu->module); in _free_event()
5198 call_rcu(&event->rcu_head, free_event_rcu); in _free_event()
5203 * where the event isn't exposed yet and inherited events.
5205 static void free_event(struct perf_event *event) in free_event() argument
5207 if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1, in free_event()
5208 "unexpected event refcount: %ld; ptr=%p\n", in free_event()
5209 atomic_long_read(&event->refcount), event)) { in free_event()
5210 /* leak to avoid use-after-free */ in free_event()
5214 _free_event(event); in free_event()
5218 * Remove user event from the owner task.
5220 static void perf_remove_from_owner(struct perf_event *event) in perf_remove_from_owner() argument
5228 * indeed free this event, otherwise we need to serialize on in perf_remove_from_owner()
5229 * owner->perf_event_mutex. in perf_remove_from_owner()
5231 owner = READ_ONCE(event->owner); in perf_remove_from_owner()
5245 * holding ctx->mutex which would be an inversion wrt. the in perf_remove_from_owner()
5249 * ctx->mutex. in perf_remove_from_owner()
5251 mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING); in perf_remove_from_owner()
5254 * We have to re-check the event->owner field, if it is cleared in perf_remove_from_owner()
5257 * event. in perf_remove_from_owner()
5259 if (event->owner) { in perf_remove_from_owner()
5260 list_del_init(&event->owner_entry); in perf_remove_from_owner()
5261 smp_store_release(&event->owner, NULL); in perf_remove_from_owner()
5263 mutex_unlock(&owner->perf_event_mutex); in perf_remove_from_owner()
5268 static void put_event(struct perf_event *event) in put_event() argument
5270 if (!atomic_long_dec_and_test(&event->refcount)) in put_event()
5273 _free_event(event); in put_event()
5277 * Kill an event dead; while event::refcount will preserve the event in perf_event_release_kernel()
5281 int perf_event_release_kernel(struct perf_event *event) in perf_event_release_kernel() argument
5283 struct perf_event_context *ctx = event->ctx; in perf_event_release_kernel()
5288 * If we got here through err_alloc: free_event(event); we will not in perf_event_release_kernel()
5292 WARN_ON_ONCE(event->attach_state & in perf_event_release_kernel()
5297 if (!is_kernel_event(event)) in perf_event_release_kernel()
5298 perf_remove_from_owner(event); in perf_event_release_kernel()
5300 ctx = perf_event_ctx_lock(event); in perf_event_release_kernel()
5301 WARN_ON_ONCE(ctx->parent_ctx); in perf_event_release_kernel()
5304 * Mark this event as STATE_DEAD, there is no external reference to it in perf_event_release_kernel()
5307 * Anybody acquiring event->child_mutex after the below loop _must_ in perf_event_release_kernel()
5314 perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); in perf_event_release_kernel()
5316 perf_event_ctx_unlock(event, ctx); in perf_event_release_kernel()
5319 mutex_lock(&event->child_mutex); in perf_event_release_kernel()
5320 list_for_each_entry(child, &event->child_list, child_list) { in perf_event_release_kernel()
5326 ctx = READ_ONCE(child->ctx); in perf_event_release_kernel()
5331 * Since the event cannot get freed while we hold the in perf_event_release_kernel()
5340 * can re-acquire child_mutex. in perf_event_release_kernel()
5342 mutex_unlock(&event->child_mutex); in perf_event_release_kernel()
5343 mutex_lock(&ctx->mutex); in perf_event_release_kernel()
5344 mutex_lock(&event->child_mutex); in perf_event_release_kernel()
5351 tmp = list_first_entry_or_null(&event->child_list, in perf_event_release_kernel()
5355 list_move(&child->child_list, &free_list); in perf_event_release_kernel()
5360 put_event(event); in perf_event_release_kernel()
5363 mutex_unlock(&event->child_mutex); in perf_event_release_kernel()
5364 mutex_unlock(&ctx->mutex); in perf_event_release_kernel()
5368 mutex_unlock(&event->child_mutex); in perf_event_release_kernel()
5371 void *var = &child->ctx->refcount; in perf_event_release_kernel()
5373 list_del(&child->child_list); in perf_event_release_kernel()
5377 * Wake any perf_event_free_task() waiting for this event to be in perf_event_release_kernel()
5385 put_event(event); /* Must be the 'last' reference */ in perf_event_release_kernel()
5395 perf_event_release_kernel(file->private_data); in perf_release()
5399 static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) in __perf_event_read_value() argument
5407 mutex_lock(&event->child_mutex); in __perf_event_read_value()
5409 (void)perf_event_read(event, false); in __perf_event_read_value()
5410 total += perf_event_count(event); in __perf_event_read_value()
5412 *enabled += event->total_time_enabled + in __perf_event_read_value()
5413 atomic64_read(&event->child_total_time_enabled); in __perf_event_read_value()
5414 *running += event->total_time_running + in __perf_event_read_value()
5415 atomic64_read(&event->child_total_time_running); in __perf_event_read_value()
5417 list_for_each_entry(child, &event->child_list, child_list) { in __perf_event_read_value()
5420 *enabled += child->total_time_enabled; in __perf_event_read_value()
5421 *running += child->total_time_running; in __perf_event_read_value()
5423 mutex_unlock(&event->child_mutex); in __perf_event_read_value()
5428 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) in perf_event_read_value() argument
5433 ctx = perf_event_ctx_lock(event); in perf_event_read_value()
5434 count = __perf_event_read_value(event, enabled, running); in perf_event_read_value()
5435 perf_event_ctx_unlock(event, ctx); in perf_event_read_value()
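/*
 * Illustrative sketch: in-kernel users typically pair
 * perf_event_create_kernel_counter() with perf_event_read_value() and scale
 * by enabled/running themselves, just as user space does with the read()
 * format. The helper name is made up for this example.
 */
#include <linux/perf_event.h>
#include <linux/math64.h>

static u64 read_scaled_count(struct perf_event *event)
{
	u64 enabled, running;
	u64 count = perf_event_read_value(event, &enabled, &running);

	if (running && running < enabled)	/* event was multiplexed */
		count = mul_u64_u64_div_u64(count, enabled, running);
	return count;
}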
5444 struct perf_event_context *ctx = leader->ctx; in __perf_read_group_add()
5454 raw_spin_lock_irqsave(&ctx->lock, flags); in __perf_read_group_add()
5460 * - leader->ctx->lock pins leader->sibling_list in __perf_read_group_add()
5461 * - parent->child_mutex pins parent->child_list in __perf_read_group_add()
5462 * - parent->ctx->mutex pins parent->sibling_list in __perf_read_group_add()
5464 * Because parent->ctx != leader->ctx (and child_list nests inside in __perf_read_group_add()
5465 * ctx->mutex), group destruction is not atomic between children, also in __perf_read_group_add()
5475 parent = leader->parent; in __perf_read_group_add()
5477 (parent->group_generation != leader->group_generation || in __perf_read_group_add()
5478 parent->nr_siblings != leader->nr_siblings)) { in __perf_read_group_add()
5479 ret = -ECHILD; in __perf_read_group_add()
5484 * Since we co-schedule groups, {enabled,running} times of siblings in __perf_read_group_add()
5489 values[n++] += leader->total_time_enabled + in __perf_read_group_add()
5490 atomic64_read(&leader->child_total_time_enabled); in __perf_read_group_add()
5494 values[n++] += leader->total_time_running + in __perf_read_group_add()
5495 atomic64_read(&leader->child_total_time_running); in __perf_read_group_add()
5505 values[n++] = atomic64_read(&leader->lost_samples); in __perf_read_group_add()
5512 values[n++] = atomic64_read(&sub->lost_samples); in __perf_read_group_add()
5516 raw_spin_unlock_irqrestore(&ctx->lock, flags); in __perf_read_group_add()
5520 static int perf_read_group(struct perf_event *event, in perf_read_group() argument
5523 struct perf_event *leader = event->group_leader, *child; in perf_read_group()
5524 struct perf_event_context *ctx = leader->ctx; in perf_read_group()
5528 lockdep_assert_held(&ctx->mutex); in perf_read_group()
5530 values = kzalloc(event->read_size, GFP_KERNEL); in perf_read_group()
5532 return -ENOMEM; in perf_read_group()
5534 values[0] = 1 + leader->nr_siblings; in perf_read_group()
5536 mutex_lock(&leader->child_mutex); in perf_read_group()
5542 list_for_each_entry(child, &leader->child_list, child_list) { in perf_read_group()
5548 mutex_unlock(&leader->child_mutex); in perf_read_group()
5550 ret = event->read_size; in perf_read_group()
5551 if (copy_to_user(buf, values, event->read_size)) in perf_read_group()
5552 ret = -EFAULT; in perf_read_group()
5556 mutex_unlock(&leader->child_mutex); in perf_read_group()
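/*
 * Illustrative sketch: the buffer assembled above is what a PERF_FORMAT_GROUP
 * read() returns to user space. Assuming read_format also includes
 * TOTAL_TIME_ENABLED, TOTAL_TIME_RUNNING and ID (but not LOST), it parses as
 * below; "nr" counts the leader plus all siblings.
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

struct group_read_buf {
	uint64_t nr;
	uint64_t time_enabled;
	uint64_t time_running;
	struct { uint64_t value, id; } cnt[];	/* one entry per group member */
};

static void print_group(int leader_fd)
{
	uint64_t raw[512];			/* keeps the cast below aligned */
	struct group_read_buf *gr = (struct group_read_buf *)raw;
	uint64_t i;

	if (read(leader_fd, raw, sizeof(raw)) <= 0)
		return;

	for (i = 0; i < gr->nr; i++)
		printf("id %llu: %llu\n",
		       (unsigned long long)gr->cnt[i].id,
		       (unsigned long long)gr->cnt[i].value);
}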
5562 static int perf_read_one(struct perf_event *event, in perf_read_one() argument
5569 values[n++] = __perf_event_read_value(event, &enabled, &running); in perf_read_one()
5575 values[n++] = primary_event_id(event); in perf_read_one()
5577 values[n++] = atomic64_read(&event->lost_samples); in perf_read_one()
5580 return -EFAULT; in perf_read_one()
5585 static bool is_event_hup(struct perf_event *event) in is_event_hup() argument
5589 if (event->state > PERF_EVENT_STATE_EXIT) in is_event_hup()
5592 mutex_lock(&event->child_mutex); in is_event_hup()
5593 no_children = list_empty(&event->child_list); in is_event_hup()
5594 mutex_unlock(&event->child_mutex); in is_event_hup()
5599 * Read the performance event - simple non-blocking version for now
5602 __perf_read(struct perf_event *event, char __user *buf, size_t count) in __perf_read() argument
5604 u64 read_format = event->attr.read_format; in __perf_read()
5608 * Return end-of-file for a read on an event that is in the error state. in __perf_read()
5612 if (event->state == PERF_EVENT_STATE_ERROR) in __perf_read()
5615 if (count < event->read_size) in __perf_read()
5616 return -ENOSPC; in __perf_read()
5618 WARN_ON_ONCE(event->ctx->parent_ctx); in __perf_read()
5620 ret = perf_read_group(event, read_format, buf); in __perf_read()
5622 ret = perf_read_one(event, read_format, buf); in __perf_read()
5630 struct perf_event *event = file->private_data; in perf_read() local
5634 ret = security_perf_event_read(event); in perf_read()
5638 ctx = perf_event_ctx_lock(event); in perf_read()
5639 ret = __perf_read(event, buf, count); in perf_read()
5640 perf_event_ctx_unlock(event, ctx); in perf_read()
5647 struct perf_event *event = file->private_data; in perf_poll() local
5651 poll_wait(file, &event->waitq, wait); in perf_poll()
5653 if (is_event_hup(event)) in perf_poll()
5657 * Pin the event->rb by taking event->mmap_mutex; otherwise in perf_poll()
5660 mutex_lock(&event->mmap_mutex); in perf_poll()
5661 rb = event->rb; in perf_poll()
5663 events = atomic_xchg(&rb->poll, 0); in perf_poll()
5664 mutex_unlock(&event->mmap_mutex); in perf_poll()
5668 static void _perf_event_reset(struct perf_event *event) in _perf_event_reset() argument
5670 (void)perf_event_read(event, false); in _perf_event_reset()
5671 local64_set(&event->count, 0); in _perf_event_reset()
5672 perf_event_update_userpage(event); in _perf_event_reset()
5675 /* Assume it's not an event with inherit set. */
5676 u64 perf_event_pause(struct perf_event *event, bool reset) in perf_event_pause() argument
5681 ctx = perf_event_ctx_lock(event); in perf_event_pause()
5682 WARN_ON_ONCE(event->attr.inherit); in perf_event_pause()
5683 _perf_event_disable(event); in perf_event_pause()
5684 count = local64_read(&event->count); in perf_event_pause()
5686 local64_set(&event->count, 0); in perf_event_pause()
5687 perf_event_ctx_unlock(event, ctx); in perf_event_pause()
5694 * Holding the top-level event's child_mutex means that any
5695 * descendant process that has inherited this event will block
5699 static void perf_event_for_each_child(struct perf_event *event, in perf_event_for_each_child() argument
5704 WARN_ON_ONCE(event->ctx->parent_ctx); in perf_event_for_each_child()
5706 mutex_lock(&event->child_mutex); in perf_event_for_each_child()
5707 func(event); in perf_event_for_each_child()
5708 list_for_each_entry(child, &event->child_list, child_list) in perf_event_for_each_child()
5710 mutex_unlock(&event->child_mutex); in perf_event_for_each_child()
5713 static void perf_event_for_each(struct perf_event *event, in perf_event_for_each() argument
5716 struct perf_event_context *ctx = event->ctx; in perf_event_for_each()
5719 lockdep_assert_held(&ctx->mutex); in perf_event_for_each()
5721 event = event->group_leader; in perf_event_for_each()
5723 perf_event_for_each_child(event, func); in perf_event_for_each()
5724 for_each_sibling_event(sibling, event) in perf_event_for_each()
5728 static void __perf_event_period(struct perf_event *event, in __perf_event_period() argument
5736 if (event->attr.freq) { in __perf_event_period()
5737 event->attr.sample_freq = value; in __perf_event_period()
5739 event->attr.sample_period = value; in __perf_event_period()
5740 event->hw.sample_period = value; in __perf_event_period()
5743 active = (event->state == PERF_EVENT_STATE_ACTIVE); in __perf_event_period()
5745 perf_pmu_disable(event->pmu); in __perf_event_period()
5748 * unthrottle now, to avoid the tick trying to unthrottle while we already re-started the event. in __perf_event_period()
5750 if (event->hw.interrupts == MAX_INTERRUPTS) { in __perf_event_period()
5751 event->hw.interrupts = 0; in __perf_event_period()
5752 perf_log_throttle(event, 1); in __perf_event_period()
5754 event->pmu->stop(event, PERF_EF_UPDATE); in __perf_event_period()
5757 local64_set(&event->hw.period_left, 0); in __perf_event_period()
5760 event->pmu->start(event, PERF_EF_RELOAD); in __perf_event_period()
5761 perf_pmu_enable(event->pmu); in __perf_event_period()
5765 static int perf_event_check_period(struct perf_event *event, u64 value) in perf_event_check_period() argument
5767 return event->pmu->check_period(event, value); in perf_event_check_period()
5770 static int _perf_event_period(struct perf_event *event, u64 value) in _perf_event_period() argument
5772 if (!is_sampling_event(event)) in _perf_event_period()
5773 return -EINVAL; in _perf_event_period()
5776 return -EINVAL; in _perf_event_period()
5778 if (event->attr.freq && value > sysctl_perf_event_sample_rate) in _perf_event_period()
5779 return -EINVAL; in _perf_event_period()
5781 if (perf_event_check_period(event, value)) in _perf_event_period()
5782 return -EINVAL; in _perf_event_period()
5784 if (!event->attr.freq && (value & (1ULL << 63))) in _perf_event_period()
5785 return -EINVAL; in _perf_event_period()
5787 event_function_call(event, __perf_event_period, &value); in _perf_event_period()
5792 int perf_event_period(struct perf_event *event, u64 value) in perf_event_period() argument
5797 ctx = perf_event_ctx_lock(event); in perf_event_period()
5798 ret = _perf_event_period(event, value); in perf_event_period()
5799 perf_event_ctx_unlock(event, ctx); in perf_event_period()
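
From user space the same path is reached through the PERF_EVENT_IOC_PERIOD ioctl, which takes a pointer to the new u64 value. A small sketch; the helper name and "fd" (a perf event fd for a sampling event) are assumptions:

#include <linux/perf_event.h>
#include <sys/ioctl.h>

/*
 * Update the sample period/frequency of an already-open sampling event.
 * The kernel rejects non-sampling events, zero values, frequencies above
 * the sysctl limit and, for period events, values with bit 63 set.
 */
static int update_period(int fd, __u64 new_period)
{
	return ioctl(fd, PERF_EVENT_IOC_PERIOD, &new_period);
}
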
5811 return -EBADF; in perf_fget_light()
5813 if (f.file->f_op != &perf_fops) { in perf_fget_light()
5815 return -EBADF; in perf_fget_light()
5821 static int perf_event_set_output(struct perf_event *event,
5823 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
5827 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) in _perf_ioctl() argument
5844 return _perf_event_refresh(event, arg); in _perf_ioctl()
5851 return -EFAULT; in _perf_ioctl()
5853 return _perf_event_period(event, value); in _perf_ioctl()
5857 u64 id = primary_event_id(event); in _perf_ioctl()
5860 return -EFAULT; in _perf_ioctl()
5867 if (arg != -1) { in _perf_ioctl()
5873 output_event = output.file->private_data; in _perf_ioctl()
5874 ret = perf_event_set_output(event, output_event); in _perf_ioctl()
5877 ret = perf_event_set_output(event, NULL); in _perf_ioctl()
5883 return perf_event_set_filter(event, (void __user *)arg); in _perf_ioctl()
5894 err = perf_event_set_bpf_prog(event, prog, 0); in _perf_ioctl()
5907 rb = rcu_dereference(event->rb); in _perf_ioctl()
5908 if (!rb || !rb->nr_pages) { in _perf_ioctl()
5910 return -EINVAL; in _perf_ioctl()
5918 return perf_event_query_prog_array(event, (void __user *)arg); in _perf_ioctl()
5928 return perf_event_modify_attr(event, &new_attr); in _perf_ioctl()
5931 return -ENOTTY; in _perf_ioctl()
5935 perf_event_for_each(event, func); in _perf_ioctl()
5937 perf_event_for_each_child(event, func); in _perf_ioctl()
5944 struct perf_event *event = file->private_data; in perf_ioctl() local
5949 ret = security_perf_event_write(event); in perf_ioctl()
5953 ctx = perf_event_ctx_lock(event); in perf_ioctl()
5954 ret = _perf_ioctl(event, cmd, arg); in perf_ioctl()
5955 perf_event_ctx_unlock(event, ctx); in perf_ioctl()
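
A self-contained user-space sketch of the most common ioctls handled above (reset/enable/disable plus a plain read()); error handling is minimal and the event choice is arbitrary. Passing PERF_IOC_FLAG_GROUP as the ioctl argument takes the perf_event_for_each() path above and applies the operation to the whole group:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* this task */,
		     -1 /* any cpu */, -1 /* no group */, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}
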
5969 	/* Fix up pointer size (usually 4 -> 8 in the 32-on-64-bit case) */ in perf_compat_ioctl()
5985 struct perf_event *event; in perf_event_task_enable() local
5987 	mutex_lock(&current->perf_event_mutex); in perf_event_task_enable()
5988 	list_for_each_entry(event, &current->perf_event_list, owner_entry) { in perf_event_task_enable()
5989 ctx = perf_event_ctx_lock(event); in perf_event_task_enable()
5990 perf_event_for_each_child(event, _perf_event_enable); in perf_event_task_enable()
5991 perf_event_ctx_unlock(event, ctx); in perf_event_task_enable()
5993 	mutex_unlock(&current->perf_event_mutex); in perf_event_task_enable()
6001 struct perf_event *event; in perf_event_task_disable() local
6003 	mutex_lock(&current->perf_event_mutex); in perf_event_task_disable()
6004 	list_for_each_entry(event, &current->perf_event_list, owner_entry) { in perf_event_task_disable()
6005 ctx = perf_event_ctx_lock(event); in perf_event_task_disable()
6006 perf_event_for_each_child(event, _perf_event_disable); in perf_event_task_disable()
6007 perf_event_ctx_unlock(event, ctx); in perf_event_task_disable()
6009 	mutex_unlock(&current->perf_event_mutex); in perf_event_task_disable()
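
The two helpers above back the PR_TASK_PERF_EVENTS_DISABLE/ENABLE prctls, which toggle every counter the calling task has opened on itself. A minimal user-space sketch (the wrapper name is an assumption):

#include <sys/prctl.h>
#include <linux/prctl.h>

/* Exclude a code region from all counters this task owns on itself. */
static void run_unmeasured(void (*fn)(void))
{
	prctl(PR_TASK_PERF_EVENTS_DISABLE, 0, 0, 0, 0);
	fn();
	prctl(PR_TASK_PERF_EVENTS_ENABLE, 0, 0, 0, 0);
}
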
6014 static int perf_event_index(struct perf_event *event) in perf_event_index() argument
6016 if (event->hw.state & PERF_HES_STOPPED) in perf_event_index()
6019 if (event->state != PERF_EVENT_STATE_ACTIVE) in perf_event_index()
6022 return event->pmu->event_idx(event); in perf_event_index()
6025 static void perf_event_init_userpage(struct perf_event *event) in perf_event_init_userpage() argument
6031 rb = rcu_dereference(event->rb); in perf_event_init_userpage()
6035 userpg = rb->user_page; in perf_event_init_userpage()
6038 userpg->cap_bit0_is_deprecated = 1; in perf_event_init_userpage()
6039 userpg->size = offsetof(struct perf_event_mmap_page, __reserved); in perf_event_init_userpage()
6040 userpg->data_offset = PAGE_SIZE; in perf_event_init_userpage()
6041 userpg->data_size = perf_data_size(rb); in perf_event_init_userpage()
6048 struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) in arch_perf_update_userpage() argument
6057 void perf_event_update_userpage(struct perf_event *event) in perf_event_update_userpage() argument
6064 rb = rcu_dereference(event->rb); in perf_event_update_userpage()
6070 * based on snapshot values taken when the event in perf_event_update_userpage()
6077 calc_timer_values(event, &now, &enabled, &running); in perf_event_update_userpage()
6079 userpg = rb->user_page; in perf_event_update_userpage()
6085 ++userpg->lock; in perf_event_update_userpage()
6087 userpg->index = perf_event_index(event); in perf_event_update_userpage()
6088 userpg->offset = perf_event_count(event); in perf_event_update_userpage()
6089 if (userpg->index) in perf_event_update_userpage()
6090 userpg->offset -= local64_read(&event->hw.prev_count); in perf_event_update_userpage()
6092 userpg->time_enabled = enabled + in perf_event_update_userpage()
6093 atomic64_read(&event->child_total_time_enabled); in perf_event_update_userpage()
6095 userpg->time_running = running + in perf_event_update_userpage()
6096 atomic64_read(&event->child_total_time_running); in perf_event_update_userpage()
6098 arch_perf_update_userpage(event, userpg, now); in perf_event_update_userpage()
6101 ++userpg->lock; in perf_event_update_userpage()
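
The ++userpg->lock pair above is one half of a seqcount-style protocol; here is a user-space sketch of the matching read side, assuming "pc" points at the first (metadata) page of the mmap()ed buffer. The architecture-specific raw counter read (e.g. rdpmc on x86, indicated by a non-zero pc->index) is left out:

#include <linux/perf_event.h>
#include <stdint.h>

#define barrier()	__asm__ __volatile__("" ::: "memory")

static uint64_t read_user_count(volatile struct perf_event_mmap_page *pc)
{
	uint32_t seq;
	uint64_t count;

	do {
		seq = pc->lock;
		barrier();
		count = pc->offset;
		/* if (pc->index) count += <raw read of counter pc->index - 1>; */
		barrier();
	} while (pc->lock != seq);

	return count;
}
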
6110 struct perf_event *event = vmf->vma->vm_file->private_data; in perf_mmap_fault() local
6114 if (vmf->flags & FAULT_FLAG_MKWRITE) { in perf_mmap_fault()
6115 if (vmf->pgoff == 0) in perf_mmap_fault()
6121 rb = rcu_dereference(event->rb); in perf_mmap_fault()
6125 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) in perf_mmap_fault()
6128 vmf->page = perf_mmap_to_page(rb, vmf->pgoff); in perf_mmap_fault()
6129 if (!vmf->page) in perf_mmap_fault()
6132 get_page(vmf->page); in perf_mmap_fault()
6133 vmf->page->mapping = vmf->vma->vm_file->f_mapping; in perf_mmap_fault()
6134 vmf->page->index = vmf->pgoff; in perf_mmap_fault()
6143 static void ring_buffer_attach(struct perf_event *event, in ring_buffer_attach() argument
6149 WARN_ON_ONCE(event->parent); in ring_buffer_attach()
6151 if (event->rb) { in ring_buffer_attach()
6154 * event->rb_entry and wait/clear when adding event->rb_entry. in ring_buffer_attach()
6156 WARN_ON_ONCE(event->rcu_pending); in ring_buffer_attach()
6158 old_rb = event->rb; in ring_buffer_attach()
6159 spin_lock_irqsave(&old_rb->event_lock, flags); in ring_buffer_attach()
6160 list_del_rcu(&event->rb_entry); in ring_buffer_attach()
6161 spin_unlock_irqrestore(&old_rb->event_lock, flags); in ring_buffer_attach()
6163 event->rcu_batches = get_state_synchronize_rcu(); in ring_buffer_attach()
6164 event->rcu_pending = 1; in ring_buffer_attach()
6168 if (event->rcu_pending) { in ring_buffer_attach()
6169 cond_synchronize_rcu(event->rcu_batches); in ring_buffer_attach()
6170 event->rcu_pending = 0; in ring_buffer_attach()
6173 spin_lock_irqsave(&rb->event_lock, flags); in ring_buffer_attach()
6174 list_add_rcu(&event->rb_entry, &rb->event_list); in ring_buffer_attach()
6175 spin_unlock_irqrestore(&rb->event_lock, flags); in ring_buffer_attach()
6179 * Avoid racing with perf_mmap_close(AUX): stop the event in ring_buffer_attach()
6180 * before swizzling the event::rb pointer; if it's getting in ring_buffer_attach()
6185 * mid-air, but then again, whoever does it like this is in ring_buffer_attach()
6188 if (has_aux(event)) in ring_buffer_attach()
6189 perf_event_stop(event, 0); in ring_buffer_attach()
6191 rcu_assign_pointer(event->rb, rb); in ring_buffer_attach()
6200 wake_up_all(&event->waitq); in ring_buffer_attach()
6204 static void ring_buffer_wakeup(struct perf_event *event) in ring_buffer_wakeup() argument
6208 if (event->parent) in ring_buffer_wakeup()
6209 event = event->parent; in ring_buffer_wakeup()
6212 rb = rcu_dereference(event->rb); in ring_buffer_wakeup()
6214 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) in ring_buffer_wakeup()
6215 wake_up_all(&event->waitq); in ring_buffer_wakeup()
6220 struct perf_buffer *ring_buffer_get(struct perf_event *event) in ring_buffer_get() argument
6224 if (event->parent) in ring_buffer_get()
6225 event = event->parent; in ring_buffer_get()
6228 rb = rcu_dereference(event->rb); in ring_buffer_get()
6230 if (!refcount_inc_not_zero(&rb->refcount)) in ring_buffer_get()
6240 if (!refcount_dec_and_test(&rb->refcount)) in ring_buffer_put()
6243 WARN_ON_ONCE(!list_empty(&rb->event_list)); in ring_buffer_put()
6245 call_rcu(&rb->rcu_head, rb_free_rcu); in ring_buffer_put()
6250 struct perf_event *event = vma->vm_file->private_data; in perf_mmap_open() local
6252 atomic_inc(&event->mmap_count); in perf_mmap_open()
6253 atomic_inc(&event->rb->mmap_count); in perf_mmap_open()
6255 if (vma->vm_pgoff) in perf_mmap_open()
6256 atomic_inc(&event->rb->aux_mmap_count); in perf_mmap_open()
6258 if (event->pmu->event_mapped) in perf_mmap_open()
6259 event->pmu->event_mapped(event, vma->vm_mm); in perf_mmap_open()
6262 static void perf_pmu_output_stop(struct perf_event *event);
6266 * event, or through other events by use of perf_event_set_output().
6274 struct perf_event *event = vma->vm_file->private_data; in perf_mmap_close() local
6275 struct perf_buffer *rb = ring_buffer_get(event); in perf_mmap_close()
6276 struct user_struct *mmap_user = rb->mmap_user; in perf_mmap_close()
6277 int mmap_locked = rb->mmap_locked; in perf_mmap_close()
6281 if (event->pmu->event_unmapped) in perf_mmap_close()
6282 event->pmu->event_unmapped(event, vma->vm_mm); in perf_mmap_close()
6285 * rb->aux_mmap_count will always drop before rb->mmap_count and in perf_mmap_close()
6286 * event->mmap_count, so it is ok to use event->mmap_mutex to in perf_mmap_close()
6289 if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && in perf_mmap_close()
6290 atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { in perf_mmap_close()
6297 perf_pmu_output_stop(event); in perf_mmap_close()
6300 atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm); in perf_mmap_close()
6301 atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm); in perf_mmap_close()
6305 WARN_ON_ONCE(refcount_read(&rb->aux_refcount)); in perf_mmap_close()
6307 mutex_unlock(&event->mmap_mutex); in perf_mmap_close()
6310 if (atomic_dec_and_test(&rb->mmap_count)) in perf_mmap_close()
6313 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) in perf_mmap_close()
6316 ring_buffer_attach(event, NULL); in perf_mmap_close()
6317 mutex_unlock(&event->mmap_mutex); in perf_mmap_close()
6330 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { in perf_mmap_close()
6331 if (!atomic_long_inc_not_zero(&event->refcount)) { in perf_mmap_close()
6333 * This event is en-route to free_event() which will in perf_mmap_close()
6340 mutex_lock(&event->mmap_mutex); in perf_mmap_close()
6346 * If we find a different rb; ignore this event, a next in perf_mmap_close()
6351 if (event->rb == rb) in perf_mmap_close()
6352 ring_buffer_attach(event, NULL); in perf_mmap_close()
6354 mutex_unlock(&event->mmap_mutex); in perf_mmap_close()
6355 put_event(event); in perf_mmap_close()
6366 	 * It could be there are still a few 0-ref events on the list; they'll in perf_mmap_close()
6367 * get cleaned up by free_event() -- they'll also still have their in perf_mmap_close()
6374 atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, in perf_mmap_close()
6375 &mmap_user->locked_vm); in perf_mmap_close()
6376 atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); in perf_mmap_close()
6392 struct perf_event *event = file->private_data; in perf_mmap() local
6403 * Don't allow mmap() of inherited per-task counters. This would in perf_mmap()
6407 if (event->cpu == -1 && event->attr.inherit) in perf_mmap()
6408 return -EINVAL; in perf_mmap()
6410 if (!(vma->vm_flags & VM_SHARED)) in perf_mmap()
6411 return -EINVAL; in perf_mmap()
6413 ret = security_perf_event_read(event); in perf_mmap()
6417 vma_size = vma->vm_end - vma->vm_start; in perf_mmap()
6419 if (vma->vm_pgoff == 0) { in perf_mmap()
6420 nr_pages = (vma_size / PAGE_SIZE) - 1; in perf_mmap()
6423 * AUX area mapping: if rb->aux_nr_pages != 0, it's already in perf_mmap()
6429 if (!event->rb) in perf_mmap()
6430 return -EINVAL; in perf_mmap()
6434 mutex_lock(&event->mmap_mutex); in perf_mmap()
6435 ret = -EINVAL; in perf_mmap()
6437 rb = event->rb; in perf_mmap()
6441 aux_offset = READ_ONCE(rb->user_page->aux_offset); in perf_mmap()
6442 aux_size = READ_ONCE(rb->user_page->aux_size); in perf_mmap()
6447 if (aux_offset != vma->vm_pgoff << PAGE_SHIFT) in perf_mmap()
6451 if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff) in perf_mmap()
6458 if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages) in perf_mmap()
6464 if (!atomic_inc_not_zero(&rb->mmap_count)) in perf_mmap()
6468 atomic_inc(&rb->aux_mmap_count); in perf_mmap()
6473 atomic_set(&rb->aux_mmap_count, 1); in perf_mmap()
6480 * If we have rb pages ensure they're a power-of-two number, so we in perf_mmap()
6484 return -EINVAL; in perf_mmap()
6487 return -EINVAL; in perf_mmap()
6489 WARN_ON_ONCE(event->ctx->parent_ctx); in perf_mmap()
6491 mutex_lock(&event->mmap_mutex); in perf_mmap()
6492 if (event->rb) { in perf_mmap()
6493 if (data_page_nr(event->rb) != nr_pages) { in perf_mmap()
6494 ret = -EINVAL; in perf_mmap()
6498 if (!atomic_inc_not_zero(&event->rb->mmap_count)) { in perf_mmap()
6501 * event and try again. in perf_mmap()
6503 ring_buffer_attach(event, NULL); in perf_mmap()
6504 mutex_unlock(&event->mmap_mutex); in perf_mmap()
6514 user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); in perf_mmap()
6521 user_locked = atomic_long_read(&user->locked_vm); in perf_mmap()
6525 * user->locked_vm > user_lock_limit in perf_mmap()
6536 extra = user_locked - user_lock_limit; in perf_mmap()
6537 user_extra -= extra; in perf_mmap()
6542 locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra; in perf_mmap()
6546 ret = -EPERM; in perf_mmap()
6550 WARN_ON(!rb && event->rb); in perf_mmap()
6552 if (vma->vm_flags & VM_WRITE) in perf_mmap()
6557 event->attr.watermark ? event->attr.wakeup_watermark : 0, in perf_mmap()
6558 event->cpu, flags); in perf_mmap()
6561 ret = -ENOMEM; in perf_mmap()
6565 atomic_set(&rb->mmap_count, 1); in perf_mmap()
6566 rb->mmap_user = get_current_user(); in perf_mmap()
6567 rb->mmap_locked = extra; in perf_mmap()
6569 ring_buffer_attach(event, rb); in perf_mmap()
6571 perf_event_update_time(event); in perf_mmap()
6572 perf_event_init_userpage(event); in perf_mmap()
6573 perf_event_update_userpage(event); in perf_mmap()
6575 ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, in perf_mmap()
6576 event->attr.aux_watermark, flags); in perf_mmap()
6578 rb->aux_mmap_locked = extra; in perf_mmap()
6583 atomic_long_add(user_extra, &user->locked_vm); in perf_mmap()
6584 atomic64_add(extra, &vma->vm_mm->pinned_vm); in perf_mmap()
6586 atomic_inc(&event->mmap_count); in perf_mmap()
6588 atomic_dec(&rb->mmap_count); in perf_mmap()
6591 mutex_unlock(&event->mmap_mutex); in perf_mmap()
6598 vma->vm_ops = &perf_mmap_vmops; in perf_mmap()
6600 if (event->pmu->event_mapped) in perf_mmap()
6601 event->pmu->event_mapped(event, vma->vm_mm); in perf_mmap()
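
A user-space sketch of the constraints perf_mmap() enforces: the mapping must be MAP_SHARED and cover one metadata page plus a power-of-two number of data pages (an AUX area, if used, is a second mmap() at the offset published in the metadata page's aux_offset/aux_size). The helper name and "fd" are assumptions:

#include <linux/perf_event.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stddef.h>

/* Map 1 metadata page + data_pages data pages; data_pages must be a power of two. */
static struct perf_event_mmap_page *map_ring_buffer(int fd, unsigned int data_pages)
{
	size_t len = (size_t)(1 + data_pages) * sysconf(_SC_PAGESIZE);
	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	return base == MAP_FAILED ? NULL : base;
}
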
6609 struct perf_event *event = filp->private_data; in perf_fasync() local
6613 retval = fasync_helper(fd, filp, on, &event->fasync); in perf_fasync()
6634 * Perf event wakeup
6637 * to user-space before waking everybody up.
6640 static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) in perf_event_fasync() argument
6643 if (event->parent) in perf_event_fasync()
6644 event = event->parent; in perf_event_fasync()
6645 return &event->fasync; in perf_event_fasync()
6648 void perf_event_wakeup(struct perf_event *event) in perf_event_wakeup() argument
6650 ring_buffer_wakeup(event); in perf_event_wakeup()
6652 if (event->pending_kill) { in perf_event_wakeup()
6653 kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); in perf_event_wakeup()
6654 event->pending_kill = 0; in perf_event_wakeup()
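
The fasync/pending_kill machinery above is what user space sees as SIGIO on counter wakeups; a small sketch of arming it on an already-open perf event fd (the helper name is an assumption):

#include <fcntl.h>
#include <unistd.h>

static int request_sigio(int fd)
{
	if (fcntl(fd, F_SETOWN, getpid()) < 0)
		return -1;
	return fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
}
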
6658 static void perf_sigtrap(struct perf_event *event) in perf_sigtrap() argument
6662 * ctx->task or current has changed in the meantime. This can be the in perf_sigtrap()
6665 if (WARN_ON_ONCE(event->ctx->task != current)) in perf_sigtrap()
6672 if (current->flags & PF_EXITING) in perf_sigtrap()
6675 send_sig_perf((void __user *)event->pending_addr, in perf_sigtrap()
6676 event->orig_type, event->attr.sig_data); in perf_sigtrap()
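
perf_sigtrap() is the delivery side of attr.sigtrap: a sampling event opened on a task with sigtrap=1 (which also requires remove_on_exec) sends that task SIGTRAP with si_code TRAP_PERF, carrying attr.sig_data and the sampled address. A hedged sketch of the attribute setup only; whether the siginfo fields are exposed as si_perf_data/si_perf_type depends on your libc headers:

#include <linux/perf_event.h>
#include <string.h>

static void init_sigtrap_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_INSTRUCTIONS;
	attr->sample_period = 1000000;
	attr->sigtrap = 1;		/* deliver SIGTRAP to the measured task */
	attr->remove_on_exec = 1;	/* required together with sigtrap */
	attr->sig_data = 0x1234;	/* arbitrary cookie echoed back in siginfo */
}
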
6680 * Deliver the pending work in-event-context or follow the context.
6682 static void __perf_pending_irq(struct perf_event *event) in __perf_pending_irq() argument
6684 int cpu = READ_ONCE(event->oncpu); in __perf_pending_irq()
6687 	 * If the event isn't running, we're done. event_sched_out() will have in __perf_pending_irq()
6694 * Yay, we hit home and are in the context of the event. in __perf_pending_irq()
6697 if (event->pending_sigtrap) { in __perf_pending_irq()
6698 event->pending_sigtrap = 0; in __perf_pending_irq()
6699 perf_sigtrap(event); in __perf_pending_irq()
6700 local_dec(&event->ctx->nr_pending); in __perf_pending_irq()
6702 if (event->pending_disable) { in __perf_pending_irq()
6703 event->pending_disable = 0; in __perf_pending_irq()
6704 perf_event_disable_local(event); in __perf_pending_irq()
6710 * CPU-A CPU-B in __perf_pending_irq()
6713 * @pending_disable = CPU-A; in __perf_pending_irq()
6716 * sched-out in __perf_pending_irq()
6717 * @pending_disable = -1; in __perf_pending_irq()
6719 * sched-in in __perf_pending_irq()
6721 * @pending_disable = CPU-B; in __perf_pending_irq()
6727 * But the event runs on CPU-B and wants disabling there. in __perf_pending_irq()
6729 irq_work_queue_on(&event->pending_irq, cpu); in __perf_pending_irq()
6734 struct perf_event *event = container_of(entry, struct perf_event, pending_irq); in perf_pending_irq() local
6744 * The wakeup isn't bound to the context of the event -- it can happen in perf_pending_irq()
6745 * irrespective of where the event is. in perf_pending_irq()
6747 if (event->pending_wakeup) { in perf_pending_irq()
6748 event->pending_wakeup = 0; in perf_pending_irq()
6749 perf_event_wakeup(event); in perf_pending_irq()
6752 __perf_pending_irq(event); in perf_pending_irq()
6760 struct perf_event *event = container_of(head, struct perf_event, pending_task); in perf_pending_task() local
6770 if (event->pending_work) { in perf_pending_task()
6771 event->pending_work = 0; in perf_pending_task()
6772 perf_sigtrap(event); in perf_pending_task()
6773 local_dec(&event->ctx->nr_pending); in perf_pending_task()
6780 put_event(event); in perf_pending_task()
6786 DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
6787 DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
6788 DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
6796 static_call_update(__perf_guest_state, cbs->state); in perf_register_guest_info_callbacks()
6797 static_call_update(__perf_guest_get_ip, cbs->get_ip); in perf_register_guest_info_callbacks()
6799 /* Implementing ->handle_intel_pt_intr is optional. */ in perf_register_guest_info_callbacks()
6800 if (cbs->handle_intel_pt_intr) in perf_register_guest_info_callbacks()
6802 cbs->handle_intel_pt_intr); in perf_register_guest_info_callbacks()
6841 regs_user->abi = perf_reg_abi(current); in perf_sample_regs_user()
6842 regs_user->regs = regs; in perf_sample_regs_user()
6843 } else if (!(current->flags & PF_KTHREAD)) { in perf_sample_regs_user()
6846 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; in perf_sample_regs_user()
6847 regs_user->regs = NULL; in perf_sample_regs_user()
6854 regs_intr->regs = regs; in perf_sample_regs_intr()
6855 regs_intr->abi = perf_reg_abi(current); in perf_sample_regs_intr()
6873 return TASK_SIZE - addr; in perf_ustack_task_size()
6888 * - TASK_SIZE in perf_sample_ustack_size()
6891 * - remaining sample size in perf_sample_ustack_size()
6908 stack_size = USHRT_MAX - header_size - sizeof(u64); in perf_sample_ustack_size()
6931 * - the size requested by user or the best one we can fit in perf_output_sample_ustack()
6934 * - user stack dump data in perf_output_sample_ustack()
6936 * - the actual dumped size in perf_output_sample_ustack()
6945 dyn_size = dump_size - rem; in perf_output_sample_ustack()
6954 static unsigned long perf_prepare_sample_aux(struct perf_event *event, in perf_prepare_sample_aux() argument
6958 struct perf_event *sampler = event->aux_event; in perf_prepare_sample_aux()
6961 data->aux_size = 0; in perf_prepare_sample_aux()
6966 if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE)) in perf_prepare_sample_aux()
6969 if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) in perf_prepare_sample_aux()
6980 if (READ_ONCE(rb->aux_in_sampling)) { in perf_prepare_sample_aux()
6981 data->aux_size = 0; in perf_prepare_sample_aux()
6984 data->aux_size = ALIGN(size, sizeof(u64)); in perf_prepare_sample_aux()
6989 return data->aux_size; in perf_prepare_sample_aux()
6993 struct perf_event *event, in perf_pmu_snapshot_aux() argument
7001 * Normal ->start()/->stop() callbacks run in IRQ mode in scheduler in perf_pmu_snapshot_aux()
7003 * the IRQ ones, that is, for example, re-starting an event that's just in perf_pmu_snapshot_aux()
7005 * doesn't change the event state. in perf_pmu_snapshot_aux()
7014 WRITE_ONCE(rb->aux_in_sampling, 1); in perf_pmu_snapshot_aux()
7017 ret = event->pmu->snapshot_aux(event, handle, size); in perf_pmu_snapshot_aux()
7020 WRITE_ONCE(rb->aux_in_sampling, 0); in perf_pmu_snapshot_aux()
7026 static void perf_aux_sample_output(struct perf_event *event, in perf_aux_sample_output() argument
7030 struct perf_event *sampler = event->aux_event; in perf_aux_sample_output()
7035 if (WARN_ON_ONCE(!sampler || !data->aux_size)) in perf_aux_sample_output()
7042 size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size); in perf_aux_sample_output()
7046 * non-zero surplus that it didn't copy), which in its current in perf_aux_sample_output()
7054 * The pad comes from ALIGN()ing data->aux_size up to u64 in in perf_aux_sample_output()
7057 pad = data->aux_size - size; in perf_aux_sample_output()
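
The AUX-in-sample path above is driven from user space by attr.aux_sample_size on a sampling event that is grouped under an AUX-capable leader (an intel_pt event, for example). This is a rough sketch of the sampling event's attributes only, not a complete setup:

#include <linux/perf_event.h>

static const struct perf_event_attr aux_sampling_attr = {
	.size		 = sizeof(struct perf_event_attr),
	.type		 = PERF_TYPE_HARDWARE,
	.config		 = PERF_COUNT_HW_CPU_CYCLES,
	.sample_period	 = 100000,
	.sample_type	 = PERF_SAMPLE_IP | PERF_SAMPLE_AUX,
	.aux_sample_size = 4096,	/* bytes of AUX data per sample, u64-aligned by the kernel */
};
/* open with group_fd set to the AUX-capable group leader's fd */
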
7071 * A set of common sample data types saved even for non-sample records
7072 * when event->attr.sample_id_all is set.
7079 struct perf_event *event, in __perf_event_header__init_id() argument
7082 data->type = event->attr.sample_type; in __perf_event_header__init_id()
7083 data->sample_flags |= data->type & PERF_SAMPLE_ID_ALL; in __perf_event_header__init_id()
7087 data->tid_entry.pid = perf_event_pid(event, current); in __perf_event_header__init_id()
7088 data->tid_entry.tid = perf_event_tid(event, current); in __perf_event_header__init_id()
7092 data->time = perf_event_clock(event); in __perf_event_header__init_id()
7095 data->id = primary_event_id(event); in __perf_event_header__init_id()
7098 data->stream_id = event->id; in __perf_event_header__init_id()
7101 data->cpu_entry.cpu = raw_smp_processor_id(); in __perf_event_header__init_id()
7102 data->cpu_entry.reserved = 0; in __perf_event_header__init_id()
7108 struct perf_event *event) in perf_event_header__init_id() argument
7110 if (event->attr.sample_id_all) { in perf_event_header__init_id()
7111 header->size += event->id_header_size; in perf_event_header__init_id()
7112 __perf_event_header__init_id(data, event, event->attr.sample_type); in perf_event_header__init_id()
7119 u64 sample_type = data->type; in __perf_event__output_id_sample()
7122 perf_output_put(handle, data->tid_entry); in __perf_event__output_id_sample()
7125 perf_output_put(handle, data->time); in __perf_event__output_id_sample()
7128 perf_output_put(handle, data->id); in __perf_event__output_id_sample()
7131 perf_output_put(handle, data->stream_id); in __perf_event__output_id_sample()
7134 perf_output_put(handle, data->cpu_entry); in __perf_event__output_id_sample()
7137 perf_output_put(handle, data->id); in __perf_event__output_id_sample()
7140 void perf_event__output_id_sample(struct perf_event *event, in perf_event__output_id_sample() argument
7144 if (event->attr.sample_id_all) in perf_event__output_id_sample()
7149 struct perf_event *event, in perf_output_read_one() argument
7152 u64 read_format = event->attr.read_format; in perf_output_read_one()
7156 values[n++] = perf_event_count(event); in perf_output_read_one()
7159 atomic64_read(&event->child_total_time_enabled); in perf_output_read_one()
7163 atomic64_read(&event->child_total_time_running); in perf_output_read_one()
7166 values[n++] = primary_event_id(event); in perf_output_read_one()
7168 values[n++] = atomic64_read(&event->lost_samples); in perf_output_read_one()
7174 struct perf_event *event, in perf_output_read_group() argument
7177 struct perf_event *leader = event->group_leader, *sub; in perf_output_read_group()
7178 u64 read_format = event->attr.read_format; in perf_output_read_group()
7189 values[n++] = 1 + leader->nr_siblings; in perf_output_read_group()
7197 if ((leader != event) && in perf_output_read_group()
7198 (leader->state == PERF_EVENT_STATE_ACTIVE)) in perf_output_read_group()
7199 leader->pmu->read(leader); in perf_output_read_group()
7205 values[n++] = atomic64_read(&leader->lost_samples); in perf_output_read_group()
7212 if ((sub != event) && in perf_output_read_group()
7213 (sub->state == PERF_EVENT_STATE_ACTIVE)) in perf_output_read_group()
7214 sub->pmu->read(sub); in perf_output_read_group()
7220 values[n++] = atomic64_read(&sub->lost_samples); in perf_output_read_group()
7239 struct perf_event *event) in perf_output_read() argument
7242 u64 read_format = event->attr.read_format; in perf_output_read()
7246 * based on snapshot values taken when the event in perf_output_read()
7254 calc_timer_values(event, &now, &enabled, &running); in perf_output_read()
7256 if (event->attr.read_format & PERF_FORMAT_GROUP) in perf_output_read()
7257 perf_output_read_group(handle, event, enabled, running); in perf_output_read()
7259 perf_output_read_one(handle, event, enabled, running); in perf_output_read()
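
For reference, a sketch of the layout perf_output_read_group() produces (the same layout a read() on a PERF_FORMAT_GROUP event returns); each optional field is present only when the matching PERF_FORMAT_* bit is set in attr.read_format, and the leader's entry comes first, followed by its siblings:

#include <linux/types.h>

struct group_read_layout {
	__u64 nr;		/* number of events in the group */
	__u64 time_enabled;	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
	struct {
		__u64 value;
		__u64 id;	/* PERF_FORMAT_ID */
		__u64 lost;	/* PERF_FORMAT_LOST */
	} values[];		/* nr entries */
};
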
7265 struct perf_event *event) in perf_output_sample() argument
7267 u64 sample_type = data->type; in perf_output_sample()
7272 perf_output_put(handle, data->id); in perf_output_sample()
7275 perf_output_put(handle, data->ip); in perf_output_sample()
7278 perf_output_put(handle, data->tid_entry); in perf_output_sample()
7281 perf_output_put(handle, data->time); in perf_output_sample()
7284 perf_output_put(handle, data->addr); in perf_output_sample()
7287 perf_output_put(handle, data->id); in perf_output_sample()
7290 perf_output_put(handle, data->stream_id); in perf_output_sample()
7293 perf_output_put(handle, data->cpu_entry); in perf_output_sample()
7296 perf_output_put(handle, data->period); in perf_output_sample()
7299 perf_output_read(handle, event); in perf_output_sample()
7304 size += data->callchain->nr; in perf_output_sample()
7306 __output_copy(handle, data->callchain, size); in perf_output_sample()
7310 struct perf_raw_record *raw = data->raw; in perf_output_sample()
7313 struct perf_raw_frag *frag = &raw->frag; in perf_output_sample()
7315 perf_output_put(handle, raw->size); in perf_output_sample()
7317 if (frag->copy) { in perf_output_sample()
7318 __output_custom(handle, frag->copy, in perf_output_sample()
7319 frag->data, frag->size); in perf_output_sample()
7321 __output_copy(handle, frag->data, in perf_output_sample()
7322 frag->size); in perf_output_sample()
7326 frag = frag->next; in perf_output_sample()
7328 if (frag->pad) in perf_output_sample()
7329 __output_skip(handle, NULL, frag->pad); in perf_output_sample()
7343 if (data->br_stack) { in perf_output_sample()
7346 size = data->br_stack->nr in perf_output_sample()
7349 perf_output_put(handle, data->br_stack->nr); in perf_output_sample()
7350 if (branch_sample_hw_index(event)) in perf_output_sample()
7351 perf_output_put(handle, data->br_stack->hw_idx); in perf_output_sample()
7352 perf_output_copy(handle, data->br_stack->entries, size); in perf_output_sample()
7363 u64 abi = data->regs_user.abi; in perf_output_sample()
7372 u64 mask = event->attr.sample_regs_user; in perf_output_sample()
7374 data->regs_user.regs, in perf_output_sample()
7381 data->stack_user_size, in perf_output_sample()
7382 data->regs_user.regs); in perf_output_sample()
7386 perf_output_put(handle, data->weight.full); in perf_output_sample()
7389 perf_output_put(handle, data->data_src.val); in perf_output_sample()
7392 perf_output_put(handle, data->txn); in perf_output_sample()
7395 u64 abi = data->regs_intr.abi; in perf_output_sample()
7403 u64 mask = event->attr.sample_regs_intr; in perf_output_sample()
7406 data->regs_intr.regs, in perf_output_sample()
7412 perf_output_put(handle, data->phys_addr); in perf_output_sample()
7415 perf_output_put(handle, data->cgroup); in perf_output_sample()
7418 perf_output_put(handle, data->data_page_size); in perf_output_sample()
7421 perf_output_put(handle, data->code_page_size); in perf_output_sample()
7424 perf_output_put(handle, data->aux_size); in perf_output_sample()
7426 if (data->aux_size) in perf_output_sample()
7427 perf_aux_sample_output(event, handle, data); in perf_output_sample()
7430 if (!event->attr.watermark) { in perf_output_sample()
7431 int wakeup_events = event->attr.wakeup_events; in perf_output_sample()
7434 struct perf_buffer *rb = handle->rb; in perf_output_sample()
7435 int events = local_inc_return(&rb->events); in perf_output_sample()
7438 local_sub(wakeup_events, &rb->events); in perf_output_sample()
7439 local_inc(&rb->wakeup); in perf_output_sample()
7462 * Try IRQ-safe get_user_page_fast_only first. in perf_virt_to_phys()
7465 if (current->mm != NULL) { in perf_virt_to_phys()
7550 * Software page-table walkers must disable IRQs, in perf_get_page_size()
7555 mm = current->mm; in perf_get_page_size()
7574 perf_callchain(struct perf_event *event, struct pt_regs *regs) in perf_callchain() argument
7576 bool kernel = !event->attr.exclude_callchain_kernel; in perf_callchain()
7577 bool user = !event->attr.exclude_callchain_user; in perf_callchain()
7578 /* Disallow cross-task user callchains. */ in perf_callchain()
7579 bool crosstask = event->ctx->task && event->ctx->task != current; in perf_callchain()
7580 const u32 max_stack = event->attr.sample_max_stack; in perf_callchain()
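
The exclude_callchain_* and sample_max_stack knobs consumed above come straight from the event attributes; a minimal sketch of requesting user-only call chains (sample_max_stack must not exceed the kernel.perf_event_max_stack sysctl):

#include <linux/perf_event.h>

static const struct perf_event_attr callchain_attr = {
	.size			  = sizeof(struct perf_event_attr),
	.type			  = PERF_TYPE_HARDWARE,
	.config			  = PERF_COUNT_HW_CPU_CYCLES,
	.sample_period		  = 100000,
	.sample_type		  = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN,
	.exclude_callchain_kernel = 1,	/* user frames only */
	.sample_max_stack	  = 64,
};
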
7597 struct perf_event *event, in perf_prepare_sample() argument
7600 u64 sample_type = event->attr.sample_type; in perf_prepare_sample()
7614 filtered_sample_type &= ~data->sample_flags; in perf_prepare_sample()
7617 /* Make sure it has the correct data->type for output */ in perf_prepare_sample()
7618 data->type = event->attr.sample_type; in perf_prepare_sample()
7622 __perf_event_header__init_id(data, event, filtered_sample_type); in perf_prepare_sample()
7625 data->ip = perf_instruction_pointer(regs); in perf_prepare_sample()
7626 data->sample_flags |= PERF_SAMPLE_IP; in perf_prepare_sample()
7630 perf_sample_save_callchain(data, event, regs); in perf_prepare_sample()
7633 data->raw = NULL; in perf_prepare_sample()
7634 data->dyn_size += sizeof(u64); in perf_prepare_sample()
7635 data->sample_flags |= PERF_SAMPLE_RAW; in perf_prepare_sample()
7639 data->br_stack = NULL; in perf_prepare_sample()
7640 data->dyn_size += sizeof(u64); in perf_prepare_sample()
7641 data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; in perf_prepare_sample()
7645 perf_sample_regs_user(&data->regs_user, regs); in perf_prepare_sample()
7652 if ((sample_type & ~data->sample_flags) & PERF_SAMPLE_REGS_USER) { in perf_prepare_sample()
7656 if (data->regs_user.regs) { in perf_prepare_sample()
7657 u64 mask = event->attr.sample_regs_user; in perf_prepare_sample()
7661 data->dyn_size += size; in perf_prepare_sample()
7662 data->sample_flags |= PERF_SAMPLE_REGS_USER; in perf_prepare_sample()
7672 u16 stack_size = event->attr.sample_stack_user; in perf_prepare_sample()
7673 u16 header_size = perf_sample_data_size(data, event); in perf_prepare_sample()
7677 data->regs_user.regs); in perf_prepare_sample()
7687 data->stack_user_size = stack_size; in perf_prepare_sample()
7688 data->dyn_size += size; in perf_prepare_sample()
7689 data->sample_flags |= PERF_SAMPLE_STACK_USER; in perf_prepare_sample()
7693 data->weight.full = 0; in perf_prepare_sample()
7694 data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; in perf_prepare_sample()
7698 data->data_src.val = PERF_MEM_NA; in perf_prepare_sample()
7699 data->sample_flags |= PERF_SAMPLE_DATA_SRC; in perf_prepare_sample()
7703 data->txn = 0; in perf_prepare_sample()
7704 data->sample_flags |= PERF_SAMPLE_TRANSACTION; in perf_prepare_sample()
7708 data->addr = 0; in perf_prepare_sample()
7709 data->sample_flags |= PERF_SAMPLE_ADDR; in perf_prepare_sample()
7716 perf_sample_regs_intr(&data->regs_intr, regs); in perf_prepare_sample()
7718 if (data->regs_intr.regs) { in perf_prepare_sample()
7719 u64 mask = event->attr.sample_regs_intr; in perf_prepare_sample()
7724 data->dyn_size += size; in perf_prepare_sample()
7725 data->sample_flags |= PERF_SAMPLE_REGS_INTR; in perf_prepare_sample()
7729 data->phys_addr = perf_virt_to_phys(data->addr); in perf_prepare_sample()
7730 data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; in perf_prepare_sample()
7738 cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup; in perf_prepare_sample()
7739 data->cgroup = cgroup_id(cgrp); in perf_prepare_sample()
7740 data->sample_flags |= PERF_SAMPLE_CGROUP; in perf_prepare_sample()
7746 * require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr, in perf_prepare_sample()
7750 data->data_page_size = perf_get_page_size(data->addr); in perf_prepare_sample()
7751 data->sample_flags |= PERF_SAMPLE_DATA_PAGE_SIZE; in perf_prepare_sample()
7755 data->code_page_size = perf_get_page_size(data->ip); in perf_prepare_sample()
7756 data->sample_flags |= PERF_SAMPLE_CODE_PAGE_SIZE; in perf_prepare_sample()
7761 u16 header_size = perf_sample_data_size(data, event); in perf_prepare_sample()
7771 size = min_t(size_t, U16_MAX - header_size, in perf_prepare_sample()
7772 event->attr.aux_sample_size); in perf_prepare_sample()
7774 size = perf_prepare_sample_aux(event, data, size); in perf_prepare_sample()
7777 data->dyn_size += size + sizeof(u64); /* size above */ in perf_prepare_sample()
7778 data->sample_flags |= PERF_SAMPLE_AUX; in perf_prepare_sample()
7784 struct perf_event *event, in perf_prepare_header() argument
7787 header->type = PERF_RECORD_SAMPLE; in perf_prepare_header()
7788 header->size = perf_sample_data_size(data, event); in perf_prepare_header()
7789 header->misc = perf_misc_flags(regs); in perf_prepare_header()
7799 WARN_ON_ONCE(header->size & 7); in perf_prepare_header()
7803 __perf_event_output(struct perf_event *event, in __perf_event_output() argument
7818 perf_prepare_sample(data, event, regs); in __perf_event_output()
7819 perf_prepare_header(&header, data, event, regs); in __perf_event_output()
7821 err = output_begin(&handle, data, event, header.size); in __perf_event_output()
7825 perf_output_sample(&handle, &header, data, event); in __perf_event_output()
7835 perf_event_output_forward(struct perf_event *event, in perf_event_output_forward() argument
7839 __perf_event_output(event, data, regs, perf_output_begin_forward); in perf_event_output_forward()
7843 perf_event_output_backward(struct perf_event *event, in perf_event_output_backward() argument
7847 __perf_event_output(event, data, regs, perf_output_begin_backward); in perf_event_output_backward()
7851 perf_event_output(struct perf_event *event, in perf_event_output() argument
7855 return __perf_event_output(event, data, regs, perf_output_begin); in perf_event_output()
7870 perf_event_read_event(struct perf_event *event, in perf_event_read_event() argument
7879 .size = sizeof(read_event) + event->read_size, in perf_event_read_event()
7881 .pid = perf_event_pid(event, task), in perf_event_read_event()
7882 .tid = perf_event_tid(event, task), in perf_event_read_event()
7886 perf_event_header__init_id(&read_event.header, &sample, event); in perf_event_read_event()
7887 ret = perf_output_begin(&handle, &sample, event, read_event.header.size); in perf_event_read_event()
7892 perf_output_read(&handle, event); in perf_event_read_event()
7893 perf_event__output_id_sample(event, &handle, &sample); in perf_event_read_event()
7898 typedef void (perf_iterate_f)(struct perf_event *event, void *data);
7905 struct perf_event *event; in perf_iterate_ctx() local
7907 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { in perf_iterate_ctx()
7909 if (event->state < PERF_EVENT_STATE_INACTIVE) in perf_iterate_ctx()
7911 if (!event_filter_match(event)) in perf_iterate_ctx()
7915 output(event, data); in perf_iterate_ctx()
7922 struct perf_event *event; in perf_iterate_sb_cpu() local
7924 list_for_each_entry_rcu(event, &pel->list, sb_list) { in perf_iterate_sb_cpu()
7927 * if we observe event->ctx, both event and ctx will be in perf_iterate_sb_cpu()
7930 if (!smp_load_acquire(&event->ctx)) in perf_iterate_sb_cpu()
7933 if (event->state < PERF_EVENT_STATE_INACTIVE) in perf_iterate_sb_cpu()
7935 if (!event_filter_match(event)) in perf_iterate_sb_cpu()
7937 output(event, data); in perf_iterate_sb_cpu()
7942 * Iterate all events that need to receive side-band events.
7945 * your event, otherwise it might not get delivered.
7968 ctx = rcu_dereference(current->perf_event_ctxp); in perf_iterate_sb()
7977 * Clear all file-based filters at exec, they'll have to be
7978 * re-instated when/if these objects are mmapped again.
7980 static void perf_event_addr_filters_exec(struct perf_event *event, void *data) in perf_event_addr_filters_exec() argument
7982 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); in perf_event_addr_filters_exec()
7987 if (!has_addr_filter(event)) in perf_event_addr_filters_exec()
7990 raw_spin_lock_irqsave(&ifh->lock, flags); in perf_event_addr_filters_exec()
7991 list_for_each_entry(filter, &ifh->list, entry) { in perf_event_addr_filters_exec()
7992 if (filter->path.dentry) { in perf_event_addr_filters_exec()
7993 event->addr_filter_ranges[count].start = 0; in perf_event_addr_filters_exec()
7994 event->addr_filter_ranges[count].size = 0; in perf_event_addr_filters_exec()
8002 event->addr_filters_gen++; in perf_event_addr_filters_exec()
8003 raw_spin_unlock_irqrestore(&ifh->lock, flags); in perf_event_addr_filters_exec()
8006 perf_event_stop(event, 1); in perf_event_addr_filters_exec()
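
The file-based filters cleared on exec above are installed in the first place with the PERF_EVENT_IOC_SET_FILTER ioctl on a PMU that supports address filtering (intel_pt, for instance); the path and range in this sketch are made-up placeholders:

#include <linux/perf_event.h>
#include <sys/ioctl.h>

static int install_addr_filter(int fd)
{
	/* "<action> <start>/<size>@<object>", where action is filter, start or stop */
	return ioctl(fd, PERF_EVENT_IOC_SET_FILTER,
		     "filter 0x1000/0x2000@/usr/bin/example");
}
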
8030 static void __perf_event_output_stop(struct perf_event *event, void *data) in __perf_event_output_stop() argument
8032 struct perf_event *parent = event->parent; in __perf_event_output_stop()
8034 struct perf_buffer *rb = ro->rb; in __perf_event_output_stop()
8036 .event = event, in __perf_event_output_stop()
8039 if (!has_aux(event)) in __perf_event_output_stop()
8043 parent = event; in __perf_event_output_stop()
8047 * ring-buffer, but it will be the child that's actually using it. in __perf_event_output_stop()
8049 * We are using event::rb to determine if the event should be stopped, in __perf_event_output_stop()
8051 * which will make us skip the event that actually needs to be stopped. in __perf_event_output_stop()
8052 * So ring_buffer_attach() has to stop an aux event before re-assigning in __perf_event_output_stop()
8055 if (rcu_dereference(parent->rb) == rb) in __perf_event_output_stop()
8056 ro->err = __perf_event_stop(&sd); in __perf_event_output_stop()
8061 struct perf_event *event = info; in __perf_pmu_output_stop() local
8064 .rb = event->rb, in __perf_pmu_output_stop()
8068 perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false); in __perf_pmu_output_stop()
8069 if (cpuctx->task_ctx) in __perf_pmu_output_stop()
8070 perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop, in __perf_pmu_output_stop()
8077 static void perf_pmu_output_stop(struct perf_event *event) in perf_pmu_output_stop() argument
8084 list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) { in perf_pmu_output_stop()
8086 * For per-CPU events, we need to make sure that neither they in perf_pmu_output_stop()
8087 * nor their children are running; for cpu==-1 events it's in perf_pmu_output_stop()
8088 * sufficient to stop the event itself if it's active, since in perf_pmu_output_stop()
8091 cpu = iter->cpu; in perf_pmu_output_stop()
8092 if (cpu == -1) in perf_pmu_output_stop()
8093 cpu = READ_ONCE(iter->oncpu); in perf_pmu_output_stop()
8095 if (cpu == -1) in perf_pmu_output_stop()
8098 err = cpu_function_call(cpu, __perf_pmu_output_stop, event); in perf_pmu_output_stop()
8099 if (err == -EAGAIN) { in perf_pmu_output_stop()
8108 * task tracking -- fork/exit
8128 static int perf_event_task_match(struct perf_event *event) in perf_event_task_match() argument
8130 return event->attr.comm || event->attr.mmap || in perf_event_task_match()
8131 event->attr.mmap2 || event->attr.mmap_data || in perf_event_task_match()
8132 event->attr.task; in perf_event_task_match()
8135 static void perf_event_task_output(struct perf_event *event, in perf_event_task_output() argument
8141 struct task_struct *task = task_event->task; in perf_event_task_output()
8142 int ret, size = task_event->event_id.header.size; in perf_event_task_output()
8144 if (!perf_event_task_match(event)) in perf_event_task_output()
8147 perf_event_header__init_id(&task_event->event_id.header, &sample, event); in perf_event_task_output()
8149 ret = perf_output_begin(&handle, &sample, event, in perf_event_task_output()
8150 task_event->event_id.header.size); in perf_event_task_output()
8154 task_event->event_id.pid = perf_event_pid(event, task); in perf_event_task_output()
8155 task_event->event_id.tid = perf_event_tid(event, task); in perf_event_task_output()
8157 if (task_event->event_id.header.type == PERF_RECORD_EXIT) { in perf_event_task_output()
8158 task_event->event_id.ppid = perf_event_pid(event, in perf_event_task_output()
8159 task->real_parent); in perf_event_task_output()
8160 task_event->event_id.ptid = perf_event_pid(event, in perf_event_task_output()
8161 task->real_parent); in perf_event_task_output()
8163 task_event->event_id.ppid = perf_event_pid(event, current); in perf_event_task_output()
8164 task_event->event_id.ptid = perf_event_tid(event, current); in perf_event_task_output()
8167 task_event->event_id.time = perf_event_clock(event); in perf_event_task_output()
8169 perf_output_put(&handle, task_event->event_id); in perf_event_task_output()
8171 perf_event__output_id_sample(event, &handle, &sample); in perf_event_task_output()
8175 task_event->event_id.header.size = size; in perf_event_task_output()
8234 static int perf_event_comm_match(struct perf_event *event) in perf_event_comm_match() argument
8236 return event->attr.comm; in perf_event_comm_match()
8239 static void perf_event_comm_output(struct perf_event *event, in perf_event_comm_output() argument
8245 int size = comm_event->event_id.header.size; in perf_event_comm_output()
8248 if (!perf_event_comm_match(event)) in perf_event_comm_output()
8251 perf_event_header__init_id(&comm_event->event_id.header, &sample, event); in perf_event_comm_output()
8252 ret = perf_output_begin(&handle, &sample, event, in perf_event_comm_output()
8253 comm_event->event_id.header.size); in perf_event_comm_output()
8258 comm_event->event_id.pid = perf_event_pid(event, comm_event->task); in perf_event_comm_output()
8259 comm_event->event_id.tid = perf_event_tid(event, comm_event->task); in perf_event_comm_output()
8261 perf_output_put(&handle, comm_event->event_id); in perf_event_comm_output()
8262 __output_copy(&handle, comm_event->comm, in perf_event_comm_output()
8263 comm_event->comm_size); in perf_event_comm_output()
8265 perf_event__output_id_sample(event, &handle, &sample); in perf_event_comm_output()
8269 comm_event->event_id.header.size = size; in perf_event_comm_output()
8278 strscpy(comm, comm_event->task->comm, sizeof(comm)); in perf_event_comm_event()
8281 comm_event->comm = comm; in perf_event_comm_event()
8282 comm_event->comm_size = size; in perf_event_comm_event()
8284 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; in perf_event_comm_event()
8333 static int perf_event_namespaces_match(struct perf_event *event) in perf_event_namespaces_match() argument
8335 return event->attr.namespaces; in perf_event_namespaces_match()
8338 static void perf_event_namespaces_output(struct perf_event *event, in perf_event_namespaces_output() argument
8344 u16 header_size = namespaces_event->event_id.header.size; in perf_event_namespaces_output()
8347 if (!perf_event_namespaces_match(event)) in perf_event_namespaces_output()
8350 perf_event_header__init_id(&namespaces_event->event_id.header, in perf_event_namespaces_output()
8351 &sample, event); in perf_event_namespaces_output()
8352 ret = perf_output_begin(&handle, &sample, event, in perf_event_namespaces_output()
8353 namespaces_event->event_id.header.size); in perf_event_namespaces_output()
8357 namespaces_event->event_id.pid = perf_event_pid(event, in perf_event_namespaces_output()
8358 namespaces_event->task); in perf_event_namespaces_output()
8359 namespaces_event->event_id.tid = perf_event_tid(event, in perf_event_namespaces_output()
8360 namespaces_event->task); in perf_event_namespaces_output()
8362 perf_output_put(&handle, namespaces_event->event_id); in perf_event_namespaces_output()
8364 perf_event__output_id_sample(event, &handle, &sample); in perf_event_namespaces_output()
8368 namespaces_event->event_id.header.size = header_size; in perf_event_namespaces_output()
8381 ns_inode = ns_path.dentry->d_inode; in perf_fill_ns_link_info()
8382 ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); in perf_fill_ns_link_info()
8383 ns_link_info->ino = ns_inode->i_ino; in perf_fill_ns_link_info()
8461 static int perf_event_cgroup_match(struct perf_event *event) in perf_event_cgroup_match() argument
8463 return event->attr.cgroup; in perf_event_cgroup_match()
8466 static void perf_event_cgroup_output(struct perf_event *event, void *data) in perf_event_cgroup_output() argument
8471 u16 header_size = cgroup_event->event_id.header.size; in perf_event_cgroup_output()
8474 if (!perf_event_cgroup_match(event)) in perf_event_cgroup_output()
8477 perf_event_header__init_id(&cgroup_event->event_id.header, in perf_event_cgroup_output()
8478 &sample, event); in perf_event_cgroup_output()
8479 ret = perf_output_begin(&handle, &sample, event, in perf_event_cgroup_output()
8480 cgroup_event->event_id.header.size); in perf_event_cgroup_output()
8484 perf_output_put(&handle, cgroup_event->event_id); in perf_event_cgroup_output()
8485 __output_copy(&handle, cgroup_event->path, cgroup_event->path_size); in perf_event_cgroup_output()
8487 perf_event__output_id_sample(event, &handle, &sample); in perf_event_cgroup_output()
8491 cgroup_event->event_id.header.size = header_size; in perf_event_cgroup_output()
8520 cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64)); in perf_event_cgroup()
8572 static int perf_event_mmap_match(struct perf_event *event, in perf_event_mmap_match() argument
8576 struct vm_area_struct *vma = mmap_event->vma; in perf_event_mmap_match()
8577 int executable = vma->vm_flags & VM_EXEC; in perf_event_mmap_match()
8579 return (!executable && event->attr.mmap_data) || in perf_event_mmap_match()
8580 (executable && (event->attr.mmap || event->attr.mmap2)); in perf_event_mmap_match()
8583 static void perf_event_mmap_output(struct perf_event *event, in perf_event_mmap_output() argument
8589 int size = mmap_event->event_id.header.size; in perf_event_mmap_output()
8590 u32 type = mmap_event->event_id.header.type; in perf_event_mmap_output()
8594 if (!perf_event_mmap_match(event, data)) in perf_event_mmap_output()
8597 if (event->attr.mmap2) { in perf_event_mmap_output()
8598 mmap_event->event_id.header.type = PERF_RECORD_MMAP2; in perf_event_mmap_output()
8599 mmap_event->event_id.header.size += sizeof(mmap_event->maj); in perf_event_mmap_output()
8600 mmap_event->event_id.header.size += sizeof(mmap_event->min); in perf_event_mmap_output()
8601 mmap_event->event_id.header.size += sizeof(mmap_event->ino); in perf_event_mmap_output()
8602 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); in perf_event_mmap_output()
8603 mmap_event->event_id.header.size += sizeof(mmap_event->prot); in perf_event_mmap_output()
8604 mmap_event->event_id.header.size += sizeof(mmap_event->flags); in perf_event_mmap_output()
8607 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); in perf_event_mmap_output()
8608 ret = perf_output_begin(&handle, &sample, event, in perf_event_mmap_output()
8609 mmap_event->event_id.header.size); in perf_event_mmap_output()
8613 mmap_event->event_id.pid = perf_event_pid(event, current); in perf_event_mmap_output()
8614 mmap_event->event_id.tid = perf_event_tid(event, current); in perf_event_mmap_output()
8616 use_build_id = event->attr.build_id && mmap_event->build_id_size; in perf_event_mmap_output()
8618 if (event->attr.mmap2 && use_build_id) in perf_event_mmap_output()
8619 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; in perf_event_mmap_output()
8621 perf_output_put(&handle, mmap_event->event_id); in perf_event_mmap_output()
8623 if (event->attr.mmap2) { in perf_event_mmap_output()
8625 u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 }; in perf_event_mmap_output()
8628 __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX); in perf_event_mmap_output()
8630 perf_output_put(&handle, mmap_event->maj); in perf_event_mmap_output()
8631 perf_output_put(&handle, mmap_event->min); in perf_event_mmap_output()
8632 perf_output_put(&handle, mmap_event->ino); in perf_event_mmap_output()
8633 perf_output_put(&handle, mmap_event->ino_generation); in perf_event_mmap_output()
8635 perf_output_put(&handle, mmap_event->prot); in perf_event_mmap_output()
8636 perf_output_put(&handle, mmap_event->flags); in perf_event_mmap_output()
8639 __output_copy(&handle, mmap_event->file_name, in perf_event_mmap_output()
8640 mmap_event->file_size); in perf_event_mmap_output()
8642 perf_event__output_id_sample(event, &handle, &sample); in perf_event_mmap_output()
8646 mmap_event->event_id.header.size = size; in perf_event_mmap_output()
8647 mmap_event->event_id.header.type = type; in perf_event_mmap_output()
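
For orientation, a sketch of the PERF_RECORD_MMAP2 body the code above assembles (the optional sample_id trailer is not shown); when the build-ID misc flag is set, the maj/min/ino/ino_generation block is replaced by the build-ID bytes:

#include <linux/perf_event.h>
#include <linux/types.h>

struct mmap2_record_layout {
	struct perf_event_header header;
	__u32 pid, tid;
	__u64 addr, len, pgoff;
	__u32 maj, min;
	__u64 ino, ino_generation;
	__u32 prot, flags;
	char  filename[];	/* NUL-padded to a multiple of 8 bytes */
};
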
8652 struct vm_area_struct *vma = mmap_event->vma; in perf_event_mmap_event()
8653 struct file *file = vma->vm_file; in perf_event_mmap_event()
8662 if (vma->vm_flags & VM_READ) in perf_event_mmap_event()
8664 if (vma->vm_flags & VM_WRITE) in perf_event_mmap_event()
8666 if (vma->vm_flags & VM_EXEC) in perf_event_mmap_event()
8669 if (vma->vm_flags & VM_MAYSHARE) in perf_event_mmap_event()
8674 if (vma->vm_flags & VM_LOCKED) in perf_event_mmap_event()
8693 name = file_path(file, buf, PATH_MAX - sizeof(u64)); in perf_event_mmap_event()
8698 inode = file_inode(vma->vm_file); in perf_event_mmap_event()
8699 dev = inode->i_sb->s_dev; in perf_event_mmap_event()
8700 ino = inode->i_ino; in perf_event_mmap_event()
8701 gen = inode->i_generation; in perf_event_mmap_event()
8707 if (vma->vm_ops && vma->vm_ops->name) in perf_event_mmap_event()
8708 name = (char *) vma->vm_ops->name(vma); in perf_event_mmap_event()
8734 mmap_event->file_name = name; in perf_event_mmap_event()
8735 mmap_event->file_size = size; in perf_event_mmap_event()
8736 mmap_event->maj = maj; in perf_event_mmap_event()
8737 mmap_event->min = min; in perf_event_mmap_event()
8738 mmap_event->ino = ino; in perf_event_mmap_event()
8739 mmap_event->ino_generation = gen; in perf_event_mmap_event()
8740 mmap_event->prot = prot; in perf_event_mmap_event()
8741 mmap_event->flags = flags; in perf_event_mmap_event()
8743 if (!(vma->vm_flags & VM_EXEC)) in perf_event_mmap_event()
8744 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; in perf_event_mmap_event()
8746 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; in perf_event_mmap_event()
8749 build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size); in perf_event_mmap_event()
8765 /* d_inode(NULL) won't be equal to any mapped user-space file */ in perf_addr_filter_match()
8766 if (!filter->path.dentry) in perf_addr_filter_match()
8769 if (d_inode(filter->path.dentry) != file_inode(file)) in perf_addr_filter_match()
8772 if (filter->offset > offset + size) in perf_addr_filter_match()
8775 if (filter->offset + filter->size < offset) in perf_addr_filter_match()
8785 unsigned long vma_size = vma->vm_end - vma->vm_start; in perf_addr_filter_vma_adjust()
8786 unsigned long off = vma->vm_pgoff << PAGE_SHIFT; in perf_addr_filter_vma_adjust()
8787 struct file *file = vma->vm_file; in perf_addr_filter_vma_adjust()
8792 if (filter->offset < off) { in perf_addr_filter_vma_adjust()
8793 fr->start = vma->vm_start; in perf_addr_filter_vma_adjust()
8794 fr->size = min(vma_size, filter->size - (off - filter->offset)); in perf_addr_filter_vma_adjust()
8796 fr->start = vma->vm_start + filter->offset - off; in perf_addr_filter_vma_adjust()
8797 fr->size = min(vma->vm_end - fr->start, filter->size); in perf_addr_filter_vma_adjust()
8803 static void __perf_addr_filters_adjust(struct perf_event *event, void *data) in __perf_addr_filters_adjust() argument
8805 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); in __perf_addr_filters_adjust()
8811 if (!has_addr_filter(event)) in __perf_addr_filters_adjust()
8814 if (!vma->vm_file) in __perf_addr_filters_adjust()
8817 raw_spin_lock_irqsave(&ifh->lock, flags); in __perf_addr_filters_adjust()
8818 list_for_each_entry(filter, &ifh->list, entry) { in __perf_addr_filters_adjust()
8820 &event->addr_filter_ranges[count])) in __perf_addr_filters_adjust()
8827 event->addr_filters_gen++; in __perf_addr_filters_adjust()
8828 raw_spin_unlock_irqrestore(&ifh->lock, flags); in __perf_addr_filters_adjust()
8831 perf_event_stop(event, 1); in __perf_addr_filters_adjust()
8845 if (!(vma->vm_flags & VM_EXEC)) in perf_addr_filters_adjust()
8849 ctx = rcu_dereference(current->perf_event_ctxp); in perf_addr_filters_adjust()
8874 .start = vma->vm_start, in perf_event_mmap()
8875 .len = vma->vm_end - vma->vm_start, in perf_event_mmap()
8876 .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, in perf_event_mmap()
8890 void perf_event_aux_event(struct perf_event *event, unsigned long head, in perf_event_aux_event() argument
8912 perf_event_header__init_id(&rec.header, &sample, event); in perf_event_aux_event()
8913 ret = perf_output_begin(&handle, &sample, event, rec.header.size); in perf_event_aux_event()
8919 perf_event__output_id_sample(event, &handle, &sample); in perf_event_aux_event()
8927 void perf_log_lost_samples(struct perf_event *event, u64 lost) in perf_log_lost_samples() argument
8945 perf_event_header__init_id(&lost_samples_event.header, &sample, event); in perf_log_lost_samples()
8947 ret = perf_output_begin(&handle, &sample, event, in perf_log_lost_samples()
8953 perf_event__output_id_sample(event, &handle, &sample); in perf_log_lost_samples()
8972 static int perf_event_switch_match(struct perf_event *event) in perf_event_switch_match() argument
8974 return event->attr.context_switch; in perf_event_switch_match()
8977 static void perf_event_switch_output(struct perf_event *event, void *data) in perf_event_switch_output() argument
8984 if (!perf_event_switch_match(event)) in perf_event_switch_output()
8987 /* Only CPU-wide events are allowed to see next/prev pid/tid */ in perf_event_switch_output()
8988 if (event->ctx->task) { in perf_event_switch_output()
8989 se->event_id.header.type = PERF_RECORD_SWITCH; in perf_event_switch_output()
8990 se->event_id.header.size = sizeof(se->event_id.header); in perf_event_switch_output()
8992 se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE; in perf_event_switch_output()
8993 se->event_id.header.size = sizeof(se->event_id); in perf_event_switch_output()
8994 se->event_id.next_prev_pid = in perf_event_switch_output()
8995 perf_event_pid(event, se->next_prev); in perf_event_switch_output()
8996 se->event_id.next_prev_tid = in perf_event_switch_output()
8997 perf_event_tid(event, se->next_prev); in perf_event_switch_output()
9000 perf_event_header__init_id(&se->event_id.header, &sample, event); in perf_event_switch_output()
9002 ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size); in perf_event_switch_output()
9006 if (event->ctx->task) in perf_event_switch_output()
9007 perf_output_put(&handle, se->event_id.header); in perf_event_switch_output()
9009 perf_output_put(&handle, se->event_id); in perf_event_switch_output()
9011 perf_event__output_id_sample(event, &handle, &sample); in perf_event_switch_output()
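
The records above are requested with attr.context_switch; per-task events receive the short PERF_RECORD_SWITCH, while CPU-wide events get PERF_RECORD_SWITCH_CPU_WIDE including the next/prev pid/tid. A common idiom is to hang this off a dummy software event, sketched here:

#include <linux/perf_event.h>

static const struct perf_event_attr switch_attr = {
	.size		= sizeof(struct perf_event_attr),
	.type		= PERF_TYPE_SOFTWARE,
	.config		= PERF_COUNT_SW_DUMMY,
	.context_switch	= 1,
	.sample_id_all	= 1,	/* annotate records with pid/tid/time */
};
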
9037 if (!sched_in && task->on_rq) { in perf_event_switch()
9049 static void perf_log_throttle(struct perf_event *event, int enable) in perf_log_throttle() argument
9066 .time = perf_event_clock(event), in perf_log_throttle()
9067 .id = primary_event_id(event), in perf_log_throttle()
9068 .stream_id = event->id, in perf_log_throttle()
9074 perf_event_header__init_id(&throttle_event.header, &sample, event); in perf_log_throttle()
9076 ret = perf_output_begin(&handle, &sample, event, in perf_log_throttle()
9082 perf_event__output_id_sample(event, &handle, &sample); in perf_log_throttle()
9102 static int perf_event_ksymbol_match(struct perf_event *event) in perf_event_ksymbol_match() argument
9104 return event->attr.ksymbol; in perf_event_ksymbol_match()
9107 static void perf_event_ksymbol_output(struct perf_event *event, void *data) in perf_event_ksymbol_output() argument
9114 if (!perf_event_ksymbol_match(event)) in perf_event_ksymbol_output()
9117 perf_event_header__init_id(&ksymbol_event->event_id.header, in perf_event_ksymbol_output()
9118 &sample, event); in perf_event_ksymbol_output()
9119 ret = perf_output_begin(&handle, &sample, event, in perf_event_ksymbol_output()
9120 ksymbol_event->event_id.header.size); in perf_event_ksymbol_output()
9124 perf_output_put(&handle, ksymbol_event->event_id); in perf_event_ksymbol_output()
9125 __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); in perf_event_ksymbol_output()
9126 perf_event__output_id_sample(event, &handle, &sample); in perf_event_ksymbol_output()
9192 static int perf_event_bpf_match(struct perf_event *event) in perf_event_bpf_match() argument
9194 return event->attr.bpf_event; in perf_event_bpf_match()
9197 static void perf_event_bpf_output(struct perf_event *event, void *data) in perf_event_bpf_output() argument
9204 if (!perf_event_bpf_match(event)) in perf_event_bpf_output()
9207 perf_event_header__init_id(&bpf_event->event_id.header, in perf_event_bpf_output()
9208 &sample, event); in perf_event_bpf_output()
9209 ret = perf_output_begin(&handle, &sample, event, in perf_event_bpf_output()
9210 bpf_event->event_id.header.size); in perf_event_bpf_output()
9214 perf_output_put(&handle, bpf_event->event_id); in perf_event_bpf_output()
9215 perf_event__output_id_sample(event, &handle, &sample); in perf_event_bpf_output()
9226 if (prog->aux->func_cnt == 0) { in perf_event_bpf_emit_ksymbols()
9228 (u64)(unsigned long)prog->bpf_func, in perf_event_bpf_emit_ksymbols()
9229 prog->jited_len, unregister, in perf_event_bpf_emit_ksymbols()
9230 prog->aux->ksym.name); in perf_event_bpf_emit_ksymbols()
9232 for (i = 0; i < prog->aux->func_cnt; i++) { in perf_event_bpf_emit_ksymbols()
9233 struct bpf_prog *subprog = prog->aux->func[i]; in perf_event_bpf_emit_ksymbols()
9237 (u64)(unsigned long)subprog->bpf_func, in perf_event_bpf_emit_ksymbols()
9238 subprog->jited_len, unregister, in perf_event_bpf_emit_ksymbols()
9239 subprog->aux->ksym.name); in perf_event_bpf_emit_ksymbols()
9276 .id = prog->aux->id, in perf_event_bpf_event()
9282 memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE); in perf_event_bpf_event()
9300 static int perf_event_text_poke_match(struct perf_event *event) in perf_event_text_poke_match() argument
9302 return event->attr.text_poke; in perf_event_text_poke_match()
9305 static void perf_event_text_poke_output(struct perf_event *event, void *data) in perf_event_text_poke_output() argument
9313 if (!perf_event_text_poke_match(event)) in perf_event_text_poke_output()
9316 perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event); in perf_event_text_poke_output()
9318 ret = perf_output_begin(&handle, &sample, event, in perf_event_text_poke_output()
9319 text_poke_event->event_id.header.size); in perf_event_text_poke_output()
9323 perf_output_put(&handle, text_poke_event->event_id); in perf_event_text_poke_output()
9324 perf_output_put(&handle, text_poke_event->old_len); in perf_event_text_poke_output()
9325 perf_output_put(&handle, text_poke_event->new_len); in perf_event_text_poke_output()
9327 __output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len); in perf_event_text_poke_output()
9328 __output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len); in perf_event_text_poke_output()
9330 if (text_poke_event->pad) in perf_event_text_poke_output()
9331 __output_copy(&handle, &padding, text_poke_event->pad); in perf_event_text_poke_output()
9333 perf_event__output_id_sample(event, &handle, &sample); in perf_event_text_poke_output()
9349 pad = ALIGN(tot, sizeof(u64)) - tot; in perf_event_text_poke()
9370 void perf_event_itrace_started(struct perf_event *event) in perf_event_itrace_started() argument
9372 event->attach_state |= PERF_ATTACH_ITRACE; in perf_event_itrace_started()
9375 static void perf_log_itrace_start(struct perf_event *event) in perf_log_itrace_start() argument
9386 if (event->parent) in perf_log_itrace_start()
9387 event = event->parent; in perf_log_itrace_start()
9389 if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) || in perf_log_itrace_start()
9390 event->attach_state & PERF_ATTACH_ITRACE) in perf_log_itrace_start()
9396 rec.pid = perf_event_pid(event, current); in perf_log_itrace_start()
9397 rec.tid = perf_event_tid(event, current); in perf_log_itrace_start()
9399 perf_event_header__init_id(&rec.header, &sample, event); in perf_log_itrace_start()
9400 ret = perf_output_begin(&handle, &sample, event, rec.header.size); in perf_log_itrace_start()
9406 perf_event__output_id_sample(event, &handle, &sample); in perf_log_itrace_start()
9411 void perf_report_aux_output_id(struct perf_event *event, u64 hw_id) in perf_report_aux_output_id() argument
9421 if (event->parent) in perf_report_aux_output_id()
9422 event = event->parent; in perf_report_aux_output_id()
9429 perf_event_header__init_id(&rec.header, &sample, event); in perf_report_aux_output_id()
9430 ret = perf_output_begin(&handle, &sample, event, rec.header.size); in perf_report_aux_output_id()
9436 perf_event__output_id_sample(event, &handle, &sample); in perf_report_aux_output_id()
9443 __perf_event_account_interrupt(struct perf_event *event, int throttle) in __perf_event_account_interrupt() argument
9445 struct hw_perf_event *hwc = &event->hw; in __perf_event_account_interrupt()
9450 if (seq != hwc->interrupts_seq) { in __perf_event_account_interrupt()
9451 hwc->interrupts_seq = seq; in __perf_event_account_interrupt()
9452 hwc->interrupts = 1; in __perf_event_account_interrupt()
9454 hwc->interrupts++; in __perf_event_account_interrupt()
9456 hwc->interrupts > max_samples_per_tick)) { in __perf_event_account_interrupt()
9459 hwc->interrupts = MAX_INTERRUPTS; in __perf_event_account_interrupt()
9460 perf_log_throttle(event, 0); in __perf_event_account_interrupt()
9465 if (event->attr.freq) { in __perf_event_account_interrupt()
9467 s64 delta = now - hwc->freq_time_stamp; in __perf_event_account_interrupt()
9469 hwc->freq_time_stamp = now; in __perf_event_account_interrupt()
9472 perf_adjust_period(event, delta, hwc->last_period, true); in __perf_event_account_interrupt()
9478 int perf_event_account_interrupt(struct perf_event *event) in perf_event_account_interrupt() argument
9480 return __perf_event_account_interrupt(event, 1); in perf_event_account_interrupt()
9483 static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) in sample_is_allowed() argument
9490 if (event->attr.exclude_kernel && !user_mode(regs)) in sample_is_allowed()
9497 * Generic event overflow handling, sampling.
9500 static int __perf_event_overflow(struct perf_event *event, in __perf_event_overflow() argument
9504 int events = atomic_read(&event->event_limit); in __perf_event_overflow()
9508 * Non-sampling counters might still use the PMI to fold short in __perf_event_overflow()
9511 if (unlikely(!is_sampling_event(event))) in __perf_event_overflow()
9514 ret = __perf_event_account_interrupt(event, throttle); in __perf_event_overflow()
9521 event->pending_kill = POLL_IN; in __perf_event_overflow()
9522 if (events && atomic_dec_and_test(&event->event_limit)) { in __perf_event_overflow()
9524 event->pending_kill = POLL_HUP; in __perf_event_overflow()
9525 perf_event_disable_inatomic(event); in __perf_event_overflow()
9528 if (event->attr.sigtrap) { in __perf_event_overflow()
9532 * it is the first event, on the other hand, we should also not in __perf_event_overflow()
9535 bool valid_sample = sample_is_allowed(event, regs); in __perf_event_overflow()
9540 if (!event->pending_sigtrap) { in __perf_event_overflow()
9541 event->pending_sigtrap = pending_id; in __perf_event_overflow()
9542 local_inc(&event->ctx->nr_pending); in __perf_event_overflow()
9543 } else if (event->attr.exclude_kernel && valid_sample) { in __perf_event_overflow()
9551 * 2. Events that can overflow again before the IRQ- in __perf_event_overflow()
9554 * check 32-bit hash of the current IP. in __perf_event_overflow()
9556 WARN_ON_ONCE(event->pending_sigtrap != pending_id); in __perf_event_overflow()
9559 event->pending_addr = 0; in __perf_event_overflow()
9560 if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) in __perf_event_overflow()
9561 event->pending_addr = data->addr; in __perf_event_overflow()
9562 irq_work_queue(&event->pending_irq); in __perf_event_overflow()
9565 READ_ONCE(event->overflow_handler)(event, data, regs); in __perf_event_overflow() local
9567 if (*perf_event_fasync(event) && event->pending_kill) { in __perf_event_overflow()
9568 event->pending_wakeup = 1; in __perf_event_overflow()
9569 irq_work_queue(&event->pending_irq); in __perf_event_overflow()
9575 int perf_event_overflow(struct perf_event *event, in perf_event_overflow() argument
9579 return __perf_event_overflow(event, 1, data, regs); in perf_event_overflow()
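/*
 * Illustrative sketch (not part of this file): the user-space side of the
 * sigtrap path handled in __perf_event_overflow() above.  A sampling event
 * opened with attr.sigtrap = 1 delivers SIGTRAP to the target task on
 * overflow, and attr.sig_data is reported back in the siginfo (si_perf_data
 * in recent libc headers).  The exact siginfo field names and the sample
 * period chosen here are assumptions; the remove_on_exec requirement follows
 * from the check in perf_copy_attr() further down.
 */
#define _GNU_SOURCE
#include <signal.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static void trap_handler(int sig, siginfo_t *info, void *ucontext)
{
	/* info->si_perf_data carries attr.sig_data; keep the handler async-safe. */
	(void)sig; (void)info; (void)ucontext;
}

static int open_sigtrap_counter(void)
{
	struct sigaction sa = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
	struct perf_event_attr attr;

	sigaction(SIGTRAP, &sa, NULL);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 1000000;	/* example period */
	attr.sigtrap = 1;		/* SIGTRAP on overflow */
	attr.remove_on_exec = 1;	/* required when sigtrap is set */
	attr.sig_data = 0xdeadbeef;	/* echoed back to the signal handler */

	/* pid = 0 (current task), cpu = -1: sigtrap needs a task-bound event. */
	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}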
9583 * Generic software event infrastructure
9598 * We directly increment event->count and keep a second value in
9599 * event->hw.period_left to count intervals. This period event
9600 * is kept in the range [-sample_period, 0] so that we can use the
9604 u64 perf_swevent_set_period(struct perf_event *event) in perf_swevent_set_period() argument
9606 struct hw_perf_event *hwc = &event->hw; in perf_swevent_set_period()
9607 u64 period = hwc->last_period; in perf_swevent_set_period()
9611 hwc->last_period = hwc->sample_period; in perf_swevent_set_period()
9613 old = local64_read(&hwc->period_left); in perf_swevent_set_period()
9621 val -= offset; in perf_swevent_set_period()
9622 } while (!local64_try_cmpxchg(&hwc->period_left, &old, val)); in perf_swevent_set_period()
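/*
 * Illustrative sketch (not part of this file) of the accounting described in
 * the comment above perf_swevent_set_period(): period_left is kept in
 * [-sample_period, 0] and the sign flip signals that one or more periods have
 * elapsed.  Plain C stand-ins replace the local64_t helpers; this is only a
 * model of the idea, not the kernel's lock-free implementation.
 */
#include <stdint.h>

struct sw_period {
	int64_t period_left;	/* stand-in for hwc->period_left, starts at -sample_period */
	int64_t sample_period;	/* stand-in for hwc->sample_period */
};

/* Add @nr new events; return how many full sample periods elapsed. */
static uint64_t sw_period_account(struct sw_period *p, int64_t nr)
{
	uint64_t overflows = 0;

	p->period_left += nr;
	while (p->period_left >= 0) {		/* crossed zero => period elapsed */
		overflows++;
		p->period_left -= p->sample_period;	/* back into [-sample_period, 0] */
	}
	return overflows;
}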
9627 static void perf_swevent_overflow(struct perf_event *event, u64 overflow, in perf_swevent_overflow() argument
9631 struct hw_perf_event *hwc = &event->hw; in perf_swevent_overflow()
9635 overflow = perf_swevent_set_period(event); in perf_swevent_overflow()
9637 if (hwc->interrupts == MAX_INTERRUPTS) in perf_swevent_overflow()
9640 for (; overflow; overflow--) { in perf_swevent_overflow()
9641 if (__perf_event_overflow(event, throttle, in perf_swevent_overflow()
9645 * hwc->interrupts == MAX_INTERRUPTS. in perf_swevent_overflow()
9653 static void perf_swevent_event(struct perf_event *event, u64 nr, in perf_swevent_event() argument
9657 struct hw_perf_event *hwc = &event->hw; in perf_swevent_event()
9659 local64_add(nr, &event->count); in perf_swevent_event()
9664 if (!is_sampling_event(event)) in perf_swevent_event()
9667 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) { in perf_swevent_event()
9668 data->period = nr; in perf_swevent_event()
9669 return perf_swevent_overflow(event, 1, data, regs); in perf_swevent_event()
9671 data->period = event->hw.last_period; in perf_swevent_event()
9673 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) in perf_swevent_event()
9674 return perf_swevent_overflow(event, 1, data, regs); in perf_swevent_event()
9676 if (local64_add_negative(nr, &hwc->period_left)) in perf_swevent_event()
9679 perf_swevent_overflow(event, 0, data, regs); in perf_swevent_event()
9682 static int perf_exclude_event(struct perf_event *event, in perf_exclude_event() argument
9685 if (event->hw.state & PERF_HES_STOPPED) in perf_exclude_event()
9689 if (event->attr.exclude_user && user_mode(regs)) in perf_exclude_event()
9692 if (event->attr.exclude_kernel && !user_mode(regs)) in perf_exclude_event()
9699 static int perf_swevent_match(struct perf_event *event, in perf_swevent_match() argument
9705 if (event->attr.type != type) in perf_swevent_match()
9708 if (event->attr.config != event_id) in perf_swevent_match()
9711 if (perf_exclude_event(event, regs)) in perf_swevent_match()
9729 return &hlist->heads[hash]; in __find_swevent_head()
9738 hlist = rcu_dereference(swhash->swevent_hlist); in find_swevent_head_rcu()
9745 /* For the event head insertion and removal in the hlist */
9747 find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) in find_swevent_head() argument
9750 u32 event_id = event->attr.config; in find_swevent_head()
9751 u64 type = event->attr.type; in find_swevent_head()
9754 * Event scheduling is always serialized against hlist allocation in find_swevent_head()
9758 hlist = rcu_dereference_protected(swhash->swevent_hlist, in find_swevent_head()
9759 lockdep_is_held(&event->ctx->lock)); in find_swevent_head()
9772 struct perf_event *event; in do_perf_sw_event() local
9780 hlist_for_each_entry_rcu(event, head, hlist_entry) { in do_perf_sw_event()
9781 if (perf_swevent_match(event, type, event_id, data, regs)) in do_perf_sw_event()
9782 perf_swevent_event(event, nr, data, regs); in do_perf_sw_event()
9794 return get_recursion_context(swhash->recursion); in perf_swevent_get_recursion_context()
9802 put_recursion_context(swhash->recursion, rctx); in perf_swevent_put_recursion_context()
9832 static void perf_swevent_read(struct perf_event *event) in perf_swevent_read() argument
9836 static int perf_swevent_add(struct perf_event *event, int flags) in perf_swevent_add() argument
9839 struct hw_perf_event *hwc = &event->hw; in perf_swevent_add()
9842 if (is_sampling_event(event)) { in perf_swevent_add()
9843 hwc->last_period = hwc->sample_period; in perf_swevent_add()
9844 perf_swevent_set_period(event); in perf_swevent_add()
9847 hwc->state = !(flags & PERF_EF_START); in perf_swevent_add()
9849 head = find_swevent_head(swhash, event); in perf_swevent_add()
9851 return -EINVAL; in perf_swevent_add()
9853 hlist_add_head_rcu(&event->hlist_entry, head); in perf_swevent_add()
9854 perf_event_update_userpage(event); in perf_swevent_add()
9859 static void perf_swevent_del(struct perf_event *event, int flags) in perf_swevent_del() argument
9861 hlist_del_rcu(&event->hlist_entry); in perf_swevent_del()
9864 static void perf_swevent_start(struct perf_event *event, int flags) in perf_swevent_start() argument
9866 event->hw.state = 0; in perf_swevent_start()
9869 static void perf_swevent_stop(struct perf_event *event, int flags) in perf_swevent_stop() argument
9871 event->hw.state = PERF_HES_STOPPED; in perf_swevent_stop()
9878 return rcu_dereference_protected(swhash->swevent_hlist, in swevent_hlist_deref()
9879 lockdep_is_held(&swhash->hlist_mutex)); in swevent_hlist_deref()
9889 RCU_INIT_POINTER(swhash->swevent_hlist, NULL); in swevent_hlist_release()
9897 mutex_lock(&swhash->hlist_mutex); in swevent_hlist_put_cpu()
9899 if (!--swhash->hlist_refcount) in swevent_hlist_put_cpu()
9902 mutex_unlock(&swhash->hlist_mutex); in swevent_hlist_put_cpu()
9918 mutex_lock(&swhash->hlist_mutex); in swevent_hlist_get_cpu()
9925 err = -ENOMEM; in swevent_hlist_get_cpu()
9928 rcu_assign_pointer(swhash->swevent_hlist, hlist); in swevent_hlist_get_cpu()
9930 swhash->hlist_refcount++; in swevent_hlist_get_cpu()
9932 mutex_unlock(&swhash->hlist_mutex); in swevent_hlist_get_cpu()
9963 static void sw_perf_event_destroy(struct perf_event *event) in sw_perf_event_destroy() argument
9965 u64 event_id = event->attr.config; in sw_perf_event_destroy()
9967 WARN_ON(event->parent); in sw_perf_event_destroy()
9976 static int perf_swevent_init(struct perf_event *event) in perf_swevent_init() argument
9978 u64 event_id = event->attr.config; in perf_swevent_init()
9980 if (event->attr.type != PERF_TYPE_SOFTWARE) in perf_swevent_init()
9981 return -ENOENT; in perf_swevent_init()
9986 if (has_branch_stack(event)) in perf_swevent_init()
9987 return -EOPNOTSUPP; in perf_swevent_init()
9991 event->attr.type = perf_cpu_clock.type; in perf_swevent_init()
9992 return -ENOENT; in perf_swevent_init()
9994 event->attr.type = perf_task_clock.type; in perf_swevent_init()
9995 return -ENOENT; in perf_swevent_init()
10002 return -ENOENT; in perf_swevent_init()
10004 if (!event->parent) { in perf_swevent_init()
10012 event->destroy = sw_perf_event_destroy; in perf_swevent_init()
10033 static void tp_perf_event_destroy(struct perf_event *event) in tp_perf_event_destroy() argument
10035 perf_trace_destroy(event); in tp_perf_event_destroy()
10038 static int perf_tp_event_init(struct perf_event *event) in perf_tp_event_init() argument
10042 if (event->attr.type != PERF_TYPE_TRACEPOINT) in perf_tp_event_init()
10043 return -ENOENT; in perf_tp_event_init()
10048 if (has_branch_stack(event)) in perf_tp_event_init()
10049 return -EOPNOTSUPP; in perf_tp_event_init()
10051 err = perf_trace_init(event); in perf_tp_event_init()
10055 event->destroy = tp_perf_event_destroy; in perf_tp_event_init()
10071 static int perf_tp_filter_match(struct perf_event *event, in perf_tp_filter_match() argument
10074 void *record = data->raw->frag.data; in perf_tp_filter_match()
10077 if (event->parent) in perf_tp_filter_match()
10078 event = event->parent; in perf_tp_filter_match()
10080 if (likely(!event->filter) || filter_match_preds(event->filter, record)) in perf_tp_filter_match()
10085 static int perf_tp_event_match(struct perf_event *event, in perf_tp_event_match() argument
10089 if (event->hw.state & PERF_HES_STOPPED) in perf_tp_event_match()
10092 * If exclude_kernel, only trace user-space tracepoints (uprobes) in perf_tp_event_match()
10094 if (event->attr.exclude_kernel && !user_mode(regs)) in perf_tp_event_match()
10097 if (!perf_tp_filter_match(event, data)) in perf_tp_event_match()
10115 perf_tp_event(call->event.type, count, raw_data, size, regs, head, in perf_trace_run_bpf_submit()
10123 struct perf_event *event) in __perf_tp_event_target_task() argument
10127 if (event->attr.config != entry->type) in __perf_tp_event_target_task()
10130 if (event->attr.sigtrap) in __perf_tp_event_target_task()
10132 if (perf_tp_event_match(event, data, regs)) in __perf_tp_event_target_task()
10133 perf_swevent_event(event, count, data, regs); in __perf_tp_event_target_task()
10143 struct perf_event *event, *sibling; in perf_tp_event_target_task() local
10145 perf_event_groups_for_cpu_pmu(event, &ctx->pinned_groups, cpu, pmu) { in perf_tp_event_target_task()
10146 __perf_tp_event_target_task(count, record, regs, data, event); in perf_tp_event_target_task()
10147 for_each_sibling_event(sibling, event) in perf_tp_event_target_task()
10151 perf_event_groups_for_cpu_pmu(event, &ctx->flexible_groups, cpu, pmu) { in perf_tp_event_target_task()
10152 __perf_tp_event_target_task(count, record, regs, data, event); in perf_tp_event_target_task()
10153 for_each_sibling_event(sibling, event) in perf_tp_event_target_task()
10163 struct perf_event *event; in perf_tp_event() local
10177 hlist_for_each_entry_rcu(event, head, hlist_entry) { in perf_tp_event()
10178 if (perf_tp_event_match(event, &data, regs)) { in perf_tp_event()
10179 perf_swevent_event(event, count, &data, regs); in perf_tp_event()
10182 * Here we use the same on-stack perf_sample_data, in perf_tp_event()
10183 * some members in data are event-specific and in perf_tp_event()
10184 * need to be re-computed for different swevents. in perf_tp_event()

10185 * Re-initialize data->sample_flags safely to avoid in perf_tp_event()
10186 * the problem that next event skips preparing data in perf_tp_event()
10187 * because data->sample_flags is set. in perf_tp_event()
10196 * deliver this event there too. in perf_tp_event()
10202 ctx = rcu_dereference(task->perf_event_ctxp); in perf_tp_event()
10206 raw_spin_lock(&ctx->lock); in perf_tp_event()
10208 raw_spin_unlock(&ctx->lock); in perf_tp_event()
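/*
 * Illustrative sketch (not part of this file): opening one of the tracepoint
 * events that perf_tp_event() above dispatches to.  The tracepoint id is read
 * from tracefs; the sched_switch path is only an example, any entry under
 * events/ works the same way.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_tracepoint_event(void)
{
	struct perf_event_attr attr;
	long long id = -1;
	FILE *f;

	f = fopen("/sys/kernel/tracing/events/sched/sched_switch/id", "r");
	if (!f)
		return -1;
	if (fscanf(f, "%lld", &id) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.config = id;			/* tracepoint id */
	attr.sample_period = 1;
	attr.sample_type = PERF_SAMPLE_RAW;	/* raw tracepoint payload */

	/* pid = -1, cpu = 0: a CPU-wide event; needs suitable privileges. */
	return syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
}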
10235 PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
10257 static int perf_kprobe_event_init(struct perf_event *event);
10269 static int perf_kprobe_event_init(struct perf_event *event) in perf_kprobe_event_init() argument
10274 if (event->attr.type != perf_kprobe.type) in perf_kprobe_event_init()
10275 return -ENOENT; in perf_kprobe_event_init()
10278 return -EACCES; in perf_kprobe_event_init()
10283 if (has_branch_stack(event)) in perf_kprobe_event_init()
10284 return -EOPNOTSUPP; in perf_kprobe_event_init()
10286 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; in perf_kprobe_event_init()
10287 err = perf_kprobe_init(event, is_retprobe); in perf_kprobe_event_init()
10291 event->destroy = perf_kprobe_destroy; in perf_kprobe_event_init()
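/*
 * Illustrative sketch (not part of this file): opening an event on the
 * dynamic "kprobe" PMU whose event_init is shown above.  The PMU type is
 * discovered via sysfs; the probed symbol is an example, and the
 * kprobe_func/probe_offset attr fields are the uapi union members used by
 * perf_kprobe_init().
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_kprobe_event(const char *symbol)
{
	struct perf_event_attr attr;
	int type = -1;
	FILE *f;

	f = fopen("/sys/bus/event_source/devices/kprobe/type", "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &type) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;				/* dynamic kprobe PMU type */
	attr.kprobe_func = (uint64_t)(uintptr_t)symbol;	/* symbol to probe */
	attr.probe_offset = 0;				/* offset into the symbol */
	attr.config = 0;				/* bit 0 set would mean retprobe */
	attr.sample_period = 1;

	/* Needs CAP_PERFMON/CAP_SYS_ADMIN, cf. the perfmon_capable() gate above. */
	return syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
}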
10298 PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
10316 static int perf_uprobe_event_init(struct perf_event *event);
10328 static int perf_uprobe_event_init(struct perf_event *event) in perf_uprobe_event_init() argument
10334 if (event->attr.type != perf_uprobe.type) in perf_uprobe_event_init()
10335 return -ENOENT; in perf_uprobe_event_init()
10338 return -EACCES; in perf_uprobe_event_init()
10343 if (has_branch_stack(event)) in perf_uprobe_event_init()
10344 return -EOPNOTSUPP; in perf_uprobe_event_init()
10346 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; in perf_uprobe_event_init()
10347 ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT; in perf_uprobe_event_init()
10348 err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe); in perf_uprobe_event_init()
10352 event->destroy = perf_uprobe_destroy; in perf_uprobe_event_init()
10362 perf_pmu_register(&perf_kprobe, "kprobe", -1); in perf_tp_register()
10365 perf_pmu_register(&perf_uprobe, "uprobe", -1); in perf_tp_register()
10369 static void perf_event_free_filter(struct perf_event *event) in perf_event_free_filter() argument
10371 ftrace_profile_free_filter(event); in perf_event_free_filter()
10375 static void bpf_overflow_handler(struct perf_event *event, in bpf_overflow_handler() argument
10381 .event = event, in bpf_overflow_handler()
10390 prog = READ_ONCE(event->prog); in bpf_overflow_handler()
10392 perf_prepare_sample(data, event, regs); in bpf_overflow_handler()
10401 event->orig_overflow_handler(event, data, regs); in bpf_overflow_handler()
10404 static int perf_event_set_bpf_handler(struct perf_event *event, in perf_event_set_bpf_handler() argument
10408 if (event->overflow_handler_context) in perf_event_set_bpf_handler()
10410 return -EINVAL; in perf_event_set_bpf_handler()
10412 if (event->prog) in perf_event_set_bpf_handler()
10413 return -EEXIST; in perf_event_set_bpf_handler()
10415 if (prog->type != BPF_PROG_TYPE_PERF_EVENT) in perf_event_set_bpf_handler()
10416 return -EINVAL; in perf_event_set_bpf_handler()
10418 if (event->attr.precise_ip && in perf_event_set_bpf_handler()
10419 prog->call_get_stack && in perf_event_set_bpf_handler()
10420 (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) || in perf_event_set_bpf_handler()
10421 event->attr.exclude_callchain_kernel || in perf_event_set_bpf_handler()
10422 event->attr.exclude_callchain_user)) { in perf_event_set_bpf_handler()
10432 return -EPROTO; in perf_event_set_bpf_handler()
10435 event->prog = prog; in perf_event_set_bpf_handler()
10436 event->bpf_cookie = bpf_cookie; in perf_event_set_bpf_handler()
10437 event->orig_overflow_handler = READ_ONCE(event->overflow_handler); in perf_event_set_bpf_handler()
10438 WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); in perf_event_set_bpf_handler()
10442 static void perf_event_free_bpf_handler(struct perf_event *event) in perf_event_free_bpf_handler() argument
10444 struct bpf_prog *prog = event->prog; in perf_event_free_bpf_handler()
10449 WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler); in perf_event_free_bpf_handler()
10450 event->prog = NULL; in perf_event_free_bpf_handler()
10454 static int perf_event_set_bpf_handler(struct perf_event *event, in perf_event_set_bpf_handler() argument
10458 return -EOPNOTSUPP; in perf_event_set_bpf_handler()
10460 static void perf_event_free_bpf_handler(struct perf_event *event) in perf_event_free_bpf_handler() argument
10466 * returns true if the event is a tracepoint, or a kprobe/uprobe created
10469 static inline bool perf_event_is_tracing(struct perf_event *event) in perf_event_is_tracing() argument
10471 if (event->pmu == &perf_tracepoint) in perf_event_is_tracing()
10474 if (event->pmu == &perf_kprobe) in perf_event_is_tracing()
10478 if (event->pmu == &perf_uprobe) in perf_event_is_tracing()
10484 int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, in perf_event_set_bpf_prog() argument
10489 if (!perf_event_is_tracing(event)) in perf_event_set_bpf_prog()
10490 return perf_event_set_bpf_handler(event, prog, bpf_cookie); in perf_event_set_bpf_prog()
10492 is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_KPROBE; in perf_event_set_bpf_prog()
10493 is_uprobe = event->tp_event->flags & TRACE_EVENT_FL_UPROBE; in perf_event_set_bpf_prog()
10494 is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; in perf_event_set_bpf_prog()
10495 is_syscall_tp = is_syscall_trace_event(event->tp_event); in perf_event_set_bpf_prog()
10498 return -EINVAL; in perf_event_set_bpf_prog()
10500 if (((is_kprobe || is_uprobe) && prog->type != BPF_PROG_TYPE_KPROBE) || in perf_event_set_bpf_prog()
10501 (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) || in perf_event_set_bpf_prog()
10502 (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) in perf_event_set_bpf_prog()
10503 return -EINVAL; in perf_event_set_bpf_prog()
10505 if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe) in perf_event_set_bpf_prog()
10507 return -EINVAL; in perf_event_set_bpf_prog()
10510 if (prog->kprobe_override && !is_kprobe) in perf_event_set_bpf_prog()
10511 return -EINVAL; in perf_event_set_bpf_prog()
10514 int off = trace_event_get_offsets(event->tp_event); in perf_event_set_bpf_prog()
10516 if (prog->aux->max_ctx_offset > off) in perf_event_set_bpf_prog()
10517 return -EACCES; in perf_event_set_bpf_prog()
10520 return perf_event_attach_bpf_prog(event, prog, bpf_cookie); in perf_event_set_bpf_prog()
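/*
 * Illustrative sketch (not part of this file): attaching a BPF program to a
 * tracing perf event, which ends up in perf_event_set_bpf_prog() above.  The
 * program is assumed to have been loaded already (e.g. via libbpf) and its fd
 * passed in; the event fd must refer to a tracepoint/kprobe/uprobe event.
 */
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int attach_bpf_to_event(int perf_fd, int bpf_prog_fd)
{
	/* Hand the program to the event... */
	if (ioctl(perf_fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd))
		return -1;
	/* ...and enable the event so the program starts firing. */
	return ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0);
}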
10523 void perf_event_free_bpf_prog(struct perf_event *event) in perf_event_free_bpf_prog() argument
10525 if (!perf_event_is_tracing(event)) { in perf_event_free_bpf_prog()
10526 perf_event_free_bpf_handler(event); in perf_event_free_bpf_prog()
10529 perf_event_detach_bpf_prog(event); in perf_event_free_bpf_prog()
10538 static void perf_event_free_filter(struct perf_event *event) in perf_event_free_filter() argument
10542 int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, in perf_event_set_bpf_prog() argument
10545 return -ENOENT; in perf_event_set_bpf_prog()
10548 void perf_event_free_bpf_prog(struct perf_event *event) in perf_event_free_bpf_prog() argument
10559 perf_sample_data_init(&sample, bp->attr.bp_addr, 0); in perf_bp_event()
10561 if (!bp->hw.state && !perf_exclude_event(bp, regs)) in perf_bp_event()
10570 perf_addr_filter_new(struct perf_event *event, struct list_head *filters) in perf_addr_filter_new() argument
10572 int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu); in perf_addr_filter_new()
10579 INIT_LIST_HEAD(&filter->entry); in perf_addr_filter_new()
10580 list_add_tail(&filter->entry, filters); in perf_addr_filter_new()
10590 path_put(&filter->path); in free_filters_list()
10591 list_del(&filter->entry); in free_filters_list()
10599 static void perf_addr_filters_splice(struct perf_event *event, in perf_addr_filters_splice() argument
10605 if (!has_addr_filter(event)) in perf_addr_filters_splice()
10609 if (event->parent) in perf_addr_filters_splice()
10612 raw_spin_lock_irqsave(&event->addr_filters.lock, flags); in perf_addr_filters_splice()
10614 list_splice_init(&event->addr_filters.list, &list); in perf_addr_filters_splice()
10616 list_splice(head, &event->addr_filters.list); in perf_addr_filters_splice()
10618 raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags); in perf_addr_filters_splice()
10636 if (!vma->vm_file) in perf_addr_filter_apply()
10645 * Update event's address range filters based on the
10648 static void perf_event_addr_filters_apply(struct perf_event *event) in perf_event_addr_filters_apply() argument
10650 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); in perf_event_addr_filters_apply()
10651 struct task_struct *task = READ_ONCE(event->ctx->task); in perf_event_addr_filters_apply()
10658 * We may observe TASK_TOMBSTONE, which means that the event tear-down in perf_event_addr_filters_apply()
10664 if (ifh->nr_file_filters) { in perf_event_addr_filters_apply()
10672 raw_spin_lock_irqsave(&ifh->lock, flags); in perf_event_addr_filters_apply()
10673 list_for_each_entry(filter, &ifh->list, entry) { in perf_event_addr_filters_apply()
10674 if (filter->path.dentry) { in perf_event_addr_filters_apply()
10679 event->addr_filter_ranges[count].start = 0; in perf_event_addr_filters_apply()
10680 event->addr_filter_ranges[count].size = 0; in perf_event_addr_filters_apply()
10682 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); in perf_event_addr_filters_apply()
10684 event->addr_filter_ranges[count].start = filter->offset; in perf_event_addr_filters_apply()
10685 event->addr_filter_ranges[count].size = filter->size; in perf_event_addr_filters_apply()
10691 event->addr_filters_gen++; in perf_event_addr_filters_apply()
10692 raw_spin_unlock_irqrestore(&ifh->lock, flags); in perf_event_addr_filters_apply()
10694 if (ifh->nr_file_filters) { in perf_event_addr_filters_apply()
10701 perf_event_stop(event, 1); in perf_event_addr_filters_apply()
10724 IF_ACT_NONE = -1,
10755 perf_event_parse_addr_filter(struct perf_event *event, char *fstr, in perf_event_parse_addr_filter() argument
10763 int ret = -EINVAL; in perf_event_parse_addr_filter()
10767 return -ENOMEM; in perf_event_parse_addr_filter()
10775 ret = -EINVAL; in perf_event_parse_addr_filter()
10782 filter = perf_addr_filter_new(event, filters); in perf_event_parse_addr_filter()
10795 filter->action = actions[token]; in perf_event_parse_addr_filter()
10810 ret = kstrtoul(args[0].from, 0, &filter->offset); in perf_event_parse_addr_filter()
10816 ret = kstrtoul(args[1].from, 0, &filter->size); in perf_event_parse_addr_filter()
10827 ret = -ENOMEM; in perf_event_parse_addr_filter()
10841 * Make sure that it doesn't contradict itself or the event's in perf_event_parse_addr_filter()
10845 ret = -EINVAL; in perf_event_parse_addr_filter()
10848 * ACTION "filter" must have a non-zero length region in perf_event_parse_addr_filter()
10851 if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER && in perf_event_parse_addr_filter()
10852 !filter->size) in perf_event_parse_addr_filter()
10860 * For now, we only support file-based filters in perf_event_parse_addr_filter()
10861 * in per-task events; doing so for CPU-wide in perf_event_parse_addr_filter()
10867 ret = -EOPNOTSUPP; in perf_event_parse_addr_filter()
10868 if (!event->ctx->task) in perf_event_parse_addr_filter()
10873 &filter->path); in perf_event_parse_addr_filter()
10877 ret = -EINVAL; in perf_event_parse_addr_filter()
10878 if (!filter->path.dentry || in perf_event_parse_addr_filter()
10879 !S_ISREG(d_inode(filter->path.dentry) in perf_event_parse_addr_filter()
10880 ->i_mode)) in perf_event_parse_addr_filter()
10883 event->addr_filters.nr_file_filters++; in perf_event_parse_addr_filter()
10912 perf_event_set_addr_filter(struct perf_event *event, char *filter_str) in perf_event_set_addr_filter() argument
10921 lockdep_assert_held(&event->ctx->mutex); in perf_event_set_addr_filter()
10923 if (WARN_ON_ONCE(event->parent)) in perf_event_set_addr_filter()
10924 return -EINVAL; in perf_event_set_addr_filter()
10926 ret = perf_event_parse_addr_filter(event, filter_str, &filters); in perf_event_set_addr_filter()
10930 ret = event->pmu->addr_filters_validate(&filters); in perf_event_set_addr_filter()
10935 perf_addr_filters_splice(event, &filters); in perf_event_set_addr_filter()
10938 perf_event_for_each_child(event, perf_event_addr_filters_apply); in perf_event_set_addr_filter()
10946 event->addr_filters.nr_file_filters = 0; in perf_event_set_addr_filter()
10951 static int perf_event_set_filter(struct perf_event *event, void __user *arg) in perf_event_set_filter() argument
10953 int ret = -EINVAL; in perf_event_set_filter()
10961 if (perf_event_is_tracing(event)) { in perf_event_set_filter()
10962 struct perf_event_context *ctx = event->ctx; in perf_event_set_filter()
10967 * the tracepoint muck will deadlock against ctx->mutex, but in perf_event_set_filter()
10969 * temporarily drop ctx->mutex. As per perf_event_ctx_lock() we in perf_event_set_filter()
10972 * This can result in event getting moved to a different ctx, in perf_event_set_filter()
10975 mutex_unlock(&ctx->mutex); in perf_event_set_filter()
10976 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); in perf_event_set_filter()
10977 mutex_lock(&ctx->mutex); in perf_event_set_filter()
10980 if (has_addr_filter(event)) in perf_event_set_filter()
10981 ret = perf_event_set_addr_filter(event, filter_str); in perf_event_set_filter()
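/*
 * Illustrative sketch (not part of this file): installing an address filter
 * through the same ioctl that reaches perf_event_set_filter() above.  The
 * event must come from a PMU that supports address filtering (e.g. Intel PT),
 * the string follows the "filter|start|stop <addr>[/<size>][@<file>]" syntax
 * parsed by perf_event_parse_addr_filter(), and the path is an example only.
 */
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int set_addr_filter(int perf_fd)
{
	/* Trace only the first 4KiB of code mapped from this binary. */
	const char *filter = "filter 0x0/0x1000@/usr/bin/example";

	return ioctl(perf_fd, PERF_EVENT_IOC_SET_FILTER, filter);
}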
10996 struct perf_event *event; in perf_swevent_hrtimer() local
10999 event = container_of(hrtimer, struct perf_event, hw.hrtimer); in perf_swevent_hrtimer()
11001 if (event->state != PERF_EVENT_STATE_ACTIVE) in perf_swevent_hrtimer()
11004 event->pmu->read(event); in perf_swevent_hrtimer()
11006 perf_sample_data_init(&data, 0, event->hw.last_period); in perf_swevent_hrtimer()
11009 if (regs && !perf_exclude_event(event, regs)) { in perf_swevent_hrtimer()
11010 if (!(event->attr.exclude_idle && is_idle_task(current))) in perf_swevent_hrtimer()
11011 if (__perf_event_overflow(event, 1, &data, regs)) in perf_swevent_hrtimer()
11015 period = max_t(u64, 10000, event->hw.sample_period); in perf_swevent_hrtimer()
11021 static void perf_swevent_start_hrtimer(struct perf_event *event) in perf_swevent_start_hrtimer() argument
11023 struct hw_perf_event *hwc = &event->hw; in perf_swevent_start_hrtimer()
11026 if (!is_sampling_event(event)) in perf_swevent_start_hrtimer()
11029 period = local64_read(&hwc->period_left); in perf_swevent_start_hrtimer()
11034 local64_set(&hwc->period_left, 0); in perf_swevent_start_hrtimer()
11036 period = max_t(u64, 10000, hwc->sample_period); in perf_swevent_start_hrtimer()
11038 hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), in perf_swevent_start_hrtimer()
11042 static void perf_swevent_cancel_hrtimer(struct perf_event *event) in perf_swevent_cancel_hrtimer() argument
11044 struct hw_perf_event *hwc = &event->hw; in perf_swevent_cancel_hrtimer()
11046 if (is_sampling_event(event)) { in perf_swevent_cancel_hrtimer()
11047 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); in perf_swevent_cancel_hrtimer()
11048 local64_set(&hwc->period_left, ktime_to_ns(remaining)); in perf_swevent_cancel_hrtimer()
11050 hrtimer_cancel(&hwc->hrtimer); in perf_swevent_cancel_hrtimer()
11054 static void perf_swevent_init_hrtimer(struct perf_event *event) in perf_swevent_init_hrtimer() argument
11056 struct hw_perf_event *hwc = &event->hw; in perf_swevent_init_hrtimer()
11058 if (!is_sampling_event(event)) in perf_swevent_init_hrtimer()
11061 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); in perf_swevent_init_hrtimer()
11062 hwc->hrtimer.function = perf_swevent_hrtimer; in perf_swevent_init_hrtimer()
11065 * Since hrtimers have a fixed rate, we can do a static freq->period in perf_swevent_init_hrtimer()
11068 if (event->attr.freq) { in perf_swevent_init_hrtimer()
11069 long freq = event->attr.sample_freq; in perf_swevent_init_hrtimer()
11071 event->attr.sample_period = NSEC_PER_SEC / freq; in perf_swevent_init_hrtimer()
11072 hwc->sample_period = event->attr.sample_period; in perf_swevent_init_hrtimer()
11073 local64_set(&hwc->period_left, hwc->sample_period); in perf_swevent_init_hrtimer()
11074 hwc->last_period = hwc->sample_period; in perf_swevent_init_hrtimer()
11075 event->attr.freq = 0; in perf_swevent_init_hrtimer()
11080 * Software event: cpu wall time clock
11083 static void cpu_clock_event_update(struct perf_event *event) in cpu_clock_event_update() argument
11089 prev = local64_xchg(&event->hw.prev_count, now); in cpu_clock_event_update()
11090 local64_add(now - prev, &event->count); in cpu_clock_event_update()
11093 static void cpu_clock_event_start(struct perf_event *event, int flags) in cpu_clock_event_start() argument
11095 local64_set(&event->hw.prev_count, local_clock()); in cpu_clock_event_start()
11096 perf_swevent_start_hrtimer(event); in cpu_clock_event_start()
11099 static void cpu_clock_event_stop(struct perf_event *event, int flags) in cpu_clock_event_stop() argument
11101 perf_swevent_cancel_hrtimer(event); in cpu_clock_event_stop()
11102 cpu_clock_event_update(event); in cpu_clock_event_stop()
11105 static int cpu_clock_event_add(struct perf_event *event, int flags) in cpu_clock_event_add() argument
11108 cpu_clock_event_start(event, flags); in cpu_clock_event_add()
11109 perf_event_update_userpage(event); in cpu_clock_event_add()
11114 static void cpu_clock_event_del(struct perf_event *event, int flags) in cpu_clock_event_del() argument
11116 cpu_clock_event_stop(event, flags); in cpu_clock_event_del()
11119 static void cpu_clock_event_read(struct perf_event *event) in cpu_clock_event_read() argument
11121 cpu_clock_event_update(event); in cpu_clock_event_read()
11124 static int cpu_clock_event_init(struct perf_event *event) in cpu_clock_event_init() argument
11126 if (event->attr.type != perf_cpu_clock.type) in cpu_clock_event_init()
11127 return -ENOENT; in cpu_clock_event_init()
11129 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) in cpu_clock_event_init()
11130 return -ENOENT; in cpu_clock_event_init()
11135 if (has_branch_stack(event)) in cpu_clock_event_init()
11136 return -EOPNOTSUPP; in cpu_clock_event_init()
11138 perf_swevent_init_hrtimer(event); in cpu_clock_event_init()
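/*
 * Illustrative sketch (not part of this file): using the cpu-clock software
 * event defined above as a plain counter from user space.  It accumulates
 * nanoseconds of wall time while the monitored task is running on a CPU.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int64_t measure_cpu_clock_ns(void (*workload)(void))
{
	struct perf_event_attr attr;
	uint64_t count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.disabled = 1;

	/* Count for the current task on any CPU. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return -1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	workload();
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) != sizeof(count))
		count = 0;
	close(fd);
	return (int64_t)count;		/* nanoseconds of CPU time */
}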
11158 * Software event: task time clock
11161 static void task_clock_event_update(struct perf_event *event, u64 now) in task_clock_event_update() argument
11166 prev = local64_xchg(&event->hw.prev_count, now); in task_clock_event_update()
11167 delta = now - prev; in task_clock_event_update()
11168 local64_add(delta, &event->count); in task_clock_event_update()
11171 static void task_clock_event_start(struct perf_event *event, int flags) in task_clock_event_start() argument
11173 local64_set(&event->hw.prev_count, event->ctx->time); in task_clock_event_start()
11174 perf_swevent_start_hrtimer(event); in task_clock_event_start()
11177 static void task_clock_event_stop(struct perf_event *event, int flags) in task_clock_event_stop() argument
11179 perf_swevent_cancel_hrtimer(event); in task_clock_event_stop()
11180 task_clock_event_update(event, event->ctx->time); in task_clock_event_stop()
11183 static int task_clock_event_add(struct perf_event *event, int flags) in task_clock_event_add() argument
11186 task_clock_event_start(event, flags); in task_clock_event_add()
11187 perf_event_update_userpage(event); in task_clock_event_add()
11192 static void task_clock_event_del(struct perf_event *event, int flags) in task_clock_event_del() argument
11194 task_clock_event_stop(event, PERF_EF_UPDATE); in task_clock_event_del()
11197 static void task_clock_event_read(struct perf_event *event) in task_clock_event_read() argument
11200 u64 delta = now - event->ctx->timestamp; in task_clock_event_read()
11201 u64 time = event->ctx->time + delta; in task_clock_event_read()
11203 task_clock_event_update(event, time); in task_clock_event_read()
11206 static int task_clock_event_init(struct perf_event *event) in task_clock_event_init() argument
11208 if (event->attr.type != perf_task_clock.type) in task_clock_event_init()
11209 return -ENOENT; in task_clock_event_init()
11211 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) in task_clock_event_init()
11212 return -ENOENT; in task_clock_event_init()
11217 if (has_branch_stack(event)) in task_clock_event_init()
11218 return -EOPNOTSUPP; in task_clock_event_init()
11220 perf_swevent_init_hrtimer(event); in task_clock_event_init()
11252 static int perf_event_nop_int(struct perf_event *event, u64 value) in perf_event_nop_int() argument
11294 static int perf_event_idx_default(struct perf_event *event) in perf_event_idx_default() argument
11301 free_percpu(pmu->cpu_pmu_context); in free_pmu_context()
11313 return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters); in nr_addr_filters_show()
11324 return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->type); in type_show()
11335 return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->hrtimer_interval_ms); in perf_event_mux_interval_ms_show()
11353 return -EINVAL; in perf_event_mux_interval_ms_store()
11356 if (timer == pmu->hrtimer_interval_ms) in perf_event_mux_interval_ms_store()
11360 pmu->hrtimer_interval_ms = timer; in perf_event_mux_interval_ms_store()
11366 cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu); in perf_event_mux_interval_ms_store()
11367 cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); in perf_event_mux_interval_ms_store()
11398 int ret = -ENOMEM; in pmu_dev_alloc()
11400 pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); in pmu_dev_alloc()
11401 if (!pmu->dev) in pmu_dev_alloc()
11404 pmu->dev->groups = pmu->attr_groups; in pmu_dev_alloc()
11405 device_initialize(pmu->dev); in pmu_dev_alloc()
11407 dev_set_drvdata(pmu->dev, pmu); in pmu_dev_alloc()
11408 pmu->dev->bus = &pmu_bus; in pmu_dev_alloc()
11409 pmu->dev->parent = pmu->parent; in pmu_dev_alloc()
11410 pmu->dev->release = pmu_dev_release; in pmu_dev_alloc()
11412 ret = dev_set_name(pmu->dev, "%s", pmu->name); in pmu_dev_alloc()
11416 ret = device_add(pmu->dev); in pmu_dev_alloc()
11421 if (pmu->nr_addr_filters) in pmu_dev_alloc()
11422 ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters); in pmu_dev_alloc()
11427 if (pmu->attr_update) in pmu_dev_alloc()
11428 ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); in pmu_dev_alloc()
11437 device_del(pmu->dev); in pmu_dev_alloc()
11440 put_device(pmu->dev); in pmu_dev_alloc()
11452 ret = -ENOMEM; in perf_pmu_register()
11453 pmu->pmu_disable_count = alloc_percpu(int); in perf_pmu_register()
11454 if (!pmu->pmu_disable_count) in perf_pmu_register()
11457 pmu->type = -1; in perf_pmu_register()
11459 ret = -EINVAL; in perf_pmu_register()
11463 pmu->name = name; in perf_pmu_register()
11475 pmu->type = type; in perf_pmu_register()
11477 if (pmu_bus_running && !pmu->dev) { in perf_pmu_register()
11483 ret = -ENOMEM; in perf_pmu_register()
11484 pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context); in perf_pmu_register()
11485 if (!pmu->cpu_pmu_context) in perf_pmu_register()
11491 cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu); in perf_pmu_register()
11492 __perf_init_event_pmu_context(&cpc->epc, pmu); in perf_pmu_register()
11496 if (!pmu->start_txn) { in perf_pmu_register()
11497 if (pmu->pmu_enable) { in perf_pmu_register()
11503 pmu->start_txn = perf_pmu_start_txn; in perf_pmu_register()
11504 pmu->commit_txn = perf_pmu_commit_txn; in perf_pmu_register()
11505 pmu->cancel_txn = perf_pmu_cancel_txn; in perf_pmu_register()
11507 pmu->start_txn = perf_pmu_nop_txn; in perf_pmu_register()
11508 pmu->commit_txn = perf_pmu_nop_int; in perf_pmu_register()
11509 pmu->cancel_txn = perf_pmu_nop_void; in perf_pmu_register()
11513 if (!pmu->pmu_enable) { in perf_pmu_register()
11514 pmu->pmu_enable = perf_pmu_nop_void; in perf_pmu_register()
11515 pmu->pmu_disable = perf_pmu_nop_void; in perf_pmu_register()
11518 if (!pmu->check_period) in perf_pmu_register()
11519 pmu->check_period = perf_event_nop_int; in perf_pmu_register()
11521 if (!pmu->event_idx) in perf_pmu_register()
11522 pmu->event_idx = perf_event_idx_default; in perf_pmu_register()
11524 list_add_rcu(&pmu->entry, &pmus); in perf_pmu_register()
11525 atomic_set(&pmu->exclusive_cnt, 0); in perf_pmu_register()
11533 if (pmu->dev && pmu->dev != PMU_NULL_DEV) { in perf_pmu_register()
11534 device_del(pmu->dev); in perf_pmu_register()
11535 put_device(pmu->dev); in perf_pmu_register()
11539 idr_remove(&pmu_idr, pmu->type); in perf_pmu_register()
11542 free_percpu(pmu->pmu_disable_count); in perf_pmu_register()
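/*
 * Illustrative sketch (not part of this file): the minimum a PMU driver
 * typically supplies before calling perf_pmu_register() as above.  Everything
 * below (names, the -ENOENT convention in event_init, the empty callbacks) is
 * a hedged skeleton, not a real driver; unset txn/check_period/event_idx
 * methods are filled in with defaults by perf_pmu_register() itself.
 */
#include <linux/module.h>
#include <linux/perf_event.h>

static int example_event_init(struct perf_event *event)
{
	/* Only claim events that were opened with this PMU's dynamic type. */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;
	return 0;
}

static int example_add(struct perf_event *event, int flags)	{ return 0; }
static void example_del(struct perf_event *event, int flags)	{ }
static void example_start(struct perf_event *event, int flags)	{ }
static void example_stop(struct perf_event *event, int flags)	{ }
static void example_read(struct perf_event *event)		{ }

static struct pmu example_pmu = {
	.module		= THIS_MODULE,
	.task_ctx_nr	= perf_invalid_context,	/* uncore-style, no task events */
	.event_init	= example_event_init,
	.add		= example_add,
	.del		= example_del,
	.start		= example_start,
	.stop		= example_stop,
	.read		= example_read,
};

static int __init example_pmu_init(void)
{
	/* -1: let the core allocate a dynamic type id, as for kprobe/uprobe. */
	return perf_pmu_register(&example_pmu, "example_pmu", -1);
}
module_init(example_pmu_init);

static void __exit example_pmu_exit(void)
{
	perf_pmu_unregister(&example_pmu);
}
module_exit(example_pmu_exit);
MODULE_LICENSE("GPL");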
11550 list_del_rcu(&pmu->entry); in perf_pmu_unregister()
11559 free_percpu(pmu->pmu_disable_count); in perf_pmu_unregister()
11560 idr_remove(&pmu_idr, pmu->type); in perf_pmu_unregister()
11561 if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { in perf_pmu_unregister()
11562 if (pmu->nr_addr_filters) in perf_pmu_unregister()
11563 device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); in perf_pmu_unregister()
11564 device_del(pmu->dev); in perf_pmu_unregister()
11565 put_device(pmu->dev); in perf_pmu_unregister()
11572 static inline bool has_extended_regs(struct perf_event *event) in has_extended_regs() argument
11574 return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) || in has_extended_regs()
11575 (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK); in has_extended_regs()
11578 static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) in perf_try_init_event() argument
11583 if (!try_module_get(pmu->module)) in perf_try_init_event()
11584 return -ENODEV; in perf_try_init_event()
11587 * A number of pmu->event_init() methods iterate the sibling_list to, in perf_try_init_event()
11589 * if this is a sibling event, acquire the ctx->mutex to protect in perf_try_init_event()
11592 if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) { in perf_try_init_event()
11594 * This ctx->mutex can nest when we're called through in perf_try_init_event()
11597 ctx = perf_event_ctx_lock_nested(event->group_leader, in perf_try_init_event()
11602 event->pmu = pmu; in perf_try_init_event()
11603 ret = pmu->event_init(event); in perf_try_init_event()
11606 perf_event_ctx_unlock(event->group_leader, ctx); in perf_try_init_event()
11609 if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) && in perf_try_init_event()
11610 has_extended_regs(event)) in perf_try_init_event()
11611 ret = -EOPNOTSUPP; in perf_try_init_event()
11613 if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && in perf_try_init_event()
11614 event_has_any_exclude_flag(event)) in perf_try_init_event()
11615 ret = -EINVAL; in perf_try_init_event()
11617 if (ret && event->destroy) in perf_try_init_event()
11618 event->destroy(event); in perf_try_init_event()
11622 module_put(pmu->module); in perf_try_init_event()
11627 static struct pmu *perf_init_event(struct perf_event *event) in perf_init_event() argument
11636 * Save original type before calling pmu->event_init() since certain in perf_init_event()
11637 * pmus overwrite event->attr.type to forward the event to another pmu. in perf_init_event()
11639 event->orig_type = event->attr.type; in perf_init_event()
11642 if (event->parent && event->parent->pmu) { in perf_init_event()
11643 pmu = event->parent->pmu; in perf_init_event()
11644 ret = perf_try_init_event(pmu, event); in perf_init_event()
11653 type = event->attr.type; in perf_init_event()
11655 type = event->attr.config >> PERF_PMU_TYPE_SHIFT; in perf_init_event()
11660 event->attr.config &= PERF_HW_EVENT_MASK; in perf_init_event()
11669 if (event->attr.type != type && type != PERF_TYPE_RAW && in perf_init_event()
11670 !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)) in perf_init_event()
11673 ret = perf_try_init_event(pmu, event); in perf_init_event()
11674 if (ret == -ENOENT && event->attr.type != type && !extended_type) { in perf_init_event()
11675 type = event->attr.type; in perf_init_event()
11686 ret = perf_try_init_event(pmu, event); in perf_init_event()
11690 if (ret != -ENOENT) { in perf_init_event()
11696 pmu = ERR_PTR(-ENOENT); in perf_init_event()
11703 static void attach_sb_event(struct perf_event *event) in attach_sb_event() argument
11705 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); in attach_sb_event()
11707 raw_spin_lock(&pel->lock); in attach_sb_event()
11708 list_add_rcu(&event->sb_list, &pel->list); in attach_sb_event()
11709 raw_spin_unlock(&pel->lock); in attach_sb_event()
11713 * We keep a list of all !task (and therefore per-cpu) events
11714 * that need to receive side-band records.
11716 * This avoids having to scan all the various PMU per-cpu contexts
11719 static void account_pmu_sb_event(struct perf_event *event) in account_pmu_sb_event() argument
11721 if (is_sb_event(event)) in account_pmu_sb_event()
11722 attach_sb_event(event); in account_pmu_sb_event()
11746 static void account_event(struct perf_event *event) in account_event() argument
11750 if (event->parent) in account_event()
11753 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) in account_event()
11755 if (event->attr.mmap || event->attr.mmap_data) in account_event()
11757 if (event->attr.build_id) in account_event()
11759 if (event->attr.comm) in account_event()
11761 if (event->attr.namespaces) in account_event()
11763 if (event->attr.cgroup) in account_event()
11765 if (event->attr.task) in account_event()
11767 if (event->attr.freq) in account_event()
11769 if (event->attr.context_switch) { in account_event()
11773 if (has_branch_stack(event)) in account_event()
11775 if (is_cgroup_event(event)) in account_event()
11777 if (event->attr.ksymbol) in account_event()
11779 if (event->attr.bpf_event) in account_event()
11781 if (event->attr.text_poke) in account_event()
11805 * increments to by-pass the mutex. in account_event()
11812 account_pmu_sb_event(event); in account_event()
11816 * Allocate and initialize an event structure
11827 struct perf_event *event; in perf_event_alloc() local
11829 long err = -EINVAL; in perf_event_alloc()
11833 if (!task || cpu != -1) in perf_event_alloc()
11834 return ERR_PTR(-EINVAL); in perf_event_alloc()
11836 if (attr->sigtrap && !task) { in perf_event_alloc()
11838 return ERR_PTR(-EINVAL); in perf_event_alloc()
11841 node = (cpu >= 0) ? cpu_to_node(cpu) : -1; in perf_event_alloc()
11842 event = kmem_cache_alloc_node(perf_event_cache, GFP_KERNEL | __GFP_ZERO, in perf_event_alloc()
11844 if (!event) in perf_event_alloc()
11845 return ERR_PTR(-ENOMEM); in perf_event_alloc()
11852 group_leader = event; in perf_event_alloc()
11854 mutex_init(&event->child_mutex); in perf_event_alloc()
11855 INIT_LIST_HEAD(&event->child_list); in perf_event_alloc()
11857 INIT_LIST_HEAD(&event->event_entry); in perf_event_alloc()
11858 INIT_LIST_HEAD(&event->sibling_list); in perf_event_alloc()
11859 INIT_LIST_HEAD(&event->active_list); in perf_event_alloc()
11860 init_event_group(event); in perf_event_alloc()
11861 INIT_LIST_HEAD(&event->rb_entry); in perf_event_alloc()
11862 INIT_LIST_HEAD(&event->active_entry); in perf_event_alloc()
11863 INIT_LIST_HEAD(&event->addr_filters.list); in perf_event_alloc()
11864 INIT_HLIST_NODE(&event->hlist_entry); in perf_event_alloc()
11867 init_waitqueue_head(&event->waitq); in perf_event_alloc()
11868 init_irq_work(&event->pending_irq, perf_pending_irq); in perf_event_alloc()
11869 init_task_work(&event->pending_task, perf_pending_task); in perf_event_alloc()
11871 mutex_init(&event->mmap_mutex); in perf_event_alloc()
11872 raw_spin_lock_init(&event->addr_filters.lock); in perf_event_alloc()
11874 atomic_long_set(&event->refcount, 1); in perf_event_alloc()
11875 event->cpu = cpu; in perf_event_alloc()
11876 event->attr = *attr; in perf_event_alloc()
11877 event->group_leader = group_leader; in perf_event_alloc()
11878 event->pmu = NULL; in perf_event_alloc()
11879 event->oncpu = -1; in perf_event_alloc()
11881 event->parent = parent_event; in perf_event_alloc()
11883 event->ns = get_pid_ns(task_active_pid_ns(current)); in perf_event_alloc()
11884 event->id = atomic64_inc_return(&perf_event_id); in perf_event_alloc()
11886 event->state = PERF_EVENT_STATE_INACTIVE; in perf_event_alloc()
11889 event->event_caps = parent_event->event_caps; in perf_event_alloc()
11892 event->attach_state = PERF_ATTACH_TASK; in perf_event_alloc()
11898 event->hw.target = get_task_struct(task); in perf_event_alloc()
11901 event->clock = &local_clock; in perf_event_alloc()
11903 event->clock = parent_event->clock; in perf_event_alloc()
11906 overflow_handler = parent_event->overflow_handler; in perf_event_alloc()
11907 context = parent_event->overflow_handler_context; in perf_event_alloc()
11910 struct bpf_prog *prog = parent_event->prog; in perf_event_alloc()
11913 event->prog = prog; in perf_event_alloc()
11914 event->orig_overflow_handler = in perf_event_alloc()
11915 parent_event->orig_overflow_handler; in perf_event_alloc()
11921 event->overflow_handler = overflow_handler; in perf_event_alloc()
11922 event->overflow_handler_context = context; in perf_event_alloc()
11923 } else if (is_write_backward(event)) { in perf_event_alloc()
11924 event->overflow_handler = perf_event_output_backward; in perf_event_alloc()
11925 event->overflow_handler_context = NULL; in perf_event_alloc()
11927 event->overflow_handler = perf_event_output_forward; in perf_event_alloc()
11928 event->overflow_handler_context = NULL; in perf_event_alloc()
11931 perf_event__state_init(event); in perf_event_alloc()
11935 hwc = &event->hw; in perf_event_alloc()
11936 hwc->sample_period = attr->sample_period; in perf_event_alloc()
11937 if (attr->freq && attr->sample_freq) in perf_event_alloc()
11938 hwc->sample_period = 1; in perf_event_alloc()
11939 hwc->last_period = hwc->sample_period; in perf_event_alloc()
11941 local64_set(&hwc->period_left, hwc->sample_period); in perf_event_alloc()
11947 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) in perf_event_alloc()
11950 if (!has_branch_stack(event)) in perf_event_alloc()
11951 event->attr.branch_sample_type = 0; in perf_event_alloc()
11953 pmu = perf_init_event(event); in perf_event_alloc()
11960 * Disallow uncore-task events. Similarly, disallow uncore-cgroup in perf_event_alloc()
11964 if (pmu->task_ctx_nr == perf_invalid_context && (task || cgroup_fd != -1)) { in perf_event_alloc()
11965 err = -EINVAL; in perf_event_alloc()
11969 if (event->attr.aux_output && in perf_event_alloc()
11970 !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) { in perf_event_alloc()
11971 err = -EOPNOTSUPP; in perf_event_alloc()
11975 if (cgroup_fd != -1) { in perf_event_alloc()
11976 err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); in perf_event_alloc()
11981 err = exclusive_event_init(event); in perf_event_alloc()
11985 if (has_addr_filter(event)) { in perf_event_alloc()
11986 event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters, in perf_event_alloc()
11989 if (!event->addr_filter_ranges) { in perf_event_alloc()
11990 err = -ENOMEM; in perf_event_alloc()
11998 if (event->parent) { in perf_event_alloc()
11999 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); in perf_event_alloc()
12001 raw_spin_lock_irq(&ifh->lock); in perf_event_alloc()
12002 memcpy(event->addr_filter_ranges, in perf_event_alloc()
12003 event->parent->addr_filter_ranges, in perf_event_alloc()
12004 pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range)); in perf_event_alloc()
12005 raw_spin_unlock_irq(&ifh->lock); in perf_event_alloc()
12009 event->addr_filters_gen = 1; in perf_event_alloc()
12012 if (!event->parent) { in perf_event_alloc()
12013 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { in perf_event_alloc()
12014 err = get_callchain_buffers(attr->sample_max_stack); in perf_event_alloc()
12020 err = security_perf_event_alloc(event); in perf_event_alloc()
12025 account_event(event); in perf_event_alloc()
12027 return event; in perf_event_alloc()
12030 if (!event->parent) { in perf_event_alloc()
12031 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) in perf_event_alloc()
12035 kfree(event->addr_filter_ranges); in perf_event_alloc()
12038 exclusive_event_destroy(event); in perf_event_alloc()
12041 if (is_cgroup_event(event)) in perf_event_alloc()
12042 perf_detach_cgroup(event); in perf_event_alloc()
12043 if (event->destroy) in perf_event_alloc()
12044 event->destroy(event); in perf_event_alloc()
12045 module_put(pmu->module); in perf_event_alloc()
12047 if (event->hw.target) in perf_event_alloc()
12048 put_task_struct(event->hw.target); in perf_event_alloc()
12049 call_rcu(&event->rcu_head, free_event_rcu); in perf_event_alloc()
12063 ret = get_user(size, &uattr->size); in perf_copy_attr()
12075 if (ret == -E2BIG) in perf_copy_attr()
12080 attr->size = size; in perf_copy_attr()
12082 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) in perf_copy_attr()
12083 return -EINVAL; in perf_copy_attr()
12085 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) in perf_copy_attr()
12086 return -EINVAL; in perf_copy_attr()
12088 if (attr->read_format & ~(PERF_FORMAT_MAX-1)) in perf_copy_attr()
12089 return -EINVAL; in perf_copy_attr()
12091 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { in perf_copy_attr()
12092 u64 mask = attr->branch_sample_type; in perf_copy_attr()
12095 if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) in perf_copy_attr()
12096 return -EINVAL; in perf_copy_attr()
12100 return -EINVAL; in perf_copy_attr()
12106 if (!attr->exclude_kernel) in perf_copy_attr()
12109 if (!attr->exclude_user) in perf_copy_attr()
12112 if (!attr->exclude_hv) in perf_copy_attr()
12117 attr->branch_sample_type = mask; in perf_copy_attr()
12127 if (attr->sample_type & PERF_SAMPLE_REGS_USER) { in perf_copy_attr()
12128 ret = perf_reg_validate(attr->sample_regs_user); in perf_copy_attr()
12133 if (attr->sample_type & PERF_SAMPLE_STACK_USER) { in perf_copy_attr()
12135 return -ENOSYS; in perf_copy_attr()
12142 if (attr->sample_stack_user >= USHRT_MAX) in perf_copy_attr()
12143 return -EINVAL; in perf_copy_attr()
12144 else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) in perf_copy_attr()
12145 return -EINVAL; in perf_copy_attr()
12148 if (!attr->sample_max_stack) in perf_copy_attr()
12149 attr->sample_max_stack = sysctl_perf_event_max_stack; in perf_copy_attr()
12151 if (attr->sample_type & PERF_SAMPLE_REGS_INTR) in perf_copy_attr()
12152 ret = perf_reg_validate(attr->sample_regs_intr); in perf_copy_attr()
12155 if (attr->sample_type & PERF_SAMPLE_CGROUP) in perf_copy_attr()
12156 return -EINVAL; in perf_copy_attr()
12158 if ((attr->sample_type & PERF_SAMPLE_WEIGHT) && in perf_copy_attr()
12159 (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) in perf_copy_attr()
12160 return -EINVAL; in perf_copy_attr()
12162 if (!attr->inherit && attr->inherit_thread) in perf_copy_attr()
12163 return -EINVAL; in perf_copy_attr()
12165 if (attr->remove_on_exec && attr->enable_on_exec) in perf_copy_attr()
12166 return -EINVAL; in perf_copy_attr()
12168 if (attr->sigtrap && !attr->remove_on_exec) in perf_copy_attr()
12169 return -EINVAL; in perf_copy_attr()
12175 put_user(sizeof(*attr), &uattr->size); in perf_copy_attr()
12176 ret = -E2BIG; in perf_copy_attr()
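/*
 * Illustrative sketch (not part of this file): the err_size path above writes
 * the perf_event_attr size this kernel understands back into uattr->size and
 * fails with -E2BIG. As an assumption about typical userspace handling, a
 * caller built against newer headers can simply retry: attr.size now holds the
 * kernel-supported size, and fields beyond it are ignored on the retry.
 *
 *	int fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
 *	if (fd < 0 && errno == E2BIG) {
 *		// the kernel wrote its sizeof(perf_event_attr) into attr.size
 *		fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
 *	}
 *
 * pid and cpu are hypothetical arguments supplied by the caller.
 */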
12190 perf_event_set_output(struct perf_event *event, struct perf_event *output_event) in perf_event_set_output() argument
12193 int ret = -EINVAL; in perf_event_set_output()
12196 mutex_lock(&event->mmap_mutex); in perf_event_set_output()
12201 if (event == output_event) in perf_event_set_output()
12205 * Don't allow cross-cpu buffers in perf_event_set_output()
12207 if (output_event->cpu != event->cpu) in perf_event_set_output()
12211 * If it's not a per-cpu rb, it must be the same task. in perf_event_set_output()
12213 if (output_event->cpu == -1 && output_event->hw.target != event->hw.target) in perf_event_set_output()
12219 if (output_event->clock != event->clock) in perf_event_set_output()
12226 if (is_write_backward(output_event) != is_write_backward(event)) in perf_event_set_output()
12232 if (has_aux(event) && has_aux(output_event) && in perf_event_set_output()
12233 event->pmu != output_event->pmu) in perf_event_set_output()
12238 * output_event is already on rb->event_list, and the list iteration in perf_event_set_output()
12239 * restarts after every removal, it is guaranteed this new event is in perf_event_set_output()
12241 * observe !rb->mmap_count. in perf_event_set_output()
12243 mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); in perf_event_set_output()
12246 if (atomic_read(&event->mmap_count)) in perf_event_set_output()
12256 if (!atomic_read(&rb->mmap_count)) { in perf_event_set_output()
12262 ring_buffer_attach(event, rb); in perf_event_set_output()
12266 mutex_unlock(&event->mmap_mutex); in perf_event_set_output()
12268 mutex_unlock(&output_event->mmap_mutex); in perf_event_set_output()
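/*
 * Illustrative sketch (not part of this file): userspace typically reaches
 * perf_event_set_output() via the PERF_EVENT_IOC_SET_OUTPUT ioctl, steering
 * one event's samples into another event's already-mmap()ed ring buffer,
 * subject to the same-cpu/same-task/same-clock/same-direction checks above.
 * fd_leader and fd_member are hypothetical descriptors from perf_event_open().
 *
 *	size_t page_size = sysconf(_SC_PAGESIZE);
 *	void *base = mmap(NULL, (1 + 8) * page_size, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd_leader, 0);
 *	if (base != MAP_FAILED &&
 *	    ioctl(fd_member, PERF_EVENT_IOC_SET_OUTPUT, fd_leader) == 0)
 *		; // fd_member's samples now land in fd_leader's buffer
 */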
12274 static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) in perf_event_set_clock() argument
12280 event->clock = &ktime_get_mono_fast_ns; in perf_event_set_clock()
12285 event->clock = &ktime_get_raw_fast_ns; in perf_event_set_clock()
12290 event->clock = &ktime_get_real_ns; in perf_event_set_clock()
12294 event->clock = &ktime_get_boottime_ns; in perf_event_set_clock()
12298 event->clock = &ktime_get_clocktai_ns; in perf_event_set_clock()
12302 return -EINVAL; in perf_event_set_clock()
12305 if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI)) in perf_event_set_clock()
12306 return -EINVAL; in perf_event_set_clock()
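/*
 * Illustrative sketch (not part of this file): the clock handled above is
 * chosen from userspace through perf_event_attr, e.g. to make sample
 * timestamps directly comparable with clock_gettime(CLOCK_MONOTONIC_RAW):
 *
 *	struct perf_event_attr attr = {
 *		.type        = PERF_TYPE_HARDWARE,
 *		.config      = PERF_COUNT_HW_CPU_CYCLES,
 *		.size        = sizeof(attr),
 *		.use_clockid = 1,
 *		.clockid     = CLOCK_MONOTONIC_RAW,
 *	};
 *
 * Clocks without an NMI-safe accessor (CLOCK_REALTIME, CLOCK_BOOTTIME and
 * CLOCK_TAI above) are rejected unless the PMU advertises PERF_PMU_CAP_NO_NMI.
 */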
12317 if (attr->sigtrap) { in perf_check_permission()
12323 is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); in perf_check_permission()
12343 * sys_perf_event_open - open a performance event, associate it to a task/cpu
12348 * @group_fd: group leader event fd
12349 * @flags: perf event open flags
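/*
 * Illustrative sketch (not part of this file): a minimal userspace caller of
 * this syscall, counting instructions of the calling thread. There is no
 * glibc wrapper, so syscall(2) is used directly; workload() stands in for
 * whatever code is being measured.
 *
 *	struct perf_event_attr attr = {
 *		.type           = PERF_TYPE_HARDWARE,
 *		.size           = sizeof(attr),
 *		.config         = PERF_COUNT_HW_INSTRUCTIONS,
 *		.disabled       = 1,
 *		.exclude_kernel = 1,
 *		.exclude_hv     = 1,
 *	};
 *	long long count;
 *	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *
 *	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 *	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 *	workload();
 *	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 *	read(fd, &count, sizeof(count));
 */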
12357 struct perf_event *event, *sibling; in SYSCALL_DEFINE5() local
12368 int cgroup_fd = -1; in SYSCALL_DEFINE5()
12372 return -EINVAL; in SYSCALL_DEFINE5()
12391 return -EACCES; in SYSCALL_DEFINE5()
12396 return -EINVAL; in SYSCALL_DEFINE5()
12399 return -EINVAL; in SYSCALL_DEFINE5()
12422 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) in SYSCALL_DEFINE5()
12423 return -EINVAL; in SYSCALL_DEFINE5()
12432 if (group_fd != -1) { in SYSCALL_DEFINE5()
12436 group_leader = group.file->private_data; in SYSCALL_DEFINE5()
12443 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) { in SYSCALL_DEFINE5()
12452 group_leader->attr.inherit != attr.inherit) { in SYSCALL_DEFINE5()
12453 err = -EINVAL; in SYSCALL_DEFINE5()
12460 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, in SYSCALL_DEFINE5()
12462 if (IS_ERR(event)) { in SYSCALL_DEFINE5()
12463 err = PTR_ERR(event); in SYSCALL_DEFINE5()
12467 if (is_sampling_event(event)) { in SYSCALL_DEFINE5()
12468 if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { in SYSCALL_DEFINE5()
12469 err = -EOPNOTSUPP; in SYSCALL_DEFINE5()
12478 pmu = event->pmu; in SYSCALL_DEFINE5()
12481 err = perf_event_set_clock(event, attr.clockid); in SYSCALL_DEFINE5()
12486 if (pmu->task_ctx_nr == perf_sw_context) in SYSCALL_DEFINE5()
12487 event->event_caps |= PERF_EV_CAP_SOFTWARE; in SYSCALL_DEFINE5()
12490 err = down_read_interruptible(&task->signal->exec_update_lock); in SYSCALL_DEFINE5()
12496 * perf_install_in_context() call for this new event to in SYSCALL_DEFINE5()
12500 err = -EACCES; in SYSCALL_DEFINE5()
12508 ctx = find_get_context(task, event); in SYSCALL_DEFINE5()
12514 mutex_lock(&ctx->mutex); in SYSCALL_DEFINE5()
12516 if (ctx->task == TASK_TOMBSTONE) { in SYSCALL_DEFINE5()
12517 err = -ESRCH; in SYSCALL_DEFINE5()
12523 * Check if the @cpu we're creating an event for is online. in SYSCALL_DEFINE5()
12528 struct perf_cpu_context *cpuctx = per_cpu_ptr(&perf_cpu_context, event->cpu); in SYSCALL_DEFINE5()
12530 if (!cpuctx->online) { in SYSCALL_DEFINE5()
12531 err = -ENODEV; in SYSCALL_DEFINE5()
12537 err = -EINVAL; in SYSCALL_DEFINE5()
12541 * becoming part of another group-sibling): in SYSCALL_DEFINE5()
12543 if (group_leader->group_leader != group_leader) in SYSCALL_DEFINE5()
12547 if (group_leader->clock != event->clock) in SYSCALL_DEFINE5()
12555 if (group_leader->cpu != event->cpu) in SYSCALL_DEFINE5()
12561 if (group_leader->ctx != ctx) in SYSCALL_DEFINE5()
12570 if (is_software_event(event) && in SYSCALL_DEFINE5()
12573 * If the event is a sw event, but the group_leader in SYSCALL_DEFINE5()
12583 pmu = group_leader->pmu_ctx->pmu; in SYSCALL_DEFINE5()
12584 } else if (!is_software_event(event)) { in SYSCALL_DEFINE5()
12586 (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { in SYSCALL_DEFINE5()
12589 * try to add a hardware event, move the whole group to in SYSCALL_DEFINE5()
12597 group_leader->pmu_ctx->pmu != pmu) in SYSCALL_DEFINE5()
12605 pmu_ctx = find_get_pmu_context(pmu, ctx, event); in SYSCALL_DEFINE5()
12610 event->pmu_ctx = pmu_ctx; in SYSCALL_DEFINE5()
12613 err = perf_event_set_output(event, output_event); in SYSCALL_DEFINE5()
12618 if (!perf_event_validate_size(event)) { in SYSCALL_DEFINE5()
12619 err = -E2BIG; in SYSCALL_DEFINE5()
12623 if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader)) { in SYSCALL_DEFINE5()
12624 err = -EINVAL; in SYSCALL_DEFINE5()
12630 * because we need to serialize with concurrent event creation. in SYSCALL_DEFINE5()
12632 if (!exclusive_event_installable(event, ctx)) { in SYSCALL_DEFINE5()
12633 err = -EBUSY; in SYSCALL_DEFINE5()
12637 WARN_ON_ONCE(ctx->parent_ctx); in SYSCALL_DEFINE5()
12639 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, f_flags); in SYSCALL_DEFINE5()
12653 put_pmu_ctx(group_leader->pmu_ctx); in SYSCALL_DEFINE5()
12657 put_pmu_ctx(sibling->pmu_ctx); in SYSCALL_DEFINE5()
12664 * (through the sibling list, which is still intact), we can in SYSCALL_DEFINE5()
12667 * By installing siblings first we NO-OP because they're not in SYSCALL_DEFINE5()
12671 sibling->pmu_ctx = pmu_ctx; in SYSCALL_DEFINE5()
12674 perf_install_in_context(ctx, sibling, sibling->cpu); in SYSCALL_DEFINE5()
12679 * event. What we want here is the event in the initial in SYSCALL_DEFINE5()
12682 group_leader->pmu_ctx = pmu_ctx; in SYSCALL_DEFINE5()
12685 perf_install_in_context(ctx, group_leader, group_leader->cpu); in SYSCALL_DEFINE5()
12691 * perf_install_in_context(), which is the point at which the event is active and in SYSCALL_DEFINE5()
12694 perf_event__header_size(event); in SYSCALL_DEFINE5()
12695 perf_event__id_header_size(event); in SYSCALL_DEFINE5()
12697 event->owner = current; in SYSCALL_DEFINE5()
12699 perf_install_in_context(ctx, event, event->cpu); in SYSCALL_DEFINE5()
12702 mutex_unlock(&ctx->mutex); in SYSCALL_DEFINE5()
12705 up_read(&task->signal->exec_update_lock); in SYSCALL_DEFINE5()
12709 mutex_lock(&current->perf_event_mutex); in SYSCALL_DEFINE5()
12710 list_add_tail(&event->owner_entry, &current->perf_event_list); in SYSCALL_DEFINE5()
12711 mutex_unlock(&current->perf_event_mutex); in SYSCALL_DEFINE5()
12715 * new event on the sibling_list. This ensures destruction in SYSCALL_DEFINE5()
12724 put_pmu_ctx(event->pmu_ctx); in SYSCALL_DEFINE5()
12725 event->pmu_ctx = NULL; /* _free_event() */ in SYSCALL_DEFINE5()
12727 mutex_unlock(&ctx->mutex); in SYSCALL_DEFINE5()
12732 up_read(&task->signal->exec_update_lock); in SYSCALL_DEFINE5()
12734 free_event(event); in SYSCALL_DEFINE5()
12751 * @overflow_handler: callback to trigger when we hit the event
12762 struct perf_event *event; in perf_event_create_kernel_counter() local
12770 if (attr->aux_output) in perf_event_create_kernel_counter()
12771 return ERR_PTR(-EINVAL); in perf_event_create_kernel_counter()
12773 event = perf_event_alloc(attr, cpu, task, NULL, NULL, in perf_event_create_kernel_counter()
12774 overflow_handler, context, -1); in perf_event_create_kernel_counter()
12775 if (IS_ERR(event)) { in perf_event_create_kernel_counter()
12776 err = PTR_ERR(event); in perf_event_create_kernel_counter()
12781 event->owner = TASK_TOMBSTONE; in perf_event_create_kernel_counter()
12782 pmu = event->pmu; in perf_event_create_kernel_counter()
12784 if (pmu->task_ctx_nr == perf_sw_context) in perf_event_create_kernel_counter()
12785 event->event_caps |= PERF_EV_CAP_SOFTWARE; in perf_event_create_kernel_counter()
12790 ctx = find_get_context(task, event); in perf_event_create_kernel_counter()
12796 WARN_ON_ONCE(ctx->parent_ctx); in perf_event_create_kernel_counter()
12797 mutex_lock(&ctx->mutex); in perf_event_create_kernel_counter()
12798 if (ctx->task == TASK_TOMBSTONE) { in perf_event_create_kernel_counter()
12799 err = -ESRCH; in perf_event_create_kernel_counter()
12803 pmu_ctx = find_get_pmu_context(pmu, ctx, event); in perf_event_create_kernel_counter()
12808 event->pmu_ctx = pmu_ctx; in perf_event_create_kernel_counter()
12812 * Check if the @cpu we're creating an event for is online. in perf_event_create_kernel_counter()
12819 if (!cpuctx->online) { in perf_event_create_kernel_counter()
12820 err = -ENODEV; in perf_event_create_kernel_counter()
12825 if (!exclusive_event_installable(event, ctx)) { in perf_event_create_kernel_counter()
12826 err = -EBUSY; in perf_event_create_kernel_counter()
12830 perf_install_in_context(ctx, event, event->cpu); in perf_event_create_kernel_counter()
12832 mutex_unlock(&ctx->mutex); in perf_event_create_kernel_counter()
12834 return event; in perf_event_create_kernel_counter()
12838 event->pmu_ctx = NULL; /* _free_event() */ in perf_event_create_kernel_counter()
12840 mutex_unlock(&ctx->mutex); in perf_event_create_kernel_counter()
12844 free_event(event); in perf_event_create_kernel_counter()
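/*
 * Illustrative sketch (not part of this file): an in-kernel user of
 * perf_event_create_kernel_counter(), loosely in the style of the hard-lockup
 * watchdog, creating a pinned per-CPU cycles counter with an overflow
 * callback. my_overflow_handler (of type perf_overflow_handler_t) and the
 * sample period are assumptions for illustration.
 *
 *	static struct perf_event_attr wd_attr = {
 *		.type          = PERF_TYPE_HARDWARE,
 *		.config        = PERF_COUNT_HW_CPU_CYCLES,
 *		.size          = sizeof(struct perf_event_attr),
 *		.pinned        = 1,
 *		.disabled      = 1,
 *		.sample_period = 1000000000ULL,
 *	};
 *
 *	struct perf_event *evt;
 *
 *	evt = perf_event_create_kernel_counter(&wd_attr, cpu, NULL,
 *						my_overflow_handler, NULL);
 *	if (IS_ERR(evt))
 *		return PTR_ERR(evt);
 *	perf_event_enable(evt);
 */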
12855 struct perf_event *event, *sibling; in __perf_pmu_remove() local
12857 perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) { in __perf_pmu_remove()
12858 perf_remove_from_context(event, 0); in __perf_pmu_remove()
12859 put_pmu_ctx(event->pmu_ctx); in __perf_pmu_remove()
12860 list_add(&event->migrate_entry, events); in __perf_pmu_remove()
12862 for_each_sibling_event(sibling, event) { in __perf_pmu_remove()
12864 put_pmu_ctx(sibling->pmu_ctx); in __perf_pmu_remove()
12865 list_add(&sibling->migrate_entry, events); in __perf_pmu_remove()
12872 int cpu, struct perf_event *event) in __perf_pmu_install_event() argument
12876 event->cpu = cpu; in __perf_pmu_install_event()
12877 epc = find_get_pmu_context(pmu, ctx, event); in __perf_pmu_install_event()
12878 event->pmu_ctx = epc; in __perf_pmu_install_event()
12880 if (event->state >= PERF_EVENT_STATE_OFF) in __perf_pmu_install_event()
12881 event->state = PERF_EVENT_STATE_INACTIVE; in __perf_pmu_install_event()
12882 perf_install_in_context(ctx, event, cpu); in __perf_pmu_install_event()
12888 struct perf_event *event, *tmp; in __perf_pmu_install() local
12891 * Re-instate events in 2 passes. in __perf_pmu_install()
12898 list_for_each_entry_safe(event, tmp, events, migrate_entry) { in __perf_pmu_install()
12899 if (event->group_leader == event) in __perf_pmu_install()
12902 list_del(&event->migrate_entry); in __perf_pmu_install()
12903 __perf_pmu_install_event(pmu, ctx, cpu, event); in __perf_pmu_install()
12910 list_for_each_entry_safe(event, tmp, events, migrate_entry) { in __perf_pmu_install()
12911 list_del(&event->migrate_entry); in __perf_pmu_install()
12912 __perf_pmu_install_event(pmu, ctx, cpu, event); in __perf_pmu_install()
12921 src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx; in perf_pmu_migrate_context()
12922 dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx; in perf_pmu_migrate_context()
12928 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); in perf_pmu_migrate_context()
12930 __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->pinned_groups, &events); in perf_pmu_migrate_context()
12931 __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->flexible_groups, &events); in perf_pmu_migrate_context()
12935 * Wait for the events to quiesce before re-instating them. in perf_pmu_migrate_context()
12942 mutex_unlock(&dst_ctx->mutex); in perf_pmu_migrate_context()
12943 mutex_unlock(&src_ctx->mutex); in perf_pmu_migrate_context()
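/*
 * Illustrative sketch (not part of this file): uncore-style PMU drivers call
 * perf_pmu_migrate_context() from their CPU-hotplug offline callback to move
 * package-wide events off the CPU going down. my_uncore_pmu and the use of
 * topology_core_cpumask() as the migration domain are assumptions.
 *
 *	static int my_uncore_cpu_offline(unsigned int cpu)
 *	{
 *		unsigned int target;
 *
 *		target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 *		if (target < nr_cpu_ids)
 *			perf_pmu_migrate_context(&my_uncore_pmu, cpu, target);
 *		return 0;
 *	}
 */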
12949 struct perf_event *parent_event = child_event->parent; in sync_child_event()
12952 if (child_event->attr.inherit_stat) { in sync_child_event()
12953 struct task_struct *task = child_event->ctx->task; in sync_child_event()
12964 atomic64_add(child_val, &parent_event->child_count); in sync_child_event()
12965 atomic64_add(child_event->total_time_enabled, in sync_child_event()
12966 &parent_event->child_total_time_enabled); in sync_child_event()
12967 atomic64_add(child_event->total_time_running, in sync_child_event()
12968 &parent_event->child_total_time_running); in sync_child_event()
12972 perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) in perf_event_exit_event() argument
12974 struct perf_event *parent_event = event->parent; in perf_event_exit_event()
12991 mutex_lock(&parent_event->child_mutex); in perf_event_exit_event()
12994 perf_remove_from_context(event, detach_flags); in perf_event_exit_event()
12996 raw_spin_lock_irq(&ctx->lock); in perf_event_exit_event()
12997 if (event->state > PERF_EVENT_STATE_EXIT) in perf_event_exit_event()
12998 perf_event_set_state(event, PERF_EVENT_STATE_EXIT); in perf_event_exit_event()
12999 raw_spin_unlock_irq(&ctx->lock); in perf_event_exit_event()
13005 mutex_unlock(&parent_event->child_mutex); in perf_event_exit_event()
13010 free_event(event); in perf_event_exit_event()
13018 perf_event_wakeup(event); in perf_event_exit_event()
13033 * In order to reduce the amount of trickery in ctx tear-down, we hold in perf_event_exit_task_context()
13042 mutex_lock(&child_ctx->mutex); in perf_event_exit_task_context()
13045 * In a single ctx::lock section, de-schedule the events and detach the in perf_event_exit_task_context()
13049 raw_spin_lock_irq(&child_ctx->lock); in perf_event_exit_task_context()
13053 * Now that the context is inactive, destroy the task <-> ctx relation in perf_event_exit_task_context()
13056 RCU_INIT_POINTER(child->perf_event_ctxp, NULL); in perf_event_exit_task_context()
13058 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE); in perf_event_exit_task_context()
13062 raw_spin_unlock_irq(&child_ctx->lock); in perf_event_exit_task_context()
13074 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) in perf_event_exit_task_context()
13077 mutex_unlock(&child_ctx->mutex); in perf_event_exit_task_context()
13083 * When a child task exits, feed back event values to parent events.
13090 struct perf_event *event, *tmp; in perf_event_exit_task() local
13092 mutex_lock(&child->perf_event_mutex); in perf_event_exit_task()
13093 list_for_each_entry_safe(event, tmp, &child->perf_event_list, in perf_event_exit_task()
13095 list_del_init(&event->owner_entry); in perf_event_exit_task()
13100 * we need to serialize on the owner->perf_event_mutex. in perf_event_exit_task()
13102 smp_store_release(&event->owner, NULL); in perf_event_exit_task()
13104 mutex_unlock(&child->perf_event_mutex); in perf_event_exit_task()
13111 * child contexts and sets child->perf_event_ctxp to NULL. in perf_event_exit_task()
13117 static void perf_free_event(struct perf_event *event, in perf_free_event() argument
13120 struct perf_event *parent = event->parent; in perf_free_event()
13125 mutex_lock(&parent->child_mutex); in perf_free_event()
13126 list_del_init(&event->child_list); in perf_free_event()
13127 mutex_unlock(&parent->child_mutex); in perf_free_event()
13131 raw_spin_lock_irq(&ctx->lock); in perf_free_event()
13132 perf_group_detach(event); in perf_free_event()
13133 list_del_event(event, ctx); in perf_free_event()
13134 raw_spin_unlock_irq(&ctx->lock); in perf_free_event()
13135 free_event(event); in perf_free_event()
13148 struct perf_event *event, *tmp; in perf_event_free_task() local
13150 ctx = rcu_access_pointer(task->perf_event_ctxp); in perf_event_free_task()
13154 mutex_lock(&ctx->mutex); in perf_event_free_task()
13155 raw_spin_lock_irq(&ctx->lock); in perf_event_free_task()
13157 * Destroy the task <-> ctx relation and mark the context dead. in perf_event_free_task()
13162 RCU_INIT_POINTER(task->perf_event_ctxp, NULL); in perf_event_free_task()
13163 WRITE_ONCE(ctx->task, TASK_TOMBSTONE); in perf_event_free_task()
13165 raw_spin_unlock_irq(&ctx->lock); in perf_event_free_task()
13168 list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) in perf_event_free_task()
13169 perf_free_event(event, ctx); in perf_event_free_task()
13171 mutex_unlock(&ctx->mutex); in perf_event_free_task()
13182 * _free_event()'s put_task_struct(event->hw.target) will be a in perf_event_free_task()
13183 * use-after-free. in perf_event_free_task()
13187 wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1); in perf_event_free_task()
13193 WARN_ON_ONCE(task->perf_event_ctxp); in perf_event_delayed_put()
13200 return ERR_PTR(-EBADF); in perf_event_get()
13202 if (file->f_op != &perf_fops) { in perf_event_get()
13204 return ERR_PTR(-EBADF); in perf_event_get()
13212 if (file->f_op != &perf_fops) in perf_get_event()
13213 return ERR_PTR(-EINVAL); in perf_get_event()
13215 return file->private_data; in perf_get_event()
13218 const struct perf_event_attr *perf_event_attrs(struct perf_event *event) in perf_event_attrs() argument
13220 if (!event) in perf_event_attrs()
13221 return ERR_PTR(-EINVAL); in perf_event_attrs()
13223 return &event->attr; in perf_event_attrs()
13227 * Inherit an event from parent task to child task.
13230 * - valid pointer on success
13231 * - NULL for orphaned events
13232 * - IS_ERR() on error
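/*
 * Illustrative sketch (not part of this file): inheritance is requested from
 * userspace by setting perf_event_attr::inherit before perf_event_open(), so
 * that counters follow the measured task across fork(). child_pid is a
 * hypothetical, already-forked target task.
 *
 *	attr.inherit = 1;
 *	int fd = syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
 */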
13242 enum perf_event_state parent_state = parent_event->state; in inherit_event()
13253 if (parent_event->parent) in inherit_event()
13254 parent_event = parent_event->parent; in inherit_event()
13256 child_event = perf_event_alloc(&parent_event->attr, in inherit_event()
13257 parent_event->cpu, in inherit_event()
13260 NULL, NULL, -1); in inherit_event()
13264 pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); in inherit_event()
13269 child_event->pmu_ctx = pmu_ctx; in inherit_event()
13272 * is_orphaned_event() and list_add_tail(&parent_event->child_list) in inherit_event()
13277 mutex_lock(&parent_event->child_mutex); in inherit_event()
13279 !atomic_long_inc_not_zero(&parent_event->refcount)) { in inherit_event()
13280 mutex_unlock(&parent_event->child_mutex); in inherit_event()
13289 * Make the child state follow the state of the parent event, in inherit_event()
13294 child_event->state = PERF_EVENT_STATE_INACTIVE; in inherit_event()
13296 child_event->state = PERF_EVENT_STATE_OFF; in inherit_event()
13298 if (parent_event->attr.freq) { in inherit_event()
13299 u64 sample_period = parent_event->hw.sample_period; in inherit_event()
13300 struct hw_perf_event *hwc = &child_event->hw; in inherit_event()
13302 hwc->sample_period = sample_period; in inherit_event()
13303 hwc->last_period = sample_period; in inherit_event()
13305 local64_set(&hwc->period_left, sample_period); in inherit_event()
13308 child_event->ctx = child_ctx; in inherit_event()
13309 child_event->overflow_handler = parent_event->overflow_handler; in inherit_event()
13310 child_event->overflow_handler_context in inherit_event()
13311 = parent_event->overflow_handler_context; in inherit_event()
13322 raw_spin_lock_irqsave(&child_ctx->lock, flags); in inherit_event()
13324 child_event->attach_state |= PERF_ATTACH_CHILD; in inherit_event()
13325 raw_spin_unlock_irqrestore(&child_ctx->lock, flags); in inherit_event()
13328 * Link this into the parent event's child list in inherit_event()
13330 list_add_tail(&child_event->child_list, &parent_event->child_list); in inherit_event()
13331 mutex_unlock(&parent_event->child_mutex); in inherit_event()
13337 * Inherits an event group.
13343 * - 0 on success
13344 * - <0 on error
13371 if (sub->aux_event == parent_event && child_ctr && in inherit_group()
13373 return -EINVAL; in inherit_group()
13376 leader->group_generation = parent_event->group_generation; in inherit_group()
13381 * Creates the child task context and tries to inherit the event-group.
13384 * inherited_all set when we 'fail' to inherit an orphaned event; this is
13388 * - 0 on success
13389 * - <0 on error
13392 inherit_task_group(struct perf_event *event, struct task_struct *parent, in inherit_task_group() argument
13400 if (!event->attr.inherit || in inherit_task_group()
13401 (event->attr.inherit_thread && !(clone_flags & CLONE_THREAD)) || in inherit_task_group()
13403 (event->attr.sigtrap && (clone_flags & CLONE_CLEAR_SIGHAND))) { in inherit_task_group()
13408 child_ctx = child->perf_event_ctxp; in inherit_task_group()
13418 return -ENOMEM; in inherit_task_group()
13420 child->perf_event_ctxp = child_ctx; in inherit_task_group()
13423 ret = inherit_group(event, parent, parent_ctx, child, child_ctx); in inherit_task_group()
13437 struct perf_event *event; in perf_event_init_context() local
13443 if (likely(!parent->perf_event_ctxp)) in perf_event_init_context()
13456 * it non-NULL earlier, the only reason for it to become NULL in perf_event_init_context()
13462 * Lock the parent list. No need to lock the child - not PID in perf_event_init_context()
13465 mutex_lock(&parent_ctx->mutex); in perf_event_init_context()
13468 * We don't have to disable NMIs - we are only looking at in perf_event_init_context()
13471 perf_event_groups_for_each(event, &parent_ctx->pinned_groups) { in perf_event_init_context()
13472 ret = inherit_task_group(event, parent, parent_ctx, in perf_event_init_context()
13479 * We can't hold ctx->lock when iterating the ->flexible_groups list due in perf_event_init_context()
13483 raw_spin_lock_irqsave(&parent_ctx->lock, flags); in perf_event_init_context()
13484 parent_ctx->rotate_disable = 1; in perf_event_init_context()
13485 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); in perf_event_init_context()
13487 perf_event_groups_for_each(event, &parent_ctx->flexible_groups) { in perf_event_init_context()
13488 ret = inherit_task_group(event, parent, parent_ctx, in perf_event_init_context()
13494 raw_spin_lock_irqsave(&parent_ctx->lock, flags); in perf_event_init_context()
13495 parent_ctx->rotate_disable = 0; in perf_event_init_context()
13497 child_ctx = child->perf_event_ctxp; in perf_event_init_context()
13505 * parent_ctx->lock prevents it from being uncloned. in perf_event_init_context()
13507 cloned_ctx = parent_ctx->parent_ctx; in perf_event_init_context()
13509 child_ctx->parent_ctx = cloned_ctx; in perf_event_init_context()
13510 child_ctx->parent_gen = parent_ctx->parent_gen; in perf_event_init_context()
13512 child_ctx->parent_ctx = parent_ctx; in perf_event_init_context()
13513 child_ctx->parent_gen = parent_ctx->generation; in perf_event_init_context()
13515 get_ctx(child_ctx->parent_ctx); in perf_event_init_context()
13518 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); in perf_event_init_context()
13520 mutex_unlock(&parent_ctx->mutex); in perf_event_init_context()
13535 child->perf_event_ctxp = NULL; in perf_event_init_task()
13536 mutex_init(&child->perf_event_mutex); in perf_event_init_task()
13537 INIT_LIST_HEAD(&child->perf_event_list); in perf_event_init_task()
13558 mutex_init(&swhash->hlist_mutex); in perf_event_init_all_cpus()
13566 __perf_event_init_context(&cpuctx->ctx); in perf_event_init_all_cpus()
13567 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); in perf_event_init_all_cpus()
13568 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); in perf_event_init_all_cpus()
13569 cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask); in perf_event_init_all_cpus()
13570 cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default); in perf_event_init_all_cpus()
13571 cpuctx->heap = cpuctx->heap_default; in perf_event_init_all_cpus()
13579 mutex_lock(&swhash->hlist_mutex); in perf_swevent_init_cpu()
13580 if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) { in perf_swevent_init_cpu()
13585 rcu_assign_pointer(swhash->swevent_hlist, hlist); in perf_swevent_init_cpu()
13587 mutex_unlock(&swhash->hlist_mutex); in perf_swevent_init_cpu()
13595 struct perf_event *event; in __perf_event_exit_context() local
13597 raw_spin_lock(&ctx->lock); in __perf_event_exit_context()
13599 list_for_each_entry(event, &ctx->event_list, event_entry) in __perf_event_exit_context()
13600 __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP); in __perf_event_exit_context()
13601 raw_spin_unlock(&ctx->lock); in __perf_event_exit_context()
13609 // XXX simplify cpuctx->online in perf_event_exit_cpu_context()
13612 ctx = &cpuctx->ctx; in perf_event_exit_cpu_context()
13614 mutex_lock(&ctx->mutex); in perf_event_exit_cpu_context()
13616 cpuctx->online = 0; in perf_event_exit_cpu_context()
13617 mutex_unlock(&ctx->mutex); in perf_event_exit_cpu_context()
13637 ctx = &cpuctx->ctx; in perf_event_init_cpu()
13639 mutex_lock(&ctx->mutex); in perf_event_init_cpu()
13640 cpuctx->online = 1; in perf_event_init_cpu()
13641 mutex_unlock(&ctx->mutex); in perf_event_init_cpu()
13682 perf_pmu_register(&perf_cpu_clock, "cpu_clock", -1); in perf_event_init()
13683 perf_pmu_register(&perf_task_clock, "task_clock", -1); in perf_event_init()
13707 if (pmu_attr->event_str) in perf_event_sysfs_show()
13708 return sprintf(page, "%s\n", pmu_attr->event_str); in perf_event_sysfs_show()
13726 if (pmu->dev) in perf_event_sysfs_init()
13730 WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); in perf_event_sysfs_init()
13750 return ERR_PTR(-ENOMEM); in perf_cgroup_css_alloc()
13752 jc->info = alloc_percpu(struct perf_cgroup_info); in perf_cgroup_css_alloc()
13753 if (!jc->info) { in perf_cgroup_css_alloc()
13755 return ERR_PTR(-ENOMEM); in perf_cgroup_css_alloc()
13758 return &jc->css; in perf_cgroup_css_alloc()
13765 free_percpu(jc->info); in perf_cgroup_css_free()
13771 perf_event_cgroup(css->cgroup); in perf_cgroup_css_online()