Lines Matching +full:min +full:- +full:sample +full:- +full:time
1 // SPDX-License-Identifier: GPL-2.0-only
3 * thread-stack.c: Synthesize a thread's stack using call / return events
21 #include "call-path.h"
22 #include "thread-stack.h"
40 * struct thread_stack_entry - thread stack entry.
43 * @ref: external reference (e.g. db_id of sample)
47 * @db_id: id used for db-export
68 * struct thread_stack - thread stack constructed from 'call' and 'return'
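For orientation while reading the fragments below, here is a minimal sketch of the two structures those comments describe. It is reconstructed only from the fields this listing actually touches; the exact layout and types are assumptions, not the kernel's definition.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct call_path;               /* from call-path.h; opaque here */
struct call_return_processor;   /* consumer of synthesized call/return pairs */
struct branch_stack;            /* perf branch-stack record */
struct comm;

/* One call currently in flight on the synthesized stack. */
struct thread_stack_entry {
	uint64_t ret_addr;      /* return address the matching return must hit */
	uint64_t timestamp;     /* time of the call */
	uint64_t ref;           /* external reference (e.g. db_id of sample) */
	uint64_t branch_count;  /* counters snapshotted at call time, for deltas */
	uint64_t insn_count;
	uint64_t cyc_count;
	uint64_t db_id;         /* id used for db-export */
	struct call_path *cp;   /* call path of the called function */
	bool no_call;           /* return seen without a matching call */
	bool trace_end;         /* pushed because tracing stopped, not a real call */
	bool non_call;          /* e.g. a branch treated like a call for reporting */
};

/* Per-thread (or, for the idle task, per-cpu) synthesized stack. */
struct thread_stack {
	struct thread_stack_entry *stack;  /* growable array of entries */
	size_t cnt;                        /* entries in use */
	size_t sz;                         /* entries allocated */
	uint64_t trace_nr;                 /* current trace number */
	uint64_t branch_count, insn_count, cyc_count;
	uint64_t kernel_start;             /* first kernel address */
	uint64_t last_time;                /* timestamp of the last event seen */
	struct call_return_processor *crp; /* set when call/return export is enabled */
	struct comm *comm;
	unsigned int arr_sz;               /* number of per-cpu slots */
	struct branch_stack *br_stack_rb;  /* ring buffer of branch entries */
	unsigned int br_stack_sz;
	unsigned int br_stack_pos;
	bool mispred_all;
	int rstate;                        /* x86 retpoline detection state */
};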
115 return !(thread->tid || thread->pid_); in thread_stack__per_cpu()
123 new_sz = ts->sz + STACK_GROWTH; in thread_stack__grow()
126 new_stack = realloc(ts->stack, sz); in thread_stack__grow()
128 return -ENOMEM; in thread_stack__grow()
130 ts->stack = new_stack; in thread_stack__grow()
131 ts->sz = new_sz; in thread_stack__grow()
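The grow path above is the usual realloc-and-commit pattern: compute the new element count, reallocate, and only update the pointer and size once the allocation has succeeded, so a failed grow leaves the old stack intact. A standalone sketch of the same idea (the STACK_GROWTH value and the trimmed-down entry type are assumptions):

#include <errno.h>
#include <stdlib.h>

#define STACK_GROWTH 2048                  /* assumed growth step */

struct entry { unsigned long long ret_addr; };
struct ts { struct entry *stack; size_t cnt, sz; };

static int ts_grow(struct ts *ts)
{
	size_t new_sz = ts->sz + STACK_GROWTH;
	struct entry *new_stack = realloc(ts->stack, new_sz * sizeof(*new_stack));

	if (!new_stack)
		return -ENOMEM;            /* old buffer is still valid */

	ts->stack = new_stack;             /* commit only on success */
	ts->sz = new_sz;
	return 0;
}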
152 ts->br_stack_rb = zalloc(sz); in thread_stack__init()
153 if (!ts->br_stack_rb) in thread_stack__init()
154 return -ENOMEM; in thread_stack__init()
155 ts->br_stack_sz = br_stack_sz; in thread_stack__init()
158 if (thread->maps && thread->maps->machine) { in thread_stack__init()
159 struct machine *machine = thread->maps->machine; in thread_stack__init()
160 const char *arch = perf_env__arch(machine->env); in thread_stack__init()
162 ts->kernel_start = machine__kernel_start(machine); in thread_stack__init()
164 ts->rstate = X86_RETPOLINE_POSSIBLE; in thread_stack__init()
166 ts->kernel_start = 1ULL << 63; in thread_stack__init()
168 ts->crp = crp; in thread_stack__init()
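When the machine is known, kernel_start comes from machine__kernel_start() and, on x86, the retpoline heuristic is armed; otherwise the code falls back to 1ULL << 63, i.e. it simply treats the upper half of the 64-bit address space as kernel. A tiny sketch of that classification (the helper name is illustrative, not from the source):

#include <stdbool.h>
#include <stdint.h>

/* Fallback when no machine info is available: anything at or above
 * 1ULL << 63 is assumed to be a kernel address. */
static bool addr_is_kernel(uint64_t addr, uint64_t kernel_start)
{
	if (!kernel_start)
		kernel_start = 1ULL << 63;
	return addr >= kernel_start;
}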
178 struct thread_stack *ts = thread->ts, *new_ts; in thread_stack__new()
179 unsigned int old_sz = ts ? ts->arr_sz : 0; in thread_stack__new()
191 new_ts->arr_sz = new_sz; in thread_stack__new()
192 zfree(&thread->ts); in thread_stack__new()
193 thread->ts = new_ts; in thread_stack__new()
198 (unsigned int)cpu < ts->arr_sz) in thread_stack__new()
201 if (!ts->stack && in thread_stack__new()
210 struct thread_stack *ts = thread->ts; in thread__cpu_stack()
215 if (!ts || (unsigned int)cpu >= ts->arr_sz) in thread__cpu_stack()
220 if (!ts->stack) in thread__cpu_stack()
235 return thread->ts; in thread__stack()
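The lookups above choose which stack to operate on: thread_stack__per_cpu() is true only for the idle task (tid 0, pid 0), which keeps one stack per cpu, while every other thread uses a single stack. A compact sketch of that selection under a simplified thread type (the names here are illustrative):

#include <stdbool.h>
#include <stddef.h>

struct stack_slot { void *stack; };        /* stands in for struct thread_stack */

struct thr {
	int pid_, tid;
	struct stack_slot *ts;             /* array of arr_sz slots when per-cpu */
	unsigned int arr_sz;
};

static bool per_cpu(const struct thr *t)
{
	return !(t->tid || t->pid_);       /* only the idle task */
}

static struct stack_slot *pick_stack(struct thr *t, int cpu)
{
	if (!per_cpu(t))
		return t->ts;              /* one stack for the whole thread */
	if (cpu < 0 || !t->ts || (unsigned int)cpu >= t->arr_sz)
		return NULL;
	if (!t->ts[cpu].stack)             /* per-cpu slot never initialised */
		return NULL;
	return &t->ts[cpu];
}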
243 if (ts->cnt == ts->sz) { in thread_stack__push()
247 ts->cnt = 0; in thread_stack__push()
251 ts->stack[ts->cnt].trace_end = trace_end; in thread_stack__push()
252 ts->stack[ts->cnt++].ret_addr = ret_addr; in thread_stack__push()
270 for (i = ts->cnt; i; ) { in thread_stack__pop()
271 if (ts->stack[--i].ret_addr == ret_addr) { in thread_stack__pop()
272 ts->cnt = i; in thread_stack__pop()
282 for (i = ts->cnt; i; ) { in thread_stack__pop_trace_end()
283 if (ts->stack[--i].trace_end) in thread_stack__pop_trace_end()
284 ts->cnt = i; in thread_stack__pop_trace_end()
292 if (!ts->cnt) in thread_stack__in_kernel()
295 return ts->stack[ts->cnt - 1].cp->in_kernel; in thread_stack__in_kernel()
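thread_stack__push() and thread_stack__pop() above are the heart of the pairing: a call pushes the address at which it expects to return; when the array is full the real code first tries to grow it and only discards the whole stack (cnt = 0) if that allocation fails; a return pops by scanning downward for the frame whose ret_addr matches, dropping anything pushed above it, and leaves the stack untouched if nothing matches (a return whose call was never seen). A fixed-size sketch of that behaviour, with the grow step omitted:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define DEPTH 4                            /* tiny, for illustration only */

struct frame { uint64_t ret_addr; bool trace_end; };
struct cstack { struct frame stack[DEPTH]; size_t cnt; };

static void push(struct cstack *ts, uint64_t ret_addr, bool trace_end)
{
	if (ts->cnt == DEPTH)
		ts->cnt = 0;               /* real code grows first; discard here */
	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;
}

static void pop(struct cstack *ts, uint64_t ret_addr)
{
	size_t i;

	/* Scan down for a matching frame; everything above it is dropped.
	 * If nothing matches, assume a return without a call and do nothing. */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}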
302 struct call_return_processor *crp = ts->crp; in thread_stack__call_return()
306 .comm = ts->comm, in thread_stack__call_return()
311 tse = &ts->stack[idx]; in thread_stack__call_return()
312 cr.cp = tse->cp; in thread_stack__call_return()
313 cr.call_time = tse->timestamp; in thread_stack__call_return()
315 cr.branch_count = ts->branch_count - tse->branch_count; in thread_stack__call_return()
316 cr.insn_count = ts->insn_count - tse->insn_count; in thread_stack__call_return()
317 cr.cyc_count = ts->cyc_count - tse->cyc_count; in thread_stack__call_return()
318 cr.db_id = tse->db_id; in thread_stack__call_return()
319 cr.call_ref = tse->ref; in thread_stack__call_return()
321 if (tse->no_call) in thread_stack__call_return()
325 if (tse->non_call) in thread_stack__call_return()
333 parent_db_id = idx ? &(tse - 1)->db_id : NULL; in thread_stack__call_return()
335 return crp->process(&cr, parent_db_id, crp->data); in thread_stack__call_return()
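thread_stack__call_return() above converts a popped entry into a call/return record: the per-call branch, instruction and cycle counts are simply the running totals now minus the totals snapshotted when the entry was pushed. A worked sketch of that delta bookkeeping (structure names simplified):

#include <stdint.h>
#include <stdio.h>

struct totals { uint64_t branches, insns, cycles; };
struct rec { uint64_t branch_count, insn_count, cyc_count; };

/* At call time the running totals are snapshotted into the stack entry;
 * at return time the deltas are the work done inside that call. */
static struct rec make_call_return(struct totals now, struct totals at_call)
{
	struct rec cr = {
		.branch_count = now.branches - at_call.branches,
		.insn_count   = now.insns    - at_call.insns,
		.cyc_count    = now.cycles   - at_call.cycles,
	};
	return cr;
}

int main(void)
{
	struct totals at_call = { 10, 1000, 4000 };
	struct totals now     = { 25, 1800, 6400 };
	struct rec cr = make_call_return(now, at_call);

	printf("branches=%llu insns=%llu cycles=%llu\n",
	       (unsigned long long)cr.branch_count,
	       (unsigned long long)cr.insn_count,
	       (unsigned long long)cr.cyc_count);
	return 0;
}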
340 struct call_return_processor *crp = ts->crp; in __thread_stack__flush()
344 ts->cnt = 0; in __thread_stack__flush()
345 ts->br_stack_pos = 0; in __thread_stack__flush()
346 if (ts->br_stack_rb) in __thread_stack__flush()
347 ts->br_stack_rb->nr = 0; in __thread_stack__flush()
351 while (ts->cnt) { in __thread_stack__flush()
352 err = thread_stack__call_return(thread, ts, --ts->cnt, in __thread_stack__flush()
353 ts->last_time, 0, true); in __thread_stack__flush()
356 ts->cnt = 0; in __thread_stack__flush()
366 struct thread_stack *ts = thread->ts; in thread_stack__flush()
371 for (pos = 0; pos < ts->arr_sz; pos++) { in thread_stack__flush()
385 struct branch_stack *bs = ts->br_stack_rb; in thread_stack__update_br_stack()
388 if (!ts->br_stack_pos) in thread_stack__update_br_stack()
389 ts->br_stack_pos = ts->br_stack_sz; in thread_stack__update_br_stack()
391 ts->br_stack_pos -= 1; in thread_stack__update_br_stack()
393 be = &bs->entries[ts->br_stack_pos]; in thread_stack__update_br_stack()
394 be->from = from_ip; in thread_stack__update_br_stack()
395 be->to = to_ip; in thread_stack__update_br_stack()
396 be->flags.value = 0; in thread_stack__update_br_stack()
397 be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT); in thread_stack__update_br_stack()
398 be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX); in thread_stack__update_br_stack()
400 be->flags.mispred = ts->mispred_all; in thread_stack__update_br_stack()
402 if (bs->nr < ts->br_stack_sz) in thread_stack__update_br_stack()
403 bs->nr += 1; in thread_stack__update_br_stack()
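The update above maintains the branch ring buffer so that the most recent branch always sits at br_stack_pos: the position is decremented, wrapping back to the end of the array when it reaches zero, the new entry is written there, and nr saturates at the buffer size. A self-contained sketch of that insert order and of walking the entries newest-first:

#include <stdint.h>
#include <stdio.h>

#define BR_SZ 4u

struct br { uint64_t from, to; };

struct br_rb {
	struct br entries[BR_SZ];
	unsigned int pos;                  /* index of the most recent entry */
	unsigned int nr;                   /* valid entries, saturates at BR_SZ */
};

static void br_push(struct br_rb *rb, uint64_t from, uint64_t to)
{
	if (!rb->pos)
		rb->pos = BR_SZ;           /* wrap: continue from the array end */
	rb->pos -= 1;

	rb->entries[rb->pos].from = from;
	rb->entries[rb->pos].to = to;

	if (rb->nr < BR_SZ)
		rb->nr += 1;
}

int main(void)
{
	struct br_rb rb = { 0 };
	unsigned int i;

	for (i = 1; i <= 6; i++)
		br_push(&rb, i * 0x100, i * 0x100 + 4);

	/* Newest entry is at rb.pos; older entries follow modulo BR_SZ. */
	for (i = 0; i < rb.nr; i++)
		printf("from=%#llx\n",
		       (unsigned long long)rb.entries[(rb.pos + i) % BR_SZ].from);
	return 0;
}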
413 return -EINVAL; in thread_stack__event()
419 return -ENOMEM; in thread_stack__event()
421 ts->trace_nr = trace_nr; in thread_stack__event()
422 ts->mispred_all = mispred_all; in thread_stack__event()
430 if (trace_nr != ts->trace_nr) { in thread_stack__event()
431 if (ts->trace_nr) in thread_stack__event()
433 ts->trace_nr = trace_nr; in thread_stack__event()
443 if (ts->crp || !callstack) in thread_stack__event()
453 return 0; /* Zero-length calls are excluded */ in thread_stack__event()
480 if (trace_nr != ts->trace_nr) { in thread_stack__set_trace_nr()
481 if (ts->trace_nr) in thread_stack__set_trace_nr()
483 ts->trace_nr = trace_nr; in thread_stack__set_trace_nr()
490 zfree(&ts->stack); in __thread_stack__free()
491 zfree(&ts->br_stack_rb); in __thread_stack__free()
496 unsigned int arr_sz = ts->arr_sz; in thread_stack__reset()
500 ts->arr_sz = arr_sz; in thread_stack__reset()
505 struct thread_stack *ts = thread->ts; in thread_stack__free()
509 for (pos = 0; pos < ts->arr_sz; pos++) in thread_stack__free()
511 zfree(&thread->ts); in thread_stack__free()
530 chain->nr = 0; in thread_stack__sample()
534 chain->ips[0] = context; in thread_stack__sample()
535 chain->ips[1] = ip; in thread_stack__sample()
538 chain->nr = 2; in thread_stack__sample()
544 for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { in thread_stack__sample()
545 ip = ts->stack[ts->cnt - j].ret_addr; in thread_stack__sample()
548 if (i >= sz - 1) in thread_stack__sample()
550 chain->ips[i++] = context; in thread_stack__sample()
553 chain->ips[i] = ip; in thread_stack__sample()
556 chain->nr = i; in thread_stack__sample()
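thread_stack__sample() above synthesizes a perf callchain: entry 0 is a context marker, entry 1 is the sample ip, and the remaining entries are the saved return addresses walked from the newest frame down, with a fresh context marker inserted whenever the walk crosses the kernel/user boundary. A compact sketch of that layout, using assumed stand-ins for the PERF_CONTEXT_* values and a plain array in place of struct ip_callchain:

#include <stddef.h>
#include <stdint.h>

#define CTX_KERNEL 0xffffffffffffff80ULL   /* assumed stand-in */
#define CTX_USER   0xfffffffffffffe00ULL   /* assumed stand-in */

static uint64_t context_of(uint64_t ip, uint64_t kernel_start)
{
	return ip < kernel_start ? CTX_USER : CTX_KERNEL;
}

/* Layout: [context, sample ip, ret addrs..., context switches as needed]. */
static size_t build_chain(uint64_t *chain, size_t sz,
			  const uint64_t *ret_addrs, size_t cnt,
			  uint64_t sample_ip, uint64_t kernel_start)
{
	uint64_t context = context_of(sample_ip, kernel_start);
	size_t i, j;

	if (sz < 2)
		return 0;
	chain[0] = context;
	chain[1] = sample_ip;

	for (i = 2, j = 1; i < sz && j <= cnt; i++, j++) {
		uint64_t ip = ret_addrs[cnt - j];          /* newest to oldest */
		uint64_t ctx = context_of(ip, kernel_start);

		if (ctx != context) {                      /* crossed u/k boundary */
			if (i >= sz - 1)
				break;
			chain[i++] = ctx;
			context = ctx;
		}
		chain[i] = ip;
	}
	return i;                                          /* entries written */
}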
560 * Hardware sample records, created some time after the event occurred, need to
573 chain->nr = 0; in thread_stack__sample_late()
585 for (j = 1; j <= ts->cnt; j++) { in thread_stack__sample_late()
586 ip = ts->stack[ts->cnt - j].ret_addr; in thread_stack__sample_late()
595 for (; nr < sz && j <= ts->cnt; nr++, j++) { in thread_stack__sample_late()
596 ip = ts->stack[ts->cnt - j].ret_addr; in thread_stack__sample_late()
599 if (nr >= sz - 1) in thread_stack__sample_late()
601 chain->ips[nr++] = context; in thread_stack__sample_late()
604 chain->ips[nr] = ip; in thread_stack__sample_late()
608 chain->nr = nr; in thread_stack__sample_late()
610 chain->ips[0] = sample_context; in thread_stack__sample_late()
611 chain->ips[1] = sample_ip; in thread_stack__sample_late()
612 chain->nr = 2; in thread_stack__sample_late()
625 dst->nr = 0; in thread_stack__br_sample()
630 src = ts->br_stack_rb; in thread_stack__br_sample()
631 if (!src->nr) in thread_stack__br_sample()
634 dst->nr = min((unsigned int)src->nr, sz); in thread_stack__br_sample()
636 be = &dst->entries[0]; in thread_stack__br_sample()
637 nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr); in thread_stack__br_sample()
638 memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr); in thread_stack__br_sample()
640 if (src->nr >= ts->br_stack_sz) { in thread_stack__br_sample()
641 sz -= nr; in thread_stack__br_sample()
642 be = &dst->entries[nr]; in thread_stack__br_sample()
643 nr = min(ts->br_stack_pos, sz); in thread_stack__br_sample()
644 memcpy(be, &src->entries[0], bsz * nr); in thread_stack__br_sample()
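Reading the ring back out takes at most two memcpy()s: first the newest entries, from br_stack_pos to the end of the array, then, only if the ring has wrapped all the way round (src->nr >= br_stack_sz), the remainder from the start of the array, each chunk clamped to the space left in the destination. A self-contained sketch of that copy-out (helper and parameter names are illustrative):

#include <stdint.h>
#include <string.h>

struct br { uint64_t from, to; };

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Copy up to sz of the newest entries out of a ring of rb_sz entries whose
 * newest entry is at index pos; returns how many entries were copied. */
static unsigned int ring_copy(struct br *dst, unsigned int sz,
			      const struct br *ring, unsigned int rb_sz,
			      unsigned int pos, unsigned int nr_valid)
{
	unsigned int total = min_u(nr_valid, sz);
	unsigned int nr = min_u(rb_sz - pos, total);

	/* Chunk 1: newest entries, from pos up to the end of the array. */
	memcpy(dst, &ring[pos], nr * sizeof(*ring));

	/* Chunk 2: only needed once the ring has wrapped all the way round. */
	if (nr_valid >= rb_sz) {
		unsigned int rest = min_u(pos, sz - nr);

		memcpy(dst + nr, &ring[0], rest * sizeof(*ring));
	}
	return total;
}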
652 *start = be->to && be->to < kernel_start; in us_start()
665 *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || in ks_start()
666 be->from < kernel_start || in ks_start()
667 (be->to && be->to < kernel_start); in ks_start()
674 * Hardware sample records, created some time after the event occurred, need to
687 dst->nr = 0; in thread_stack__br_sample_late()
692 src = ts->br_stack_rb; in thread_stack__br_sample_late()
693 if (!src->nr) in thread_stack__br_sample_late()
696 spos = &src->entries[ts->br_stack_pos]; in thread_stack__br_sample_late()
697 ssz = &src->entries[ts->br_stack_sz]; in thread_stack__br_sample_late()
699 d = &dst->entries[0]; in thread_stack__br_sample_late()
704 * User space sample: start copying branch entries when the in thread_stack__br_sample_late()
714 if (src->nr >= ts->br_stack_sz) { in thread_stack__br_sample_late()
715 for (s = &src->entries[0]; s < spos && nr < sz; s++) { in thread_stack__br_sample_late()
726 * Kernel space sample: start copying branch entries when the ip in thread_stack__br_sample_late()
738 if (src->nr >= ts->br_stack_sz) { in thread_stack__br_sample_late()
739 for (s = &src->entries[0]; s < spos && nr < sz; s++) { in thread_stack__br_sample_late()
749 dst->nr = nr; in thread_stack__br_sample_late()
761 crp->cpr = call_path_root__new(); in call_return_processor__new()
762 if (!crp->cpr) in call_return_processor__new()
764 crp->process = process; in call_return_processor__new()
765 crp->data = data; in call_return_processor__new()
776 call_path_root__free(crp->cpr); in call_return_processor__free()
789 return -ENOMEM; in thread_stack__push_cp()
791 if (ts->cnt == ts->sz) { in thread_stack__push_cp()
797 tse = &ts->stack[ts->cnt++]; in thread_stack__push_cp()
798 tse->ret_addr = ret_addr; in thread_stack__push_cp()
799 tse->timestamp = timestamp; in thread_stack__push_cp()
800 tse->ref = ref; in thread_stack__push_cp()
801 tse->branch_count = ts->branch_count; in thread_stack__push_cp()
802 tse->insn_count = ts->insn_count; in thread_stack__push_cp()
803 tse->cyc_count = ts->cyc_count; in thread_stack__push_cp()
804 tse->cp = cp; in thread_stack__push_cp()
805 tse->no_call = no_call; in thread_stack__push_cp()
806 tse->trace_end = trace_end; in thread_stack__push_cp()
807 tse->non_call = false; in thread_stack__push_cp()
808 tse->db_id = 0; in thread_stack__push_cp()
819 if (!ts->cnt) in thread_stack__pop_cp()
822 if (ts->cnt == 1) { in thread_stack__pop_cp()
823 struct thread_stack_entry *tse = &ts->stack[0]; in thread_stack__pop_cp()
825 if (tse->cp->sym == sym) in thread_stack__pop_cp()
826 return thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_cp()
830 if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && in thread_stack__pop_cp()
831 !ts->stack[ts->cnt - 1].non_call) { in thread_stack__pop_cp()
832 return thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_cp()
835 size_t i = ts->cnt - 1; in thread_stack__pop_cp()
837 while (i--) { in thread_stack__pop_cp()
838 if (ts->stack[i].ret_addr != ret_addr || in thread_stack__pop_cp()
839 ts->stack[i].non_call) in thread_stack__pop_cp()
842 while (ts->cnt > i) { in thread_stack__pop_cp()
844 --ts->cnt, in thread_stack__pop_cp()
850 return thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_cp()
859 struct perf_sample *sample, in thread_stack__bottom() argument
863 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__bottom()
868 if (sample->ip) { in thread_stack__bottom()
869 ip = sample->ip; in thread_stack__bottom()
870 sym = from_al->sym; in thread_stack__bottom()
871 } else if (sample->addr) { in thread_stack__bottom()
872 ip = sample->addr; in thread_stack__bottom()
873 sym = to_al->sym; in thread_stack__bottom()
878 cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, in thread_stack__bottom()
879 ts->kernel_start); in thread_stack__bottom()
881 return thread_stack__push_cp(ts, ip, sample->time, ref, cp, in thread_stack__bottom()
886 struct perf_sample *sample, u64 ref) in thread_stack__pop_ks() argument
888 u64 tm = sample->time; in thread_stack__pop_ks()
893 err = thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_ks()
904 struct perf_sample *sample, in thread_stack__no_call_return() argument
908 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__no_call_return()
909 struct call_path *root = &cpr->call_path; in thread_stack__no_call_return()
910 struct symbol *fsym = from_al->sym; in thread_stack__no_call_return()
911 struct symbol *tsym = to_al->sym; in thread_stack__no_call_return()
913 u64 ks = ts->kernel_start; in thread_stack__no_call_return()
914 u64 addr = sample->addr; in thread_stack__no_call_return()
915 u64 tm = sample->time; in thread_stack__no_call_return()
916 u64 ip = sample->ip; in thread_stack__no_call_return()
921 err = thread_stack__pop_ks(thread, ts, sample, ref); in thread_stack__no_call_return()
926 if (!ts->cnt) { in thread_stack__no_call_return()
933 err = thread_stack__pop_ks(thread, ts, sample, ref); in thread_stack__no_call_return()
938 if (ts->cnt) in thread_stack__no_call_return()
939 parent = ts->stack[ts->cnt - 1].cp; in thread_stack__no_call_return()
943 if (parent->sym == from_al->sym) { in thread_stack__no_call_return()
949 if (ts->cnt == 1) { in thread_stack__no_call_return()
950 err = thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__no_call_return()
956 if (!ts->cnt) { in thread_stack__no_call_return()
971 ts->stack[ts->cnt - 1].non_call = true; in thread_stack__no_call_return()
993 return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); in thread_stack__no_call_return()
1003 if (!ts->cnt) in thread_stack__trace_begin()
1007 tse = &ts->stack[ts->cnt - 1]; in thread_stack__trace_begin()
1008 if (tse->trace_end) { in thread_stack__trace_begin()
1009 err = thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__trace_begin()
1019 struct perf_sample *sample, u64 ref) in thread_stack__trace_end() argument
1021 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__trace_end()
1026 if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) in thread_stack__trace_end()
1029 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, in thread_stack__trace_end()
1030 ts->kernel_start); in thread_stack__trace_end()
1032 ret_addr = sample->ip + sample->insn_len; in thread_stack__trace_end()
1034 return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, in thread_stack__trace_end()
1051 struct perf_sample *sample, in thread_stack__x86_retpoline() argument
1054 struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; in thread_stack__x86_retpoline()
1055 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__x86_retpoline()
1056 struct symbol *sym = tse->cp->sym; in thread_stack__x86_retpoline()
1057 struct symbol *tsym = to_al->sym; in thread_stack__x86_retpoline()
1060 if (sym && is_x86_retpoline(sym->name)) { in thread_stack__x86_retpoline()
1064 * not itself mean anything. Here the top-of-stack is removed, in thread_stack__x86_retpoline()
1066 * resulting top-of-stack is replaced with the actual target. in thread_stack__x86_retpoline()
1071 ts->cnt -= 1; in thread_stack__x86_retpoline()
1072 sym = ts->stack[ts->cnt - 2].cp->sym; in thread_stack__x86_retpoline()
1073 if (sym && sym == tsym && to_al->addr != tsym->start) { in thread_stack__x86_retpoline()
1079 ts->cnt -= 1; in thread_stack__x86_retpoline()
1087 ts->cnt -= 1; in thread_stack__x86_retpoline()
1091 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, in thread_stack__x86_retpoline()
1092 sample->addr, ts->kernel_start); in thread_stack__x86_retpoline()
1094 return -ENOMEM; in thread_stack__x86_retpoline()
1096 /* Replace the top-of-stack with the actual target */ in thread_stack__x86_retpoline()
1097 ts->stack[ts->cnt - 1].cp = cp; in thread_stack__x86_retpoline()
1103 struct perf_sample *sample, in thread_stack__process() argument
1108 struct thread_stack *ts = thread__stack(thread, sample->cpu); in thread_stack__process()
1112 if (ts && !ts->crp) { in thread_stack__process()
1119 ts = thread_stack__new(thread, sample->cpu, crp, true, 0); in thread_stack__process()
1121 return -ENOMEM; in thread_stack__process()
1122 ts->comm = comm; in thread_stack__process()
1125 rstate = ts->rstate; in thread_stack__process()
1127 ts->rstate = X86_RETPOLINE_POSSIBLE; in thread_stack__process()
1130 if (ts->comm != comm && thread->pid_ == thread->tid) { in thread_stack__process()
1134 ts->comm = comm; in thread_stack__process()
1138 if (!ts->cnt) { in thread_stack__process()
1139 err = thread_stack__bottom(ts, sample, from_al, to_al, ref); in thread_stack__process()
1144 ts->branch_count += 1; in thread_stack__process()
1145 ts->insn_count += sample->insn_cnt; in thread_stack__process()
1146 ts->cyc_count += sample->cyc_cnt; in thread_stack__process()
1147 ts->last_time = sample->time; in thread_stack__process()
1149 if (sample->flags & PERF_IP_FLAG_CALL) { in thread_stack__process()
1150 bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; in thread_stack__process()
1151 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__process()
1155 if (!sample->ip || !sample->addr) in thread_stack__process()
1158 ret_addr = sample->ip + sample->insn_len; in thread_stack__process()
1159 if (ret_addr == sample->addr) in thread_stack__process()
1160 return 0; /* Zero-length calls are excluded */ in thread_stack__process()
1162 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, in thread_stack__process()
1163 to_al->sym, sample->addr, in thread_stack__process()
1164 ts->kernel_start); in thread_stack__process()
1165 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, in thread_stack__process()
1172 if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && in thread_stack__process()
1173 from_al->sym == to_al->sym && in thread_stack__process()
1174 to_al->addr != to_al->sym->start) in thread_stack__process()
1175 ts->rstate = X86_RETPOLINE_DETECTED; in thread_stack__process()
1177 } else if (sample->flags & PERF_IP_FLAG_RETURN) { in thread_stack__process()
1178 if (!sample->addr) { in thread_stack__process()
1182 if (!(sample->flags & return_from_kernel)) in thread_stack__process()
1186 return thread_stack__pop_ks(thread, ts, sample, ref); in thread_stack__process()
1189 if (!sample->ip) in thread_stack__process()
1193 if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && in thread_stack__process()
1194 ts->stack[ts->cnt - 1].ret_addr != sample->addr) in thread_stack__process()
1195 return thread_stack__x86_retpoline(ts, sample, to_al); in thread_stack__process()
1197 err = thread_stack__pop_cp(thread, ts, sample->addr, in thread_stack__process()
1198 sample->time, ref, from_al->sym); in thread_stack__process()
1202 err = thread_stack__no_call_return(thread, ts, sample, in thread_stack__process()
1205 } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { in thread_stack__process()
1206 err = thread_stack__trace_begin(thread, ts, sample->time, ref); in thread_stack__process()
1207 } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { in thread_stack__process()
1208 err = thread_stack__trace_end(ts, sample, ref); in thread_stack__process()
1209 } else if (sample->flags & PERF_IP_FLAG_BRANCH && in thread_stack__process()
1210 from_al->sym != to_al->sym && to_al->sym && in thread_stack__process()
1211 to_al->addr == to_al->sym->start) { in thread_stack__process()
1212 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__process()
1221 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, in thread_stack__process()
1222 to_al->sym, sample->addr, in thread_stack__process()
1223 ts->kernel_start); in thread_stack__process()
1224 err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, in thread_stack__process()
1227 ts->stack[ts->cnt - 1].non_call = true; in thread_stack__process()
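Putting it together, thread_stack__process() dispatches on sample->flags: a CALL computes the expected return address as ip + insn_len (a call to the very next instruction has zero length and is excluded) and pushes a new call path; a RETURN pops back to the matching frame; TRACE_BEGIN/TRACE_END bracket gaps in the trace; and a branch to the start of a different symbol is pushed as a non_call entry. A much reduced, self-contained sketch of just the call/return pairing that drives this (the flag values are stand-ins, not the real PERF_IP_FLAG_* bits):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_CALL   0x01u                  /* stand-in for PERF_IP_FLAG_CALL */
#define FLAG_RETURN 0x02u                  /* stand-in for PERF_IP_FLAG_RETURN */

struct smpl { uint64_t ip, addr; uint32_t flags; uint16_t insn_len; };

static uint64_t stack_[64];
static size_t cnt_;

static void process(const struct smpl *s)
{
	if (s->flags & FLAG_CALL) {
		uint64_t ret_addr = s->ip + s->insn_len;

		if (ret_addr == s->addr)
			return;            /* zero-length call: excluded */
		if (cnt_ < 64)
			stack_[cnt_++] = ret_addr;   /* expect the return here */
	} else if (s->flags & FLAG_RETURN) {
		size_t i;

		/* Pop back to the frame whose saved return address matches. */
		for (i = cnt_; i; ) {
			if (stack_[--i] == s->addr) {
				cnt_ = i;
				return;
			}
		}
	}
}

int main(void)
{
	struct smpl call = { .ip = 0x1000, .addr = 0x2000,
			     .flags = FLAG_CALL, .insn_len = 5 };
	struct smpl ret  = { .ip = 0x2040, .addr = 0x1005,
			     .flags = FLAG_RETURN };

	process(&call);
	process(&ret);
	printf("depth after call+return: %zu\n", cnt_);    /* prints 0 */
	return 0;
}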
1239 return ts->cnt; in thread_stack__depth()