1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6 
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/callchain.h"
14 #include "util/lock-contention.h"
15 #include "util/bpf_skel/lock_data.h"
16 
17 #include <subcmd/pager.h>
18 #include <subcmd/parse-options.h>
19 #include "util/trace-event.h"
20 #include "util/tracepoint.h"
21 
22 #include "util/debug.h"
23 #include "util/session.h"
24 #include "util/tool.h"
25 #include "util/data.h"
26 #include "util/string2.h"
27 #include "util/map.h"
28 #include "util/util.h"
29 
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/prctl.h>
33 #include <semaphore.h>
34 #include <math.h>
35 #include <limits.h>
36 #include <ctype.h>
37 
38 #include <linux/list.h>
39 #include <linux/hash.h>
40 #include <linux/kernel.h>
41 #include <linux/zalloc.h>
42 #include <linux/err.h>
43 #include <linux/stringify.h>
44 
/* perf session being analyzed; session->machines.host is used for thread and symbol lookups */
static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

/* buckets of struct lock_stat; pick a bucket with lockhashentry(key) */
static struct hlist_head *lockhash_table;

/* bucket index for a key: hash_long() of the key using LOCKHASH_BITS bits */
#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

/* rb-tree of struct thread_stat ordered by tid */
static struct rb_root		thread_stats;

/* when set, insert_to_result() drops entries already merged by combine_lock_stats() */
static bool combine_locks;
/* when set, lock_contention_caller() does not resolve a caller name */
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
/* maximum number of callchain entries resolved or copied per sample */
static int max_stack_depth = CONTENTION_STACK_DEPTH;
/* number of leading callchain entries skipped when walking a callchain */
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
/* list of struct callstack_filter; an empty list disables callstack filtering */
static LIST_HEAD(callstack_filters);
static const char *output_name = NULL;
/* stream the lock_stat_key_print_*() helpers write to */
static FILE *lock_output;
71 
/*
 * One entry of callstack_filters.  match_callstack_filter() accepts a
 * callstack when @name is a substring of any resolved kernel symbol in it.
 */
struct callstack_filter {
	struct list_head list;
	char name[];
};

/* type/address/symbol filters checked in report_lock_contention_begin_event() */
static struct lock_filter filters;

/* selects the lock_stat key: lock address, thread id or callchain hash */
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
80 
needs_callstack(void)81 static bool needs_callstack(void)
82 {
83 	return !list_empty(&callstack_filters);
84 }
85 
/*
 * Look up the thread_stat for @tid in the thread_stats rb-tree.
 * Returns the matching entry, or NULL when the tid is not present.
 */
static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *cur = thread_stats.rb_node;

	while (cur != NULL) {
		struct thread_stat *entry = container_of(cur, struct thread_stat, rb);

		if (tid < entry->tid)
			cur = cur->rb_left;
		else if (tid > entry->tid)
			cur = cur->rb_right;
		else
			return entry;
	}

	return NULL;
}
104 
thread_stat_insert(struct thread_stat * new)105 static void thread_stat_insert(struct thread_stat *new)
106 {
107 	struct rb_node **rb = &thread_stats.rb_node;
108 	struct rb_node *parent = NULL;
109 	struct thread_stat *p;
110 
111 	while (*rb) {
112 		p = container_of(*rb, struct thread_stat, rb);
113 		parent = *rb;
114 
115 		if (new->tid < p->tid)
116 			rb = &(*rb)->rb_left;
117 		else if (new->tid > p->tid)
118 			rb = &(*rb)->rb_right;
119 		else
120 			BUG_ON("inserting invalid thread_stat\n");
121 	}
122 
123 	rb_link_node(&new->rb, parent, rb);
124 	rb_insert_color(&new->rb, &thread_stats);
125 }
126 
/*
 * Find the thread_stat for @tid, creating and inserting a zeroed one when it
 * does not exist yet.  Returns NULL only on allocation failure.
 */
static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st = thread_stat_find(tid);

	if (st == NULL) {
		st = zalloc(sizeof(struct thread_stat));
		if (st == NULL) {
			pr_err("memory allocation failed\n");
			return NULL;
		}

		INIT_LIST_HEAD(&st->seq_list);
		st->tid = tid;

		thread_stat_insert(st);
	}

	return st;
}
148 
149 static struct thread_stat *thread_stat_findnew_first(u32 tid);
150 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
151 	thread_stat_findnew_first;
152 
thread_stat_findnew_first(u32 tid)153 static struct thread_stat *thread_stat_findnew_first(u32 tid)
154 {
155 	struct thread_stat *st;
156 
157 	st = zalloc(sizeof(struct thread_stat));
158 	if (!st) {
159 		pr_err("memory allocation failed\n");
160 		return NULL;
161 	}
162 	st->tid = tid;
163 	INIT_LIST_HEAD(&st->seq_list);
164 
165 	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
166 	rb_insert_color(&st->rb, &thread_stats);
167 
168 	thread_stat_findnew = thread_stat_findnew_after_first;
169 	return st;
170 }
171 
172 /* build simple key function one is bigger than two */
173 #define SINGLE_KEY(member)						\
174 	static int lock_stat_key_ ## member(struct lock_stat *one,	\
175 					 struct lock_stat *two)		\
176 	{								\
177 		return one->member > two->member;			\
178 	}
179 
180 SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)181 SINGLE_KEY(nr_contended)
182 SINGLE_KEY(avg_wait_time)
183 SINGLE_KEY(wait_time_total)
184 SINGLE_KEY(wait_time_max)
185 
186 static int lock_stat_key_wait_time_min(struct lock_stat *one,
187 					struct lock_stat *two)
188 {
189 	u64 s1 = one->wait_time_min;
190 	u64 s2 = two->wait_time_min;
191 	if (s1 == ULLONG_MAX)
192 		s1 = 0;
193 	if (s2 == ULLONG_MAX)
194 		s2 = 0;
195 	return s1 > s2;
196 }
197 
/* Describes one sortable/printable column of the lock statistics output. */
struct lock_key {
	/*
	 * name: the key name as specified by the user.
	 * It should be simpler than the raw member name,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to a function comparing two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function printing this field of a lock stat */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry linking this key into lock_keys */
	struct list_head	list;
};
216 
lock_stat_key_print_time(unsigned long long nsec,int len)217 static void lock_stat_key_print_time(unsigned long long nsec, int len)
218 {
219 	static const struct {
220 		float base;
221 		const char *unit;
222 	} table[] = {
223 		{ 1e9 * 3600, "h " },
224 		{ 1e9 * 60, "m " },
225 		{ 1e9, "s " },
226 		{ 1e6, "ms" },
227 		{ 1e3, "us" },
228 		{ 0, NULL },
229 	};
230 
231 	/* for CSV output */
232 	if (len == 0) {
233 		fprintf(lock_output, "%llu", nsec);
234 		return;
235 	}
236 
237 	for (int i = 0; table[i].unit; i++) {
238 		if (nsec < table[i].base)
239 			continue;
240 
241 		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
242 		return;
243 	}
244 
245 	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
246 }
247 
/*
 * Generate lock_stat_key_print_<member>(): print the member to lock_output as
 * an unsigned integer padded to key->len columns.
 */
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

/*
 * Generate lock_stat_key_print_<member>(): print the member as a time value
 * via lock_stat_key_print_time(), using key->len as the field width.
 */
#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)
267 
268 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
269 					      struct lock_stat *ls)
270 {
271 	u64 wait_time = ls->wait_time_min;
272 
273 	if (wait_time == ULLONG_MAX)
274 		wait_time = 0;
275 
276 	lock_stat_key_print_time(wait_time, key->len);
277 }
278 
279 
/* name of the key used for sorting; matched against lock_key.name by select_key() */
static const char		*sort_key = "acquired";

/* comparison function of the selected sort key, set by select_key() */
static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

/* lock_key entries selected for output, in the order they were added */
static LIST_HEAD(lock_keys);
static const char		*output_fields;
289 
/*
 * Build a struct lock_key initializer: user-visible name, header text, field
 * width, and the compare/print functions generated for fn_suffix.
 */
#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
/* keys used when select_key()/setup_output_field() are called with contention == false */
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should be added here */
	{ }
};

/* keys used when select_key()/setup_output_field() are called with contention == true */
static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should be added here */
	{ }
};
314 
select_key(bool contention)315 static int select_key(bool contention)
316 {
317 	int i;
318 	struct lock_key *keys = report_keys;
319 
320 	if (contention)
321 		keys = contention_keys;
322 
323 	for (i = 0; keys[i].name; i++) {
324 		if (!strcmp(keys[i].name, sort_key)) {
325 			compare = keys[i].key;
326 
327 			/* selected key should be in the output fields */
328 			if (list_empty(&keys[i].list))
329 				list_add_tail(&keys[i].list, &lock_keys);
330 
331 			return 0;
332 		}
333 	}
334 
335 	pr_err("Unknown compare key: %s\n", sort_key);
336 	return -1;
337 }
338 
add_output_field(bool contention,char * name)339 static int add_output_field(bool contention, char *name)
340 {
341 	int i;
342 	struct lock_key *keys = report_keys;
343 
344 	if (contention)
345 		keys = contention_keys;
346 
347 	for (i = 0; keys[i].name; i++) {
348 		if (strcmp(keys[i].name, name))
349 			continue;
350 
351 		/* prevent double link */
352 		if (list_empty(&keys[i].list))
353 			list_add_tail(&keys[i].list, &lock_keys);
354 
355 		return 0;
356 	}
357 
358 	pr_err("Unknown output field: %s\n", name);
359 	return -1;
360 }
361 
setup_output_field(bool contention,const char * str)362 static int setup_output_field(bool contention, const char *str)
363 {
364 	char *tok, *tmp, *orig;
365 	int i, ret = 0;
366 	struct lock_key *keys = report_keys;
367 
368 	if (contention)
369 		keys = contention_keys;
370 
371 	/* no output field given: use all of them */
372 	if (str == NULL) {
373 		for (i = 0; keys[i].name; i++)
374 			list_add_tail(&keys[i].list, &lock_keys);
375 		return 0;
376 	}
377 
378 	for (i = 0; keys[i].name; i++)
379 		INIT_LIST_HEAD(&keys[i].list);
380 
381 	orig = tmp = strdup(str);
382 	if (orig == NULL)
383 		return -ENOMEM;
384 
385 	while ((tok = strsep(&tmp, ",")) != NULL){
386 		ret = add_output_field(contention, tok);
387 		if (ret < 0)
388 			break;
389 	}
390 	free(orig);
391 
392 	return ret;
393 }
394 
combine_lock_stats(struct lock_stat * st)395 static void combine_lock_stats(struct lock_stat *st)
396 {
397 	struct rb_node **rb = &sorted.rb_node;
398 	struct rb_node *parent = NULL;
399 	struct lock_stat *p;
400 	int ret;
401 
402 	while (*rb) {
403 		p = container_of(*rb, struct lock_stat, rb);
404 		parent = *rb;
405 
406 		if (st->name && p->name)
407 			ret = strcmp(st->name, p->name);
408 		else
409 			ret = !!st->name - !!p->name;
410 
411 		if (ret == 0) {
412 			p->nr_acquired += st->nr_acquired;
413 			p->nr_contended += st->nr_contended;
414 			p->wait_time_total += st->wait_time_total;
415 
416 			if (p->nr_contended)
417 				p->avg_wait_time = p->wait_time_total / p->nr_contended;
418 
419 			if (p->wait_time_min > st->wait_time_min)
420 				p->wait_time_min = st->wait_time_min;
421 			if (p->wait_time_max < st->wait_time_max)
422 				p->wait_time_max = st->wait_time_max;
423 
424 			p->broken |= st->broken;
425 			st->combined = 1;
426 			return;
427 		}
428 
429 		if (ret < 0)
430 			rb = &(*rb)->rb_left;
431 		else
432 			rb = &(*rb)->rb_right;
433 	}
434 
435 	rb_link_node(&st->rb, parent, rb);
436 	rb_insert_color(&st->rb, &sorted);
437 }
438 
insert_to_result(struct lock_stat * st,int (* bigger)(struct lock_stat *,struct lock_stat *))439 static void insert_to_result(struct lock_stat *st,
440 			     int (*bigger)(struct lock_stat *, struct lock_stat *))
441 {
442 	struct rb_node **rb = &result.rb_node;
443 	struct rb_node *parent = NULL;
444 	struct lock_stat *p;
445 
446 	if (combine_locks && st->combined)
447 		return;
448 
449 	while (*rb) {
450 		p = container_of(*rb, struct lock_stat, rb);
451 		parent = *rb;
452 
453 		if (bigger(st, p))
454 			rb = &(*rb)->rb_left;
455 		else
456 			rb = &(*rb)->rb_right;
457 	}
458 
459 	rb_link_node(&st->rb, parent, rb);
460 	rb_insert_color(&st->rb, &result);
461 }
462 
463 /* returns left most element of result, and erase it */
pop_from_result(void)464 static struct lock_stat *pop_from_result(void)
465 {
466 	struct rb_node *node = result.rb_node;
467 
468 	if (!node)
469 		return NULL;
470 
471 	while (node->rb_left)
472 		node = node->rb_left;
473 
474 	rb_erase(node, &result);
475 	return container_of(node, struct lock_stat, rb);
476 }
477 
/*
 * Look up the lock_stat whose key is @addr in lockhash_table.
 * Returns NULL when no such entry exists.
 */
struct lock_stat *lock_stat_find(u64 addr)
{
	struct hlist_head *bucket = lockhashentry(addr);
	struct lock_stat *ls;

	hlist_for_each_entry(ls, bucket, hash_entry) {
		if (ls->addr != addr)
			continue;

		return ls;
	}

	return NULL;
}
489 
/*
 * Find the lock_stat keyed by @addr, or create one.
 *
 * A new entry gets a private copy of @name, the given @flags, and
 * wait_time_min preset to ULLONG_MAX so that the first measured wait always
 * replaces it.  @name and @flags are ignored when the entry already exists.
 *
 * Returns NULL when memory allocation fails.
 */
struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
	struct hlist_head *bucket = lockhashentry(addr);
	struct lock_stat *ls;

	hlist_for_each_entry(ls, bucket, hash_entry) {
		if (ls->addr == addr)
			return ls;
	}

	ls = zalloc(sizeof(struct lock_stat));
	if (ls == NULL) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	ls->name = strdup(name);
	if (ls->name == NULL) {
		free(ls);
		pr_err("memory allocation failed\n");
		return NULL;
	}

	ls->addr = addr;
	ls->flags = flags;
	ls->wait_time_min = ULLONG_MAX;

	hlist_add_head(&ls->hash_entry, bucket);
	return ls;
}
521 
/*
 * Check @callstack against callstack_filters.
 *
 * Returns true when no filter is registered, or when a filter name is a
 * substring of the kernel symbol resolved for any callstack entry.  The walk
 * stops at the first zero entry or after max_stack_depth entries; entries
 * that do not resolve to a kernel symbol are skipped.
 */
bool match_callstack_filter(struct machine *machine, u64 *callstack)
{
	int depth;

	if (list_empty(&callstack_filters))
		return true;

	for (depth = 0; depth < max_stack_depth; depth++) {
		struct callstack_filter *filter;
		struct symbol *sym;
		struct map *kmap;

		if (callstack == NULL || callstack[depth] == 0)
			break;

		sym = machine__find_kernel_symbol(machine, callstack[depth], &kmap);
		if (!sym)
			continue;

		list_for_each_entry(filter, &callstack_filters, list) {
			if (strstr(sym->name, filter->name) != NULL)
				return true;
		}
	}

	return false;
}
549 
/*
 * Per-event callbacks for lock tracepoints.  Each callback receives the evsel
 * and the parsed sample and returns an int status.  Which tracepoints are
 * available depends on the kernel configuration, as noted on each member.
 */
struct trace_lock_handler {
	/* used with CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
575 
/*
 * Return the lock sequence state that thread @ts tracks for lock @addr.
 * A missing entry is created in SEQ_STATE_UNINITIALIZED and added to the
 * head of the thread's seq_list.  Returns NULL on allocation failure.
 */
static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr != addr)
			continue;

		return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (seq == NULL) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	seq->addr = addr;
	seq->state = SEQ_STATE_UNINITIALIZED;
	list_add(&seq->list, &ts->seq_list);

	return seq;
}
596 
/* index into bad_hist: the kind of event that exposed a broken lock sequence */
enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

/* per event kind, the number of lock_stats first marked broken by that event */
static int bad_hist[BROKEN_MAX];

/* bits tested in the "flags" field of a lock acquire event */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
611 
/*
 * Compute the lock_stat key for the modes that need no callchain:
 * the lock address for LOCK_AGGR_ADDR and the thread id for LOCK_AGGR_TASK.
 *
 * Returns 0 on success, -EINVAL for every other mode (including
 * LOCK_AGGR_CALLER); *key is left untouched in that case.
 */
static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	if (aggr_mode == LOCK_AGGR_ADDR) {
		*key = addr;
		return 0;
	}

	if (aggr_mode == LOCK_AGGR_TASK) {
		*key = tid;
		return 0;
	}

	pr_err("Invalid aggregation mode: %d\n", aggr_mode);
	return -EINVAL;
}
628 
629 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
630 
get_key_by_aggr_mode(u64 * key,u64 addr,struct evsel * evsel,struct perf_sample * sample)631 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
632 				 struct perf_sample *sample)
633 {
634 	if (aggr_mode == LOCK_AGGR_CALLER) {
635 		*key = callchain_id(evsel, sample);
636 		return 0;
637 	}
638 	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
639 }
640 
/*
 * Handle a lock acquire event: advance the calling thread's sequence state
 * for the lock at "lockdep_addr" and update the counters of its lock_stat.
 *
 * Returns 0 on success (a broken sequence is only flagged, not reported as an
 * error), the negative result of get_key_by_aggr_mode_simple(), or -ENOMEM.
 */
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	/* the sequence is tracked per lock address, whatever the aggregation key is */
	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			/* any non-zero flag counts as acquired right away */
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			/* nested read acquire: only bump the counts */
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		/* drop the sequence so tracking restarts from scratch */
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
715 
/*
 * Handle a lock acquired event: when the sequence was contended, account the
 * time since the previous event as wait time, then mark the lock acquired.
 *
 * Returns 0 on success (orphan events and broken sequences included), the
 * negative result of get_key_by_aggr_mode_simple(), or -ENOMEM.
 */
static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		/* waited from the contended event until now */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	/* guard the division: the lock may never have been contended */
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
781 
/*
 * Handle a lock contended event: move an acquiring sequence to the contended
 * state, count the contention and remember its start time.
 *
 * Returns 0 on success (orphan events and broken sequences included), the
 * negative result of get_key_by_aggr_mode_simple(), or -ENOMEM.
 */
static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	/* nr_contended was just incremented, so it is used as the divisor unguarded */
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
839 
/*
 * Handle a lock release event: count the release and discard the thread's
 * sequence entry for the lock once it is fully released.
 *
 * Returns 0 on success (orphan events and broken sequences included), the
 * negative result of get_key_by_aggr_mode_simple(), or -ENOMEM.
 */
static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* release without a tracked acquire: ignore, the entry is kept */
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			/* nested read acquires remain: keep the sequence */
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		/* skip the release count but still drop the sequence */
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}
901 
/*
 * Format the location of @ip relative to @sym into @buf (at most @size bytes)
 * as "name+0xoffset", or as plain "name" when @ip maps to the symbol start.
 * @buf is set to the empty string when @map or @sym is NULL.
 *
 * Returns 0 when nothing could be formatted, otherwise the return value of
 * scnprintf() or strlcpy() respectively.
 */
static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map__map_ip(map, ip) - sym->start;

	/*
	 * offset is a u64: use PRIx64 rather than %lx so the conversion also
	 * matches the argument where long is only 32 bits wide.
	 */
	if (offset)
		return scnprintf(buf, size, "%s+%#" PRIx64, sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}
lock_contention_caller(struct evsel * evsel,struct perf_sample * sample,char * buf,int size)919 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
920 				  char *buf, int size)
921 {
922 	struct thread *thread;
923 	struct callchain_cursor *cursor;
924 	struct machine *machine = &session->machines.host;
925 	struct symbol *sym;
926 	int skip = 0;
927 	int ret;
928 
929 	/* lock names will be replaced to task name later */
930 	if (show_thread_stats)
931 		return -1;
932 
933 	thread = machine__findnew_thread(machine, -1, sample->pid);
934 	if (thread == NULL)
935 		return -1;
936 
937 	cursor = get_tls_callchain_cursor();
938 
939 	/* use caller function name from the callchain */
940 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
941 					NULL, NULL, max_stack_depth);
942 	if (ret != 0) {
943 		thread__put(thread);
944 		return -1;
945 	}
946 
947 	callchain_cursor_commit(cursor);
948 	thread__put(thread);
949 
950 	while (true) {
951 		struct callchain_cursor_node *node;
952 
953 		node = callchain_cursor_current(cursor);
954 		if (node == NULL)
955 			break;
956 
957 		/* skip first few entries - for lock functions */
958 		if (++skip <= stack_skip)
959 			goto next;
960 
961 		sym = node->ms.sym;
962 		if (sym && !machine__is_lock_function(machine, node->ip)) {
963 			get_symbol_name_offset(node->ms.map, sym, node->ip,
964 					       buf, size);
965 			return 0;
966 		}
967 
968 next:
969 		callchain_cursor_advance(cursor);
970 	}
971 	return -1;
972 }
973 
callchain_id(struct evsel * evsel,struct perf_sample * sample)974 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
975 {
976 	struct callchain_cursor *cursor;
977 	struct machine *machine = &session->machines.host;
978 	struct thread *thread;
979 	u64 hash = 0;
980 	int skip = 0;
981 	int ret;
982 
983 	thread = machine__findnew_thread(machine, -1, sample->pid);
984 	if (thread == NULL)
985 		return -1;
986 
987 	cursor = get_tls_callchain_cursor();
988 	/* use caller function name from the callchain */
989 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
990 					NULL, NULL, max_stack_depth);
991 	thread__put(thread);
992 
993 	if (ret != 0)
994 		return -1;
995 
996 	callchain_cursor_commit(cursor);
997 
998 	while (true) {
999 		struct callchain_cursor_node *node;
1000 
1001 		node = callchain_cursor_current(cursor);
1002 		if (node == NULL)
1003 			break;
1004 
1005 		/* skip first few entries - for lock functions */
1006 		if (++skip <= stack_skip)
1007 			goto next;
1008 
1009 		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
1010 			goto next;
1011 
1012 		hash ^= hash_long((unsigned long)node->ip, 64);
1013 
1014 next:
1015 		callchain_cursor_advance(cursor);
1016 	}
1017 	return hash;
1018 }
1019 
get_callstack(struct perf_sample * sample,int max_stack)1020 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
1021 {
1022 	u64 *callstack;
1023 	u64 i;
1024 	int c;
1025 
1026 	callstack = calloc(max_stack, sizeof(*callstack));
1027 	if (callstack == NULL)
1028 		return NULL;
1029 
1030 	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
1031 		u64 ip = sample->callchain->ips[i];
1032 
1033 		if (ip >= PERF_CONTEXT_MAX)
1034 			continue;
1035 
1036 		callstack[c++] = ip;
1037 	}
1038 	return callstack;
1039 }
1040 
/*
 * Handle a contention begin event.
 *
 * Finds or creates the lock_stat for the event (naming a new one according to
 * the aggregation mode), applies the type, address and callstack filters, and
 * then moves the thread's sequence for the lock into the contended state,
 * counting the contention and recording its start time.
 *
 * Returns 0 on success (filtered-out events and broken sequences included),
 * the negative result of get_key_by_aggr_mode(), or -ENOMEM.
 */
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	/* one-time setup, performed on the first event only */
	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			/* grow through a temporary so filters.addrs stays valid on failure */
			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		/* pick a display name for the new entry */
		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		/* lock_stat_findnew() duplicates the name, so the stack buffer is fine */
		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	/* type filter: the event flags must equal one of the requested types */
	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	/* address filter: the lock address must be one of the requested ones */
	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack)) {
			free(callstack);
			return 0;
		}

		/* keep only the first matching callstack per lock_stat */
		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * It can have nested contention begin with mutex spinning,
		 * then we would use the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
1203 
report_lock_contention_end_event(struct evsel * evsel,struct perf_sample * sample)1204 static int report_lock_contention_end_event(struct evsel *evsel,
1205 					    struct perf_sample *sample)
1206 {
1207 	struct lock_stat *ls;
1208 	struct thread_stat *ts;
1209 	struct lock_seq_stat *seq;
1210 	u64 contended_term;
1211 	u64 addr = evsel__intval(evsel, sample, "lock_addr");
1212 	u64 key;
1213 	int ret;
1214 
1215 	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1216 	if (ret < 0)
1217 		return ret;
1218 
1219 	ls = lock_stat_find(key);
1220 	if (!ls)
1221 		return 0;
1222 
1223 	ts = thread_stat_find(sample->tid);
1224 	if (!ts)
1225 		return 0;
1226 
1227 	seq = get_seq(ts, addr);
1228 	if (!seq)
1229 		return -ENOMEM;
1230 
1231 	switch (seq->state) {
1232 	case SEQ_STATE_UNINITIALIZED:
1233 		goto end;
1234 	case SEQ_STATE_CONTENDED:
1235 		contended_term = sample->time - seq->prev_event_time;
1236 		ls->wait_time_total += contended_term;
1237 		if (contended_term < ls->wait_time_min)
1238 			ls->wait_time_min = contended_term;
1239 		if (ls->wait_time_max < contended_term)
1240 			ls->wait_time_max = contended_term;
1241 		break;
1242 	case SEQ_STATE_ACQUIRING:
1243 	case SEQ_STATE_ACQUIRED:
1244 	case SEQ_STATE_READ_ACQUIRED:
1245 	case SEQ_STATE_RELEASED:
1246 		/* broken lock sequence */
1247 		if (!ls->broken) {
1248 			ls->broken = 1;
1249 			bad_hist[BROKEN_ACQUIRED]++;
1250 		}
1251 		list_del_init(&seq->list);
1252 		free(seq);
1253 		goto end;
1254 	default:
1255 		BUG_ON("Unknown state of lock sequence found!\n");
1256 		break;
1257 	}
1258 
1259 	seq->state = SEQ_STATE_ACQUIRED;
1260 	ls->nr_acquired++;
1261 	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1262 end:
1263 	return 0;
1264 }
1265 
/*
 * Lock oriented handlers: every lock event, including the contention
 * begin/end pair, is routed to its report_lock_*() callback.
 */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};
1276 
/*
 * Handler set that only reacts to contention begin/end events; the other
 * callbacks are left NULL, so the evsel__process_*() dispatchers return 0
 * for them.
 */
static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};
1281 
1282 
/*
 * Handler set consulted by the evsel__process_*() dispatchers below.
 * NOTE(review): it is assigned outside this part of the file - confirm it is
 * always set before samples are processed.
 */
static struct trace_lock_handler *trace_handler;
1284 
evsel__process_lock_acquire(struct evsel * evsel,struct perf_sample * sample)1285 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1286 {
1287 	if (trace_handler->acquire_event)
1288 		return trace_handler->acquire_event(evsel, sample);
1289 	return 0;
1290 }
1291 
evsel__process_lock_acquired(struct evsel * evsel,struct perf_sample * sample)1292 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1293 {
1294 	if (trace_handler->acquired_event)
1295 		return trace_handler->acquired_event(evsel, sample);
1296 	return 0;
1297 }
1298 
evsel__process_lock_contended(struct evsel * evsel,struct perf_sample * sample)1299 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1300 {
1301 	if (trace_handler->contended_event)
1302 		return trace_handler->contended_event(evsel, sample);
1303 	return 0;
1304 }
1305 
evsel__process_lock_release(struct evsel * evsel,struct perf_sample * sample)1306 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1307 {
1308 	if (trace_handler->release_event)
1309 		return trace_handler->release_event(evsel, sample);
1310 	return 0;
1311 }
1312 
evsel__process_contention_begin(struct evsel * evsel,struct perf_sample * sample)1313 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1314 {
1315 	if (trace_handler->contention_begin_event)
1316 		return trace_handler->contention_begin_event(evsel, sample);
1317 	return 0;
1318 }
1319 
evsel__process_contention_end(struct evsel * evsel,struct perf_sample * sample)1320 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1321 {
1322 	if (trace_handler->contention_end_event)
1323 		return trace_handler->contention_end_event(evsel, sample);
1324 	return 0;
1325 }
1326 
print_bad_events(int bad,int total)1327 static void print_bad_events(int bad, int total)
1328 {
1329 	/* Output for debug, this have to be removed */
1330 	int i;
1331 	int broken = 0;
1332 	const char *name[4] =
1333 		{ "acquire", "acquired", "contended", "release" };
1334 
1335 	for (i = 0; i < BROKEN_MAX; i++)
1336 		broken += bad_hist[i];
1337 
1338 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1339 		return;
1340 
1341 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1342 	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1343 	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1344 	fprintf(lock_output, "histogram of events caused bad sequence\n");
1345 	for (i = 0; i < BROKEN_MAX; i++)
1346 		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1347 }
1348 
/* TODO: various ways to print, coloring, nano or milli sec */
print_result(void)1350 static void print_result(void)
1351 {
1352 	struct lock_stat *st;
1353 	struct lock_key *key;
1354 	char cut_name[20];
1355 	int bad, total, printed;
1356 
1357 	if (!quiet) {
1358 		fprintf(lock_output, "%20s ", "Name");
1359 		list_for_each_entry(key, &lock_keys, list)
1360 			fprintf(lock_output, "%*s ", key->len, key->header);
1361 		fprintf(lock_output, "\n\n");
1362 	}
1363 
1364 	bad = total = printed = 0;
1365 	while ((st = pop_from_result())) {
1366 		total++;
1367 		if (st->broken)
1368 			bad++;
1369 		if (!st->nr_acquired)
1370 			continue;
1371 
1372 		bzero(cut_name, 20);
1373 
1374 		if (strlen(st->name) < 20) {
1375 			/* output raw name */
1376 			const char *name = st->name;
1377 
1378 			if (show_thread_stats) {
1379 				struct thread *t;
1380 
1381 				/* st->addr contains tid of thread */
1382 				t = perf_session__findnew(session, st->addr);
1383 				name = thread__comm_str(t);
1384 			}
1385 
1386 			fprintf(lock_output, "%20s ", name);
1387 		} else {
1388 			strncpy(cut_name, st->name, 16);
1389 			cut_name[16] = '.';
1390 			cut_name[17] = '.';
1391 			cut_name[18] = '.';
1392 			cut_name[19] = '\0';
1393 			/* cut off name for saving output style */
1394 			fprintf(lock_output, "%20s ", cut_name);
1395 		}
1396 
1397 		list_for_each_entry(key, &lock_keys, list) {
1398 			key->print(key, st);
1399 			fprintf(lock_output, " ");
1400 		}
1401 		fprintf(lock_output, "\n");
1402 
1403 		if (++printed >= print_nr_entries)
1404 			break;
1405 	}
1406 
1407 	print_bad_events(bad, total);
1408 }
1409 
/* Select what dump_info() prints: the thread list or the lock map. */
static bool info_threads, info_map;
1411 
dump_threads(void)1412 static void dump_threads(void)
1413 {
1414 	struct thread_stat *st;
1415 	struct rb_node *node;
1416 	struct thread *t;
1417 
1418 	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1419 
1420 	node = rb_first(&thread_stats);
1421 	while (node) {
1422 		st = container_of(node, struct thread_stat, rb);
1423 		t = perf_session__findnew(session, st->tid);
1424 		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1425 		node = rb_next(node);
1426 		thread__put(t);
1427 	}
1428 }
1429 
compare_maps(struct lock_stat * a,struct lock_stat * b)1430 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1431 {
1432 	int ret;
1433 
1434 	if (a->name && b->name)
1435 		ret = strcmp(a->name, b->name);
1436 	else
1437 		ret = !!a->name - !!b->name;
1438 
1439 	if (!ret)
1440 		return a->addr < b->addr;
1441 	else
1442 		return ret < 0;
1443 }
1444 
dump_map(void)1445 static void dump_map(void)
1446 {
1447 	unsigned int i;
1448 	struct lock_stat *st;
1449 
1450 	fprintf(lock_output, "Address of instance: name of class\n");
1451 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1452 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1453 			insert_to_result(st, compare_maps);
1454 		}
1455 	}
1456 
1457 	while ((st = pop_from_result()))
1458 		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1459 }
1460 
dump_info(void)1461 static int dump_info(void)
1462 {
1463 	int rc = 0;
1464 
1465 	if (info_threads)
1466 		dump_threads();
1467 	else if (info_map)
1468 		dump_map();
1469 	else {
1470 		rc = -1;
1471 		pr_err("Unknown type of information\n");
1472 	}
1473 
1474 	return rc;
1475 }
1476 
/*
 * Tracepoint name -> sample handler table for the lock:lock_* events.  The
 * trailing comments name the kernel config options tied to each tracepoint.
 */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};
1483 
/* Tracepoint name -> sample handler table for the lock contention events. */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};
1488 
process_event_update(struct perf_tool * tool,union perf_event * event,struct evlist ** pevlist)1489 static int process_event_update(struct perf_tool *tool,
1490 				union perf_event *event,
1491 				struct evlist **pevlist)
1492 {
1493 	int ret;
1494 
1495 	ret = perf_event__process_event_update(tool, event, pevlist);
1496 	if (ret < 0)
1497 		return ret;
1498 
1499 	/* this can return -EEXIST since we call it for each evsel */
1500 	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1501 	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1502 	return 0;
1503 }
1504 
/* Signature of the per-tracepoint callback stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);
1507 
/*
 * Sample callback: make sure the sampling thread exists in @machine, then
 * invoke the tracepoint handler attached to @evsel, if any.
 *
 * Returns -1 when the thread cannot be found or created, otherwise the
 * handler's result (0 when the evsel has no handler).
 */
static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	tracepoint_handler handler;
	struct thread *thr;
	int ret;

	thr = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (!thr) {
		pr_debug("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	handler = evsel->handler;
	ret = handler ? handler(evsel, sample) : 0;

	/* drop the reference taken by machine__findnew_thread() */
	thread__put(thr);

	return ret;
}
1533 
combine_result(void)1534 static void combine_result(void)
1535 {
1536 	unsigned int i;
1537 	struct lock_stat *st;
1538 
1539 	if (!combine_locks)
1540 		return;
1541 
1542 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1543 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1544 			combine_lock_stats(st);
1545 		}
1546 	}
1547 }
1548 
sort_result(void)1549 static void sort_result(void)
1550 {
1551 	unsigned int i;
1552 	struct lock_stat *st;
1553 
1554 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1555 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1556 			insert_to_result(st, compare);
1557 		}
1558 	}
1559 }
1560 
/*
 * Mapping between lock type flags and their printable forms.
 *
 * The lookups below scan the table from the top and return the first match,
 * so the order of entries that share flags or names matters.
 */
static const struct {
	unsigned int flags;	/* LCB_F_* combination identifying the type */
	const char *str;	/* string returned by get_type_str() */
	const char *name;	/* string returned by get_type_name() */
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for get_type_flag() */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex-spin",	"mutex" },
};
1582 
get_type_str(unsigned int flags)1583 static const char *get_type_str(unsigned int flags)
1584 {
1585 	flags &= LCB_F_MAX_FLAGS - 1;
1586 
1587 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1588 		if (lock_type_table[i].flags == flags)
1589 			return lock_type_table[i].str;
1590 	}
1591 	return "unknown";
1592 }
1593 
get_type_name(unsigned int flags)1594 static const char *get_type_name(unsigned int flags)
1595 {
1596 	flags &= LCB_F_MAX_FLAGS - 1;
1597 
1598 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1599 		if (lock_type_table[i].flags == flags)
1600 			return lock_type_table[i].name;
1601 	}
1602 	return "unknown";
1603 }
1604 
get_type_flag(const char * str)1605 static unsigned int get_type_flag(const char *str)
1606 {
1607 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1608 		if (!strcmp(lock_type_table[i].name, str))
1609 			return lock_type_table[i].flags;
1610 	}
1611 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1612 		if (!strcmp(lock_type_table[i].str, str))
1613 			return lock_type_table[i].flags;
1614 	}
1615 	return UINT_MAX;
1616 }
1617 
lock_filter_finish(void)1618 static void lock_filter_finish(void)
1619 {
1620 	zfree(&filters.types);
1621 	filters.nr_types = 0;
1622 
1623 	zfree(&filters.addrs);
1624 	filters.nr_addrs = 0;
1625 
1626 	for (int i = 0; i < filters.nr_syms; i++)
1627 		free(filters.syms[i]);
1628 
1629 	zfree(&filters.syms);
1630 	filters.nr_syms = 0;
1631 }
1632 
/* Sort the results for the contention output; currently just sort_result(). */
static void sort_contention_result(void)
{
	sort_result();
}
1637 
print_header_stdio(void)1638 static void print_header_stdio(void)
1639 {
1640 	struct lock_key *key;
1641 
1642 	list_for_each_entry(key, &lock_keys, list)
1643 		fprintf(lock_output, "%*s ", key->len, key->header);
1644 
1645 	switch (aggr_mode) {
1646 	case LOCK_AGGR_TASK:
1647 		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1648 			show_lock_owner ? "owner" : "comm");
1649 		break;
1650 	case LOCK_AGGR_CALLER:
1651 		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1652 		break;
1653 	case LOCK_AGGR_ADDR:
1654 		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1655 		break;
1656 	default:
1657 		break;
1658 	}
1659 }
1660 
print_header_csv(const char * sep)1661 static void print_header_csv(const char *sep)
1662 {
1663 	struct lock_key *key;
1664 
1665 	fprintf(lock_output, "# output: ");
1666 	list_for_each_entry(key, &lock_keys, list)
1667 		fprintf(lock_output, "%s%s ", key->header, sep);
1668 
1669 	switch (aggr_mode) {
1670 	case LOCK_AGGR_TASK:
1671 		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1672 			show_lock_owner ? "owner" : "comm");
1673 		break;
1674 	case LOCK_AGGR_CALLER:
1675 		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1676 		if (verbose > 0)
1677 			fprintf(lock_output, "%s %s", sep, "stacktrace");
1678 		fprintf(lock_output, "\n");
1679 		break;
1680 	case LOCK_AGGR_ADDR:
1681 		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1682 		break;
1683 	default:
1684 		break;
1685 	}
1686 }
1687 
print_header(void)1688 static void print_header(void)
1689 {
1690 	if (!quiet) {
1691 		if (symbol_conf.field_sep)
1692 			print_header_csv(symbol_conf.field_sep);
1693 		else
1694 			print_header_stdio();
1695 	}
1696 }
1697 
print_lock_stat_stdio(struct lock_contention * con,struct lock_stat * st)1698 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1699 {
1700 	struct lock_key *key;
1701 	struct thread *t;
1702 	int pid;
1703 
1704 	list_for_each_entry(key, &lock_keys, list) {
1705 		key->print(key, st);
1706 		fprintf(lock_output, " ");
1707 	}
1708 
1709 	switch (aggr_mode) {
1710 	case LOCK_AGGR_CALLER:
1711 		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
1712 		break;
1713 	case LOCK_AGGR_TASK:
1714 		pid = st->addr;
1715 		t = perf_session__findnew(session, pid);
1716 		fprintf(lock_output, "  %10d   %s\n",
1717 			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1718 		break;
1719 	case LOCK_AGGR_ADDR:
1720 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1721 			st->name, get_type_name(st->flags));
1722 		break;
1723 	default:
1724 		break;
1725 	}
1726 
1727 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1728 		struct map *kmap;
1729 		struct symbol *sym;
1730 		char buf[128];
1731 		u64 ip;
1732 
1733 		for (int i = 0; i < max_stack_depth; i++) {
1734 			if (!st->callstack || !st->callstack[i])
1735 				break;
1736 
1737 			ip = st->callstack[i];
1738 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1739 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1740 			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1741 		}
1742 	}
1743 }
1744 
print_lock_stat_csv(struct lock_contention * con,struct lock_stat * st,const char * sep)1745 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1746 				const char *sep)
1747 {
1748 	struct lock_key *key;
1749 	struct thread *t;
1750 	int pid;
1751 
1752 	list_for_each_entry(key, &lock_keys, list) {
1753 		key->print(key, st);
1754 		fprintf(lock_output, "%s ", sep);
1755 	}
1756 
1757 	switch (aggr_mode) {
1758 	case LOCK_AGGR_CALLER:
1759 		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
1760 		if (verbose <= 0)
1761 			fprintf(lock_output, "\n");
1762 		break;
1763 	case LOCK_AGGR_TASK:
1764 		pid = st->addr;
1765 		t = perf_session__findnew(session, pid);
1766 		fprintf(lock_output, "%d%s %s\n", pid, sep,
1767 			pid == -1 ? "Unknown" : thread__comm_str(t));
1768 		break;
1769 	case LOCK_AGGR_ADDR:
1770 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1771 			st->name, sep, get_type_name(st->flags));
1772 		break;
1773 	default:
1774 		break;
1775 	}
1776 
1777 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1778 		struct map *kmap;
1779 		struct symbol *sym;
1780 		char buf[128];
1781 		u64 ip;
1782 
1783 		for (int i = 0; i < max_stack_depth; i++) {
1784 			if (!st->callstack || !st->callstack[i])
1785 				break;
1786 
1787 			ip = st->callstack[i];
1788 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1789 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1790 			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1791 		}
1792 		fprintf(lock_output, "\n");
1793 	}
1794 }
1795 
print_lock_stat(struct lock_contention * con,struct lock_stat * st)1796 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1797 {
1798 	if (symbol_conf.field_sep)
1799 		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1800 	else
1801 		print_lock_stat_stdio(con, st);
1802 }
1803 
print_footer_stdio(int total,int bad,struct lock_contention_fails * fails)1804 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1805 {
1806 	/* Output for debug, this have to be removed */
1807 	int broken = fails->task + fails->stack + fails->time + fails->data;
1808 
1809 	if (!use_bpf)
1810 		print_bad_events(bad, total);
1811 
1812 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1813 		return;
1814 
1815 	total += broken;
1816 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1817 	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1818 	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1819 
1820 	fprintf(lock_output, "histogram of failure reasons\n");
1821 	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1822 	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1823 	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1824 	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1825 }
1826 
print_footer_csv(int total,int bad,struct lock_contention_fails * fails,const char * sep)1827 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1828 			     const char *sep)
1829 {
1830 	/* Output for debug, this have to be removed */
1831 	if (use_bpf)
1832 		bad = fails->task + fails->stack + fails->time + fails->data;
1833 
1834 	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1835 		return;
1836 
1837 	total += bad;
1838 	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1839 
1840 	if (use_bpf) {
1841 		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1842 		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1843 		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1844 		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1845 	} else {
1846 		int i;
1847 		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1848 
1849 		for (i = 0; i < BROKEN_MAX; i++)
1850 			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1851 	}
1852 	fprintf(lock_output, "\n");
1853 }
1854 
print_footer(int total,int bad,struct lock_contention_fails * fails)1855 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1856 {
1857 	if (symbol_conf.field_sep)
1858 		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1859 	else
1860 		print_footer_stdio(total, bad, fails);
1861 }
1862 
print_contention_result(struct lock_contention * con)1863 static void print_contention_result(struct lock_contention *con)
1864 {
1865 	struct lock_stat *st;
1866 	int bad, total, printed;
1867 
1868 	if (!quiet)
1869 		print_header();
1870 
1871 	bad = total = printed = 0;
1872 
1873 	while ((st = pop_from_result())) {
1874 		total += use_bpf ? st->nr_contended : 1;
1875 		if (st->broken)
1876 			bad++;
1877 
1878 		if (!st->wait_time_total)
1879 			continue;
1880 
1881 		print_lock_stat(con, st);
1882 
1883 		if (++printed >= print_nr_entries)
1884 			break;
1885 	}
1886 
1887 	if (print_nr_entries) {
1888 		/* update the total/bad stats */
1889 		while ((st = pop_from_result())) {
1890 			total += use_bpf ? st->nr_contended : 1;
1891 			if (st->broken)
1892 				bad++;
1893 		}
1894 	}
1895 	/* some entries are collected but hidden by the callstack filter */
1896 	total += con->nr_filtered;
1897 
1898 	print_footer(total, bad, &con->fails);
1899 }
1900 
/* Copied into perf_data.force when the input data is opened. */
static bool force;
1902 
/*
 * Read the recorded lock events from input_name and print either the lock
 * statistics or, when @display_info is set, the thread/map information.
 *
 * Returns 0 on success, the PTR_ERR() of a failed session creation, the
 * error of event processing or dump_info(), or -EINVAL when one of the
 * setup steps fails.
 */
static int __cmd_report(bool display_info)
{
	/* stays -EINVAL for every setup failure below */
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	/*
	 * Handlers are installed up front only for file input; for pipe
	 * input they are installed from process_event_update().
	 */
	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}
1972 
/*
 * Intentionally empty signal handler.  __cmd_contention() installs it for
 * SIGINT, SIGCHLD and SIGTERM before blocking in pause().
 */
static void sighandler(int sig __maybe_unused)
{
}
1976 
check_lock_contention_options(const struct option * options,const char * const * usage)1977 static int check_lock_contention_options(const struct option *options,
1978 					 const char * const *usage)
1979 
1980 {
1981 	if (show_thread_stats && show_lock_addrs) {
1982 		pr_err("Cannot use thread and addr mode together\n");
1983 		parse_options_usage(usage, options, "threads", 0);
1984 		parse_options_usage(NULL, options, "lock-addr", 0);
1985 		return -1;
1986 	}
1987 
1988 	if (show_lock_owner && !use_bpf) {
1989 		pr_err("Lock owners are available only with BPF\n");
1990 		parse_options_usage(usage, options, "lock-owner", 0);
1991 		parse_options_usage(NULL, options, "use-bpf", 0);
1992 		return -1;
1993 	}
1994 
1995 	if (show_lock_owner && show_lock_addrs) {
1996 		pr_err("Cannot use owner and addr mode together\n");
1997 		parse_options_usage(usage, options, "lock-owner", 0);
1998 		parse_options_usage(NULL, options, "lock-addr", 0);
1999 		return -1;
2000 	}
2001 
2002 	if (symbol_conf.field_sep) {
2003 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
2004 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
2005 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
2006 			pr_err("Cannot use the separator that is already used\n");
2007 			parse_options_usage(usage, options, "x", 1);
2008 			return -1;
2009 		}
2010 	}
2011 
2012 	if (show_lock_owner)
2013 		show_thread_stats = true;
2014 
2015 	return 0;
2016 }
2017 
__cmd_contention(int argc,const char ** argv)2018 static int __cmd_contention(int argc, const char **argv)
2019 {
2020 	int err = -EINVAL;
2021 	struct perf_tool eops = {
2022 		.attr		 = perf_event__process_attr,
2023 		.event_update	 = process_event_update,
2024 		.sample		 = process_sample_event,
2025 		.comm		 = perf_event__process_comm,
2026 		.mmap		 = perf_event__process_mmap,
2027 		.tracing_data	 = perf_event__process_tracing_data,
2028 		.ordered_events	 = true,
2029 	};
2030 	struct perf_data data = {
2031 		.path  = input_name,
2032 		.mode  = PERF_DATA_MODE_READ,
2033 		.force = force,
2034 	};
2035 	struct lock_contention con = {
2036 		.target = &target,
2037 		.map_nr_entries = bpf_map_entries,
2038 		.max_stack = max_stack_depth,
2039 		.stack_skip = stack_skip,
2040 		.filters = &filters,
2041 		.save_callstack = needs_callstack(),
2042 		.owner = show_lock_owner,
2043 	};
2044 
2045 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2046 	if (!lockhash_table)
2047 		return -ENOMEM;
2048 
2049 	con.result = &lockhash_table[0];
2050 
2051 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
2052 	if (IS_ERR(session)) {
2053 		pr_err("Initializing perf session failed\n");
2054 		err = PTR_ERR(session);
2055 		session = NULL;
2056 		goto out_delete;
2057 	}
2058 
2059 	con.machine = &session->machines.host;
2060 
2061 	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2062 		show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;
2063 
2064 	if (con.aggr_mode == LOCK_AGGR_CALLER)
2065 		con.save_callstack = true;
2066 
2067 	symbol_conf.allow_aliases = true;
2068 	symbol__init(&session->header.env);
2069 
2070 	if (use_bpf) {
2071 		err = target__validate(&target);
2072 		if (err) {
2073 			char errbuf[512];
2074 
2075 			target__strerror(&target, err, errbuf, 512);
2076 			pr_err("%s\n", errbuf);
2077 			goto out_delete;
2078 		}
2079 
2080 		signal(SIGINT, sighandler);
2081 		signal(SIGCHLD, sighandler);
2082 		signal(SIGTERM, sighandler);
2083 
2084 		con.evlist = evlist__new();
2085 		if (con.evlist == NULL) {
2086 			err = -ENOMEM;
2087 			goto out_delete;
2088 		}
2089 
2090 		err = evlist__create_maps(con.evlist, &target);
2091 		if (err < 0)
2092 			goto out_delete;
2093 
2094 		if (argc) {
2095 			err = evlist__prepare_workload(con.evlist, &target,
2096 						       argv, false, NULL);
2097 			if (err < 0)
2098 				goto out_delete;
2099 		}
2100 
2101 		if (lock_contention_prepare(&con) < 0) {
2102 			pr_err("lock contention BPF setup failed\n");
2103 			goto out_delete;
2104 		}
2105 	} else if (!data.is_pipe) {
2106 		if (!perf_session__has_traces(session, "lock record"))
2107 			goto out_delete;
2108 
2109 		if (!evlist__find_evsel_by_str(session->evlist,
2110 					       "lock:contention_begin")) {
2111 			pr_err("lock contention evsel not found\n");
2112 			goto out_delete;
2113 		}
2114 
2115 		if (perf_session__set_tracepoints_handlers(session,
2116 						contention_tracepoints)) {
2117 			pr_err("Initializing perf session tracepoint handlers failed\n");
2118 			goto out_delete;
2119 		}
2120 	}
2121 
2122 	if (setup_output_field(true, output_fields))
2123 		goto out_delete;
2124 
2125 	if (select_key(true))
2126 		goto out_delete;
2127 
2128 	if (symbol_conf.field_sep) {
2129 		int i;
2130 		struct lock_key *keys = contention_keys;
2131 
2132 		/* do not align output in CSV format */
2133 		for (i = 0; keys[i].name; i++)
2134 			keys[i].len = 0;
2135 	}
2136 
2137 	if (use_bpf) {
2138 		lock_contention_start();
2139 		if (argc)
2140 			evlist__start_workload(con.evlist);
2141 
2142 		/* wait for signal */
2143 		pause();
2144 
2145 		lock_contention_stop();
2146 		lock_contention_read(&con);
2147 	} else {
2148 		err = perf_session__process_events(session);
2149 		if (err)
2150 			goto out_delete;
2151 	}
2152 
2153 	setup_pager();
2154 
2155 	sort_contention_result();
2156 	print_contention_result(&con);
2157 
2158 out_delete:
2159 	lock_filter_finish();
2160 	evlist__delete(con.evlist);
2161 	lock_contention_finish();
2162 	perf_session__delete(session);
2163 	zfree(&lockhash_table);
2164 	return err;
2165 }
2166 
2167 
__cmd_record(int argc,const char ** argv)2168 static int __cmd_record(int argc, const char **argv)
2169 {
2170 	const char *record_args[] = {
2171 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2172 	};
2173 	const char *callgraph_args[] = {
2174 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2175 	};
2176 	unsigned int rec_argc, i, j, ret;
2177 	unsigned int nr_tracepoints;
2178 	unsigned int nr_callgraph_args = 0;
2179 	const char **rec_argv;
2180 	bool has_lock_stat = true;
2181 
2182 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2183 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2184 			pr_debug("tracepoint %s is not enabled. "
2185 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2186 				 lock_tracepoints[i].name);
2187 			has_lock_stat = false;
2188 			break;
2189 		}
2190 	}
2191 
2192 	if (has_lock_stat)
2193 		goto setup_args;
2194 
2195 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2196 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2197 			pr_err("tracepoint %s is not enabled.\n",
2198 			       contention_tracepoints[i].name);
2199 			return 1;
2200 		}
2201 	}
2202 
2203 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2204 
2205 setup_args:
2206 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2207 
2208 	if (has_lock_stat)
2209 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2210 	else
2211 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2212 
2213 	/* factor of 2 is for -e in front of each tracepoint */
2214 	rec_argc += 2 * nr_tracepoints;
2215 
2216 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2217 	if (!rec_argv)
2218 		return -ENOMEM;
2219 
2220 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2221 		rec_argv[i] = strdup(record_args[i]);
2222 
2223 	for (j = 0; j < nr_tracepoints; j++) {
2224 		const char *ev_name;
2225 
2226 		if (has_lock_stat)
2227 			ev_name = strdup(lock_tracepoints[j].name);
2228 		else
2229 			ev_name = strdup(contention_tracepoints[j].name);
2230 
2231 		if (!ev_name)
2232 			return -ENOMEM;
2233 
2234 		rec_argv[i++] = "-e";
2235 		rec_argv[i++] = ev_name;
2236 	}
2237 
2238 	for (j = 0; j < nr_callgraph_args; j++, i++)
2239 		rec_argv[i] = callgraph_args[j];
2240 
2241 	for (j = 1; j < (unsigned int)argc; j++, i++)
2242 		rec_argv[i] = argv[j];
2243 
2244 	BUG_ON(i != rec_argc);
2245 
2246 	ret = cmd_record(i, rec_argv);
2247 	free(rec_argv);
2248 	return ret;
2249 }
2250 
/*
 * Option callback: parse @str as an unsigned long (strtoul() with base 0)
 * and store the result in the unsigned long that opt->value points to.
 *
 * Returns 0 on success.  Returns -1 when @str is NULL, contains no digits,
 * has trailing characters after the number, or makes strtoul() set errno.
 */
static int parse_map_entry(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	/* nothing to parse; strtoul() must not be handed a NULL pointer */
	if (str == NULL)
		return -1;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	/* endptr == str: no digits were consumed, e.g. an empty argument */
	if (endptr == str || *endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}
2268 
/*
 * Option callback: parse @str as the maximum stack depth and store it in
 * the int that opt->value points to (the option table passes
 * &max_stack_depth, which is an int).
 *
 * The value must be a complete number (strtol() with base 0) in the range
 * [0, sysctl__max_stack()].
 *
 * Returns 0 on success, -1 on a NULL, empty, malformed or out-of-range
 * argument.
 */
static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	/*
	 * The destination is an int.  Writing through an unsigned long
	 * pointer would store more bytes than the object holds wherever
	 * long is wider than int.
	 */
	int *len = (int *)opt->value;
	long val;
	char *endptr;

	/* nothing to parse; strtol() must not be handed a NULL pointer */
	if (str == NULL)
		return -1;

	errno = 0;
	val = strtol(str, &endptr, 0);
	/* endptr == str: no digits were consumed, e.g. an empty argument */
	if (endptr == str || *endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	/* the range check above guarantees the value fits */
	*len = (int)val;
	return 0;
}
2291 
add_lock_type(unsigned int flags)2292 static bool add_lock_type(unsigned int flags)
2293 {
2294 	unsigned int *tmp;
2295 
2296 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2297 	if (tmp == NULL)
2298 		return false;
2299 
2300 	tmp[filters.nr_types++] = flags;
2301 	filters.types = tmp;
2302 	return true;
2303 }
2304 
/*
 * Option callback: split @str on commas and spaces, translate every token
 * with get_type_flag() and register the result through add_lock_type().
 *
 * Parsing stops at the first unknown token or failed registration.
 * Returns 0 when every token was added, -1 otherwise (including when the
 * working copy of @str cannot be allocated).
 */
static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *copy, *saveptr, *tok;
	int ret = 0;

	/* strtok_r() writes into its input, so tokenize a private copy */
	copy = strdup(str);
	if (!copy)
		return -1;

	tok = strtok_r(copy, ", ", &saveptr);
	while (tok != NULL) {
		unsigned int flags = get_type_flag(tok);

		if (flags == -1U) {
			pr_err("Unknown lock flags: %s\n", tok);
			ret = -1;
			break;
		}

		if (!add_lock_type(flags)) {
			ret = -1;
			break;
		}

		tok = strtok_r(NULL, ", ", &saveptr);
	}

	free(copy);
	return ret;
}
2333 
add_lock_addr(unsigned long addr)2334 static bool add_lock_addr(unsigned long addr)
2335 {
2336 	unsigned long *tmp;
2337 
2338 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2339 	if (tmp == NULL) {
2340 		pr_err("Memory allocation failure\n");
2341 		return false;
2342 	}
2343 
2344 	tmp[filters.nr_addrs++] = addr;
2345 	filters.addrs = tmp;
2346 	return true;
2347 }
2348 
add_lock_sym(char * name)2349 static bool add_lock_sym(char *name)
2350 {
2351 	char **tmp;
2352 	char *sym = strdup(name);
2353 
2354 	if (sym == NULL) {
2355 		pr_err("Memory allocation failure\n");
2356 		return false;
2357 	}
2358 
2359 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2360 	if (tmp == NULL) {
2361 		pr_err("Memory allocation failure\n");
2362 		free(sym);
2363 		return false;
2364 	}
2365 
2366 	tmp[filters.nr_syms++] = sym;
2367 	filters.syms = tmp;
2368 	return true;
2369 }
2370 
/*
 * Option callback: split @str on commas and spaces and register every token
 * as a lock filter.
 *
 * A token that strtoul() consumes completely as a base-16 number is stored
 * with add_lock_addr(); any other token is stored with add_lock_sym().
 *
 * Returns 0 when every token was registered, -1 on the first failure or
 * when the working copy of @str cannot be allocated.
 */
static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *copy, *saveptr, *tok;
	int ret = 0;

	/* strtok_r() writes into its input, so tokenize a private copy */
	copy = strdup(str);
	if (!copy)
		return -1;

	tok = strtok_r(copy, ", ", &saveptr);
	while (tok != NULL) {
		char *end;
		u64 addr = strtoul(tok, &end, 16);
		bool added;

		/*
		 * At this moment, we don't have kernel symbols.  Anything
		 * that is not a plain hex number is saved in a separate
		 * list of names and resolved to an address later.
		 */
		if (*end == '\0')
			added = add_lock_addr(addr);
		else
			added = add_lock_sym(tok);

		if (!added) {
			ret = -1;
			break;
		}

		tok = strtok_r(NULL, ", ", &saveptr);
	}

	free(copy);
	return ret;
}
2407 
/*
 * Option callback: split @str on commas and spaces and append one
 * callstack_filter entry per token to the callstack_filters list.  Each
 * entry is allocated with room for a copy of the token in entry->name.
 *
 * Returns 0 on success, -1 when memory cannot be allocated.  Entries added
 * before a failure stay on the list.
 */
static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	/* strtok_r() writes into its input, so tokenize a private copy */
	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		struct callstack_filter *entry;

		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
		if (entry == NULL) {
			pr_err("Memory allocation failure\n");
			/* leave through the common exit so that 's' is freed */
			ret = -1;
			break;
		}

		strcpy(entry->name, tok);
		list_add_tail(&entry->list, &callstack_filters);
	}

	free(s);
	return ret;
}
2434 
/*
 * Option callback: open @str for writing and make it the lock_output
 * stream, then record the file name through opt->value.
 *
 * Returns 0 on success, -1 when @str is NULL or the file cannot be opened
 * (an error is printed in the latter case).
 */
static int parse_output(const struct option *opt __maybe_unused, const char *str,
			int unset __maybe_unused)
{
	const char **name;

	if (!str)
		return -1;

	lock_output = fopen(str, "w");
	if (!lock_output) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	/* remember which file the output goes to */
	name = (const char **)opt->value;
	*name = str;
	return 0;
}
2452 
cmd_lock(int argc,const char ** argv)2453 int cmd_lock(int argc, const char **argv)
2454 {
2455 	const struct option lock_options[] = {
2456 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2457 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2458 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2459 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2460 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2461 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2462 		   "file", "vmlinux pathname"),
2463 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2464 		   "file", "kallsyms pathname"),
2465 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2466 	OPT_END()
2467 	};
2468 
2469 	const struct option info_options[] = {
2470 	OPT_BOOLEAN('t', "threads", &info_threads,
2471 		    "dump thread list in perf.data"),
2472 	OPT_BOOLEAN('m', "map", &info_map,
2473 		    "map of lock instances (address:name table)"),
2474 	OPT_PARENT(lock_options)
2475 	};
2476 
2477 	const struct option report_options[] = {
2478 	OPT_STRING('k', "key", &sort_key, "acquired",
2479 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2480 	OPT_STRING('F', "field", &output_fields, NULL,
2481 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2482 	/* TODO: type */
2483 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2484 		    "combine locks in the same class"),
2485 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2486 		    "show per-thread lock stats"),
2487 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2488 	OPT_PARENT(lock_options)
2489 	};
2490 
2491 	struct option contention_options[] = {
2492 	OPT_STRING('k', "key", &sort_key, "wait_total",
2493 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2494 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2495 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2496 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2497 		    "show per-thread lock stats"),
2498 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2499 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2500 		    "System-wide collection from all CPUs"),
2501 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2502 		    "List of cpus to monitor"),
2503 	OPT_STRING('p', "pid", &target.pid, "pid",
2504 		   "Trace on existing process id"),
2505 	OPT_STRING(0, "tid", &target.tid, "tid",
2506 		   "Trace on existing thread id (exclusive to --pid)"),
2507 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2508 		     "Max number of BPF map entries", parse_map_entry),
2509 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2510 		     "Set the maximum stack depth when collecting lock contention, "
2511 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2512 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2513 		    "Set the number of stack depth to skip when finding a lock caller, "
2514 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2515 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2516 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2517 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2518 		     "Filter specific type of locks", parse_lock_type),
2519 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2520 		     "Filter specific address/symbol of locks", parse_lock_addr),
2521 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2522 		     "Filter specific function in the callstack", parse_call_stack),
2523 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2524 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2525 		   "print result in CSV format with custom separator"),
2526 	OPT_PARENT(lock_options)
2527 	};
2528 
2529 	const char * const info_usage[] = {
2530 		"perf lock info [<options>]",
2531 		NULL
2532 	};
2533 	const char *const lock_subcommands[] = { "record", "report", "script",
2534 						 "info", "contention", NULL };
2535 	const char *lock_usage[] = {
2536 		NULL,
2537 		NULL
2538 	};
2539 	const char * const report_usage[] = {
2540 		"perf lock report [<options>]",
2541 		NULL
2542 	};
2543 	const char * const contention_usage[] = {
2544 		"perf lock contention [<options>]",
2545 		NULL
2546 	};
2547 	unsigned int i;
2548 	int rc = 0;
2549 
2550 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2551 	if (!lockhash_table)
2552 		return -ENOMEM;
2553 
2554 	for (i = 0; i < LOCKHASH_SIZE; i++)
2555 		INIT_HLIST_HEAD(lockhash_table + i);
2556 
2557 	lock_output = stderr;
2558 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2559 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2560 	if (!argc)
2561 		usage_with_options(lock_usage, lock_options);
2562 
2563 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2564 		return __cmd_record(argc, argv);
2565 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2566 		trace_handler = &report_lock_ops;
2567 		if (argc) {
2568 			argc = parse_options(argc, argv,
2569 					     report_options, report_usage, 0);
2570 			if (argc)
2571 				usage_with_options(report_usage, report_options);
2572 		}
2573 		rc = __cmd_report(false);
2574 	} else if (!strcmp(argv[0], "script")) {
2575 		/* Aliased to 'perf script' */
2576 		rc = cmd_script(argc, argv);
2577 	} else if (!strcmp(argv[0], "info")) {
2578 		if (argc) {
2579 			argc = parse_options(argc, argv,
2580 					     info_options, info_usage, 0);
2581 			if (argc)
2582 				usage_with_options(info_usage, info_options);
2583 		}
2584 		/* recycling report_lock_ops */
2585 		trace_handler = &report_lock_ops;
2586 		rc = __cmd_report(true);
2587 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2588 		trace_handler = &contention_lock_ops;
2589 		sort_key = "wait_total";
2590 		output_fields = "contended,wait_total,wait_max,avg_wait";
2591 
2592 #ifndef HAVE_BPF_SKEL
2593 		set_option_nobuild(contention_options, 'b', "use-bpf",
2594 				   "no BUILD_BPF_SKEL=1", false);
2595 #endif
2596 		if (argc) {
2597 			argc = parse_options(argc, argv, contention_options,
2598 					     contention_usage, 0);
2599 		}
2600 
2601 		if (check_lock_contention_options(contention_options,
2602 						  contention_usage) < 0)
2603 			return -1;
2604 
2605 		rc = __cmd_contention(argc, argv);
2606 	} else {
2607 		usage_with_options(lock_usage, lock_options);
2608 	}
2609 
2610 	zfree(&lockhash_table);
2611 	return rc;
2612 }
2613