1 /* Copyright (c) 2016 Facebook
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include <uapi/linux/bpf.h>
8 #include "bpf_helpers.h"
9 #include <uapi/linux/ptrace.h>
10 #include <uapi/linux/perf_event.h>
11 #include <linux/version.h>
12 #include <linux/sched.h>
13 
/*
 * Read the value of lvalue P through bpf_probe_read() and yield the copy.
 * Implemented as a statement expression: a temporary of P's type is filled
 * by the helper and becomes the value of the whole expression.
 * NOTE(review): the helper's return value is ignored, so a failed read is
 * not reported to the caller.
 */
#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
15 
/*
 * Smallest interval, in microseconds, that oncpu() will account; shorter
 * intervals are dropped before update_counts() is called.
 */
#define MINBLOCK_US	1
17 
/* Key of the counts map: one (waker, target) pair with their stack ids. */
struct key_t {
	char waker[TASK_COMM_LEN];	/* comm saved by waker(); all zeroes if no waker was recorded */
	char target[TASK_COMM_LEN];	/* comm of the current task when update_counts() runs */
	u32 wret;			/* bpf_get_stackid() result saved by waker(); 0 if none */
	u32 tret;			/* bpf_get_stackid() result taken in update_counts() */
};
24 
/*
 * Hash map: struct key_t -> u64 running total of the deltas passed to
 * update_counts() (oncpu() passes microseconds).
 */
struct bpf_map_def SEC("maps") counts = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(struct key_t),
	.value_size = sizeof(u64),
	.max_entries = 10000,
};
31 
/*
 * Hash map: u32 pid -> u64 bpf_ktime_get_ns() timestamp stored by oncpu()
 * for the previous task; the entry is deleted once it has been consumed.
 */
struct bpf_map_def SEC("maps") start = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 10000,
};
38 
/* Value of the wokeby map: who issued the wake-up and from which stack. */
struct wokeby_t {
	char name[TASK_COMM_LEN];	/* comm of the task running waker() */
	u32 ret;			/* bpf_get_stackid() result taken in waker() */
};
43 
/*
 * Hash map: u32 pid of the task handed to try_to_wake_up -> struct wokeby_t
 * describing the waker. Written by waker(), read and then deleted by
 * update_counts().
 */
struct bpf_map_def SEC("maps") wokeby = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct wokeby_t),
	.max_entries = 10000,
};
50 
/*
 * Stack trace map filled by bpf_get_stackid(); each value has room for
 * PERF_MAX_STACK_DEPTH u64 entries and is keyed by the u32 id that the
 * helper returns.
 */
struct bpf_map_def SEC("maps") stackmap = {
	.type = BPF_MAP_TYPE_STACK_TRACE,
	.key_size = sizeof(u32),
	.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
	.max_entries = 10000,
};
57 
/*
 * Flags passed to every bpf_get_stackid() call in this file.
 * NOTE(review): BPF_F_USER_STACK is not set, so these are presumably kernel
 * stacks, and BPF_F_FAST_STACK_CMP presumably compares stacks by hash only;
 * confirm against the bpf_get_stackid() helper documentation.
 */
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
59 
60 SEC("kprobe/try_to_wake_up")
waker(struct pt_regs * ctx)61 int waker(struct pt_regs *ctx)
62 {
63 	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
64 	struct wokeby_t woke;
65 	u32 pid;
66 
67 	pid = _(p->pid);
68 
69 	bpf_get_current_comm(&woke.name, sizeof(woke.name));
70 	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
71 
72 	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
73 	return 0;
74 }
75 
/*
 * Account @delta against the (waker, target) pair of the current task.
 *
 * @ctx:   program context, forwarded to bpf_get_stackid()
 * @pid:   key used to look up the waker record in the wokeby map
 * @delta: amount to add to the matching counts entry
 *
 * The key is built from the current task's comm and stack id. If the wokeby
 * map holds a record for @pid, its comm and stack id are copied into the key
 * and the record is deleted so it is used at most once; otherwise the waker
 * fields stay zero.
 *
 * Always returns 0, including when the counts entry could not be created.
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	/* every byte of the key is hashed, so leave nothing uninitialised */
	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		/* BPF_NOEXIST: do not clobber an entry another CPU just created */
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}

	/*
	 * counts is a plain (not per-CPU) hash map, so the same entry can be
	 * updated from several CPUs at once; a non-atomic += could lose an
	 * update. Use an atomic add instead.
	 */
	__sync_fetch_and_add(val, delta);
	return 0;
}
104 
105 #if 1
106 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
107 struct sched_switch_args {
108 	unsigned long long pad;
109 	char prev_comm[16];
110 	int prev_pid;
111 	int prev_prio;
112 	long long prev_state;
113 	char next_comm[16];
114 	int next_pid;
115 	int next_prio;
116 };
117 SEC("tracepoint/sched/sched_switch")
oncpu(struct sched_switch_args * ctx)118 int oncpu(struct sched_switch_args *ctx)
119 {
120 	/* record previous thread sleep time */
121 	u32 pid = ctx->prev_pid;
122 #else
123 SEC("kprobe/finish_task_switch")
124 int oncpu(struct pt_regs *ctx)
125 {
126 	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
127 	/* record previous thread sleep time */
128 	u32 pid = _(p->pid);
129 #endif
130 	u64 delta, ts, *tsp;
131 
132 	ts = bpf_ktime_get_ns();
133 	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
134 
135 	/* calculate current thread's delta time */
136 	pid = bpf_get_current_pid_tgid();
137 	tsp = bpf_map_lookup_elem(&start, &pid);
138 	if (!tsp)
139 		/* missed start or filtered */
140 		return 0;
141 
142 	delta = bpf_ktime_get_ns() - *tsp;
143 	bpf_map_delete_elem(&start, &pid);
144 	delta = delta / 1000;
145 	if (delta < MINBLOCK_US)
146 		return 0;
147 
148 	return update_counts(ctx, pid, delta);
149 }
/*
 * License string emitted into the "license" ELF section.
 * NOTE(review): presumably read by the loader to decide whether GPL-only
 * helpers may be used; confirm against the loader in use.
 */
char _license[] SEC("license") = "GPL";
/* Kernel version the object was built against, emitted into the "version" section. */
u32 _version SEC("version") = LINUX_VERSION_CODE;
152