1 #ifndef _LINUX_TRACEPOINT_H
2 #define _LINUX_TRACEPOINT_H
3 
4 /*
5  * Kernel Tracepoint API.
6  *
7  * See Documentation/trace/tracepoints.rst.
8  *
9  * Copyright (C) 2008-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
10  *
11  * Heavily inspired from the Linux Kernel Markers.
12  *
13  * This file is released under the GPLv2.
14  * See the file COPYING for more details.
15  */
16 
17 #include <linux/smp.h>
18 #include <linux/srcu.h>
19 #include <linux/errno.h>
20 #include <linux/types.h>
21 #include <linux/cpumask.h>
22 #include <linux/rcupdate.h>
23 #include <linux/tracepoint-defs.h>
24 
25 struct module;
26 struct tracepoint;
27 struct notifier_block;
28 
/*
 * One eval-map entry: two strings and an unsigned long value.
 *
 * NOTE(review): going by the field names, @system looks like the owning trace
 * system, @eval_string the symbol's text and @eval_value its numeric value --
 * confirm against the users of this structure.
 */
struct trace_eval_map {
	const char		*system;
	const char		*eval_string;
	unsigned long		eval_value;
};
34 
/*
 * NOTE(review): presumably the priority used when a probe is registered
 * without an explicit one -- confirm in the tracepoint core.
 */
#define TRACEPOINT_DEFAULT_PRIO	10

/* SRCU domain used by the rcuidle tracepoint path and by unregistration. */
extern struct srcu_struct tracepoint_srcu;

/*
 * Probe (un)registration entry points. @probe is passed as an untyped
 * pointer; the per-tracepoint register_trace_*() wrappers provide the
 * type-checked front end. @data is handed back to the probe as its first
 * argument.
 */
extern int
tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
extern int
tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
			       int prio);
extern int
tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);

/* Call @fct once per tracepoint, passing @priv through unchanged. */
extern void
for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
		void *priv);
49 
#ifdef CONFIG_MODULES
/* List node tying a module into a list of tracepoint-carrying modules. */
struct tp_module {
	struct list_head list;
	struct module *mod;
};

bool trace_module_has_bad_taint(struct module *mod);
extern int register_tracepoint_module_notifier(struct notifier_block *nb);
extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
#else
/*
 * Without module support there is nothing to inspect or notify: report "no
 * bad taint" and let notifier (un)registration succeed as a no-op.
 */
static inline bool trace_module_has_bad_taint(struct module *mod)
{
	return false;
}
static inline
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
static inline
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
#endif /* CONFIG_MODULES */
75 
/*
 * tracepoint_synchronize_unregister must be called between the last tracepoint
 * probe unregistration and the end of module exit to make sure there is no
 * caller executing a probe when it is freed.
 *
 * With tracepoints enabled it waits on both mechanisms a probe caller may be
 * running under: the tracepoint_srcu domain (rcuidle callers) and sched-RCU
 * (everyone else). Without tracepoints it is an empty stub.
 */
#ifdef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
	synchronize_srcu(&tracepoint_srcu);
	synchronize_sched();
}
#else
static inline void tracepoint_synchronize_unregister(void)
{ }
#endif
91 
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
extern int syscall_regfunc(void);
extern void syscall_unregfunc(void);
#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */

/*
 * PARAMS() expands to its arguments unchanged. It lets a comma-separated list
 * travel through further macro layers as a single macro argument.
 */
#define PARAMS(args...) args

/* Both expand to nothing in this header. */
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
101 
102 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
tracepoint_ptr_deref(tracepoint_ptr_t * p)103 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
104 {
105 	return offset_to_ptr(p);
106 }
107 
108 #define __TRACEPOINT_ENTRY(name)					\
109 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
110 	    "	.balign 4					\n"	\
111 	    "	.long 	__tracepoint_" #name " - .		\n"	\
112 	    "	.previous					\n")
113 #else
tracepoint_ptr_deref(tracepoint_ptr_t * p)114 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
115 {
116 	return *p;
117 }
118 
119 #define __TRACEPOINT_ENTRY(name)					 \
120 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
121 	__attribute__((section("__tracepoints_ptrs"))) =		 \
122 		&__tracepoint_##name
123 #endif
124 
125 #endif /* _LINUX_TRACEPOINT_H */
126 
127 /*
128  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
129  *  file ifdef protection.
130  *  This is due to the way trace events work. If a file includes two
131  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
132  *  will override the TRACE_EVENT and break the second include.
133  */
134 
135 #ifndef DECLARE_TRACE
136 
/*
 * Pass-through wrappers: each expands to its arguments unchanged. They label
 * the prototype, argument and condition lists of a tracepoint declaration and
 * keep embedded commas inside a single macro argument.
 */
#define TP_PROTO(args...)	args
#define TP_ARGS(args...)	args
#define TP_CONDITION(args...)	args
140 
/*
 * An individual subsystem may have a separate configuration option to
 * enable its tracepoints. By default, this file will create the
 * tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem wants to
 * be able to prevent its tracepoints from being created, it can define
 * NOTRACE before including the tracepoint headers.
 */
#if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE)
#define TRACEPOINTS_ENABLED
#endif
151 
152 #ifdef TRACEPOINTS_ENABLED
153 
/*
 * __DO_TRACE - call every probe currently attached to a tracepoint.
 * @tp:      pointer to the tracepoint whose ->funcs array is walked
 * @proto:   probe prototype; each stored function pointer is cast to it
 * @args:    arguments passed to every probe
 * @cond:    evaluated first; when false the macro executes a bare "return",
 *           so it can only be expanded inside a function returning void
 * @rcuidle: when non-zero, the walk is additionally bracketed by
 *           srcu_read_lock_notrace(&tracepoint_srcu) and
 *           rcu_irq_enter_irqson()/rcu_irq_exit_irqson()
 *
 * Preemption is disabled (notrace) for the whole walk in both modes.
 *
 * it_func[0] is never NULL because there is at least one element in the array
 * when the array itself is non NULL.
 *
 * Note, the proto and args passed in includes "__data" as the first parameter.
 * The reason for this is to handle the "void" prototype. If a tracepoint
 * has a "void" prototype, then it is invalid to declare a function
 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
 */
#define __DO_TRACE(tp, proto, args, cond, rcuidle)			\
	do {								\
		struct tracepoint_func *it_func_ptr;			\
		void *it_func;						\
		void *__data;						\
		int __maybe_unused idx = 0;				\
									\
		if (!(cond))						\
			return;						\
									\
		/* srcu can't be used from NMI */			\
		WARN_ON_ONCE(rcuidle && in_nmi());			\
									\
		/* keep srcu and sched-rcu usage consistent */		\
		preempt_disable_notrace();				\
									\
		/*							\
		 * For rcuidle callers, use srcu since sched-rcu	\
		 * doesn't work from the idle path.			\
		 */							\
		if (rcuidle) {						\
			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
			rcu_irq_enter_irqson();				\
		}							\
									\
		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
									\
		/* The array ends at the first entry whose ->func is NULL. */ \
		if (it_func_ptr) {					\
			do {						\
				it_func = (it_func_ptr)->func;		\
				__data = (it_func_ptr)->data;		\
				((void(*)(proto))(it_func))(args);	\
			} while ((++it_func_ptr)->func);		\
		}							\
									\
		if (rcuidle) {						\
			rcu_irq_exit_irqson();				\
			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
		}							\
									\
		preempt_enable_notrace();				\
	} while (0)
206 
/*
 * __DECLARE_TRACE_RCU - emit trace_<name>_rcuidle(), which runs the probes
 * through __DO_TRACE() with rcuidle set to 1. It is only emitted when MODULE
 * is not defined; otherwise the macro expands to nothing.
 */
#ifndef MODULE
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \
	static inline void trace_##name##_rcuidle(proto)		\
	{								\
		if (static_key_false(&__tracepoint_##name.key))		\
			__DO_TRACE(&__tracepoint_##name,		\
				TP_PROTO(data_proto),			\
				TP_ARGS(data_args),			\
				TP_CONDITION(cond), 1);			\
	}
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
#endif
220 
/*
 * __DECLARE_TRACE - declare a tracepoint and emit its inline front end:
 *
 *   trace_<name>()                     fire the tracepoint
 *   trace_<name>_rcuidle()             see __DECLARE_TRACE_RCU()
 *   register_trace_<name>()            attach a probe
 *   register_trace_prio_<name>()       attach a probe with a priority
 *   unregister_trace_<name>()          detach a probe
 *   check_trace_callback_type_<name>() empty; exists only so the compiler
 *                                      type-checks a callback against the
 *                                      probe prototype
 *   trace_<name>_enabled()             test the tracepoint's static key
 *
 * Make sure the alignment of the structure in the __tracepoints section will
 * not add unwanted padding between the beginning of the section and the
 * structure. Force alignment to the same alignment as the section start.
 * NOTE(review): this paragraph appears to describe the structure emitted by
 * DEFINE_TRACE_FN() rather than anything in this macro.
 *
 * When lockdep is enabled, we make sure to always do the RCU portions of
 * the tracepoint code, regardless of whether tracing is on. However, the
 * check is skipped when the condition is false, due to interaction with
 * idle instrumentation. This lets us find RCU issues triggered with
 * tracepoints even when this tracepoint is off. This code has no purpose
 * other than poking RCU a bit.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern struct tracepoint __tracepoint_##name;			\
	static inline void trace_##name(proto)				\
	{								\
		if (static_key_false(&__tracepoint_##name.key))		\
			__DO_TRACE(&__tracepoint_##name,		\
				TP_PROTO(data_proto),			\
				TP_ARGS(data_args),			\
				TP_CONDITION(cond), 0);			\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
			rcu_read_lock_sched_notrace();			\
			rcu_dereference_sched(__tracepoint_##name.funcs);\
			rcu_read_unlock_sched_notrace();		\
		}							\
	}								\
	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
		PARAMS(cond), PARAMS(data_proto), PARAMS(data_args))	\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_register(&__tracepoint_##name,	\
						(void *)probe, data);	\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
				   int prio)				\
	{								\
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
					      (void *)probe, data, prio); \
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_unregister(&__tracepoint_##name,\
						(void *)probe, data);	\
	}								\
	static inline void						\
	check_trace_callback_type_##name(void (*cb)(data_proto))	\
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return static_key_false(&__tracepoint_##name.key);	\
	}
278 
/*
 * We have no guarantee that gcc and the linker won't up-align the tracepoint
 * structures, so we create an array of pointers that will be used for iteration
 * on the tracepoints.
 *
 * DEFINE_TRACE_FN() emits three objects for tracepoint @name: its name string
 * (section __tracepoints_strings), the struct tracepoint itself (section
 * __tracepoints) and, through __TRACEPOINT_ENTRY(), its entry in the pointer
 * array. @reg and @unreg are stored in the structure's initializer.
 */
#define DEFINE_TRACE_FN(name, reg, unreg)				 \
	static const char __tpstrtab_##name[]				 \
	__attribute__((section("__tracepoints_strings"))) = #name;	 \
	struct tracepoint __tracepoint_##name				 \
	__attribute__((section("__tracepoints"), used)) =		 \
		{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
	__TRACEPOINT_ENTRY(name);

/* DEFINE_TRACE_FN() without registration callbacks. */
#define DEFINE_TRACE(name)						\
	DEFINE_TRACE_FN(name, NULL, NULL);

/* Export the tracepoint structure for @name. */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
	EXPORT_SYMBOL_GPL(__tracepoint_##name)
#define EXPORT_TRACEPOINT_SYMBOL(name)					\
	EXPORT_SYMBOL(__tracepoint_##name)
299 
300 #else /* !TRACEPOINTS_ENABLED */
/*
 * Tracepoints compiled out: keep the same set of inline entry points as the
 * enabled variant so callers build unchanged. Firing a tracepoint does
 * nothing, (un)registration fails with -ENOSYS and the tracepoint always
 * reports itself as disabled.
 *
 * register_trace_prio_<name>() is stubbed here as well; without it, code that
 * registers a probe with a priority would only build when tracepoints are
 * enabled.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	static inline void trace_##name(proto)				\
	{ }								\
	static inline void trace_##name##_rcuidle(proto)		\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}

/* Nothing to define or export when tracepoints are compiled out. */
#define DEFINE_TRACE_FN(name, reg, unreg)
#define DEFINE_TRACE(name)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
331 
332 #endif /* TRACEPOINTS_ENABLED */
333 
#ifdef CONFIG_TRACING
/**
 * tracepoint_string - register constant persistent string to trace system
 * @str: a constant persistent string that will be referenced in tracepoints
 *
 * If constant strings are being used in tracepoints, it is faster and
 * more efficient to just save the pointer to the string and reference
 * that with a printf "%s" instead of saving the string in the ring buffer
 * and wasting space and time.
 *
 * The problem with the above approach is that userspace tools that read
 * the binary output of the trace buffers do not have access to the string.
 * Instead they just show the address of the string which is not very
 * useful to users.
 *
 * With tracepoint_string(), the string will be registered to the tracing
 * system and exported to userspace via the debugfs/tracing/printk_formats
 * file that maps the string address to the string text. This way userspace
 * tools that read the binary buffers have a way to map the pointers to
 * the ASCII strings they represent.
 *
 * The @str used must be a constant string and persistent as it would not
 * make sense to show a string that no longer exists. But it is still fine
 * to be used with modules, because when modules are unloaded, if they
 * had tracepoints, the ring buffers are cleared too. As long as the string
 * does not change during the life of the module, it is fine to use
 * tracepoint_string() within a module.
 *
 * The macro evaluates to a pointer to @str; the pointer variable itself is
 * placed in the __tracepoint_str section.
 */
#define tracepoint_string(str)						\
	({								\
		static const char *___tp_str __tracepoint_string = str; \
		___tp_str;						\
	})
#define __tracepoint_string	__attribute__((section("__tracepoint_str")))
#else
/*
 * tracepoint_string() is used to save the string address for userspace
 * tracing tools. When tracing isn't configured, there's no need to save
 * anything.
 */
# define tracepoint_string(str) str
# define __tracepoint_string
#endif
377 
/*
 * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype
 * (void). "void" is a special value in a function prototype and can
 * not be combined with other arguments. Since the DECLARE_TRACE()
 * macro adds a data element at the beginning of the prototype,
 * we need a way to differentiate "(void *data, proto)" from
 * "(void *data, void)". The second prototype is invalid.
 *
 * DECLARE_TRACE_NOARGS() passes "void" as the tracepoint prototype
 * and "void *__data" as the callback prototype.
 *
 * DECLARE_TRACE() passes "proto" as the tracepoint prototype and
 * "void *__data, proto" as the callback prototype.
 *
 * All three variants only fire when the current CPU is online;
 * DECLARE_TRACE_CONDITION() additionally requires "cond" to hold.
 */
#define DECLARE_TRACE_NOARGS(name)					\
	__DECLARE_TRACE(name, void, ,					\
			cpu_online(raw_smp_processor_id()),		\
			void *__data, __data)

#define DECLARE_TRACE(name, proto, args)				\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto),			\
			PARAMS(__data, args))

#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
			PARAMS(void *__data, proto),			\
			PARAMS(__data, args))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
412 
413 #endif /* DECLARE_TRACE */
414 
415 #ifndef TRACE_EVENT
416 /*
417  * For use with the TRACE_EVENT macro:
418  *
419  * We define a tracepoint, its arguments, its printk format
420  * and its 'fast binary record' layout.
421  *
 * First, name your tracepoint via TRACE_EVENT(name, ...): the
 * 'subsystem_event' notation is fine.
424  *
425  * Think about this whole construct as the
426  * 'trace_sched_switch() function' from now on.
427  *
428  *
429  *  TRACE_EVENT(sched_switch,
430  *
431  *	*
432  *	* A function has a regular function arguments
433  *	* prototype, declare it via TP_PROTO():
434  *	*
435  *
436  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
437  *		 struct task_struct *next),
438  *
439  *	*
440  *	* Define the call signature of the 'function'.
441  *	* (Design sidenote: we use this instead of a
442  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
443  *	*
444  *
445  *	TP_ARGS(rq, prev, next),
446  *
447  *	*
448  *	* Fast binary tracing: define the trace record via
449  *	* TP_STRUCT__entry(). You can think about it like a
450  *	* regular C structure local variable definition.
451  *	*
452  *	* This is how the trace record is structured and will
453  *	* be saved into the ring buffer. These are the fields
454  *	* that will be exposed to user-space in
455  *	* /sys/kernel/debug/tracing/events/<*>/format.
456  *	*
457  *	* The declared 'local variable' is called '__entry'
458  *	*
 *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
460  *	*
461  *	*	pid_t	prev_pid;
462  *	*
463  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
464  *	*
465  *	*	char	prev_comm[TASK_COMM_LEN];
466  *	*
467  *
468  *	TP_STRUCT__entry(
469  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
470  *		__field(	pid_t,	prev_pid			)
471  *		__field(	int,	prev_prio			)
472  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
473  *		__field(	pid_t,	next_pid			)
474  *		__field(	int,	next_prio			)
475  *	),
476  *
477  *	*
478  *	* Assign the entry into the trace record, by embedding
479  *	* a full C statement block into TP_fast_assign(). You
480  *	* can refer to the trace record as '__entry' -
481  *	* otherwise you can put arbitrary C code in here.
482  *	*
483  *	* Note: this C code will execute every time a trace event
484  *	* happens, on an active tracepoint.
485  *	*
486  *
487  *	TP_fast_assign(
488  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
489  *		__entry->prev_pid	= prev->pid;
490  *		__entry->prev_prio	= prev->prio;
491  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
492  *		__entry->next_pid	= next->pid;
493  *		__entry->next_prio	= next->prio;
494  *	),
495  *
496  *	*
497  *	* Formatted output of a trace record via TP_printk().
498  *	* This is how the tracepoint will appear under ftrace
499  *	* plugins that make use of this tracepoint.
500  *	*
 *	* (raw-binary tracing won't actually perform this step.)
502  *	*
503  *
504  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
505  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
506  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
507  *
508  * );
509  *
510  * This macro construct is thus used for the regular printk format
511  * tracing setup, it is used to construct a function pointer based
512  * tracepoint callback (this is used by programmatic plugins and
 * can also be used by generic instrumentation like SystemTap), and
514  * it is also used to expose a structured trace record in
515  * /sys/kernel/debug/tracing/events/.
516  *
517  * A set of (un)registration functions can be passed to the variant
518  * TRACE_EVENT_FN to perform any (un)registration work.
519  */
520 
/*
 * Fallback definitions of the TRACE_EVENT() family. In this form only the
 * tracepoint itself is declared: the record layout, assignment and print
 * arguments are accepted and dropped, as are the reg/unreg callbacks of the
 * _FN variants. Event classes expand to nothing; each DEFINE_EVENT*() simply
 * declares a tracepoint, ignoring its template.
 */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT(template, name, proto, args)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_CONDITION(template, name, proto,		\
			       args, cond)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
				PARAMS(args), PARAMS(cond))

#define TRACE_EVENT(name, proto, args, struct, assign, print)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN(name, proto, args, struct,		\
		assign, print, reg, unreg)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
		assign, print, reg, unreg)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),	\
			PARAMS(args), PARAMS(cond))
#define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
			      struct, assign, print)		\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
				PARAMS(args), PARAMS(cond))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
550 
#endif /* ifndef TRACE_EVENT (see note above) */
552