1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring buffer, such as trace_printk(), could
 * occur at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 * is set, then ftrace_dump() is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set it to 1 to dump the buffers of all CPUs.
 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
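 *
 * Conceptually, one saved array of eval map items looks like:
 *
 *   [ head ][ map ][ map ] ... [ map ][ tail ]
 *      |                                 |
 *      +- count and owning module        +- pointer to the next saved array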
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_array *tr,
167 				   struct trace_buffer *buffer,
168 				   unsigned long flags, int pc);
169 
170 #define MAX_TRACER_SIZE		100
171 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
172 static char *default_bootup_tracer;
173 
174 static bool allocate_snapshot;
175 
static int __init set_cmdline_ftrace(char *str)
177 {
178 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
179 	default_bootup_tracer = bootup_tracer_buf;
180 	/* We are using ftrace early, expand it */
181 	ring_buffer_expanded = true;
182 	return 1;
183 }
184 __setup("ftrace=", set_cmdline_ftrace);
185 
static int __init set_ftrace_dump_on_oops(char *str)
187 {
188 	if (*str++ != '=' || !*str) {
189 		ftrace_dump_on_oops = DUMP_ALL;
190 		return 1;
191 	}
192 
193 	if (!strcmp("orig_cpu", str)) {
194 		ftrace_dump_on_oops = DUMP_ORIG;
		return 1;
	}

	return 0;
199 }
200 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
201 
static int __init stop_trace_on_warning(char *str)
203 {
204 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
205 		__disable_trace_on_warning = 1;
206 	return 1;
207 }
208 __setup("traceoff_on_warning", stop_trace_on_warning);
209 
static int __init boot_alloc_snapshot(char *str)
211 {
212 	allocate_snapshot = true;
213 	/* We also need the main ring buffer expanded */
214 	ring_buffer_expanded = true;
215 	return 1;
216 }
217 __setup("alloc_snapshot", boot_alloc_snapshot);
218 
219 
220 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
221 
static int __init set_trace_boot_options(char *str)
223 {
224 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
225 	return 0;
226 }
227 __setup("trace_options=", set_trace_boot_options);
228 
229 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
230 static char *trace_boot_clock __initdata;
231 
static int __init set_trace_boot_clock(char *str)
233 {
234 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
235 	trace_boot_clock = trace_boot_clock_buf;
236 	return 0;
237 }
238 __setup("trace_clock=", set_trace_boot_clock);
239 
static int __init set_tracepoint_printk(char *str)
241 {
242 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
243 		tracepoint_printk = 1;
244 	return 1;
245 }
246 __setup("tp_printk", set_tracepoint_printk);
247 
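/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */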
unsigned long long ns2usecs(u64 nsec)
249 {
250 	nsec += 500;
251 	do_div(nsec, 1000);
252 	return nsec;
253 }
254 
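/*
 * Hand the raw event data to an export callback, but only if the
 * export has subscribed to this kind of trace data (@flag).
 */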
255 static void
trace_process_export(struct trace_export *export,
257 	       struct ring_buffer_event *event, int flag)
258 {
259 	struct trace_entry *entry;
260 	unsigned int size = 0;
261 
262 	if (export->flags & flag) {
263 		entry = ring_buffer_event_data(event);
264 		size = ring_buffer_event_length(event);
265 		export->write(export, entry, size);
266 	}
267 }
268 
269 static DEFINE_MUTEX(ftrace_export_lock);
270 
271 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
272 
273 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
274 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
275 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
276 
static inline void ftrace_exports_enable(struct trace_export *export)
278 {
279 	if (export->flags & TRACE_EXPORT_FUNCTION)
280 		static_branch_inc(&trace_function_exports_enabled);
281 
282 	if (export->flags & TRACE_EXPORT_EVENT)
283 		static_branch_inc(&trace_event_exports_enabled);
284 
285 	if (export->flags & TRACE_EXPORT_MARKER)
286 		static_branch_inc(&trace_marker_exports_enabled);
287 }
288 
static inline void ftrace_exports_disable(struct trace_export *export)
290 {
291 	if (export->flags & TRACE_EXPORT_FUNCTION)
292 		static_branch_dec(&trace_function_exports_enabled);
293 
294 	if (export->flags & TRACE_EXPORT_EVENT)
295 		static_branch_dec(&trace_event_exports_enabled);
296 
297 	if (export->flags & TRACE_EXPORT_MARKER)
298 		static_branch_dec(&trace_marker_exports_enabled);
299 }
300 
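/*
 * Pass the event to every registered export that has subscribed to @flag.
 * The list is walked with preemption disabled, using raw RCU dereferences.
 */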
static void ftrace_exports(struct ring_buffer_event *event, int flag)
302 {
303 	struct trace_export *export;
304 
305 	preempt_disable_notrace();
306 
307 	export = rcu_dereference_raw_check(ftrace_exports_list);
308 	while (export) {
309 		trace_process_export(export, event, flag);
310 		export = rcu_dereference_raw_check(export->next);
311 	}
312 
313 	preempt_enable_notrace();
314 }
315 
316 static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
318 {
319 	rcu_assign_pointer(export->next, *list);
320 	/*
321 	 * We are entering export into the list but another
322 	 * CPU might be walking that list. We need to make sure
323 	 * the export->next pointer is valid before another CPU sees
324 	 * the export pointer included into the list.
325 	 */
326 	rcu_assign_pointer(*list, export);
327 }
328 
329 static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
331 {
332 	struct trace_export **p;
333 
334 	for (p = list; *p != NULL; p = &(*p)->next)
335 		if (*p == export)
336 			break;
337 
338 	if (*p != export)
339 		return -1;
340 
341 	rcu_assign_pointer(*p, (*p)->next);
342 
343 	return 0;
344 }
345 
346 static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
348 {
349 	ftrace_exports_enable(export);
350 
351 	add_trace_export(list, export);
352 }
353 
354 static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
356 {
357 	int ret;
358 
359 	ret = rm_trace_export(list, export);
360 	ftrace_exports_disable(export);
361 
362 	return ret;
363 }
364 
int register_ftrace_export(struct trace_export *export)
366 {
367 	if (WARN_ON_ONCE(!export->write))
368 		return -1;
369 
370 	mutex_lock(&ftrace_export_lock);
371 
372 	add_ftrace_export(&ftrace_exports_list, export);
373 
374 	mutex_unlock(&ftrace_export_lock);
375 
376 	return 0;
377 }
378 EXPORT_SYMBOL_GPL(register_ftrace_export);
379 
int unregister_ftrace_export(struct trace_export *export)
381 {
382 	int ret;
383 
384 	mutex_lock(&ftrace_export_lock);
385 
386 	ret = rm_ftrace_export(&ftrace_exports_list, export);
387 
388 	mutex_unlock(&ftrace_export_lock);
389 
390 	return ret;
391 }
392 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
393 
394 /* trace_flags holds trace_options default values */
395 #define TRACE_DEFAULT_FLAGS						\
396 	(FUNCTION_DEFAULT_FLAGS |					\
397 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
398 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
399 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
400 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
401 
402 /* trace_options that are only supported by global_trace */
403 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
404 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
405 
406 /* trace_flags that are default zero for instances */
407 #define ZEROED_TRACE_FLAGS \
408 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
409 
410 /*
411  * The global_trace is the descriptor that holds the top-level tracing
412  * buffers for the live tracing.
413  */
414 static struct trace_array global_trace = {
415 	.trace_flags = TRACE_DEFAULT_FLAGS,
416 };
417 
418 LIST_HEAD(ftrace_trace_arrays);
419 
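/*
 * trace_array_get - take a reference on a trace array
 * @this_tr: The trace array to take a reference on
 *
 * Looks up @this_tr in the list of known trace arrays and, if found,
 * increments its reference count. Returns 0 on success, or -ENODEV if
 * the array is no longer registered.
 */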
int trace_array_get(struct trace_array *this_tr)
421 {
422 	struct trace_array *tr;
423 	int ret = -ENODEV;
424 
425 	mutex_lock(&trace_types_lock);
426 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
427 		if (tr == this_tr) {
428 			tr->ref++;
429 			ret = 0;
430 			break;
431 		}
432 	}
433 	mutex_unlock(&trace_types_lock);
434 
435 	return ret;
436 }
437 
static void __trace_array_put(struct trace_array *this_tr)
439 {
440 	WARN_ON(!this_tr->ref);
441 	this_tr->ref--;
442 }
443 
444 /**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr: The trace array to decrement the reference of
 *
447  * NOTE: Use this when we no longer need the trace array returned by
448  * trace_array_get_by_name(). This ensures the trace array can be later
449  * destroyed.
450  *
451  */
void trace_array_put(struct trace_array *this_tr)
453 {
454 	if (!this_tr)
455 		return;
456 
457 	mutex_lock(&trace_types_lock);
458 	__trace_array_put(this_tr);
459 	mutex_unlock(&trace_types_lock);
460 }
461 EXPORT_SYMBOL_GPL(trace_array_put);
462 
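/*
 * Common open-time checks for tracefs files: fail if the kernel is
 * locked down, if tracing has been disabled for good, or if a
 * reference on @tr cannot be taken.
 */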
int tracing_check_open_get_tr(struct trace_array *tr)
464 {
465 	int ret;
466 
467 	ret = security_locked_down(LOCKDOWN_TRACEFS);
468 	if (ret)
469 		return ret;
470 
471 	if (tracing_disabled)
472 		return -ENODEV;
473 
474 	if (tr && trace_array_get(tr) < 0)
475 		return -ENODEV;
476 
477 	return 0;
478 }
479 
int call_filter_check_discard(struct trace_event_call *call, void *rec,
481 			      struct trace_buffer *buffer,
482 			      struct ring_buffer_event *event)
483 {
484 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
485 	    !filter_match_preds(call->filter, rec)) {
486 		__trace_event_discard_commit(buffer, event);
487 		return 1;
488 	}
489 
490 	return 0;
491 }
492 
void trace_free_pid_list(struct trace_pid_list *pid_list)
494 {
495 	vfree(pid_list->pids);
496 	kfree(pid_list);
497 }
498 
499 /**
500  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
501  * @filtered_pids: The list of pids to check
502  * @search_pid: The PID to find in @filtered_pids
503  *
 * Returns true if @search_pid is found in @filtered_pids, false otherwise.
505  */
506 bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
508 {
509 	/*
510 	 * If pid_max changed after filtered_pids was created, we
511 	 * by default ignore all pids greater than the previous pid_max.
512 	 */
513 	if (search_pid >= filtered_pids->pid_max)
514 		return false;
515 
516 	return test_bit(search_pid, filtered_pids->pids);
517 }
518 
519 /**
520  * trace_ignore_this_task - should a task be ignored for tracing
521  * @filtered_pids: The list of pids to check
522  * @task: The task that should be ignored if not filtered
523  *
524  * Checks if @task should be traced or not from @filtered_pids.
525  * Returns true if @task should *NOT* be traced.
526  * Returns false if @task should be traced.
527  */
528 bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
530 		       struct trace_pid_list *filtered_no_pids,
531 		       struct task_struct *task)
532 {
533 	/*
	 * If filtered_no_pids is not empty, and the task's pid is listed
535 	 * in filtered_no_pids, then return true.
536 	 * Otherwise, if filtered_pids is empty, that means we can
537 	 * trace all tasks. If it has content, then only trace pids
538 	 * within filtered_pids.
539 	 */
540 
541 	return (filtered_pids &&
542 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
543 		(filtered_no_pids &&
544 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
545 }
546 
547 /**
548  * trace_filter_add_remove_task - Add or remove a task from a pid_list
549  * @pid_list: The list to modify
550  * @self: The current task for fork or NULL for exit
551  * @task: The task to add or remove
552  *
553  * If adding a task, if @self is defined, the task is only added if @self
554  * is also included in @pid_list. This happens on fork and tasks should
555  * only be added when the parent is listed. If @self is NULL, then the
556  * @task pid will be removed from the list, which would happen on exit
557  * of a task.
558  */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
560 				  struct task_struct *self,
561 				  struct task_struct *task)
562 {
563 	if (!pid_list)
564 		return;
565 
566 	/* For forks, we only add if the forking task is listed */
567 	if (self) {
568 		if (!trace_find_filtered_pid(pid_list, self->pid))
569 			return;
570 	}
571 
572 	/* Sorry, but we don't support pid_max changing after setting */
573 	if (task->pid >= pid_list->pid_max)
574 		return;
575 
576 	/* "self" is set for forks, and NULL for exits */
577 	if (self)
578 		set_bit(task->pid, pid_list->pids);
579 	else
580 		clear_bit(task->pid, pid_list->pids);
581 }
582 
583 /**
584  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
585  * @pid_list: The pid list to show
586  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
587  * @pos: The position of the file
588  *
589  * This is used by the seq_file "next" operation to iterate the pids
590  * listed in a trace_pid_list structure.
591  *
592  * Returns the pid+1 as we want to display pid of zero, but NULL would
593  * stop the iteration.
594  */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
596 {
597 	unsigned long pid = (unsigned long)v;
598 
599 	(*pos)++;
600 
	/* pid already is +1 of the actual previous bit */
602 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
603 
604 	/* Return pid + 1 to allow zero to be represented */
605 	if (pid < pid_list->pid_max)
606 		return (void *)(pid + 1);
607 
608 	return NULL;
609 }
610 
611 /**
612  * trace_pid_start - Used for seq_file to start reading pid lists
613  * @pid_list: The pid list to show
614  * @pos: The position of the file
615  *
616  * This is used by seq_file "start" operation to start the iteration
617  * of listing pids.
618  *
619  * Returns the pid+1 as we want to display pid of zero, but NULL would
620  * stop the iteration.
621  */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
623 {
624 	unsigned long pid;
625 	loff_t l = 0;
626 
627 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
628 	if (pid >= pid_list->pid_max)
629 		return NULL;
630 
631 	/* Return pid + 1 so that zero can be the exit value */
632 	for (pid++; pid && l < *pos;
633 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
634 		;
635 	return (void *)pid;
636 }
637 
638 /**
639  * trace_pid_show - show the current pid in seq_file processing
640  * @m: The seq_file structure to write into
641  * @v: A void pointer of the pid (+1) value to display
642  *
643  * Can be directly used by seq_file operations to display the current
644  * pid value.
645  */
int trace_pid_show(struct seq_file *m, void *v)
647 {
648 	unsigned long pid = (unsigned long)v - 1;
649 
650 	seq_printf(m, "%lu\n", pid);
651 	return 0;
652 }
653 
654 /* 128 should be much more than enough */
655 #define PID_BUF_SIZE		127
656 
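/*
 * trace_pid_write - parse a user-space pid list into a trace_pid_list
 * @filtered_pids: The current pid list, or NULL if none exists yet
 * @new_pid_list: Where to store the newly allocated pid list
 * @ubuf: The user buffer containing the pids to set
 * @cnt: The number of bytes in @ubuf
 *
 * A new list is always allocated (copying any existing pids), so a
 * failed write leaves the current list untouched. Returns the number
 * of bytes consumed, or a negative error code.
 */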
int trace_pid_write(struct trace_pid_list *filtered_pids,
658 		    struct trace_pid_list **new_pid_list,
659 		    const char __user *ubuf, size_t cnt)
660 {
661 	struct trace_pid_list *pid_list;
662 	struct trace_parser parser;
663 	unsigned long val;
664 	int nr_pids = 0;
665 	ssize_t read = 0;
666 	ssize_t ret = 0;
667 	loff_t pos;
668 	pid_t pid;
669 
670 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
671 		return -ENOMEM;
672 
673 	/*
674 	 * Always recreate a new array. The write is an all or nothing
675 	 * operation. Always create a new array when adding new pids by
676 	 * the user. If the operation fails, then the current list is
677 	 * not modified.
678 	 */
679 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
680 	if (!pid_list) {
681 		trace_parser_put(&parser);
682 		return -ENOMEM;
683 	}
684 
685 	pid_list->pid_max = READ_ONCE(pid_max);
686 
687 	/* Only truncating will shrink pid_max */
688 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
689 		pid_list->pid_max = filtered_pids->pid_max;
690 
691 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
692 	if (!pid_list->pids) {
693 		trace_parser_put(&parser);
694 		kfree(pid_list);
695 		return -ENOMEM;
696 	}
697 
698 	if (filtered_pids) {
699 		/* copy the current bits to the new max */
700 		for_each_set_bit(pid, filtered_pids->pids,
701 				 filtered_pids->pid_max) {
702 			set_bit(pid, pid_list->pids);
703 			nr_pids++;
704 		}
705 	}
706 
707 	while (cnt > 0) {
708 
709 		pos = 0;
710 
711 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
712 		if (ret < 0 || !trace_parser_loaded(&parser))
713 			break;
714 
715 		read += ret;
716 		ubuf += ret;
717 		cnt -= ret;
718 
719 		ret = -EINVAL;
720 		if (kstrtoul(parser.buffer, 0, &val))
721 			break;
722 		if (val >= pid_list->pid_max)
723 			break;
724 
725 		pid = (pid_t)val;
726 
727 		set_bit(pid, pid_list->pids);
728 		nr_pids++;
729 
730 		trace_parser_clear(&parser);
731 		ret = 0;
732 	}
733 	trace_parser_put(&parser);
734 
735 	if (ret < 0) {
736 		trace_free_pid_list(pid_list);
737 		return ret;
738 	}
739 
740 	if (!nr_pids) {
741 		/* Cleared the list of pids */
742 		trace_free_pid_list(pid_list);
743 		read = ret;
744 		pid_list = NULL;
745 	}
746 
747 	*new_pid_list = pid_list;
748 
749 	return read;
750 }
751 
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
753 {
754 	u64 ts;
755 
756 	/* Early boot up does not have a buffer yet */
757 	if (!buf->buffer)
758 		return trace_clock_local();
759 
760 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
761 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
762 
763 	return ts;
764 }
765 
u64 ftrace_now(int cpu)
767 {
768 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
769 }
770 
771 /**
772  * tracing_is_enabled - Show if global_trace has been disabled
773  *
774  * Shows if the global trace has been enabled or not. It uses the
775  * mirror flag "buffer_disabled" to be used in fast paths such as for
776  * the irqsoff tracer. But it may be inaccurate due to races. If you
777  * need to know the accurate state, use tracing_is_on() which is a little
778  * slower, but accurate.
779  */
int tracing_is_enabled(void)
781 {
782 	/*
783 	 * For quick access (irqsoff uses this in fast path), just
784 	 * return the mirror variable of the state of the ring buffer.
785 	 * It's a little racy, but we don't really care.
786 	 */
787 	smp_rmb();
788 	return !global_trace.buffer_disabled;
789 }
790 
791 /*
792  * trace_buf_size is the size in bytes that is allocated
793  * for a buffer. Note, the number of bytes is always rounded
794  * to page size.
795  *
796  * This number is purposely set to a low number of 16384.
797  * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway, this is
 * configurable both at boot time and at run time.
800  */
801 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
802 
803 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
804 
805 /* trace_types holds a link list of available tracers. */
806 static struct tracer		*trace_types __read_mostly;
807 
808 /*
809  * trace_types_lock is used to protect the trace_types list.
810  */
811 DEFINE_MUTEX(trace_types_lock);
812 
813 /*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low-level
 * protection. The validity of the events (returned by ring_buffer_peek()
 * etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes
 * to consume these events concurrently:
 *   A) the page holding the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the event producer.
 *   B) the page holding the consumed events may become a page for
 *      splice_read, and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different CPU
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
833  */
834 
835 #ifdef CONFIG_SMP
836 static DECLARE_RWSEM(all_cpu_access_lock);
837 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
838 
static inline void trace_access_lock(int cpu)
840 {
841 	if (cpu == RING_BUFFER_ALL_CPUS) {
842 		/* gain it for accessing the whole ring buffer. */
843 		down_write(&all_cpu_access_lock);
844 	} else {
845 		/* gain it for accessing a cpu ring buffer. */
846 
847 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
848 		down_read(&all_cpu_access_lock);
849 
850 		/* Secondly block other access to this @cpu ring buffer. */
851 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
852 	}
853 }
854 
static inline void trace_access_unlock(int cpu)
856 {
857 	if (cpu == RING_BUFFER_ALL_CPUS) {
858 		up_write(&all_cpu_access_lock);
859 	} else {
860 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
861 		up_read(&all_cpu_access_lock);
862 	}
863 }
864 
static inline void trace_access_lock_init(void)
866 {
867 	int cpu;
868 
869 	for_each_possible_cpu(cpu)
870 		mutex_init(&per_cpu(cpu_access_lock, cpu));
871 }
872 
873 #else
874 
875 static DEFINE_MUTEX(access_lock);
876 
static inline void trace_access_lock(int cpu)
878 {
879 	(void)cpu;
880 	mutex_lock(&access_lock);
881 }
882 
static inline void trace_access_unlock(int cpu)
884 {
885 	(void)cpu;
886 	mutex_unlock(&access_lock);
887 }
888 
static inline void trace_access_lock_init(void)
890 {
891 }
892 
893 #endif
894 
895 #ifdef CONFIG_STACKTRACE
896 static void __ftrace_trace_stack(struct trace_buffer *buffer,
897 				 unsigned long flags,
898 				 int skip, int pc, struct pt_regs *regs);
899 static inline void ftrace_trace_stack(struct trace_array *tr,
900 				      struct trace_buffer *buffer,
901 				      unsigned long flags,
902 				      int skip, int pc, struct pt_regs *regs);
903 
904 #else
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
906 					unsigned long flags,
907 					int skip, int pc, struct pt_regs *regs)
908 {
909 }
static inline void ftrace_trace_stack(struct trace_array *tr,
911 				      struct trace_buffer *buffer,
912 				      unsigned long flags,
913 				      int skip, int pc, struct pt_regs *regs)
914 {
915 }
916 
917 #endif
918 
919 static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
921 		  int type, unsigned long flags, int pc)
922 {
923 	struct trace_entry *ent = ring_buffer_event_data(event);
924 
925 	tracing_generic_entry_update(ent, type, flags, pc);
926 }
927 
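/*
 * Reserve space for an event on @buffer and fill in the common
 * trace_entry header (type, irq flags, preempt count).
 */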
928 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
930 			  int type,
931 			  unsigned long len,
932 			  unsigned long flags, int pc)
933 {
934 	struct ring_buffer_event *event;
935 
936 	event = ring_buffer_lock_reserve(buffer, len);
937 	if (event != NULL)
938 		trace_event_setup(event, type, flags, pc);
939 
940 	return event;
941 }
942 
void tracer_tracing_on(struct trace_array *tr)
944 {
945 	if (tr->array_buffer.buffer)
946 		ring_buffer_record_on(tr->array_buffer.buffer);
947 	/*
948 	 * This flag is looked at when buffers haven't been allocated
949 	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
952 	 * As the check is in the fast path of the tracers, it is more
953 	 * important to be fast than accurate.
954 	 */
955 	tr->buffer_disabled = 0;
956 	/* Make the flag seen by readers */
957 	smp_wmb();
958 }
959 
960 /**
961  * tracing_on - enable tracing buffers
962  *
963  * This function enables tracing buffers that may have been
964  * disabled with tracing_off.
965  */
void tracing_on(void)
967 {
968 	tracer_tracing_on(&global_trace);
969 }
970 EXPORT_SYMBOL_GPL(tracing_on);
971 
972 
973 static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
975 {
976 	__this_cpu_write(trace_taskinfo_save, true);
977 
978 	/* If this is the temp buffer, we need to commit fully */
979 	if (this_cpu_read(trace_buffered_event) == event) {
980 		/* Length is in event->array[0] */
981 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
982 		/* Release the temp buffer */
983 		this_cpu_dec(trace_buffered_event_cnt);
984 	} else
985 		ring_buffer_unlock_commit(buffer, event);
986 }
987 
988 /**
989  * __trace_puts - write a constant string into the trace buffer.
990  * @ip:	   The address of the caller
991  * @str:   The constant string to write
992  * @size:  The size of the string.
993  */
int __trace_puts(unsigned long ip, const char *str, int size)
995 {
996 	struct ring_buffer_event *event;
997 	struct trace_buffer *buffer;
998 	struct print_entry *entry;
999 	unsigned long irq_flags;
1000 	int alloc;
1001 	int pc;
1002 
1003 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1004 		return 0;
1005 
1006 	pc = preempt_count();
1007 
1008 	if (unlikely(tracing_selftest_running || tracing_disabled))
1009 		return 0;
1010 
1011 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1012 
1013 	local_save_flags(irq_flags);
1014 	buffer = global_trace.array_buffer.buffer;
1015 	ring_buffer_nest_start(buffer);
1016 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1017 					    irq_flags, pc);
1018 	if (!event) {
1019 		size = 0;
1020 		goto out;
1021 	}
1022 
1023 	entry = ring_buffer_event_data(event);
1024 	entry->ip = ip;
1025 
1026 	memcpy(&entry->buf, str, size);
1027 
1028 	/* Add a newline if necessary */
1029 	if (entry->buf[size - 1] != '\n') {
1030 		entry->buf[size] = '\n';
1031 		entry->buf[size + 1] = '\0';
1032 	} else
1033 		entry->buf[size] = '\0';
1034 
1035 	__buffer_unlock_commit(buffer, event);
1036 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1037  out:
1038 	ring_buffer_nest_end(buffer);
1039 	return size;
1040 }
1041 EXPORT_SYMBOL_GPL(__trace_puts);
1042 
1043 /**
1044  * __trace_bputs - write the pointer to a constant string into trace buffer
1045  * @ip:	   The address of the caller
1046  * @str:   The constant string to write to the buffer to
1047  */
int __trace_bputs(unsigned long ip, const char *str)
1049 {
1050 	struct ring_buffer_event *event;
1051 	struct trace_buffer *buffer;
1052 	struct bputs_entry *entry;
1053 	unsigned long irq_flags;
1054 	int size = sizeof(struct bputs_entry);
1055 	int ret = 0;
1056 	int pc;
1057 
1058 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1059 		return 0;
1060 
1061 	pc = preempt_count();
1062 
1063 	if (unlikely(tracing_selftest_running || tracing_disabled))
1064 		return 0;
1065 
1066 	local_save_flags(irq_flags);
1067 	buffer = global_trace.array_buffer.buffer;
1068 
1069 	ring_buffer_nest_start(buffer);
1070 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1071 					    irq_flags, pc);
1072 	if (!event)
1073 		goto out;
1074 
1075 	entry = ring_buffer_event_data(event);
1076 	entry->ip			= ip;
1077 	entry->str			= str;
1078 
1079 	__buffer_unlock_commit(buffer, event);
1080 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1081 
1082 	ret = 1;
1083  out:
1084 	ring_buffer_nest_end(buffer);
1085 	return ret;
1086 }
1087 EXPORT_SYMBOL_GPL(__trace_bputs);
1088 
1089 #ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
1091 					   void *cond_data)
1092 {
1093 	struct tracer *tracer = tr->current_trace;
1094 	unsigned long flags;
1095 
1096 	if (in_nmi()) {
1097 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1098 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1099 		return;
1100 	}
1101 
1102 	if (!tr->allocated_snapshot) {
1103 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1104 		internal_trace_puts("*** stopping trace here!   ***\n");
1105 		tracing_off();
1106 		return;
1107 	}
1108 
1109 	/* Note, snapshot can not be used when the tracer uses it */
1110 	if (tracer->use_max_tr) {
1111 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1112 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1113 		return;
1114 	}
1115 
1116 	local_irq_save(flags);
1117 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1118 	local_irq_restore(flags);
1119 }
1120 
void tracing_snapshot_instance(struct trace_array *tr)
1122 {
1123 	tracing_snapshot_instance_cond(tr, NULL);
1124 }
1125 
1126 /**
1127  * tracing_snapshot - take a snapshot of the current buffer.
1128  *
1129  * This causes a swap between the snapshot buffer and the current live
1130  * tracing buffer. You can use this to take snapshots of the live
1131  * trace when some condition is triggered, but continue to trace.
1132  *
1133  * Note, make sure to allocate the snapshot with either
1134  * a tracing_snapshot_alloc(), or by doing it manually
1135  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1136  *
1137  * If the snapshot buffer is not allocated, it will stop tracing.
1138  * Basically making a permanent snapshot.
1139  */
void tracing_snapshot(void)
1141 {
1142 	struct trace_array *tr = &global_trace;
1143 
1144 	tracing_snapshot_instance(tr);
1145 }
1146 EXPORT_SYMBOL_GPL(tracing_snapshot);
1147 
1148 /**
1149  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1150  * @tr:		The tracing instance to snapshot
1151  * @cond_data:	The data to be tested conditionally, and possibly saved
1152  *
1153  * This is the same as tracing_snapshot() except that the snapshot is
1154  * conditional - the snapshot will only happen if the
1155  * cond_snapshot.update() implementation receiving the cond_data
1156  * returns true, which means that the trace array's cond_snapshot
1157  * update() operation used the cond_data to determine whether the
1158  * snapshot should be taken, and if it was, presumably saved it along
1159  * with the snapshot.
1160  */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1162 {
1163 	tracing_snapshot_instance_cond(tr, cond_data);
1164 }
1165 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1166 
1167 /**
1168  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1169  * @tr:		The tracing instance
1170  *
1171  * When the user enables a conditional snapshot using
1172  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1173  * with the snapshot.  This accessor is used to retrieve it.
1174  *
1175  * Should not be called from cond_snapshot.update(), since it takes
1176  * the tr->max_lock lock, which the code calling
1177  * cond_snapshot.update() has already done.
1178  *
1179  * Returns the cond_data associated with the trace array's snapshot.
1180  */
void *tracing_cond_snapshot_data(struct trace_array *tr)
1182 {
1183 	void *cond_data = NULL;
1184 
1185 	arch_spin_lock(&tr->max_lock);
1186 
1187 	if (tr->cond_snapshot)
1188 		cond_data = tr->cond_snapshot->cond_data;
1189 
1190 	arch_spin_unlock(&tr->max_lock);
1191 
1192 	return cond_data;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1195 
1196 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1197 					struct array_buffer *size_buf, int cpu_id);
1198 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1199 
int tracing_alloc_snapshot_instance(struct trace_array *tr)
1201 {
1202 	int ret;
1203 
1204 	if (!tr->allocated_snapshot) {
1205 
1206 		/* allocate spare buffer */
1207 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1208 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1209 		if (ret < 0)
1210 			return ret;
1211 
1212 		tr->allocated_snapshot = true;
1213 	}
1214 
1215 	return 0;
1216 }
1217 
static void free_snapshot(struct trace_array *tr)
1219 {
1220 	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
1224 	 */
1225 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1226 	set_buffer_entries(&tr->max_buffer, 1);
1227 	tracing_reset_online_cpus(&tr->max_buffer);
1228 	tr->allocated_snapshot = false;
1229 }
1230 
1231 /**
1232  * tracing_alloc_snapshot - allocate snapshot buffer.
1233  *
1234  * This only allocates the snapshot buffer if it isn't already
1235  * allocated - it doesn't also take a snapshot.
1236  *
1237  * This is meant to be used in cases where the snapshot buffer needs
1238  * to be set up for events that can't sleep but need to be able to
1239  * trigger a snapshot.
1240  */
int tracing_alloc_snapshot(void)
1242 {
1243 	struct trace_array *tr = &global_trace;
1244 	int ret;
1245 
1246 	ret = tracing_alloc_snapshot_instance(tr);
1247 	WARN_ON(ret < 0);
1248 
1249 	return ret;
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1252 
1253 /**
1254  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1255  *
1256  * This is similar to tracing_snapshot(), but it will allocate the
1257  * snapshot buffer if it isn't already allocated. Use this only
1258  * where it is safe to sleep, as the allocation may sleep.
1259  *
1260  * This causes a swap between the snapshot buffer and the current live
1261  * tracing buffer. You can use this to take snapshots of the live
1262  * trace when some condition is triggered, but continue to trace.
1263  */
void tracing_snapshot_alloc(void)
1265 {
1266 	int ret;
1267 
1268 	ret = tracing_alloc_snapshot();
1269 	if (ret < 0)
1270 		return;
1271 
1272 	tracing_snapshot();
1273 }
1274 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1275 
1276 /**
1277  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1278  * @tr:		The tracing instance
1279  * @cond_data:	User data to associate with the snapshot
1280  * @update:	Implementation of the cond_snapshot update function
1281  *
1282  * Check whether the conditional snapshot for the given instance has
1283  * already been enabled, or if the current tracer is already using a
1284  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1285  * save the cond_data and update function inside.
1286  *
1287  * Returns 0 if successful, error otherwise.
1288  */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1290 				 cond_update_fn_t update)
1291 {
1292 	struct cond_snapshot *cond_snapshot;
1293 	int ret = 0;
1294 
1295 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1296 	if (!cond_snapshot)
1297 		return -ENOMEM;
1298 
1299 	cond_snapshot->cond_data = cond_data;
1300 	cond_snapshot->update = update;
1301 
1302 	mutex_lock(&trace_types_lock);
1303 
1304 	ret = tracing_alloc_snapshot_instance(tr);
1305 	if (ret)
1306 		goto fail_unlock;
1307 
1308 	if (tr->current_trace->use_max_tr) {
1309 		ret = -EBUSY;
1310 		goto fail_unlock;
1311 	}
1312 
1313 	/*
1314 	 * The cond_snapshot can only change to NULL without the
1315 	 * trace_types_lock. We don't care if we race with it going
1316 	 * to NULL, but we want to make sure that it's not set to
1317 	 * something other than NULL when we get here, which we can
1318 	 * do safely with only holding the trace_types_lock and not
1319 	 * having to take the max_lock.
1320 	 */
1321 	if (tr->cond_snapshot) {
1322 		ret = -EBUSY;
1323 		goto fail_unlock;
1324 	}
1325 
1326 	arch_spin_lock(&tr->max_lock);
1327 	tr->cond_snapshot = cond_snapshot;
1328 	arch_spin_unlock(&tr->max_lock);
1329 
1330 	mutex_unlock(&trace_types_lock);
1331 
1332 	return ret;
1333 
1334  fail_unlock:
1335 	mutex_unlock(&trace_types_lock);
1336 	kfree(cond_snapshot);
1337 	return ret;
1338 }
1339 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1340 
1341 /**
1342  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1343  * @tr:		The tracing instance
1344  *
1345  * Check whether the conditional snapshot for the given instance is
1346  * enabled; if so, free the cond_snapshot associated with it,
1347  * otherwise return -EINVAL.
1348  *
1349  * Returns 0 if successful, error otherwise.
1350  */
int tracing_snapshot_cond_disable(struct trace_array *tr)
1352 {
1353 	int ret = 0;
1354 
1355 	arch_spin_lock(&tr->max_lock);
1356 
1357 	if (!tr->cond_snapshot)
1358 		ret = -EINVAL;
1359 	else {
1360 		kfree(tr->cond_snapshot);
1361 		tr->cond_snapshot = NULL;
1362 	}
1363 
1364 	arch_spin_unlock(&tr->max_lock);
1365 
1366 	return ret;
1367 }
1368 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1369 #else
void tracing_snapshot(void)
1371 {
1372 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1373 }
1374 EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1376 {
1377 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
1381 {
1382 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1383 	return -ENODEV;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
1387 {
1388 	/* Give warning */
1389 	tracing_snapshot();
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
1393 {
1394 	return NULL;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1398 {
1399 	return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
1403 {
1404 	return false;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1407 #endif /* CONFIG_TRACER_SNAPSHOT */
1408 
void tracer_tracing_off(struct trace_array *tr)
1410 {
1411 	if (tr->array_buffer.buffer)
1412 		ring_buffer_record_off(tr->array_buffer.buffer);
1413 	/*
1414 	 * This flag is looked at when buffers haven't been allocated
1415 	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
1418 	 * As the check is in the fast path of the tracers, it is more
1419 	 * important to be fast than accurate.
1420 	 */
1421 	tr->buffer_disabled = 1;
1422 	/* Make the flag seen by readers */
1423 	smp_wmb();
1424 }
1425 
1426 /**
1427  * tracing_off - turn off tracing buffers
1428  *
1429  * This function stops the tracing buffers from recording data.
1430  * It does not disable any overhead the tracers themselves may
1431  * be causing. This function simply causes all recording to
1432  * the ring buffers to fail.
1433  */
void tracing_off(void)
1435 {
1436 	tracer_tracing_off(&global_trace);
1437 }
1438 EXPORT_SYMBOL_GPL(tracing_off);
1439 
void disable_trace_on_warning(void)
1441 {
1442 	if (__disable_trace_on_warning) {
1443 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1444 			"Disabling tracing due to warning\n");
1445 		tracing_off();
1446 	}
1447 }
1448 
1449 /**
1450  * tracer_tracing_is_on - show real state of ring buffer enabled
1451  * @tr : the trace array to know if ring buffer is enabled
1452  *
1453  * Shows real state of the ring buffer if it is enabled or not.
1454  */
bool tracer_tracing_is_on(struct trace_array *tr)
1456 {
1457 	if (tr->array_buffer.buffer)
1458 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1459 	return !tr->buffer_disabled;
1460 }
1461 
1462 /**
1463  * tracing_is_on - show state of ring buffers enabled
1464  */
int tracing_is_on(void)
1466 {
1467 	return tracer_tracing_is_on(&global_trace);
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_is_on);
1470 
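/*
 * Parse the "trace_buf_size=" boot parameter. The value may use the
 * usual memparse() suffixes (K, M, G).
 */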
static int __init set_buf_size(char *str)
1472 {
1473 	unsigned long buf_size;
1474 
1475 	if (!str)
1476 		return 0;
1477 	buf_size = memparse(str, &str);
1478 	/* nr_entries can not be zero */
1479 	if (buf_size == 0)
1480 		return 0;
1481 	trace_buf_size = buf_size;
1482 	return 1;
1483 }
1484 __setup("trace_buf_size=", set_buf_size);
1485 
static int __init set_tracing_thresh(char *str)
1487 {
1488 	unsigned long threshold;
1489 	int ret;
1490 
1491 	if (!str)
1492 		return 0;
1493 	ret = kstrtoul(str, 0, &threshold);
1494 	if (ret < 0)
1495 		return 0;
1496 	tracing_thresh = threshold * 1000;
1497 	return 1;
1498 }
1499 __setup("tracing_thresh=", set_tracing_thresh);
1500 
unsigned long nsecs_to_usecs(unsigned long nsecs)
1502 {
1503 	return nsecs / 1000;
1504 }
1505 
1506 /*
1507  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1508  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1509  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1510  * of strings in the order that the evals (enum) were defined.
1511  */
1512 #undef C
1513 #define C(a, b) b
1514 
/* These must match the bit positions in trace_iterator_flags */
1516 static const char *trace_options[] = {
1517 	TRACE_FLAGS
1518 	NULL
1519 };
1520 
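/*
 * The clocks that can be selected through the trace_clock file.
 * in_ns records whether the clock counts in nanoseconds.
 */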
1521 static struct {
1522 	u64 (*func)(void);
1523 	const char *name;
1524 	int in_ns;		/* is this clock in nanoseconds? */
1525 } trace_clocks[] = {
1526 	{ trace_clock_local,		"local",	1 },
1527 	{ trace_clock_global,		"global",	1 },
1528 	{ trace_clock_counter,		"counter",	0 },
1529 	{ trace_clock_jiffies,		"uptime",	0 },
1530 	{ trace_clock,			"perf",		1 },
1531 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1532 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1533 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1534 	ARCH_TRACE_CLOCKS
1535 };
1536 
bool trace_clock_in_ns(struct trace_array *tr)
1538 {
1539 	if (trace_clocks[tr->clock_id].in_ns)
1540 		return true;
1541 
1542 	return false;
1543 }
1544 
1545 /*
1546  * trace_parser_get_init - gets the buffer for trace parser
1547  */
int trace_parser_get_init(struct trace_parser *parser, int size)
1549 {
1550 	memset(parser, 0, sizeof(*parser));
1551 
1552 	parser->buffer = kmalloc(size, GFP_KERNEL);
1553 	if (!parser->buffer)
1554 		return 1;
1555 
1556 	parser->size = size;
1557 	return 0;
1558 }
1559 
1560 /*
1561  * trace_parser_put - frees the buffer for trace parser
1562  */
void trace_parser_put(struct trace_parser *parser)
1564 {
1565 	kfree(parser->buffer);
1566 	parser->buffer = NULL;
1567 }
1568 
1569 /*
 * trace_get_user - reads the user input string separated by space
1571  * (matched by isspace(ch))
1572  *
1573  * For each string found the 'struct trace_parser' is updated,
1574  * and the function returns.
1575  *
1576  * Returns number of bytes read.
1577  *
1578  * See kernel/trace/trace.h for 'struct trace_parser' details.
1579  */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1581 	size_t cnt, loff_t *ppos)
1582 {
1583 	char ch;
1584 	size_t read = 0;
1585 	ssize_t ret;
1586 
1587 	if (!*ppos)
1588 		trace_parser_clear(parser);
1589 
1590 	ret = get_user(ch, ubuf++);
1591 	if (ret)
1592 		goto out;
1593 
1594 	read++;
1595 	cnt--;
1596 
1597 	/*
1598 	 * The parser is not finished with the last write,
1599 	 * continue reading the user input without skipping spaces.
1600 	 */
1601 	if (!parser->cont) {
1602 		/* skip white space */
1603 		while (cnt && isspace(ch)) {
1604 			ret = get_user(ch, ubuf++);
1605 			if (ret)
1606 				goto out;
1607 			read++;
1608 			cnt--;
1609 		}
1610 
1611 		parser->idx = 0;
1612 
1613 		/* only spaces were written */
1614 		if (isspace(ch) || !ch) {
1615 			*ppos += read;
1616 			ret = read;
1617 			goto out;
1618 		}
1619 	}
1620 
1621 	/* read the non-space input */
1622 	while (cnt && !isspace(ch) && ch) {
1623 		if (parser->idx < parser->size - 1)
1624 			parser->buffer[parser->idx++] = ch;
1625 		else {
1626 			ret = -EINVAL;
1627 			goto out;
1628 		}
1629 		ret = get_user(ch, ubuf++);
1630 		if (ret)
1631 			goto out;
1632 		read++;
1633 		cnt--;
1634 	}
1635 
1636 	/* We either got finished input or we have to wait for another call. */
1637 	if (isspace(ch) || !ch) {
1638 		parser->buffer[parser->idx] = 0;
1639 		parser->cont = false;
1640 	} else if (parser->idx < parser->size - 1) {
1641 		parser->cont = true;
1642 		parser->buffer[parser->idx++] = ch;
1643 		/* Make sure the parsed string always terminates with '\0'. */
1644 		parser->buffer[parser->idx] = 0;
1645 	} else {
1646 		ret = -EINVAL;
1647 		goto out;
1648 	}
1649 
1650 	*ppos += read;
1651 	ret = read;
1652 
1653 out:
1654 	return ret;
1655 }
1656 
1657 /* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1659 {
1660 	int len;
1661 
1662 	if (trace_seq_used(s) <= s->seq.readpos)
1663 		return -EBUSY;
1664 
1665 	len = trace_seq_used(s) - s->seq.readpos;
1666 	if (cnt > len)
1667 		cnt = len;
1668 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1669 
1670 	s->seq.readpos += cnt;
1671 	return cnt;
1672 }
1673 
1674 unsigned long __read_mostly	tracing_thresh;
1675 static const struct file_operations tracing_max_lat_fops;
1676 
1677 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1678 	defined(CONFIG_FSNOTIFY)
1679 
1680 static struct workqueue_struct *fsnotify_wq;
1681 
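/*
 * Workqueue callback: notify fsnotify watchers that the
 * tracing_max_latency file has been modified.
 */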
static void latency_fsnotify_workfn(struct work_struct *work)
1683 {
1684 	struct trace_array *tr = container_of(work, struct trace_array,
1685 					      fsnotify_work);
1686 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1687 }
1688 
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1690 {
1691 	struct trace_array *tr = container_of(iwork, struct trace_array,
1692 					      fsnotify_irqwork);
1693 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1694 }
1695 
static void trace_create_maxlat_file(struct trace_array *tr,
1697 				     struct dentry *d_tracer)
1698 {
1699 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1700 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1701 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1702 					      d_tracer, &tr->max_latency,
1703 					      &tracing_max_lat_fops);
1704 }
1705 
__init static int latency_fsnotify_init(void)
1707 {
1708 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1709 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1710 	if (!fsnotify_wq) {
1711 		pr_err("Unable to allocate tr_max_lat_wq\n");
1712 		return -ENOMEM;
1713 	}
1714 	return 0;
1715 }
1716 
1717 late_initcall_sync(latency_fsnotify_init);
1718 
void latency_fsnotify(struct trace_array *tr)
1720 {
1721 	if (!fsnotify_wq)
1722 		return;
1723 	/*
1724 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1725 	 * possible that we are called from __schedule() or do_idle(), which
1726 	 * could cause a deadlock.
1727 	 */
1728 	irq_work_queue(&tr->fsnotify_irqwork);
1729 }
1730 
1731 /*
1732  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1733  *  defined(CONFIG_FSNOTIFY)
1734  */
1735 #else
1736 
1737 #define trace_create_maxlat_file(tr, d_tracer)				\
1738 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1739 			  &tr->max_latency, &tracing_max_lat_fops)
1740 
1741 #endif
1742 
1743 #ifdef CONFIG_TRACER_MAX_TRACE
1744 /*
1745  * Copy the new maximum trace into the separate maximum-trace
1746  * structure. (this way the maximum trace is permanently saved,
1747  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1748  */
1749 static void
1750 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1751 {
1752 	struct array_buffer *trace_buf = &tr->array_buffer;
1753 	struct array_buffer *max_buf = &tr->max_buffer;
1754 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1755 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1756 
1757 	max_buf->cpu = cpu;
1758 	max_buf->time_start = data->preempt_timestamp;
1759 
1760 	max_data->saved_latency = tr->max_latency;
1761 	max_data->critical_start = data->critical_start;
1762 	max_data->critical_end = data->critical_end;
1763 
1764 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1765 	max_data->pid = tsk->pid;
1766 	/*
1767 	 * If tsk == current, then use current_uid(), as that does not use
1768 	 * RCU. The irq tracer can be called out of RCU scope.
1769 	 */
1770 	if (tsk == current)
1771 		max_data->uid = current_uid();
1772 	else
1773 		max_data->uid = task_uid(tsk);
1774 
1775 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1776 	max_data->policy = tsk->policy;
1777 	max_data->rt_priority = tsk->rt_priority;
1778 
1779 	/* Record this task's comm */
1780 	tracing_record_cmdline(tsk);
1781 	latency_fsnotify(tr);
1782 }
1783 
1784 /**
1785  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1786  * @tr: tracer
1787  * @tsk: the task with the latency
1788  * @cpu: The cpu that initiated the trace.
1789  * @cond_data: User data associated with a conditional snapshot
1790  *
1791  * Flip the buffers between the @tr and the max_tr and record information
1792  * about which task was the cause of this latency.
1793  */
1794 void
1795 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1796 	      void *cond_data)
1797 {
1798 	if (tr->stop_count)
1799 		return;
1800 
1801 	WARN_ON_ONCE(!irqs_disabled());
1802 
1803 	if (!tr->allocated_snapshot) {
1804 		/* Only the nop tracer should hit this when disabling */
1805 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1806 		return;
1807 	}
1808 
1809 	arch_spin_lock(&tr->max_lock);
1810 
1811 	/* Inherit the recordable setting from array_buffer */
1812 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1813 		ring_buffer_record_on(tr->max_buffer.buffer);
1814 	else
1815 		ring_buffer_record_off(tr->max_buffer.buffer);
1816 
1817 #ifdef CONFIG_TRACER_SNAPSHOT
1818 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1819 		goto out_unlock;
1820 #endif
1821 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1822 
1823 	__update_max_tr(tr, tsk, cpu);
1824 
1825  out_unlock:
1826 	arch_spin_unlock(&tr->max_lock);
1827 }
1828 
1829 /**
1830  * update_max_tr_single - only copy one trace over, and reset the rest
1831  * @tr: tracer
1832  * @tsk: task with the latency
1833  * @cpu: the cpu of the buffer to copy.
1834  *
1835  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1836  */
1837 void
1838 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1839 {
1840 	int ret;
1841 
1842 	if (tr->stop_count)
1843 		return;
1844 
1845 	WARN_ON_ONCE(!irqs_disabled());
1846 	if (!tr->allocated_snapshot) {
1847 		/* Only the nop tracer should hit this when disabling */
1848 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1849 		return;
1850 	}
1851 
1852 	arch_spin_lock(&tr->max_lock);
1853 
1854 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1855 
1856 	if (ret == -EBUSY) {
1857 		/*
1858 		 * We failed to swap the buffer due to a commit taking
1859 		 * place on this CPU. We fail to record, but we reset
1860 		 * the max trace buffer (no one writes directly to it)
1861 		 * and flag that it failed.
1862 		 */
1863 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1864 			"Failed to swap buffers due to commit in progress\n");
1865 	}
1866 
1867 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1868 
1869 	__update_max_tr(tr, tsk, cpu);
1870 	arch_spin_unlock(&tr->max_lock);
1871 }
1872 #endif /* CONFIG_TRACER_MAX_TRACE */
1873 
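/*
 * Block until the ring buffer for iter->cpu_file has data to read;
 * @full is passed through to ring_buffer_wait() to control how much
 * data must be available before waking up. Static iterators never
 * block: they are either already filled or empty.
 */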
1874 static int wait_on_pipe(struct trace_iterator *iter, int full)
1875 {
1876 	/* Iterators are static; they should be either filled or empty */
1877 	if (trace_buffer_iter(iter, iter->cpu_file))
1878 		return 0;
1879 
1880 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1881 				full);
1882 }
1883 
1884 #ifdef CONFIG_FTRACE_STARTUP_TEST
1885 static bool selftests_can_run;
1886 
1887 struct trace_selftests {
1888 	struct list_head		list;
1889 	struct tracer			*type;
1890 };
1891 
1892 static LIST_HEAD(postponed_selftests);
1893 
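/*
 * Remember a tracer whose selftest cannot run yet (it registered before
 * selftests_can_run was set). The postponed list is drained later by
 * init_trace_selftests().
 */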
1894 static int save_selftest(struct tracer *type)
1895 {
1896 	struct trace_selftests *selftest;
1897 
1898 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1899 	if (!selftest)
1900 		return -ENOMEM;
1901 
1902 	selftest->type = type;
1903 	list_add(&selftest->list, &postponed_selftests);
1904 	return 0;
1905 }
1906 
1907 static int run_tracer_selftest(struct tracer *type)
1908 {
1909 	struct trace_array *tr = &global_trace;
1910 	struct tracer *saved_tracer = tr->current_trace;
1911 	int ret;
1912 
1913 	if (!type->selftest || tracing_selftest_disabled)
1914 		return 0;
1915 
1916 	/*
1917 	 * If a tracer registers early in boot up (before scheduling is
1918 	 * initialized and such), then do not run its selftests yet.
1919 	 * Instead, run it a little later in the boot process.
1920 	 */
1921 	if (!selftests_can_run)
1922 		return save_selftest(type);
1923 
1924 	/*
1925 	 * Run a selftest on this tracer.
1926 	 * Here we reset the trace buffer, and set the current
1927 	 * tracer to be this tracer. The tracer can then run some
1928 	 * internal tracing to verify that everything is in order.
1929 	 * If we fail, we do not register this tracer.
1930 	 */
1931 	tracing_reset_online_cpus(&tr->array_buffer);
1932 
1933 	tr->current_trace = type;
1934 
1935 #ifdef CONFIG_TRACER_MAX_TRACE
1936 	if (type->use_max_tr) {
1937 		/* If we expanded the buffers, make sure the max is expanded too */
1938 		if (ring_buffer_expanded)
1939 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1940 					   RING_BUFFER_ALL_CPUS);
1941 		tr->allocated_snapshot = true;
1942 	}
1943 #endif
1944 
1945 	/* the test is responsible for initializing and enabling */
1946 	pr_info("Testing tracer %s: ", type->name);
1947 	ret = type->selftest(type, tr);
1948 	/* the test is responsible for resetting too */
1949 	tr->current_trace = saved_tracer;
1950 	if (ret) {
1951 		printk(KERN_CONT "FAILED!\n");
1952 		/* Add the warning after printing 'FAILED' */
1953 		WARN_ON(1);
1954 		return -1;
1955 	}
1956 	/* Only reset on passing, to avoid touching corrupted buffers */
1957 	tracing_reset_online_cpus(&tr->array_buffer);
1958 
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 	if (type->use_max_tr) {
1961 		tr->allocated_snapshot = false;
1962 
1963 		/* Shrink the max buffer again */
1964 		if (ring_buffer_expanded)
1965 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1966 					   RING_BUFFER_ALL_CPUS);
1967 	}
1968 #endif
1969 
1970 	printk(KERN_CONT "PASSED\n");
1971 	return 0;
1972 }
1973 
1974 static __init int init_trace_selftests(void)
1975 {
1976 	struct trace_selftests *p, *n;
1977 	struct tracer *t, **last;
1978 	int ret;
1979 
1980 	selftests_can_run = true;
1981 
1982 	mutex_lock(&trace_types_lock);
1983 
1984 	if (list_empty(&postponed_selftests))
1985 		goto out;
1986 
1987 	pr_info("Running postponed tracer tests:\n");
1988 
1989 	tracing_selftest_running = true;
1990 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1991 		/* This loop can take minutes when sanitizers are enabled, so
1992 		 * let's make sure we allow RCU processing.
1993 		 */
1994 		cond_resched();
1995 		ret = run_tracer_selftest(p->type);
1996 		/* If the test fails, then warn and remove from available_tracers */
1997 		if (ret < 0) {
1998 			WARN(1, "tracer: %s failed selftest, disabling\n",
1999 			     p->type->name);
2000 			last = &trace_types;
2001 			for (t = trace_types; t; t = t->next) {
2002 				if (t == p->type) {
2003 					*last = t->next;
2004 					break;
2005 				}
2006 				last = &t->next;
2007 			}
2008 		}
2009 		list_del(&p->list);
2010 		kfree(p);
2011 	}
2012 	tracing_selftest_running = false;
2013 
2014  out:
2015 	mutex_unlock(&trace_types_lock);
2016 
2017 	return 0;
2018 }
2019 core_initcall(init_trace_selftests);
2020 #else
2021 static inline int run_tracer_selftest(struct tracer *type)
2022 {
2023 	return 0;
2024 }
2025 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2026 
2027 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2028 
2029 static void __init apply_trace_boot_options(void);
2030 
2031 /**
2032  * register_tracer - register a tracer with the ftrace system.
2033  * @type: the plugin for the tracer
2034  *
2035  * Register a new plugin tracer.
2036  */
2037 int __init register_tracer(struct tracer *type)
2038 {
2039 	struct tracer *t;
2040 	int ret = 0;
2041 
2042 	if (!type->name) {
2043 		pr_info("Tracer must have a name\n");
2044 		return -1;
2045 	}
2046 
2047 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2048 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2049 		return -1;
2050 	}
2051 
2052 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2053 		pr_warn("Can not register tracer %s due to lockdown\n",
2054 			   type->name);
2055 		return -EPERM;
2056 	}
2057 
2058 	mutex_lock(&trace_types_lock);
2059 
2060 	tracing_selftest_running = true;
2061 
2062 	for (t = trace_types; t; t = t->next) {
2063 		if (strcmp(type->name, t->name) == 0) {
2064 			/* already found */
2065 			pr_info("Tracer %s already registered\n",
2066 				type->name);
2067 			ret = -1;
2068 			goto out;
2069 		}
2070 	}
2071 
2072 	if (!type->set_flag)
2073 		type->set_flag = &dummy_set_flag;
2074 	if (!type->flags) {
2075 		/* Allocate a dummy tracer_flags */
2076 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2077 		if (!type->flags) {
2078 			ret = -ENOMEM;
2079 			goto out;
2080 		}
2081 		type->flags->val = 0;
2082 		type->flags->opts = dummy_tracer_opt;
2083 	} else
2084 		if (!type->flags->opts)
2085 			type->flags->opts = dummy_tracer_opt;
2086 
2087 	/* store the tracer for __set_tracer_option */
2088 	type->flags->trace = type;
2089 
2090 	ret = run_tracer_selftest(type);
2091 	if (ret < 0)
2092 		goto out;
2093 
2094 	type->next = trace_types;
2095 	trace_types = type;
2096 	add_tracer_options(&global_trace, type);
2097 
2098  out:
2099 	tracing_selftest_running = false;
2100 	mutex_unlock(&trace_types_lock);
2101 
2102 	if (ret || !default_bootup_tracer)
2103 		goto out_unlock;
2104 
2105 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2106 		goto out_unlock;
2107 
2108 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2109 	/* Do we want this tracer to start on bootup? */
2110 	tracing_set_tracer(&global_trace, type->name);
2111 	default_bootup_tracer = NULL;
2112 
2113 	apply_trace_boot_options();
2114 
2115 	/* Disable other selftests, since this will break them. */
2116 	tracing_selftest_disabled = true;
2117 #ifdef CONFIG_FTRACE_STARTUP_TEST
2118 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
2119 	       type->name);
2120 #endif
2121 
2122  out_unlock:
2123 	return ret;
2124 }
2125 
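/*
 * Clear out a single CPU's ring buffer. Recording is disabled and
 * synchronize_rcu() lets in-flight commits finish before the reset.
 */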
2126 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2127 {
2128 	struct trace_buffer *buffer = buf->buffer;
2129 
2130 	if (!buffer)
2131 		return;
2132 
2133 	ring_buffer_record_disable(buffer);
2134 
2135 	/* Make sure all commits have finished */
2136 	synchronize_rcu();
2137 	ring_buffer_reset_cpu(buffer, cpu);
2138 
2139 	ring_buffer_record_enable(buffer);
2140 }
2141 
2142 void tracing_reset_online_cpus(struct array_buffer *buf)
2143 {
2144 	struct trace_buffer *buffer = buf->buffer;
2145 
2146 	if (!buffer)
2147 		return;
2148 
2149 	ring_buffer_record_disable(buffer);
2150 
2151 	/* Make sure all commits have finished */
2152 	synchronize_rcu();
2153 
2154 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2155 
2156 	ring_buffer_reset_online_cpus(buffer);
2157 
2158 	ring_buffer_record_enable(buffer);
2159 }
2160 
2161 /* Must have trace_types_lock held */
2162 void tracing_reset_all_online_cpus(void)
2163 {
2164 	struct trace_array *tr;
2165 
2166 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2167 		if (!tr->clear_trace)
2168 			continue;
2169 		tr->clear_trace = false;
2170 		tracing_reset_online_cpus(&tr->array_buffer);
2171 #ifdef CONFIG_TRACER_MAX_TRACE
2172 		tracing_reset_online_cpus(&tr->max_buffer);
2173 #endif
2174 	}
2175 }
2176 
2177 static int *tgid_map;
2178 
2179 #define SAVED_CMDLINES_DEFAULT 128
2180 #define NO_CMDLINE_MAP UINT_MAX
2181 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2182 struct saved_cmdlines_buffer {
2183 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2184 	unsigned *map_cmdline_to_pid;
2185 	unsigned cmdline_num;
2186 	int cmdline_idx;
2187 	char *saved_cmdlines;
2188 };
2189 static struct saved_cmdlines_buffer *savedcmd;
2190 
2191 /* temporary disable recording */
2192 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2193 
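/*
 * savedcmd->saved_cmdlines is a flat array of cmdline_num slots, each
 * TASK_COMM_LEN bytes long. These helpers map a slot index to its
 * storage and copy a comm into a slot.
 */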
2194 static inline char *get_saved_cmdlines(int idx)
2195 {
2196 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2197 }
2198 
2199 static inline void set_cmdline(int idx, const char *cmdline)
2200 {
2201 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2202 }
2203 
2204 static int allocate_cmdlines_buffer(unsigned int val,
2205 				    struct saved_cmdlines_buffer *s)
2206 {
2207 	s->map_cmdline_to_pid = kmalloc_array(val,
2208 					      sizeof(*s->map_cmdline_to_pid),
2209 					      GFP_KERNEL);
2210 	if (!s->map_cmdline_to_pid)
2211 		return -ENOMEM;
2212 
2213 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2214 	if (!s->saved_cmdlines) {
2215 		kfree(s->map_cmdline_to_pid);
2216 		return -ENOMEM;
2217 	}
2218 
2219 	s->cmdline_idx = 0;
2220 	s->cmdline_num = val;
2221 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2222 	       sizeof(s->map_pid_to_cmdline));
2223 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2224 	       val * sizeof(*s->map_cmdline_to_pid));
2225 
2226 	return 0;
2227 }
2228 
2229 static int trace_create_savedcmd(void)
2230 {
2231 	int ret;
2232 
2233 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2234 	if (!savedcmd)
2235 		return -ENOMEM;
2236 
2237 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2238 	if (ret < 0) {
2239 		kfree(savedcmd);
2240 		savedcmd = NULL;
2241 		return -ENOMEM;
2242 	}
2243 
2244 	return 0;
2245 }
2246 
2247 int is_tracing_stopped(void)
2248 {
2249 	return global_trace.stop_count;
2250 }
2251 
2252 /**
2253  * tracing_start - quick start of the tracer
2254  *
2255  * If tracing is enabled but was stopped by tracing_stop,
2256  * this will start the tracer back up.
2257  */
2258 void tracing_start(void)
2259 {
2260 	struct trace_buffer *buffer;
2261 	unsigned long flags;
2262 
2263 	if (tracing_disabled)
2264 		return;
2265 
2266 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2267 	if (--global_trace.stop_count) {
2268 		if (global_trace.stop_count < 0) {
2269 			/* Someone screwed up their debugging */
2270 			WARN_ON_ONCE(1);
2271 			global_trace.stop_count = 0;
2272 		}
2273 		goto out;
2274 	}
2275 
2276 	/* Prevent the buffers from switching */
2277 	arch_spin_lock(&global_trace.max_lock);
2278 
2279 	buffer = global_trace.array_buffer.buffer;
2280 	if (buffer)
2281 		ring_buffer_record_enable(buffer);
2282 
2283 #ifdef CONFIG_TRACER_MAX_TRACE
2284 	buffer = global_trace.max_buffer.buffer;
2285 	if (buffer)
2286 		ring_buffer_record_enable(buffer);
2287 #endif
2288 
2289 	arch_spin_unlock(&global_trace.max_lock);
2290 
2291  out:
2292 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2293 }
2294 
2295 static void tracing_start_tr(struct trace_array *tr)
2296 {
2297 	struct trace_buffer *buffer;
2298 	unsigned long flags;
2299 
2300 	if (tracing_disabled)
2301 		return;
2302 
2303 	/* If global, we need to also start the max tracer */
2304 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2305 		return tracing_start();
2306 
2307 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2308 
2309 	if (--tr->stop_count) {
2310 		if (tr->stop_count < 0) {
2311 			/* Someone screwed up their debugging */
2312 			WARN_ON_ONCE(1);
2313 			tr->stop_count = 0;
2314 		}
2315 		goto out;
2316 	}
2317 
2318 	buffer = tr->array_buffer.buffer;
2319 	if (buffer)
2320 		ring_buffer_record_enable(buffer);
2321 
2322  out:
2323 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2324 }
2325 
2326 /**
2327  * tracing_stop - quick stop of the tracer
2328  *
2329  * Light weight way to stop tracing. Use in conjunction with
2330  * tracing_start.
2331  */
2332 void tracing_stop(void)
2333 {
2334 	struct trace_buffer *buffer;
2335 	unsigned long flags;
2336 
2337 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2338 	if (global_trace.stop_count++)
2339 		goto out;
2340 
2341 	/* Prevent the buffers from switching */
2342 	arch_spin_lock(&global_trace.max_lock);
2343 
2344 	buffer = global_trace.array_buffer.buffer;
2345 	if (buffer)
2346 		ring_buffer_record_disable(buffer);
2347 
2348 #ifdef CONFIG_TRACER_MAX_TRACE
2349 	buffer = global_trace.max_buffer.buffer;
2350 	if (buffer)
2351 		ring_buffer_record_disable(buffer);
2352 #endif
2353 
2354 	arch_spin_unlock(&global_trace.max_lock);
2355 
2356  out:
2357 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2358 }
2359 
2360 static void tracing_stop_tr(struct trace_array *tr)
2361 {
2362 	struct trace_buffer *buffer;
2363 	unsigned long flags;
2364 
2365 	/* If global, we need to also stop the max tracer */
2366 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2367 		return tracing_stop();
2368 
2369 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2370 	if (tr->stop_count++)
2371 		goto out;
2372 
2373 	buffer = tr->array_buffer.buffer;
2374 	if (buffer)
2375 		ring_buffer_record_disable(buffer);
2376 
2377  out:
2378 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2379 }
2380 
2381 static int trace_save_cmdline(struct task_struct *tsk)
2382 {
2383 	unsigned pid, idx;
2384 
2385 	/* treat recording of idle task as a success */
2386 	if (!tsk->pid)
2387 		return 1;
2388 
2389 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2390 		return 0;
2391 
2392 	/*
2393 	 * It's not the end of the world if we don't get
2394 	 * the lock, but we also don't want to spin
2395 	 * nor do we want to disable interrupts,
2396 	 * so if we miss here, then better luck next time.
2397 	 */
2398 	if (!arch_spin_trylock(&trace_cmdline_lock))
2399 		return 0;
2400 
2401 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2402 	if (idx == NO_CMDLINE_MAP) {
2403 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2404 
2405 		/*
2406 		 * Check whether the cmdline buffer at idx has a pid
2407 		 * mapped. We are going to overwrite that entry so we
2408 		 * need to clear the map_pid_to_cmdline. Otherwise we
2409 		 * would read the new comm for the old pid.
2410 		 */
2411 		pid = savedcmd->map_cmdline_to_pid[idx];
2412 		if (pid != NO_CMDLINE_MAP)
2413 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2414 
2415 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2416 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2417 
2418 		savedcmd->cmdline_idx = idx;
2419 	}
2420 
2421 	set_cmdline(idx, tsk->comm);
2422 
2423 	arch_spin_unlock(&trace_cmdline_lock);
2424 
2425 	return 1;
2426 }
2427 
2428 static void __trace_find_cmdline(int pid, char comm[])
2429 {
2430 	unsigned map;
2431 
2432 	if (!pid) {
2433 		strcpy(comm, "<idle>");
2434 		return;
2435 	}
2436 
2437 	if (WARN_ON_ONCE(pid < 0)) {
2438 		strcpy(comm, "<XXX>");
2439 		return;
2440 	}
2441 
2442 	if (pid > PID_MAX_DEFAULT) {
2443 		strcpy(comm, "<...>");
2444 		return;
2445 	}
2446 
2447 	map = savedcmd->map_pid_to_cmdline[pid];
2448 	if (map != NO_CMDLINE_MAP)
2449 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2450 	else
2451 		strcpy(comm, "<...>");
2452 }
2453 
2454 void trace_find_cmdline(int pid, char comm[])
2455 {
2456 	preempt_disable();
2457 	arch_spin_lock(&trace_cmdline_lock);
2458 
2459 	__trace_find_cmdline(pid, comm);
2460 
2461 	arch_spin_unlock(&trace_cmdline_lock);
2462 	preempt_enable();
2463 }
2464 
2465 int trace_find_tgid(int pid)
2466 {
2467 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2468 		return 0;
2469 
2470 	return tgid_map[pid];
2471 }
2472 
2473 static int trace_save_tgid(struct task_struct *tsk)
2474 {
2475 	/* treat recording of idle task as a success */
2476 	if (!tsk->pid)
2477 		return 1;
2478 
2479 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2480 		return 0;
2481 
2482 	tgid_map[tsk->pid] = tsk->tgid;
2483 	return 1;
2484 }
2485 
2486 static bool tracing_record_taskinfo_skip(int flags)
2487 {
2488 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2489 		return true;
2490 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2491 		return true;
2492 	if (!__this_cpu_read(trace_taskinfo_save))
2493 		return true;
2494 	return false;
2495 }
2496 
2497 /**
2498  * tracing_record_taskinfo - record the task info of a task
2499  *
2500  * @task:  task to record
2501  * @flags: TRACE_RECORD_CMDLINE for recording comm
2502  *         TRACE_RECORD_TGID for recording tgid
2503  */
2504 void tracing_record_taskinfo(struct task_struct *task, int flags)
2505 {
2506 	bool done;
2507 
2508 	if (tracing_record_taskinfo_skip(flags))
2509 		return;
2510 
2511 	/*
2512 	 * Record as much task information as possible. If some fail, continue
2513 	 * to try to record the others.
2514 	 */
2515 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2516 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2517 
2518 	/* If recording any information failed, retry again soon. */
2519 	if (!done)
2520 		return;
2521 
2522 	__this_cpu_write(trace_taskinfo_save, false);
2523 }
2524 
2525 /**
2526  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2527  *
2528  * @prev: previous task during sched_switch
2529  * @next: next task during sched_switch
2530  * @flags: TRACE_RECORD_CMDLINE for recording comm
2531  *         TRACE_RECORD_TGID for recording tgid
2532  */
2533 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2534 					  struct task_struct *next, int flags)
2535 {
2536 	bool done;
2537 
2538 	if (tracing_record_taskinfo_skip(flags))
2539 		return;
2540 
2541 	/*
2542 	 * Record as much task information as possible. If some fail, continue
2543 	 * to try to record the others.
2544 	 */
2545 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2546 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2547 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2548 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2549 
2550 	/* If recording any information failed, retry again soon. */
2551 	if (!done)
2552 		return;
2553 
2554 	__this_cpu_write(trace_taskinfo_save, false);
2555 }
2556 
2557 /* Helpers to record a specific task information */
2558 void tracing_record_cmdline(struct task_struct *task)
2559 {
2560 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2561 }
2562 
2563 void tracing_record_tgid(struct task_struct *task)
2564 {
2565 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2566 }
2567 
2568 /*
2569  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2570  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2571  * simplifies those functions and keeps them in sync.
2572  */
2573 enum print_line_t trace_handle_return(struct trace_seq *s)
2574 {
2575 	return trace_seq_has_overflowed(s) ?
2576 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2577 }
2578 EXPORT_SYMBOL_GPL(trace_handle_return);
2579 
2580 void
2581 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2582 			     unsigned long flags, int pc)
2583 {
2584 	struct task_struct *tsk = current;
2585 
2586 	entry->preempt_count		= pc & 0xff;
2587 	entry->pid			= (tsk) ? tsk->pid : 0;
2588 	entry->type			= type;
2589 	entry->flags =
2590 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2591 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2592 #else
2593 		TRACE_FLAG_IRQS_NOSUPPORT |
2594 #endif
2595 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2596 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2597 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2598 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2599 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2600 }
2601 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2602 
2603 struct ring_buffer_event *
2604 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2605 			  int type,
2606 			  unsigned long len,
2607 			  unsigned long flags, int pc)
2608 {
2609 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2610 }
2611 
2612 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2613 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2614 static int trace_buffered_event_ref;
2615 
2616 /**
2617  * trace_buffered_event_enable - enable buffering events
2618  *
2619  * When events are being filtered, it is quicker to use a temporary
2620  * buffer to write the event data into if there's a likely chance
2621  * that it will not be committed. The discard of the ring buffer
2622  * is not as fast as committing, and is much slower than copying
2623  * a commit.
2624  *
2625  * When an event is to be filtered, allocate per cpu buffers to
2626  * write the event data into, and if the event is filtered and discarded
2627  * it is simply dropped, otherwise, the entire data is to be committed
2628  * in one shot.
2629  */
2630 void trace_buffered_event_enable(void)
2631 {
2632 	struct ring_buffer_event *event;
2633 	struct page *page;
2634 	int cpu;
2635 
2636 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2637 
2638 	if (trace_buffered_event_ref++)
2639 		return;
2640 
2641 	for_each_tracing_cpu(cpu) {
2642 		page = alloc_pages_node(cpu_to_node(cpu),
2643 					GFP_KERNEL | __GFP_NORETRY, 0);
2644 		if (!page)
2645 			goto failed;
2646 
2647 		event = page_address(page);
2648 		memset(event, 0, sizeof(*event));
2649 
2650 		per_cpu(trace_buffered_event, cpu) = event;
2651 
2652 		preempt_disable();
2653 		if (cpu == smp_processor_id() &&
2654 		    __this_cpu_read(trace_buffered_event) !=
2655 		    per_cpu(trace_buffered_event, cpu))
2656 			WARN_ON_ONCE(1);
2657 		preempt_enable();
2658 	}
2659 
2660 	return;
2661  failed:
2662 	trace_buffered_event_disable();
2663 }
2664 
2665 static void enable_trace_buffered_event(void *data)
2666 {
2667 	/* Probably not needed, but do it anyway */
2668 	smp_rmb();
2669 	this_cpu_dec(trace_buffered_event_cnt);
2670 }
2671 
2672 static void disable_trace_buffered_event(void *data)
2673 {
2674 	this_cpu_inc(trace_buffered_event_cnt);
2675 }
2676 
2677 /**
2678  * trace_buffered_event_disable - disable buffering events
2679  *
2680  * When a filter is removed, it is faster to not use the buffered
2681  * events, and to commit directly into the ring buffer. Free up
2682  * the temp buffers when there are no more users. This requires
2683  * special synchronization with current events.
2684  */
2685 void trace_buffered_event_disable(void)
2686 {
2687 	int cpu;
2688 
2689 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2690 
2691 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2692 		return;
2693 
2694 	if (--trace_buffered_event_ref)
2695 		return;
2696 
2697 	preempt_disable();
2698 	/* For each CPU, set the buffer as used. */
2699 	smp_call_function_many(tracing_buffer_mask,
2700 			       disable_trace_buffered_event, NULL, 1);
2701 	preempt_enable();
2702 
2703 	/* Wait for all current users to finish */
2704 	synchronize_rcu();
2705 
2706 	for_each_tracing_cpu(cpu) {
2707 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2708 		per_cpu(trace_buffered_event, cpu) = NULL;
2709 	}
2710 	/*
2711 	 * Make sure trace_buffered_event is NULL before clearing
2712 	 * trace_buffered_event_cnt.
2713 	 */
2714 	smp_wmb();
2715 
2716 	preempt_disable();
2717 	/* Do the work on each cpu */
2718 	smp_call_function_many(tracing_buffer_mask,
2719 			       enable_trace_buffered_event, NULL, 1);
2720 	preempt_enable();
2721 }
2722 
2723 static struct trace_buffer *temp_buffer;
2724 
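/*
 * Reserve space for a trace event. When filtering or soft-disable is
 * active, the per-CPU trace_buffered_event page is tried first so a
 * filtered-out event can simply be dropped instead of discarded from
 * the ring buffer. If the ring buffer rejects the reservation but
 * triggers still need to see the data, fall back to the throw-away
 * temp_buffer.
 */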
2725 struct ring_buffer_event *
2726 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2727 			  struct trace_event_file *trace_file,
2728 			  int type, unsigned long len,
2729 			  unsigned long flags, int pc)
2730 {
2731 	struct ring_buffer_event *entry;
2732 	int val;
2733 
2734 	*current_rb = trace_file->tr->array_buffer.buffer;
2735 
2736 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2737 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2738 	    (entry = this_cpu_read(trace_buffered_event))) {
2739 		/* Try to use the per cpu buffer first */
2740 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2741 		if (val == 1) {
2742 			trace_event_setup(entry, type, flags, pc);
2743 			entry->array[0] = len;
2744 			return entry;
2745 		}
2746 		this_cpu_dec(trace_buffered_event_cnt);
2747 	}
2748 
2749 	entry = __trace_buffer_lock_reserve(*current_rb,
2750 					    type, len, flags, pc);
2751 	/*
2752 	 * If tracing is off, but we have triggers enabled
2753 	 * we still need to look at the event data. Use the temp_buffer
2754 	 * to store the trace event for the trigger to use. It's recursion
2755 	 * safe and will not be recorded anywhere.
2756 	 */
2757 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2758 		*current_rb = temp_buffer;
2759 		entry = __trace_buffer_lock_reserve(*current_rb,
2760 						    type, len, flags, pc);
2761 	}
2762 	return entry;
2763 }
2764 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2765 
2766 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2767 static DEFINE_MUTEX(tracepoint_printk_mutex);
2768 
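/*
 * Print a rendered copy of the event to the console. Used when the
 * tracepoint_printk option is set; the event is formatted through its
 * trace() callback into tracepoint_print_iter and passed to printk().
 */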
2769 static void output_printk(struct trace_event_buffer *fbuffer)
2770 {
2771 	struct trace_event_call *event_call;
2772 	struct trace_event_file *file;
2773 	struct trace_event *event;
2774 	unsigned long flags;
2775 	struct trace_iterator *iter = tracepoint_print_iter;
2776 
2777 	/* We should never get here if iter is NULL */
2778 	if (WARN_ON_ONCE(!iter))
2779 		return;
2780 
2781 	event_call = fbuffer->trace_file->event_call;
2782 	if (!event_call || !event_call->event.funcs ||
2783 	    !event_call->event.funcs->trace)
2784 		return;
2785 
2786 	file = fbuffer->trace_file;
2787 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2788 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2789 	     !filter_match_preds(file->filter, fbuffer->entry)))
2790 		return;
2791 
2792 	event = &fbuffer->trace_file->event_call->event;
2793 
2794 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2795 	trace_seq_init(&iter->seq);
2796 	iter->ent = fbuffer->entry;
2797 	event_call->event.funcs->trace(iter, 0, event);
2798 	trace_seq_putc(&iter->seq, 0);
2799 	printk("%s", iter->seq.buffer);
2800 
2801 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2802 }
2803 
2804 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2805 			     void *buffer, size_t *lenp,
2806 			     loff_t *ppos)
2807 {
2808 	int save_tracepoint_printk;
2809 	int ret;
2810 
2811 	mutex_lock(&tracepoint_printk_mutex);
2812 	save_tracepoint_printk = tracepoint_printk;
2813 
2814 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2815 
2816 	/*
2817 	 * This will force exiting early, as tracepoint_printk
2818 	 * is always zero when tracepoint_printk_iter is not allocated
2819 	 * is always zero when tracepoint_print_iter is not allocated.
2820 	if (!tracepoint_print_iter)
2821 		tracepoint_printk = 0;
2822 
2823 	if (save_tracepoint_printk == tracepoint_printk)
2824 		goto out;
2825 
2826 	if (tracepoint_printk)
2827 		static_key_enable(&tracepoint_printk_key.key);
2828 	else
2829 		static_key_disable(&tracepoint_printk_key.key);
2830 
2831  out:
2832 	mutex_unlock(&tracepoint_printk_mutex);
2833 
2834 	return ret;
2835 }
2836 
2837 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2838 {
2839 	if (static_key_false(&tracepoint_printk_key.key))
2840 		output_printk(fbuffer);
2841 
2842 	if (static_branch_unlikely(&trace_event_exports_enabled))
2843 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2844 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2845 				    fbuffer->event, fbuffer->entry,
2846 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2847 }
2848 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2849 
2850 /*
2851  * Skip 3:
2852  *
2853  *   trace_buffer_unlock_commit_regs()
2854  *   trace_event_buffer_commit()
2855  *   trace_event_raw_event_xxx()
2856  */
2857 # define STACK_SKIP 3
2858 
2859 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2860 				     struct trace_buffer *buffer,
2861 				     struct ring_buffer_event *event,
2862 				     unsigned long flags, int pc,
2863 				     struct pt_regs *regs)
2864 {
2865 	__buffer_unlock_commit(buffer, event);
2866 
2867 	/*
2868 	 * If regs is not set, then skip the necessary functions.
2869 	 * Note, we can still get here via blktrace, wakeup tracer
2870 	 * and mmiotrace, but that's ok if they lose a function or
2871 	 * two. They are not that meaningful.
2872 	 */
2873 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2874 	ftrace_trace_userstack(tr, buffer, flags, pc);
2875 }
2876 
2877 /*
2878  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2879  */
2880 void
2881 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2882 				   struct ring_buffer_event *event)
2883 {
2884 	__buffer_unlock_commit(buffer, event);
2885 }
2886 
2887 void
2888 trace_function(struct trace_array *tr,
2889 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2890 	       int pc)
2891 {
2892 	struct trace_event_call *call = &event_function;
2893 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2894 	struct ring_buffer_event *event;
2895 	struct ftrace_entry *entry;
2896 
2897 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2898 					    flags, pc);
2899 	if (!event)
2900 		return;
2901 	entry	= ring_buffer_event_data(event);
2902 	entry->ip			= ip;
2903 	entry->parent_ip		= parent_ip;
2904 
2905 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2906 		if (static_branch_unlikely(&trace_function_exports_enabled))
2907 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2908 		__buffer_unlock_commit(buffer, event);
2909 	}
2910 }
2911 
2912 #ifdef CONFIG_STACKTRACE
2913 
2914 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2915 #define FTRACE_KSTACK_NESTING	4
2916 
2917 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2918 
2919 struct ftrace_stack {
2920 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2921 };
2922 
2923 
2924 struct ftrace_stacks {
2925 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2926 };
2927 
2928 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2929 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2930 
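/*
 * Save a kernel stack trace into one of the per-CPU ftrace_stack slots
 * (one slot per nesting context: normal, softirq, irq, NMI) and record
 * it as a TRACE_STACK event in @buffer.
 */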
2931 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2932 				 unsigned long flags,
2933 				 int skip, int pc, struct pt_regs *regs)
2934 {
2935 	struct trace_event_call *call = &event_kernel_stack;
2936 	struct ring_buffer_event *event;
2937 	unsigned int size, nr_entries;
2938 	struct ftrace_stack *fstack;
2939 	struct stack_entry *entry;
2940 	int stackidx;
2941 
2942 	/*
2943 	 * Add one, for this function and the call to stack_trace_save().
2944 	 * If regs is set, then these functions will not be in the way.
2945 	 */
2946 #ifndef CONFIG_UNWINDER_ORC
2947 	if (!regs)
2948 		skip++;
2949 #endif
2950 
2951 	preempt_disable_notrace();
2952 
2953 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2954 
2955 	/* This should never happen. If it does, yell once and skip */
2956 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2957 		goto out;
2958 
2959 	/*
2960 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2961 	 * interrupt will either see the value pre increment or post
2962 	 * increment. If the interrupt happens pre increment it will have
2963 	 * restored the counter when it returns.  We just need a barrier to
2964 	 * keep gcc from moving things around.
2965 	 */
2966 	barrier();
2967 
2968 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2969 	size = ARRAY_SIZE(fstack->calls);
2970 
2971 	if (regs) {
2972 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2973 						   size, skip);
2974 	} else {
2975 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2976 	}
2977 
2978 	size = nr_entries * sizeof(unsigned long);
2979 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2980 					    sizeof(*entry) + size, flags, pc);
2981 	if (!event)
2982 		goto out;
2983 	entry = ring_buffer_event_data(event);
2984 
2985 	memcpy(&entry->caller, fstack->calls, size);
2986 	entry->size = nr_entries;
2987 
2988 	if (!call_filter_check_discard(call, entry, buffer, event))
2989 		__buffer_unlock_commit(buffer, event);
2990 
2991  out:
2992 	/* Again, don't let gcc optimize things here */
2993 	barrier();
2994 	__this_cpu_dec(ftrace_stack_reserve);
2995 	preempt_enable_notrace();
2996 
2997 }
2998 
2999 static inline void ftrace_trace_stack(struct trace_array *tr,
3000 				      struct trace_buffer *buffer,
3001 				      unsigned long flags,
3002 				      int skip, int pc, struct pt_regs *regs)
3003 {
3004 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3005 		return;
3006 
3007 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
3008 }
3009 
3010 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3011 		   int pc)
3012 {
3013 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3014 
3015 	if (rcu_is_watching()) {
3016 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3017 		return;
3018 	}
3019 
3020 	/*
3021 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3022 	 * but if the above rcu_is_watching() failed, then the NMI
3023 	 * triggered someplace critical, and rcu_irq_enter() should
3024 	 * not be called from NMI.
3025 	 */
3026 	if (unlikely(in_nmi()))
3027 		return;
3028 
3029 	rcu_irq_enter_irqson();
3030 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3031 	rcu_irq_exit_irqson();
3032 }
3033 
3034 /**
3035  * trace_dump_stack - record a stack back trace in the trace buffer
3036  * @skip: Number of functions to skip (helper handlers)
3037  */
3038 void trace_dump_stack(int skip)
3039 {
3040 	unsigned long flags;
3041 
3042 	if (tracing_disabled || tracing_selftest_running)
3043 		return;
3044 
3045 	local_save_flags(flags);
3046 
3047 #ifndef CONFIG_UNWINDER_ORC
3048 	/* Skip 1 to skip this function. */
3049 	skip++;
3050 #endif
3051 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3052 			     flags, skip, preempt_count(), NULL);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_dump_stack);
3055 
3056 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3057 static DEFINE_PER_CPU(int, user_stack_count);
3058 
3059 static void
3060 ftrace_trace_userstack(struct trace_array *tr,
3061 		       struct trace_buffer *buffer, unsigned long flags, int pc)
3062 {
3063 	struct trace_event_call *call = &event_user_stack;
3064 	struct ring_buffer_event *event;
3065 	struct userstack_entry *entry;
3066 
3067 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3068 		return;
3069 
3070 	/*
3071 	 * NMIs cannot handle page faults, even with fixups.
3072 	 * The save user stack can (and often does) fault.
3073 	 */
3074 	if (unlikely(in_nmi()))
3075 		return;
3076 
3077 	/*
3078 	 * prevent recursion, since the user stack tracing may
3079 	 * trigger other kernel events.
3080 	 */
3081 	preempt_disable();
3082 	if (__this_cpu_read(user_stack_count))
3083 		goto out;
3084 
3085 	__this_cpu_inc(user_stack_count);
3086 
3087 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3088 					    sizeof(*entry), flags, pc);
3089 	if (!event)
3090 		goto out_drop_count;
3091 	entry	= ring_buffer_event_data(event);
3092 
3093 	entry->tgid		= current->tgid;
3094 	memset(&entry->caller, 0, sizeof(entry->caller));
3095 
3096 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3097 	if (!call_filter_check_discard(call, entry, buffer, event))
3098 		__buffer_unlock_commit(buffer, event);
3099 
3100  out_drop_count:
3101 	__this_cpu_dec(user_stack_count);
3102  out:
3103 	preempt_enable();
3104 }
3105 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3106 static void ftrace_trace_userstack(struct trace_array *tr,
3107 				   struct trace_buffer *buffer,
3108 				   unsigned long flags, int pc)
3109 {
3110 }
3111 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3112 
3113 #endif /* CONFIG_STACKTRACE */
3114 
3115 /* created for use with alloc_percpu */
3116 struct trace_buffer_struct {
3117 	int nesting;
3118 	char buffer[4][TRACE_BUF_SIZE];
3119 };
3120 
3121 static struct trace_buffer_struct *trace_percpu_buffer;
3122 
3123 /*
3124  * This allows for lockless recording.  If we're nested too deeply, then
3125  * this returns NULL.
3126  */
3127 static char *get_trace_buf(void)
3128 {
3129 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3130 
3131 	if (!buffer || buffer->nesting >= 4)
3132 		return NULL;
3133 
3134 	buffer->nesting++;
3135 
3136 	/* Interrupts must see nesting incremented before we use the buffer */
3137 	barrier();
3138 	return &buffer->buffer[buffer->nesting - 1][0];
3139 }
3140 
3141 static void put_trace_buf(void)
3142 {
3143 	/* Don't let the decrement of nesting leak before this */
3144 	barrier();
3145 	this_cpu_dec(trace_percpu_buffer->nesting);
3146 }
3147 
3148 static int alloc_percpu_trace_buffer(void)
3149 {
3150 	struct trace_buffer_struct *buffers;
3151 
3152 	if (trace_percpu_buffer)
3153 		return 0;
3154 
3155 	buffers = alloc_percpu(struct trace_buffer_struct);
3156 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3157 		return -ENOMEM;
3158 
3159 	trace_percpu_buffer = buffers;
3160 	return 0;
3161 }
3162 
3163 static int buffers_allocated;
3164 
3165 void trace_printk_init_buffers(void)
3166 {
3167 	if (buffers_allocated)
3168 		return;
3169 
3170 	if (alloc_percpu_trace_buffer())
3171 		return;
3172 
3173 	/* trace_printk() is for debug use only. Don't use it in production. */
3174 
3175 	pr_warn("\n");
3176 	pr_warn("**********************************************************\n");
3177 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3178 	pr_warn("**                                                      **\n");
3179 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3180 	pr_warn("**                                                      **\n");
3181 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3182 	pr_warn("** unsafe for production use.                           **\n");
3183 	pr_warn("**                                                      **\n");
3184 	pr_warn("** If you see this message and you are not debugging    **\n");
3185 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3186 	pr_warn("**                                                      **\n");
3187 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3188 	pr_warn("**********************************************************\n");
3189 
3190 	/* Expand the buffers to set size */
3191 	tracing_update_buffers();
3192 
3193 	buffers_allocated = 1;
3194 
3195 	/*
3196 	 * trace_printk_init_buffers() can be called by modules.
3197 	 * If that happens, then we need to start cmdline recording
3198 	 * directly here. If the global_trace.buffer is already
3199 	 * allocated here, then this was called by module code.
3200 	 */
3201 	if (global_trace.array_buffer.buffer)
3202 		tracing_start_cmdline_record();
3203 }
3204 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3205 
3206 void trace_printk_start_comm(void)
3207 {
3208 	/* Start tracing comms if trace printk is set */
3209 	if (!buffers_allocated)
3210 		return;
3211 	tracing_start_cmdline_record();
3212 }
3213 
3214 static void trace_printk_start_stop_comm(int enabled)
3215 {
3216 	if (!buffers_allocated)
3217 		return;
3218 
3219 	if (enabled)
3220 		tracing_start_cmdline_record();
3221 	else
3222 		tracing_stop_cmdline_record();
3223 }
3224 
3225 /**
3226  * trace_vbprintk - write binary msg to tracing buffer
3227  * @ip:    The address of the caller
3228  * @fmt:   The string format to write to the buffer
3229  * @args:  Arguments for @fmt
3230  */
3231 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3232 {
3233 	struct trace_event_call *call = &event_bprint;
3234 	struct ring_buffer_event *event;
3235 	struct trace_buffer *buffer;
3236 	struct trace_array *tr = &global_trace;
3237 	struct bprint_entry *entry;
3238 	unsigned long flags;
3239 	char *tbuffer;
3240 	int len = 0, size, pc;
3241 
3242 	if (unlikely(tracing_selftest_running || tracing_disabled))
3243 		return 0;
3244 
3245 	/* Don't pollute graph traces with trace_vprintk internals */
3246 	pause_graph_tracing();
3247 
3248 	pc = preempt_count();
3249 	preempt_disable_notrace();
3250 
3251 	tbuffer = get_trace_buf();
3252 	if (!tbuffer) {
3253 		len = 0;
3254 		goto out_nobuffer;
3255 	}
3256 
3257 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3258 
3259 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3260 		goto out_put;
3261 
3262 	local_save_flags(flags);
3263 	size = sizeof(*entry) + sizeof(u32) * len;
3264 	buffer = tr->array_buffer.buffer;
3265 	ring_buffer_nest_start(buffer);
3266 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3267 					    flags, pc);
3268 	if (!event)
3269 		goto out;
3270 	entry = ring_buffer_event_data(event);
3271 	entry->ip			= ip;
3272 	entry->fmt			= fmt;
3273 
3274 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3275 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3276 		__buffer_unlock_commit(buffer, event);
3277 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3278 	}
3279 
3280 out:
3281 	ring_buffer_nest_end(buffer);
3282 out_put:
3283 	put_trace_buf();
3284 
3285 out_nobuffer:
3286 	preempt_enable_notrace();
3287 	unpause_graph_tracing();
3288 
3289 	return len;
3290 }
3291 EXPORT_SYMBOL_GPL(trace_vbprintk);
3292 
3293 __printf(3, 0)
3294 static int
3295 __trace_array_vprintk(struct trace_buffer *buffer,
3296 		      unsigned long ip, const char *fmt, va_list args)
3297 {
3298 	struct trace_event_call *call = &event_print;
3299 	struct ring_buffer_event *event;
3300 	int len = 0, size, pc;
3301 	struct print_entry *entry;
3302 	unsigned long flags;
3303 	char *tbuffer;
3304 
3305 	if (tracing_disabled || tracing_selftest_running)
3306 		return 0;
3307 
3308 	/* Don't pollute graph traces with trace_vprintk internals */
3309 	pause_graph_tracing();
3310 
3311 	pc = preempt_count();
3312 	preempt_disable_notrace();
3313 
3314 
3315 	tbuffer = get_trace_buf();
3316 	if (!tbuffer) {
3317 		len = 0;
3318 		goto out_nobuffer;
3319 	}
3320 
3321 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3322 
3323 	local_save_flags(flags);
3324 	size = sizeof(*entry) + len + 1;
3325 	ring_buffer_nest_start(buffer);
3326 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3327 					    flags, pc);
3328 	if (!event)
3329 		goto out;
3330 	entry = ring_buffer_event_data(event);
3331 	entry->ip = ip;
3332 
3333 	memcpy(&entry->buf, tbuffer, len + 1);
3334 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3335 		__buffer_unlock_commit(buffer, event);
3336 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3337 	}
3338 
3339 out:
3340 	ring_buffer_nest_end(buffer);
3341 	put_trace_buf();
3342 
3343 out_nobuffer:
3344 	preempt_enable_notrace();
3345 	unpause_graph_tracing();
3346 
3347 	return len;
3348 }
3349 
3350 __printf(3, 0)
3351 int trace_array_vprintk(struct trace_array *tr,
3352 			unsigned long ip, const char *fmt, va_list args)
3353 {
3354 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3355 }
3356 
3357 /**
3358  * trace_array_printk - Print a message to a specific instance
3359  * @tr: The instance trace_array descriptor
3360  * @ip: The instruction pointer that this is called from.
3361  * @fmt: The format to print (printf format)
3362  *
3363  * If a subsystem sets up its own instance, they have the right to
3364  * printk strings into their tracing instance buffer using this
3365  * function. Note, this function will not write into the top level
3366  * buffer (use trace_printk() for that), as the top level buffer
3367  * should only contain events that can be individually disabled.
3368  * trace_printk() is only used for debugging a kernel, and should never
3369  * be incorporated into normal use.
3370  *
3371  * trace_array_printk() can be used, as it will not add noise to the
3372  * top level tracing buffer.
3373  *
3374  * Note, trace_array_init_printk() must be called on @tr before this
3375  * can be used.
3376  */
3377 __printf(3, 0)
3378 int trace_array_printk(struct trace_array *tr,
3379 		       unsigned long ip, const char *fmt, ...)
3380 {
3381 	int ret;
3382 	va_list ap;
3383 
3384 	if (!tr)
3385 		return -ENOENT;
3386 
3387 	/* This is only allowed for created instances */
3388 	if (tr == &global_trace)
3389 		return 0;
3390 
3391 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3392 		return 0;
3393 
3394 	va_start(ap, fmt);
3395 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3396 	va_end(ap);
3397 	return ret;
3398 }
3399 EXPORT_SYMBOL_GPL(trace_array_printk);
3400 
3401 /**
3402  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3403  * @tr: The trace array to initialize the buffers for
3404  *
3405  * As trace_array_printk() only writes into instances, it is OK to
3406  * leave in kernel code (unlike trace_printk()). This needs to be called
3407  * before trace_array_printk() can be used on a trace_array.
3408  */
3409 int trace_array_init_printk(struct trace_array *tr)
3410 {
3411 	if (!tr)
3412 		return -ENOENT;
3413 
3414 	/* This is only allowed for created instances */
3415 	if (tr == &global_trace)
3416 		return -EINVAL;
3417 
3418 	return alloc_percpu_trace_buffer();
3419 }
3420 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3421 
3422 __printf(3, 4)
3423 int trace_array_printk_buf(struct trace_buffer *buffer,
3424 			   unsigned long ip, const char *fmt, ...)
3425 {
3426 	int ret;
3427 	va_list ap;
3428 
3429 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3430 		return 0;
3431 
3432 	va_start(ap, fmt);
3433 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3434 	va_end(ap);
3435 	return ret;
3436 }
3437 
3438 __printf(2, 0)
3439 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3440 {
3441 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3442 }
3443 EXPORT_SYMBOL_GPL(trace_vprintk);
3444 
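/* Advance the iterator index and its per-CPU ring buffer iterator. */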
3445 static void trace_iterator_increment(struct trace_iterator *iter)
3446 {
3447 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3448 
3449 	iter->idx++;
3450 	if (buf_iter)
3451 		ring_buffer_iter_advance(buf_iter);
3452 }
3453 
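/*
 * Return the next entry on @cpu without consuming it, filling in *ts
 * and (optionally) *lost_events. Uses the buffer iterator when one
 * exists, otherwise peeks the live ring buffer.
 */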
3454 static struct trace_entry *
3455 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3456 		unsigned long *lost_events)
3457 {
3458 	struct ring_buffer_event *event;
3459 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3460 
3461 	if (buf_iter) {
3462 		event = ring_buffer_iter_peek(buf_iter, ts);
3463 		if (lost_events)
3464 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3465 				(unsigned long)-1 : 0;
3466 	} else {
3467 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3468 					 lost_events);
3469 	}
3470 
3471 	if (event) {
3472 		iter->ent_size = ring_buffer_event_length(event);
3473 		return ring_buffer_event_data(event);
3474 	}
3475 	iter->ent_size = 0;
3476 	return NULL;
3477 }
3478 
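/*
 * Find the oldest pending entry (smallest timestamp) across all tracing
 * CPUs, or only iter->cpu_file for a per-CPU trace file. Records the
 * CPU, timestamp, lost-event count and entry size of the winner.
 */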
3479 static struct trace_entry *
3480 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3481 		  unsigned long *missing_events, u64 *ent_ts)
3482 {
3483 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3484 	struct trace_entry *ent, *next = NULL;
3485 	unsigned long lost_events = 0, next_lost = 0;
3486 	int cpu_file = iter->cpu_file;
3487 	u64 next_ts = 0, ts;
3488 	int next_cpu = -1;
3489 	int next_size = 0;
3490 	int cpu;
3491 
3492 	/*
3493 	 * If we are in a per_cpu trace file, don't bother iterating over
3494 	 * all CPUs; peek at the requested CPU directly.
3495 	 */
3496 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3497 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3498 			return NULL;
3499 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3500 		if (ent_cpu)
3501 			*ent_cpu = cpu_file;
3502 
3503 		return ent;
3504 	}
3505 
3506 	for_each_tracing_cpu(cpu) {
3507 
3508 		if (ring_buffer_empty_cpu(buffer, cpu))
3509 			continue;
3510 
3511 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3512 
3513 		/*
3514 		 * Pick the entry with the smallest timestamp:
3515 		 */
3516 		if (ent && (!next || ts < next_ts)) {
3517 			next = ent;
3518 			next_cpu = cpu;
3519 			next_ts = ts;
3520 			next_lost = lost_events;
3521 			next_size = iter->ent_size;
3522 		}
3523 	}
3524 
3525 	iter->ent_size = next_size;
3526 
3527 	if (ent_cpu)
3528 		*ent_cpu = next_cpu;
3529 
3530 	if (ent_ts)
3531 		*ent_ts = next_ts;
3532 
3533 	if (missing_events)
3534 		*missing_events = next_lost;
3535 
3536 	return next;
3537 }
3538 
3539 #define STATIC_TEMP_BUF_SIZE	128
3540 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3541 
3542 /* Find the next real entry, without updating the iterator itself */
3543 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3544 					  int *ent_cpu, u64 *ent_ts)
3545 {
3546 	/* __find_next_entry will reset ent_size */
3547 	int ent_size = iter->ent_size;
3548 	struct trace_entry *entry;
3549 
3550 	/*
3551 	 * If called from ftrace_dump(), then the iter->temp buffer
3552 	 * will be the static_temp_buf and not created from kmalloc.
3553 	 * If the entry size is greater than the buffer, we can
3554 	 * not save it. Just return NULL in that case. This is only
3555 	 * used to add markers when two consecutive events' time
3556 	 * stamps have a large delta. See trace_print_lat_context()
3557 	 */
3558 	if (iter->temp == static_temp_buf &&
3559 	    STATIC_TEMP_BUF_SIZE < ent_size)
3560 		return NULL;
3561 
3562 	/*
3563 	 * The __find_next_entry() may call peek_next_entry(), which may
3564 	 * call ring_buffer_peek() that may make the contents of iter->ent
3565 	 * undefined. Need to copy iter->ent now.
3566 	 */
3567 	if (iter->ent && iter->ent != iter->temp) {
3568 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3569 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3570 			void *temp;
3571 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3572 			if (!temp)
3573 				return NULL;
3574 			kfree(iter->temp);
3575 			iter->temp = temp;
3576 			iter->temp_size = iter->ent_size;
3577 		}
3578 		memcpy(iter->temp, iter->ent, iter->ent_size);
3579 		iter->ent = iter->temp;
3580 	}
3581 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3582 	/* Put back the original ent_size */
3583 	iter->ent_size = ent_size;
3584 
3585 	return entry;
3586 }
3587 
3588 /* Find the next real entry, and increment the iterator to the next entry */
3589 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3590 {
3591 	iter->ent = __find_next_entry(iter, &iter->cpu,
3592 				      &iter->lost_events, &iter->ts);
3593 
3594 	if (iter->ent)
3595 		trace_iterator_increment(iter);
3596 
3597 	return iter->ent ? iter : NULL;
3598 }
3599 
3600 static void trace_consume(struct trace_iterator *iter)
3601 {
3602 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3603 			    &iter->lost_events);
3604 }
3605 
3606 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3607 {
3608 	struct trace_iterator *iter = m->private;
3609 	int i = (int)*pos;
3610 	void *ent;
3611 
3612 	WARN_ON_ONCE(iter->leftover);
3613 
3614 	(*pos)++;
3615 
3616 	/* can't go backwards */
3617 	if (iter->idx > i)
3618 		return NULL;
3619 
3620 	if (iter->idx < 0)
3621 		ent = trace_find_next_entry_inc(iter);
3622 	else
3623 		ent = iter;
3624 
3625 	while (ent && iter->idx < i)
3626 		ent = trace_find_next_entry_inc(iter);
3627 
3628 	iter->pos = *pos;
3629 
3630 	return ent;
3631 }
3632 
3633 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3634 {
3635 	struct ring_buffer_iter *buf_iter;
3636 	unsigned long entries = 0;
3637 	u64 ts;
3638 
3639 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3640 
3641 	buf_iter = trace_buffer_iter(iter, cpu);
3642 	if (!buf_iter)
3643 		return;
3644 
3645 	ring_buffer_iter_reset(buf_iter);
3646 
3647 	/*
3648 	 * With the max latency tracers, a reset may never have taken
3649 	 * place on a cpu. This shows up as timestamps that are before
3650 	 * the start of the buffer.
3651 	 */
3652 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3653 		if (ts >= iter->array_buffer->time_start)
3654 			break;
3655 		entries++;
3656 		ring_buffer_iter_advance(buf_iter);
3657 	}
3658 
3659 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3660 }
3661 
3662 /*
3663  * The current tracer is copied to avoid taking a global lock
3664  * all around.
3665  */
3666 static void *s_start(struct seq_file *m, loff_t *pos)
3667 {
3668 	struct trace_iterator *iter = m->private;
3669 	struct trace_array *tr = iter->tr;
3670 	int cpu_file = iter->cpu_file;
3671 	void *p = NULL;
3672 	loff_t l = 0;
3673 	int cpu;
3674 
3675 	/*
3676 	 * copy the tracer to avoid using a global lock all around.
3677 	 * iter->trace is a copy of current_trace, the pointer to the
3678 	 * name may be used instead of a strcmp(), as iter->trace->name
3679 	 * will point to the same string as current_trace->name.
3680 	 */
3681 	mutex_lock(&trace_types_lock);
3682 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3683 		*iter->trace = *tr->current_trace;
3684 	mutex_unlock(&trace_types_lock);
3685 
3686 #ifdef CONFIG_TRACER_MAX_TRACE
3687 	if (iter->snapshot && iter->trace->use_max_tr)
3688 		return ERR_PTR(-EBUSY);
3689 #endif
3690 
3691 	if (!iter->snapshot)
3692 		atomic_inc(&trace_record_taskinfo_disabled);
3693 
3694 	if (*pos != iter->pos) {
3695 		iter->ent = NULL;
3696 		iter->cpu = 0;
3697 		iter->idx = -1;
3698 
3699 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3700 			for_each_tracing_cpu(cpu)
3701 				tracing_iter_reset(iter, cpu);
3702 		} else
3703 			tracing_iter_reset(iter, cpu_file);
3704 
3705 		iter->leftover = 0;
3706 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3707 			;
3708 
3709 	} else {
3710 		/*
3711 		 * If we overflowed the seq_file before, then we want
3712 		 * to just reuse the trace_seq buffer again.
3713 		 */
3714 		if (iter->leftover)
3715 			p = iter;
3716 		else {
3717 			l = *pos - 1;
3718 			p = s_next(m, p, &l);
3719 		}
3720 	}
3721 
3722 	trace_event_read_lock();
3723 	trace_access_lock(cpu_file);
3724 	return p;
3725 }
3726 
3727 static void s_stop(struct seq_file *m, void *p)
3728 {
3729 	struct trace_iterator *iter = m->private;
3730 
3731 #ifdef CONFIG_TRACER_MAX_TRACE
3732 	if (iter->snapshot && iter->trace->use_max_tr)
3733 		return;
3734 #endif
3735 
3736 	if (!iter->snapshot)
3737 		atomic_dec(&trace_record_taskinfo_disabled);
3738 
3739 	trace_access_unlock(iter->cpu_file);
3740 	trace_event_read_unlock();
3741 }
3742 
3743 static void
3744 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3745 		      unsigned long *entries, int cpu)
3746 {
3747 	unsigned long count;
3748 
3749 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3750 	/*
3751 	 * If this buffer has skipped entries, then we hold all
3752 	 * entries for the trace and we need to ignore the
3753 	 * ones before the time stamp.
3754 	 */
3755 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3756 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3757 		/* total is the same as the entries */
3758 		*total = count;
3759 	} else
3760 		*total = count +
3761 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3762 	*entries = count;
3763 }
3764 
3765 static void
3766 get_total_entries(struct array_buffer *buf,
3767 		  unsigned long *total, unsigned long *entries)
3768 {
3769 	unsigned long t, e;
3770 	int cpu;
3771 
3772 	*total = 0;
3773 	*entries = 0;
3774 
3775 	for_each_tracing_cpu(cpu) {
3776 		get_total_entries_cpu(buf, &t, &e, cpu);
3777 		*total += t;
3778 		*entries += e;
3779 	}
3780 }
3781 
3782 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3783 {
3784 	unsigned long total, entries;
3785 
3786 	if (!tr)
3787 		tr = &global_trace;
3788 
3789 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3790 
3791 	return entries;
3792 }
3793 
3794 unsigned long trace_total_entries(struct trace_array *tr)
3795 {
3796 	unsigned long total, entries;
3797 
3798 	if (!tr)
3799 		tr = &global_trace;
3800 
3801 	get_total_entries(&tr->array_buffer, &total, &entries);
3802 
3803 	return entries;
3804 }
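
/*
 * Illustrative example: a caller that already holds a trace_array reference
 * can use the helpers above to report how many events are waiting to be
 * read, e.g.:
 *
 *	pr_info("buffer holds %lu events\n", trace_total_entries(tr));
 *
 * Passing NULL falls back to the top-level (global) trace buffer.
 */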
3805 
3806 static void print_lat_help_header(struct seq_file *m)
3807 {
3808 	seq_puts(m, "#                    _------=> CPU#            \n"
3809 		    "#                   / _-----=> irqs-off        \n"
3810 		    "#                  | / _----=> need-resched    \n"
3811 		    "#                  || / _---=> hardirq/softirq \n"
3812 		    "#                  ||| / _--=> preempt-depth   \n"
3813 		    "#                  |||| /     delay            \n"
3814 		    "#  cmd     pid     ||||| time  |   caller      \n"
3815 		    "#     \\   /        |||||  \\    |   /         \n");
3816 }
3817 
3818 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3819 {
3820 	unsigned long total;
3821 	unsigned long entries;
3822 
3823 	get_total_entries(buf, &total, &entries);
3824 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3825 		   entries, total, num_online_cpus());
3826 	seq_puts(m, "#\n");
3827 }
3828 
3829 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3830 				   unsigned int flags)
3831 {
3832 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3833 
3834 	print_event_info(buf, m);
3835 
3836 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3837 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3838 }
3839 
3840 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3841 				       unsigned int flags)
3842 {
3843 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3844 	const char *space = "            ";
3845 	int prec = tgid ? 12 : 2;
3846 
3847 	print_event_info(buf, m);
3848 
3849 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3850 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3851 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3852 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3853 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3854 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3855 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3856 }
3857 
3858 void
3859 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3860 {
3861 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3862 	struct array_buffer *buf = iter->array_buffer;
3863 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3864 	struct tracer *type = iter->trace;
3865 	unsigned long entries;
3866 	unsigned long total;
3867 	const char *name = "preemption";
3868 
3869 	name = type->name;
3870 
3871 	get_total_entries(buf, &total, &entries);
3872 
3873 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3874 		   name, UTS_RELEASE);
3875 	seq_puts(m, "# -----------------------------------"
3876 		 "---------------------------------\n");
3877 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3878 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3879 		   nsecs_to_usecs(data->saved_latency),
3880 		   entries,
3881 		   total,
3882 		   buf->cpu,
3883 #if defined(CONFIG_PREEMPT_NONE)
3884 		   "server",
3885 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3886 		   "desktop",
3887 #elif defined(CONFIG_PREEMPT)
3888 		   "preempt",
3889 #elif defined(CONFIG_PREEMPT_RT)
3890 		   "preempt_rt",
3891 #else
3892 		   "unknown",
3893 #endif
3894 		   /* These are reserved for later use */
3895 		   0, 0, 0, 0);
3896 #ifdef CONFIG_SMP
3897 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3898 #else
3899 	seq_puts(m, ")\n");
3900 #endif
3901 	seq_puts(m, "#    -----------------\n");
3902 	seq_printf(m, "#    | task: %.16s-%d "
3903 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3904 		   data->comm, data->pid,
3905 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3906 		   data->policy, data->rt_priority);
3907 	seq_puts(m, "#    -----------------\n");
3908 
3909 	if (data->critical_start) {
3910 		seq_puts(m, "#  => started at: ");
3911 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3912 		trace_print_seq(m, &iter->seq);
3913 		seq_puts(m, "\n#  => ended at:   ");
3914 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3915 		trace_print_seq(m, &iter->seq);
3916 		seq_puts(m, "\n#\n");
3917 	}
3918 
3919 	seq_puts(m, "#\n");
3920 }
3921 
3922 static void test_cpu_buff_start(struct trace_iterator *iter)
3923 {
3924 	struct trace_seq *s = &iter->seq;
3925 	struct trace_array *tr = iter->tr;
3926 
3927 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3928 		return;
3929 
3930 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3931 		return;
3932 
3933 	if (cpumask_available(iter->started) &&
3934 	    cpumask_test_cpu(iter->cpu, iter->started))
3935 		return;
3936 
3937 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3938 		return;
3939 
3940 	if (cpumask_available(iter->started))
3941 		cpumask_set_cpu(iter->cpu, iter->started);
3942 
3943 	/* Don't print started cpu buffer for the first entry of the trace */
3944 	if (iter->idx > 1)
3945 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3946 				iter->cpu);
3947 }
3948 
3949 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3950 {
3951 	struct trace_array *tr = iter->tr;
3952 	struct trace_seq *s = &iter->seq;
3953 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3954 	struct trace_entry *entry;
3955 	struct trace_event *event;
3956 
3957 	entry = iter->ent;
3958 
3959 	test_cpu_buff_start(iter);
3960 
3961 	event = ftrace_find_event(entry->type);
3962 
3963 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3964 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3965 			trace_print_lat_context(iter);
3966 		else
3967 			trace_print_context(iter);
3968 	}
3969 
3970 	if (trace_seq_has_overflowed(s))
3971 		return TRACE_TYPE_PARTIAL_LINE;
3972 
3973 	if (event)
3974 		return event->funcs->trace(iter, sym_flags, event);
3975 
3976 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3977 
3978 	return trace_handle_return(s);
3979 }
3980 
3981 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3982 {
3983 	struct trace_array *tr = iter->tr;
3984 	struct trace_seq *s = &iter->seq;
3985 	struct trace_entry *entry;
3986 	struct trace_event *event;
3987 
3988 	entry = iter->ent;
3989 
3990 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3991 		trace_seq_printf(s, "%d %d %llu ",
3992 				 entry->pid, iter->cpu, iter->ts);
3993 
3994 	if (trace_seq_has_overflowed(s))
3995 		return TRACE_TYPE_PARTIAL_LINE;
3996 
3997 	event = ftrace_find_event(entry->type);
3998 	if (event)
3999 		return event->funcs->raw(iter, 0, event);
4000 
4001 	trace_seq_printf(s, "%d ?\n", entry->type);
4002 
4003 	return trace_handle_return(s);
4004 }
4005 
4006 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4007 {
4008 	struct trace_array *tr = iter->tr;
4009 	struct trace_seq *s = &iter->seq;
4010 	unsigned char newline = '\n';
4011 	struct trace_entry *entry;
4012 	struct trace_event *event;
4013 
4014 	entry = iter->ent;
4015 
4016 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4017 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4018 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4019 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4020 		if (trace_seq_has_overflowed(s))
4021 			return TRACE_TYPE_PARTIAL_LINE;
4022 	}
4023 
4024 	event = ftrace_find_event(entry->type);
4025 	if (event) {
4026 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4027 		if (ret != TRACE_TYPE_HANDLED)
4028 			return ret;
4029 	}
4030 
4031 	SEQ_PUT_FIELD(s, newline);
4032 
4033 	return trace_handle_return(s);
4034 }
4035 
4036 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4037 {
4038 	struct trace_array *tr = iter->tr;
4039 	struct trace_seq *s = &iter->seq;
4040 	struct trace_entry *entry;
4041 	struct trace_event *event;
4042 
4043 	entry = iter->ent;
4044 
4045 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4046 		SEQ_PUT_FIELD(s, entry->pid);
4047 		SEQ_PUT_FIELD(s, iter->cpu);
4048 		SEQ_PUT_FIELD(s, iter->ts);
4049 		if (trace_seq_has_overflowed(s))
4050 			return TRACE_TYPE_PARTIAL_LINE;
4051 	}
4052 
4053 	event = ftrace_find_event(entry->type);
4054 	return event ? event->funcs->binary(iter, 0, event) :
4055 		TRACE_TYPE_HANDLED;
4056 }
4057 
4058 int trace_empty(struct trace_iterator *iter)
4059 {
4060 	struct ring_buffer_iter *buf_iter;
4061 	int cpu;
4062 
4063 	/* If we are looking at one CPU buffer, only check that one */
4064 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4065 		cpu = iter->cpu_file;
4066 		buf_iter = trace_buffer_iter(iter, cpu);
4067 		if (buf_iter) {
4068 			if (!ring_buffer_iter_empty(buf_iter))
4069 				return 0;
4070 		} else {
4071 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4072 				return 0;
4073 		}
4074 		return 1;
4075 	}
4076 
4077 	for_each_tracing_cpu(cpu) {
4078 		buf_iter = trace_buffer_iter(iter, cpu);
4079 		if (buf_iter) {
4080 			if (!ring_buffer_iter_empty(buf_iter))
4081 				return 0;
4082 		} else {
4083 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4084 				return 0;
4085 		}
4086 	}
4087 
4088 	return 1;
4089 }
4090 
4091 /*  Called with trace_event_read_lock() held. */
4092 enum print_line_t print_trace_line(struct trace_iterator *iter)
4093 {
4094 	struct trace_array *tr = iter->tr;
4095 	unsigned long trace_flags = tr->trace_flags;
4096 	enum print_line_t ret;
4097 
4098 	if (iter->lost_events) {
4099 		if (iter->lost_events == (unsigned long)-1)
4100 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4101 					 iter->cpu);
4102 		else
4103 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4104 					 iter->cpu, iter->lost_events);
4105 		if (trace_seq_has_overflowed(&iter->seq))
4106 			return TRACE_TYPE_PARTIAL_LINE;
4107 	}
4108 
4109 	if (iter->trace && iter->trace->print_line) {
4110 		ret = iter->trace->print_line(iter);
4111 		if (ret != TRACE_TYPE_UNHANDLED)
4112 			return ret;
4113 	}
4114 
4115 	if (iter->ent->type == TRACE_BPUTS &&
4116 			trace_flags & TRACE_ITER_PRINTK &&
4117 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4118 		return trace_print_bputs_msg_only(iter);
4119 
4120 	if (iter->ent->type == TRACE_BPRINT &&
4121 			trace_flags & TRACE_ITER_PRINTK &&
4122 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4123 		return trace_print_bprintk_msg_only(iter);
4124 
4125 	if (iter->ent->type == TRACE_PRINT &&
4126 			trace_flags & TRACE_ITER_PRINTK &&
4127 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4128 		return trace_print_printk_msg_only(iter);
4129 
4130 	if (trace_flags & TRACE_ITER_BIN)
4131 		return print_bin_fmt(iter);
4132 
4133 	if (trace_flags & TRACE_ITER_HEX)
4134 		return print_hex_fmt(iter);
4135 
4136 	if (trace_flags & TRACE_ITER_RAW)
4137 		return print_raw_fmt(iter);
4138 
4139 	return print_trace_fmt(iter);
4140 }
4141 
4142 void trace_latency_header(struct seq_file *m)
4143 {
4144 	struct trace_iterator *iter = m->private;
4145 	struct trace_array *tr = iter->tr;
4146 
4147 	/* print nothing if the buffers are empty */
4148 	if (trace_empty(iter))
4149 		return;
4150 
4151 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4152 		print_trace_header(m, iter);
4153 
4154 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4155 		print_lat_help_header(m);
4156 }
4157 
4158 void trace_default_header(struct seq_file *m)
4159 {
4160 	struct trace_iterator *iter = m->private;
4161 	struct trace_array *tr = iter->tr;
4162 	unsigned long trace_flags = tr->trace_flags;
4163 
4164 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4165 		return;
4166 
4167 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4168 		/* print nothing if the buffers are empty */
4169 		if (trace_empty(iter))
4170 			return;
4171 		print_trace_header(m, iter);
4172 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4173 			print_lat_help_header(m);
4174 	} else {
4175 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4176 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4177 				print_func_help_header_irq(iter->array_buffer,
4178 							   m, trace_flags);
4179 			else
4180 				print_func_help_header(iter->array_buffer, m,
4181 						       trace_flags);
4182 		}
4183 	}
4184 }
4185 
4186 static void test_ftrace_alive(struct seq_file *m)
4187 {
4188 	if (!ftrace_is_dead())
4189 		return;
4190 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4191 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4192 }
4193 
4194 #ifdef CONFIG_TRACER_MAX_TRACE
4195 static void show_snapshot_main_help(struct seq_file *m)
4196 {
4197 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4198 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4199 		    "#                      Takes a snapshot of the main buffer.\n"
4200 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4201 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4202 		    "#                       is not a '0' or '1')\n");
4203 }
4204 
4205 static void show_snapshot_percpu_help(struct seq_file *m)
4206 {
4207 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4208 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4209 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4210 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4211 #else
4212 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4213 		    "#                     Must use main snapshot file to allocate.\n");
4214 #endif
4215 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4216 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4217 		    "#                       is not a '0' or '1')\n");
4218 }
4219 
4220 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4221 {
4222 	if (iter->tr->allocated_snapshot)
4223 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4224 	else
4225 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4226 
4227 	seq_puts(m, "# Snapshot commands:\n");
4228 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4229 		show_snapshot_main_help(m);
4230 	else
4231 		show_snapshot_percpu_help(m);
4232 }
4233 #else
4234 /* Should never be called */
4235 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4236 #endif
4237 
4238 static int s_show(struct seq_file *m, void *v)
4239 {
4240 	struct trace_iterator *iter = v;
4241 	int ret;
4242 
4243 	if (iter->ent == NULL) {
4244 		if (iter->tr) {
4245 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4246 			seq_puts(m, "#\n");
4247 			test_ftrace_alive(m);
4248 		}
4249 		if (iter->snapshot && trace_empty(iter))
4250 			print_snapshot_help(m, iter);
4251 		else if (iter->trace && iter->trace->print_header)
4252 			iter->trace->print_header(m);
4253 		else
4254 			trace_default_header(m);
4255 
4256 	} else if (iter->leftover) {
4257 		/*
4258 		 * If we filled the seq_file buffer earlier, we
4259 		 * want to just show it now.
4260 		 */
4261 		ret = trace_print_seq(m, &iter->seq);
4262 
4263 		/* ret should this time be zero, but you never know */
4264 		iter->leftover = ret;
4265 
4266 	} else {
4267 		print_trace_line(iter);
4268 		ret = trace_print_seq(m, &iter->seq);
4269 		/*
4270 		 * If we overflow the seq_file buffer, then it will
4271 		 * ask us for this data again at start up.
4272 		 * Use that instead.
4273 		 *  ret is 0 if seq_file write succeeded.
4274 		 *        -1 otherwise.
4275 		 */
4276 		iter->leftover = ret;
4277 	}
4278 
4279 	return 0;
4280 }
4281 
4282 /*
4283  * Should be used after trace_array_get(), trace_types_lock
4284  * ensures that i_cdev was already initialized.
4285  */
4286 static inline int tracing_get_cpu(struct inode *inode)
4287 {
4288 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4289 		return (long)inode->i_cdev - 1;
4290 	return RING_BUFFER_ALL_CPUS;
4291 }
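
/*
 * Example of the encoding above (see trace_create_cpu_file(), which stores
 * cpu + 1 in i_cdev): the per_cpu/cpu2/trace file has i_cdev == (void *)3,
 * so this helper returns 2, while the top-level trace file leaves i_cdev
 * NULL and maps to RING_BUFFER_ALL_CPUS.
 */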
4292 
4293 static const struct seq_operations tracer_seq_ops = {
4294 	.start		= s_start,
4295 	.next		= s_next,
4296 	.stop		= s_stop,
4297 	.show		= s_show,
4298 };
4299 
4300 static struct trace_iterator *
4301 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4302 {
4303 	struct trace_array *tr = inode->i_private;
4304 	struct trace_iterator *iter;
4305 	int cpu;
4306 
4307 	if (tracing_disabled)
4308 		return ERR_PTR(-ENODEV);
4309 
4310 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4311 	if (!iter)
4312 		return ERR_PTR(-ENOMEM);
4313 
4314 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4315 				    GFP_KERNEL);
4316 	if (!iter->buffer_iter)
4317 		goto release;
4318 
4319 	/*
4320 	 * trace_find_next_entry() may need to save off iter->ent.
4321 	 * It will place it into the iter->temp buffer. As most
4322 	 * events are less than 128, allocate a buffer of that size.
4323 	 * If one is greater, then trace_find_next_entry() will
4324 	 * allocate a new buffer to adjust for the bigger iter->ent.
4325 	 * It's not critical if it fails to get allocated here.
4326 	 */
4327 	iter->temp = kmalloc(128, GFP_KERNEL);
4328 	if (iter->temp)
4329 		iter->temp_size = 128;
4330 
4331 	/*
4332 	 * We make a copy of the current tracer to avoid concurrent
4333 	 * changes on it while we are reading.
4334 	 */
4335 	mutex_lock(&trace_types_lock);
4336 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4337 	if (!iter->trace)
4338 		goto fail;
4339 
4340 	*iter->trace = *tr->current_trace;
4341 
4342 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4343 		goto fail;
4344 
4345 	iter->tr = tr;
4346 
4347 #ifdef CONFIG_TRACER_MAX_TRACE
4348 	/* Currently only the top directory has a snapshot */
4349 	if (tr->current_trace->print_max || snapshot)
4350 		iter->array_buffer = &tr->max_buffer;
4351 	else
4352 #endif
4353 		iter->array_buffer = &tr->array_buffer;
4354 	iter->snapshot = snapshot;
4355 	iter->pos = -1;
4356 	iter->cpu_file = tracing_get_cpu(inode);
4357 	mutex_init(&iter->mutex);
4358 
4359 	/* Notify the tracer early; before we stop tracing. */
4360 	if (iter->trace->open)
4361 		iter->trace->open(iter);
4362 
4363 	/* Annotate start of buffers if we had overruns */
4364 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4365 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4366 
4367 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4368 	if (trace_clocks[tr->clock_id].in_ns)
4369 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4370 
4371 	/*
4372 	 * If pause-on-trace is enabled, then stop the trace while
4373 	 * dumping, unless this is the "snapshot" file
4374 	 */
4375 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4376 		tracing_stop_tr(tr);
4377 
4378 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4379 		for_each_tracing_cpu(cpu) {
4380 			iter->buffer_iter[cpu] =
4381 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4382 							 cpu, GFP_KERNEL);
4383 		}
4384 		ring_buffer_read_prepare_sync();
4385 		for_each_tracing_cpu(cpu) {
4386 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4387 			tracing_iter_reset(iter, cpu);
4388 		}
4389 	} else {
4390 		cpu = iter->cpu_file;
4391 		iter->buffer_iter[cpu] =
4392 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4393 						 cpu, GFP_KERNEL);
4394 		ring_buffer_read_prepare_sync();
4395 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4396 		tracing_iter_reset(iter, cpu);
4397 	}
4398 
4399 	mutex_unlock(&trace_types_lock);
4400 
4401 	return iter;
4402 
4403  fail:
4404 	mutex_unlock(&trace_types_lock);
4405 	kfree(iter->trace);
4406 	kfree(iter->temp);
4407 	kfree(iter->buffer_iter);
4408 release:
4409 	seq_release_private(inode, file);
4410 	return ERR_PTR(-ENOMEM);
4411 }
4412 
4413 int tracing_open_generic(struct inode *inode, struct file *filp)
4414 {
4415 	int ret;
4416 
4417 	ret = tracing_check_open_get_tr(NULL);
4418 	if (ret)
4419 		return ret;
4420 
4421 	filp->private_data = inode->i_private;
4422 	return 0;
4423 }
4424 
4425 bool tracing_is_disabled(void)
4426 {
4427 	return (tracing_disabled) ? true : false;
4428 }
4429 
4430 /*
4431  * Open and update trace_array ref count.
4432  * Must have the current trace_array passed to it.
4433  */
4434 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4435 {
4436 	struct trace_array *tr = inode->i_private;
4437 	int ret;
4438 
4439 	ret = tracing_check_open_get_tr(tr);
4440 	if (ret)
4441 		return ret;
4442 
4443 	filp->private_data = inode->i_private;
4444 
4445 	return 0;
4446 }
4447 
4448 static int tracing_release(struct inode *inode, struct file *file)
4449 {
4450 	struct trace_array *tr = inode->i_private;
4451 	struct seq_file *m = file->private_data;
4452 	struct trace_iterator *iter;
4453 	int cpu;
4454 
4455 	if (!(file->f_mode & FMODE_READ)) {
4456 		trace_array_put(tr);
4457 		return 0;
4458 	}
4459 
4460 	/* Writes do not use seq_file */
4461 	iter = m->private;
4462 	mutex_lock(&trace_types_lock);
4463 
4464 	for_each_tracing_cpu(cpu) {
4465 		if (iter->buffer_iter[cpu])
4466 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4467 	}
4468 
4469 	if (iter->trace && iter->trace->close)
4470 		iter->trace->close(iter);
4471 
4472 	if (!iter->snapshot && tr->stop_count)
4473 		/* reenable tracing if it was previously enabled */
4474 		tracing_start_tr(tr);
4475 
4476 	__trace_array_put(tr);
4477 
4478 	mutex_unlock(&trace_types_lock);
4479 
4480 	mutex_destroy(&iter->mutex);
4481 	free_cpumask_var(iter->started);
4482 	kfree(iter->temp);
4483 	kfree(iter->trace);
4484 	kfree(iter->buffer_iter);
4485 	seq_release_private(inode, file);
4486 
4487 	return 0;
4488 }
4489 
4490 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4491 {
4492 	struct trace_array *tr = inode->i_private;
4493 
4494 	trace_array_put(tr);
4495 	return 0;
4496 }
4497 
4498 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4499 {
4500 	struct trace_array *tr = inode->i_private;
4501 
4502 	trace_array_put(tr);
4503 
4504 	return single_release(inode, file);
4505 }
4506 
4507 static int tracing_open(struct inode *inode, struct file *file)
4508 {
4509 	struct trace_array *tr = inode->i_private;
4510 	struct trace_iterator *iter;
4511 	int ret;
4512 
4513 	ret = tracing_check_open_get_tr(tr);
4514 	if (ret)
4515 		return ret;
4516 
4517 	/* If this file was open for write, then erase contents */
4518 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4519 		int cpu = tracing_get_cpu(inode);
4520 		struct array_buffer *trace_buf = &tr->array_buffer;
4521 
4522 #ifdef CONFIG_TRACER_MAX_TRACE
4523 		if (tr->current_trace->print_max)
4524 			trace_buf = &tr->max_buffer;
4525 #endif
4526 
4527 		if (cpu == RING_BUFFER_ALL_CPUS)
4528 			tracing_reset_online_cpus(trace_buf);
4529 		else
4530 			tracing_reset_cpu(trace_buf, cpu);
4531 	}
4532 
4533 	if (file->f_mode & FMODE_READ) {
4534 		iter = __tracing_open(inode, file, false);
4535 		if (IS_ERR(iter))
4536 			ret = PTR_ERR(iter);
4537 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4538 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4539 	}
4540 
4541 	if (ret < 0)
4542 		trace_array_put(tr);
4543 
4544 	return ret;
4545 }
4546 
4547 /*
4548  * Some tracers are not suitable for instance buffers.
4549  * A tracer is always available for the global array (toplevel)
4550  * or if it explicitly states that it is.
4551  */
4552 static bool
4553 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4554 {
4555 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4556 }
4557 
4558 /* Find the next tracer that this trace array may use */
4559 static struct tracer *
4560 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4561 {
4562 	while (t && !trace_ok_for_array(t, tr))
4563 		t = t->next;
4564 
4565 	return t;
4566 }
4567 
4568 static void *
4569 t_next(struct seq_file *m, void *v, loff_t *pos)
4570 {
4571 	struct trace_array *tr = m->private;
4572 	struct tracer *t = v;
4573 
4574 	(*pos)++;
4575 
4576 	if (t)
4577 		t = get_tracer_for_array(tr, t->next);
4578 
4579 	return t;
4580 }
4581 
4582 static void *t_start(struct seq_file *m, loff_t *pos)
4583 {
4584 	struct trace_array *tr = m->private;
4585 	struct tracer *t;
4586 	loff_t l = 0;
4587 
4588 	mutex_lock(&trace_types_lock);
4589 
4590 	t = get_tracer_for_array(tr, trace_types);
4591 	for (; t && l < *pos; t = t_next(m, t, &l))
4592 			;
4593 
4594 	return t;
4595 }
4596 
4597 static void t_stop(struct seq_file *m, void *p)
4598 {
4599 	mutex_unlock(&trace_types_lock);
4600 }
4601 
4602 static int t_show(struct seq_file *m, void *v)
4603 {
4604 	struct tracer *t = v;
4605 
4606 	if (!t)
4607 		return 0;
4608 
4609 	seq_puts(m, t->name);
4610 	if (t->next)
4611 		seq_putc(m, ' ');
4612 	else
4613 		seq_putc(m, '\n');
4614 
4615 	return 0;
4616 }
4617 
4618 static const struct seq_operations show_traces_seq_ops = {
4619 	.start		= t_start,
4620 	.next		= t_next,
4621 	.stop		= t_stop,
4622 	.show		= t_show,
4623 };
4624 
4625 static int show_traces_open(struct inode *inode, struct file *file)
4626 {
4627 	struct trace_array *tr = inode->i_private;
4628 	struct seq_file *m;
4629 	int ret;
4630 
4631 	ret = tracing_check_open_get_tr(tr);
4632 	if (ret)
4633 		return ret;
4634 
4635 	ret = seq_open(file, &show_traces_seq_ops);
4636 	if (ret) {
4637 		trace_array_put(tr);
4638 		return ret;
4639 	}
4640 
4641 	m = file->private_data;
4642 	m->private = tr;
4643 
4644 	return 0;
4645 }
4646 
4647 static int show_traces_release(struct inode *inode, struct file *file)
4648 {
4649 	struct trace_array *tr = inode->i_private;
4650 
4651 	trace_array_put(tr);
4652 	return seq_release(inode, file);
4653 }
4654 
4655 static ssize_t
4656 tracing_write_stub(struct file *filp, const char __user *ubuf,
4657 		   size_t count, loff_t *ppos)
4658 {
4659 	return count;
4660 }
4661 
4662 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4663 {
4664 	int ret;
4665 
4666 	if (file->f_mode & FMODE_READ)
4667 		ret = seq_lseek(file, offset, whence);
4668 	else
4669 		file->f_pos = ret = 0;
4670 
4671 	return ret;
4672 }
4673 
4674 static const struct file_operations tracing_fops = {
4675 	.open		= tracing_open,
4676 	.read		= seq_read,
4677 	.write		= tracing_write_stub,
4678 	.llseek		= tracing_lseek,
4679 	.release	= tracing_release,
4680 };
4681 
4682 static const struct file_operations show_traces_fops = {
4683 	.open		= show_traces_open,
4684 	.read		= seq_read,
4685 	.llseek		= seq_lseek,
4686 	.release	= show_traces_release,
4687 };
4688 
4689 static ssize_t
4690 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4691 		     size_t count, loff_t *ppos)
4692 {
4693 	struct trace_array *tr = file_inode(filp)->i_private;
4694 	char *mask_str;
4695 	int len;
4696 
4697 	len = snprintf(NULL, 0, "%*pb\n",
4698 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4699 	mask_str = kmalloc(len, GFP_KERNEL);
4700 	if (!mask_str)
4701 		return -ENOMEM;
4702 
4703 	len = snprintf(mask_str, len, "%*pb\n",
4704 		       cpumask_pr_args(tr->tracing_cpumask));
4705 	if (len >= count) {
4706 		count = -EINVAL;
4707 		goto out_err;
4708 	}
4709 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4710 
4711 out_err:
4712 	kfree(mask_str);
4713 
4714 	return count;
4715 }
4716 
4717 int tracing_set_cpumask(struct trace_array *tr,
4718 			cpumask_var_t tracing_cpumask_new)
4719 {
4720 	int cpu;
4721 
4722 	if (!tr)
4723 		return -EINVAL;
4724 
4725 	local_irq_disable();
4726 	arch_spin_lock(&tr->max_lock);
4727 	for_each_tracing_cpu(cpu) {
4728 		/*
4729 		 * Increase/decrease the disabled counter if we are
4730 		 * about to flip a bit in the cpumask:
4731 		 */
4732 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4733 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4734 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4735 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4736 		}
4737 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4738 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4739 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4740 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4741 		}
4742 	}
4743 	arch_spin_unlock(&tr->max_lock);
4744 	local_irq_enable();
4745 
4746 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4747 
4748 	return 0;
4749 }
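
/*
 * Minimal usage sketch for tracing_set_cpumask() outside the cpumask file
 * write path (illustrative only, error handling trimmed):
 *
 *	cpumask_var_t new_mask;
 *
 *	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_parse("0-1", new_mask);
 *	tracing_set_cpumask(tr, new_mask);
 *	free_cpumask_var(new_mask);
 */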
4750 
4751 static ssize_t
4752 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4753 		      size_t count, loff_t *ppos)
4754 {
4755 	struct trace_array *tr = file_inode(filp)->i_private;
4756 	cpumask_var_t tracing_cpumask_new;
4757 	int err;
4758 
4759 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4760 		return -ENOMEM;
4761 
4762 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4763 	if (err)
4764 		goto err_free;
4765 
4766 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4767 	if (err)
4768 		goto err_free;
4769 
4770 	free_cpumask_var(tracing_cpumask_new);
4771 
4772 	return count;
4773 
4774 err_free:
4775 	free_cpumask_var(tracing_cpumask_new);
4776 
4777 	return err;
4778 }
4779 
4780 static const struct file_operations tracing_cpumask_fops = {
4781 	.open		= tracing_open_generic_tr,
4782 	.read		= tracing_cpumask_read,
4783 	.write		= tracing_cpumask_write,
4784 	.release	= tracing_release_generic_tr,
4785 	.llseek		= generic_file_llseek,
4786 };
4787 
4788 static int tracing_trace_options_show(struct seq_file *m, void *v)
4789 {
4790 	struct tracer_opt *trace_opts;
4791 	struct trace_array *tr = m->private;
4792 	u32 tracer_flags;
4793 	int i;
4794 
4795 	mutex_lock(&trace_types_lock);
4796 	tracer_flags = tr->current_trace->flags->val;
4797 	trace_opts = tr->current_trace->flags->opts;
4798 
4799 	for (i = 0; trace_options[i]; i++) {
4800 		if (tr->trace_flags & (1 << i))
4801 			seq_printf(m, "%s\n", trace_options[i]);
4802 		else
4803 			seq_printf(m, "no%s\n", trace_options[i]);
4804 	}
4805 
4806 	for (i = 0; trace_opts[i].name; i++) {
4807 		if (tracer_flags & trace_opts[i].bit)
4808 			seq_printf(m, "%s\n", trace_opts[i].name);
4809 		else
4810 			seq_printf(m, "no%s\n", trace_opts[i].name);
4811 	}
4812 	mutex_unlock(&trace_types_lock);
4813 
4814 	return 0;
4815 }
4816 
4817 static int __set_tracer_option(struct trace_array *tr,
4818 			       struct tracer_flags *tracer_flags,
4819 			       struct tracer_opt *opts, int neg)
4820 {
4821 	struct tracer *trace = tracer_flags->trace;
4822 	int ret;
4823 
4824 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4825 	if (ret)
4826 		return ret;
4827 
4828 	if (neg)
4829 		tracer_flags->val &= ~opts->bit;
4830 	else
4831 		tracer_flags->val |= opts->bit;
4832 	return 0;
4833 }
4834 
4835 /* Try to assign a tracer specific option */
4836 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4837 {
4838 	struct tracer *trace = tr->current_trace;
4839 	struct tracer_flags *tracer_flags = trace->flags;
4840 	struct tracer_opt *opts = NULL;
4841 	int i;
4842 
4843 	for (i = 0; tracer_flags->opts[i].name; i++) {
4844 		opts = &tracer_flags->opts[i];
4845 
4846 		if (strcmp(cmp, opts->name) == 0)
4847 			return __set_tracer_option(tr, trace->flags, opts, neg);
4848 	}
4849 
4850 	return -EINVAL;
4851 }
4852 
4853 /* Some tracers require overwrite to stay enabled */
4854 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4855 {
4856 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4857 		return -1;
4858 
4859 	return 0;
4860 }
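
/*
 * trace_keep_overwrite() is a helper for a tracer's ->flag_changed callback:
 * it refuses to clear TRACE_ITER_OVERWRITE while that tracer is enabled.
 * A hypothetical callback (names are made up) would use it roughly like:
 *
 *	static int my_flag_changed(struct trace_array *tr, u32 mask, int set)
 *	{
 *		if (trace_keep_overwrite(tr->current_trace, mask, set))
 *			return -1;
 *		return 0;
 *	}
 */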
4861 
4862 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4863 {
4864 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4865 	    (mask == TRACE_ITER_RECORD_CMD))
4866 		lockdep_assert_held(&event_mutex);
4867 
4868 	/* do nothing if flag is already set */
4869 	if (!!(tr->trace_flags & mask) == !!enabled)
4870 		return 0;
4871 
4872 	/* Give the tracer a chance to approve the change */
4873 	if (tr->current_trace->flag_changed)
4874 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4875 			return -EINVAL;
4876 
4877 	if (enabled)
4878 		tr->trace_flags |= mask;
4879 	else
4880 		tr->trace_flags &= ~mask;
4881 
4882 	if (mask == TRACE_ITER_RECORD_CMD)
4883 		trace_event_enable_cmd_record(enabled);
4884 
4885 	if (mask == TRACE_ITER_RECORD_TGID) {
4886 		if (!tgid_map)
4887 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4888 					   sizeof(*tgid_map),
4889 					   GFP_KERNEL);
4890 		if (!tgid_map) {
4891 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4892 			return -ENOMEM;
4893 		}
4894 
4895 		trace_event_enable_tgid_record(enabled);
4896 	}
4897 
4898 	if (mask == TRACE_ITER_EVENT_FORK)
4899 		trace_event_follow_fork(tr, enabled);
4900 
4901 	if (mask == TRACE_ITER_FUNC_FORK)
4902 		ftrace_pid_follow_fork(tr, enabled);
4903 
4904 	if (mask == TRACE_ITER_OVERWRITE) {
4905 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4906 #ifdef CONFIG_TRACER_MAX_TRACE
4907 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4908 #endif
4909 	}
4910 
4911 	if (mask == TRACE_ITER_PRINTK) {
4912 		trace_printk_start_stop_comm(enabled);
4913 		trace_printk_control(enabled);
4914 	}
4915 
4916 	return 0;
4917 }
4918 
4919 int trace_set_options(struct trace_array *tr, char *option)
4920 {
4921 	char *cmp;
4922 	int neg = 0;
4923 	int ret;
4924 	size_t orig_len = strlen(option);
4925 	int len;
4926 
4927 	cmp = strstrip(option);
4928 
4929 	len = str_has_prefix(cmp, "no");
4930 	if (len)
4931 		neg = 1;
4932 
4933 	cmp += len;
4934 
4935 	mutex_lock(&event_mutex);
4936 	mutex_lock(&trace_types_lock);
4937 
4938 	ret = match_string(trace_options, -1, cmp);
4939 	/* If no option could be set, test the specific tracer options */
4940 	if (ret < 0)
4941 		ret = set_tracer_option(tr, cmp, neg);
4942 	else
4943 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4944 
4945 	mutex_unlock(&trace_types_lock);
4946 	mutex_unlock(&event_mutex);
4947 
4948 	/*
4949 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4950 	 * turn it back into a space.
4951 	 */
4952 	if (orig_len > strlen(option))
4953 		option[strlen(option)] = ' ';
4954 
4955 	return ret;
4956 }
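
/*
 * Usage sketch (option names chosen for illustration): the string uses the
 * same spelling as the trace_options file, with a "no" prefix to clear a
 * flag:
 *
 *	trace_set_options(tr, "nooverwrite");	(clears TRACE_ITER_OVERWRITE)
 *	trace_set_options(tr, "irq-info");	(sets TRACE_ITER_IRQ_INFO)
 *
 * Names that do not match a core flag fall through to the current tracer's
 * private options via set_tracer_option().
 */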
4957 
4958 static void __init apply_trace_boot_options(void)
4959 {
4960 	char *buf = trace_boot_options_buf;
4961 	char *option;
4962 
4963 	while (true) {
4964 		option = strsep(&buf, ",");
4965 
4966 		if (!option)
4967 			break;
4968 
4969 		if (*option)
4970 			trace_set_options(&global_trace, option);
4971 
4972 		/* Put back the comma to allow this to be called again */
4973 		if (buf)
4974 			*(buf - 1) = ',';
4975 	}
4976 }
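
/*
 * The buffer parsed above is filled from the "trace_options=" kernel boot
 * parameter, so booting with, for example (option names illustrative):
 *
 *	trace_options=nooverwrite,sym-addr
 *
 * has the same effect as writing those two strings into the trace_options
 * file once tracing is up.
 */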
4977 
4978 static ssize_t
4979 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4980 			size_t cnt, loff_t *ppos)
4981 {
4982 	struct seq_file *m = filp->private_data;
4983 	struct trace_array *tr = m->private;
4984 	char buf[64];
4985 	int ret;
4986 
4987 	if (cnt >= sizeof(buf))
4988 		return -EINVAL;
4989 
4990 	if (copy_from_user(buf, ubuf, cnt))
4991 		return -EFAULT;
4992 
4993 	buf[cnt] = 0;
4994 
4995 	ret = trace_set_options(tr, buf);
4996 	if (ret < 0)
4997 		return ret;
4998 
4999 	*ppos += cnt;
5000 
5001 	return cnt;
5002 }
5003 
5004 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5005 {
5006 	struct trace_array *tr = inode->i_private;
5007 	int ret;
5008 
5009 	ret = tracing_check_open_get_tr(tr);
5010 	if (ret)
5011 		return ret;
5012 
5013 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5014 	if (ret < 0)
5015 		trace_array_put(tr);
5016 
5017 	return ret;
5018 }
5019 
5020 static const struct file_operations tracing_iter_fops = {
5021 	.open		= tracing_trace_options_open,
5022 	.read		= seq_read,
5023 	.llseek		= seq_lseek,
5024 	.release	= tracing_single_release_tr,
5025 	.write		= tracing_trace_options_write,
5026 };
5027 
5028 static const char readme_msg[] =
5029 	"tracing mini-HOWTO:\n\n"
5030 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5031 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5032 	" Important files:\n"
5033 	"  trace\t\t\t- The static contents of the buffer\n"
5034 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5035 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5036 	"  current_tracer\t- function and latency tracers\n"
5037 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5038 	"  error_log\t- error log for failed commands (that support it)\n"
5039 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5040 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5041 	"  trace_clock\t\t- change the clock used to order events\n"
5042 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5043 	"      global:   Synced across CPUs but slows tracing down.\n"
5044 	"     counter:   Not a clock, but just an increment\n"
5045 	"      uptime:   Jiffy counter from time of boot\n"
5046 	"        perf:   Same clock that perf events use\n"
5047 #ifdef CONFIG_X86_64
5048 	"     x86-tsc:   TSC cycle counter\n"
5049 #endif
5050 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5051 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5052 	"    absolute:   Absolute (standalone) timestamp\n"
5053 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5054 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5055 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5056 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5057 	"\t\t\t  Remove sub-buffer with rmdir\n"
5058 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5059 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5060 	"\t\t\t  option name\n"
5061 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5062 #ifdef CONFIG_DYNAMIC_FTRACE
5063 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5064 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5065 	"\t\t\t  functions\n"
5066 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5067 	"\t     modules: Can select a group via module\n"
5068 	"\t      Format: :mod:<module-name>\n"
5069 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5070 	"\t    triggers: a command to perform when function is hit\n"
5071 	"\t      Format: <function>:<trigger>[:count]\n"
5072 	"\t     trigger: traceon, traceoff\n"
5073 	"\t\t      enable_event:<system>:<event>\n"
5074 	"\t\t      disable_event:<system>:<event>\n"
5075 #ifdef CONFIG_STACKTRACE
5076 	"\t\t      stacktrace\n"
5077 #endif
5078 #ifdef CONFIG_TRACER_SNAPSHOT
5079 	"\t\t      snapshot\n"
5080 #endif
5081 	"\t\t      dump\n"
5082 	"\t\t      cpudump\n"
5083 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5084 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5085 	"\t     The first one will disable tracing every time do_fault is hit\n"
5086 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5087 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5088 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5089 	"\t       the counter will not decrement. It only decrements when the\n"
5090 	"\t       trigger did work\n"
5091 	"\t     To remove trigger without count:\n"
5092 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5093 	"\t     To remove trigger with a count:\n"
5094 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5095 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5096 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5097 	"\t    modules: Can select a group via module command :mod:\n"
5098 	"\t    Does not accept triggers\n"
5099 #endif /* CONFIG_DYNAMIC_FTRACE */
5100 #ifdef CONFIG_FUNCTION_TRACER
5101 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5102 	"\t\t    (function)\n"
5103 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5104 	"\t\t    (function)\n"
5105 #endif
5106 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5107 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5108 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5109 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5110 #endif
5111 #ifdef CONFIG_TRACER_SNAPSHOT
5112 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5113 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5114 	"\t\t\t  information\n"
5115 #endif
5116 #ifdef CONFIG_STACK_TRACER
5117 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5118 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5119 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5120 	"\t\t\t  new trace)\n"
5121 #ifdef CONFIG_DYNAMIC_FTRACE
5122 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5123 	"\t\t\t  traces\n"
5124 #endif
5125 #endif /* CONFIG_STACK_TRACER */
5126 #ifdef CONFIG_DYNAMIC_EVENTS
5127 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5128 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5129 #endif
5130 #ifdef CONFIG_KPROBE_EVENTS
5131 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5132 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5133 #endif
5134 #ifdef CONFIG_UPROBE_EVENTS
5135 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5136 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5137 #endif
5138 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5139 	"\t  accepts: event-definitions (one definition per line)\n"
5140 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5141 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5142 #ifdef CONFIG_HIST_TRIGGERS
5143 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5144 #endif
5145 	"\t           -:[<group>/]<event>\n"
5146 #ifdef CONFIG_KPROBE_EVENTS
5147 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5148 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5149 #endif
5150 #ifdef CONFIG_UPROBE_EVENTS
5151 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5152 #endif
5153 	"\t     args: <name>=fetcharg[:type]\n"
5154 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5155 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5156 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5157 #else
5158 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5159 #endif
5160 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5161 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5162 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5163 	"\t           <type>\\[<array-size>\\]\n"
5164 #ifdef CONFIG_HIST_TRIGGERS
5165 	"\t    field: <stype> <name>;\n"
5166 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5167 	"\t           [unsigned] char/int/long\n"
5168 #endif
5169 #endif
5170 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5171 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5172 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5173 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5174 	"\t\t\t  events\n"
5175 	"      filter\t\t- If set, only events passing filter are traced\n"
5176 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5177 	"\t\t\t  <event>:\n"
5178 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5179 	"      filter\t\t- If set, only events passing filter are traced\n"
5180 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5181 	"\t    Format: <trigger>[:count][if <filter>]\n"
5182 	"\t   trigger: traceon, traceoff\n"
5183 	"\t            enable_event:<system>:<event>\n"
5184 	"\t            disable_event:<system>:<event>\n"
5185 #ifdef CONFIG_HIST_TRIGGERS
5186 	"\t            enable_hist:<system>:<event>\n"
5187 	"\t            disable_hist:<system>:<event>\n"
5188 #endif
5189 #ifdef CONFIG_STACKTRACE
5190 	"\t\t    stacktrace\n"
5191 #endif
5192 #ifdef CONFIG_TRACER_SNAPSHOT
5193 	"\t\t    snapshot\n"
5194 #endif
5195 #ifdef CONFIG_HIST_TRIGGERS
5196 	"\t\t    hist (see below)\n"
5197 #endif
5198 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5199 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5200 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5201 	"\t                  events/block/block_unplug/trigger\n"
5202 	"\t   The first disables tracing every time block_unplug is hit.\n"
5203 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5204 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5205 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5206 	"\t   Like function triggers, the counter is only decremented if it\n"
5207 	"\t    enabled or disabled tracing.\n"
5208 	"\t   To remove a trigger without a count:\n"
5209 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5210 	"\t   To remove a trigger with a count:\n"
5211 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5212 	"\t   Filters can be ignored when removing a trigger.\n"
5213 #ifdef CONFIG_HIST_TRIGGERS
5214 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5215 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5216 	"\t            [:values=<field1[,field2,...]>]\n"
5217 	"\t            [:sort=<field1[,field2,...]>]\n"
5218 	"\t            [:size=#entries]\n"
5219 	"\t            [:pause][:continue][:clear]\n"
5220 	"\t            [:name=histname1]\n"
5221 	"\t            [:<handler>.<action>]\n"
5222 	"\t            [if <filter>]\n\n"
5223 	"\t    When a matching event is hit, an entry is added to a hash\n"
5224 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5225 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5226 	"\t    correspond to fields in the event's format description.  Keys\n"
5227 	"\t    can be any field, or the special string 'stacktrace'.\n"
5228 	"\t    Compound keys consisting of up to two fields can be specified\n"
5229 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5230 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5231 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5232 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5233 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5234 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5235 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5236 	"\t    its histogram data will be shared with other triggers of the\n"
5237 	"\t    same name, and trigger hits will update this common data.\n\n"
5238 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5239 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5240 	"\t    triggers attached to an event, there will be a table for each\n"
5241 	"\t    trigger in the output.  The table displayed for a named\n"
5242 	"\t    trigger will be the same as any other instance having the\n"
5243 	"\t    same name.  The default format used to display a given field\n"
5244 	"\t    can be modified by appending any of the following modifiers\n"
5245 	"\t    to the field name, as applicable:\n\n"
5246 	"\t            .hex        display a number as a hex value\n"
5247 	"\t            .sym        display an address as a symbol\n"
5248 	"\t            .sym-offset display an address as a symbol and offset\n"
5249 	"\t            .execname   display a common_pid as a program name\n"
5250 	"\t            .syscall    display a syscall id as a syscall name\n"
5251 	"\t            .log2       display log2 value rather than raw number\n"
5252 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5253 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5254 	"\t    trigger or to start a hist trigger but not log any events\n"
5255 	"\t    until told to do so.  'continue' can be used to start or\n"
5256 	"\t    restart a paused hist trigger.\n\n"
5257 	"\t    The 'clear' parameter will clear the contents of a running\n"
5258 	"\t    hist trigger and leave its current paused/active state\n"
5259 	"\t    unchanged.\n\n"
5260 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5261 	"\t    have one event conditionally start and stop another event's\n"
5262 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5263 	"\t    the enable_event and disable_event triggers.\n\n"
5264 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5265 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5266 	"\t        <handler>.<action>\n\n"
5267 	"\t    The available handlers are:\n\n"
5268 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5269 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5270 	"\t        onchange(var)            - invoke action if var changes\n\n"
5271 	"\t    The available actions are:\n\n"
5272 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5273 	"\t        save(field,...)                      - save current event fields\n"
5274 #ifdef CONFIG_TRACER_SNAPSHOT
5275 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5276 #endif
5277 #ifdef CONFIG_SYNTH_EVENTS
5278 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5279 	"\t  Write into this file to define/undefine new synthetic events.\n"
5280 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5281 #endif
5282 #endif
5283 ;
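
/*
 * The hist trigger help above has no end-to-end example, so the following
 * is a minimal illustrative sketch (not part of the help text shown to
 * users). It assumes a kernel with CONFIG_HIST_TRIGGERS and the
 * kmem:kmalloc event, using its 'call_site' and 'bytes_req' fields:
 *
 *	# cd /sys/kernel/tracing
 *	# echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *		> events/kmem/kmalloc/trigger
 *	# cat events/kmem/kmalloc/hist
 */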
5284 
5285 static ssize_t
5286 tracing_readme_read(struct file *filp, char __user *ubuf,
5287 		       size_t cnt, loff_t *ppos)
5288 {
5289 	return simple_read_from_buffer(ubuf, cnt, ppos,
5290 					readme_msg, strlen(readme_msg));
5291 }
5292 
5293 static const struct file_operations tracing_readme_fops = {
5294 	.open		= tracing_open_generic,
5295 	.read		= tracing_readme_read,
5296 	.llseek		= generic_file_llseek,
5297 };
5298 
5299 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5300 {
5301 	int *ptr = v;
5302 
5303 	if (*pos || m->count)
5304 		ptr++;
5305 
5306 	(*pos)++;
5307 
5308 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5309 		if (trace_find_tgid(*ptr))
5310 			return ptr;
5311 	}
5312 
5313 	return NULL;
5314 }
5315 
5316 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5317 {
5318 	void *v;
5319 	loff_t l = 0;
5320 
5321 	if (!tgid_map)
5322 		return NULL;
5323 
5324 	v = &tgid_map[0];
5325 	while (l <= *pos) {
5326 		v = saved_tgids_next(m, v, &l);
5327 		if (!v)
5328 			return NULL;
5329 	}
5330 
5331 	return v;
5332 }
5333 
5334 static void saved_tgids_stop(struct seq_file *m, void *v)
5335 {
5336 }
5337 
5338 static int saved_tgids_show(struct seq_file *m, void *v)
5339 {
5340 	int pid = (int *)v - tgid_map;
5341 
5342 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5343 	return 0;
5344 }
5345 
5346 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5347 	.start		= saved_tgids_start,
5348 	.stop		= saved_tgids_stop,
5349 	.next		= saved_tgids_next,
5350 	.show		= saved_tgids_show,
5351 };
5352 
5353 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5354 {
5355 	int ret;
5356 
5357 	ret = tracing_check_open_get_tr(NULL);
5358 	if (ret)
5359 		return ret;
5360 
5361 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5362 }
5363 
5364 
5365 static const struct file_operations tracing_saved_tgids_fops = {
5366 	.open		= tracing_saved_tgids_open,
5367 	.read		= seq_read,
5368 	.llseek		= seq_lseek,
5369 	.release	= seq_release,
5370 };
5371 
5372 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5373 {
5374 	unsigned int *ptr = v;
5375 
5376 	if (*pos || m->count)
5377 		ptr++;
5378 
5379 	(*pos)++;
5380 
5381 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5382 	     ptr++) {
5383 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5384 			continue;
5385 
5386 		return ptr;
5387 	}
5388 
5389 	return NULL;
5390 }
5391 
5392 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5393 {
5394 	void *v;
5395 	loff_t l = 0;
5396 
5397 	preempt_disable();
5398 	arch_spin_lock(&trace_cmdline_lock);
5399 
5400 	v = &savedcmd->map_cmdline_to_pid[0];
5401 	while (l <= *pos) {
5402 		v = saved_cmdlines_next(m, v, &l);
5403 		if (!v)
5404 			return NULL;
5405 	}
5406 
5407 	return v;
5408 }
5409 
5410 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5411 {
5412 	arch_spin_unlock(&trace_cmdline_lock);
5413 	preempt_enable();
5414 }
5415 
5416 static int saved_cmdlines_show(struct seq_file *m, void *v)
5417 {
5418 	char buf[TASK_COMM_LEN];
5419 	unsigned int *pid = v;
5420 
5421 	__trace_find_cmdline(*pid, buf);
5422 	seq_printf(m, "%d %s\n", *pid, buf);
5423 	return 0;
5424 }
5425 
5426 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5427 	.start		= saved_cmdlines_start,
5428 	.next		= saved_cmdlines_next,
5429 	.stop		= saved_cmdlines_stop,
5430 	.show		= saved_cmdlines_show,
5431 };
5432 
5433 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5434 {
5435 	int ret;
5436 
5437 	ret = tracing_check_open_get_tr(NULL);
5438 	if (ret)
5439 		return ret;
5440 
5441 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5442 }
5443 
5444 static const struct file_operations tracing_saved_cmdlines_fops = {
5445 	.open		= tracing_saved_cmdlines_open,
5446 	.read		= seq_read,
5447 	.llseek		= seq_lseek,
5448 	.release	= seq_release,
5449 };
5450 
5451 static ssize_t
5452 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5453 				 size_t cnt, loff_t *ppos)
5454 {
5455 	char buf[64];
5456 	int r;
5457 
5458 	arch_spin_lock(&trace_cmdline_lock);
5459 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5460 	arch_spin_unlock(&trace_cmdline_lock);
5461 
5462 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5463 }
5464 
5465 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5466 {
5467 	kfree(s->saved_cmdlines);
5468 	kfree(s->map_cmdline_to_pid);
5469 	kfree(s);
5470 }
5471 
5472 static int tracing_resize_saved_cmdlines(unsigned int val)
5473 {
5474 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5475 
5476 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5477 	if (!s)
5478 		return -ENOMEM;
5479 
5480 	if (allocate_cmdlines_buffer(val, s) < 0) {
5481 		kfree(s);
5482 		return -ENOMEM;
5483 	}
5484 
5485 	arch_spin_lock(&trace_cmdline_lock);
5486 	savedcmd_temp = savedcmd;
5487 	savedcmd = s;
5488 	arch_spin_unlock(&trace_cmdline_lock);
5489 	free_saved_cmdlines_buffer(savedcmd_temp);
5490 
5491 	return 0;
5492 }
5493 
5494 static ssize_t
5495 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5496 				  size_t cnt, loff_t *ppos)
5497 {
5498 	unsigned long val;
5499 	int ret;
5500 
5501 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5502 	if (ret)
5503 		return ret;
5504 
5505 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5506 	if (!val || val > PID_MAX_DEFAULT)
5507 		return -EINVAL;
5508 
5509 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5510 	if (ret < 0)
5511 		return ret;
5512 
5513 	*ppos += cnt;
5514 
5515 	return cnt;
5516 }
5517 
5518 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5519 	.open		= tracing_open_generic,
5520 	.read		= tracing_saved_cmdlines_size_read,
5521 	.write		= tracing_saved_cmdlines_size_write,
5522 };
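
/*
 * Illustrative use from user space, assuming these handlers back the
 * 'saved_cmdlines_size' tracefs file as in mainline kernels (the number
 * of pid->comm mappings that are cached for resolving task names):
 *
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	1024
 */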
5523 
5524 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5525 static union trace_eval_map_item *
5526 update_eval_map(union trace_eval_map_item *ptr)
5527 {
5528 	if (!ptr->map.eval_string) {
5529 		if (ptr->tail.next) {
5530 			ptr = ptr->tail.next;
5531 			/* Set ptr to the next real item (skip head) */
5532 			ptr++;
5533 		} else
5534 			return NULL;
5535 	}
5536 	return ptr;
5537 }
5538 
5539 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5540 {
5541 	union trace_eval_map_item *ptr = v;
5542 
5543 	/*
5544 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5545 	 * This really should never happen.
5546 	 */
5547 	(*pos)++;
5548 	ptr = update_eval_map(ptr);
5549 	if (WARN_ON_ONCE(!ptr))
5550 		return NULL;
5551 
5552 	ptr++;
5553 	ptr = update_eval_map(ptr);
5554 
5555 	return ptr;
5556 }
5557 
5558 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5559 {
5560 	union trace_eval_map_item *v;
5561 	loff_t l = 0;
5562 
5563 	mutex_lock(&trace_eval_mutex);
5564 
5565 	v = trace_eval_maps;
5566 	if (v)
5567 		v++;
5568 
5569 	while (v && l < *pos) {
5570 		v = eval_map_next(m, v, &l);
5571 	}
5572 
5573 	return v;
5574 }
5575 
5576 static void eval_map_stop(struct seq_file *m, void *v)
5577 {
5578 	mutex_unlock(&trace_eval_mutex);
5579 }
5580 
5581 static int eval_map_show(struct seq_file *m, void *v)
5582 {
5583 	union trace_eval_map_item *ptr = v;
5584 
5585 	seq_printf(m, "%s %ld (%s)\n",
5586 		   ptr->map.eval_string, ptr->map.eval_value,
5587 		   ptr->map.system);
5588 
5589 	return 0;
5590 }
5591 
5592 static const struct seq_operations tracing_eval_map_seq_ops = {
5593 	.start		= eval_map_start,
5594 	.next		= eval_map_next,
5595 	.stop		= eval_map_stop,
5596 	.show		= eval_map_show,
5597 };
5598 
5599 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5600 {
5601 	int ret;
5602 
5603 	ret = tracing_check_open_get_tr(NULL);
5604 	if (ret)
5605 		return ret;
5606 
5607 	return seq_open(filp, &tracing_eval_map_seq_ops);
5608 }
5609 
5610 static const struct file_operations tracing_eval_map_fops = {
5611 	.open		= tracing_eval_map_open,
5612 	.read		= seq_read,
5613 	.llseek		= seq_lseek,
5614 	.release	= seq_release,
5615 };
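
/*
 * Rough shape of the 'eval_map' file created by trace_create_eval_file()
 * below, as formatted by eval_map_show() above: one
 * "<eval-string> <value> (<system>)" line per registered mapping. The
 * concrete names and values here are hypothetical:
 *
 *	ZONE_NORMAL 2 (kmem)
 *	HI_SOFTIRQ 0 (irq)
 */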
5616 
5617 static inline union trace_eval_map_item *
5618 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5619 {
5620 	/* Return tail of array given the head */
5621 	return ptr + ptr->head.length + 1;
5622 }
5623 
5624 static void
5625 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5626 			   int len)
5627 {
5628 	struct trace_eval_map **stop;
5629 	struct trace_eval_map **map;
5630 	union trace_eval_map_item *map_array;
5631 	union trace_eval_map_item *ptr;
5632 
5633 	stop = start + len;
5634 
5635 	/*
5636 	 * The trace_eval_maps contains the map plus a head and tail item,
5637 	 * where the head holds the module and length of array, and the
5638 	 * tail holds a pointer to the next list.
5639 	 */
5640 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5641 	if (!map_array) {
5642 		pr_warn("Unable to allocate trace eval mapping\n");
5643 		return;
5644 	}
5645 
5646 	mutex_lock(&trace_eval_mutex);
5647 
5648 	if (!trace_eval_maps)
5649 		trace_eval_maps = map_array;
5650 	else {
5651 		ptr = trace_eval_maps;
5652 		for (;;) {
5653 			ptr = trace_eval_jmp_to_tail(ptr);
5654 			if (!ptr->tail.next)
5655 				break;
5656 			ptr = ptr->tail.next;
5657 
5658 		}
5659 		ptr->tail.next = map_array;
5660 	}
5661 	map_array->head.mod = mod;
5662 	map_array->head.length = len;
5663 	map_array++;
5664 
5665 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5666 		map_array->map = **map;
5667 		map_array++;
5668 	}
5669 	memset(map_array, 0, sizeof(*map_array));
5670 
5671 	mutex_unlock(&trace_eval_mutex);
5672 }
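
/*
 * A sketch of the array built above, for len == 3 (illustrative only):
 *
 *	[ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail: zeroed ]
 *
 * trace_eval_jmp_to_tail() lands on the tail slot (head + length + 1);
 * its tail.next either ends the list or chains to the next such array.
 */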
5673 
5674 static void trace_create_eval_file(struct dentry *d_tracer)
5675 {
5676 	trace_create_file("eval_map", 0444, d_tracer,
5677 			  NULL, &tracing_eval_map_fops);
5678 }
5679 
5680 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5681 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5682 static inline void trace_insert_eval_map_file(struct module *mod,
5683 			      struct trace_eval_map **start, int len) { }
5684 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5685 
5686 static void trace_insert_eval_map(struct module *mod,
5687 				  struct trace_eval_map **start, int len)
5688 {
5689 	struct trace_eval_map **map;
5690 
5691 	if (len <= 0)
5692 		return;
5693 
5694 	map = start;
5695 
5696 	trace_event_eval_update(map, len);
5697 
5698 	trace_insert_eval_map_file(mod, start, len);
5699 }
5700 
5701 static ssize_t
5702 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5703 		       size_t cnt, loff_t *ppos)
5704 {
5705 	struct trace_array *tr = filp->private_data;
5706 	char buf[MAX_TRACER_SIZE+2];
5707 	int r;
5708 
5709 	mutex_lock(&trace_types_lock);
5710 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5711 	mutex_unlock(&trace_types_lock);
5712 
5713 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5714 }
5715 
5716 int tracer_init(struct tracer *t, struct trace_array *tr)
5717 {
5718 	tracing_reset_online_cpus(&tr->array_buffer);
5719 	return t->init(tr);
5720 }
5721 
5722 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5723 {
5724 	int cpu;
5725 
5726 	for_each_tracing_cpu(cpu)
5727 		per_cpu_ptr(buf->data, cpu)->entries = val;
5728 }
5729 
5730 #ifdef CONFIG_TRACER_MAX_TRACE
5731 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5732 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5733 					struct array_buffer *size_buf, int cpu_id)
5734 {
5735 	int cpu, ret = 0;
5736 
5737 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5738 		for_each_tracing_cpu(cpu) {
5739 			ret = ring_buffer_resize(trace_buf->buffer,
5740 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5741 			if (ret < 0)
5742 				break;
5743 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5744 				per_cpu_ptr(size_buf->data, cpu)->entries;
5745 		}
5746 	} else {
5747 		ret = ring_buffer_resize(trace_buf->buffer,
5748 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5749 		if (ret == 0)
5750 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5751 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5752 	}
5753 
5754 	return ret;
5755 }
5756 #endif /* CONFIG_TRACER_MAX_TRACE */
5757 
5758 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5759 					unsigned long size, int cpu)
5760 {
5761 	int ret;
5762 
5763 	/*
5764 	 * If kernel or user changes the size of the ring buffer
5765 	 * we use the size that was given, and we can forget about
5766 	 * expanding it later.
5767 	 */
5768 	ring_buffer_expanded = true;
5769 
5770 	/* May be called before buffers are initialized */
5771 	if (!tr->array_buffer.buffer)
5772 		return 0;
5773 
5774 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5775 	if (ret < 0)
5776 		return ret;
5777 
5778 #ifdef CONFIG_TRACER_MAX_TRACE
5779 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5780 	    !tr->current_trace->use_max_tr)
5781 		goto out;
5782 
5783 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5784 	if (ret < 0) {
5785 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5786 						     &tr->array_buffer, cpu);
5787 		if (r < 0) {
5788 			/*
5789 			 * AARGH! We are left with different
5790 			 * size max buffer!!!!
5791 			 * The max buffer is our "snapshot" buffer.
5792 			 * When a tracer needs a snapshot (one of the
5793 			 * latency tracers), it swaps the max buffer
5794 			 * with the saved snapshot. We succeeded in
5795 			 * updating the size of the main buffer, but failed to
5796 			 * update the size of the max buffer. But when we tried
5797 			 * to reset the main buffer to the original size, we
5798 			 * failed there too. This is very unlikely to
5799 			 * happen, but if it does, warn and kill all
5800 			 * tracing.
5801 			 */
5802 			WARN_ON(1);
5803 			tracing_disabled = 1;
5804 		}
5805 		return ret;
5806 	}
5807 
5808 	if (cpu == RING_BUFFER_ALL_CPUS)
5809 		set_buffer_entries(&tr->max_buffer, size);
5810 	else
5811 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5812 
5813  out:
5814 #endif /* CONFIG_TRACER_MAX_TRACE */
5815 
5816 	if (cpu == RING_BUFFER_ALL_CPUS)
5817 		set_buffer_entries(&tr->array_buffer, size);
5818 	else
5819 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5820 
5821 	return ret;
5822 }
5823 
5824 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5825 				  unsigned long size, int cpu_id)
5826 {
5827 	int ret = size;
5828 
5829 	mutex_lock(&trace_types_lock);
5830 
5831 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5832 		/* make sure, this cpu is enabled in the mask */
5833 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5834 			ret = -EINVAL;
5835 			goto out;
5836 		}
5837 	}
5838 
5839 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5840 	if (ret < 0)
5841 		ret = -ENOMEM;
5842 
5843 out:
5844 	mutex_unlock(&trace_types_lock);
5845 
5846 	return ret;
5847 }
5848 
5849 
5850 /**
5851  * tracing_update_buffers - used by tracing facility to expand ring buffers
5852  *
5853  * To save memory on systems where tracing is configured in but never
5854  * used, the ring buffers are set to a minimum size. Once a user
5855  * starts to use the tracing facility, they need to grow to their
5856  * default size.
5857  *
5858  * This function is to be called when a tracer is about to be used.
5859  */
5860 int tracing_update_buffers(void)
5861 {
5862 	int ret = 0;
5863 
5864 	mutex_lock(&trace_types_lock);
5865 	if (!ring_buffer_expanded)
5866 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5867 						RING_BUFFER_ALL_CPUS);
5868 	mutex_unlock(&trace_types_lock);
5869 
5870 	return ret;
5871 }
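
/*
 * Typical caller pattern (see, for example, tracing_snapshot_write()
 * further down in this file):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */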
5872 
5873 struct trace_option_dentry;
5874 
5875 static void
5876 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5877 
5878 /*
5879  * Used to clear out the tracer before deletion of an instance.
5880  * Must have trace_types_lock held.
5881  */
5882 static void tracing_set_nop(struct trace_array *tr)
5883 {
5884 	if (tr->current_trace == &nop_trace)
5885 		return;
5886 
5887 	tr->current_trace->enabled--;
5888 
5889 	if (tr->current_trace->reset)
5890 		tr->current_trace->reset(tr);
5891 
5892 	tr->current_trace = &nop_trace;
5893 }
5894 
5895 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5896 {
5897 	/* Only enable if the directory has been created already. */
5898 	if (!tr->dir)
5899 		return;
5900 
5901 	create_trace_option_files(tr, t);
5902 }
5903 
5904 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5905 {
5906 	struct tracer *t;
5907 #ifdef CONFIG_TRACER_MAX_TRACE
5908 	bool had_max_tr;
5909 #endif
5910 	int ret = 0;
5911 
5912 	mutex_lock(&trace_types_lock);
5913 
5914 	if (!ring_buffer_expanded) {
5915 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5916 						RING_BUFFER_ALL_CPUS);
5917 		if (ret < 0)
5918 			goto out;
5919 		ret = 0;
5920 	}
5921 
5922 	for (t = trace_types; t; t = t->next) {
5923 		if (strcmp(t->name, buf) == 0)
5924 			break;
5925 	}
5926 	if (!t) {
5927 		ret = -EINVAL;
5928 		goto out;
5929 	}
5930 	if (t == tr->current_trace)
5931 		goto out;
5932 
5933 #ifdef CONFIG_TRACER_SNAPSHOT
5934 	if (t->use_max_tr) {
5935 		arch_spin_lock(&tr->max_lock);
5936 		if (tr->cond_snapshot)
5937 			ret = -EBUSY;
5938 		arch_spin_unlock(&tr->max_lock);
5939 		if (ret)
5940 			goto out;
5941 	}
5942 #endif
5943 	/* Some tracers won't work when enabled from the kernel command line */
5944 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5945 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5946 			t->name);
5947 		goto out;
5948 	}
5949 
5950 	/* Some tracers are only allowed for the top level buffer */
5951 	if (!trace_ok_for_array(t, tr)) {
5952 		ret = -EINVAL;
5953 		goto out;
5954 	}
5955 
5956 	/* If trace pipe files are being read, we can't change the tracer */
5957 	if (tr->trace_ref) {
5958 		ret = -EBUSY;
5959 		goto out;
5960 	}
5961 
5962 	trace_branch_disable();
5963 
5964 	tr->current_trace->enabled--;
5965 
5966 	if (tr->current_trace->reset)
5967 		tr->current_trace->reset(tr);
5968 
5969 	/* Current trace needs to be nop_trace before synchronize_rcu */
5970 	tr->current_trace = &nop_trace;
5971 
5972 #ifdef CONFIG_TRACER_MAX_TRACE
5973 	had_max_tr = tr->allocated_snapshot;
5974 
5975 	if (had_max_tr && !t->use_max_tr) {
5976 		/*
5977 		 * We need to make sure that the update_max_tr sees that
5978 		 * current_trace changed to nop_trace to keep it from
5979 		 * swapping the buffers after we resize it.
5980 		 * update_max_tr() is called with interrupts disabled,
5981 		 * so a synchronize_rcu() is sufficient.
5982 		 */
5983 		synchronize_rcu();
5984 		free_snapshot(tr);
5985 	}
5986 #endif
5987 
5988 #ifdef CONFIG_TRACER_MAX_TRACE
5989 	if (t->use_max_tr && !had_max_tr) {
5990 		ret = tracing_alloc_snapshot_instance(tr);
5991 		if (ret < 0)
5992 			goto out;
5993 	}
5994 #endif
5995 
5996 	if (t->init) {
5997 		ret = tracer_init(t, tr);
5998 		if (ret)
5999 			goto out;
6000 	}
6001 
6002 	tr->current_trace = t;
6003 	tr->current_trace->enabled++;
6004 	trace_branch_enable(tr);
6005  out:
6006 	mutex_unlock(&trace_types_lock);
6007 
6008 	return ret;
6009 }
6010 
6011 static ssize_t
6012 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6013 			size_t cnt, loff_t *ppos)
6014 {
6015 	struct trace_array *tr = filp->private_data;
6016 	char buf[MAX_TRACER_SIZE+1];
6017 	int i;
6018 	size_t ret;
6019 	int err;
6020 
6021 	ret = cnt;
6022 
6023 	if (cnt > MAX_TRACER_SIZE)
6024 		cnt = MAX_TRACER_SIZE;
6025 
6026 	if (copy_from_user(buf, ubuf, cnt))
6027 		return -EFAULT;
6028 
6029 	buf[cnt] = 0;
6030 
6031 	/* strip ending whitespace. */
6032 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6033 		buf[i] = 0;
6034 
6035 	err = tracing_set_tracer(tr, buf);
6036 	if (err)
6037 		return err;
6038 
6039 	*ppos += ret;
6040 
6041 	return ret;
6042 }
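
/*
 * Illustrative use from user space, assuming this write handler backs the
 * 'current_tracer' tracefs file as in mainline kernels:
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 */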
6043 
6044 static ssize_t
6045 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6046 		   size_t cnt, loff_t *ppos)
6047 {
6048 	char buf[64];
6049 	int r;
6050 
6051 	r = snprintf(buf, sizeof(buf), "%ld\n",
6052 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6053 	if (r > sizeof(buf))
6054 		r = sizeof(buf);
6055 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6056 }
6057 
6058 static ssize_t
6059 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6060 		    size_t cnt, loff_t *ppos)
6061 {
6062 	unsigned long val;
6063 	int ret;
6064 
6065 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6066 	if (ret)
6067 		return ret;
6068 
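	/* The value is written in microseconds but stored in nanoseconds. */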
6069 	*ptr = val * 1000;
6070 
6071 	return cnt;
6072 }
6073 
6074 static ssize_t
6075 tracing_thresh_read(struct file *filp, char __user *ubuf,
6076 		    size_t cnt, loff_t *ppos)
6077 {
6078 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6079 }
6080 
6081 static ssize_t
6082 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6083 		     size_t cnt, loff_t *ppos)
6084 {
6085 	struct trace_array *tr = filp->private_data;
6086 	int ret;
6087 
6088 	mutex_lock(&trace_types_lock);
6089 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6090 	if (ret < 0)
6091 		goto out;
6092 
6093 	if (tr->current_trace->update_thresh) {
6094 		ret = tr->current_trace->update_thresh(tr);
6095 		if (ret < 0)
6096 			goto out;
6097 	}
6098 
6099 	ret = cnt;
6100 out:
6101 	mutex_unlock(&trace_types_lock);
6102 
6103 	return ret;
6104 }
6105 
6106 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6107 
6108 static ssize_t
6109 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6110 		     size_t cnt, loff_t *ppos)
6111 {
6112 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6113 }
6114 
6115 static ssize_t
6116 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6117 		      size_t cnt, loff_t *ppos)
6118 {
6119 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6120 }
6121 
6122 #endif
6123 
6124 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6125 {
6126 	struct trace_array *tr = inode->i_private;
6127 	struct trace_iterator *iter;
6128 	int ret;
6129 
6130 	ret = tracing_check_open_get_tr(tr);
6131 	if (ret)
6132 		return ret;
6133 
6134 	mutex_lock(&trace_types_lock);
6135 
6136 	/* create a buffer to store the information to pass to userspace */
6137 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6138 	if (!iter) {
6139 		ret = -ENOMEM;
6140 		__trace_array_put(tr);
6141 		goto out;
6142 	}
6143 
6144 	trace_seq_init(&iter->seq);
6145 	iter->trace = tr->current_trace;
6146 
6147 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6148 		ret = -ENOMEM;
6149 		goto fail;
6150 	}
6151 
6152 	/* trace pipe does not show start of buffer */
6153 	cpumask_setall(iter->started);
6154 
6155 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6156 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6157 
6158 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6159 	if (trace_clocks[tr->clock_id].in_ns)
6160 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6161 
6162 	iter->tr = tr;
6163 	iter->array_buffer = &tr->array_buffer;
6164 	iter->cpu_file = tracing_get_cpu(inode);
6165 	mutex_init(&iter->mutex);
6166 	filp->private_data = iter;
6167 
6168 	if (iter->trace->pipe_open)
6169 		iter->trace->pipe_open(iter);
6170 
6171 	nonseekable_open(inode, filp);
6172 
6173 	tr->trace_ref++;
6174 out:
6175 	mutex_unlock(&trace_types_lock);
6176 	return ret;
6177 
6178 fail:
6179 	kfree(iter);
6180 	__trace_array_put(tr);
6181 	mutex_unlock(&trace_types_lock);
6182 	return ret;
6183 }
6184 
6185 static int tracing_release_pipe(struct inode *inode, struct file *file)
6186 {
6187 	struct trace_iterator *iter = file->private_data;
6188 	struct trace_array *tr = inode->i_private;
6189 
6190 	mutex_lock(&trace_types_lock);
6191 
6192 	tr->trace_ref--;
6193 
6194 	if (iter->trace->pipe_close)
6195 		iter->trace->pipe_close(iter);
6196 
6197 	mutex_unlock(&trace_types_lock);
6198 
6199 	free_cpumask_var(iter->started);
6200 	mutex_destroy(&iter->mutex);
6201 	kfree(iter);
6202 
6203 	trace_array_put(tr);
6204 
6205 	return 0;
6206 }
6207 
6208 static __poll_t
6209 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6210 {
6211 	struct trace_array *tr = iter->tr;
6212 
6213 	/* Iterators are static, they should be filled or empty */
6214 	if (trace_buffer_iter(iter, iter->cpu_file))
6215 		return EPOLLIN | EPOLLRDNORM;
6216 
6217 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6218 		/*
6219 		 * Always select as readable when in blocking mode
6220 		 */
6221 		return EPOLLIN | EPOLLRDNORM;
6222 	else
6223 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6224 					     filp, poll_table);
6225 }
6226 
6227 static __poll_t
6228 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6229 {
6230 	struct trace_iterator *iter = filp->private_data;
6231 
6232 	return trace_poll(iter, filp, poll_table);
6233 }
6234 
6235 /* Must be called with iter->mutex held. */
6236 static int tracing_wait_pipe(struct file *filp)
6237 {
6238 	struct trace_iterator *iter = filp->private_data;
6239 	int ret;
6240 
6241 	while (trace_empty(iter)) {
6242 
6243 		if ((filp->f_flags & O_NONBLOCK)) {
6244 			return -EAGAIN;
6245 		}
6246 
6247 		/*
6248 		 * We block until we read something. We still block if
6249 		 * tracing is disabled but we have never read anything.
6250 		 * This allows a user to cat this file, and then enable
6251 		 * tracing. But after we have read something, we give an
6252 		 * EOF when tracing is again disabled.
6253 		 *
6254 		 * iter->pos will be 0 if we haven't read anything.
6255 		 */
6256 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6257 			break;
6258 
6259 		mutex_unlock(&iter->mutex);
6260 
6261 		ret = wait_on_pipe(iter, 0);
6262 
6263 		mutex_lock(&iter->mutex);
6264 
6265 		if (ret)
6266 			return ret;
6267 	}
6268 
6269 	return 1;
6270 }
6271 
6272 /*
6273  * Consumer reader.
6274  */
6275 static ssize_t
6276 tracing_read_pipe(struct file *filp, char __user *ubuf,
6277 		  size_t cnt, loff_t *ppos)
6278 {
6279 	struct trace_iterator *iter = filp->private_data;
6280 	ssize_t sret;
6281 
6282 	/*
6283 	 * Avoid more than one consumer on a single file descriptor
6284 	 * This is just a matter of trace coherency; the ring buffer itself
6285 	 * is protected.
6286 	 */
6287 	mutex_lock(&iter->mutex);
6288 
6289 	/* return any leftover data */
6290 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6291 	if (sret != -EBUSY)
6292 		goto out;
6293 
6294 	trace_seq_init(&iter->seq);
6295 
6296 	if (iter->trace->read) {
6297 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6298 		if (sret)
6299 			goto out;
6300 	}
6301 
6302 waitagain:
6303 	sret = tracing_wait_pipe(filp);
6304 	if (sret <= 0)
6305 		goto out;
6306 
6307 	/* stop when tracing is finished */
6308 	if (trace_empty(iter)) {
6309 		sret = 0;
6310 		goto out;
6311 	}
6312 
6313 	if (cnt >= PAGE_SIZE)
6314 		cnt = PAGE_SIZE - 1;
6315 
6316 	/* reset all but tr, trace, and overruns */
6317 	memset(&iter->seq, 0,
6318 	       sizeof(struct trace_iterator) -
6319 	       offsetof(struct trace_iterator, seq));
6320 	cpumask_clear(iter->started);
6321 	trace_seq_init(&iter->seq);
6322 	iter->pos = -1;
6323 
6324 	trace_event_read_lock();
6325 	trace_access_lock(iter->cpu_file);
6326 	while (trace_find_next_entry_inc(iter) != NULL) {
6327 		enum print_line_t ret;
6328 		int save_len = iter->seq.seq.len;
6329 
6330 		ret = print_trace_line(iter);
6331 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6332 			/* don't print partial lines */
6333 			iter->seq.seq.len = save_len;
6334 			break;
6335 		}
6336 		if (ret != TRACE_TYPE_NO_CONSUME)
6337 			trace_consume(iter);
6338 
6339 		if (trace_seq_used(&iter->seq) >= cnt)
6340 			break;
6341 
6342 		/*
6343 		 * Setting the full flag means we reached the trace_seq buffer
6344 		 * size and should have left via the partial-line condition above.
6345 		 * One of the trace_seq_* functions is not used properly.
6346 		 */
6347 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6348 			  iter->ent->type);
6349 	}
6350 	trace_access_unlock(iter->cpu_file);
6351 	trace_event_read_unlock();
6352 
6353 	/* Now copy what we have to the user */
6354 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6355 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6356 		trace_seq_init(&iter->seq);
6357 
6358 	/*
6359 	 * If there was nothing to send to user, in spite of consuming trace
6360 	 * entries, go back to wait for more entries.
6361 	 */
6362 	if (sret == -EBUSY)
6363 		goto waitagain;
6364 
6365 out:
6366 	mutex_unlock(&iter->mutex);
6367 
6368 	return sret;
6369 }
6370 
6371 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6372 				     unsigned int idx)
6373 {
6374 	__free_page(spd->pages[idx]);
6375 }
6376 
6377 static size_t
6378 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6379 {
6380 	size_t count;
6381 	int save_len;
6382 	int ret;
6383 
6384 	/* Seq buffer is page-sized, exactly what we need. */
6385 	for (;;) {
6386 		save_len = iter->seq.seq.len;
6387 		ret = print_trace_line(iter);
6388 
6389 		if (trace_seq_has_overflowed(&iter->seq)) {
6390 			iter->seq.seq.len = save_len;
6391 			break;
6392 		}
6393 
6394 		/*
6395 		 * This should not be hit, because it should only
6396 		 * be set if the iter->seq overflowed. But check it
6397 		 * anyway to be safe.
6398 		 */
6399 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6400 			iter->seq.seq.len = save_len;
6401 			break;
6402 		}
6403 
6404 		count = trace_seq_used(&iter->seq) - save_len;
6405 		if (rem < count) {
6406 			rem = 0;
6407 			iter->seq.seq.len = save_len;
6408 			break;
6409 		}
6410 
6411 		if (ret != TRACE_TYPE_NO_CONSUME)
6412 			trace_consume(iter);
6413 		rem -= count;
6414 		if (!trace_find_next_entry_inc(iter))	{
6415 			rem = 0;
6416 			iter->ent = NULL;
6417 			break;
6418 		}
6419 	}
6420 
6421 	return rem;
6422 }
6423 
6424 static ssize_t tracing_splice_read_pipe(struct file *filp,
6425 					loff_t *ppos,
6426 					struct pipe_inode_info *pipe,
6427 					size_t len,
6428 					unsigned int flags)
6429 {
6430 	struct page *pages_def[PIPE_DEF_BUFFERS];
6431 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6432 	struct trace_iterator *iter = filp->private_data;
6433 	struct splice_pipe_desc spd = {
6434 		.pages		= pages_def,
6435 		.partial	= partial_def,
6436 		.nr_pages	= 0, /* This gets updated below. */
6437 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6438 		.ops		= &default_pipe_buf_ops,
6439 		.spd_release	= tracing_spd_release_pipe,
6440 	};
6441 	ssize_t ret;
6442 	size_t rem;
6443 	unsigned int i;
6444 
6445 	if (splice_grow_spd(pipe, &spd))
6446 		return -ENOMEM;
6447 
6448 	mutex_lock(&iter->mutex);
6449 
6450 	if (iter->trace->splice_read) {
6451 		ret = iter->trace->splice_read(iter, filp,
6452 					       ppos, pipe, len, flags);
6453 		if (ret)
6454 			goto out_err;
6455 	}
6456 
6457 	ret = tracing_wait_pipe(filp);
6458 	if (ret <= 0)
6459 		goto out_err;
6460 
6461 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6462 		ret = -EFAULT;
6463 		goto out_err;
6464 	}
6465 
6466 	trace_event_read_lock();
6467 	trace_access_lock(iter->cpu_file);
6468 
6469 	/* Fill as many pages as possible. */
6470 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6471 		spd.pages[i] = alloc_page(GFP_KERNEL);
6472 		if (!spd.pages[i])
6473 			break;
6474 
6475 		rem = tracing_fill_pipe_page(rem, iter);
6476 
6477 		/* Copy the data into the page, so we can start over. */
6478 		ret = trace_seq_to_buffer(&iter->seq,
6479 					  page_address(spd.pages[i]),
6480 					  trace_seq_used(&iter->seq));
6481 		if (ret < 0) {
6482 			__free_page(spd.pages[i]);
6483 			break;
6484 		}
6485 		spd.partial[i].offset = 0;
6486 		spd.partial[i].len = trace_seq_used(&iter->seq);
6487 
6488 		trace_seq_init(&iter->seq);
6489 	}
6490 
6491 	trace_access_unlock(iter->cpu_file);
6492 	trace_event_read_unlock();
6493 	mutex_unlock(&iter->mutex);
6494 
6495 	spd.nr_pages = i;
6496 
6497 	if (i)
6498 		ret = splice_to_pipe(pipe, &spd);
6499 	else
6500 		ret = 0;
6501 out:
6502 	splice_shrink_spd(&spd);
6503 	return ret;
6504 
6505 out_err:
6506 	mutex_unlock(&iter->mutex);
6507 	goto out;
6508 }
6509 
6510 static ssize_t
6511 tracing_entries_read(struct file *filp, char __user *ubuf,
6512 		     size_t cnt, loff_t *ppos)
6513 {
6514 	struct inode *inode = file_inode(filp);
6515 	struct trace_array *tr = inode->i_private;
6516 	int cpu = tracing_get_cpu(inode);
6517 	char buf[64];
6518 	int r = 0;
6519 	ssize_t ret;
6520 
6521 	mutex_lock(&trace_types_lock);
6522 
6523 	if (cpu == RING_BUFFER_ALL_CPUS) {
6524 		int cpu, buf_size_same;
6525 		unsigned long size;
6526 
6527 		size = 0;
6528 		buf_size_same = 1;
6529 		/* check if all cpu sizes are the same */
6530 		for_each_tracing_cpu(cpu) {
6531 			/* fill in the size from first enabled cpu */
6532 			if (size == 0)
6533 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6534 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6535 				buf_size_same = 0;
6536 				break;
6537 			}
6538 		}
6539 
6540 		if (buf_size_same) {
6541 			if (!ring_buffer_expanded)
6542 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6543 					    size >> 10,
6544 					    trace_buf_size >> 10);
6545 			else
6546 				r = sprintf(buf, "%lu\n", size >> 10);
6547 		} else
6548 			r = sprintf(buf, "X\n");
6549 	} else
6550 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6551 
6552 	mutex_unlock(&trace_types_lock);
6553 
6554 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555 	return ret;
6556 }
6557 
6558 static ssize_t
6559 tracing_entries_write(struct file *filp, const char __user *ubuf,
6560 		      size_t cnt, loff_t *ppos)
6561 {
6562 	struct inode *inode = file_inode(filp);
6563 	struct trace_array *tr = inode->i_private;
6564 	unsigned long val;
6565 	int ret;
6566 
6567 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6568 	if (ret)
6569 		return ret;
6570 
6571 	/* must have at least 1 entry */
6572 	if (!val)
6573 		return -EINVAL;
6574 
6575 	/* value is in KB */
6576 	val <<= 10;
6577 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6578 	if (ret < 0)
6579 		return ret;
6580 
6581 	*ppos += cnt;
6582 
6583 	return cnt;
6584 }
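
/*
 * Illustrative use from user space, assuming these handlers back the
 * 'buffer_size_kb' tracefs file as in mainline kernels (the value is
 * per CPU, in kilobytes):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	4096
 */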
6585 
6586 static ssize_t
6587 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6588 				size_t cnt, loff_t *ppos)
6589 {
6590 	struct trace_array *tr = filp->private_data;
6591 	char buf[64];
6592 	int r, cpu;
6593 	unsigned long size = 0, expanded_size = 0;
6594 
6595 	mutex_lock(&trace_types_lock);
6596 	for_each_tracing_cpu(cpu) {
6597 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6598 		if (!ring_buffer_expanded)
6599 			expanded_size += trace_buf_size >> 10;
6600 	}
6601 	if (ring_buffer_expanded)
6602 		r = sprintf(buf, "%lu\n", size);
6603 	else
6604 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6605 	mutex_unlock(&trace_types_lock);
6606 
6607 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6608 }
6609 
6610 static ssize_t
6611 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6612 			  size_t cnt, loff_t *ppos)
6613 {
6614 	/*
6615 	 * There is no need to read what the user has written. This function
6616 	 * is just to make sure that there is no error when "echo" is used.
6617 	 */
6618 
6619 	*ppos += cnt;
6620 
6621 	return cnt;
6622 }
6623 
6624 static int
6625 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6626 {
6627 	struct trace_array *tr = inode->i_private;
6628 
6629 	/* disable tracing ? */
6630 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6631 		tracer_tracing_off(tr);
6632 	/* resize the ring buffer to 0 */
6633 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6634 
6635 	trace_array_put(tr);
6636 
6637 	return 0;
6638 }
6639 
6640 static ssize_t
6641 tracing_mark_write(struct file *filp, const char __user *ubuf,
6642 					size_t cnt, loff_t *fpos)
6643 {
6644 	struct trace_array *tr = filp->private_data;
6645 	struct ring_buffer_event *event;
6646 	enum event_trigger_type tt = ETT_NONE;
6647 	struct trace_buffer *buffer;
6648 	struct print_entry *entry;
6649 	unsigned long irq_flags;
6650 	ssize_t written;
6651 	int size;
6652 	int len;
6653 
6654 /* Used in tracing_mark_raw_write() as well */
6655 #define FAULTED_STR "<faulted>"
6656 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6657 
6658 	if (tracing_disabled)
6659 		return -EINVAL;
6660 
6661 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6662 		return -EINVAL;
6663 
6664 	if (cnt > TRACE_BUF_SIZE)
6665 		cnt = TRACE_BUF_SIZE;
6666 
6667 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6668 
6669 	local_save_flags(irq_flags);
6670 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6671 
6672 	/* If less than "<faulted>", then make sure we can still add that */
6673 	if (cnt < FAULTED_SIZE)
6674 		size += FAULTED_SIZE - cnt;
6675 
6676 	buffer = tr->array_buffer.buffer;
6677 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6678 					    irq_flags, preempt_count());
6679 	if (unlikely(!event))
6680 		/* Ring buffer disabled, return as if not open for write */
6681 		return -EBADF;
6682 
6683 	entry = ring_buffer_event_data(event);
6684 	entry->ip = _THIS_IP_;
6685 
6686 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6687 	if (len) {
6688 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6689 		cnt = FAULTED_SIZE;
6690 		written = -EFAULT;
6691 	} else
6692 		written = cnt;
6693 
6694 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6695 		/* do not add \n before testing triggers, but add \0 */
6696 		entry->buf[cnt] = '\0';
6697 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6698 	}
6699 
6700 	if (entry->buf[cnt - 1] != '\n') {
6701 		entry->buf[cnt] = '\n';
6702 		entry->buf[cnt + 1] = '\0';
6703 	} else
6704 		entry->buf[cnt] = '\0';
6705 
6706 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6707 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6708 	__buffer_unlock_commit(buffer, event);
6709 
6710 	if (tt)
6711 		event_triggers_post_call(tr->trace_marker_file, tt);
6712 
6713 	if (written > 0)
6714 		*fpos += written;
6715 
6716 	return written;
6717 }
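
/*
 * Illustrative use from user space, assuming this handler backs the
 * 'trace_marker' tracefs file as in mainline kernels. The written string
 * shows up in the trace output as a print event:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */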
6718 
6719 /* Limit it for now to 3K (including tag) */
6720 #define RAW_DATA_MAX_SIZE (1024*3)
6721 
6722 static ssize_t
6723 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6724 					size_t cnt, loff_t *fpos)
6725 {
6726 	struct trace_array *tr = filp->private_data;
6727 	struct ring_buffer_event *event;
6728 	struct trace_buffer *buffer;
6729 	struct raw_data_entry *entry;
6730 	unsigned long irq_flags;
6731 	ssize_t written;
6732 	int size;
6733 	int len;
6734 
6735 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6736 
6737 	if (tracing_disabled)
6738 		return -EINVAL;
6739 
6740 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6741 		return -EINVAL;
6742 
6743 	/* The marker must at least have a tag id */
6744 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6745 		return -EINVAL;
6746 
6747 	if (cnt > TRACE_BUF_SIZE)
6748 		cnt = TRACE_BUF_SIZE;
6749 
6750 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6751 
6752 	local_save_flags(irq_flags);
6753 	size = sizeof(*entry) + cnt;
6754 	if (cnt < FAULT_SIZE_ID)
6755 		size += FAULT_SIZE_ID - cnt;
6756 
6757 	buffer = tr->array_buffer.buffer;
6758 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6759 					    irq_flags, preempt_count());
6760 	if (!event)
6761 		/* Ring buffer disabled, return as if not open for write */
6762 		return -EBADF;
6763 
6764 	entry = ring_buffer_event_data(event);
6765 
6766 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6767 	if (len) {
6768 		entry->id = -1;
6769 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6770 		written = -EFAULT;
6771 	} else
6772 		written = cnt;
6773 
6774 	__buffer_unlock_commit(buffer, event);
6775 
6776 	if (written > 0)
6777 		*fpos += written;
6778 
6779 	return written;
6780 }
6781 
6782 static int tracing_clock_show(struct seq_file *m, void *v)
6783 {
6784 	struct trace_array *tr = m->private;
6785 	int i;
6786 
6787 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6788 		seq_printf(m,
6789 			"%s%s%s%s", i ? " " : "",
6790 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6791 			i == tr->clock_id ? "]" : "");
6792 	seq_putc(m, '\n');
6793 
6794 	return 0;
6795 }
6796 
6797 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6798 {
6799 	int i;
6800 
6801 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6802 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6803 			break;
6804 	}
6805 	if (i == ARRAY_SIZE(trace_clocks))
6806 		return -EINVAL;
6807 
6808 	mutex_lock(&trace_types_lock);
6809 
6810 	tr->clock_id = i;
6811 
6812 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6813 
6814 	/*
6815 	 * New clock may not be consistent with the previous clock.
6816 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6817 	 */
6818 	tracing_reset_online_cpus(&tr->array_buffer);
6819 
6820 #ifdef CONFIG_TRACER_MAX_TRACE
6821 	if (tr->max_buffer.buffer)
6822 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6823 	tracing_reset_online_cpus(&tr->max_buffer);
6824 #endif
6825 
6826 	mutex_unlock(&trace_types_lock);
6827 
6828 	return 0;
6829 }
6830 
6831 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6832 				   size_t cnt, loff_t *fpos)
6833 {
6834 	struct seq_file *m = filp->private_data;
6835 	struct trace_array *tr = m->private;
6836 	char buf[64];
6837 	const char *clockstr;
6838 	int ret;
6839 
6840 	if (cnt >= sizeof(buf))
6841 		return -EINVAL;
6842 
6843 	if (copy_from_user(buf, ubuf, cnt))
6844 		return -EFAULT;
6845 
6846 	buf[cnt] = 0;
6847 
6848 	clockstr = strstrip(buf);
6849 
6850 	ret = tracing_set_clock(tr, clockstr);
6851 	if (ret)
6852 		return ret;
6853 
6854 	*fpos += cnt;
6855 
6856 	return cnt;
6857 }
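
/*
 * Illustrative use from user space, assuming this handler backs the
 * 'trace_clock' tracefs file as in mainline kernels. Reading the file
 * lists the available clocks with the current one in brackets:
 *
 *	# echo global > /sys/kernel/tracing/trace_clock
 *	# cat /sys/kernel/tracing/trace_clock
 */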
6858 
6859 static int tracing_clock_open(struct inode *inode, struct file *file)
6860 {
6861 	struct trace_array *tr = inode->i_private;
6862 	int ret;
6863 
6864 	ret = tracing_check_open_get_tr(tr);
6865 	if (ret)
6866 		return ret;
6867 
6868 	ret = single_open(file, tracing_clock_show, inode->i_private);
6869 	if (ret < 0)
6870 		trace_array_put(tr);
6871 
6872 	return ret;
6873 }
6874 
6875 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6876 {
6877 	struct trace_array *tr = m->private;
6878 
6879 	mutex_lock(&trace_types_lock);
6880 
6881 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6882 		seq_puts(m, "delta [absolute]\n");
6883 	else
6884 		seq_puts(m, "[delta] absolute\n");
6885 
6886 	mutex_unlock(&trace_types_lock);
6887 
6888 	return 0;
6889 }
6890 
6891 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6892 {
6893 	struct trace_array *tr = inode->i_private;
6894 	int ret;
6895 
6896 	ret = tracing_check_open_get_tr(tr);
6897 	if (ret)
6898 		return ret;
6899 
6900 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6901 	if (ret < 0)
6902 		trace_array_put(tr);
6903 
6904 	return ret;
6905 }
6906 
6907 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6908 {
6909 	int ret = 0;
6910 
6911 	mutex_lock(&trace_types_lock);
6912 
6913 	if (abs && tr->time_stamp_abs_ref++)
6914 		goto out;
6915 
6916 	if (!abs) {
6917 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6918 			ret = -EINVAL;
6919 			goto out;
6920 		}
6921 
6922 		if (--tr->time_stamp_abs_ref)
6923 			goto out;
6924 	}
6925 
6926 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6927 
6928 #ifdef CONFIG_TRACER_MAX_TRACE
6929 	if (tr->max_buffer.buffer)
6930 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6931 #endif
6932  out:
6933 	mutex_unlock(&trace_types_lock);
6934 
6935 	return ret;
6936 }
6937 
6938 struct ftrace_buffer_info {
6939 	struct trace_iterator	iter;
6940 	void			*spare;
6941 	unsigned int		spare_cpu;
6942 	unsigned int		read;
6943 };
6944 
6945 #ifdef CONFIG_TRACER_SNAPSHOT
6946 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6947 {
6948 	struct trace_array *tr = inode->i_private;
6949 	struct trace_iterator *iter;
6950 	struct seq_file *m;
6951 	int ret;
6952 
6953 	ret = tracing_check_open_get_tr(tr);
6954 	if (ret)
6955 		return ret;
6956 
6957 	if (file->f_mode & FMODE_READ) {
6958 		iter = __tracing_open(inode, file, true);
6959 		if (IS_ERR(iter))
6960 			ret = PTR_ERR(iter);
6961 	} else {
6962 		/* Writes still need the seq_file to hold the private data */
6963 		ret = -ENOMEM;
6964 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6965 		if (!m)
6966 			goto out;
6967 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6968 		if (!iter) {
6969 			kfree(m);
6970 			goto out;
6971 		}
6972 		ret = 0;
6973 
6974 		iter->tr = tr;
6975 		iter->array_buffer = &tr->max_buffer;
6976 		iter->cpu_file = tracing_get_cpu(inode);
6977 		m->private = iter;
6978 		file->private_data = m;
6979 	}
6980 out:
6981 	if (ret < 0)
6982 		trace_array_put(tr);
6983 
6984 	return ret;
6985 }
6986 
6987 static ssize_t
6988 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6989 		       loff_t *ppos)
6990 {
6991 	struct seq_file *m = filp->private_data;
6992 	struct trace_iterator *iter = m->private;
6993 	struct trace_array *tr = iter->tr;
6994 	unsigned long val;
6995 	int ret;
6996 
6997 	ret = tracing_update_buffers();
6998 	if (ret < 0)
6999 		return ret;
7000 
7001 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002 	if (ret)
7003 		return ret;
7004 
7005 	mutex_lock(&trace_types_lock);
7006 
7007 	if (tr->current_trace->use_max_tr) {
7008 		ret = -EBUSY;
7009 		goto out;
7010 	}
7011 
7012 	arch_spin_lock(&tr->max_lock);
7013 	if (tr->cond_snapshot)
7014 		ret = -EBUSY;
7015 	arch_spin_unlock(&tr->max_lock);
7016 	if (ret)
7017 		goto out;
7018 
7019 	switch (val) {
7020 	case 0:
7021 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7022 			ret = -EINVAL;
7023 			break;
7024 		}
7025 		if (tr->allocated_snapshot)
7026 			free_snapshot(tr);
7027 		break;
7028 	case 1:
7029 /* Only allow per-cpu swap if the ring buffer supports it */
7030 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7031 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7032 			ret = -EINVAL;
7033 			break;
7034 		}
7035 #endif
7036 		if (tr->allocated_snapshot)
7037 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7038 					&tr->array_buffer, iter->cpu_file);
7039 		else
7040 			ret = tracing_alloc_snapshot_instance(tr);
7041 		if (ret < 0)
7042 			break;
7043 		local_irq_disable();
7044 		/* Now, we're going to swap */
7045 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7046 			update_max_tr(tr, current, smp_processor_id(), NULL);
7047 		else
7048 			update_max_tr_single(tr, current, iter->cpu_file);
7049 		local_irq_enable();
7050 		break;
7051 	default:
7052 		if (tr->allocated_snapshot) {
7053 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7054 				tracing_reset_online_cpus(&tr->max_buffer);
7055 			else
7056 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7057 		}
7058 		break;
7059 	}
7060 
7061 	if (ret >= 0) {
7062 		*ppos += cnt;
7063 		ret = cnt;
7064 	}
7065 out:
7066 	mutex_unlock(&trace_types_lock);
7067 	return ret;
7068 }
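/*
 * Illustrative note (not part of the original source): the values the
 * "snapshot" file accepts, as handled by the switch above.  Assuming
 * tracefs is mounted at /sys/kernel/tracing, usage looks roughly like:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed, then swap
 *   echo 2 > /sys/kernel/tracing/snapshot   # any value > 1: clear the snapshot
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *
 * As the checks above show, a write of 0 is rejected with -EINVAL on a
 * per-cpu snapshot file, and a per-cpu swap is only allowed when the
 * ring buffer supports it (CONFIG_RING_BUFFER_ALLOW_SWAP).
 */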
7069 
7070 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7071 {
7072 	struct seq_file *m = file->private_data;
7073 	int ret;
7074 
7075 	ret = tracing_release(inode, file);
7076 
7077 	if (file->f_mode & FMODE_READ)
7078 		return ret;
7079 
7080 	/* If write only, the seq_file is just a stub */
7081 	if (m)
7082 		kfree(m->private);
7083 	kfree(m);
7084 
7085 	return 0;
7086 }
7087 
7088 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7089 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7090 				    size_t count, loff_t *ppos);
7091 static int tracing_buffers_release(struct inode *inode, struct file *file);
7092 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7093 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7094 
7095 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7096 {
7097 	struct ftrace_buffer_info *info;
7098 	int ret;
7099 
7100 	/* The following checks for tracefs lockdown */
7101 	ret = tracing_buffers_open(inode, filp);
7102 	if (ret < 0)
7103 		return ret;
7104 
7105 	info = filp->private_data;
7106 
7107 	if (info->iter.trace->use_max_tr) {
7108 		tracing_buffers_release(inode, filp);
7109 		return -EBUSY;
7110 	}
7111 
7112 	info->iter.snapshot = true;
7113 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7114 
7115 	return ret;
7116 }
7117 
7118 #endif /* CONFIG_TRACER_SNAPSHOT */
7119 
7120 
7121 static const struct file_operations tracing_thresh_fops = {
7122 	.open		= tracing_open_generic,
7123 	.read		= tracing_thresh_read,
7124 	.write		= tracing_thresh_write,
7125 	.llseek		= generic_file_llseek,
7126 };
7127 
7128 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7129 static const struct file_operations tracing_max_lat_fops = {
7130 	.open		= tracing_open_generic,
7131 	.read		= tracing_max_lat_read,
7132 	.write		= tracing_max_lat_write,
7133 	.llseek		= generic_file_llseek,
7134 };
7135 #endif
7136 
7137 static const struct file_operations set_tracer_fops = {
7138 	.open		= tracing_open_generic,
7139 	.read		= tracing_set_trace_read,
7140 	.write		= tracing_set_trace_write,
7141 	.llseek		= generic_file_llseek,
7142 };
7143 
7144 static const struct file_operations tracing_pipe_fops = {
7145 	.open		= tracing_open_pipe,
7146 	.poll		= tracing_poll_pipe,
7147 	.read		= tracing_read_pipe,
7148 	.splice_read	= tracing_splice_read_pipe,
7149 	.release	= tracing_release_pipe,
7150 	.llseek		= no_llseek,
7151 };
7152 
7153 static const struct file_operations tracing_entries_fops = {
7154 	.open		= tracing_open_generic_tr,
7155 	.read		= tracing_entries_read,
7156 	.write		= tracing_entries_write,
7157 	.llseek		= generic_file_llseek,
7158 	.release	= tracing_release_generic_tr,
7159 };
7160 
7161 static const struct file_operations tracing_total_entries_fops = {
7162 	.open		= tracing_open_generic_tr,
7163 	.read		= tracing_total_entries_read,
7164 	.llseek		= generic_file_llseek,
7165 	.release	= tracing_release_generic_tr,
7166 };
7167 
7168 static const struct file_operations tracing_free_buffer_fops = {
7169 	.open		= tracing_open_generic_tr,
7170 	.write		= tracing_free_buffer_write,
7171 	.release	= tracing_free_buffer_release,
7172 };
7173 
7174 static const struct file_operations tracing_mark_fops = {
7175 	.open		= tracing_open_generic_tr,
7176 	.write		= tracing_mark_write,
7177 	.llseek		= generic_file_llseek,
7178 	.release	= tracing_release_generic_tr,
7179 };
7180 
7181 static const struct file_operations tracing_mark_raw_fops = {
7182 	.open		= tracing_open_generic_tr,
7183 	.write		= tracing_mark_raw_write,
7184 	.llseek		= generic_file_llseek,
7185 	.release	= tracing_release_generic_tr,
7186 };
7187 
7188 static const struct file_operations trace_clock_fops = {
7189 	.open		= tracing_clock_open,
7190 	.read		= seq_read,
7191 	.llseek		= seq_lseek,
7192 	.release	= tracing_single_release_tr,
7193 	.write		= tracing_clock_write,
7194 };
7195 
7196 static const struct file_operations trace_time_stamp_mode_fops = {
7197 	.open		= tracing_time_stamp_mode_open,
7198 	.read		= seq_read,
7199 	.llseek		= seq_lseek,
7200 	.release	= tracing_single_release_tr,
7201 };
7202 
7203 #ifdef CONFIG_TRACER_SNAPSHOT
7204 static const struct file_operations snapshot_fops = {
7205 	.open		= tracing_snapshot_open,
7206 	.read		= seq_read,
7207 	.write		= tracing_snapshot_write,
7208 	.llseek		= tracing_lseek,
7209 	.release	= tracing_snapshot_release,
7210 };
7211 
7212 static const struct file_operations snapshot_raw_fops = {
7213 	.open		= snapshot_raw_open,
7214 	.read		= tracing_buffers_read,
7215 	.release	= tracing_buffers_release,
7216 	.splice_read	= tracing_buffers_splice_read,
7217 	.llseek		= no_llseek,
7218 };
7219 
7220 #endif /* CONFIG_TRACER_SNAPSHOT */
7221 
7222 #define TRACING_LOG_ERRS_MAX	8
7223 #define TRACING_LOG_LOC_MAX	128
7224 
7225 #define CMD_PREFIX "  Command: "
7226 
7227 struct err_info {
7228 	const char	**errs;	/* ptr to loc-specific array of err strings */
7229 	u8		type;	/* index into errs -> specific err string */
7230 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7231 	u64		ts;
7232 };
7233 
7234 struct tracing_log_err {
7235 	struct list_head	list;
7236 	struct err_info		info;
7237 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7238 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7239 };
7240 
7241 static DEFINE_MUTEX(tracing_err_log_lock);
7242 
7243 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7244 {
7245 	struct tracing_log_err *err;
7246 
7247 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7248 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7249 		if (!err)
7250 			err = ERR_PTR(-ENOMEM);
7251 		tr->n_err_log_entries++;
7252 
7253 		return err;
7254 	}
7255 
7256 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7257 	list_del(&err->list);
7258 
7259 	return err;
7260 }
7261 
7262 /**
7263  * err_pos - find the position of a string within a command for error careting
7264  * @cmd: The tracing command that caused the error
7265  * @str: The string to position the caret at within @cmd
7266  *
7267  * Finds the position of the first occurrence of @str within @cmd.  The
7268  * return value can be passed to tracing_log_err() for caret placement
7269  * within @cmd.
7270  *
7271  * Returns the index within @cmd of the first occurrence of @str or 0
7272  * if @str was not found.
7273  */
7274 unsigned int err_pos(char *cmd, const char *str)
7275 {
7276 	char *found;
7277 
7278 	if (WARN_ON(!strlen(cmd)))
7279 		return 0;
7280 
7281 	found = strstr(cmd, str);
7282 	if (found)
7283 		return found - cmd;
7284 
7285 	return 0;
7286 }
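/*
 * Illustrative example (not part of the original source), using a
 * hypothetical trigger command: for cmd = "hist:keys=pid",
 * err_pos(cmd, "pid") returns 10, the offset of "pid" within the
 * command, which a caller would pass as @pos to tracing_log_err() so
 * the caret lines up under the offending token.  If the string is not
 * found, 0 is returned and the caret points at the start of the command.
 */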
7287 
7288 /**
7289  * tracing_log_err - write an error to the tracing error log
7290  * @tr: The associated trace array for the error (NULL for top level array)
7291  * @loc: A string describing where the error occurred
7292  * @cmd: The tracing command that caused the error
7293  * @errs: The array of loc-specific static error strings
7294  * @type: The index into errs[], which produces the specific static err string
7295  * @pos: The position the caret should be placed in the cmd
7296  *
7297  * Writes an error into tracing/error_log of the form:
7298  *
7299  * <loc>: error: <text>
7300  *   Command: <cmd>
7301  *              ^
7302  *
7303  * tracing/error_log is a small log file containing the last
7304  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7305  * unless there has been a tracing error, and the error log can be
7306  * cleared and have its memory freed by writing the empty string in
7307  * truncation mode to it i.e. echo > tracing/error_log.
7308  *
7309  * NOTE: the @errs array along with the @type param are used to
7310  * produce a static error string - this string is not copied and saved
7311  * when the error is logged - only a pointer to it is saved.  See
7312  * existing callers for examples of how static strings are typically
7313  * defined for use with tracing_log_err().
7314  */
7315 void tracing_log_err(struct trace_array *tr,
7316 		     const char *loc, const char *cmd,
7317 		     const char **errs, u8 type, u8 pos)
7318 {
7319 	struct tracing_log_err *err;
7320 
7321 	if (!tr)
7322 		tr = &global_trace;
7323 
7324 	mutex_lock(&tracing_err_log_lock);
7325 	err = get_tracing_log_err(tr);
7326 	if (PTR_ERR(err) == -ENOMEM) {
7327 		mutex_unlock(&tracing_err_log_lock);
7328 		return;
7329 	}
7330 
7331 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7332 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7333 
7334 	err->info.errs = errs;
7335 	err->info.type = type;
7336 	err->info.pos = pos;
7337 	err->info.ts = local_clock();
7338 
7339 	list_add_tail(&err->list, &tr->err_log);
7340 	mutex_unlock(&tracing_err_log_lock);
7341 }
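/*
 * Illustrative sketch of a caller (hypothetical names, not part of the
 * original source), following the NOTE above: the error strings live in
 * a static array and only the array pointer plus an index are stored
 * with the logged entry.
 *
 *	static const char *foo_errs[] = { "Invalid argument", "Duplicate name" };
 *
 *	(in a hypothetical command parser, with cmd = "foo bar")
 *	tracing_log_err(tr, "foo_cmd", cmd, foo_errs, 1, err_pos(cmd, "bar"));
 *
 * would appear in tracing/error_log roughly as:
 *
 *   [  123.456789] foo_cmd: error: Duplicate name
 *     Command: foo bar
 *                  ^
 */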
7342 
7343 static void clear_tracing_err_log(struct trace_array *tr)
7344 {
7345 	struct tracing_log_err *err, *next;
7346 
7347 	mutex_lock(&tracing_err_log_lock);
7348 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7349 		list_del(&err->list);
7350 		kfree(err);
7351 	}
7352 
7353 	tr->n_err_log_entries = 0;
7354 	mutex_unlock(&tracing_err_log_lock);
7355 }
7356 
7357 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7358 {
7359 	struct trace_array *tr = m->private;
7360 
7361 	mutex_lock(&tracing_err_log_lock);
7362 
7363 	return seq_list_start(&tr->err_log, *pos);
7364 }
7365 
7366 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7367 {
7368 	struct trace_array *tr = m->private;
7369 
7370 	return seq_list_next(v, &tr->err_log, pos);
7371 }
7372 
7373 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7374 {
7375 	mutex_unlock(&tracing_err_log_lock);
7376 }
7377 
7378 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7379 {
7380 	u8 i;
7381 
7382 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7383 		seq_putc(m, ' ');
7384 	for (i = 0; i < pos; i++)
7385 		seq_putc(m, ' ');
7386 	seq_puts(m, "^\n");
7387 }
7388 
7389 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7390 {
7391 	struct tracing_log_err *err = v;
7392 
7393 	if (err) {
7394 		const char *err_text = err->info.errs[err->info.type];
7395 		u64 sec = err->info.ts;
7396 		u32 nsec;
7397 
7398 		nsec = do_div(sec, NSEC_PER_SEC);
7399 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7400 			   err->loc, err_text);
7401 		seq_printf(m, "%s", err->cmd);
7402 		tracing_err_log_show_pos(m, err->info.pos);
7403 	}
7404 
7405 	return 0;
7406 }
7407 
7408 static const struct seq_operations tracing_err_log_seq_ops = {
7409 	.start  = tracing_err_log_seq_start,
7410 	.next   = tracing_err_log_seq_next,
7411 	.stop   = tracing_err_log_seq_stop,
7412 	.show   = tracing_err_log_seq_show
7413 };
7414 
7415 static int tracing_err_log_open(struct inode *inode, struct file *file)
7416 {
7417 	struct trace_array *tr = inode->i_private;
7418 	int ret = 0;
7419 
7420 	ret = tracing_check_open_get_tr(tr);
7421 	if (ret)
7422 		return ret;
7423 
7424 	/* If this file was opened for write, then erase contents */
7425 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7426 		clear_tracing_err_log(tr);
7427 
7428 	if (file->f_mode & FMODE_READ) {
7429 		ret = seq_open(file, &tracing_err_log_seq_ops);
7430 		if (!ret) {
7431 			struct seq_file *m = file->private_data;
7432 			m->private = tr;
7433 		} else {
7434 			trace_array_put(tr);
7435 		}
7436 	}
7437 	return ret;
7438 }
7439 
7440 static ssize_t tracing_err_log_write(struct file *file,
7441 				     const char __user *buffer,
7442 				     size_t count, loff_t *ppos)
7443 {
7444 	return count;
7445 }
7446 
7447 static int tracing_err_log_release(struct inode *inode, struct file *file)
7448 {
7449 	struct trace_array *tr = inode->i_private;
7450 
7451 	trace_array_put(tr);
7452 
7453 	if (file->f_mode & FMODE_READ)
7454 		seq_release(inode, file);
7455 
7456 	return 0;
7457 }
7458 
7459 static const struct file_operations tracing_err_log_fops = {
7460 	.open           = tracing_err_log_open,
7461 	.write		= tracing_err_log_write,
7462 	.read           = seq_read,
7463 	.llseek         = seq_lseek,
7464 	.release        = tracing_err_log_release,
7465 };
7466 
7467 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7468 {
7469 	struct trace_array *tr = inode->i_private;
7470 	struct ftrace_buffer_info *info;
7471 	int ret;
7472 
7473 	ret = tracing_check_open_get_tr(tr);
7474 	if (ret)
7475 		return ret;
7476 
7477 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7478 	if (!info) {
7479 		trace_array_put(tr);
7480 		return -ENOMEM;
7481 	}
7482 
7483 	mutex_lock(&trace_types_lock);
7484 
7485 	info->iter.tr		= tr;
7486 	info->iter.cpu_file	= tracing_get_cpu(inode);
7487 	info->iter.trace	= tr->current_trace;
7488 	info->iter.array_buffer = &tr->array_buffer;
7489 	info->spare		= NULL;
7490 	/* Force reading ring buffer for first read */
7491 	info->read		= (unsigned int)-1;
7492 
7493 	filp->private_data = info;
7494 
7495 	tr->trace_ref++;
7496 
7497 	mutex_unlock(&trace_types_lock);
7498 
7499 	ret = nonseekable_open(inode, filp);
7500 	if (ret < 0)
7501 		trace_array_put(tr);
7502 
7503 	return ret;
7504 }
7505 
7506 static __poll_t
7507 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7508 {
7509 	struct ftrace_buffer_info *info = filp->private_data;
7510 	struct trace_iterator *iter = &info->iter;
7511 
7512 	return trace_poll(iter, filp, poll_table);
7513 }
7514 
7515 static ssize_t
7516 tracing_buffers_read(struct file *filp, char __user *ubuf,
7517 		     size_t count, loff_t *ppos)
7518 {
7519 	struct ftrace_buffer_info *info = filp->private_data;
7520 	struct trace_iterator *iter = &info->iter;
7521 	ssize_t ret = 0;
7522 	ssize_t size;
7523 
7524 	if (!count)
7525 		return 0;
7526 
7527 #ifdef CONFIG_TRACER_MAX_TRACE
7528 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7529 		return -EBUSY;
7530 #endif
7531 
7532 	if (!info->spare) {
7533 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7534 							  iter->cpu_file);
7535 		if (IS_ERR(info->spare)) {
7536 			ret = PTR_ERR(info->spare);
7537 			info->spare = NULL;
7538 		} else {
7539 			info->spare_cpu = iter->cpu_file;
7540 		}
7541 	}
7542 	if (!info->spare)
7543 		return ret;
7544 
7545 	/* Do we have previous read data to read? */
7546 	if (info->read < PAGE_SIZE)
7547 		goto read;
7548 
7549  again:
7550 	trace_access_lock(iter->cpu_file);
7551 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7552 				    &info->spare,
7553 				    count,
7554 				    iter->cpu_file, 0);
7555 	trace_access_unlock(iter->cpu_file);
7556 
7557 	if (ret < 0) {
7558 		if (trace_empty(iter)) {
7559 			if ((filp->f_flags & O_NONBLOCK))
7560 				return -EAGAIN;
7561 
7562 			ret = wait_on_pipe(iter, 0);
7563 			if (ret)
7564 				return ret;
7565 
7566 			goto again;
7567 		}
7568 		return 0;
7569 	}
7570 
7571 	info->read = 0;
7572  read:
7573 	size = PAGE_SIZE - info->read;
7574 	if (size > count)
7575 		size = count;
7576 
7577 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7578 	if (ret == size)
7579 		return -EFAULT;
7580 
7581 	size -= ret;
7582 
7583 	*ppos += size;
7584 	info->read += size;
7585 
7586 	return size;
7587 }
7588 
7589 static int tracing_buffers_release(struct inode *inode, struct file *file)
7590 {
7591 	struct ftrace_buffer_info *info = file->private_data;
7592 	struct trace_iterator *iter = &info->iter;
7593 
7594 	mutex_lock(&trace_types_lock);
7595 
7596 	iter->tr->trace_ref--;
7597 
7598 	__trace_array_put(iter->tr);
7599 
7600 	if (info->spare)
7601 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7602 					   info->spare_cpu, info->spare);
7603 	kvfree(info);
7604 
7605 	mutex_unlock(&trace_types_lock);
7606 
7607 	return 0;
7608 }
7609 
7610 struct buffer_ref {
7611 	struct trace_buffer	*buffer;
7612 	void			*page;
7613 	int			cpu;
7614 	refcount_t		refcount;
7615 };
7616 
7617 static void buffer_ref_release(struct buffer_ref *ref)
7618 {
7619 	if (!refcount_dec_and_test(&ref->refcount))
7620 		return;
7621 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7622 	kfree(ref);
7623 }
7624 
7625 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7626 				    struct pipe_buffer *buf)
7627 {
7628 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7629 
7630 	buffer_ref_release(ref);
7631 	buf->private = 0;
7632 }
7633 
7634 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7635 				struct pipe_buffer *buf)
7636 {
7637 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7638 
7639 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7640 		return false;
7641 
7642 	refcount_inc(&ref->refcount);
7643 	return true;
7644 }
7645 
7646 /* Pipe buffer operations for a buffer. */
7647 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7648 	.release		= buffer_pipe_buf_release,
7649 	.get			= buffer_pipe_buf_get,
7650 };
7651 
7652 /*
7653  * Callback from splice_to_pipe(), if we need to release some pages
7654  * at the end of the spd in case we error'ed out in filling the pipe.
7655  */
7656 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7657 {
7658 	struct buffer_ref *ref =
7659 		(struct buffer_ref *)spd->partial[i].private;
7660 
7661 	buffer_ref_release(ref);
7662 	spd->partial[i].private = 0;
7663 }
7664 
7665 static ssize_t
7666 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7667 			    struct pipe_inode_info *pipe, size_t len,
7668 			    unsigned int flags)
7669 {
7670 	struct ftrace_buffer_info *info = file->private_data;
7671 	struct trace_iterator *iter = &info->iter;
7672 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7673 	struct page *pages_def[PIPE_DEF_BUFFERS];
7674 	struct splice_pipe_desc spd = {
7675 		.pages		= pages_def,
7676 		.partial	= partial_def,
7677 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7678 		.ops		= &buffer_pipe_buf_ops,
7679 		.spd_release	= buffer_spd_release,
7680 	};
7681 	struct buffer_ref *ref;
7682 	int entries, i;
7683 	ssize_t ret = 0;
7684 
7685 #ifdef CONFIG_TRACER_MAX_TRACE
7686 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7687 		return -EBUSY;
7688 #endif
7689 
7690 	if (*ppos & (PAGE_SIZE - 1))
7691 		return -EINVAL;
7692 
7693 	if (len & (PAGE_SIZE - 1)) {
7694 		if (len < PAGE_SIZE)
7695 			return -EINVAL;
7696 		len &= PAGE_MASK;
7697 	}
7698 
7699 	if (splice_grow_spd(pipe, &spd))
7700 		return -ENOMEM;
7701 
7702  again:
7703 	trace_access_lock(iter->cpu_file);
7704 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7705 
7706 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7707 		struct page *page;
7708 		int r;
7709 
7710 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7711 		if (!ref) {
7712 			ret = -ENOMEM;
7713 			break;
7714 		}
7715 
7716 		refcount_set(&ref->refcount, 1);
7717 		ref->buffer = iter->array_buffer->buffer;
7718 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7719 		if (IS_ERR(ref->page)) {
7720 			ret = PTR_ERR(ref->page);
7721 			ref->page = NULL;
7722 			kfree(ref);
7723 			break;
7724 		}
7725 		ref->cpu = iter->cpu_file;
7726 
7727 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7728 					  len, iter->cpu_file, 1);
7729 		if (r < 0) {
7730 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7731 						   ref->page);
7732 			kfree(ref);
7733 			break;
7734 		}
7735 
7736 		page = virt_to_page(ref->page);
7737 
7738 		spd.pages[i] = page;
7739 		spd.partial[i].len = PAGE_SIZE;
7740 		spd.partial[i].offset = 0;
7741 		spd.partial[i].private = (unsigned long)ref;
7742 		spd.nr_pages++;
7743 		*ppos += PAGE_SIZE;
7744 
7745 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7746 	}
7747 
7748 	trace_access_unlock(iter->cpu_file);
7749 	spd.nr_pages = i;
7750 
7751 	/* did we read anything? */
7752 	if (!spd.nr_pages) {
7753 		if (ret)
7754 			goto out;
7755 
7756 		ret = -EAGAIN;
7757 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7758 			goto out;
7759 
7760 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7761 		if (ret)
7762 			goto out;
7763 
7764 		goto again;
7765 	}
7766 
7767 	ret = splice_to_pipe(pipe, &spd);
7768 out:
7769 	splice_shrink_spd(&spd);
7770 
7771 	return ret;
7772 }
7773 
7774 static const struct file_operations tracing_buffers_fops = {
7775 	.open		= tracing_buffers_open,
7776 	.read		= tracing_buffers_read,
7777 	.poll		= tracing_buffers_poll,
7778 	.release	= tracing_buffers_release,
7779 	.splice_read	= tracing_buffers_splice_read,
7780 	.llseek		= no_llseek,
7781 };
7782 
7783 static ssize_t
7784 tracing_stats_read(struct file *filp, char __user *ubuf,
7785 		   size_t count, loff_t *ppos)
7786 {
7787 	struct inode *inode = file_inode(filp);
7788 	struct trace_array *tr = inode->i_private;
7789 	struct array_buffer *trace_buf = &tr->array_buffer;
7790 	int cpu = tracing_get_cpu(inode);
7791 	struct trace_seq *s;
7792 	unsigned long cnt;
7793 	unsigned long long t;
7794 	unsigned long usec_rem;
7795 
7796 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7797 	if (!s)
7798 		return -ENOMEM;
7799 
7800 	trace_seq_init(s);
7801 
7802 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7803 	trace_seq_printf(s, "entries: %ld\n", cnt);
7804 
7805 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7806 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7807 
7808 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7809 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7810 
7811 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7812 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7813 
7814 	if (trace_clocks[tr->clock_id].in_ns) {
7815 		/* local or global for trace_clock */
7816 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7817 		usec_rem = do_div(t, USEC_PER_SEC);
7818 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7819 								t, usec_rem);
7820 
7821 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7822 		usec_rem = do_div(t, USEC_PER_SEC);
7823 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7824 	} else {
7825 		/* counter or tsc mode for trace_clock */
7826 		trace_seq_printf(s, "oldest event ts: %llu\n",
7827 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7828 
7829 		trace_seq_printf(s, "now ts: %llu\n",
7830 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7831 	}
7832 
7833 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7834 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7835 
7836 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7837 	trace_seq_printf(s, "read events: %ld\n", cnt);
7838 
7839 	count = simple_read_from_buffer(ubuf, count, ppos,
7840 					s->buffer, trace_seq_used(s));
7841 
7842 	kfree(s);
7843 
7844 	return count;
7845 }
7846 
7847 static const struct file_operations tracing_stats_fops = {
7848 	.open		= tracing_open_generic_tr,
7849 	.read		= tracing_stats_read,
7850 	.llseek		= generic_file_llseek,
7851 	.release	= tracing_release_generic_tr,
7852 };
7853 
7854 #ifdef CONFIG_DYNAMIC_FTRACE
7855 
7856 static ssize_t
7857 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7858 		  size_t cnt, loff_t *ppos)
7859 {
7860 	ssize_t ret;
7861 	char *buf;
7862 	int r;
7863 
7864 	/* 256 should be plenty to hold the amount needed */
7865 	buf = kmalloc(256, GFP_KERNEL);
7866 	if (!buf)
7867 		return -ENOMEM;
7868 
7869 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7870 		      ftrace_update_tot_cnt,
7871 		      ftrace_number_of_pages,
7872 		      ftrace_number_of_groups);
7873 
7874 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7875 	kfree(buf);
7876 	return ret;
7877 }
7878 
7879 static const struct file_operations tracing_dyn_info_fops = {
7880 	.open		= tracing_open_generic,
7881 	.read		= tracing_read_dyn_info,
7882 	.llseek		= generic_file_llseek,
7883 };
7884 #endif /* CONFIG_DYNAMIC_FTRACE */
7885 
7886 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7887 static void
7888 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7889 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7890 		void *data)
7891 {
7892 	tracing_snapshot_instance(tr);
7893 }
7894 
7895 static void
7896 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7897 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7898 		      void *data)
7899 {
7900 	struct ftrace_func_mapper *mapper = data;
7901 	long *count = NULL;
7902 
7903 	if (mapper)
7904 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7905 
7906 	if (count) {
7907 
7908 		if (*count <= 0)
7909 			return;
7910 
7911 		(*count)--;
7912 	}
7913 
7914 	tracing_snapshot_instance(tr);
7915 }
7916 
7917 static int
7918 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7919 		      struct ftrace_probe_ops *ops, void *data)
7920 {
7921 	struct ftrace_func_mapper *mapper = data;
7922 	long *count = NULL;
7923 
7924 	seq_printf(m, "%ps:", (void *)ip);
7925 
7926 	seq_puts(m, "snapshot");
7927 
7928 	if (mapper)
7929 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7930 
7931 	if (count)
7932 		seq_printf(m, ":count=%ld\n", *count);
7933 	else
7934 		seq_puts(m, ":unlimited\n");
7935 
7936 	return 0;
7937 }
7938 
7939 static int
7940 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7941 		     unsigned long ip, void *init_data, void **data)
7942 {
7943 	struct ftrace_func_mapper *mapper = *data;
7944 
7945 	if (!mapper) {
7946 		mapper = allocate_ftrace_func_mapper();
7947 		if (!mapper)
7948 			return -ENOMEM;
7949 		*data = mapper;
7950 	}
7951 
7952 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7953 }
7954 
7955 static void
7956 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7957 		     unsigned long ip, void *data)
7958 {
7959 	struct ftrace_func_mapper *mapper = data;
7960 
7961 	if (!ip) {
7962 		if (!mapper)
7963 			return;
7964 		free_ftrace_func_mapper(mapper, NULL);
7965 		return;
7966 	}
7967 
7968 	ftrace_func_mapper_remove_ip(mapper, ip);
7969 }
7970 
7971 static struct ftrace_probe_ops snapshot_probe_ops = {
7972 	.func			= ftrace_snapshot,
7973 	.print			= ftrace_snapshot_print,
7974 };
7975 
7976 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7977 	.func			= ftrace_count_snapshot,
7978 	.print			= ftrace_snapshot_print,
7979 	.init			= ftrace_snapshot_init,
7980 	.free			= ftrace_snapshot_free,
7981 };
7982 
7983 static int
7984 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7985 			       char *glob, char *cmd, char *param, int enable)
7986 {
7987 	struct ftrace_probe_ops *ops;
7988 	void *count = (void *)-1;
7989 	char *number;
7990 	int ret;
7991 
7992 	if (!tr)
7993 		return -ENODEV;
7994 
7995 	/* hash funcs only work with set_ftrace_filter */
7996 	if (!enable)
7997 		return -EINVAL;
7998 
7999 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8000 
8001 	if (glob[0] == '!')
8002 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8003 
8004 	if (!param)
8005 		goto out_reg;
8006 
8007 	number = strsep(&param, ":");
8008 
8009 	if (!strlen(number))
8010 		goto out_reg;
8011 
8012 	/*
8013 	 * We use the callback data field (which is a pointer)
8014 	 * as our counter.
8015 	 */
8016 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8017 	if (ret)
8018 		return ret;
8019 
8020  out_reg:
8021 	ret = tracing_alloc_snapshot_instance(tr);
8022 	if (ret < 0)
8023 		goto out;
8024 
8025 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8026 
8027  out:
8028 	return ret < 0 ? ret : 0;
8029 }
8030 
8031 static struct ftrace_func_command ftrace_snapshot_cmd = {
8032 	.name			= "snapshot",
8033 	.func			= ftrace_trace_snapshot_callback,
8034 };
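/*
 * Illustrative usage (not part of the original source): once registered,
 * the "snapshot" command is attached to functions via set_ftrace_filter,
 * e.g. (function name chosen arbitrarily):
 *
 *   echo 'do_sys_open:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *   echo 'do_sys_open:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *   echo '!do_sys_open:snapshot'  > set_ftrace_filter   # remove the probe
 *
 * The optional ":<count>" is what ftrace_trace_snapshot_callback() parses
 * into the probe's counter, and it is what selects snapshot_count_probe_ops
 * over snapshot_probe_ops.
 */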
8035 
8036 static __init int register_snapshot_cmd(void)
8037 {
8038 	return register_ftrace_command(&ftrace_snapshot_cmd);
8039 }
8040 #else
8041 static inline __init int register_snapshot_cmd(void) { return 0; }
8042 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8043 
8044 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8045 {
8046 	if (WARN_ON(!tr->dir))
8047 		return ERR_PTR(-ENODEV);
8048 
8049 	/* Top directory uses NULL as the parent */
8050 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8051 		return NULL;
8052 
8053 	/* All sub buffers have a descriptor */
8054 	return tr->dir;
8055 }
8056 
8057 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8058 {
8059 	struct dentry *d_tracer;
8060 
8061 	if (tr->percpu_dir)
8062 		return tr->percpu_dir;
8063 
8064 	d_tracer = tracing_get_dentry(tr);
8065 	if (IS_ERR(d_tracer))
8066 		return NULL;
8067 
8068 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8069 
8070 	MEM_FAIL(!tr->percpu_dir,
8071 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8072 
8073 	return tr->percpu_dir;
8074 }
8075 
8076 static struct dentry *
8077 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8078 		      void *data, long cpu, const struct file_operations *fops)
8079 {
8080 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8081 
8082 	if (ret) /* See tracing_get_cpu() */
8083 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8084 	return ret;
8085 }
8086 
8087 static void
8088 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8089 {
8090 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8091 	struct dentry *d_cpu;
8092 	char cpu_dir[30]; /* 30 characters should be more than enough */
8093 
8094 	if (!d_percpu)
8095 		return;
8096 
8097 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8098 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8099 	if (!d_cpu) {
8100 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8101 		return;
8102 	}
8103 
8104 	/* per cpu trace_pipe */
8105 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8106 				tr, cpu, &tracing_pipe_fops);
8107 
8108 	/* per cpu trace */
8109 	trace_create_cpu_file("trace", 0644, d_cpu,
8110 				tr, cpu, &tracing_fops);
8111 
8112 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8113 				tr, cpu, &tracing_buffers_fops);
8114 
8115 	trace_create_cpu_file("stats", 0444, d_cpu,
8116 				tr, cpu, &tracing_stats_fops);
8117 
8118 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8119 				tr, cpu, &tracing_entries_fops);
8120 
8121 #ifdef CONFIG_TRACER_SNAPSHOT
8122 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8123 				tr, cpu, &snapshot_fops);
8124 
8125 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8126 				tr, cpu, &snapshot_raw_fops);
8127 #endif
8128 }
8129 
8130 #ifdef CONFIG_FTRACE_SELFTEST
8131 /* Let selftest have access to static functions in this file */
8132 #include "trace_selftest.c"
8133 #endif
8134 
8135 static ssize_t
8136 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8137 			loff_t *ppos)
8138 {
8139 	struct trace_option_dentry *topt = filp->private_data;
8140 	char *buf;
8141 
8142 	if (topt->flags->val & topt->opt->bit)
8143 		buf = "1\n";
8144 	else
8145 		buf = "0\n";
8146 
8147 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8148 }
8149 
8150 static ssize_t
8151 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8152 			 loff_t *ppos)
8153 {
8154 	struct trace_option_dentry *topt = filp->private_data;
8155 	unsigned long val;
8156 	int ret;
8157 
8158 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8159 	if (ret)
8160 		return ret;
8161 
8162 	if (val != 0 && val != 1)
8163 		return -EINVAL;
8164 
8165 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8166 		mutex_lock(&trace_types_lock);
8167 		ret = __set_tracer_option(topt->tr, topt->flags,
8168 					  topt->opt, !val);
8169 		mutex_unlock(&trace_types_lock);
8170 		if (ret)
8171 			return ret;
8172 	}
8173 
8174 	*ppos += cnt;
8175 
8176 	return cnt;
8177 }
8178 
8179 
8180 static const struct file_operations trace_options_fops = {
8181 	.open = tracing_open_generic,
8182 	.read = trace_options_read,
8183 	.write = trace_options_write,
8184 	.llseek	= generic_file_llseek,
8185 };
8186 
8187 /*
8188  * In order to pass in both the trace_array descriptor as well as the index
8189  * to the flag that the trace option file represents, the trace_array
8190  * has a character array of trace_flags_index[], which holds the index
8191  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8192  * The address of this character array is passed to the flag option file
8193  * read/write callbacks.
8194  *
8195  * In order to extract both the index and the trace_array descriptor,
8196  * get_tr_index() uses the following algorithm.
8197  *
8198  *   idx = *ptr;
8199  *
8200  * As the pointer itself contains the address of the index (remember
8201  * index[1] == 1).
8202  *
8203  * Then to get the trace_array descriptor, by subtracting that index
8204  * from the ptr, we get to the start of the index itself.
8205  *
8206  *   ptr - idx == &index[0]
8207  *
8208  * Then a simple container_of() from that pointer gets us to the
8209  * trace_array descriptor.
8210  */
8211 static void get_tr_index(void *data, struct trace_array **ptr,
8212 			 unsigned int *pindex)
8213 {
8214 	*pindex = *(unsigned char *)data;
8215 
8216 	*ptr = container_of(data - *pindex, struct trace_array,
8217 			    trace_flags_index);
8218 }
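/*
 * Worked example (illustrative, not part of the original source): if an
 * option file was created with data == &tr->trace_flags_index[3], then
 * *(unsigned char *)data == 3, so data - 3 == &tr->trace_flags_index[0],
 * and container_of() on that address yields the enclosing trace_array.
 * This only works because init_trace_flags_index() fills index[i] = i.
 */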
8219 
8220 static ssize_t
8221 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8222 			loff_t *ppos)
8223 {
8224 	void *tr_index = filp->private_data;
8225 	struct trace_array *tr;
8226 	unsigned int index;
8227 	char *buf;
8228 
8229 	get_tr_index(tr_index, &tr, &index);
8230 
8231 	if (tr->trace_flags & (1 << index))
8232 		buf = "1\n";
8233 	else
8234 		buf = "0\n";
8235 
8236 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8237 }
8238 
8239 static ssize_t
8240 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8241 			 loff_t *ppos)
8242 {
8243 	void *tr_index = filp->private_data;
8244 	struct trace_array *tr;
8245 	unsigned int index;
8246 	unsigned long val;
8247 	int ret;
8248 
8249 	get_tr_index(tr_index, &tr, &index);
8250 
8251 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8252 	if (ret)
8253 		return ret;
8254 
8255 	if (val != 0 && val != 1)
8256 		return -EINVAL;
8257 
8258 	mutex_lock(&event_mutex);
8259 	mutex_lock(&trace_types_lock);
8260 	ret = set_tracer_flag(tr, 1 << index, val);
8261 	mutex_unlock(&trace_types_lock);
8262 	mutex_unlock(&event_mutex);
8263 
8264 	if (ret < 0)
8265 		return ret;
8266 
8267 	*ppos += cnt;
8268 
8269 	return cnt;
8270 }
8271 
8272 static const struct file_operations trace_options_core_fops = {
8273 	.open = tracing_open_generic,
8274 	.read = trace_options_core_read,
8275 	.write = trace_options_core_write,
8276 	.llseek = generic_file_llseek,
8277 };
8278 
8279 struct dentry *trace_create_file(const char *name,
8280 				 umode_t mode,
8281 				 struct dentry *parent,
8282 				 void *data,
8283 				 const struct file_operations *fops)
8284 {
8285 	struct dentry *ret;
8286 
8287 	ret = tracefs_create_file(name, mode, parent, data, fops);
8288 	if (!ret)
8289 		pr_warn("Could not create tracefs '%s' entry\n", name);
8290 
8291 	return ret;
8292 }
8293 
8294 
8295 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8296 {
8297 	struct dentry *d_tracer;
8298 
8299 	if (tr->options)
8300 		return tr->options;
8301 
8302 	d_tracer = tracing_get_dentry(tr);
8303 	if (IS_ERR(d_tracer))
8304 		return NULL;
8305 
8306 	tr->options = tracefs_create_dir("options", d_tracer);
8307 	if (!tr->options) {
8308 		pr_warn("Could not create tracefs directory 'options'\n");
8309 		return NULL;
8310 	}
8311 
8312 	return tr->options;
8313 }
8314 
8315 static void
8316 create_trace_option_file(struct trace_array *tr,
8317 			 struct trace_option_dentry *topt,
8318 			 struct tracer_flags *flags,
8319 			 struct tracer_opt *opt)
8320 {
8321 	struct dentry *t_options;
8322 
8323 	t_options = trace_options_init_dentry(tr);
8324 	if (!t_options)
8325 		return;
8326 
8327 	topt->flags = flags;
8328 	topt->opt = opt;
8329 	topt->tr = tr;
8330 
8331 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8332 				    &trace_options_fops);
8333 
8334 }
8335 
8336 static void
8337 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8338 {
8339 	struct trace_option_dentry *topts;
8340 	struct trace_options *tr_topts;
8341 	struct tracer_flags *flags;
8342 	struct tracer_opt *opts;
8343 	int cnt;
8344 	int i;
8345 
8346 	if (!tracer)
8347 		return;
8348 
8349 	flags = tracer->flags;
8350 
8351 	if (!flags || !flags->opts)
8352 		return;
8353 
8354 	/*
8355 	 * If this is an instance, only create flags for tracers
8356 	 * the instance may have.
8357 	 */
8358 	if (!trace_ok_for_array(tracer, tr))
8359 		return;
8360 
8361 	for (i = 0; i < tr->nr_topts; i++) {
8362 		/* Make sure there's no duplicate flags. */
8363 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8364 			return;
8365 	}
8366 
8367 	opts = flags->opts;
8368 
8369 	for (cnt = 0; opts[cnt].name; cnt++)
8370 		;
8371 
8372 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8373 	if (!topts)
8374 		return;
8375 
8376 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8377 			    GFP_KERNEL);
8378 	if (!tr_topts) {
8379 		kfree(topts);
8380 		return;
8381 	}
8382 
8383 	tr->topts = tr_topts;
8384 	tr->topts[tr->nr_topts].tracer = tracer;
8385 	tr->topts[tr->nr_topts].topts = topts;
8386 	tr->nr_topts++;
8387 
8388 	for (cnt = 0; opts[cnt].name; cnt++) {
8389 		create_trace_option_file(tr, &topts[cnt], flags,
8390 					 &opts[cnt]);
8391 		MEM_FAIL(topts[cnt].entry == NULL,
8392 			  "Failed to create trace option: %s",
8393 			  opts[cnt].name);
8394 	}
8395 }
8396 
8397 static struct dentry *
8398 create_trace_option_core_file(struct trace_array *tr,
8399 			      const char *option, long index)
8400 {
8401 	struct dentry *t_options;
8402 
8403 	t_options = trace_options_init_dentry(tr);
8404 	if (!t_options)
8405 		return NULL;
8406 
8407 	return trace_create_file(option, 0644, t_options,
8408 				 (void *)&tr->trace_flags_index[index],
8409 				 &trace_options_core_fops);
8410 }
8411 
8412 static void create_trace_options_dir(struct trace_array *tr)
8413 {
8414 	struct dentry *t_options;
8415 	bool top_level = tr == &global_trace;
8416 	int i;
8417 
8418 	t_options = trace_options_init_dentry(tr);
8419 	if (!t_options)
8420 		return;
8421 
8422 	for (i = 0; trace_options[i]; i++) {
8423 		if (top_level ||
8424 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8425 			create_trace_option_core_file(tr, trace_options[i], i);
8426 	}
8427 }
8428 
8429 static ssize_t
8430 rb_simple_read(struct file *filp, char __user *ubuf,
8431 	       size_t cnt, loff_t *ppos)
8432 {
8433 	struct trace_array *tr = filp->private_data;
8434 	char buf[64];
8435 	int r;
8436 
8437 	r = tracer_tracing_is_on(tr);
8438 	r = sprintf(buf, "%d\n", r);
8439 
8440 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8441 }
8442 
8443 static ssize_t
8444 rb_simple_write(struct file *filp, const char __user *ubuf,
8445 		size_t cnt, loff_t *ppos)
8446 {
8447 	struct trace_array *tr = filp->private_data;
8448 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8449 	unsigned long val;
8450 	int ret;
8451 
8452 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8453 	if (ret)
8454 		return ret;
8455 
8456 	if (buffer) {
8457 		mutex_lock(&trace_types_lock);
8458 		if (!!val == tracer_tracing_is_on(tr)) {
8459 			val = 0; /* do nothing */
8460 		} else if (val) {
8461 			tracer_tracing_on(tr);
8462 			if (tr->current_trace->start)
8463 				tr->current_trace->start(tr);
8464 		} else {
8465 			tracer_tracing_off(tr);
8466 			if (tr->current_trace->stop)
8467 				tr->current_trace->stop(tr);
8468 		}
8469 		mutex_unlock(&trace_types_lock);
8470 	}
8471 
8472 	(*ppos)++;
8473 
8474 	return cnt;
8475 }
8476 
8477 static const struct file_operations rb_simple_fops = {
8478 	.open		= tracing_open_generic_tr,
8479 	.read		= rb_simple_read,
8480 	.write		= rb_simple_write,
8481 	.release	= tracing_release_generic_tr,
8482 	.llseek		= default_llseek,
8483 };
8484 
8485 static ssize_t
8486 buffer_percent_read(struct file *filp, char __user *ubuf,
8487 		    size_t cnt, loff_t *ppos)
8488 {
8489 	struct trace_array *tr = filp->private_data;
8490 	char buf[64];
8491 	int r;
8492 
8493 	r = tr->buffer_percent;
8494 	r = sprintf(buf, "%d\n", r);
8495 
8496 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8497 }
8498 
8499 static ssize_t
8500 buffer_percent_write(struct file *filp, const char __user *ubuf,
8501 		     size_t cnt, loff_t *ppos)
8502 {
8503 	struct trace_array *tr = filp->private_data;
8504 	unsigned long val;
8505 	int ret;
8506 
8507 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8508 	if (ret)
8509 		return ret;
8510 
8511 	if (val > 100)
8512 		return -EINVAL;
8513 
8514 	if (!val)
8515 		val = 1;
8516 
8517 	tr->buffer_percent = val;
8518 
8519 	(*ppos)++;
8520 
8521 	return cnt;
8522 }
8523 
8524 static const struct file_operations buffer_percent_fops = {
8525 	.open		= tracing_open_generic_tr,
8526 	.read		= buffer_percent_read,
8527 	.write		= buffer_percent_write,
8528 	.release	= tracing_release_generic_tr,
8529 	.llseek		= default_llseek,
8530 };
8531 
8532 static struct dentry *trace_instance_dir;
8533 
8534 static void
8535 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8536 
8537 static int
8538 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8539 {
8540 	enum ring_buffer_flags rb_flags;
8541 
8542 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8543 
8544 	buf->tr = tr;
8545 
8546 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8547 	if (!buf->buffer)
8548 		return -ENOMEM;
8549 
8550 	buf->data = alloc_percpu(struct trace_array_cpu);
8551 	if (!buf->data) {
8552 		ring_buffer_free(buf->buffer);
8553 		buf->buffer = NULL;
8554 		return -ENOMEM;
8555 	}
8556 
8557 	/* Allocate the first page for all buffers */
8558 	set_buffer_entries(&tr->array_buffer,
8559 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8560 
8561 	return 0;
8562 }
8563 
8564 static int allocate_trace_buffers(struct trace_array *tr, int size)
8565 {
8566 	int ret;
8567 
8568 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8569 	if (ret)
8570 		return ret;
8571 
8572 #ifdef CONFIG_TRACER_MAX_TRACE
8573 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8574 				    allocate_snapshot ? size : 1);
8575 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8576 		ring_buffer_free(tr->array_buffer.buffer);
8577 		tr->array_buffer.buffer = NULL;
8578 		free_percpu(tr->array_buffer.data);
8579 		tr->array_buffer.data = NULL;
8580 		return -ENOMEM;
8581 	}
8582 	tr->allocated_snapshot = allocate_snapshot;
8583 
8584 	/*
8585 	 * Only the top level trace array gets its snapshot allocated
8586 	 * from the kernel command line.
8587 	 */
8588 	allocate_snapshot = false;
8589 #endif
8590 
8591 	return 0;
8592 }
8593 
8594 static void free_trace_buffer(struct array_buffer *buf)
8595 {
8596 	if (buf->buffer) {
8597 		ring_buffer_free(buf->buffer);
8598 		buf->buffer = NULL;
8599 		free_percpu(buf->data);
8600 		buf->data = NULL;
8601 	}
8602 }
8603 
8604 static void free_trace_buffers(struct trace_array *tr)
8605 {
8606 	if (!tr)
8607 		return;
8608 
8609 	free_trace_buffer(&tr->array_buffer);
8610 
8611 #ifdef CONFIG_TRACER_MAX_TRACE
8612 	free_trace_buffer(&tr->max_buffer);
8613 #endif
8614 }
8615 
8616 static void init_trace_flags_index(struct trace_array *tr)
8617 {
8618 	int i;
8619 
8620 	/* Used by the trace options files */
8621 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8622 		tr->trace_flags_index[i] = i;
8623 }
8624 
8625 static void __update_tracer_options(struct trace_array *tr)
8626 {
8627 	struct tracer *t;
8628 
8629 	for (t = trace_types; t; t = t->next)
8630 		add_tracer_options(tr, t);
8631 }
8632 
8633 static void update_tracer_options(struct trace_array *tr)
8634 {
8635 	mutex_lock(&trace_types_lock);
8636 	__update_tracer_options(tr);
8637 	mutex_unlock(&trace_types_lock);
8638 }
8639 
8640 /* Must have trace_types_lock held */
8641 struct trace_array *trace_array_find(const char *instance)
8642 {
8643 	struct trace_array *tr, *found = NULL;
8644 
8645 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8646 		if (tr->name && strcmp(tr->name, instance) == 0) {
8647 			found = tr;
8648 			break;
8649 		}
8650 	}
8651 
8652 	return found;
8653 }
8654 
8655 struct trace_array *trace_array_find_get(const char *instance)
8656 {
8657 	struct trace_array *tr;
8658 
8659 	mutex_lock(&trace_types_lock);
8660 	tr = trace_array_find(instance);
8661 	if (tr)
8662 		tr->ref++;
8663 	mutex_unlock(&trace_types_lock);
8664 
8665 	return tr;
8666 }
8667 
8668 static int trace_array_create_dir(struct trace_array *tr)
8669 {
8670 	int ret;
8671 
8672 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8673 	if (!tr->dir)
8674 		return -EINVAL;
8675 
8676 	ret = event_trace_add_tracer(tr->dir, tr);
8677 	if (ret)
8678 		tracefs_remove(tr->dir);
8679 
8680 	init_tracer_tracefs(tr, tr->dir);
8681 	__update_tracer_options(tr);
8682 
8683 	return ret;
8684 }
8685 
8686 static struct trace_array *trace_array_create(const char *name)
8687 {
8688 	struct trace_array *tr;
8689 	int ret;
8690 
8691 	ret = -ENOMEM;
8692 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8693 	if (!tr)
8694 		return ERR_PTR(ret);
8695 
8696 	tr->name = kstrdup(name, GFP_KERNEL);
8697 	if (!tr->name)
8698 		goto out_free_tr;
8699 
8700 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8701 		goto out_free_tr;
8702 
8703 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8704 
8705 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8706 
8707 	raw_spin_lock_init(&tr->start_lock);
8708 
8709 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8710 
8711 	tr->current_trace = &nop_trace;
8712 
8713 	INIT_LIST_HEAD(&tr->systems);
8714 	INIT_LIST_HEAD(&tr->events);
8715 	INIT_LIST_HEAD(&tr->hist_vars);
8716 	INIT_LIST_HEAD(&tr->err_log);
8717 
8718 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8719 		goto out_free_tr;
8720 
8721 	if (ftrace_allocate_ftrace_ops(tr) < 0)
8722 		goto out_free_tr;
8723 
8724 	ftrace_init_trace_array(tr);
8725 
8726 	init_trace_flags_index(tr);
8727 
8728 	if (trace_instance_dir) {
8729 		ret = trace_array_create_dir(tr);
8730 		if (ret)
8731 			goto out_free_tr;
8732 	} else
8733 		__trace_early_add_events(tr);
8734 
8735 	list_add(&tr->list, &ftrace_trace_arrays);
8736 
8737 	tr->ref++;
8738 
8739 	return tr;
8740 
8741  out_free_tr:
8742 	ftrace_free_ftrace_ops(tr);
8743 	free_trace_buffers(tr);
8744 	free_cpumask_var(tr->tracing_cpumask);
8745 	kfree(tr->name);
8746 	kfree(tr);
8747 
8748 	return ERR_PTR(ret);
8749 }
8750 
8751 static int instance_mkdir(const char *name)
8752 {
8753 	struct trace_array *tr;
8754 	int ret;
8755 
8756 	mutex_lock(&event_mutex);
8757 	mutex_lock(&trace_types_lock);
8758 
8759 	ret = -EEXIST;
8760 	if (trace_array_find(name))
8761 		goto out_unlock;
8762 
8763 	tr = trace_array_create(name);
8764 
8765 	ret = PTR_ERR_OR_ZERO(tr);
8766 
8767 out_unlock:
8768 	mutex_unlock(&trace_types_lock);
8769 	mutex_unlock(&event_mutex);
8770 	return ret;
8771 }
8772 
8773 /**
8774  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8775  * @name: The name of the trace array to be looked up/created.
8776  *
8777  * Returns pointer to trace array with given name.
8778  * NULL, if it cannot be created.
8779  *
8780  * NOTE: This function increments the reference counter associated with the
8781  * trace array returned. This makes sure it cannot be freed while in use.
8782  * Use trace_array_put() once the trace array is no longer needed.
8783  * If the trace_array is to be freed, trace_array_destroy() needs to
8784  * be called after the trace_array_put(), or simply let user space delete
8785  * it from the tracefs instances directory. But until the
8786  * trace_array_put() is called, user space can not delete it.
8787  *
8788  */
8789 struct trace_array *trace_array_get_by_name(const char *name)
8790 {
8791 	struct trace_array *tr;
8792 
8793 	mutex_lock(&event_mutex);
8794 	mutex_lock(&trace_types_lock);
8795 
8796 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8797 		if (tr->name && strcmp(tr->name, name) == 0)
8798 			goto out_unlock;
8799 	}
8800 
8801 	tr = trace_array_create(name);
8802 
8803 	if (IS_ERR(tr))
8804 		tr = NULL;
8805 out_unlock:
8806 	if (tr)
8807 		tr->ref++;
8808 
8809 	mutex_unlock(&trace_types_lock);
8810 	mutex_unlock(&event_mutex);
8811 	return tr;
8812 }
8813 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
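/*
 * Illustrative sketch (hypothetical module code, not part of the original
 * source) of the lookup/create and reference-drop contract described in
 * the kernel-doc above:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use the instance while the reference is held ...
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should be removed)
 */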
8814 
8815 static int __remove_instance(struct trace_array *tr)
8816 {
8817 	int i;
8818 
8819 	/* Reference counter for a newly created trace array = 1. */
8820 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8821 		return -EBUSY;
8822 
8823 	list_del(&tr->list);
8824 
8825 	/* Disable all the flags that were enabled coming in */
8826 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8827 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8828 			set_tracer_flag(tr, 1 << i, 0);
8829 	}
8830 
8831 	tracing_set_nop(tr);
8832 	clear_ftrace_function_probes(tr);
8833 	event_trace_del_tracer(tr);
8834 	ftrace_clear_pids(tr);
8835 	ftrace_destroy_function_files(tr);
8836 	tracefs_remove(tr->dir);
8837 	free_trace_buffers(tr);
8838 
8839 	for (i = 0; i < tr->nr_topts; i++) {
8840 		kfree(tr->topts[i].topts);
8841 	}
8842 	kfree(tr->topts);
8843 
8844 	free_cpumask_var(tr->tracing_cpumask);
8845 	kfree(tr->name);
8846 	kfree(tr);
8847 
8848 	return 0;
8849 }
8850 
8851 int trace_array_destroy(struct trace_array *this_tr)
8852 {
8853 	struct trace_array *tr;
8854 	int ret;
8855 
8856 	if (!this_tr)
8857 		return -EINVAL;
8858 
8859 	mutex_lock(&event_mutex);
8860 	mutex_lock(&trace_types_lock);
8861 
8862 	ret = -ENODEV;
8863 
8864 	/* Make sure the trace array exists before destroying it. */
8865 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8866 		if (tr == this_tr) {
8867 			ret = __remove_instance(tr);
8868 			break;
8869 		}
8870 	}
8871 
8872 	mutex_unlock(&trace_types_lock);
8873 	mutex_unlock(&event_mutex);
8874 
8875 	return ret;
8876 }
8877 EXPORT_SYMBOL_GPL(trace_array_destroy);
8878 
8879 static int instance_rmdir(const char *name)
8880 {
8881 	struct trace_array *tr;
8882 	int ret;
8883 
8884 	mutex_lock(&event_mutex);
8885 	mutex_lock(&trace_types_lock);
8886 
8887 	ret = -ENODEV;
8888 	tr = trace_array_find(name);
8889 	if (tr)
8890 		ret = __remove_instance(tr);
8891 
8892 	mutex_unlock(&trace_types_lock);
8893 	mutex_unlock(&event_mutex);
8894 
8895 	return ret;
8896 }
8897 
8898 static __init void create_trace_instances(struct dentry *d_tracer)
8899 {
8900 	struct trace_array *tr;
8901 
8902 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8903 							 instance_mkdir,
8904 							 instance_rmdir);
8905 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8906 		return;
8907 
8908 	mutex_lock(&event_mutex);
8909 	mutex_lock(&trace_types_lock);
8910 
8911 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8912 		if (!tr->name)
8913 			continue;
8914 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8915 			     "Failed to create instance directory\n"))
8916 			break;
8917 	}
8918 
8919 	mutex_unlock(&trace_types_lock);
8920 	mutex_unlock(&event_mutex);
8921 }
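
/*
 * For reference: the callbacks registered above are what user space hits
 * when it creates or removes an instance directory, e.g. (the mount point
 * may differ on other setups):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    ->  instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo    ->  instance_rmdir("foo")
 */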
8922 
8923 static void
8924 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8925 {
8926 	struct trace_event_file *file;
8927 	int cpu;
8928 
8929 	trace_create_file("available_tracers", 0444, d_tracer,
8930 			tr, &show_traces_fops);
8931 
8932 	trace_create_file("current_tracer", 0644, d_tracer,
8933 			tr, &set_tracer_fops);
8934 
8935 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8936 			  tr, &tracing_cpumask_fops);
8937 
8938 	trace_create_file("trace_options", 0644, d_tracer,
8939 			  tr, &tracing_iter_fops);
8940 
8941 	trace_create_file("trace", 0644, d_tracer,
8942 			  tr, &tracing_fops);
8943 
8944 	trace_create_file("trace_pipe", 0444, d_tracer,
8945 			  tr, &tracing_pipe_fops);
8946 
8947 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8948 			  tr, &tracing_entries_fops);
8949 
8950 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8951 			  tr, &tracing_total_entries_fops);
8952 
8953 	trace_create_file("free_buffer", 0200, d_tracer,
8954 			  tr, &tracing_free_buffer_fops);
8955 
8956 	trace_create_file("trace_marker", 0220, d_tracer,
8957 			  tr, &tracing_mark_fops);
8958 
8959 	file = __find_event_file(tr, "ftrace", "print");
8960 	if (file && file->dir)
8961 		trace_create_file("trigger", 0644, file->dir, file,
8962 				  &event_trigger_fops);
8963 	tr->trace_marker_file = file;
8964 
8965 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8966 			  tr, &tracing_mark_raw_fops);
8967 
8968 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8969 			  &trace_clock_fops);
8970 
8971 	trace_create_file("tracing_on", 0644, d_tracer,
8972 			  tr, &rb_simple_fops);
8973 
8974 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8975 			  &trace_time_stamp_mode_fops);
8976 
8977 	tr->buffer_percent = 50;
8978 
8979 	trace_create_file("buffer_percent", 0444, d_tracer,
8980 			tr, &buffer_percent_fops);
8981 
8982 	create_trace_options_dir(tr);
8983 
8984 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8985 	trace_create_maxlat_file(tr, d_tracer);
8986 #endif
8987 
8988 	if (ftrace_create_function_files(tr, d_tracer))
8989 		MEM_FAIL(1, "Could not allocate function filter files");
8990 
8991 #ifdef CONFIG_TRACER_SNAPSHOT
8992 	trace_create_file("snapshot", 0644, d_tracer,
8993 			  tr, &snapshot_fops);
8994 #endif
8995 
8996 	trace_create_file("error_log", 0644, d_tracer,
8997 			  tr, &tracing_err_log_fops);
8998 
8999 	for_each_tracing_cpu(cpu)
9000 		tracing_init_tracefs_percpu(tr, cpu);
9001 
9002 	ftrace_init_tracefs(tr, d_tracer);
9003 }
9004 
9005 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9006 {
9007 	struct vfsmount *mnt;
9008 	struct file_system_type *type;
9009 
9010 	/*
9011 	 * To maintain backward compatibility for tools that mount
9012 	 * debugfs to get to the tracing facility, tracefs is automatically
9013 	 * mounted to the debugfs/tracing directory.
9014 	 */
9015 	type = get_fs_type("tracefs");
9016 	if (!type)
9017 		return NULL;
9018 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9019 	put_filesystem(type);
9020 	if (IS_ERR(mnt))
9021 		return NULL;
9022 	mntget(mnt);
9023 
9024 	return mnt;
9025 }
9026 
9027 /**
9028  * tracing_init_dentry - initialize top level trace array
9029  *
9030  * This is called when creating files or directories in the tracing
9031  * directory. It is called by the boot up code (via fs_initcall())
9032  * and returns zero on success or a negative error code on failure.
9033  */
9034 int tracing_init_dentry(void)
9035 {
9036 	struct trace_array *tr = &global_trace;
9037 
9038 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9039 		pr_warn("Tracing disabled due to lockdown\n");
9040 		return -EPERM;
9041 	}
9042 
9043 	/* The top level trace array uses NULL as parent */
9044 	if (tr->dir)
9045 		return 0;
9046 
9047 	if (WARN_ON(!tracefs_initialized()))
9048 		return -ENODEV;
9049 
9050 	/*
9051 	 * As there may still be users that expect the tracing
9052 	 * files to exist in debugfs/tracing, we must automount
9053 	 * the tracefs file system there, so older tools still
9054 	 * work with the newer kernel.
9055 	 */
9056 	tr->dir = debugfs_create_automount("tracing", NULL,
9057 					   trace_automount, NULL);
9058 
9059 	return 0;
9060 }
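
/*
 * For example (assuming debugfs is mounted at /sys/kernel/debug), an older
 * tool that runs:
 *
 *	cat /sys/kernel/debug/tracing/trace
 *
 * triggers the automount above and sees the tracefs files as before.
 */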
9061 
9062 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9063 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9064 
9065 static void __init trace_eval_init(void)
9066 {
9067 	int len;
9068 
9069 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9070 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9071 }
9072 
9073 #ifdef CONFIG_MODULES
9074 static void trace_module_add_evals(struct module *mod)
9075 {
9076 	if (!mod->num_trace_evals)
9077 		return;
9078 
9079 	/*
9080 	 * Modules with bad taint do not have events created; do not
9081 	 * bother with their eval maps (enums) either.
9082 	 */
9083 	if (trace_module_has_bad_taint(mod))
9084 		return;
9085 
9086 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9087 }
9088 
9089 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9090 static void trace_module_remove_evals(struct module *mod)
9091 {
9092 	union trace_eval_map_item *map;
9093 	union trace_eval_map_item **last = &trace_eval_maps;
9094 
9095 	if (!mod->num_trace_evals)
9096 		return;
9097 
9098 	mutex_lock(&trace_eval_mutex);
9099 
9100 	map = trace_eval_maps;
9101 
9102 	while (map) {
9103 		if (map->head.mod == mod)
9104 			break;
9105 		map = trace_eval_jmp_to_tail(map);
9106 		last = &map->tail.next;
9107 		map = map->tail.next;
9108 	}
9109 	if (!map)
9110 		goto out;
9111 
9112 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9113 	kfree(map);
9114  out:
9115 	mutex_unlock(&trace_eval_mutex);
9116 }
9117 #else
9118 static inline void trace_module_remove_evals(struct module *mod) { }
9119 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9120 
9121 static int trace_module_notify(struct notifier_block *self,
9122 			       unsigned long val, void *data)
9123 {
9124 	struct module *mod = data;
9125 
9126 	switch (val) {
9127 	case MODULE_STATE_COMING:
9128 		trace_module_add_evals(mod);
9129 		break;
9130 	case MODULE_STATE_GOING:
9131 		trace_module_remove_evals(mod);
9132 		break;
9133 	}
9134 
9135 	return NOTIFY_OK;
9136 }
9137 
9138 static struct notifier_block trace_module_nb = {
9139 	.notifier_call = trace_module_notify,
9140 	.priority = 0,
9141 };
9142 #endif /* CONFIG_MODULES */
9143 
9144 static __init int tracer_init_tracefs(void)
9145 {
9146 	int ret;
9147 
9148 	trace_access_lock_init();
9149 
9150 	ret = tracing_init_dentry();
9151 	if (ret)
9152 		return 0;
9153 
9154 	event_trace_init();
9155 
9156 	init_tracer_tracefs(&global_trace, NULL);
9157 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9158 
9159 	trace_create_file("tracing_thresh", 0644, NULL,
9160 			&global_trace, &tracing_thresh_fops);
9161 
9162 	trace_create_file("README", 0444, NULL,
9163 			NULL, &tracing_readme_fops);
9164 
9165 	trace_create_file("saved_cmdlines", 0444, NULL,
9166 			NULL, &tracing_saved_cmdlines_fops);
9167 
9168 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9169 			  NULL, &tracing_saved_cmdlines_size_fops);
9170 
9171 	trace_create_file("saved_tgids", 0444, NULL,
9172 			NULL, &tracing_saved_tgids_fops);
9173 
9174 	trace_eval_init();
9175 
9176 	trace_create_eval_file(NULL);
9177 
9178 #ifdef CONFIG_MODULES
9179 	register_module_notifier(&trace_module_nb);
9180 #endif
9181 
9182 #ifdef CONFIG_DYNAMIC_FTRACE
9183 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9184 			NULL, &tracing_dyn_info_fops);
9185 #endif
9186 
9187 	create_trace_instances(NULL);
9188 
9189 	update_tracer_options(&global_trace);
9190 
9191 	return 0;
9192 }
9193 
9194 static int trace_panic_handler(struct notifier_block *this,
9195 			       unsigned long event, void *unused)
9196 {
9197 	if (ftrace_dump_on_oops)
9198 		ftrace_dump(ftrace_dump_on_oops);
9199 	return NOTIFY_OK;
9200 }
9201 
9202 static struct notifier_block trace_panic_notifier = {
9203 	.notifier_call  = trace_panic_handler,
9204 	.next           = NULL,
9205 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9206 };
9207 
9208 static int trace_die_handler(struct notifier_block *self,
9209 			     unsigned long val,
9210 			     void *data)
9211 {
9212 	switch (val) {
9213 	case DIE_OOPS:
9214 		if (ftrace_dump_on_oops)
9215 			ftrace_dump(ftrace_dump_on_oops);
9216 		break;
9217 	default:
9218 		break;
9219 	}
9220 	return NOTIFY_OK;
9221 }
9222 
9223 static struct notifier_block trace_die_notifier = {
9224 	.notifier_call = trace_die_handler,
9225 	.priority = 200
9226 };
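
/*
 * Note: ftrace_dump_on_oops is typically set with the "ftrace_dump_on_oops"
 * (or "ftrace_dump_on_oops=orig_cpu") kernel command line option, or via
 * /proc/sys/kernel/ftrace_dump_on_oops; the two notifiers above then dump
 * the ring buffer when a panic or oops occurs.
 */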
9227 
9228 /*
9229  * printk is set to a max of 1024; we really don't need it that big.
9230  * Nothing should be printing 1000 characters anyway.
9231  */
9232 #define TRACE_MAX_PRINT		1000
9233 
9234 /*
9235  * Define here KERN_TRACE so that we have one place to modify
9236  * it if we decide to change what log level the ftrace dump
9237  * should be at.
9238  */
9239 #define KERN_TRACE		KERN_EMERG
9240 
9241 void
9242 trace_printk_seq(struct trace_seq *s)
9243 {
9244 	/* Probably should print a warning here. */
9245 	if (s->seq.len >= TRACE_MAX_PRINT)
9246 		s->seq.len = TRACE_MAX_PRINT;
9247 
9248 	/*
9249 	 * More paranoia: although the buffer size is set to
9250 	 * PAGE_SIZE and TRACE_MAX_PRINT is only 1000, add
9251 	 * an extra layer of protection anyway.
9252 	 */
9253 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9254 		s->seq.len = s->seq.size - 1;
9255 
9256 	/* The string should already be NUL-terminated, but we are paranoid. */
9257 	s->buffer[s->seq.len] = 0;
9258 
9259 	printk(KERN_TRACE "%s", s->buffer);
9260 
9261 	trace_seq_init(s);
9262 }
9263 
9264 void trace_init_global_iter(struct trace_iterator *iter)
9265 {
9266 	iter->tr = &global_trace;
9267 	iter->trace = iter->tr->current_trace;
9268 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9269 	iter->array_buffer = &global_trace.array_buffer;
9270 
9271 	if (iter->trace && iter->trace->open)
9272 		iter->trace->open(iter);
9273 
9274 	/* Annotate start of buffers if we had overruns */
9275 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9276 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9277 
9278 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9279 	if (trace_clocks[iter->tr->clock_id].in_ns)
9280 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9281 }
9282 
9283 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9284 {
9285 	/* use static because iter can be a bit big for the stack */
9286 	static struct trace_iterator iter;
9287 	static atomic_t dump_running;
9288 	struct trace_array *tr = &global_trace;
9289 	unsigned int old_userobj;
9290 	unsigned long flags;
9291 	int cnt = 0, cpu;
9292 
9293 	/* Only allow one dump user at a time. */
9294 	if (atomic_inc_return(&dump_running) != 1) {
9295 		atomic_dec(&dump_running);
9296 		return;
9297 	}
9298 
9299 	/*
9300 	 * Always turn off tracing when we dump.
9301 	 * We don't need to show trace output of what happens
9302 	 * between multiple crashes.
9303 	 *
9304 	 * If the user does a sysrq-z, then they can re-enable
9305 	 * tracing with echo 1 > tracing_on.
9306 	 */
9307 	tracing_off();
9308 
9309 	local_irq_save(flags);
9310 	printk_nmi_direct_enter();
9311 
9312 	/* Simulate the iterator */
9313 	trace_init_global_iter(&iter);
9314 	/* Cannot use kmalloc for iter.temp (this may run in panic/NMI context) */
9315 	iter.temp = static_temp_buf;
9316 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9317 
9318 	for_each_tracing_cpu(cpu) {
9319 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9320 	}
9321 
9322 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9323 
9324 	/* don't look at user memory in panic mode */
9325 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9326 
9327 	switch (oops_dump_mode) {
9328 	case DUMP_ALL:
9329 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9330 		break;
9331 	case DUMP_ORIG:
9332 		iter.cpu_file = raw_smp_processor_id();
9333 		break;
9334 	case DUMP_NONE:
9335 		goto out_enable;
9336 	default:
9337 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9338 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9339 	}
9340 
9341 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9342 
9343 	/* Did function tracer already get disabled? */
9344 	if (ftrace_is_dead()) {
9345 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9346 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9347 	}
9348 
9349 	/*
9350 	 * We need to stop all tracing on all CPUs to read
9351 	 * the next buffer. This is a bit expensive, but is
9352 	 * not done often. We read everything we can,
9353 	 * and then release the locks again.
9354 	 */
9355 
9356 	while (!trace_empty(&iter)) {
9357 
9358 		if (!cnt)
9359 			printk(KERN_TRACE "---------------------------------\n");
9360 
9361 		cnt++;
9362 
9363 		trace_iterator_reset(&iter);
9364 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9365 
9366 		if (trace_find_next_entry_inc(&iter) != NULL) {
9367 			int ret;
9368 
9369 			ret = print_trace_line(&iter);
9370 			if (ret != TRACE_TYPE_NO_CONSUME)
9371 				trace_consume(&iter);
9372 		}
9373 		touch_nmi_watchdog();
9374 
9375 		trace_printk_seq(&iter.seq);
9376 	}
9377 
9378 	if (!cnt)
9379 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9380 	else
9381 		printk(KERN_TRACE "---------------------------------\n");
9382 
9383  out_enable:
9384 	tr->trace_flags |= old_userobj;
9385 
9386 	for_each_tracing_cpu(cpu) {
9387 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9388 	}
9389 	atomic_dec(&dump_running);
9390 	printk_nmi_direct_exit();
9391 	local_irq_restore(flags);
9392 }
9393 EXPORT_SYMBOL_GPL(ftrace_dump);
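
/*
 * Illustrative use of ftrace_dump() (hypothetical caller): code that wants
 * the trace buffer in the kernel log when it hits a fatal state could do:
 *
 *	if (fatal_condition)		(placeholder condition)
 *		ftrace_dump(DUMP_ALL);	(or DUMP_ORIG for the originating CPU only)
 */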
9394 
9395 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9396 {
9397 	char **argv;
9398 	int argc, ret;
9399 
9400 	argc = 0;
9401 	ret = 0;
9402 	argv = argv_split(GFP_KERNEL, buf, &argc);
9403 	if (!argv)
9404 		return -ENOMEM;
9405 
9406 	if (argc)
9407 		ret = createfn(argc, argv);
9408 
9409 	argv_free(argv);
9410 
9411 	return ret;
9412 }
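
/*
 * Sketch of a createfn callback (hypothetical, for illustration only):
 * trace_run_command() splits @buf on whitespace and hands the resulting
 * argv to the callback.
 *
 *	static int sample_createfn(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_debug("cmd: %s (%d args)\n", argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	ret = trace_run_command("p:myprobe do_sys_open", sample_createfn);
 */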
9413 
9414 #define WRITE_BUFSIZE  4096
9415 
9416 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9417 				size_t count, loff_t *ppos,
9418 				int (*createfn)(int, char **))
9419 {
9420 	char *kbuf, *buf, *tmp;
9421 	int ret = 0;
9422 	size_t done = 0;
9423 	size_t size;
9424 
9425 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9426 	if (!kbuf)
9427 		return -ENOMEM;
9428 
9429 	while (done < count) {
9430 		size = count - done;
9431 
9432 		if (size >= WRITE_BUFSIZE)
9433 			size = WRITE_BUFSIZE - 1;
9434 
9435 		if (copy_from_user(kbuf, buffer + done, size)) {
9436 			ret = -EFAULT;
9437 			goto out;
9438 		}
9439 		kbuf[size] = '\0';
9440 		buf = kbuf;
9441 		do {
9442 			tmp = strchr(buf, '\n');
9443 			if (tmp) {
9444 				*tmp = '\0';
9445 				size = tmp - buf + 1;
9446 			} else {
9447 				size = strlen(buf);
9448 				if (done + size < count) {
9449 					if (buf != kbuf)
9450 						break;
9451 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9452 					pr_warn("Line length is too long: Should be less than %d\n",
9453 						WRITE_BUFSIZE - 2);
9454 					ret = -EINVAL;
9455 					goto out;
9456 				}
9457 			}
9458 			done += size;
9459 
9460 			/* Remove comments */
9461 			tmp = strchr(buf, '#');
9462 
9463 			if (tmp)
9464 				*tmp = '\0';
9465 
9466 			ret = trace_run_command(buf, createfn);
9467 			if (ret)
9468 				goto out;
9469 			buf += size;
9470 
9471 		} while (done < count);
9472 	}
9473 	ret = done;
9474 
9475 out:
9476 	kfree(kbuf);
9477 
9478 	return ret;
9479 }
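
/*
 * trace_parse_run_command() is the write handler behind interfaces such as
 * kprobe_events: it splits the user buffer on newlines, strips '#' comments,
 * and runs createfn on each remaining line. For example, a single write of
 *
 *	"# add two probes\np:a do_sys_open\np:b do_sys_openat2\n"
 *
 * results in two createfn invocations (the comment line is skipped).
 */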
9480 
9481 __init static int tracer_alloc_buffers(void)
9482 {
9483 	int ring_buf_size;
9484 	int ret = -ENOMEM;
9485 
9486 
9487 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9488 		pr_warn("Tracing disabled due to lockdown\n");
9489 		return -EPERM;
9490 	}
9491 
9492 	/*
9493 	 * Make sure we don't accidentally add more trace options
9494 	 * than we have bits for.
9495 	 */
9496 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9497 
9498 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9499 		goto out;
9500 
9501 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9502 		goto out_free_buffer_mask;
9503 
9504 	/* Only allocate trace_printk buffers if a trace_printk exists */
9505 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9506 		/* Must be called before global_trace.buffer is allocated */
9507 		trace_printk_init_buffers();
9508 
9509 	/* To save memory, keep the ring buffer size at its minimum */
9510 	if (ring_buffer_expanded)
9511 		ring_buf_size = trace_buf_size;
9512 	else
9513 		ring_buf_size = 1;
9514 
9515 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9516 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9517 
9518 	raw_spin_lock_init(&global_trace.start_lock);
9519 
9520 	/*
9521 	 * The prepare callback allocates some memory for the ring buffer. We
9522 	 * don't free the buffer if the CPU goes down. If we were to free
9523 	 * the buffer, then the user would lose any trace that was in the
9524 	 * buffer. The memory will be removed once the "instance" is removed.
9525 	 */
9526 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9527 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9528 				      NULL);
9529 	if (ret < 0)
9530 		goto out_free_cpumask;
9531 	/* Used for event triggers */
9532 	ret = -ENOMEM;
9533 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9534 	if (!temp_buffer)
9535 		goto out_rm_hp_state;
9536 
9537 	if (trace_create_savedcmd() < 0)
9538 		goto out_free_temp_buffer;
9539 
9540 	/* TODO: make the number of buffers hot pluggable with CPUs */
9541 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9542 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9543 		goto out_free_savedcmd;
9544 	}
9545 
9546 	if (global_trace.buffer_disabled)
9547 		tracing_off();
9548 
9549 	if (trace_boot_clock) {
9550 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9551 		if (ret < 0)
9552 			pr_warn("Trace clock %s not defined, going back to default\n",
9553 				trace_boot_clock);
9554 	}
9555 
9556 	/*
9557 	 * register_tracer() might reference current_trace, so it
9558 	 * needs to be set before we register anything. This is
9559 	 * just a bootstrap of current_trace anyway.
9560 	 */
9561 	global_trace.current_trace = &nop_trace;
9562 
9563 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9564 
9565 	ftrace_init_global_array_ops(&global_trace);
9566 
9567 	init_trace_flags_index(&global_trace);
9568 
9569 	register_tracer(&nop_trace);
9570 
9571 	/* Function tracing may start here (via kernel command line) */
9572 	init_function_trace();
9573 
9574 	/* All seems OK, enable tracing */
9575 	tracing_disabled = 0;
9576 
9577 	atomic_notifier_chain_register(&panic_notifier_list,
9578 				       &trace_panic_notifier);
9579 
9580 	register_die_notifier(&trace_die_notifier);
9581 
9582 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9583 
9584 	INIT_LIST_HEAD(&global_trace.systems);
9585 	INIT_LIST_HEAD(&global_trace.events);
9586 	INIT_LIST_HEAD(&global_trace.hist_vars);
9587 	INIT_LIST_HEAD(&global_trace.err_log);
9588 	list_add(&global_trace.list, &ftrace_trace_arrays);
9589 
9590 	apply_trace_boot_options();
9591 
9592 	register_snapshot_cmd();
9593 
9594 	return 0;
9595 
9596 out_free_savedcmd:
9597 	free_saved_cmdlines_buffer(savedcmd);
9598 out_free_temp_buffer:
9599 	ring_buffer_free(temp_buffer);
9600 out_rm_hp_state:
9601 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9602 out_free_cpumask:
9603 	free_cpumask_var(global_trace.tracing_cpumask);
9604 out_free_buffer_mask:
9605 	free_cpumask_var(tracing_buffer_mask);
9606 out:
9607 	return ret;
9608 }
9609 
9610 void __init early_trace_init(void)
9611 {
9612 	if (tracepoint_printk) {
9613 		tracepoint_print_iter =
9614 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9615 		if (MEM_FAIL(!tracepoint_print_iter,
9616 			     "Failed to allocate trace iterator\n"))
9617 			tracepoint_printk = 0;
9618 		else
9619 			static_key_enable(&tracepoint_printk_key.key);
9620 	}
9621 	tracer_alloc_buffers();
9622 }
9623 
9624 void __init trace_init(void)
9625 {
9626 	trace_event_init();
9627 }
9628 
9629 __init static int clear_boot_tracer(void)
9630 {
9631 	/*
9632 	 * The default bootup tracer string lives in an init section and
9633 	 * will be freed. This function is called at late_initcall time;
9634 	 * if the boot tracer was not found and registered by then, clear
9635 	 * the pointer to prevent a later registration from accessing
9636 	 * memory that is about to be freed.
9637 	 */
9638 	if (!default_bootup_tracer)
9639 		return 0;
9640 
9641 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9642 	       default_bootup_tracer);
9643 	default_bootup_tracer = NULL;
9644 
9645 	return 0;
9646 }
9647 
9648 fs_initcall(tracer_init_tracefs);
9649 late_initcall_sync(clear_boot_tracer);
9650 
9651 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9652 __init static int tracing_set_default_clock(void)
9653 {
9654 	/* sched_clock_stable() is determined in late_initcall */
9655 	if (!trace_boot_clock && !sched_clock_stable()) {
9656 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9657 			pr_warn("Can not set tracing clock due to lockdown\n");
9658 			return -EPERM;
9659 		}
9660 
9661 		printk(KERN_WARNING
9662 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9663 		       "If you want to keep using the local clock, then add:\n"
9664 		       "  \"trace_clock=local\"\n"
9665 		       "on the kernel command line\n");
9666 		tracing_set_clock(&global_trace, "global");
9667 	}
9668 
9669 	return 0;
9670 }
9671 late_initcall_sync(tracing_set_default_clock);
9672 #endif
9673