1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
58 */
59 bool ring_buffer_expanded;
60
/*
* We need to change this state when a selftest is running.
* A selftest will look into the ring-buffer to count the
* entries inserted during the selftest, although some concurrent
* insertions into the ring-buffer, such as trace_printk(), could occur
* at the same time, giving false positive or negative results.
*/
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71 * If a tracer is running, we do not want to run SELFTEST.
72 */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 { }
83 };
84
85 static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 return 0;
89 }
90
91 /*
92 * To prevent the comm cache from being overwritten when no
93 * tracing is active, only save the comm when a trace event
94 * occurred.
95 */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99 * Kill all tracing for good (never come back).
100 * It is initialized to 1 but will turn to zero if the initialization
101 * of the tracer is successful. But that is the only place that sets
102 * this back to zero.
103 */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly tracing_buffer_mask;
107
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
*
* If there is an oops (or kernel panic) and ftrace_dump_on_oops
* is set, then ftrace_dump is called. This will output the contents
* of the ftrace buffers to the console. This is very useful for
* capturing traces that lead to crashes and outputting them to a
* serial console.
*
* It is off by default, but you can enable it either by specifying
* "ftrace_dump_on_oops" on the kernel command line, or by setting
* /proc/sys/kernel/ftrace_dump_on_oops.
* Set it to 1 to dump the buffers of all CPUs.
* Set it to 2 to dump only the buffer of the CPU that triggered the oops.
*/
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
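
/*
* Example (illustrative): to dump all CPU buffers when an oops happens,
* boot with "ftrace_dump_on_oops" on the kernel command line, or at run
* time do one of:
*
*    echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    (all CPUs)
*    echo 2 > /proc/sys/kernel/ftrace_dump_on_oops    (only the oops CPU)
*/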
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 struct module *mod;
133 unsigned long length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139 /*
140 * "end" is first and points to NULL as it must be different
141 * than "mod" or "eval_string"
142 */
143 union trace_eval_map_item *next;
144 const char *end; /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150 * The trace_eval_maps are saved in an array with two extra elements,
151 * one at the beginning, and one at the end. The beginning item contains
152 * the count of the saved maps (head.length), and the module they
153 * belong to if not built in (head.mod). The ending item contains a
154 * pointer to the next array of saved eval_map items.
155 */
156 union trace_eval_map_item {
157 struct trace_eval_map map;
158 struct trace_eval_map_head head;
159 struct trace_eval_map_tail tail;
160 };
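
/*
* Illustrative layout of one saved array, based on the comment above
* (N == head.length):
*
*    [0]        head:  { .mod, .length = N }
*    [1 .. N]   map:   the N saved trace_eval_map entries
*    [N + 1]    tail:  { .next = next saved array or NULL, .end = NULL }
*/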
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_array *tr,
167 struct trace_buffer *buffer,
168 unsigned long flags, int pc);
169
170 #define MAX_TRACER_SIZE 100
171 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
172 static char *default_bootup_tracer;
173
174 static bool allocate_snapshot;
175
static int __init set_cmdline_ftrace(char *str)
177 {
178 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
179 default_bootup_tracer = bootup_tracer_buf;
180 /* We are using ftrace early, expand it */
181 ring_buffer_expanded = true;
182 return 1;
183 }
184 __setup("ftrace=", set_cmdline_ftrace);
185
static int __init set_ftrace_dump_on_oops(char *str)
187 {
188 if (*str++ != '=' || !*str) {
189 ftrace_dump_on_oops = DUMP_ALL;
190 return 1;
191 }
192
193 if (!strcmp("orig_cpu", str)) {
194 ftrace_dump_on_oops = DUMP_ORIG;
195 return 1;
196 }
197
198 return 0;
199 }
200 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
201
static int __init stop_trace_on_warning(char *str)
203 {
204 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
205 __disable_trace_on_warning = 1;
206 return 1;
207 }
208 __setup("traceoff_on_warning", stop_trace_on_warning);
209
static int __init boot_alloc_snapshot(char *str)
211 {
212 allocate_snapshot = true;
213 /* We also need the main ring buffer expanded */
214 ring_buffer_expanded = true;
215 return 1;
216 }
217 __setup("alloc_snapshot", boot_alloc_snapshot);
218
219
220 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
221
static int __init set_trace_boot_options(char *str)
223 {
224 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
225 return 0;
226 }
227 __setup("trace_options=", set_trace_boot_options);
228
229 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
230 static char *trace_boot_clock __initdata;
231
static int __init set_trace_boot_clock(char *str)
233 {
234 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
235 trace_boot_clock = trace_boot_clock_buf;
236 return 0;
237 }
238 __setup("trace_clock=", set_trace_boot_clock);
239
static int __init set_tracepoint_printk(char *str)
241 {
242 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
243 tracepoint_printk = 1;
244 return 1;
245 }
246 __setup("tp_printk", set_tracepoint_printk);
247
unsigned long long ns2usecs(u64 nsec)
249 {
250 nsec += 500;
251 do_div(nsec, 1000);
252 return nsec;
253 }
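
/* For example, ns2usecs(1500) == 2 and ns2usecs(1499) == 1 (rounds to nearest). */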
254
255 static void
trace_process_export(struct trace_export *export,
struct ring_buffer_event *event, int flag)
258 {
259 struct trace_entry *entry;
260 unsigned int size = 0;
261
262 if (export->flags & flag) {
263 entry = ring_buffer_event_data(event);
264 size = ring_buffer_event_length(event);
265 export->write(export, entry, size);
266 }
267 }
268
269 static DEFINE_MUTEX(ftrace_export_lock);
270
271 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
272
273 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
274 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
275 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
276
static inline void ftrace_exports_enable(struct trace_export *export)
278 {
279 if (export->flags & TRACE_EXPORT_FUNCTION)
280 static_branch_inc(&trace_function_exports_enabled);
281
282 if (export->flags & TRACE_EXPORT_EVENT)
283 static_branch_inc(&trace_event_exports_enabled);
284
285 if (export->flags & TRACE_EXPORT_MARKER)
286 static_branch_inc(&trace_marker_exports_enabled);
287 }
288
static inline void ftrace_exports_disable(struct trace_export *export)
290 {
291 if (export->flags & TRACE_EXPORT_FUNCTION)
292 static_branch_dec(&trace_function_exports_enabled);
293
294 if (export->flags & TRACE_EXPORT_EVENT)
295 static_branch_dec(&trace_event_exports_enabled);
296
297 if (export->flags & TRACE_EXPORT_MARKER)
298 static_branch_dec(&trace_marker_exports_enabled);
299 }
300
static void ftrace_exports(struct ring_buffer_event *event, int flag)
302 {
303 struct trace_export *export;
304
305 preempt_disable_notrace();
306
307 export = rcu_dereference_raw_check(ftrace_exports_list);
308 while (export) {
309 trace_process_export(export, event, flag);
310 export = rcu_dereference_raw_check(export->next);
311 }
312
313 preempt_enable_notrace();
314 }
315
316 static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
318 {
319 rcu_assign_pointer(export->next, *list);
320 /*
321 * We are entering export into the list but another
322 * CPU might be walking that list. We need to make sure
323 * the export->next pointer is valid before another CPU sees
324 * the export pointer included into the list.
325 */
326 rcu_assign_pointer(*list, export);
327 }
328
329 static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
331 {
332 struct trace_export **p;
333
334 for (p = list; *p != NULL; p = &(*p)->next)
335 if (*p == export)
336 break;
337
338 if (*p != export)
339 return -1;
340
341 rcu_assign_pointer(*p, (*p)->next);
342
343 return 0;
344 }
345
346 static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
348 {
349 ftrace_exports_enable(export);
350
351 add_trace_export(list, export);
352 }
353
354 static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
356 {
357 int ret;
358
359 ret = rm_trace_export(list, export);
360 ftrace_exports_disable(export);
361
362 return ret;
363 }
364
int register_ftrace_export(struct trace_export *export)
366 {
367 if (WARN_ON_ONCE(!export->write))
368 return -1;
369
370 mutex_lock(&ftrace_export_lock);
371
372 add_ftrace_export(&ftrace_exports_list, export);
373
374 mutex_unlock(&ftrace_export_lock);
375
376 return 0;
377 }
378 EXPORT_SYMBOL_GPL(register_ftrace_export);
379
int unregister_ftrace_export(struct trace_export *export)
381 {
382 int ret;
383
384 mutex_lock(&ftrace_export_lock);
385
386 ret = rm_ftrace_export(&ftrace_exports_list, export);
387
388 mutex_unlock(&ftrace_export_lock);
389
390 return ret;
391 }
392 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
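
/*
* Minimal usage sketch with hypothetical names (see <linux/trace.h> for the
* exact layout of struct trace_export):
*
*    static void my_write(struct trace_export *export, const void *entry,
*                         unsigned int size)
*    {
*        ...push the @size bytes at @entry to an external sink...
*    }
*
*    static struct trace_export my_export = {
*        .write = my_write,
*        .flags = TRACE_EXPORT_FUNCTION,
*    };
*
*    register_ftrace_export(&my_export);
*    ...
*    unregister_ftrace_export(&my_export);
*/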
393
394 /* trace_flags holds trace_options default values */
395 #define TRACE_DEFAULT_FLAGS \
396 (FUNCTION_DEFAULT_FLAGS | \
397 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
398 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
399 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
400 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
401
402 /* trace_options that are only supported by global_trace */
403 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
404 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
405
406 /* trace_flags that are default zero for instances */
407 #define ZEROED_TRACE_FLAGS \
408 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
409
410 /*
411 * The global_trace is the descriptor that holds the top-level tracing
412 * buffers for the live tracing.
413 */
414 static struct trace_array global_trace = {
415 .trace_flags = TRACE_DEFAULT_FLAGS,
416 };
417
418 LIST_HEAD(ftrace_trace_arrays);
419
int trace_array_get(struct trace_array *this_tr)
421 {
422 struct trace_array *tr;
423 int ret = -ENODEV;
424
425 mutex_lock(&trace_types_lock);
426 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
427 if (tr == this_tr) {
428 tr->ref++;
429 ret = 0;
430 break;
431 }
432 }
433 mutex_unlock(&trace_types_lock);
434
435 return ret;
436 }
437
static void __trace_array_put(struct trace_array *this_tr)
439 {
440 WARN_ON(!this_tr->ref);
441 this_tr->ref--;
442 }
443
444 /**
445 * trace_array_put - Decrement the reference counter for this trace array.
446 *
447 * NOTE: Use this when we no longer need the trace array returned by
448 * trace_array_get_by_name(). This ensures the trace array can be later
449 * destroyed.
450 *
451 */
void trace_array_put(struct trace_array *this_tr)
453 {
454 if (!this_tr)
455 return;
456
457 mutex_lock(&trace_types_lock);
458 __trace_array_put(this_tr);
459 mutex_unlock(&trace_types_lock);
460 }
461 EXPORT_SYMBOL_GPL(trace_array_put);
462
int tracing_check_open_get_tr(struct trace_array *tr)
464 {
465 int ret;
466
467 ret = security_locked_down(LOCKDOWN_TRACEFS);
468 if (ret)
469 return ret;
470
471 if (tracing_disabled)
472 return -ENODEV;
473
474 if (tr && trace_array_get(tr) < 0)
475 return -ENODEV;
476
477 return 0;
478 }
479
int call_filter_check_discard(struct trace_event_call *call, void *rec,
struct trace_buffer *buffer,
struct ring_buffer_event *event)
483 {
484 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
485 !filter_match_preds(call->filter, rec)) {
486 __trace_event_discard_commit(buffer, event);
487 return 1;
488 }
489
490 return 0;
491 }
492
void trace_free_pid_list(struct trace_pid_list *pid_list)
494 {
495 vfree(pid_list->pids);
496 kfree(pid_list);
497 }
498
/**
* trace_find_filtered_pid - check if a pid exists in a filtered_pid list
* @filtered_pids: The list of pids to check
* @search_pid: The PID to find in @filtered_pids
*
* Returns true if @search_pid is found in @filtered_pids, and false otherwise.
*/
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
508 {
509 /*
510 * If pid_max changed after filtered_pids was created, we
511 * by default ignore all pids greater than the previous pid_max.
512 */
513 if (search_pid >= filtered_pids->pid_max)
514 return false;
515
516 return test_bit(search_pid, filtered_pids->pids);
517 }
518
519 /**
520 * trace_ignore_this_task - should a task be ignored for tracing
521 * @filtered_pids: The list of pids to check
522 * @task: The task that should be ignored if not filtered
523 *
524 * Checks if @task should be traced or not from @filtered_pids.
525 * Returns true if @task should *NOT* be traced.
526 * Returns false if @task should be traced.
527 */
528 bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
struct trace_pid_list *filtered_no_pids,
struct task_struct *task)
532 {
/*
* If filtered_no_pids is not empty, and the task's pid is listed
* in filtered_no_pids, then return true.
* Otherwise, if filtered_pids is empty, that means we can
* trace all tasks. If it has content, then only trace pids
* within filtered_pids.
*/
540
541 return (filtered_pids &&
542 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
543 (filtered_no_pids &&
544 trace_find_filtered_pid(filtered_no_pids, task->pid));
545 }
546
547 /**
548 * trace_filter_add_remove_task - Add or remove a task from a pid_list
549 * @pid_list: The list to modify
550 * @self: The current task for fork or NULL for exit
551 * @task: The task to add or remove
552 *
553 * If adding a task, if @self is defined, the task is only added if @self
554 * is also included in @pid_list. This happens on fork and tasks should
555 * only be added when the parent is listed. If @self is NULL, then the
556 * @task pid will be removed from the list, which would happen on exit
557 * of a task.
558 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
struct task_struct *self,
struct task_struct *task)
562 {
563 if (!pid_list)
564 return;
565
566 /* For forks, we only add if the forking task is listed */
567 if (self) {
568 if (!trace_find_filtered_pid(pid_list, self->pid))
569 return;
570 }
571
572 /* Sorry, but we don't support pid_max changing after setting */
573 if (task->pid >= pid_list->pid_max)
574 return;
575
576 /* "self" is set for forks, and NULL for exits */
577 if (self)
578 set_bit(task->pid, pid_list->pids);
579 else
580 clear_bit(task->pid, pid_list->pids);
581 }
582
583 /**
584 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
585 * @pid_list: The pid list to show
586 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
587 * @pos: The position of the file
588 *
589 * This is used by the seq_file "next" operation to iterate the pids
590 * listed in a trace_pid_list structure.
591 *
592 * Returns the pid+1 as we want to display pid of zero, but NULL would
593 * stop the iteration.
594 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
596 {
597 unsigned long pid = (unsigned long)v;
598
599 (*pos)++;
600
/* pid already is +1 of the actual previous bit */
602 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
603
604 /* Return pid + 1 to allow zero to be represented */
605 if (pid < pid_list->pid_max)
606 return (void *)(pid + 1);
607
608 return NULL;
609 }
610
611 /**
612 * trace_pid_start - Used for seq_file to start reading pid lists
613 * @pid_list: The pid list to show
614 * @pos: The position of the file
615 *
616 * This is used by seq_file "start" operation to start the iteration
617 * of listing pids.
618 *
619 * Returns the pid+1 as we want to display pid of zero, but NULL would
620 * stop the iteration.
621 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
623 {
624 unsigned long pid;
625 loff_t l = 0;
626
627 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
628 if (pid >= pid_list->pid_max)
629 return NULL;
630
631 /* Return pid + 1 so that zero can be the exit value */
632 for (pid++; pid && l < *pos;
633 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
634 ;
635 return (void *)pid;
636 }
637
638 /**
639 * trace_pid_show - show the current pid in seq_file processing
640 * @m: The seq_file structure to write into
641 * @v: A void pointer of the pid (+1) value to display
642 *
643 * Can be directly used by seq_file operations to display the current
644 * pid value.
645 */
int trace_pid_show(struct seq_file *m, void *v)
647 {
648 unsigned long pid = (unsigned long)v - 1;
649
650 seq_printf(m, "%lu\n", pid);
651 return 0;
652 }
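
/*
* Worked example of the +1 encoding used by the seq_file helpers above:
* a pid_list containing only pid 0 makes trace_pid_start() return
* (void *)1, and trace_pid_show() subtracts the 1 again and prints "0".
* Returning the raw pid would make pid 0 indistinguishable from the NULL
* that stops the iteration.
*/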
653
654 /* 128 should be much more than enough */
655 #define PID_BUF_SIZE 127
656
int trace_pid_write(struct trace_pid_list *filtered_pids,
struct trace_pid_list **new_pid_list,
const char __user *ubuf, size_t cnt)
660 {
661 struct trace_pid_list *pid_list;
662 struct trace_parser parser;
663 unsigned long val;
664 int nr_pids = 0;
665 ssize_t read = 0;
666 ssize_t ret = 0;
667 loff_t pos;
668 pid_t pid;
669
670 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
671 return -ENOMEM;
672
673 /*
674 * Always recreate a new array. The write is an all or nothing
675 * operation. Always create a new array when adding new pids by
676 * the user. If the operation fails, then the current list is
677 * not modified.
678 */
679 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
680 if (!pid_list) {
681 trace_parser_put(&parser);
682 return -ENOMEM;
683 }
684
685 pid_list->pid_max = READ_ONCE(pid_max);
686
687 /* Only truncating will shrink pid_max */
688 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
689 pid_list->pid_max = filtered_pids->pid_max;
690
691 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
692 if (!pid_list->pids) {
693 trace_parser_put(&parser);
694 kfree(pid_list);
695 return -ENOMEM;
696 }
697
698 if (filtered_pids) {
699 /* copy the current bits to the new max */
700 for_each_set_bit(pid, filtered_pids->pids,
701 filtered_pids->pid_max) {
702 set_bit(pid, pid_list->pids);
703 nr_pids++;
704 }
705 }
706
707 while (cnt > 0) {
708
709 pos = 0;
710
711 ret = trace_get_user(&parser, ubuf, cnt, &pos);
712 if (ret < 0 || !trace_parser_loaded(&parser))
713 break;
714
715 read += ret;
716 ubuf += ret;
717 cnt -= ret;
718
719 ret = -EINVAL;
720 if (kstrtoul(parser.buffer, 0, &val))
721 break;
722 if (val >= pid_list->pid_max)
723 break;
724
725 pid = (pid_t)val;
726
727 set_bit(pid, pid_list->pids);
728 nr_pids++;
729
730 trace_parser_clear(&parser);
731 ret = 0;
732 }
733 trace_parser_put(&parser);
734
735 if (ret < 0) {
736 trace_free_pid_list(pid_list);
737 return ret;
738 }
739
740 if (!nr_pids) {
741 /* Cleared the list of pids */
742 trace_free_pid_list(pid_list);
743 read = ret;
744 pid_list = NULL;
745 }
746
747 *new_pid_list = pid_list;
748
749 return read;
750 }
751
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
753 {
754 u64 ts;
755
756 /* Early boot up does not have a buffer yet */
757 if (!buf->buffer)
758 return trace_clock_local();
759
760 ts = ring_buffer_time_stamp(buf->buffer, cpu);
761 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
762
763 return ts;
764 }
765
u64 ftrace_now(int cpu)
767 {
768 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
769 }
770
771 /**
772 * tracing_is_enabled - Show if global_trace has been disabled
773 *
774 * Shows if the global trace has been enabled or not. It uses the
775 * mirror flag "buffer_disabled" to be used in fast paths such as for
776 * the irqsoff tracer. But it may be inaccurate due to races. If you
777 * need to know the accurate state, use tracing_is_on() which is a little
778 * slower, but accurate.
779 */
int tracing_is_enabled(void)
781 {
782 /*
783 * For quick access (irqsoff uses this in fast path), just
784 * return the mirror variable of the state of the ring buffer.
785 * It's a little racy, but we don't really care.
786 */
787 smp_rmb();
788 return !global_trace.buffer_disabled;
789 }
790
/*
* trace_buf_size is the size in bytes that is allocated
* for a buffer. Note, the number of bytes is always rounded
* to page size.
*
* This number is purposely set to a low number of 16384.
* If the dump on oops happens, it is much appreciated not to
* have to wait for all that output. Anyway, this is configurable
* at both boot time and run time.
*/
801 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
802
803 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
804
805 /* trace_types holds a link list of available tracers. */
806 static struct tracer *trace_types __read_mostly;
807
808 /*
809 * trace_types_lock is used to protect the trace_types list.
810 */
811 DEFINE_MUTEX(trace_types_lock);
812
/*
* serialize the access of the ring buffer
*
* ring buffer serializes readers, but it is low level protection.
* The validity of the events (which are returned by ring_buffer_peek() etc.)
* is not protected by the ring buffer.
*
* The content of events may become garbage if we allow other processes
* to consume these events concurrently:
* A) the page of the consumed events may become a normal page
* (not a reader page) in the ring buffer, and this page will be
* rewritten by the events producer.
* B) The page of the consumed events may become a page for splice_read,
* and this page will be returned to the system.
*
* These primitives allow multi-process access to different cpu ring buffers
* concurrently.
*
* These primitives don't distinguish read-only and read-consume access.
* Multiple read-only accesses are also serialized.
*/
834
835 #ifdef CONFIG_SMP
836 static DECLARE_RWSEM(all_cpu_access_lock);
837 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
838
static inline void trace_access_lock(int cpu)
840 {
841 if (cpu == RING_BUFFER_ALL_CPUS) {
842 /* gain it for accessing the whole ring buffer. */
843 down_write(&all_cpu_access_lock);
844 } else {
845 /* gain it for accessing a cpu ring buffer. */
846
847 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
848 down_read(&all_cpu_access_lock);
849
850 /* Secondly block other access to this @cpu ring buffer. */
851 mutex_lock(&per_cpu(cpu_access_lock, cpu));
852 }
853 }
854
static inline void trace_access_unlock(int cpu)
856 {
857 if (cpu == RING_BUFFER_ALL_CPUS) {
858 up_write(&all_cpu_access_lock);
859 } else {
860 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
861 up_read(&all_cpu_access_lock);
862 }
863 }
864
static inline void trace_access_lock_init(void)
866 {
867 int cpu;
868
869 for_each_possible_cpu(cpu)
870 mutex_init(&per_cpu(cpu_access_lock, cpu));
871 }
872
873 #else
874
875 static DEFINE_MUTEX(access_lock);
876
static inline void trace_access_lock(int cpu)
878 {
879 (void)cpu;
880 mutex_lock(&access_lock);
881 }
882
static inline void trace_access_unlock(int cpu)
884 {
885 (void)cpu;
886 mutex_unlock(&access_lock);
887 }
888
static inline void trace_access_lock_init(void)
890 {
891 }
892
893 #endif
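
/*
* Typical reader-side pattern (sketch): a consuming read of one CPU
* buffer is bracketed with
*
*    trace_access_lock(cpu);
*    event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
*    ...
*    trace_access_unlock(cpu);
*
* while operations that touch every CPU buffer pass RING_BUFFER_ALL_CPUS
* to take the access lock exclusively.
*/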
894
895 #ifdef CONFIG_STACKTRACE
896 static void __ftrace_trace_stack(struct trace_buffer *buffer,
897 unsigned long flags,
898 int skip, int pc, struct pt_regs *regs);
899 static inline void ftrace_trace_stack(struct trace_array *tr,
900 struct trace_buffer *buffer,
901 unsigned long flags,
902 int skip, int pc, struct pt_regs *regs);
903
904 #else
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
unsigned long flags,
int skip, int pc, struct pt_regs *regs)
908 {
909 }
static inline void ftrace_trace_stack(struct trace_array *tr,
struct trace_buffer *buffer,
unsigned long flags,
int skip, int pc, struct pt_regs *regs)
914 {
915 }
916
917 #endif
918
919 static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
int type, unsigned long flags, int pc)
922 {
923 struct trace_entry *ent = ring_buffer_event_data(event);
924
925 tracing_generic_entry_update(ent, type, flags, pc);
926 }
927
928 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
int type,
unsigned long len,
unsigned long flags, int pc)
933 {
934 struct ring_buffer_event *event;
935
936 event = ring_buffer_lock_reserve(buffer, len);
937 if (event != NULL)
938 trace_event_setup(event, type, flags, pc);
939
940 return event;
941 }
942
void tracer_tracing_on(struct trace_array *tr)
944 {
945 if (tr->array_buffer.buffer)
946 ring_buffer_record_on(tr->array_buffer.buffer);
947 /*
948 * This flag is looked at when buffers haven't been allocated
949 * yet, or by some tracers (like irqsoff), that just want to
950 * know if the ring buffer has been disabled, but it can handle
951 * races of where it gets disabled but we still do a record.
952 * As the check is in the fast path of the tracers, it is more
953 * important to be fast than accurate.
954 */
955 tr->buffer_disabled = 0;
956 /* Make the flag seen by readers */
957 smp_wmb();
958 }
959
960 /**
961 * tracing_on - enable tracing buffers
962 *
963 * This function enables tracing buffers that may have been
964 * disabled with tracing_off.
965 */
void tracing_on(void)
967 {
968 tracer_tracing_on(&global_trace);
969 }
970 EXPORT_SYMBOL_GPL(tracing_on);
971
972
973 static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
975 {
976 __this_cpu_write(trace_taskinfo_save, true);
977
978 /* If this is the temp buffer, we need to commit fully */
979 if (this_cpu_read(trace_buffered_event) == event) {
980 /* Length is in event->array[0] */
981 ring_buffer_write(buffer, event->array[0], &event->array[1]);
982 /* Release the temp buffer */
983 this_cpu_dec(trace_buffered_event_cnt);
984 } else
985 ring_buffer_unlock_commit(buffer, event);
986 }
987
988 /**
989 * __trace_puts - write a constant string into the trace buffer.
990 * @ip: The address of the caller
991 * @str: The constant string to write
992 * @size: The size of the string.
993 */
int __trace_puts(unsigned long ip, const char *str, int size)
995 {
996 struct ring_buffer_event *event;
997 struct trace_buffer *buffer;
998 struct print_entry *entry;
999 unsigned long irq_flags;
1000 int alloc;
1001 int pc;
1002
1003 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1004 return 0;
1005
1006 pc = preempt_count();
1007
1008 if (unlikely(tracing_selftest_running || tracing_disabled))
1009 return 0;
1010
1011 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1012
1013 local_save_flags(irq_flags);
1014 buffer = global_trace.array_buffer.buffer;
1015 ring_buffer_nest_start(buffer);
1016 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1017 irq_flags, pc);
1018 if (!event) {
1019 size = 0;
1020 goto out;
1021 }
1022
1023 entry = ring_buffer_event_data(event);
1024 entry->ip = ip;
1025
1026 memcpy(&entry->buf, str, size);
1027
1028 /* Add a newline if necessary */
1029 if (entry->buf[size - 1] != '\n') {
1030 entry->buf[size] = '\n';
1031 entry->buf[size + 1] = '\0';
1032 } else
1033 entry->buf[size] = '\0';
1034
1035 __buffer_unlock_commit(buffer, event);
1036 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1037 out:
1038 ring_buffer_nest_end(buffer);
1039 return size;
1040 }
1041 EXPORT_SYMBOL_GPL(__trace_puts);
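
/*
* Note: callers normally use the trace_puts() helper macro rather than
* calling __trace_puts()/__trace_bputs() directly; the macro selects the
* cheaper __trace_bputs() path when the string is a build-time constant,
* e.g.
*
*    trace_puts("reached the slow path\n");
*/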
1042
1043 /**
1044 * __trace_bputs - write the pointer to a constant string into trace buffer
1045 * @ip: The address of the caller
1046 * @str: The constant string to write to the buffer to
1047 */
int __trace_bputs(unsigned long ip, const char *str)
1049 {
1050 struct ring_buffer_event *event;
1051 struct trace_buffer *buffer;
1052 struct bputs_entry *entry;
1053 unsigned long irq_flags;
1054 int size = sizeof(struct bputs_entry);
1055 int ret = 0;
1056 int pc;
1057
1058 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1059 return 0;
1060
1061 pc = preempt_count();
1062
1063 if (unlikely(tracing_selftest_running || tracing_disabled))
1064 return 0;
1065
1066 local_save_flags(irq_flags);
1067 buffer = global_trace.array_buffer.buffer;
1068
1069 ring_buffer_nest_start(buffer);
1070 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1071 irq_flags, pc);
1072 if (!event)
1073 goto out;
1074
1075 entry = ring_buffer_event_data(event);
1076 entry->ip = ip;
1077 entry->str = str;
1078
1079 __buffer_unlock_commit(buffer, event);
1080 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1081
1082 ret = 1;
1083 out:
1084 ring_buffer_nest_end(buffer);
1085 return ret;
1086 }
1087 EXPORT_SYMBOL_GPL(__trace_bputs);
1088
1089 #ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
void *cond_data)
1092 {
1093 struct tracer *tracer = tr->current_trace;
1094 unsigned long flags;
1095
1096 if (in_nmi()) {
1097 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1098 internal_trace_puts("*** snapshot is being ignored ***\n");
1099 return;
1100 }
1101
1102 if (!tr->allocated_snapshot) {
1103 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1104 internal_trace_puts("*** stopping trace here! ***\n");
1105 tracing_off();
1106 return;
1107 }
1108
1109 /* Note, snapshot can not be used when the tracer uses it */
1110 if (tracer->use_max_tr) {
1111 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1112 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1113 return;
1114 }
1115
1116 local_irq_save(flags);
1117 update_max_tr(tr, current, smp_processor_id(), cond_data);
1118 local_irq_restore(flags);
1119 }
1120
void tracing_snapshot_instance(struct trace_array *tr)
1122 {
1123 tracing_snapshot_instance_cond(tr, NULL);
1124 }
1125
1126 /**
1127 * tracing_snapshot - take a snapshot of the current buffer.
1128 *
1129 * This causes a swap between the snapshot buffer and the current live
1130 * tracing buffer. You can use this to take snapshots of the live
1131 * trace when some condition is triggered, but continue to trace.
1132 *
1133 * Note, make sure to allocate the snapshot with either
1134 * a tracing_snapshot_alloc(), or by doing it manually
1135 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1136 *
1137 * If the snapshot buffer is not allocated, it will stop tracing.
1138 * Basically making a permanent snapshot.
1139 */
void tracing_snapshot(void)
1141 {
1142 struct trace_array *tr = &global_trace;
1143
1144 tracing_snapshot_instance(tr);
1145 }
1146 EXPORT_SYMBOL_GPL(tracing_snapshot);
1147
1148 /**
1149 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1150 * @tr: The tracing instance to snapshot
1151 * @cond_data: The data to be tested conditionally, and possibly saved
1152 *
1153 * This is the same as tracing_snapshot() except that the snapshot is
1154 * conditional - the snapshot will only happen if the
1155 * cond_snapshot.update() implementation receiving the cond_data
1156 * returns true, which means that the trace array's cond_snapshot
1157 * update() operation used the cond_data to determine whether the
1158 * snapshot should be taken, and if it was, presumably saved it along
1159 * with the snapshot.
1160 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1162 {
1163 tracing_snapshot_instance_cond(tr, cond_data);
1164 }
1165 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1166
1167 /**
1168 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1169 * @tr: The tracing instance
1170 *
1171 * When the user enables a conditional snapshot using
1172 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1173 * with the snapshot. This accessor is used to retrieve it.
1174 *
1175 * Should not be called from cond_snapshot.update(), since it takes
1176 * the tr->max_lock lock, which the code calling
1177 * cond_snapshot.update() has already done.
1178 *
1179 * Returns the cond_data associated with the trace array's snapshot.
1180 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
1182 {
1183 void *cond_data = NULL;
1184
1185 arch_spin_lock(&tr->max_lock);
1186
1187 if (tr->cond_snapshot)
1188 cond_data = tr->cond_snapshot->cond_data;
1189
1190 arch_spin_unlock(&tr->max_lock);
1191
1192 return cond_data;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1195
1196 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1197 struct array_buffer *size_buf, int cpu_id);
1198 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1199
int tracing_alloc_snapshot_instance(struct trace_array *tr)
1201 {
1202 int ret;
1203
1204 if (!tr->allocated_snapshot) {
1205
1206 /* allocate spare buffer */
1207 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1208 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1209 if (ret < 0)
1210 return ret;
1211
1212 tr->allocated_snapshot = true;
1213 }
1214
1215 return 0;
1216 }
1217
static void free_snapshot(struct trace_array *tr)
1219 {
/*
* We don't free the ring buffer; instead, we resize it because
* the max_tr ring buffer has some state (e.g. ring->clock) and
* we want to preserve it.
*/
1225 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1226 set_buffer_entries(&tr->max_buffer, 1);
1227 tracing_reset_online_cpus(&tr->max_buffer);
1228 tr->allocated_snapshot = false;
1229 }
1230
1231 /**
1232 * tracing_alloc_snapshot - allocate snapshot buffer.
1233 *
1234 * This only allocates the snapshot buffer if it isn't already
1235 * allocated - it doesn't also take a snapshot.
1236 *
1237 * This is meant to be used in cases where the snapshot buffer needs
1238 * to be set up for events that can't sleep but need to be able to
1239 * trigger a snapshot.
1240 */
int tracing_alloc_snapshot(void)
1242 {
1243 struct trace_array *tr = &global_trace;
1244 int ret;
1245
1246 ret = tracing_alloc_snapshot_instance(tr);
1247 WARN_ON(ret < 0);
1248
1249 return ret;
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1252
1253 /**
1254 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1255 *
1256 * This is similar to tracing_snapshot(), but it will allocate the
1257 * snapshot buffer if it isn't already allocated. Use this only
1258 * where it is safe to sleep, as the allocation may sleep.
1259 *
1260 * This causes a swap between the snapshot buffer and the current live
1261 * tracing buffer. You can use this to take snapshots of the live
1262 * trace when some condition is triggered, but continue to trace.
1263 */
void tracing_snapshot_alloc(void)
1265 {
1266 int ret;
1267
1268 ret = tracing_alloc_snapshot();
1269 if (ret < 0)
1270 return;
1271
1272 tracing_snapshot();
1273 }
1274 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
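
/*
* Usage sketch (hypothetical caller): allocate the spare buffer once from
* a sleepable context, then snapshot when the interesting condition hits:
*
*    tracing_alloc_snapshot();
*    ...
*    if (rare_condition_hit)
*        tracing_snapshot();
*
* The captured trace can then be read from the tracefs "snapshot" file.
*/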
1275
1276 /**
1277 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1278 * @tr: The tracing instance
1279 * @cond_data: User data to associate with the snapshot
1280 * @update: Implementation of the cond_snapshot update function
1281 *
1282 * Check whether the conditional snapshot for the given instance has
1283 * already been enabled, or if the current tracer is already using a
1284 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1285 * save the cond_data and update function inside.
1286 *
1287 * Returns 0 if successful, error otherwise.
1288 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
cond_update_fn_t update)
1291 {
1292 struct cond_snapshot *cond_snapshot;
1293 int ret = 0;
1294
1295 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1296 if (!cond_snapshot)
1297 return -ENOMEM;
1298
1299 cond_snapshot->cond_data = cond_data;
1300 cond_snapshot->update = update;
1301
1302 mutex_lock(&trace_types_lock);
1303
1304 ret = tracing_alloc_snapshot_instance(tr);
1305 if (ret)
1306 goto fail_unlock;
1307
1308 if (tr->current_trace->use_max_tr) {
1309 ret = -EBUSY;
1310 goto fail_unlock;
1311 }
1312
1313 /*
1314 * The cond_snapshot can only change to NULL without the
1315 * trace_types_lock. We don't care if we race with it going
1316 * to NULL, but we want to make sure that it's not set to
1317 * something other than NULL when we get here, which we can
1318 * do safely with only holding the trace_types_lock and not
1319 * having to take the max_lock.
1320 */
1321 if (tr->cond_snapshot) {
1322 ret = -EBUSY;
1323 goto fail_unlock;
1324 }
1325
1326 arch_spin_lock(&tr->max_lock);
1327 tr->cond_snapshot = cond_snapshot;
1328 arch_spin_unlock(&tr->max_lock);
1329
1330 mutex_unlock(&trace_types_lock);
1331
1332 return ret;
1333
1334 fail_unlock:
1335 mutex_unlock(&trace_types_lock);
1336 kfree(cond_snapshot);
1337 return ret;
1338 }
1339 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
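
/*
* Conditional snapshot usage sketch (hypothetical names; the update
* callback receives the trace array and the cond_data and returns true
* when the snapshot should actually be taken):
*
*    static bool my_update(struct trace_array *tr, void *cond_data)
*    {
*        struct my_data *d = cond_data;
*
*        return d->hits > d->threshold;
*    }
*
*    tracing_snapshot_cond_enable(tr, &data, my_update);
*    ...
*    tracing_snapshot_cond(tr, &data);
*    ...
*    tracing_snapshot_cond_disable(tr);
*/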
1340
1341 /**
1342 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1343 * @tr: The tracing instance
1344 *
1345 * Check whether the conditional snapshot for the given instance is
1346 * enabled; if so, free the cond_snapshot associated with it,
1347 * otherwise return -EINVAL.
1348 *
1349 * Returns 0 if successful, error otherwise.
1350 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
1352 {
1353 int ret = 0;
1354
1355 arch_spin_lock(&tr->max_lock);
1356
1357 if (!tr->cond_snapshot)
1358 ret = -EINVAL;
1359 else {
1360 kfree(tr->cond_snapshot);
1361 tr->cond_snapshot = NULL;
1362 }
1363
1364 arch_spin_unlock(&tr->max_lock);
1365
1366 return ret;
1367 }
1368 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1369 #else
void tracing_snapshot(void)
1371 {
1372 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1373 }
1374 EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1376 {
1377 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
1381 {
1382 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1383 return -ENODEV;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
1387 {
1388 /* Give warning */
1389 tracing_snapshot();
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
1393 {
1394 return NULL;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1398 {
1399 return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
1403 {
1404 return false;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1407 #endif /* CONFIG_TRACER_SNAPSHOT */
1408
void tracer_tracing_off(struct trace_array *tr)
1410 {
1411 if (tr->array_buffer.buffer)
1412 ring_buffer_record_off(tr->array_buffer.buffer);
1413 /*
1414 * This flag is looked at when buffers haven't been allocated
1415 * yet, or by some tracers (like irqsoff), that just want to
1416 * know if the ring buffer has been disabled, but it can handle
1417 * races of where it gets disabled but we still do a record.
1418 * As the check is in the fast path of the tracers, it is more
1419 * important to be fast than accurate.
1420 */
1421 tr->buffer_disabled = 1;
1422 /* Make the flag seen by readers */
1423 smp_wmb();
1424 }
1425
1426 /**
1427 * tracing_off - turn off tracing buffers
1428 *
1429 * This function stops the tracing buffers from recording data.
1430 * It does not disable any overhead the tracers themselves may
1431 * be causing. This function simply causes all recording to
1432 * the ring buffers to fail.
1433 */
void tracing_off(void)
1435 {
1436 tracer_tracing_off(&global_trace);
1437 }
1438 EXPORT_SYMBOL_GPL(tracing_off);
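
/*
* tracing_on()/tracing_off() are handy for bracketing a region of interest
* from kernel code while leaving the tracers themselves configured, e.g.
* (suspect_function() being whatever code is under investigation):
*
*    tracing_on();
*    suspect_function();
*    tracing_off();
*/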
1439
void disable_trace_on_warning(void)
1441 {
1442 if (__disable_trace_on_warning) {
1443 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1444 "Disabling tracing due to warning\n");
1445 tracing_off();
1446 }
1447 }
1448
1449 /**
1450 * tracer_tracing_is_on - show real state of ring buffer enabled
1451 * @tr : the trace array to know if ring buffer is enabled
1452 *
1453 * Shows real state of the ring buffer if it is enabled or not.
1454 */
bool tracer_tracing_is_on(struct trace_array *tr)
1456 {
1457 if (tr->array_buffer.buffer)
1458 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1459 return !tr->buffer_disabled;
1460 }
1461
1462 /**
1463 * tracing_is_on - show state of ring buffers enabled
1464 */
int tracing_is_on(void)
1466 {
1467 return tracer_tracing_is_on(&global_trace);
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_is_on);
1470
static int __init set_buf_size(char *str)
1472 {
1473 unsigned long buf_size;
1474
1475 if (!str)
1476 return 0;
1477 buf_size = memparse(str, &str);
1478 /* nr_entries can not be zero */
1479 if (buf_size == 0)
1480 return 0;
1481 trace_buf_size = buf_size;
1482 return 1;
1483 }
1484 __setup("trace_buf_size=", set_buf_size);
1485
static int __init set_tracing_thresh(char *str)
1487 {
1488 unsigned long threshold;
1489 int ret;
1490
1491 if (!str)
1492 return 0;
1493 ret = kstrtoul(str, 0, &threshold);
1494 if (ret < 0)
1495 return 0;
1496 tracing_thresh = threshold * 1000;
1497 return 1;
1498 }
1499 __setup("tracing_thresh=", set_tracing_thresh);
1500
unsigned long nsecs_to_usecs(unsigned long nsecs)
1502 {
1503 return nsecs / 1000;
1504 }
1505
1506 /*
1507 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1508 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1509 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1510 * of strings in the order that the evals (enum) were defined.
1511 */
1512 #undef C
1513 #define C(a, b) b
1514
/* These must match the bit positions in trace_iterator_flags */
1516 static const char *trace_options[] = {
1517 TRACE_FLAGS
1518 NULL
1519 };
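
/*
* For example, an entry like C(PRINT_PARENT, "print-parent") in TRACE_FLAGS
* expands to the string "print-parent" here, while trace.h redefines C() to
* build the matching TRACE_ITER_* enum values, keeping the flag bits and the
* option strings in sync by construction.
*/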
1520
1521 static struct {
1522 u64 (*func)(void);
1523 const char *name;
1524 int in_ns; /* is this clock in nanoseconds? */
1525 } trace_clocks[] = {
1526 { trace_clock_local, "local", 1 },
1527 { trace_clock_global, "global", 1 },
1528 { trace_clock_counter, "counter", 0 },
1529 { trace_clock_jiffies, "uptime", 0 },
1530 { trace_clock, "perf", 1 },
1531 { ktime_get_mono_fast_ns, "mono", 1 },
1532 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1533 { ktime_get_boot_fast_ns, "boot", 1 },
1534 ARCH_TRACE_CLOCKS
1535 };
1536
bool trace_clock_in_ns(struct trace_array *tr)
1538 {
1539 if (trace_clocks[tr->clock_id].in_ns)
1540 return true;
1541
1542 return false;
1543 }
1544
1545 /*
1546 * trace_parser_get_init - gets the buffer for trace parser
1547 */
int trace_parser_get_init(struct trace_parser *parser, int size)
1549 {
1550 memset(parser, 0, sizeof(*parser));
1551
1552 parser->buffer = kmalloc(size, GFP_KERNEL);
1553 if (!parser->buffer)
1554 return 1;
1555
1556 parser->size = size;
1557 return 0;
1558 }
1559
1560 /*
1561 * trace_parser_put - frees the buffer for trace parser
1562 */
void trace_parser_put(struct trace_parser *parser)
1564 {
1565 kfree(parser->buffer);
1566 parser->buffer = NULL;
1567 }
1568
1569 /*
1570 * trace_get_user - reads the user input string separated by space
1571 * (matched by isspace(ch))
1572 *
1573 * For each string found the 'struct trace_parser' is updated,
1574 * and the function returns.
1575 *
1576 * Returns number of bytes read.
1577 *
1578 * See kernel/trace/trace.h for 'struct trace_parser' details.
1579 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
size_t cnt, loff_t *ppos)
1582 {
1583 char ch;
1584 size_t read = 0;
1585 ssize_t ret;
1586
1587 if (!*ppos)
1588 trace_parser_clear(parser);
1589
1590 ret = get_user(ch, ubuf++);
1591 if (ret)
1592 goto out;
1593
1594 read++;
1595 cnt--;
1596
1597 /*
1598 * The parser is not finished with the last write,
1599 * continue reading the user input without skipping spaces.
1600 */
1601 if (!parser->cont) {
1602 /* skip white space */
1603 while (cnt && isspace(ch)) {
1604 ret = get_user(ch, ubuf++);
1605 if (ret)
1606 goto out;
1607 read++;
1608 cnt--;
1609 }
1610
1611 parser->idx = 0;
1612
1613 /* only spaces were written */
1614 if (isspace(ch) || !ch) {
1615 *ppos += read;
1616 ret = read;
1617 goto out;
1618 }
1619 }
1620
1621 /* read the non-space input */
1622 while (cnt && !isspace(ch) && ch) {
1623 if (parser->idx < parser->size - 1)
1624 parser->buffer[parser->idx++] = ch;
1625 else {
1626 ret = -EINVAL;
1627 goto out;
1628 }
1629 ret = get_user(ch, ubuf++);
1630 if (ret)
1631 goto out;
1632 read++;
1633 cnt--;
1634 }
1635
1636 /* We either got finished input or we have to wait for another call. */
1637 if (isspace(ch) || !ch) {
1638 parser->buffer[parser->idx] = 0;
1639 parser->cont = false;
1640 } else if (parser->idx < parser->size - 1) {
1641 parser->cont = true;
1642 parser->buffer[parser->idx++] = ch;
1643 /* Make sure the parsed string always terminates with '\0'. */
1644 parser->buffer[parser->idx] = 0;
1645 } else {
1646 ret = -EINVAL;
1647 goto out;
1648 }
1649
1650 *ppos += read;
1651 ret = read;
1652
1653 out:
1654 return ret;
1655 }
1656
1657 /* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1659 {
1660 int len;
1661
1662 if (trace_seq_used(s) <= s->seq.readpos)
1663 return -EBUSY;
1664
1665 len = trace_seq_used(s) - s->seq.readpos;
1666 if (cnt > len)
1667 cnt = len;
1668 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1669
1670 s->seq.readpos += cnt;
1671 return cnt;
1672 }
1673
1674 unsigned long __read_mostly tracing_thresh;
1675 static const struct file_operations tracing_max_lat_fops;
1676
1677 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1678 defined(CONFIG_FSNOTIFY)
1679
1680 static struct workqueue_struct *fsnotify_wq;
1681
static void latency_fsnotify_workfn(struct work_struct *work)
1683 {
1684 struct trace_array *tr = container_of(work, struct trace_array,
1685 fsnotify_work);
1686 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1687 }
1688
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1690 {
1691 struct trace_array *tr = container_of(iwork, struct trace_array,
1692 fsnotify_irqwork);
1693 queue_work(fsnotify_wq, &tr->fsnotify_work);
1694 }
1695
static void trace_create_maxlat_file(struct trace_array *tr,
struct dentry *d_tracer)
1698 {
1699 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1700 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1701 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1702 d_tracer, &tr->max_latency,
1703 &tracing_max_lat_fops);
1704 }
1705
__init static int latency_fsnotify_init(void)
1707 {
1708 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1709 WQ_UNBOUND | WQ_HIGHPRI, 0);
1710 if (!fsnotify_wq) {
1711 pr_err("Unable to allocate tr_max_lat_wq\n");
1712 return -ENOMEM;
1713 }
1714 return 0;
1715 }
1716
1717 late_initcall_sync(latency_fsnotify_init);
1718
void latency_fsnotify(struct trace_array *tr)
1720 {
1721 if (!fsnotify_wq)
1722 return;
1723 /*
1724 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1725 * possible that we are called from __schedule() or do_idle(), which
1726 * could cause a deadlock.
1727 */
1728 irq_work_queue(&tr->fsnotify_irqwork);
1729 }
1730
1731 /*
1732 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1733 * defined(CONFIG_FSNOTIFY)
1734 */
1735 #else
1736
1737 #define trace_create_maxlat_file(tr, d_tracer) \
1738 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1739 &tr->max_latency, &tracing_max_lat_fops)
1740
1741 #endif
1742
1743 #ifdef CONFIG_TRACER_MAX_TRACE
1744 /*
1745 * Copy the new maximum trace into the separate maximum-trace
1746 * structure. (this way the maximum trace is permanently saved,
1747 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1748 */
1749 static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1751 {
1752 struct array_buffer *trace_buf = &tr->array_buffer;
1753 struct array_buffer *max_buf = &tr->max_buffer;
1754 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1755 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1756
1757 max_buf->cpu = cpu;
1758 max_buf->time_start = data->preempt_timestamp;
1759
1760 max_data->saved_latency = tr->max_latency;
1761 max_data->critical_start = data->critical_start;
1762 max_data->critical_end = data->critical_end;
1763
1764 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1765 max_data->pid = tsk->pid;
1766 /*
1767 * If tsk == current, then use current_uid(), as that does not use
1768 * RCU. The irq tracer can be called out of RCU scope.
1769 */
1770 if (tsk == current)
1771 max_data->uid = current_uid();
1772 else
1773 max_data->uid = task_uid(tsk);
1774
1775 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1776 max_data->policy = tsk->policy;
1777 max_data->rt_priority = tsk->rt_priority;
1778
1779 /* record this tasks comm */
1780 tracing_record_cmdline(tsk);
1781 latency_fsnotify(tr);
1782 }
1783
1784 /**
1785 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1786 * @tr: tracer
1787 * @tsk: the task with the latency
1788 * @cpu: The cpu that initiated the trace.
1789 * @cond_data: User data associated with a conditional snapshot
1790 *
1791 * Flip the buffers between the @tr and the max_tr and record information
1792 * about which task was the cause of this latency.
1793 */
1794 void
1795 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1796 void *cond_data)
1797 {
1798 if (tr->stop_count)
1799 return;
1800
1801 WARN_ON_ONCE(!irqs_disabled());
1802
1803 if (!tr->allocated_snapshot) {
1804 /* Only the nop tracer should hit this when disabling */
1805 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1806 return;
1807 }
1808
1809 arch_spin_lock(&tr->max_lock);
1810
1811 /* Inherit the recordable setting from array_buffer */
1812 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1813 ring_buffer_record_on(tr->max_buffer.buffer);
1814 else
1815 ring_buffer_record_off(tr->max_buffer.buffer);
1816
1817 #ifdef CONFIG_TRACER_SNAPSHOT
1818 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1819 goto out_unlock;
1820 #endif
1821 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1822
1823 __update_max_tr(tr, tsk, cpu);
1824
1825 out_unlock:
1826 arch_spin_unlock(&tr->max_lock);
1827 }
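
/*
 * Illustrative (hedged) call pattern from a latency tracer once a new
 * maximum has been measured -- the snippet is a sketch, not a quote of
 * any particular tracer:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * Callers run with interrupts disabled, which is what the
 * WARN_ON_ONCE(!irqs_disabled()) above checks.
 */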
1828
1829 /**
1830 * update_max_tr_single - only copy one trace over, and reset the rest
1831 * @tr: tracer
1832 * @tsk: task with the latency
1833 * @cpu: the cpu of the buffer to copy.
1834 *
1835 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1836 */
1837 void
1838 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1839 {
1840 int ret;
1841
1842 if (tr->stop_count)
1843 return;
1844
1845 WARN_ON_ONCE(!irqs_disabled());
1846 if (!tr->allocated_snapshot) {
1847 /* Only the nop tracer should hit this when disabling */
1848 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1849 return;
1850 }
1851
1852 arch_spin_lock(&tr->max_lock);
1853
1854 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1855
1856 if (ret == -EBUSY) {
1857 /*
1858 * We failed to swap the buffer due to a commit taking
1859 * place on this CPU. We fail to record, but we reset
1860 * the max trace buffer (no one writes directly to it)
1861 * and flag that it failed.
1862 */
1863 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1864 "Failed to swap buffers due to commit in progress\n");
1865 }
1866
1867 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1868
1869 __update_max_tr(tr, tsk, cpu);
1870 arch_spin_unlock(&tr->max_lock);
1871 }
1872 #endif /* CONFIG_TRACER_MAX_TRACE */
1873
1874 static int wait_on_pipe(struct trace_iterator *iter, int full)
1875 {
1876 /* Iterators are static; they should be either filled or empty */
1877 if (trace_buffer_iter(iter, iter->cpu_file))
1878 return 0;
1879
1880 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1881 full);
1882 }
1883
1884 #ifdef CONFIG_FTRACE_STARTUP_TEST
1885 static bool selftests_can_run;
1886
1887 struct trace_selftests {
1888 struct list_head list;
1889 struct tracer *type;
1890 };
1891
1892 static LIST_HEAD(postponed_selftests);
1893
1894 static int save_selftest(struct tracer *type)
1895 {
1896 struct trace_selftests *selftest;
1897
1898 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1899 if (!selftest)
1900 return -ENOMEM;
1901
1902 selftest->type = type;
1903 list_add(&selftest->list, &postponed_selftests);
1904 return 0;
1905 }
1906
1907 static int run_tracer_selftest(struct tracer *type)
1908 {
1909 struct trace_array *tr = &global_trace;
1910 struct tracer *saved_tracer = tr->current_trace;
1911 int ret;
1912
1913 if (!type->selftest || tracing_selftest_disabled)
1914 return 0;
1915
1916 /*
1917 * If a tracer registers early in boot up (before scheduling is
1918 * initialized and such), then do not run its selftests yet.
1919 * Instead, run it a little later in the boot process.
1920 */
1921 if (!selftests_can_run)
1922 return save_selftest(type);
1923
1924 /*
1925 * Run a selftest on this tracer.
1926 * Here we reset the trace buffer, and set the current
1927 * tracer to be this tracer. The tracer can then run some
1928 * internal tracing to verify that everything is in order.
1929 * If we fail, we do not register this tracer.
1930 */
1931 tracing_reset_online_cpus(&tr->array_buffer);
1932
1933 tr->current_trace = type;
1934
1935 #ifdef CONFIG_TRACER_MAX_TRACE
1936 if (type->use_max_tr) {
1937 /* If we expanded the buffers, make sure the max is expanded too */
1938 if (ring_buffer_expanded)
1939 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1940 RING_BUFFER_ALL_CPUS);
1941 tr->allocated_snapshot = true;
1942 }
1943 #endif
1944
1945 /* the test is responsible for initializing and enabling */
1946 pr_info("Testing tracer %s: ", type->name);
1947 ret = type->selftest(type, tr);
1948 /* the test is responsible for resetting too */
1949 tr->current_trace = saved_tracer;
1950 if (ret) {
1951 printk(KERN_CONT "FAILED!\n");
1952 /* Add the warning after printing 'FAILED' */
1953 WARN_ON(1);
1954 return -1;
1955 }
1956 /* Only reset on passing, to avoid touching corrupted buffers */
1957 tracing_reset_online_cpus(&tr->array_buffer);
1958
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 if (type->use_max_tr) {
1961 tr->allocated_snapshot = false;
1962
1963 /* Shrink the max buffer again */
1964 if (ring_buffer_expanded)
1965 ring_buffer_resize(tr->max_buffer.buffer, 1,
1966 RING_BUFFER_ALL_CPUS);
1967 }
1968 #endif
1969
1970 printk(KERN_CONT "PASSED\n");
1971 return 0;
1972 }
1973
1974 static __init int init_trace_selftests(void)
1975 {
1976 struct trace_selftests *p, *n;
1977 struct tracer *t, **last;
1978 int ret;
1979
1980 selftests_can_run = true;
1981
1982 mutex_lock(&trace_types_lock);
1983
1984 if (list_empty(&postponed_selftests))
1985 goto out;
1986
1987 pr_info("Running postponed tracer tests:\n");
1988
1989 tracing_selftest_running = true;
1990 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1991 /* This loop can take minutes when sanitizers are enabled, so
1992 * let's make sure we allow RCU processing.
1993 */
1994 cond_resched();
1995 ret = run_tracer_selftest(p->type);
1996 /* If the test fails, then warn and remove from available_tracers */
1997 if (ret < 0) {
1998 WARN(1, "tracer: %s failed selftest, disabling\n",
1999 p->type->name);
2000 last = &trace_types;
2001 for (t = trace_types; t; t = t->next) {
2002 if (t == p->type) {
2003 *last = t->next;
2004 break;
2005 }
2006 last = &t->next;
2007 }
2008 }
2009 list_del(&p->list);
2010 kfree(p);
2011 }
2012 tracing_selftest_running = false;
2013
2014 out:
2015 mutex_unlock(&trace_types_lock);
2016
2017 return 0;
2018 }
2019 core_initcall(init_trace_selftests);
2020 #else
2021 static inline int run_tracer_selftest(struct tracer *type)
2022 {
2023 return 0;
2024 }
2025 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2026
2027 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2028
2029 static void __init apply_trace_boot_options(void);
2030
2031 /**
2032 * register_tracer - register a tracer with the ftrace system.
2033 * @type: the plugin for the tracer
2034 *
2035 * Register a new plugin tracer.
2036 */
2037 int __init register_tracer(struct tracer *type)
2038 {
2039 struct tracer *t;
2040 int ret = 0;
2041
2042 if (!type->name) {
2043 pr_info("Tracer must have a name\n");
2044 return -1;
2045 }
2046
2047 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2048 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2049 return -1;
2050 }
2051
2052 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2053 pr_warn("Can not register tracer %s due to lockdown\n",
2054 type->name);
2055 return -EPERM;
2056 }
2057
2058 mutex_lock(&trace_types_lock);
2059
2060 tracing_selftest_running = true;
2061
2062 for (t = trace_types; t; t = t->next) {
2063 if (strcmp(type->name, t->name) == 0) {
2064 /* already found */
2065 pr_info("Tracer %s already registered\n",
2066 type->name);
2067 ret = -1;
2068 goto out;
2069 }
2070 }
2071
2072 if (!type->set_flag)
2073 type->set_flag = &dummy_set_flag;
2074 if (!type->flags) {
2075 /* allocate a dummy tracer_flags */
2076 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2077 if (!type->flags) {
2078 ret = -ENOMEM;
2079 goto out;
2080 }
2081 type->flags->val = 0;
2082 type->flags->opts = dummy_tracer_opt;
2083 } else
2084 if (!type->flags->opts)
2085 type->flags->opts = dummy_tracer_opt;
2086
2087 /* store the tracer for __set_tracer_option */
2088 type->flags->trace = type;
2089
2090 ret = run_tracer_selftest(type);
2091 if (ret < 0)
2092 goto out;
2093
2094 type->next = trace_types;
2095 trace_types = type;
2096 add_tracer_options(&global_trace, type);
2097
2098 out:
2099 tracing_selftest_running = false;
2100 mutex_unlock(&trace_types_lock);
2101
2102 if (ret || !default_bootup_tracer)
2103 goto out_unlock;
2104
2105 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2106 goto out_unlock;
2107
2108 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2109 /* Do we want this tracer to start on bootup? */
2110 tracing_set_tracer(&global_trace, type->name);
2111 default_bootup_tracer = NULL;
2112
2113 apply_trace_boot_options();
2114
2115 /* Disable other selftests, since running this tracer will break them. */
2116 tracing_selftest_disabled = true;
2117 #ifdef CONFIG_FTRACE_STARTUP_TEST
2118 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
2119 type->name);
2120 #endif
2121
2122 out_unlock:
2123 return ret;
2124 }
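
/*
 * Minimal registration sketch for a hypothetical tracer plugin; the
 * field names follow struct tracer as used by the in-tree tracers:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * register_tracer() is __init, so registration has to happen at boot.
 */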
2125
2126 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2127 {
2128 struct trace_buffer *buffer = buf->buffer;
2129
2130 if (!buffer)
2131 return;
2132
2133 ring_buffer_record_disable(buffer);
2134
2135 /* Make sure all commits have finished */
2136 synchronize_rcu();
2137 ring_buffer_reset_cpu(buffer, cpu);
2138
2139 ring_buffer_record_enable(buffer);
2140 }
2141
2142 void tracing_reset_online_cpus(struct array_buffer *buf)
2143 {
2144 struct trace_buffer *buffer = buf->buffer;
2145
2146 if (!buffer)
2147 return;
2148
2149 ring_buffer_record_disable(buffer);
2150
2151 /* Make sure all commits have finished */
2152 synchronize_rcu();
2153
2154 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2155
2156 ring_buffer_reset_online_cpus(buffer);
2157
2158 ring_buffer_record_enable(buffer);
2159 }
2160
2161 /* Must have trace_types_lock held */
2162 void tracing_reset_all_online_cpus(void)
2163 {
2164 struct trace_array *tr;
2165
2166 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2167 if (!tr->clear_trace)
2168 continue;
2169 tr->clear_trace = false;
2170 tracing_reset_online_cpus(&tr->array_buffer);
2171 #ifdef CONFIG_TRACER_MAX_TRACE
2172 tracing_reset_online_cpus(&tr->max_buffer);
2173 #endif
2174 }
2175 }
2176
2177 static int *tgid_map;
2178
2179 #define SAVED_CMDLINES_DEFAULT 128
2180 #define NO_CMDLINE_MAP UINT_MAX
2181 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2182 struct saved_cmdlines_buffer {
2183 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2184 unsigned *map_cmdline_to_pid;
2185 unsigned cmdline_num;
2186 int cmdline_idx;
2187 char *saved_cmdlines;
2188 };
2189 static struct saved_cmdlines_buffer *savedcmd;
2190
2191 /* temporary disable recording */
2192 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2193
2194 static inline char *get_saved_cmdlines(int idx)
2195 {
2196 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2197 }
2198
2199 static inline void set_cmdline(int idx, const char *cmdline)
2200 {
2201 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2202 }
2203
2204 static int allocate_cmdlines_buffer(unsigned int val,
2205 struct saved_cmdlines_buffer *s)
2206 {
2207 s->map_cmdline_to_pid = kmalloc_array(val,
2208 sizeof(*s->map_cmdline_to_pid),
2209 GFP_KERNEL);
2210 if (!s->map_cmdline_to_pid)
2211 return -ENOMEM;
2212
2213 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2214 if (!s->saved_cmdlines) {
2215 kfree(s->map_cmdline_to_pid);
2216 return -ENOMEM;
2217 }
2218
2219 s->cmdline_idx = 0;
2220 s->cmdline_num = val;
2221 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2222 sizeof(s->map_pid_to_cmdline));
2223 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2224 val * sizeof(*s->map_cmdline_to_pid));
2225
2226 return 0;
2227 }
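
/*
 * Rough size math for the default case (assuming a 4-byte unsigned and
 * TASK_COMM_LEN == 16): with val == SAVED_CMDLINES_DEFAULT (128),
 * map_cmdline_to_pid is 128 * 4 = 512 bytes and saved_cmdlines is
 * 128 * 16 = 2 KiB, while the embedded map_pid_to_cmdline array is
 * (PID_MAX_DEFAULT + 1) * 4 bytes, roughly 128 KiB, regardless of val.
 */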
2228
2229 static int trace_create_savedcmd(void)
2230 {
2231 int ret;
2232
2233 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2234 if (!savedcmd)
2235 return -ENOMEM;
2236
2237 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2238 if (ret < 0) {
2239 kfree(savedcmd);
2240 savedcmd = NULL;
2241 return -ENOMEM;
2242 }
2243
2244 return 0;
2245 }
2246
2247 int is_tracing_stopped(void)
2248 {
2249 return global_trace.stop_count;
2250 }
2251
2252 /**
2253 * tracing_start - quick start of the tracer
2254 *
2255 * If tracing is enabled but was stopped by tracing_stop,
2256 * this will start the tracer back up.
2257 */
2258 void tracing_start(void)
2259 {
2260 struct trace_buffer *buffer;
2261 unsigned long flags;
2262
2263 if (tracing_disabled)
2264 return;
2265
2266 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2267 if (--global_trace.stop_count) {
2268 if (global_trace.stop_count < 0) {
2269 /* Someone screwed up their debugging */
2270 WARN_ON_ONCE(1);
2271 global_trace.stop_count = 0;
2272 }
2273 goto out;
2274 }
2275
2276 /* Prevent the buffers from switching */
2277 arch_spin_lock(&global_trace.max_lock);
2278
2279 buffer = global_trace.array_buffer.buffer;
2280 if (buffer)
2281 ring_buffer_record_enable(buffer);
2282
2283 #ifdef CONFIG_TRACER_MAX_TRACE
2284 buffer = global_trace.max_buffer.buffer;
2285 if (buffer)
2286 ring_buffer_record_enable(buffer);
2287 #endif
2288
2289 arch_spin_unlock(&global_trace.max_lock);
2290
2291 out:
2292 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2293 }
2294
2295 static void tracing_start_tr(struct trace_array *tr)
2296 {
2297 struct trace_buffer *buffer;
2298 unsigned long flags;
2299
2300 if (tracing_disabled)
2301 return;
2302
2303 /* If global, we need to also start the max tracer */
2304 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2305 return tracing_start();
2306
2307 raw_spin_lock_irqsave(&tr->start_lock, flags);
2308
2309 if (--tr->stop_count) {
2310 if (tr->stop_count < 0) {
2311 /* Someone screwed up their debugging */
2312 WARN_ON_ONCE(1);
2313 tr->stop_count = 0;
2314 }
2315 goto out;
2316 }
2317
2318 buffer = tr->array_buffer.buffer;
2319 if (buffer)
2320 ring_buffer_record_enable(buffer);
2321
2322 out:
2323 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2324 }
2325
2326 /**
2327 * tracing_stop - quick stop of the tracer
2328 *
2329 * Light weight way to stop tracing. Use in conjunction with
2330 * tracing_start.
2331 */
2332 void tracing_stop(void)
2333 {
2334 struct trace_buffer *buffer;
2335 unsigned long flags;
2336
2337 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2338 if (global_trace.stop_count++)
2339 goto out;
2340
2341 /* Prevent the buffers from switching */
2342 arch_spin_lock(&global_trace.max_lock);
2343
2344 buffer = global_trace.array_buffer.buffer;
2345 if (buffer)
2346 ring_buffer_record_disable(buffer);
2347
2348 #ifdef CONFIG_TRACER_MAX_TRACE
2349 buffer = global_trace.max_buffer.buffer;
2350 if (buffer)
2351 ring_buffer_record_disable(buffer);
2352 #endif
2353
2354 arch_spin_unlock(&global_trace.max_lock);
2355
2356 out:
2357 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2358 }
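
/*
 * stop_count acts as a nesting counter, so stop/start pairs may nest:
 *
 *	tracing_stop();		// 0 -> 1, recording disabled
 *	tracing_stop();		// 1 -> 2
 *	tracing_start();	// 2 -> 1, still disabled
 *	tracing_start();	// 1 -> 0, recording enabled again
 */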
2359
2360 static void tracing_stop_tr(struct trace_array *tr)
2361 {
2362 struct trace_buffer *buffer;
2363 unsigned long flags;
2364
2365 /* If global, we need to also stop the max tracer */
2366 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2367 return tracing_stop();
2368
2369 raw_spin_lock_irqsave(&tr->start_lock, flags);
2370 if (tr->stop_count++)
2371 goto out;
2372
2373 buffer = tr->array_buffer.buffer;
2374 if (buffer)
2375 ring_buffer_record_disable(buffer);
2376
2377 out:
2378 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2379 }
2380
2381 static int trace_save_cmdline(struct task_struct *tsk)
2382 {
2383 unsigned pid, idx;
2384
2385 /* treat recording of idle task as a success */
2386 if (!tsk->pid)
2387 return 1;
2388
2389 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2390 return 0;
2391
2392 /*
2393 * It's not the end of the world if we don't get
2394 * the lock, but we also don't want to spin
2395 * nor do we want to disable interrupts,
2396 * so if we miss here, then better luck next time.
2397 */
2398 if (!arch_spin_trylock(&trace_cmdline_lock))
2399 return 0;
2400
2401 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2402 if (idx == NO_CMDLINE_MAP) {
2403 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2404
2405 /*
2406 * Check whether the cmdline buffer at idx has a pid
2407 * mapped. We are going to overwrite that entry so we
2408 * need to clear the map_pid_to_cmdline. Otherwise we
2409 * would read the new comm for the old pid.
2410 */
2411 pid = savedcmd->map_cmdline_to_pid[idx];
2412 if (pid != NO_CMDLINE_MAP)
2413 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2414
2415 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2416 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2417
2418 savedcmd->cmdline_idx = idx;
2419 }
2420
2421 set_cmdline(idx, tsk->comm);
2422
2423 arch_spin_unlock(&trace_cmdline_lock);
2424
2425 return 1;
2426 }
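
/*
 * Eviction example for the ring of cmdline slots above, assuming a
 * hypothetically small cmdline_num of 4: the first four new pids land
 * in slots 1, 2, 3 and 0; a fifth pid reuses slot 1, and the evicted
 * pid's map_pid_to_cmdline entry is reset to NO_CMDLINE_MAP, so later
 * lookups for it fall back to "<...>".
 */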
2427
2428 static void __trace_find_cmdline(int pid, char comm[])
2429 {
2430 unsigned map;
2431
2432 if (!pid) {
2433 strcpy(comm, "<idle>");
2434 return;
2435 }
2436
2437 if (WARN_ON_ONCE(pid < 0)) {
2438 strcpy(comm, "<XXX>");
2439 return;
2440 }
2441
2442 if (pid > PID_MAX_DEFAULT) {
2443 strcpy(comm, "<...>");
2444 return;
2445 }
2446
2447 map = savedcmd->map_pid_to_cmdline[pid];
2448 if (map != NO_CMDLINE_MAP)
2449 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2450 else
2451 strcpy(comm, "<...>");
2452 }
2453
2454 void trace_find_cmdline(int pid, char comm[])
2455 {
2456 preempt_disable();
2457 arch_spin_lock(&trace_cmdline_lock);
2458
2459 __trace_find_cmdline(pid, comm);
2460
2461 arch_spin_unlock(&trace_cmdline_lock);
2462 preempt_enable();
2463 }
2464
2465 int trace_find_tgid(int pid)
2466 {
2467 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2468 return 0;
2469
2470 return tgid_map[pid];
2471 }
2472
2473 static int trace_save_tgid(struct task_struct *tsk)
2474 {
2475 /* treat recording of idle task as a success */
2476 if (!tsk->pid)
2477 return 1;
2478
2479 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2480 return 0;
2481
2482 tgid_map[tsk->pid] = tsk->tgid;
2483 return 1;
2484 }
2485
2486 static bool tracing_record_taskinfo_skip(int flags)
2487 {
2488 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2489 return true;
2490 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2491 return true;
2492 if (!__this_cpu_read(trace_taskinfo_save))
2493 return true;
2494 return false;
2495 }
2496
2497 /**
2498 * tracing_record_taskinfo - record the task info of a task
2499 *
2500 * @task: task to record
2501 * @flags: TRACE_RECORD_CMDLINE for recording comm
2502 * TRACE_RECORD_TGID for recording tgid
2503 */
2504 void tracing_record_taskinfo(struct task_struct *task, int flags)
2505 {
2506 bool done;
2507
2508 if (tracing_record_taskinfo_skip(flags))
2509 return;
2510
2511 /*
2512 * Record as much task information as possible. If some fail, continue
2513 * to try to record the others.
2514 */
2515 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2516 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2517
2518 /* If recording any information failed, retry again soon. */
2519 if (!done)
2520 return;
2521
2522 __this_cpu_write(trace_taskinfo_save, false);
2523 }
2524
2525 /**
2526 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2527 *
2528 * @prev: previous task during sched_switch
2529 * @next: next task during sched_switch
2530 * @flags: TRACE_RECORD_CMDLINE for recording comm
2531 * TRACE_RECORD_TGID for recording tgid
2532 */
2533 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2534 struct task_struct *next, int flags)
2535 {
2536 bool done;
2537
2538 if (tracing_record_taskinfo_skip(flags))
2539 return;
2540
2541 /*
2542 * Record as much task information as possible. If some fail, continue
2543 * to try to record the others.
2544 */
2545 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2546 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2547 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2548 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2549
2550 /* If recording any information failed, retry again soon. */
2551 if (!done)
2552 return;
2553
2554 __this_cpu_write(trace_taskinfo_save, false);
2555 }
2556
2557 /* Helpers to record a specific task information */
2558 void tracing_record_cmdline(struct task_struct *task)
2559 {
2560 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2561 }
2562
2563 void tracing_record_tgid(struct task_struct *task)
2564 {
2565 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2566 }
2567
2568 /*
2569 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2570 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2571 * simplifies those functions and keeps them in sync.
2572 */
2573 enum print_line_t trace_handle_return(struct trace_seq *s)
2574 {
2575 return trace_seq_has_overflowed(s) ?
2576 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2577 }
2578 EXPORT_SYMBOL_GPL(trace_handle_return);
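
/*
 * Typical use in an event output handler (sketch; field names are
 * illustrative):
 *
 *	trace_seq_printf(s, "value=%d\n", field->value);
 *	return trace_handle_return(s);
 */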
2579
2580 void
2581 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2582 unsigned long flags, int pc)
2583 {
2584 struct task_struct *tsk = current;
2585
2586 entry->preempt_count = pc & 0xff;
2587 entry->pid = (tsk) ? tsk->pid : 0;
2588 entry->type = type;
2589 entry->flags =
2590 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2591 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2592 #else
2593 TRACE_FLAG_IRQS_NOSUPPORT |
2594 #endif
2595 ((pc & NMI_MASK) ? TRACE_FLAG_NMI : 0) |
2596 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2597 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2598 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2599 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2600 }
2601 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2602
2603 struct ring_buffer_event *
2604 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2605 int type,
2606 unsigned long len,
2607 unsigned long flags, int pc)
2608 {
2609 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2610 }
2611
2612 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2613 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2614 static int trace_buffered_event_ref;
2615
2616 /**
2617 * trace_buffered_event_enable - enable buffering events
2618 *
2619 * When events are being filtered, it is quicker to use a temporary
2620 * buffer to write the event data into if there's a likely chance
2621 * that it will not be committed. The discard of the ring buffer
2622 * is not as fast as committing, and is much slower than copying
2623 * a commit.
2624 *
2625 * When an event is to be filtered, allocate per cpu buffers to
2626 * write the event data into, and if the event is filtered and discarded
2627 * it is simply dropped, otherwise, the entire data is to be committed
2628 * in one shot.
2629 */
2630 void trace_buffered_event_enable(void)
2631 {
2632 struct ring_buffer_event *event;
2633 struct page *page;
2634 int cpu;
2635
2636 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2637
2638 if (trace_buffered_event_ref++)
2639 return;
2640
2641 for_each_tracing_cpu(cpu) {
2642 page = alloc_pages_node(cpu_to_node(cpu),
2643 GFP_KERNEL | __GFP_NORETRY, 0);
2644 if (!page)
2645 goto failed;
2646
2647 event = page_address(page);
2648 memset(event, 0, sizeof(*event));
2649
2650 per_cpu(trace_buffered_event, cpu) = event;
2651
2652 preempt_disable();
2653 if (cpu == smp_processor_id() &&
2654 __this_cpu_read(trace_buffered_event) !=
2655 per_cpu(trace_buffered_event, cpu))
2656 WARN_ON_ONCE(1);
2657 preempt_enable();
2658 }
2659
2660 return;
2661 failed:
2662 trace_buffered_event_disable();
2663 }
2664
2665 static void enable_trace_buffered_event(void *data)
2666 {
2667 /* Probably not needed, but do it anyway */
2668 smp_rmb();
2669 this_cpu_dec(trace_buffered_event_cnt);
2670 }
2671
2672 static void disable_trace_buffered_event(void *data)
2673 {
2674 this_cpu_inc(trace_buffered_event_cnt);
2675 }
2676
2677 /**
2678 * trace_buffered_event_disable - disable buffering events
2679 *
2680 * When a filter is removed, it is faster to not use the buffered
2681 * events, and to commit directly into the ring buffer. Free up
2682 * the temp buffers when there are no more users. This requires
2683 * special synchronization with current events.
2684 */
2685 void trace_buffered_event_disable(void)
2686 {
2687 int cpu;
2688
2689 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2690
2691 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2692 return;
2693
2694 if (--trace_buffered_event_ref)
2695 return;
2696
2697 preempt_disable();
2698 /* For each CPU, set the buffer as used. */
2699 smp_call_function_many(tracing_buffer_mask,
2700 disable_trace_buffered_event, NULL, 1);
2701 preempt_enable();
2702
2703 /* Wait for all current users to finish */
2704 synchronize_rcu();
2705
2706 for_each_tracing_cpu(cpu) {
2707 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2708 per_cpu(trace_buffered_event, cpu) = NULL;
2709 }
2710 /*
2711 * Make sure trace_buffered_event is NULL before clearing
2712 * trace_buffered_event_cnt.
2713 */
2714 smp_wmb();
2715
2716 preempt_disable();
2717 /* Do the work on each cpu */
2718 smp_call_function_many(tracing_buffer_mask,
2719 enable_trace_buffered_event, NULL, 1);
2720 preempt_enable();
2721 }
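
/*
 * Ordering of the teardown above, for reference: first every CPU's
 * trace_buffered_event_cnt is bumped so new events bypass the per-cpu
 * page, then synchronize_rcu() waits out any user that had already
 * read the pointer, only then are the pages freed and the pointers
 * NULLed, and finally the counts are dropped again via IPI.
 */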
2722
2723 static struct trace_buffer *temp_buffer;
2724
2725 struct ring_buffer_event *
2726 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2727 struct trace_event_file *trace_file,
2728 int type, unsigned long len,
2729 unsigned long flags, int pc)
2730 {
2731 struct ring_buffer_event *entry;
2732 int val;
2733
2734 *current_rb = trace_file->tr->array_buffer.buffer;
2735
2736 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2737 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2738 (entry = this_cpu_read(trace_buffered_event))) {
2739 /* Try to use the per cpu buffer first */
2740 val = this_cpu_inc_return(trace_buffered_event_cnt);
2741 if (val == 1) {
2742 trace_event_setup(entry, type, flags, pc);
2743 entry->array[0] = len;
2744 return entry;
2745 }
2746 this_cpu_dec(trace_buffered_event_cnt);
2747 }
2748
2749 entry = __trace_buffer_lock_reserve(*current_rb,
2750 type, len, flags, pc);
2751 /*
2752 * If tracing is off, but we have triggers enabled
2753 * we still need to look at the event data. Use the temp_buffer
2754 * to store the trace event for the trigger to use. It's recursion
2755 * safe and will not be recorded anywhere.
2756 */
2757 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2758 *current_rb = temp_buffer;
2759 entry = __trace_buffer_lock_reserve(*current_rb,
2760 type, len, flags, pc);
2761 }
2762 return entry;
2763 }
2764 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2765
2766 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2767 static DEFINE_MUTEX(tracepoint_printk_mutex);
2768
2769 static void output_printk(struct trace_event_buffer *fbuffer)
2770 {
2771 struct trace_event_call *event_call;
2772 struct trace_event_file *file;
2773 struct trace_event *event;
2774 unsigned long flags;
2775 struct trace_iterator *iter = tracepoint_print_iter;
2776
2777 /* We should never get here if iter is NULL */
2778 if (WARN_ON_ONCE(!iter))
2779 return;
2780
2781 event_call = fbuffer->trace_file->event_call;
2782 if (!event_call || !event_call->event.funcs ||
2783 !event_call->event.funcs->trace)
2784 return;
2785
2786 file = fbuffer->trace_file;
2787 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2788 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2789 !filter_match_preds(file->filter, fbuffer->entry)))
2790 return;
2791
2792 event = &fbuffer->trace_file->event_call->event;
2793
2794 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2795 trace_seq_init(&iter->seq);
2796 iter->ent = fbuffer->entry;
2797 event_call->event.funcs->trace(iter, 0, event);
2798 trace_seq_putc(&iter->seq, 0);
2799 printk("%s", iter->seq.buffer);
2800
2801 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2802 }
2803
2804 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2805 void *buffer, size_t *lenp,
2806 loff_t *ppos)
2807 {
2808 int save_tracepoint_printk;
2809 int ret;
2810
2811 mutex_lock(&tracepoint_printk_mutex);
2812 save_tracepoint_printk = tracepoint_printk;
2813
2814 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2815
2816 /*
2817 * This will force exiting early, as tracepoint_printk
2818 * is always zero when tracepoint_print_iter is not allocated.
2819 */
2820 if (!tracepoint_print_iter)
2821 tracepoint_printk = 0;
2822
2823 if (save_tracepoint_printk == tracepoint_printk)
2824 goto out;
2825
2826 if (tracepoint_printk)
2827 static_key_enable(&tracepoint_printk_key.key);
2828 else
2829 static_key_disable(&tracepoint_printk_key.key);
2830
2831 out:
2832 mutex_unlock(&tracepoint_printk_mutex);
2833
2834 return ret;
2835 }
2836
2837 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2838 {
2839 if (static_key_false(&tracepoint_printk_key.key))
2840 output_printk(fbuffer);
2841
2842 if (static_branch_unlikely(&trace_event_exports_enabled))
2843 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2844 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2845 fbuffer->event, fbuffer->entry,
2846 fbuffer->flags, fbuffer->pc, fbuffer->regs);
2847 }
2848 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2849
2850 /*
2851 * Skip 3:
2852 *
2853 * trace_buffer_unlock_commit_regs()
2854 * trace_event_buffer_commit()
2855 * trace_event_raw_event_xxx()
2856 */
2857 # define STACK_SKIP 3
2858
2859 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2860 struct trace_buffer *buffer,
2861 struct ring_buffer_event *event,
2862 unsigned long flags, int pc,
2863 struct pt_regs *regs)
2864 {
2865 __buffer_unlock_commit(buffer, event);
2866
2867 /*
2868 * If regs is not set, then skip the necessary functions.
2869 * Note, we can still get here via blktrace, wakeup tracer
2870 * and mmiotrace, but that's ok if they lose a function or
2871 * two. They are not that meaningful.
2872 */
2873 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2874 ftrace_trace_userstack(tr, buffer, flags, pc);
2875 }
2876
2877 /*
2878 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2879 */
2880 void
2881 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2882 struct ring_buffer_event *event)
2883 {
2884 __buffer_unlock_commit(buffer, event);
2885 }
2886
2887 void
2888 trace_function(struct trace_array *tr,
2889 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2890 int pc)
2891 {
2892 struct trace_event_call *call = &event_function;
2893 struct trace_buffer *buffer = tr->array_buffer.buffer;
2894 struct ring_buffer_event *event;
2895 struct ftrace_entry *entry;
2896
2897 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2898 flags, pc);
2899 if (!event)
2900 return;
2901 entry = ring_buffer_event_data(event);
2902 entry->ip = ip;
2903 entry->parent_ip = parent_ip;
2904
2905 if (!call_filter_check_discard(call, entry, buffer, event)) {
2906 if (static_branch_unlikely(&trace_function_exports_enabled))
2907 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2908 __buffer_unlock_commit(buffer, event);
2909 }
2910 }
2911
2912 #ifdef CONFIG_STACKTRACE
2913
2914 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2915 #define FTRACE_KSTACK_NESTING 4
2916
2917 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2918
2919 struct ftrace_stack {
2920 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2921 };
2922
2923
2924 struct ftrace_stacks {
2925 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2926 };
2927
2928 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2929 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2930
2931 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2932 unsigned long flags,
2933 int skip, int pc, struct pt_regs *regs)
2934 {
2935 struct trace_event_call *call = &event_kernel_stack;
2936 struct ring_buffer_event *event;
2937 unsigned int size, nr_entries;
2938 struct ftrace_stack *fstack;
2939 struct stack_entry *entry;
2940 int stackidx;
2941
2942 /*
2943 * Add one, for this function and the call to stack_trace_save().
2944 * If regs is set, then these functions will not be in the way.
2945 */
2946 #ifndef CONFIG_UNWINDER_ORC
2947 if (!regs)
2948 skip++;
2949 #endif
2950
2951 preempt_disable_notrace();
2952
2953 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2954
2955 /* This should never happen. If it does, yell once and skip */
2956 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2957 goto out;
2958
2959 /*
2960 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2961 * interrupt will either see the value pre increment or post
2962 * increment. If the interrupt happens pre increment it will have
2963 * restored the counter when it returns. We just need a barrier to
2964 * keep gcc from moving things around.
2965 */
2966 barrier();
2967
2968 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2969 size = ARRAY_SIZE(fstack->calls);
2970
2971 if (regs) {
2972 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2973 size, skip);
2974 } else {
2975 nr_entries = stack_trace_save(fstack->calls, size, skip);
2976 }
2977
2978 size = nr_entries * sizeof(unsigned long);
2979 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2980 sizeof(*entry) + size, flags, pc);
2981 if (!event)
2982 goto out;
2983 entry = ring_buffer_event_data(event);
2984
2985 memcpy(&entry->caller, fstack->calls, size);
2986 entry->size = nr_entries;
2987
2988 if (!call_filter_check_discard(call, entry, buffer, event))
2989 __buffer_unlock_commit(buffer, event);
2990
2991 out:
2992 /* Again, don't let gcc optimize things here */
2993 barrier();
2994 __this_cpu_dec(ftrace_stack_reserve);
2995 preempt_enable_notrace();
2996
2997 }
2998
2999 static inline void ftrace_trace_stack(struct trace_array *tr,
3000 struct trace_buffer *buffer,
3001 unsigned long flags,
3002 int skip, int pc, struct pt_regs *regs)
3003 {
3004 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3005 return;
3006
3007 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3008 }
3009
3010 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3011 int pc)
3012 {
3013 struct trace_buffer *buffer = tr->array_buffer.buffer;
3014
3015 if (rcu_is_watching()) {
3016 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3017 return;
3018 }
3019
3020 /*
3021 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3022 * but if the above rcu_is_watching() failed, then the NMI
3023 * triggered someplace critical, and rcu_irq_enter() should
3024 * not be called from NMI.
3025 */
3026 if (unlikely(in_nmi()))
3027 return;
3028
3029 rcu_irq_enter_irqson();
3030 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3031 rcu_irq_exit_irqson();
3032 }
3033
3034 /**
3035 * trace_dump_stack - record a stack back trace in the trace buffer
3036 * @skip: Number of functions to skip (helper handlers)
3037 */
3038 void trace_dump_stack(int skip)
3039 {
3040 unsigned long flags;
3041
3042 if (tracing_disabled || tracing_selftest_running)
3043 return;
3044
3045 local_save_flags(flags);
3046
3047 #ifndef CONFIG_UNWINDER_ORC
3048 /* Skip 1 to skip this function. */
3049 skip++;
3050 #endif
3051 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3052 flags, skip, preempt_count(), NULL);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_dump_stack);
3055
3056 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3057 static DEFINE_PER_CPU(int, user_stack_count);
3058
3059 static void
3060 ftrace_trace_userstack(struct trace_array *tr,
3061 struct trace_buffer *buffer, unsigned long flags, int pc)
3062 {
3063 struct trace_event_call *call = &event_user_stack;
3064 struct ring_buffer_event *event;
3065 struct userstack_entry *entry;
3066
3067 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3068 return;
3069
3070 /*
3071 * NMIs cannot handle page faults, even with fixups.
3072 * Saving the user stack can (and often does) fault.
3073 */
3074 if (unlikely(in_nmi()))
3075 return;
3076
3077 /*
3078 * prevent recursion, since the user stack tracing may
3079 * trigger other kernel events.
3080 */
3081 preempt_disable();
3082 if (__this_cpu_read(user_stack_count))
3083 goto out;
3084
3085 __this_cpu_inc(user_stack_count);
3086
3087 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3088 sizeof(*entry), flags, pc);
3089 if (!event)
3090 goto out_drop_count;
3091 entry = ring_buffer_event_data(event);
3092
3093 entry->tgid = current->tgid;
3094 memset(&entry->caller, 0, sizeof(entry->caller));
3095
3096 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3097 if (!call_filter_check_discard(call, entry, buffer, event))
3098 __buffer_unlock_commit(buffer, event);
3099
3100 out_drop_count:
3101 __this_cpu_dec(user_stack_count);
3102 out:
3103 preempt_enable();
3104 }
3105 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3106 static void ftrace_trace_userstack(struct trace_array *tr,
3107 struct trace_buffer *buffer,
3108 unsigned long flags, int pc)
3109 {
3110 }
3111 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3112
3113 #endif /* CONFIG_STACKTRACE */
3114
3115 /* created for use with alloc_percpu */
3116 struct trace_buffer_struct {
3117 int nesting;
3118 char buffer[4][TRACE_BUF_SIZE];
3119 };
3120
3121 static struct trace_buffer_struct *trace_percpu_buffer;
3122
3123 /*
3124 * This allows for lockless recording. If we're nested too deeply, then
3125 * this returns NULL.
3126 */
3127 static char *get_trace_buf(void)
3128 {
3129 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3130
3131 if (!buffer || buffer->nesting >= 4)
3132 return NULL;
3133
3134 buffer->nesting++;
3135
3136 /* Interrupts must see nesting incremented before we use the buffer */
3137 barrier();
3138 return &buffer->buffer[buffer->nesting - 1][0];
3139 }
3140
3141 static void put_trace_buf(void)
3142 {
3143 /* Don't let the decrement of nesting leak before this */
3144 barrier();
3145 this_cpu_dec(trace_percpu_buffer->nesting);
3146 }
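
/*
 * The four per-cpu slots line up with the possible nesting contexts
 * (task, softirq, hardirq, NMI), so a trace_printk() that interrupts
 * another trace_printk() still gets its own scratch buffer.  The pair
 * must always be balanced:
 *
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// ... format into buf ...
 *		put_trace_buf();
 *	}
 */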
3147
3148 static int alloc_percpu_trace_buffer(void)
3149 {
3150 struct trace_buffer_struct *buffers;
3151
3152 if (trace_percpu_buffer)
3153 return 0;
3154
3155 buffers = alloc_percpu(struct trace_buffer_struct);
3156 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3157 return -ENOMEM;
3158
3159 trace_percpu_buffer = buffers;
3160 return 0;
3161 }
3162
3163 static int buffers_allocated;
3164
3165 void trace_printk_init_buffers(void)
3166 {
3167 if (buffers_allocated)
3168 return;
3169
3170 if (alloc_percpu_trace_buffer())
3171 return;
3172
3173 /* trace_printk() is for debug use only. Don't use it in production. */
3174
3175 pr_warn("\n");
3176 pr_warn("**********************************************************\n");
3177 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3178 pr_warn("** **\n");
3179 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3180 pr_warn("** **\n");
3181 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3182 pr_warn("** unsafe for production use. **\n");
3183 pr_warn("** **\n");
3184 pr_warn("** If you see this message and you are not debugging **\n");
3185 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3186 pr_warn("** **\n");
3187 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3188 pr_warn("**********************************************************\n");
3189
3190 /* Expand the buffers to set size */
3191 tracing_update_buffers();
3192
3193 buffers_allocated = 1;
3194
3195 /*
3196 * trace_printk_init_buffers() can be called by modules.
3197 * If that happens, then we need to start cmdline recording
3198 * directly here. If the global_trace.buffer is already
3199 * allocated here, then this was called by module code.
3200 */
3201 if (global_trace.array_buffer.buffer)
3202 tracing_start_cmdline_record();
3203 }
3204 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3205
3206 void trace_printk_start_comm(void)
3207 {
3208 /* Start tracing comms if trace printk is set */
3209 if (!buffers_allocated)
3210 return;
3211 tracing_start_cmdline_record();
3212 }
3213
3214 static void trace_printk_start_stop_comm(int enabled)
3215 {
3216 if (!buffers_allocated)
3217 return;
3218
3219 if (enabled)
3220 tracing_start_cmdline_record();
3221 else
3222 tracing_stop_cmdline_record();
3223 }
3224
3225 /**
3226 * trace_vbprintk - write binary msg to tracing buffer
3227 * @ip: The address of the caller
3228 * @fmt: The string format to write to the buffer
3229 * @args: Arguments for @fmt
3230 */
3231 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3232 {
3233 struct trace_event_call *call = &event_bprint;
3234 struct ring_buffer_event *event;
3235 struct trace_buffer *buffer;
3236 struct trace_array *tr = &global_trace;
3237 struct bprint_entry *entry;
3238 unsigned long flags;
3239 char *tbuffer;
3240 int len = 0, size, pc;
3241
3242 if (unlikely(tracing_selftest_running || tracing_disabled))
3243 return 0;
3244
3245 /* Don't pollute graph traces with trace_vprintk internals */
3246 pause_graph_tracing();
3247
3248 pc = preempt_count();
3249 preempt_disable_notrace();
3250
3251 tbuffer = get_trace_buf();
3252 if (!tbuffer) {
3253 len = 0;
3254 goto out_nobuffer;
3255 }
3256
3257 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3258
3259 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3260 goto out_put;
3261
3262 local_save_flags(flags);
3263 size = sizeof(*entry) + sizeof(u32) * len;
3264 buffer = tr->array_buffer.buffer;
3265 ring_buffer_nest_start(buffer);
3266 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3267 flags, pc);
3268 if (!event)
3269 goto out;
3270 entry = ring_buffer_event_data(event);
3271 entry->ip = ip;
3272 entry->fmt = fmt;
3273
3274 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3275 if (!call_filter_check_discard(call, entry, buffer, event)) {
3276 __buffer_unlock_commit(buffer, event);
3277 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3278 }
3279
3280 out:
3281 ring_buffer_nest_end(buffer);
3282 out_put:
3283 put_trace_buf();
3284
3285 out_nobuffer:
3286 preempt_enable_notrace();
3287 unpause_graph_tracing();
3288
3289 return len;
3290 }
3291 EXPORT_SYMBOL_GPL(trace_vbprintk);
3292
3293 __printf(3, 0)
3294 static int
3295 __trace_array_vprintk(struct trace_buffer *buffer,
3296 unsigned long ip, const char *fmt, va_list args)
3297 {
3298 struct trace_event_call *call = &event_print;
3299 struct ring_buffer_event *event;
3300 int len = 0, size, pc;
3301 struct print_entry *entry;
3302 unsigned long flags;
3303 char *tbuffer;
3304
3305 if (tracing_disabled || tracing_selftest_running)
3306 return 0;
3307
3308 /* Don't pollute graph traces with trace_vprintk internals */
3309 pause_graph_tracing();
3310
3311 pc = preempt_count();
3312 preempt_disable_notrace();
3313
3314
3315 tbuffer = get_trace_buf();
3316 if (!tbuffer) {
3317 len = 0;
3318 goto out_nobuffer;
3319 }
3320
3321 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3322
3323 local_save_flags(flags);
3324 size = sizeof(*entry) + len + 1;
3325 ring_buffer_nest_start(buffer);
3326 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3327 flags, pc);
3328 if (!event)
3329 goto out;
3330 entry = ring_buffer_event_data(event);
3331 entry->ip = ip;
3332
3333 memcpy(&entry->buf, tbuffer, len + 1);
3334 if (!call_filter_check_discard(call, entry, buffer, event)) {
3335 __buffer_unlock_commit(buffer, event);
3336 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3337 }
3338
3339 out:
3340 ring_buffer_nest_end(buffer);
3341 put_trace_buf();
3342
3343 out_nobuffer:
3344 preempt_enable_notrace();
3345 unpause_graph_tracing();
3346
3347 return len;
3348 }
3349
3350 __printf(3, 0)
3351 int trace_array_vprintk(struct trace_array *tr,
3352 unsigned long ip, const char *fmt, va_list args)
3353 {
3354 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3355 }
3356
3357 /**
3358 * trace_array_printk - Print a message to a specific instance
3359 * @tr: The instance trace_array descriptor
3360 * @ip: The instruction pointer that this is called from.
3361 * @fmt: The format to print (printf format)
3362 *
3363 * If a subsystem sets up its own instance, they have the right to
3364 * printk strings into their tracing instance buffer using this
3365 * function. Note, this function will not write into the top level
3366 * buffer (use trace_printk() for that), as writing into the top level
3367 * buffer should only have events that can be individually disabled.
3368 * trace_printk() is only used for debugging a kernel, and should
3369 * never be incorporated in normal use.
3370 *
3371 * trace_array_printk() can be used, as it will not add noise to the
3372 * top level tracing buffer.
3373 *
3374 * Note, trace_array_init_printk() must be called on @tr before this
3375 * can be used.
3376 */
3377 __printf(3, 0)
3378 int trace_array_printk(struct trace_array *tr,
3379 unsigned long ip, const char *fmt, ...)
3380 {
3381 int ret;
3382 va_list ap;
3383
3384 if (!tr)
3385 return -ENOENT;
3386
3387 /* This is only allowed for created instances */
3388 if (tr == &global_trace)
3389 return 0;
3390
3391 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3392 return 0;
3393
3394 va_start(ap, fmt);
3395 ret = trace_array_vprintk(tr, ip, fmt, ap);
3396 va_end(ap);
3397 return ret;
3398 }
3399 EXPORT_SYMBOL_GPL(trace_array_printk);
3400
3401 /**
3402 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3403 * @tr: The trace array to initialize the buffers for
3404 *
3405 * As trace_array_printk() only writes into instances, they are OK to
3406 * have in the kernel (unlike trace_printk()). This needs to be called
3407 * before trace_array_printk() can be used on a trace_array.
3408 */
3409 int trace_array_init_printk(struct trace_array *tr)
3410 {
3411 if (!tr)
3412 return -ENOENT;
3413
3414 /* This is only allowed for created instances */
3415 if (tr == &global_trace)
3416 return -EINVAL;
3417
3418 return alloc_percpu_trace_buffer();
3419 }
3420 EXPORT_SYMBOL_GPL(trace_array_init_printk);
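
/*
 * Hedged usage sketch for a subsystem-owned instance (the instance
 * name below is illustrative only):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "value=%d\n", val);
 */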
3421
3422 __printf(3, 4)
3423 int trace_array_printk_buf(struct trace_buffer *buffer,
3424 unsigned long ip, const char *fmt, ...)
3425 {
3426 int ret;
3427 va_list ap;
3428
3429 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3430 return 0;
3431
3432 va_start(ap, fmt);
3433 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3434 va_end(ap);
3435 return ret;
3436 }
3437
3438 __printf(2, 0)
3439 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3440 {
3441 return trace_array_vprintk(&global_trace, ip, fmt, args);
3442 }
3443 EXPORT_SYMBOL_GPL(trace_vprintk);
3444
3445 static void trace_iterator_increment(struct trace_iterator *iter)
3446 {
3447 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3448
3449 iter->idx++;
3450 if (buf_iter)
3451 ring_buffer_iter_advance(buf_iter);
3452 }
3453
3454 static struct trace_entry *
3455 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3456 unsigned long *lost_events)
3457 {
3458 struct ring_buffer_event *event;
3459 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3460
3461 if (buf_iter) {
3462 event = ring_buffer_iter_peek(buf_iter, ts);
3463 if (lost_events)
3464 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3465 (unsigned long)-1 : 0;
3466 } else {
3467 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3468 lost_events);
3469 }
3470
3471 if (event) {
3472 iter->ent_size = ring_buffer_event_length(event);
3473 return ring_buffer_event_data(event);
3474 }
3475 iter->ent_size = 0;
3476 return NULL;
3477 }
3478
3479 static struct trace_entry *
3480 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3481 unsigned long *missing_events, u64 *ent_ts)
3482 {
3483 struct trace_buffer *buffer = iter->array_buffer->buffer;
3484 struct trace_entry *ent, *next = NULL;
3485 unsigned long lost_events = 0, next_lost = 0;
3486 int cpu_file = iter->cpu_file;
3487 u64 next_ts = 0, ts;
3488 int next_cpu = -1;
3489 int next_size = 0;
3490 int cpu;
3491
3492 /*
3493 * If we are in a per_cpu trace file, don't bother iterating over
3494 * all CPUs; just peek at that CPU directly.
3495 */
3496 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3497 if (ring_buffer_empty_cpu(buffer, cpu_file))
3498 return NULL;
3499 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3500 if (ent_cpu)
3501 *ent_cpu = cpu_file;
3502
3503 return ent;
3504 }
3505
3506 for_each_tracing_cpu(cpu) {
3507
3508 if (ring_buffer_empty_cpu(buffer, cpu))
3509 continue;
3510
3511 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3512
3513 /*
3514 * Pick the entry with the smallest timestamp:
3515 */
3516 if (ent && (!next || ts < next_ts)) {
3517 next = ent;
3518 next_cpu = cpu;
3519 next_ts = ts;
3520 next_lost = lost_events;
3521 next_size = iter->ent_size;
3522 }
3523 }
3524
3525 iter->ent_size = next_size;
3526
3527 if (ent_cpu)
3528 *ent_cpu = next_cpu;
3529
3530 if (ent_ts)
3531 *ent_ts = next_ts;
3532
3533 if (missing_events)
3534 *missing_events = next_lost;
3535
3536 return next;
3537 }
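
/*
 * In the RING_BUFFER_ALL_CPUS case the loop above is effectively a
 * k-way merge: each CPU buffer is already time ordered, so repeatedly
 * picking the peeked entry with the smallest timestamp yields a
 * globally ordered stream without any sorting.
 */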
3538
3539 #define STATIC_TEMP_BUF_SIZE 128
3540 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3541
3542 /* Find the next real entry, without updating the iterator itself */
3543 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3544 int *ent_cpu, u64 *ent_ts)
3545 {
3546 /* __find_next_entry will reset ent_size */
3547 int ent_size = iter->ent_size;
3548 struct trace_entry *entry;
3549
3550 /*
3551 * If called from ftrace_dump(), then the iter->temp buffer
3552 * will be the static_temp_buf and not created from kmalloc.
3553 * If the entry size is greater than the buffer, we cannot
3554 * save it. Just return NULL in that case. This is only
3555 * used to add markers when two consecutive events' time
3556 * stamps have a large delta. See trace_print_lat_context()
3557 */
3558 if (iter->temp == static_temp_buf &&
3559 STATIC_TEMP_BUF_SIZE < ent_size)
3560 return NULL;
3561
3562 /*
3563 * The __find_next_entry() may call peek_next_entry(), which may
3564 * call ring_buffer_peek() that may make the contents of iter->ent
3565 * undefined. Need to copy iter->ent now.
3566 */
3567 if (iter->ent && iter->ent != iter->temp) {
3568 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3569 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3570 void *temp;
3571 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3572 if (!temp)
3573 return NULL;
3574 kfree(iter->temp);
3575 iter->temp = temp;
3576 iter->temp_size = iter->ent_size;
3577 }
3578 memcpy(iter->temp, iter->ent, iter->ent_size);
3579 iter->ent = iter->temp;
3580 }
3581 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3582 /* Put back the original ent_size */
3583 iter->ent_size = ent_size;
3584
3585 return entry;
3586 }
3587
3588 /* Find the next real entry, and increment the iterator to the next entry */
3589 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3590 {
3591 iter->ent = __find_next_entry(iter, &iter->cpu,
3592 &iter->lost_events, &iter->ts);
3593
3594 if (iter->ent)
3595 trace_iterator_increment(iter);
3596
3597 return iter->ent ? iter : NULL;
3598 }
3599
3600 static void trace_consume(struct trace_iterator *iter)
3601 {
3602 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3603 &iter->lost_events);
3604 }
3605
3606 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3607 {
3608 struct trace_iterator *iter = m->private;
3609 int i = (int)*pos;
3610 void *ent;
3611
3612 WARN_ON_ONCE(iter->leftover);
3613
3614 (*pos)++;
3615
3616 /* can't go backwards */
3617 if (iter->idx > i)
3618 return NULL;
3619
3620 if (iter->idx < 0)
3621 ent = trace_find_next_entry_inc(iter);
3622 else
3623 ent = iter;
3624
3625 while (ent && iter->idx < i)
3626 ent = trace_find_next_entry_inc(iter);
3627
3628 iter->pos = *pos;
3629
3630 return ent;
3631 }
3632
3633 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3634 {
3635 struct ring_buffer_iter *buf_iter;
3636 unsigned long entries = 0;
3637 u64 ts;
3638
3639 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3640
3641 buf_iter = trace_buffer_iter(iter, cpu);
3642 if (!buf_iter)
3643 return;
3644
3645 ring_buffer_iter_reset(buf_iter);
3646
3647 /*
3648 * We could have the case with the max latency tracers
3649 * that a reset never took place on a cpu. This is evident
3650 * by the timestamp being before the start of the buffer.
3651 */
3652 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3653 if (ts >= iter->array_buffer->time_start)
3654 break;
3655 entries++;
3656 ring_buffer_iter_advance(buf_iter);
3657 }
3658
3659 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3660 }
3661
3662 /*
3663 * The current tracer is copied to avoid using a global lock
3664 * all around.
3665 */
3666 static void *s_start(struct seq_file *m, loff_t *pos)
3667 {
3668 struct trace_iterator *iter = m->private;
3669 struct trace_array *tr = iter->tr;
3670 int cpu_file = iter->cpu_file;
3671 void *p = NULL;
3672 loff_t l = 0;
3673 int cpu;
3674
3675 /*
3676 * copy the tracer to avoid using a global lock all around.
3677 * iter->trace is a copy of current_trace, the pointer to the
3678 * name may be used instead of a strcmp(), as iter->trace->name
3679 * will point to the same string as current_trace->name.
3680 */
3681 mutex_lock(&trace_types_lock);
3682 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3683 *iter->trace = *tr->current_trace;
3684 mutex_unlock(&trace_types_lock);
3685
3686 #ifdef CONFIG_TRACER_MAX_TRACE
3687 if (iter->snapshot && iter->trace->use_max_tr)
3688 return ERR_PTR(-EBUSY);
3689 #endif
3690
3691 if (!iter->snapshot)
3692 atomic_inc(&trace_record_taskinfo_disabled);
3693
3694 if (*pos != iter->pos) {
3695 iter->ent = NULL;
3696 iter->cpu = 0;
3697 iter->idx = -1;
3698
3699 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3700 for_each_tracing_cpu(cpu)
3701 tracing_iter_reset(iter, cpu);
3702 } else
3703 tracing_iter_reset(iter, cpu_file);
3704
3705 iter->leftover = 0;
3706 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3707 ;
3708
3709 } else {
3710 /*
3711 * If we overflowed the seq_file before, then we want
3712 * to just reuse the trace_seq buffer again.
3713 */
3714 if (iter->leftover)
3715 p = iter;
3716 else {
3717 l = *pos - 1;
3718 p = s_next(m, p, &l);
3719 }
3720 }
3721
3722 trace_event_read_lock();
3723 trace_access_lock(cpu_file);
3724 return p;
3725 }
3726
3727 static void s_stop(struct seq_file *m, void *p)
3728 {
3729 struct trace_iterator *iter = m->private;
3730
3731 #ifdef CONFIG_TRACER_MAX_TRACE
3732 if (iter->snapshot && iter->trace->use_max_tr)
3733 return;
3734 #endif
3735
3736 if (!iter->snapshot)
3737 atomic_dec(&trace_record_taskinfo_disabled);
3738
3739 trace_access_unlock(iter->cpu_file);
3740 trace_event_read_unlock();
3741 }
3742
3743 static void
3744 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3745 unsigned long *entries, int cpu)
3746 {
3747 unsigned long count;
3748
3749 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3750 /*
3751 * If this buffer has skipped entries, then we hold all
3752 * entries for the trace and we need to ignore the
3753 * ones before the time stamp.
3754 */
3755 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3756 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3757 /* total is the same as the entries */
3758 *total = count;
3759 } else
3760 *total = count +
3761 ring_buffer_overrun_cpu(buf->buffer, cpu);
3762 *entries = count;
3763 }
3764
3765 static void
3766 get_total_entries(struct array_buffer *buf,
3767 unsigned long *total, unsigned long *entries)
3768 {
3769 unsigned long t, e;
3770 int cpu;
3771
3772 *total = 0;
3773 *entries = 0;
3774
3775 for_each_tracing_cpu(cpu) {
3776 get_total_entries_cpu(buf, &t, &e, cpu);
3777 *total += t;
3778 *entries += e;
3779 }
3780 }
3781
3782 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3783 {
3784 unsigned long total, entries;
3785
3786 if (!tr)
3787 tr = &global_trace;
3788
3789 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3790
3791 return entries;
3792 }
3793
3794 unsigned long trace_total_entries(struct trace_array *tr)
3795 {
3796 unsigned long total, entries;
3797
3798 if (!tr)
3799 tr = &global_trace;
3800
3801 get_total_entries(&tr->array_buffer, &total, &entries);
3802
3803 return entries;
3804 }
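/*
 * A minimal sketch of how other kernel code could use the two helpers above
 * to report buffer usage. Passing NULL selects the global trace array, as
 * the NULL checks above show; the wrapper function name here is hypothetical
 * and purely illustrative.
 */
#if 0	/* illustrative only */
static void example_report_trace_usage(void)
{
	int cpu;

	pr_info("total trace entries: %lu\n", trace_total_entries(NULL));
	for_each_online_cpu(cpu)
		pr_info("  cpu%d: %lu\n", cpu,
			trace_total_entries_cpu(NULL, cpu));
}
#endif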
3805
3806 static void print_lat_help_header(struct seq_file *m)
3807 {
3808 seq_puts(m, "# _------=> CPU# \n"
3809 "# / _-----=> irqs-off \n"
3810 "# | / _----=> need-resched \n"
3811 "# || / _---=> hardirq/softirq \n"
3812 "# ||| / _--=> preempt-depth \n"
3813 "# |||| / delay \n"
3814 "# cmd pid ||||| time | caller \n"
3815 "# \\ / ||||| \\ | / \n");
3816 }
3817
3818 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3819 {
3820 unsigned long total;
3821 unsigned long entries;
3822
3823 get_total_entries(buf, &total, &entries);
3824 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3825 entries, total, num_online_cpus());
3826 seq_puts(m, "#\n");
3827 }
3828
3829 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3830 unsigned int flags)
3831 {
3832 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3833
3834 print_event_info(buf, m);
3835
3836 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3837 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3838 }
3839
3840 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3841 unsigned int flags)
3842 {
3843 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3844 const char *space = " ";
3845 int prec = tgid ? 12 : 2;
3846
3847 print_event_info(buf, m);
3848
3849 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3850 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3851 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3852 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3853 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3854 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3855 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3856 }
3857
3858 void
3859 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3860 {
3861 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3862 struct array_buffer *buf = iter->array_buffer;
3863 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3864 struct tracer *type = iter->trace;
3865 unsigned long entries;
3866 unsigned long total;
3867 const char *name = "preemption";
3868
3869 name = type->name;
3870
3871 get_total_entries(buf, &total, &entries);
3872
3873 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3874 name, UTS_RELEASE);
3875 seq_puts(m, "# -----------------------------------"
3876 "---------------------------------\n");
3877 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3878 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3879 nsecs_to_usecs(data->saved_latency),
3880 entries,
3881 total,
3882 buf->cpu,
3883 #if defined(CONFIG_PREEMPT_NONE)
3884 "server",
3885 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3886 "desktop",
3887 #elif defined(CONFIG_PREEMPT)
3888 "preempt",
3889 #elif defined(CONFIG_PREEMPT_RT)
3890 "preempt_rt",
3891 #else
3892 "unknown",
3893 #endif
3894 /* These are reserved for later use */
3895 0, 0, 0, 0);
3896 #ifdef CONFIG_SMP
3897 seq_printf(m, " #P:%d)\n", num_online_cpus());
3898 #else
3899 seq_puts(m, ")\n");
3900 #endif
3901 seq_puts(m, "# -----------------\n");
3902 seq_printf(m, "# | task: %.16s-%d "
3903 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3904 data->comm, data->pid,
3905 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3906 data->policy, data->rt_priority);
3907 seq_puts(m, "# -----------------\n");
3908
3909 if (data->critical_start) {
3910 seq_puts(m, "# => started at: ");
3911 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3912 trace_print_seq(m, &iter->seq);
3913 seq_puts(m, "\n# => ended at: ");
3914 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3915 trace_print_seq(m, &iter->seq);
3916 seq_puts(m, "\n#\n");
3917 }
3918
3919 seq_puts(m, "#\n");
3920 }
3921
3922 static void test_cpu_buff_start(struct trace_iterator *iter)
3923 {
3924 struct trace_seq *s = &iter->seq;
3925 struct trace_array *tr = iter->tr;
3926
3927 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3928 return;
3929
3930 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3931 return;
3932
3933 if (cpumask_available(iter->started) &&
3934 cpumask_test_cpu(iter->cpu, iter->started))
3935 return;
3936
3937 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3938 return;
3939
3940 if (cpumask_available(iter->started))
3941 cpumask_set_cpu(iter->cpu, iter->started);
3942
3943 /* Don't print started cpu buffer for the first entry of the trace */
3944 if (iter->idx > 1)
3945 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3946 iter->cpu);
3947 }
3948
3949 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3950 {
3951 struct trace_array *tr = iter->tr;
3952 struct trace_seq *s = &iter->seq;
3953 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3954 struct trace_entry *entry;
3955 struct trace_event *event;
3956
3957 entry = iter->ent;
3958
3959 test_cpu_buff_start(iter);
3960
3961 event = ftrace_find_event(entry->type);
3962
3963 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3964 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3965 trace_print_lat_context(iter);
3966 else
3967 trace_print_context(iter);
3968 }
3969
3970 if (trace_seq_has_overflowed(s))
3971 return TRACE_TYPE_PARTIAL_LINE;
3972
3973 if (event)
3974 return event->funcs->trace(iter, sym_flags, event);
3975
3976 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3977
3978 return trace_handle_return(s);
3979 }
3980
3981 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3982 {
3983 struct trace_array *tr = iter->tr;
3984 struct trace_seq *s = &iter->seq;
3985 struct trace_entry *entry;
3986 struct trace_event *event;
3987
3988 entry = iter->ent;
3989
3990 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3991 trace_seq_printf(s, "%d %d %llu ",
3992 entry->pid, iter->cpu, iter->ts);
3993
3994 if (trace_seq_has_overflowed(s))
3995 return TRACE_TYPE_PARTIAL_LINE;
3996
3997 event = ftrace_find_event(entry->type);
3998 if (event)
3999 return event->funcs->raw(iter, 0, event);
4000
4001 trace_seq_printf(s, "%d ?\n", entry->type);
4002
4003 return trace_handle_return(s);
4004 }
4005
4006 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4007 {
4008 struct trace_array *tr = iter->tr;
4009 struct trace_seq *s = &iter->seq;
4010 unsigned char newline = '\n';
4011 struct trace_entry *entry;
4012 struct trace_event *event;
4013
4014 entry = iter->ent;
4015
4016 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4017 SEQ_PUT_HEX_FIELD(s, entry->pid);
4018 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4019 SEQ_PUT_HEX_FIELD(s, iter->ts);
4020 if (trace_seq_has_overflowed(s))
4021 return TRACE_TYPE_PARTIAL_LINE;
4022 }
4023
4024 event = ftrace_find_event(entry->type);
4025 if (event) {
4026 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4027 if (ret != TRACE_TYPE_HANDLED)
4028 return ret;
4029 }
4030
4031 SEQ_PUT_FIELD(s, newline);
4032
4033 return trace_handle_return(s);
4034 }
4035
4036 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4037 {
4038 struct trace_array *tr = iter->tr;
4039 struct trace_seq *s = &iter->seq;
4040 struct trace_entry *entry;
4041 struct trace_event *event;
4042
4043 entry = iter->ent;
4044
4045 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4046 SEQ_PUT_FIELD(s, entry->pid);
4047 SEQ_PUT_FIELD(s, iter->cpu);
4048 SEQ_PUT_FIELD(s, iter->ts);
4049 if (trace_seq_has_overflowed(s))
4050 return TRACE_TYPE_PARTIAL_LINE;
4051 }
4052
4053 event = ftrace_find_event(entry->type);
4054 return event ? event->funcs->binary(iter, 0, event) :
4055 TRACE_TYPE_HANDLED;
4056 }
4057
4058 int trace_empty(struct trace_iterator *iter)
4059 {
4060 struct ring_buffer_iter *buf_iter;
4061 int cpu;
4062
4063 /* If we are looking at one CPU buffer, only check that one */
4064 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4065 cpu = iter->cpu_file;
4066 buf_iter = trace_buffer_iter(iter, cpu);
4067 if (buf_iter) {
4068 if (!ring_buffer_iter_empty(buf_iter))
4069 return 0;
4070 } else {
4071 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4072 return 0;
4073 }
4074 return 1;
4075 }
4076
4077 for_each_tracing_cpu(cpu) {
4078 buf_iter = trace_buffer_iter(iter, cpu);
4079 if (buf_iter) {
4080 if (!ring_buffer_iter_empty(buf_iter))
4081 return 0;
4082 } else {
4083 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4084 return 0;
4085 }
4086 }
4087
4088 return 1;
4089 }
4090
4091 /* Called with trace_event_read_lock() held. */
4092 enum print_line_t print_trace_line(struct trace_iterator *iter)
4093 {
4094 struct trace_array *tr = iter->tr;
4095 unsigned long trace_flags = tr->trace_flags;
4096 enum print_line_t ret;
4097
4098 if (iter->lost_events) {
4099 if (iter->lost_events == (unsigned long)-1)
4100 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4101 iter->cpu);
4102 else
4103 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4104 iter->cpu, iter->lost_events);
4105 if (trace_seq_has_overflowed(&iter->seq))
4106 return TRACE_TYPE_PARTIAL_LINE;
4107 }
4108
4109 if (iter->trace && iter->trace->print_line) {
4110 ret = iter->trace->print_line(iter);
4111 if (ret != TRACE_TYPE_UNHANDLED)
4112 return ret;
4113 }
4114
4115 if (iter->ent->type == TRACE_BPUTS &&
4116 trace_flags & TRACE_ITER_PRINTK &&
4117 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4118 return trace_print_bputs_msg_only(iter);
4119
4120 if (iter->ent->type == TRACE_BPRINT &&
4121 trace_flags & TRACE_ITER_PRINTK &&
4122 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4123 return trace_print_bprintk_msg_only(iter);
4124
4125 if (iter->ent->type == TRACE_PRINT &&
4126 trace_flags & TRACE_ITER_PRINTK &&
4127 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4128 return trace_print_printk_msg_only(iter);
4129
4130 if (trace_flags & TRACE_ITER_BIN)
4131 return print_bin_fmt(iter);
4132
4133 if (trace_flags & TRACE_ITER_HEX)
4134 return print_hex_fmt(iter);
4135
4136 if (trace_flags & TRACE_ITER_RAW)
4137 return print_raw_fmt(iter);
4138
4139 return print_trace_fmt(iter);
4140 }
4141
4142 void trace_latency_header(struct seq_file *m)
4143 {
4144 struct trace_iterator *iter = m->private;
4145 struct trace_array *tr = iter->tr;
4146
4147 /* print nothing if the buffers are empty */
4148 if (trace_empty(iter))
4149 return;
4150
4151 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4152 print_trace_header(m, iter);
4153
4154 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4155 print_lat_help_header(m);
4156 }
4157
4158 void trace_default_header(struct seq_file *m)
4159 {
4160 struct trace_iterator *iter = m->private;
4161 struct trace_array *tr = iter->tr;
4162 unsigned long trace_flags = tr->trace_flags;
4163
4164 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4165 return;
4166
4167 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4168 /* print nothing if the buffers are empty */
4169 if (trace_empty(iter))
4170 return;
4171 print_trace_header(m, iter);
4172 if (!(trace_flags & TRACE_ITER_VERBOSE))
4173 print_lat_help_header(m);
4174 } else {
4175 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4176 if (trace_flags & TRACE_ITER_IRQ_INFO)
4177 print_func_help_header_irq(iter->array_buffer,
4178 m, trace_flags);
4179 else
4180 print_func_help_header(iter->array_buffer, m,
4181 trace_flags);
4182 }
4183 }
4184 }
4185
4186 static void test_ftrace_alive(struct seq_file *m)
4187 {
4188 if (!ftrace_is_dead())
4189 return;
4190 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4191 "# MAY BE MISSING FUNCTION EVENTS\n");
4192 }
4193
4194 #ifdef CONFIG_TRACER_MAX_TRACE
4195 static void show_snapshot_main_help(struct seq_file *m)
4196 {
4197 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4198 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4199 "# Takes a snapshot of the main buffer.\n"
4200 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4201 "# (Doesn't have to be '2' works with any number that\n"
4202 "# is not a '0' or '1')\n");
4203 }
4204
4205 static void show_snapshot_percpu_help(struct seq_file *m)
4206 {
4207 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4208 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4209 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4210 "# Takes a snapshot of the main buffer for this cpu.\n");
4211 #else
4212 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4213 "# Must use main snapshot file to allocate.\n");
4214 #endif
4215 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4216 "# (Doesn't have to be '2' works with any number that\n"
4217 "# is not a '0' or '1')\n");
4218 }
4219
4220 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4221 {
4222 if (iter->tr->allocated_snapshot)
4223 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4224 else
4225 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4226
4227 seq_puts(m, "# Snapshot commands:\n");
4228 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4229 show_snapshot_main_help(m);
4230 else
4231 show_snapshot_percpu_help(m);
4232 }
4233 #else
4234 /* Should never be called */
4235 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4236 #endif
4237
4238 static int s_show(struct seq_file *m, void *v)
4239 {
4240 struct trace_iterator *iter = v;
4241 int ret;
4242
4243 if (iter->ent == NULL) {
4244 if (iter->tr) {
4245 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4246 seq_puts(m, "#\n");
4247 test_ftrace_alive(m);
4248 }
4249 if (iter->snapshot && trace_empty(iter))
4250 print_snapshot_help(m, iter);
4251 else if (iter->trace && iter->trace->print_header)
4252 iter->trace->print_header(m);
4253 else
4254 trace_default_header(m);
4255
4256 } else if (iter->leftover) {
4257 /*
4258 * If we filled the seq_file buffer earlier, we
4259 * want to just show it now.
4260 */
4261 ret = trace_print_seq(m, &iter->seq);
4262
4263 /* ret should this time be zero, but you never know */
4264 iter->leftover = ret;
4265
4266 } else {
4267 print_trace_line(iter);
4268 ret = trace_print_seq(m, &iter->seq);
4269 /*
4270 * If we overflow the seq_file buffer, then it will
4271 * ask us for this data again at start up.
4272 * Use that instead.
4273 * ret is 0 if seq_file write succeeded.
4274 * -1 otherwise.
4275 */
4276 iter->leftover = ret;
4277 }
4278
4279 return 0;
4280 }
4281
4282 /*
4283 * Should be used after trace_array_get(), trace_types_lock
4284 * ensures that i_cdev was already initialized.
4285 */
4286 static inline int tracing_get_cpu(struct inode *inode)
4287 {
4288 if (inode->i_cdev) /* See trace_create_cpu_file() */
4289 return (long)inode->i_cdev - 1;
4290 return RING_BUFFER_ALL_CPUS;
4291 }
4292
4293 static const struct seq_operations tracer_seq_ops = {
4294 .start = s_start,
4295 .next = s_next,
4296 .stop = s_stop,
4297 .show = s_show,
4298 };
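/*
 * The seq_operations above follow the standard seq_file contract:
 * ->start() positions the cursor for *pos, ->next() advances it, ->show()
 * prints one record, and ->stop() drops whatever ->start() took. A minimal,
 * self-contained sketch of the same pattern over a static array; all names
 * here are hypothetical and used only to illustrate the contract.
 */
#if 0	/* illustrative only */
static const char *const example_items[] = { "one", "two", "three" };

static void *example_start(struct seq_file *m, loff_t *pos)
{
	return *pos < ARRAY_SIZE(example_items) ?
		(void *)&example_items[*pos] : NULL;
}

static void *example_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return *pos < ARRAY_SIZE(example_items) ?
		(void *)&example_items[*pos] : NULL;
}

static void example_stop(struct seq_file *m, void *v)
{
	/* nothing to unlock in this sketch */
}

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s\n", *(const char *const *)v);
	return 0;
}

static const struct seq_operations example_seq_ops = {
	.start	= example_start,
	.next	= example_next,
	.stop	= example_stop,
	.show	= example_show,
};
#endif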
4299
4300 static struct trace_iterator *
4301 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4302 {
4303 struct trace_array *tr = inode->i_private;
4304 struct trace_iterator *iter;
4305 int cpu;
4306
4307 if (tracing_disabled)
4308 return ERR_PTR(-ENODEV);
4309
4310 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4311 if (!iter)
4312 return ERR_PTR(-ENOMEM);
4313
4314 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4315 GFP_KERNEL);
4316 if (!iter->buffer_iter)
4317 goto release;
4318
4319 /*
4320 * trace_find_next_entry() may need to save off iter->ent.
4321 * It will place it into the iter->temp buffer. As most
4322 * events are less than 128, allocate a buffer of that size.
4323 * If one is greater, then trace_find_next_entry() will
4324 * allocate a new buffer to adjust for the bigger iter->ent.
4325 * It's not critical if it fails to get allocated here.
4326 */
4327 iter->temp = kmalloc(128, GFP_KERNEL);
4328 if (iter->temp)
4329 iter->temp_size = 128;
4330
4331 /*
4332 * We make a copy of the current tracer to avoid concurrent
4333 * changes on it while we are reading.
4334 */
4335 mutex_lock(&trace_types_lock);
4336 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4337 if (!iter->trace)
4338 goto fail;
4339
4340 *iter->trace = *tr->current_trace;
4341
4342 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4343 goto fail;
4344
4345 iter->tr = tr;
4346
4347 #ifdef CONFIG_TRACER_MAX_TRACE
4348 /* Currently only the top directory has a snapshot */
4349 if (tr->current_trace->print_max || snapshot)
4350 iter->array_buffer = &tr->max_buffer;
4351 else
4352 #endif
4353 iter->array_buffer = &tr->array_buffer;
4354 iter->snapshot = snapshot;
4355 iter->pos = -1;
4356 iter->cpu_file = tracing_get_cpu(inode);
4357 mutex_init(&iter->mutex);
4358
4359 /* Notify the tracer early; before we stop tracing. */
4360 if (iter->trace->open)
4361 iter->trace->open(iter);
4362
4363 /* Annotate start of buffers if we had overruns */
4364 if (ring_buffer_overruns(iter->array_buffer->buffer))
4365 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4366
4367 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4368 if (trace_clocks[tr->clock_id].in_ns)
4369 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4370
4371 /*
4372 * If pause-on-trace is enabled, then stop the trace while
4373 * dumping, unless this is the "snapshot" file
4374 */
4375 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4376 tracing_stop_tr(tr);
4377
4378 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4379 for_each_tracing_cpu(cpu) {
4380 iter->buffer_iter[cpu] =
4381 ring_buffer_read_prepare(iter->array_buffer->buffer,
4382 cpu, GFP_KERNEL);
4383 }
4384 ring_buffer_read_prepare_sync();
4385 for_each_tracing_cpu(cpu) {
4386 ring_buffer_read_start(iter->buffer_iter[cpu]);
4387 tracing_iter_reset(iter, cpu);
4388 }
4389 } else {
4390 cpu = iter->cpu_file;
4391 iter->buffer_iter[cpu] =
4392 ring_buffer_read_prepare(iter->array_buffer->buffer,
4393 cpu, GFP_KERNEL);
4394 ring_buffer_read_prepare_sync();
4395 ring_buffer_read_start(iter->buffer_iter[cpu]);
4396 tracing_iter_reset(iter, cpu);
4397 }
4398
4399 mutex_unlock(&trace_types_lock);
4400
4401 return iter;
4402
4403 fail:
4404 mutex_unlock(&trace_types_lock);
4405 kfree(iter->trace);
4406 kfree(iter->temp);
4407 kfree(iter->buffer_iter);
4408 release:
4409 seq_release_private(inode, file);
4410 return ERR_PTR(-ENOMEM);
4411 }
4412
4413 int tracing_open_generic(struct inode *inode, struct file *filp)
4414 {
4415 int ret;
4416
4417 ret = tracing_check_open_get_tr(NULL);
4418 if (ret)
4419 return ret;
4420
4421 filp->private_data = inode->i_private;
4422 return 0;
4423 }
4424
4425 bool tracing_is_disabled(void)
4426 {
4427 return tracing_disabled ? true : false;
4428 }
4429
4430 /*
4431 * Open and update trace_array ref count.
4432 * Must have the current trace_array passed to it.
4433 */
4434 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4435 {
4436 struct trace_array *tr = inode->i_private;
4437 int ret;
4438
4439 ret = tracing_check_open_get_tr(tr);
4440 if (ret)
4441 return ret;
4442
4443 filp->private_data = inode->i_private;
4444
4445 return 0;
4446 }
4447
4448 static int tracing_release(struct inode *inode, struct file *file)
4449 {
4450 struct trace_array *tr = inode->i_private;
4451 struct seq_file *m = file->private_data;
4452 struct trace_iterator *iter;
4453 int cpu;
4454
4455 if (!(file->f_mode & FMODE_READ)) {
4456 trace_array_put(tr);
4457 return 0;
4458 }
4459
4460 /* Writes do not use seq_file */
4461 iter = m->private;
4462 mutex_lock(&trace_types_lock);
4463
4464 for_each_tracing_cpu(cpu) {
4465 if (iter->buffer_iter[cpu])
4466 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4467 }
4468
4469 if (iter->trace && iter->trace->close)
4470 iter->trace->close(iter);
4471
4472 if (!iter->snapshot && tr->stop_count)
4473 /* reenable tracing if it was previously enabled */
4474 tracing_start_tr(tr);
4475
4476 __trace_array_put(tr);
4477
4478 mutex_unlock(&trace_types_lock);
4479
4480 mutex_destroy(&iter->mutex);
4481 free_cpumask_var(iter->started);
4482 kfree(iter->temp);
4483 kfree(iter->trace);
4484 kfree(iter->buffer_iter);
4485 seq_release_private(inode, file);
4486
4487 return 0;
4488 }
4489
4490 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4491 {
4492 struct trace_array *tr = inode->i_private;
4493
4494 trace_array_put(tr);
4495 return 0;
4496 }
4497
4498 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4499 {
4500 struct trace_array *tr = inode->i_private;
4501
4502 trace_array_put(tr);
4503
4504 return single_release(inode, file);
4505 }
4506
4507 static int tracing_open(struct inode *inode, struct file *file)
4508 {
4509 struct trace_array *tr = inode->i_private;
4510 struct trace_iterator *iter;
4511 int ret;
4512
4513 ret = tracing_check_open_get_tr(tr);
4514 if (ret)
4515 return ret;
4516
4517 /* If this file was open for write, then erase contents */
4518 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4519 int cpu = tracing_get_cpu(inode);
4520 struct array_buffer *trace_buf = &tr->array_buffer;
4521
4522 #ifdef CONFIG_TRACER_MAX_TRACE
4523 if (tr->current_trace->print_max)
4524 trace_buf = &tr->max_buffer;
4525 #endif
4526
4527 if (cpu == RING_BUFFER_ALL_CPUS)
4528 tracing_reset_online_cpus(trace_buf);
4529 else
4530 tracing_reset_cpu(trace_buf, cpu);
4531 }
4532
4533 if (file->f_mode & FMODE_READ) {
4534 iter = __tracing_open(inode, file, false);
4535 if (IS_ERR(iter))
4536 ret = PTR_ERR(iter);
4537 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4538 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4539 }
4540
4541 if (ret < 0)
4542 trace_array_put(tr);
4543
4544 return ret;
4545 }
4546
4547 /*
4548 * Some tracers are not suitable for instance buffers.
4549 * A tracer is always available for the global array (toplevel)
4550 * or if it explicitly states that it is.
4551 */
4552 static bool
4553 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4554 {
4555 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4556 }
4557
4558 /* Find the next tracer that this trace array may use */
4559 static struct tracer *
4560 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4561 {
4562 while (t && !trace_ok_for_array(t, tr))
4563 t = t->next;
4564
4565 return t;
4566 }
4567
4568 static void *
4569 t_next(struct seq_file *m, void *v, loff_t *pos)
4570 {
4571 struct trace_array *tr = m->private;
4572 struct tracer *t = v;
4573
4574 (*pos)++;
4575
4576 if (t)
4577 t = get_tracer_for_array(tr, t->next);
4578
4579 return t;
4580 }
4581
4582 static void *t_start(struct seq_file *m, loff_t *pos)
4583 {
4584 struct trace_array *tr = m->private;
4585 struct tracer *t;
4586 loff_t l = 0;
4587
4588 mutex_lock(&trace_types_lock);
4589
4590 t = get_tracer_for_array(tr, trace_types);
4591 for (; t && l < *pos; t = t_next(m, t, &l))
4592 ;
4593
4594 return t;
4595 }
4596
4597 static void t_stop(struct seq_file *m, void *p)
4598 {
4599 mutex_unlock(&trace_types_lock);
4600 }
4601
4602 static int t_show(struct seq_file *m, void *v)
4603 {
4604 struct tracer *t = v;
4605
4606 if (!t)
4607 return 0;
4608
4609 seq_puts(m, t->name);
4610 if (t->next)
4611 seq_putc(m, ' ');
4612 else
4613 seq_putc(m, '\n');
4614
4615 return 0;
4616 }
4617
4618 static const struct seq_operations show_traces_seq_ops = {
4619 .start = t_start,
4620 .next = t_next,
4621 .stop = t_stop,
4622 .show = t_show,
4623 };
4624
4625 static int show_traces_open(struct inode *inode, struct file *file)
4626 {
4627 struct trace_array *tr = inode->i_private;
4628 struct seq_file *m;
4629 int ret;
4630
4631 ret = tracing_check_open_get_tr(tr);
4632 if (ret)
4633 return ret;
4634
4635 ret = seq_open(file, &show_traces_seq_ops);
4636 if (ret) {
4637 trace_array_put(tr);
4638 return ret;
4639 }
4640
4641 m = file->private_data;
4642 m->private = tr;
4643
4644 return 0;
4645 }
4646
4647 static int show_traces_release(struct inode *inode, struct file *file)
4648 {
4649 struct trace_array *tr = inode->i_private;
4650
4651 trace_array_put(tr);
4652 return seq_release(inode, file);
4653 }
4654
4655 static ssize_t
4656 tracing_write_stub(struct file *filp, const char __user *ubuf,
4657 size_t count, loff_t *ppos)
4658 {
4659 return count;
4660 }
4661
4662 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4663 {
4664 int ret;
4665
4666 if (file->f_mode & FMODE_READ)
4667 ret = seq_lseek(file, offset, whence);
4668 else
4669 file->f_pos = ret = 0;
4670
4671 return ret;
4672 }
4673
4674 static const struct file_operations tracing_fops = {
4675 .open = tracing_open,
4676 .read = seq_read,
4677 .write = tracing_write_stub,
4678 .llseek = tracing_lseek,
4679 .release = tracing_release,
4680 };
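/*
 * tracing_fops backs the "trace" file in tracefs. From user space it reads
 * like an ordinary text file; a minimal sketch of dumping it, assuming the
 * usual /sys/kernel/tracing mount point (which may differ on a given system):
 */
#if 0	/* user-space example only */
#include <stdio.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/sys/kernel/tracing/trace", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
#endif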
4681
4682 static const struct file_operations show_traces_fops = {
4683 .open = show_traces_open,
4684 .read = seq_read,
4685 .llseek = seq_lseek,
4686 .release = show_traces_release,
4687 };
4688
4689 static ssize_t
4690 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4691 size_t count, loff_t *ppos)
4692 {
4693 struct trace_array *tr = file_inode(filp)->i_private;
4694 char *mask_str;
4695 int len;
4696
4697 len = snprintf(NULL, 0, "%*pb\n",
4698 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4699 mask_str = kmalloc(len, GFP_KERNEL);
4700 if (!mask_str)
4701 return -ENOMEM;
4702
4703 len = snprintf(mask_str, len, "%*pb\n",
4704 cpumask_pr_args(tr->tracing_cpumask));
4705 if (len >= count) {
4706 count = -EINVAL;
4707 goto out_err;
4708 }
4709 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4710
4711 out_err:
4712 kfree(mask_str);
4713
4714 return count;
4715 }
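/*
 * tracing_cpumask_read() uses the two-pass snprintf() idiom: a first call
 * with a NULL buffer and size 0 only returns the length that would have been
 * written, which then sizes the real allocation. A standalone user-space
 * sketch of the same idiom; the helper name is hypothetical:
 */
#if 0	/* illustrative only */
#include <stdio.h>
#include <stdlib.h>

static char *format_pair(int a, int b)
{
	int len = snprintf(NULL, 0, "%d-%d\n", a, b) + 1;	/* +1 for NUL */
	char *buf = malloc(len);

	if (buf)
		snprintf(buf, len, "%d-%d\n", a, b);
	return buf;
}
#endif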
4716
4717 int tracing_set_cpumask(struct trace_array *tr,
4718 cpumask_var_t tracing_cpumask_new)
4719 {
4720 int cpu;
4721
4722 if (!tr)
4723 return -EINVAL;
4724
4725 local_irq_disable();
4726 arch_spin_lock(&tr->max_lock);
4727 for_each_tracing_cpu(cpu) {
4728 /*
4729 * Increase/decrease the disabled counter if we are
4730 * about to flip a bit in the cpumask:
4731 */
4732 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4733 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4734 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4735 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4736 }
4737 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4738 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4739 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4740 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4741 }
4742 }
4743 arch_spin_unlock(&tr->max_lock);
4744 local_irq_enable();
4745
4746 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4747
4748 return 0;
4749 }
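/*
 * tracing_set_cpumask() is ultimately driven by writes to the
 * "tracing_cpumask" file, parsed as a hex CPU mask. A minimal user-space
 * sketch restricting tracing to CPUs 0 and 1 (mask 0x3); the tracefs mount
 * point is assumed to be /sys/kernel/tracing and may differ:
 */
#if 0	/* user-space example only */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/tracing_cpumask", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "3\n");	/* hex mask: CPUs 0 and 1 */
	fclose(f);
	return 0;
}
#endif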
4750
4751 static ssize_t
4752 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4753 size_t count, loff_t *ppos)
4754 {
4755 struct trace_array *tr = file_inode(filp)->i_private;
4756 cpumask_var_t tracing_cpumask_new;
4757 int err;
4758
4759 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4760 return -ENOMEM;
4761
4762 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4763 if (err)
4764 goto err_free;
4765
4766 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4767 if (err)
4768 goto err_free;
4769
4770 free_cpumask_var(tracing_cpumask_new);
4771
4772 return count;
4773
4774 err_free:
4775 free_cpumask_var(tracing_cpumask_new);
4776
4777 return err;
4778 }
4779
4780 static const struct file_operations tracing_cpumask_fops = {
4781 .open = tracing_open_generic_tr,
4782 .read = tracing_cpumask_read,
4783 .write = tracing_cpumask_write,
4784 .release = tracing_release_generic_tr,
4785 .llseek = generic_file_llseek,
4786 };
4787
4788 static int tracing_trace_options_show(struct seq_file *m, void *v)
4789 {
4790 struct tracer_opt *trace_opts;
4791 struct trace_array *tr = m->private;
4792 u32 tracer_flags;
4793 int i;
4794
4795 mutex_lock(&trace_types_lock);
4796 tracer_flags = tr->current_trace->flags->val;
4797 trace_opts = tr->current_trace->flags->opts;
4798
4799 for (i = 0; trace_options[i]; i++) {
4800 if (tr->trace_flags & (1 << i))
4801 seq_printf(m, "%s\n", trace_options[i]);
4802 else
4803 seq_printf(m, "no%s\n", trace_options[i]);
4804 }
4805
4806 for (i = 0; trace_opts[i].name; i++) {
4807 if (tracer_flags & trace_opts[i].bit)
4808 seq_printf(m, "%s\n", trace_opts[i].name);
4809 else
4810 seq_printf(m, "no%s\n", trace_opts[i].name);
4811 }
4812 mutex_unlock(&trace_types_lock);
4813
4814 return 0;
4815 }
4816
4817 static int __set_tracer_option(struct trace_array *tr,
4818 struct tracer_flags *tracer_flags,
4819 struct tracer_opt *opts, int neg)
4820 {
4821 struct tracer *trace = tracer_flags->trace;
4822 int ret;
4823
4824 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4825 if (ret)
4826 return ret;
4827
4828 if (neg)
4829 tracer_flags->val &= ~opts->bit;
4830 else
4831 tracer_flags->val |= opts->bit;
4832 return 0;
4833 }
4834
4835 /* Try to assign a tracer specific option */
4836 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4837 {
4838 struct tracer *trace = tr->current_trace;
4839 struct tracer_flags *tracer_flags = trace->flags;
4840 struct tracer_opt *opts = NULL;
4841 int i;
4842
4843 for (i = 0; tracer_flags->opts[i].name; i++) {
4844 opts = &tracer_flags->opts[i];
4845
4846 if (strcmp(cmp, opts->name) == 0)
4847 return __set_tracer_option(tr, trace->flags, opts, neg);
4848 }
4849
4850 return -EINVAL;
4851 }
4852
4853 /* Some tracers require overwrite to stay enabled */
4854 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4855 {
4856 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4857 return -1;
4858
4859 return 0;
4860 }
4861
4862 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4863 {
4864 if ((mask == TRACE_ITER_RECORD_TGID) ||
4865 (mask == TRACE_ITER_RECORD_CMD))
4866 lockdep_assert_held(&event_mutex);
4867
4868 /* do nothing if flag is already set */
4869 if (!!(tr->trace_flags & mask) == !!enabled)
4870 return 0;
4871
4872 /* Give the tracer a chance to approve the change */
4873 if (tr->current_trace->flag_changed)
4874 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4875 return -EINVAL;
4876
4877 if (enabled)
4878 tr->trace_flags |= mask;
4879 else
4880 tr->trace_flags &= ~mask;
4881
4882 if (mask == TRACE_ITER_RECORD_CMD)
4883 trace_event_enable_cmd_record(enabled);
4884
4885 if (mask == TRACE_ITER_RECORD_TGID) {
4886 if (!tgid_map)
4887 tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4888 sizeof(*tgid_map),
4889 GFP_KERNEL);
4890 if (!tgid_map) {
4891 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4892 return -ENOMEM;
4893 }
4894
4895 trace_event_enable_tgid_record(enabled);
4896 }
4897
4898 if (mask == TRACE_ITER_EVENT_FORK)
4899 trace_event_follow_fork(tr, enabled);
4900
4901 if (mask == TRACE_ITER_FUNC_FORK)
4902 ftrace_pid_follow_fork(tr, enabled);
4903
4904 if (mask == TRACE_ITER_OVERWRITE) {
4905 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4906 #ifdef CONFIG_TRACER_MAX_TRACE
4907 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4908 #endif
4909 }
4910
4911 if (mask == TRACE_ITER_PRINTK) {
4912 trace_printk_start_stop_comm(enabled);
4913 trace_printk_control(enabled);
4914 }
4915
4916 return 0;
4917 }
4918
4919 int trace_set_options(struct trace_array *tr, char *option)
4920 {
4921 char *cmp;
4922 int neg = 0;
4923 int ret;
4924 size_t orig_len = strlen(option);
4925 int len;
4926
4927 cmp = strstrip(option);
4928
4929 len = str_has_prefix(cmp, "no");
4930 if (len)
4931 neg = 1;
4932
4933 cmp += len;
4934
4935 mutex_lock(&event_mutex);
4936 mutex_lock(&trace_types_lock);
4937
4938 ret = match_string(trace_options, -1, cmp);
4939 /* If no option could be set, test the specific tracer options */
4940 if (ret < 0)
4941 ret = set_tracer_option(tr, cmp, neg);
4942 else
4943 ret = set_tracer_flag(tr, 1 << ret, !neg);
4944
4945 mutex_unlock(&trace_types_lock);
4946 mutex_unlock(&event_mutex);
4947
4948 /*
4949 * If the first trailing whitespace is replaced with '\0' by strstrip,
4950 * turn it back into a space.
4951 */
4952 if (orig_len > strlen(option))
4953 option[strlen(option)] = ' ';
4954
4955 return ret;
4956 }
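/*
 * trace_set_options() implements the "no<option>" convention used by the
 * trace_options file and the trace_options= boot parameter. A user-space
 * sketch that clears and then re-sets one option; "annotate" is used here on
 * the assumption that it is the string name behind TRACE_ITER_ANNOTATE, and
 * the tracefs path may differ:
 */
#if 0	/* user-space example only */
#include <stdio.h>

static int write_option(const char *opt)
{
	FILE *f = fopen("/sys/kernel/tracing/trace_options", "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", opt);
	return fclose(f);
}

int main(void)
{
	write_option("noannotate");	/* "no" prefix clears the flag */
	write_option("annotate");	/* plain name sets it again */
	return 0;
}
#endif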
4957
4958 static void __init apply_trace_boot_options(void)
4959 {
4960 char *buf = trace_boot_options_buf;
4961 char *option;
4962
4963 while (true) {
4964 option = strsep(&buf, ",");
4965
4966 if (!option)
4967 break;
4968
4969 if (*option)
4970 trace_set_options(&global_trace, option);
4971
4972 /* Put back the comma to allow this to be called again */
4973 if (buf)
4974 *(buf - 1) = ',';
4975 }
4976 }
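/*
 * apply_trace_boot_options() walks a comma-separated list with strsep() and
 * then puts each comma back so the buffer can be parsed again later. A
 * standalone user-space sketch of the same strsep() walk (without the
 * restore step, which only matters when the buffer must survive re-parsing);
 * the option strings below are placeholders:
 */
#if 0	/* illustrative only */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[] = "optA,nooptB,optC";
	char *cursor = buf;
	char *option;

	while ((option = strsep(&cursor, ",")) != NULL) {
		if (*option)
			printf("option: %s\n", option);
	}
	return 0;
}
#endif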
4977
4978 static ssize_t
4979 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4980 size_t cnt, loff_t *ppos)
4981 {
4982 struct seq_file *m = filp->private_data;
4983 struct trace_array *tr = m->private;
4984 char buf[64];
4985 int ret;
4986
4987 if (cnt >= sizeof(buf))
4988 return -EINVAL;
4989
4990 if (copy_from_user(buf, ubuf, cnt))
4991 return -EFAULT;
4992
4993 buf[cnt] = 0;
4994
4995 ret = trace_set_options(tr, buf);
4996 if (ret < 0)
4997 return ret;
4998
4999 *ppos += cnt;
5000
5001 return cnt;
5002 }
5003
5004 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5005 {
5006 struct trace_array *tr = inode->i_private;
5007 int ret;
5008
5009 ret = tracing_check_open_get_tr(tr);
5010 if (ret)
5011 return ret;
5012
5013 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5014 if (ret < 0)
5015 trace_array_put(tr);
5016
5017 return ret;
5018 }
5019
5020 static const struct file_operations tracing_iter_fops = {
5021 .open = tracing_trace_options_open,
5022 .read = seq_read,
5023 .llseek = seq_lseek,
5024 .release = tracing_single_release_tr,
5025 .write = tracing_trace_options_write,
5026 };
5027
5028 static const char readme_msg[] =
5029 "tracing mini-HOWTO:\n\n"
5030 "# echo 0 > tracing_on : quick way to disable tracing\n"
5031 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5032 " Important files:\n"
5033 " trace\t\t\t- The static contents of the buffer\n"
5034 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5035 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5036 " current_tracer\t- function and latency tracers\n"
5037 " available_tracers\t- list of configured tracers for current_tracer\n"
5038 " error_log\t- error log for failed commands (that support it)\n"
5039 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5040 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5041 " trace_clock\t\t-change the clock used to order events\n"
5042 " local: Per cpu clock but may not be synced across CPUs\n"
5043 " global: Synced across CPUs but slows tracing down.\n"
5044 " counter: Not a clock, but just an increment\n"
5045 " uptime: Jiffy counter from time of boot\n"
5046 " perf: Same clock that perf events use\n"
5047 #ifdef CONFIG_X86_64
5048 " x86-tsc: TSC cycle counter\n"
5049 #endif
5050 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5051 " delta: Delta difference against a buffer-wide timestamp\n"
5052 " absolute: Absolute (standalone) timestamp\n"
5053 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5054 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5055 " tracing_cpumask\t- Limit which CPUs to trace\n"
5056 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5057 "\t\t\t Remove sub-buffer with rmdir\n"
5058 " trace_options\t\t- Set format or modify how tracing happens\n"
5059 "\t\t\t Disable an option by prefixing 'no' to the\n"
5060 "\t\t\t option name\n"
5061 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5062 #ifdef CONFIG_DYNAMIC_FTRACE
5063 "\n available_filter_functions - list of functions that can be filtered on\n"
5064 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5065 "\t\t\t functions\n"
5066 "\t accepts: func_full_name or glob-matching-pattern\n"
5067 "\t modules: Can select a group via module\n"
5068 "\t Format: :mod:<module-name>\n"
5069 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5070 "\t triggers: a command to perform when function is hit\n"
5071 "\t Format: <function>:<trigger>[:count]\n"
5072 "\t trigger: traceon, traceoff\n"
5073 "\t\t enable_event:<system>:<event>\n"
5074 "\t\t disable_event:<system>:<event>\n"
5075 #ifdef CONFIG_STACKTRACE
5076 "\t\t stacktrace\n"
5077 #endif
5078 #ifdef CONFIG_TRACER_SNAPSHOT
5079 "\t\t snapshot\n"
5080 #endif
5081 "\t\t dump\n"
5082 "\t\t cpudump\n"
5083 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5084 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5085 "\t The first one will disable tracing every time do_fault is hit\n"
5086 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5087 "\t The first time do trap is hit and it disables tracing, the\n"
5088 "\t counter will decrement to 2. If tracing is already disabled,\n"
5089 "\t the counter will not decrement. It only decrements when the\n"
5090 "\t trigger did work\n"
5091 "\t To remove trigger without count:\n"
5092 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5093 "\t To remove trigger with a count:\n"
5094 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5095 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5096 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5097 "\t modules: Can select a group via module command :mod:\n"
5098 "\t Does not accept triggers\n"
5099 #endif /* CONFIG_DYNAMIC_FTRACE */
5100 #ifdef CONFIG_FUNCTION_TRACER
5101 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5102 "\t\t (function)\n"
5103 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5104 "\t\t (function)\n"
5105 #endif
5106 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5107 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5108 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5109 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5110 #endif
5111 #ifdef CONFIG_TRACER_SNAPSHOT
5112 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5113 "\t\t\t snapshot buffer. Read the contents for more\n"
5114 "\t\t\t information\n"
5115 #endif
5116 #ifdef CONFIG_STACK_TRACER
5117 " stack_trace\t\t- Shows the max stack trace when active\n"
5118 " stack_max_size\t- Shows current max stack size that was traced\n"
5119 "\t\t\t Write into this file to reset the max size (trigger a\n"
5120 "\t\t\t new trace)\n"
5121 #ifdef CONFIG_DYNAMIC_FTRACE
5122 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5123 "\t\t\t traces\n"
5124 #endif
5125 #endif /* CONFIG_STACK_TRACER */
5126 #ifdef CONFIG_DYNAMIC_EVENTS
5127 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5128 "\t\t\t Write into this file to define/undefine new trace events.\n"
5129 #endif
5130 #ifdef CONFIG_KPROBE_EVENTS
5131 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5132 "\t\t\t Write into this file to define/undefine new trace events.\n"
5133 #endif
5134 #ifdef CONFIG_UPROBE_EVENTS
5135 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5136 "\t\t\t Write into this file to define/undefine new trace events.\n"
5137 #endif
5138 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5139 "\t accepts: event-definitions (one definition per line)\n"
5140 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5141 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5142 #ifdef CONFIG_HIST_TRIGGERS
5143 "\t s:[synthetic/]<event> <field> [<field>]\n"
5144 #endif
5145 "\t -:[<group>/]<event>\n"
5146 #ifdef CONFIG_KPROBE_EVENTS
5147 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5148 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5149 #endif
5150 #ifdef CONFIG_UPROBE_EVENTS
5151 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5152 #endif
5153 "\t args: <name>=fetcharg[:type]\n"
5154 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5155 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5156 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5157 #else
5158 "\t $stack<index>, $stack, $retval, $comm,\n"
5159 #endif
5160 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5161 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5162 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5163 "\t <type>\\[<array-size>\\]\n"
5164 #ifdef CONFIG_HIST_TRIGGERS
5165 "\t field: <stype> <name>;\n"
5166 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5167 "\t [unsigned] char/int/long\n"
5168 #endif
5169 #endif
5170 " events/\t\t- Directory containing all trace event subsystems:\n"
5171 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5172 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5173 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5174 "\t\t\t events\n"
5175 " filter\t\t- If set, only events passing filter are traced\n"
5176 " events/<system>/<event>/\t- Directory containing control files for\n"
5177 "\t\t\t <event>:\n"
5178 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5179 " filter\t\t- If set, only events passing filter are traced\n"
5180 " trigger\t\t- If set, a command to perform when event is hit\n"
5181 "\t Format: <trigger>[:count][if <filter>]\n"
5182 "\t trigger: traceon, traceoff\n"
5183 "\t enable_event:<system>:<event>\n"
5184 "\t disable_event:<system>:<event>\n"
5185 #ifdef CONFIG_HIST_TRIGGERS
5186 "\t enable_hist:<system>:<event>\n"
5187 "\t disable_hist:<system>:<event>\n"
5188 #endif
5189 #ifdef CONFIG_STACKTRACE
5190 "\t\t stacktrace\n"
5191 #endif
5192 #ifdef CONFIG_TRACER_SNAPSHOT
5193 "\t\t snapshot\n"
5194 #endif
5195 #ifdef CONFIG_HIST_TRIGGERS
5196 "\t\t hist (see below)\n"
5197 #endif
5198 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5199 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5200 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5201 "\t events/block/block_unplug/trigger\n"
5202 "\t The first disables tracing every time block_unplug is hit.\n"
5203 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5204 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5205 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5206 "\t Like function triggers, the counter is only decremented if it\n"
5207 "\t enabled or disabled tracing.\n"
5208 "\t To remove a trigger without a count:\n"
5209 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5210 "\t To remove a trigger with a count:\n"
5211 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5212 "\t Filters can be ignored when removing a trigger.\n"
5213 #ifdef CONFIG_HIST_TRIGGERS
5214 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5215 "\t Format: hist:keys=<field1[,field2,...]>\n"
5216 "\t [:values=<field1[,field2,...]>]\n"
5217 "\t [:sort=<field1[,field2,...]>]\n"
5218 "\t [:size=#entries]\n"
5219 "\t [:pause][:continue][:clear]\n"
5220 "\t [:name=histname1]\n"
5221 "\t [:<handler>.<action>]\n"
5222 "\t [if <filter>]\n\n"
5223 "\t When a matching event is hit, an entry is added to a hash\n"
5224 "\t table using the key(s) and value(s) named, and the value of a\n"
5225 "\t sum called 'hitcount' is incremented. Keys and values\n"
5226 "\t correspond to fields in the event's format description. Keys\n"
5227 "\t can be any field, or the special string 'stacktrace'.\n"
5228 "\t Compound keys consisting of up to two fields can be specified\n"
5229 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5230 "\t fields. Sort keys consisting of up to two fields can be\n"
5231 "\t specified using the 'sort' keyword. The sort direction can\n"
5232 "\t be modified by appending '.descending' or '.ascending' to a\n"
5233 "\t sort field. The 'size' parameter can be used to specify more\n"
5234 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5235 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5236 "\t its histogram data will be shared with other triggers of the\n"
5237 "\t same name, and trigger hits will update this common data.\n\n"
5238 "\t Reading the 'hist' file for the event will dump the hash\n"
5239 "\t table in its entirety to stdout. If there are multiple hist\n"
5240 "\t triggers attached to an event, there will be a table for each\n"
5241 "\t trigger in the output. The table displayed for a named\n"
5242 "\t trigger will be the same as any other instance having the\n"
5243 "\t same name. The default format used to display a given field\n"
5244 "\t can be modified by appending any of the following modifiers\n"
5245 "\t to the field name, as applicable:\n\n"
5246 "\t .hex display a number as a hex value\n"
5247 "\t .sym display an address as a symbol\n"
5248 "\t .sym-offset display an address as a symbol and offset\n"
5249 "\t .execname display a common_pid as a program name\n"
5250 "\t .syscall display a syscall id as a syscall name\n"
5251 "\t .log2 display log2 value rather than raw number\n"
5252 "\t .usecs display a common_timestamp in microseconds\n\n"
5253 "\t The 'pause' parameter can be used to pause an existing hist\n"
5254 "\t trigger or to start a hist trigger but not log any events\n"
5255 "\t until told to do so. 'continue' can be used to start or\n"
5256 "\t restart a paused hist trigger.\n\n"
5257 "\t The 'clear' parameter will clear the contents of a running\n"
5258 "\t hist trigger and leave its current paused/active state\n"
5259 "\t unchanged.\n\n"
5260 "\t The enable_hist and disable_hist triggers can be used to\n"
5261 "\t have one event conditionally start and stop another event's\n"
5262 "\t already-attached hist trigger. The syntax is analogous to\n"
5263 "\t the enable_event and disable_event triggers.\n\n"
5264 "\t Hist trigger handlers and actions are executed whenever a\n"
5265 "\t a histogram entry is added or updated. They take the form:\n\n"
5266 "\t <handler>.<action>\n\n"
5267 "\t The available handlers are:\n\n"
5268 "\t onmatch(matching.event) - invoke on addition or update\n"
5269 "\t onmax(var) - invoke if var exceeds current max\n"
5270 "\t onchange(var) - invoke action if var changes\n\n"
5271 "\t The available actions are:\n\n"
5272 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5273 "\t save(field,...) - save current event fields\n"
5274 #ifdef CONFIG_TRACER_SNAPSHOT
5275 "\t snapshot() - snapshot the trace buffer\n\n"
5276 #endif
5277 #ifdef CONFIG_SYNTH_EVENTS
5278 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5279 "\t Write into this file to define/undefine new synthetic events.\n"
5280 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5281 #endif
5282 #endif
5283 ;
5284
5285 static ssize_t
5286 tracing_readme_read(struct file *filp, char __user *ubuf,
5287 size_t cnt, loff_t *ppos)
5288 {
5289 return simple_read_from_buffer(ubuf, cnt, ppos,
5290 readme_msg, strlen(readme_msg));
5291 }
5292
5293 static const struct file_operations tracing_readme_fops = {
5294 .open = tracing_open_generic,
5295 .read = tracing_readme_read,
5296 .llseek = generic_file_llseek,
5297 };
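/*
 * The readme file above follows a common pattern for small read-only
 * tracefs/debugfs files: no seq_file, just simple_read_from_buffer() over a
 * static string. A minimal sketch of the same pattern for a hypothetical
 * file; "example_msg" and friends are made-up names for illustration:
 */
#if 0	/* illustrative only */
static const char example_msg[] = "hello from tracefs\n";

static ssize_t example_read(struct file *filp, char __user *ubuf,
			    size_t cnt, loff_t *ppos)
{
	return simple_read_from_buffer(ubuf, cnt, ppos,
				       example_msg, strlen(example_msg));
}

static const struct file_operations example_fops = {
	.open	= tracing_open_generic,
	.read	= example_read,
	.llseek	= generic_file_llseek,
};
#endif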
5298
5299 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5300 {
5301 int *ptr = v;
5302
5303 if (*pos || m->count)
5304 ptr++;
5305
5306 (*pos)++;
5307
5308 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5309 if (trace_find_tgid(*ptr))
5310 return ptr;
5311 }
5312
5313 return NULL;
5314 }
5315
5316 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5317 {
5318 void *v;
5319 loff_t l = 0;
5320
5321 if (!tgid_map)
5322 return NULL;
5323
5324 v = &tgid_map[0];
5325 while (l <= *pos) {
5326 v = saved_tgids_next(m, v, &l);
5327 if (!v)
5328 return NULL;
5329 }
5330
5331 return v;
5332 }
5333
5334 static void saved_tgids_stop(struct seq_file *m, void *v)
5335 {
5336 }
5337
5338 static int saved_tgids_show(struct seq_file *m, void *v)
5339 {
5340 int pid = (int *)v - tgid_map;
5341
5342 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5343 return 0;
5344 }
5345
5346 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5347 .start = saved_tgids_start,
5348 .stop = saved_tgids_stop,
5349 .next = saved_tgids_next,
5350 .show = saved_tgids_show,
5351 };
5352
5353 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5354 {
5355 int ret;
5356
5357 ret = tracing_check_open_get_tr(NULL);
5358 if (ret)
5359 return ret;
5360
5361 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5362 }
5363
5364
5365 static const struct file_operations tracing_saved_tgids_fops = {
5366 .open = tracing_saved_tgids_open,
5367 .read = seq_read,
5368 .llseek = seq_lseek,
5369 .release = seq_release,
5370 };
5371
5372 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5373 {
5374 unsigned int *ptr = v;
5375
5376 if (*pos || m->count)
5377 ptr++;
5378
5379 (*pos)++;
5380
5381 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5382 ptr++) {
5383 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5384 continue;
5385
5386 return ptr;
5387 }
5388
5389 return NULL;
5390 }
5391
5392 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5393 {
5394 void *v;
5395 loff_t l = 0;
5396
5397 preempt_disable();
5398 arch_spin_lock(&trace_cmdline_lock);
5399
5400 v = &savedcmd->map_cmdline_to_pid[0];
5401 while (l <= *pos) {
5402 v = saved_cmdlines_next(m, v, &l);
5403 if (!v)
5404 return NULL;
5405 }
5406
5407 return v;
5408 }
5409
5410 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5411 {
5412 arch_spin_unlock(&trace_cmdline_lock);
5413 preempt_enable();
5414 }
5415
5416 static int saved_cmdlines_show(struct seq_file *m, void *v)
5417 {
5418 char buf[TASK_COMM_LEN];
5419 unsigned int *pid = v;
5420
5421 __trace_find_cmdline(*pid, buf);
5422 seq_printf(m, "%d %s\n", *pid, buf);
5423 return 0;
5424 }
5425
5426 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5427 .start = saved_cmdlines_start,
5428 .next = saved_cmdlines_next,
5429 .stop = saved_cmdlines_stop,
5430 .show = saved_cmdlines_show,
5431 };
5432
5433 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5434 {
5435 int ret;
5436
5437 ret = tracing_check_open_get_tr(NULL);
5438 if (ret)
5439 return ret;
5440
5441 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5442 }
5443
5444 static const struct file_operations tracing_saved_cmdlines_fops = {
5445 .open = tracing_saved_cmdlines_open,
5446 .read = seq_read,
5447 .llseek = seq_lseek,
5448 .release = seq_release,
5449 };
5450
5451 static ssize_t
5452 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5453 size_t cnt, loff_t *ppos)
5454 {
5455 char buf[64];
5456 int r;
5457
5458 arch_spin_lock(&trace_cmdline_lock);
5459 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5460 arch_spin_unlock(&trace_cmdline_lock);
5461
5462 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5463 }
5464
5465 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5466 {
5467 kfree(s->saved_cmdlines);
5468 kfree(s->map_cmdline_to_pid);
5469 kfree(s);
5470 }
5471
5472 static int tracing_resize_saved_cmdlines(unsigned int val)
5473 {
5474 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5475
5476 s = kmalloc(sizeof(*s), GFP_KERNEL);
5477 if (!s)
5478 return -ENOMEM;
5479
5480 if (allocate_cmdlines_buffer(val, s) < 0) {
5481 kfree(s);
5482 return -ENOMEM;
5483 }
5484
5485 arch_spin_lock(&trace_cmdline_lock);
5486 savedcmd_temp = savedcmd;
5487 savedcmd = s;
5488 arch_spin_unlock(&trace_cmdline_lock);
5489 free_saved_cmdlines_buffer(savedcmd_temp);
5490
5491 return 0;
5492 }
5493
5494 static ssize_t
5495 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5496 size_t cnt, loff_t *ppos)
5497 {
5498 unsigned long val;
5499 int ret;
5500
5501 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5502 if (ret)
5503 return ret;
5504
5505 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5506 if (!val || val > PID_MAX_DEFAULT)
5507 return -EINVAL;
5508
5509 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5510 if (ret < 0)
5511 return ret;
5512
5513 *ppos += cnt;
5514
5515 return cnt;
5516 }
5517
5518 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5519 .open = tracing_open_generic,
5520 .read = tracing_saved_cmdlines_size_read,
5521 .write = tracing_saved_cmdlines_size_write,
5522 };
5523
5524 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5525 static union trace_eval_map_item *
5526 update_eval_map(union trace_eval_map_item *ptr)
5527 {
5528 if (!ptr->map.eval_string) {
5529 if (ptr->tail.next) {
5530 ptr = ptr->tail.next;
5531 /* Set ptr to the next real item (skip head) */
5532 ptr++;
5533 } else
5534 return NULL;
5535 }
5536 return ptr;
5537 }
5538
5539 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5540 {
5541 union trace_eval_map_item *ptr = v;
5542
5543 /*
5544 * Paranoid! If ptr points to end, we don't want to increment past it.
5545 * This really should never happen.
5546 */
5547 (*pos)++;
5548 ptr = update_eval_map(ptr);
5549 if (WARN_ON_ONCE(!ptr))
5550 return NULL;
5551
5552 ptr++;
5553 ptr = update_eval_map(ptr);
5554
5555 return ptr;
5556 }
5557
5558 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5559 {
5560 union trace_eval_map_item *v;
5561 loff_t l = 0;
5562
5563 mutex_lock(&trace_eval_mutex);
5564
5565 v = trace_eval_maps;
5566 if (v)
5567 v++;
5568
5569 while (v && l < *pos) {
5570 v = eval_map_next(m, v, &l);
5571 }
5572
5573 return v;
5574 }
5575
5576 static void eval_map_stop(struct seq_file *m, void *v)
5577 {
5578 mutex_unlock(&trace_eval_mutex);
5579 }
5580
5581 static int eval_map_show(struct seq_file *m, void *v)
5582 {
5583 union trace_eval_map_item *ptr = v;
5584
5585 seq_printf(m, "%s %ld (%s)\n",
5586 ptr->map.eval_string, ptr->map.eval_value,
5587 ptr->map.system);
5588
5589 return 0;
5590 }
5591
5592 static const struct seq_operations tracing_eval_map_seq_ops = {
5593 .start = eval_map_start,
5594 .next = eval_map_next,
5595 .stop = eval_map_stop,
5596 .show = eval_map_show,
5597 };
5598
5599 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5600 {
5601 int ret;
5602
5603 ret = tracing_check_open_get_tr(NULL);
5604 if (ret)
5605 return ret;
5606
5607 return seq_open(filp, &tracing_eval_map_seq_ops);
5608 }
5609
5610 static const struct file_operations tracing_eval_map_fops = {
5611 .open = tracing_eval_map_open,
5612 .read = seq_read,
5613 .llseek = seq_lseek,
5614 .release = seq_release,
5615 };
5616
5617 static inline union trace_eval_map_item *
5618 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5619 {
5620 /* Return tail of array given the head */
5621 return ptr + ptr->head.length + 1;
5622 }
5623
5624 static void
5625 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5626 int len)
5627 {
5628 struct trace_eval_map **stop;
5629 struct trace_eval_map **map;
5630 union trace_eval_map_item *map_array;
5631 union trace_eval_map_item *ptr;
5632
5633 stop = start + len;
5634
5635 /*
5636 * The trace_eval_maps contains the map plus a head and tail item,
5637 * where the head holds the module and length of array, and the
5638 * tail holds a pointer to the next list.
5639 */
5640 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5641 if (!map_array) {
5642 pr_warn("Unable to allocate trace eval mapping\n");
5643 return;
5644 }
5645
5646 mutex_lock(&trace_eval_mutex);
5647
5648 if (!trace_eval_maps)
5649 trace_eval_maps = map_array;
5650 else {
5651 ptr = trace_eval_maps;
5652 for (;;) {
5653 ptr = trace_eval_jmp_to_tail(ptr);
5654 if (!ptr->tail.next)
5655 break;
5656 ptr = ptr->tail.next;
5657
5658 }
5659 ptr->tail.next = map_array;
5660 }
5661 map_array->head.mod = mod;
5662 map_array->head.length = len;
5663 map_array++;
5664
5665 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5666 map_array->map = **map;
5667 map_array++;
5668 }
5669 memset(map_array, 0, sizeof(*map_array));
5670
5671 mutex_unlock(&trace_eval_mutex);
5672 }
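/*
 * Resulting layout of one map_array chunk built above (a sketch of the
 * code, not a separate data structure):
 *
 *   [ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() relies on this layout: tail == head + len + 1.
 * The final memset() zeroes the tail slot, so tail.next is NULL until a
 * later chunk is chained onto it.
 */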
5673
5674 static void trace_create_eval_file(struct dentry *d_tracer)
5675 {
5676 trace_create_file("eval_map", 0444, d_tracer,
5677 NULL, &tracing_eval_map_fops);
5678 }
5679
5680 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5681 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5682 static inline void trace_insert_eval_map_file(struct module *mod,
5683 struct trace_eval_map **start, int len) { }
5684 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5685
5686 static void trace_insert_eval_map(struct module *mod,
5687 struct trace_eval_map **start, int len)
5688 {
5689 struct trace_eval_map **map;
5690
5691 if (len <= 0)
5692 return;
5693
5694 map = start;
5695
5696 trace_event_eval_update(map, len);
5697
5698 trace_insert_eval_map_file(mod, start, len);
5699 }
5700
5701 static ssize_t
5702 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5703 size_t cnt, loff_t *ppos)
5704 {
5705 struct trace_array *tr = filp->private_data;
5706 char buf[MAX_TRACER_SIZE+2];
5707 int r;
5708
5709 mutex_lock(&trace_types_lock);
5710 r = sprintf(buf, "%s\n", tr->current_trace->name);
5711 mutex_unlock(&trace_types_lock);
5712
5713 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5714 }
5715
5716 int tracer_init(struct tracer *t, struct trace_array *tr)
5717 {
5718 tracing_reset_online_cpus(&tr->array_buffer);
5719 return t->init(tr);
5720 }
5721
5722 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5723 {
5724 int cpu;
5725
5726 for_each_tracing_cpu(cpu)
5727 per_cpu_ptr(buf->data, cpu)->entries = val;
5728 }
5729
5730 #ifdef CONFIG_TRACER_MAX_TRACE
5731 /* resize @trace_buf's per-cpu entries to the size of @size_buf's entries */
5732 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5733 struct array_buffer *size_buf, int cpu_id)
5734 {
5735 int cpu, ret = 0;
5736
5737 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5738 for_each_tracing_cpu(cpu) {
5739 ret = ring_buffer_resize(trace_buf->buffer,
5740 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5741 if (ret < 0)
5742 break;
5743 per_cpu_ptr(trace_buf->data, cpu)->entries =
5744 per_cpu_ptr(size_buf->data, cpu)->entries;
5745 }
5746 } else {
5747 ret = ring_buffer_resize(trace_buf->buffer,
5748 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5749 if (ret == 0)
5750 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5751 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5752 }
5753
5754 return ret;
5755 }
5756 #endif /* CONFIG_TRACER_MAX_TRACE */
5757
5758 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5759 unsigned long size, int cpu)
5760 {
5761 int ret;
5762
5763 /*
5764 * If kernel or user changes the size of the ring buffer
5765 * we use the size that was given, and we can forget about
5766 * expanding it later.
5767 */
5768 ring_buffer_expanded = true;
5769
5770 /* May be called before buffers are initialized */
5771 if (!tr->array_buffer.buffer)
5772 return 0;
5773
5774 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5775 if (ret < 0)
5776 return ret;
5777
5778 #ifdef CONFIG_TRACER_MAX_TRACE
5779 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5780 !tr->current_trace->use_max_tr)
5781 goto out;
5782
5783 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5784 if (ret < 0) {
5785 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5786 &tr->array_buffer, cpu);
5787 if (r < 0) {
5788 /*
5789 * AARGH! We are left with different
5790 * size max buffer!!!!
5791 * The max buffer is our "snapshot" buffer.
5792 * When a tracer needs a snapshot (one of the
5793 * latency tracers), it swaps the max buffer
5794 * with the saved snapshot. We succeeded in
5795 * updating the size of the main buffer, but failed to
5796 * update the size of the max buffer. But when we tried
5797 * to reset the main buffer to the original size, we
5798 * failed there too. This is very unlikely to
5799 * happen, but if it does, warn and kill all
5800 * tracing.
5801 */
5802 WARN_ON(1);
5803 tracing_disabled = 1;
5804 }
5805 return ret;
5806 }
5807
5808 if (cpu == RING_BUFFER_ALL_CPUS)
5809 set_buffer_entries(&tr->max_buffer, size);
5810 else
5811 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5812
5813 out:
5814 #endif /* CONFIG_TRACER_MAX_TRACE */
5815
5816 if (cpu == RING_BUFFER_ALL_CPUS)
5817 set_buffer_entries(&tr->array_buffer, size);
5818 else
5819 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5820
5821 return ret;
5822 }
5823
5824 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5825 unsigned long size, int cpu_id)
5826 {
5827 int ret = size;
5828
5829 mutex_lock(&trace_types_lock);
5830
5831 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5832 /* make sure, this cpu is enabled in the mask */
5833 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5834 ret = -EINVAL;
5835 goto out;
5836 }
5837 }
5838
5839 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5840 if (ret < 0)
5841 ret = -ENOMEM;
5842
5843 out:
5844 mutex_unlock(&trace_types_lock);
5845
5846 return ret;
5847 }
5848
5849
5850 /**
5851 * tracing_update_buffers - used by tracing facility to expand ring buffers
5852 *
5853 * To save memory when tracing is never used on a system that has it
5854 * configured in, the ring buffers are set to a minimum size. But once
5855 * a user starts to use the tracing facility, they need to grow
5856 * to their default size.
5857 *
5858 * This function is to be called when a tracer is about to be used.
5859 */
5860 int tracing_update_buffers(void)
5861 {
5862 int ret = 0;
5863
5864 mutex_lock(&trace_types_lock);
5865 if (!ring_buffer_expanded)
5866 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5867 RING_BUFFER_ALL_CPUS);
5868 mutex_unlock(&trace_types_lock);
5869
5870 return ret;
5871 }
5872
5873 struct trace_option_dentry;
5874
5875 static void
5876 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5877
5878 /*
5879 * Used to clear out the tracer before deletion of an instance.
5880 * Must have trace_types_lock held.
5881 */
5882 static void tracing_set_nop(struct trace_array *tr)
5883 {
5884 if (tr->current_trace == &nop_trace)
5885 return;
5886
5887 tr->current_trace->enabled--;
5888
5889 if (tr->current_trace->reset)
5890 tr->current_trace->reset(tr);
5891
5892 tr->current_trace = &nop_trace;
5893 }
5894
5895 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5896 {
5897 /* Only enable if the directory has been created already. */
5898 if (!tr->dir)
5899 return;
5900
5901 create_trace_option_files(tr, t);
5902 }
5903
5904 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5905 {
5906 struct tracer *t;
5907 #ifdef CONFIG_TRACER_MAX_TRACE
5908 bool had_max_tr;
5909 #endif
5910 int ret = 0;
5911
5912 mutex_lock(&trace_types_lock);
5913
5914 if (!ring_buffer_expanded) {
5915 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5916 RING_BUFFER_ALL_CPUS);
5917 if (ret < 0)
5918 goto out;
5919 ret = 0;
5920 }
5921
5922 for (t = trace_types; t; t = t->next) {
5923 if (strcmp(t->name, buf) == 0)
5924 break;
5925 }
5926 if (!t) {
5927 ret = -EINVAL;
5928 goto out;
5929 }
5930 if (t == tr->current_trace)
5931 goto out;
5932
5933 #ifdef CONFIG_TRACER_SNAPSHOT
5934 if (t->use_max_tr) {
5935 arch_spin_lock(&tr->max_lock);
5936 if (tr->cond_snapshot)
5937 ret = -EBUSY;
5938 arch_spin_unlock(&tr->max_lock);
5939 if (ret)
5940 goto out;
5941 }
5942 #endif
5943 /* Some tracers won't work on kernel command line */
5944 if (system_state < SYSTEM_RUNNING && t->noboot) {
5945 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5946 t->name);
5947 goto out;
5948 }
5949
5950 /* Some tracers are only allowed for the top level buffer */
5951 if (!trace_ok_for_array(t, tr)) {
5952 ret = -EINVAL;
5953 goto out;
5954 }
5955
5956 /* If trace pipe files are being read, we can't change the tracer */
5957 if (tr->trace_ref) {
5958 ret = -EBUSY;
5959 goto out;
5960 }
5961
5962 trace_branch_disable();
5963
5964 tr->current_trace->enabled--;
5965
5966 if (tr->current_trace->reset)
5967 tr->current_trace->reset(tr);
5968
5969 /* Current trace needs to be nop_trace before synchronize_rcu */
5970 tr->current_trace = &nop_trace;
5971
5972 #ifdef CONFIG_TRACER_MAX_TRACE
5973 had_max_tr = tr->allocated_snapshot;
5974
5975 if (had_max_tr && !t->use_max_tr) {
5976 /*
5977 * We need to make sure that the update_max_tr sees that
5978 * current_trace changed to nop_trace to keep it from
5979 * swapping the buffers after we resize it.
5980 * The update_max_tr is called with interrupts disabled,
5981 * so a synchronize_rcu() is sufficient.
5982 */
5983 synchronize_rcu();
5984 free_snapshot(tr);
5985 }
5986 #endif
5987
5988 #ifdef CONFIG_TRACER_MAX_TRACE
5989 if (t->use_max_tr && !had_max_tr) {
5990 ret = tracing_alloc_snapshot_instance(tr);
5991 if (ret < 0)
5992 goto out;
5993 }
5994 #endif
5995
5996 if (t->init) {
5997 ret = tracer_init(t, tr);
5998 if (ret)
5999 goto out;
6000 }
6001
6002 tr->current_trace = t;
6003 tr->current_trace->enabled++;
6004 trace_branch_enable(tr);
6005 out:
6006 mutex_unlock(&trace_types_lock);
6007
6008 return ret;
6009 }
6010
6011 static ssize_t
6012 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6013 size_t cnt, loff_t *ppos)
6014 {
6015 struct trace_array *tr = filp->private_data;
6016 char buf[MAX_TRACER_SIZE+1];
6017 int i;
6018 size_t ret;
6019 int err;
6020
6021 ret = cnt;
6022
6023 if (cnt > MAX_TRACER_SIZE)
6024 cnt = MAX_TRACER_SIZE;
6025
6026 if (copy_from_user(buf, ubuf, cnt))
6027 return -EFAULT;
6028
6029 buf[cnt] = 0;
6030
6031 /* strip ending whitespace. */
6032 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6033 buf[i] = 0;
6034
6035 err = tracing_set_tracer(tr, buf);
6036 if (err)
6037 return err;
6038
6039 *ppos += ret;
6040
6041 return ret;
6042 }
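/*
 * Typical use of the write path above (illustrative; assumes the
 * standard "current_tracer" tracefs file is the one wired to
 * set_tracer_fops):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/current_tracer
 *   function_graph
 *
 * Trailing whitespace from echo is stripped before the name is looked
 * up in trace_types.
 */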
6043
6044 static ssize_t
6045 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6046 size_t cnt, loff_t *ppos)
6047 {
6048 char buf[64];
6049 int r;
6050
6051 r = snprintf(buf, sizeof(buf), "%ld\n",
6052 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6053 if (r > sizeof(buf))
6054 r = sizeof(buf);
6055 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6056 }
6057
6058 static ssize_t
6059 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6060 size_t cnt, loff_t *ppos)
6061 {
6062 unsigned long val;
6063 int ret;
6064
6065 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6066 if (ret)
6067 return ret;
6068
6069 *ptr = val * 1000;
6070
6071 return cnt;
6072 }
6073
6074 static ssize_t
6075 tracing_thresh_read(struct file *filp, char __user *ubuf,
6076 size_t cnt, loff_t *ppos)
6077 {
6078 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6079 }
6080
6081 static ssize_t
6082 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6083 size_t cnt, loff_t *ppos)
6084 {
6085 struct trace_array *tr = filp->private_data;
6086 int ret;
6087
6088 mutex_lock(&trace_types_lock);
6089 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6090 if (ret < 0)
6091 goto out;
6092
6093 if (tr->current_trace->update_thresh) {
6094 ret = tr->current_trace->update_thresh(tr);
6095 if (ret < 0)
6096 goto out;
6097 }
6098
6099 ret = cnt;
6100 out:
6101 mutex_unlock(&trace_types_lock);
6102
6103 return ret;
6104 }
6105
6106 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6107
6108 static ssize_t
6109 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6110 size_t cnt, loff_t *ppos)
6111 {
6112 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6113 }
6114
6115 static ssize_t
6116 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6117 size_t cnt, loff_t *ppos)
6118 {
6119 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6120 }
6121
6122 #endif
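/*
 * The handlers above expose latency values in microseconds (converted
 * to and from nanoseconds by tracing_nsecs_read/write).  Illustrative
 * usage, assuming the "tracing_max_latency" file is backed by them:
 *
 *   # cat /sys/kernel/tracing/tracing_max_latency
 *   25
 *   # echo 0 > /sys/kernel/tracing/tracing_max_latency     # reset
 */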
6123
6124 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6125 {
6126 struct trace_array *tr = inode->i_private;
6127 struct trace_iterator *iter;
6128 int ret;
6129
6130 ret = tracing_check_open_get_tr(tr);
6131 if (ret)
6132 return ret;
6133
6134 mutex_lock(&trace_types_lock);
6135
6136 /* create a buffer to store the information to pass to userspace */
6137 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6138 if (!iter) {
6139 ret = -ENOMEM;
6140 __trace_array_put(tr);
6141 goto out;
6142 }
6143
6144 trace_seq_init(&iter->seq);
6145 iter->trace = tr->current_trace;
6146
6147 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6148 ret = -ENOMEM;
6149 goto fail;
6150 }
6151
6152 /* trace pipe does not show start of buffer */
6153 cpumask_setall(iter->started);
6154
6155 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6156 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6157
6158 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6159 if (trace_clocks[tr->clock_id].in_ns)
6160 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6161
6162 iter->tr = tr;
6163 iter->array_buffer = &tr->array_buffer;
6164 iter->cpu_file = tracing_get_cpu(inode);
6165 mutex_init(&iter->mutex);
6166 filp->private_data = iter;
6167
6168 if (iter->trace->pipe_open)
6169 iter->trace->pipe_open(iter);
6170
6171 nonseekable_open(inode, filp);
6172
6173 tr->trace_ref++;
6174 out:
6175 mutex_unlock(&trace_types_lock);
6176 return ret;
6177
6178 fail:
6179 kfree(iter);
6180 __trace_array_put(tr);
6181 mutex_unlock(&trace_types_lock);
6182 return ret;
6183 }
6184
6185 static int tracing_release_pipe(struct inode *inode, struct file *file)
6186 {
6187 struct trace_iterator *iter = file->private_data;
6188 struct trace_array *tr = inode->i_private;
6189
6190 mutex_lock(&trace_types_lock);
6191
6192 tr->trace_ref--;
6193
6194 if (iter->trace->pipe_close)
6195 iter->trace->pipe_close(iter);
6196
6197 mutex_unlock(&trace_types_lock);
6198
6199 free_cpumask_var(iter->started);
6200 mutex_destroy(&iter->mutex);
6201 kfree(iter);
6202
6203 trace_array_put(tr);
6204
6205 return 0;
6206 }
6207
6208 static __poll_t
6209 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6210 {
6211 struct trace_array *tr = iter->tr;
6212
6213 /* Iterators are static, they should be filled or empty */
6214 if (trace_buffer_iter(iter, iter->cpu_file))
6215 return EPOLLIN | EPOLLRDNORM;
6216
6217 if (tr->trace_flags & TRACE_ITER_BLOCK)
6218 /*
6219 * Always select as readable when in blocking mode
6220 */
6221 return EPOLLIN | EPOLLRDNORM;
6222 else
6223 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6224 filp, poll_table);
6225 }
6226
6227 static __poll_t
6228 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6229 {
6230 struct trace_iterator *iter = filp->private_data;
6231
6232 return trace_poll(iter, filp, poll_table);
6233 }
6234
6235 /* Must be called with iter->mutex held. */
6236 static int tracing_wait_pipe(struct file *filp)
6237 {
6238 struct trace_iterator *iter = filp->private_data;
6239 int ret;
6240
6241 while (trace_empty(iter)) {
6242
6243 if ((filp->f_flags & O_NONBLOCK)) {
6244 return -EAGAIN;
6245 }
6246
6247 /*
6248 * We block until we read something and tracing is disabled.
6249 * We still block if tracing is disabled, but we have never
6250 * read anything. This allows a user to cat this file, and
6251 * then enable tracing. But after we have read something,
6252 * we give an EOF when tracing is again disabled.
6253 *
6254 * iter->pos will be 0 if we haven't read anything.
6255 */
6256 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6257 break;
6258
6259 mutex_unlock(&iter->mutex);
6260
6261 ret = wait_on_pipe(iter, 0);
6262
6263 mutex_lock(&iter->mutex);
6264
6265 if (ret)
6266 return ret;
6267 }
6268
6269 return 1;
6270 }
6271
6272 /*
6273 * Consumer reader.
6274 */
6275 static ssize_t
6276 tracing_read_pipe(struct file *filp, char __user *ubuf,
6277 size_t cnt, loff_t *ppos)
6278 {
6279 struct trace_iterator *iter = filp->private_data;
6280 ssize_t sret;
6281
6282 /*
6283 * Avoid more than one consumer on a single file descriptor
6284 * This is just a matter of trace coherency; the ring buffer itself
6285 * is protected.
6286 */
6287 mutex_lock(&iter->mutex);
6288
6289 /* return any leftover data */
6290 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6291 if (sret != -EBUSY)
6292 goto out;
6293
6294 trace_seq_init(&iter->seq);
6295
6296 if (iter->trace->read) {
6297 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6298 if (sret)
6299 goto out;
6300 }
6301
6302 waitagain:
6303 sret = tracing_wait_pipe(filp);
6304 if (sret <= 0)
6305 goto out;
6306
6307 /* stop when tracing is finished */
6308 if (trace_empty(iter)) {
6309 sret = 0;
6310 goto out;
6311 }
6312
6313 if (cnt >= PAGE_SIZE)
6314 cnt = PAGE_SIZE - 1;
6315
6316 /* reset all but tr, trace, and overruns */
6317 memset(&iter->seq, 0,
6318 sizeof(struct trace_iterator) -
6319 offsetof(struct trace_iterator, seq));
6320 cpumask_clear(iter->started);
6321 trace_seq_init(&iter->seq);
6322 iter->pos = -1;
6323
6324 trace_event_read_lock();
6325 trace_access_lock(iter->cpu_file);
6326 while (trace_find_next_entry_inc(iter) != NULL) {
6327 enum print_line_t ret;
6328 int save_len = iter->seq.seq.len;
6329
6330 ret = print_trace_line(iter);
6331 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6332 /* don't print partial lines */
6333 iter->seq.seq.len = save_len;
6334 break;
6335 }
6336 if (ret != TRACE_TYPE_NO_CONSUME)
6337 trace_consume(iter);
6338
6339 if (trace_seq_used(&iter->seq) >= cnt)
6340 break;
6341
6342 /*
6343 * Setting the full flag means we reached the trace_seq buffer
6344 * size and should have left via the partial-line condition above.
6345 * One of the trace_seq_* functions is not being used properly.
6346 */
6347 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6348 iter->ent->type);
6349 }
6350 trace_access_unlock(iter->cpu_file);
6351 trace_event_read_unlock();
6352
6353 /* Now copy what we have to the user */
6354 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6355 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6356 trace_seq_init(&iter->seq);
6357
6358 /*
6359 * If there was nothing to send to the user, despite having consumed
6360 * trace entries, go back and wait for more entries.
6361 */
6362 if (sret == -EBUSY)
6363 goto waitagain;
6364
6365 out:
6366 mutex_unlock(&iter->mutex);
6367
6368 return sret;
6369 }
6370
6371 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6372 unsigned int idx)
6373 {
6374 __free_page(spd->pages[idx]);
6375 }
6376
6377 static size_t
6378 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6379 {
6380 size_t count;
6381 int save_len;
6382 int ret;
6383
6384 /* Seq buffer is page-sized, exactly what we need. */
6385 for (;;) {
6386 save_len = iter->seq.seq.len;
6387 ret = print_trace_line(iter);
6388
6389 if (trace_seq_has_overflowed(&iter->seq)) {
6390 iter->seq.seq.len = save_len;
6391 break;
6392 }
6393
6394 /*
6395 * This should not be hit, because a partial line should
6396 * only be returned if the iter->seq overflowed. But check
6397 * it anyway to be safe.
6398 */
6399 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6400 iter->seq.seq.len = save_len;
6401 break;
6402 }
6403
6404 count = trace_seq_used(&iter->seq) - save_len;
6405 if (rem < count) {
6406 rem = 0;
6407 iter->seq.seq.len = save_len;
6408 break;
6409 }
6410
6411 if (ret != TRACE_TYPE_NO_CONSUME)
6412 trace_consume(iter);
6413 rem -= count;
6414 if (!trace_find_next_entry_inc(iter)) {
6415 rem = 0;
6416 iter->ent = NULL;
6417 break;
6418 }
6419 }
6420
6421 return rem;
6422 }
6423
6424 static ssize_t tracing_splice_read_pipe(struct file *filp,
6425 loff_t *ppos,
6426 struct pipe_inode_info *pipe,
6427 size_t len,
6428 unsigned int flags)
6429 {
6430 struct page *pages_def[PIPE_DEF_BUFFERS];
6431 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6432 struct trace_iterator *iter = filp->private_data;
6433 struct splice_pipe_desc spd = {
6434 .pages = pages_def,
6435 .partial = partial_def,
6436 .nr_pages = 0, /* This gets updated below. */
6437 .nr_pages_max = PIPE_DEF_BUFFERS,
6438 .ops = &default_pipe_buf_ops,
6439 .spd_release = tracing_spd_release_pipe,
6440 };
6441 ssize_t ret;
6442 size_t rem;
6443 unsigned int i;
6444
6445 if (splice_grow_spd(pipe, &spd))
6446 return -ENOMEM;
6447
6448 mutex_lock(&iter->mutex);
6449
6450 if (iter->trace->splice_read) {
6451 ret = iter->trace->splice_read(iter, filp,
6452 ppos, pipe, len, flags);
6453 if (ret)
6454 goto out_err;
6455 }
6456
6457 ret = tracing_wait_pipe(filp);
6458 if (ret <= 0)
6459 goto out_err;
6460
6461 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6462 ret = -EFAULT;
6463 goto out_err;
6464 }
6465
6466 trace_event_read_lock();
6467 trace_access_lock(iter->cpu_file);
6468
6469 /* Fill as many pages as possible. */
6470 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6471 spd.pages[i] = alloc_page(GFP_KERNEL);
6472 if (!spd.pages[i])
6473 break;
6474
6475 rem = tracing_fill_pipe_page(rem, iter);
6476
6477 /* Copy the data into the page, so we can start over. */
6478 ret = trace_seq_to_buffer(&iter->seq,
6479 page_address(spd.pages[i]),
6480 trace_seq_used(&iter->seq));
6481 if (ret < 0) {
6482 __free_page(spd.pages[i]);
6483 break;
6484 }
6485 spd.partial[i].offset = 0;
6486 spd.partial[i].len = trace_seq_used(&iter->seq);
6487
6488 trace_seq_init(&iter->seq);
6489 }
6490
6491 trace_access_unlock(iter->cpu_file);
6492 trace_event_read_unlock();
6493 mutex_unlock(&iter->mutex);
6494
6495 spd.nr_pages = i;
6496
6497 if (i)
6498 ret = splice_to_pipe(pipe, &spd);
6499 else
6500 ret = 0;
6501 out:
6502 splice_shrink_spd(&spd);
6503 return ret;
6504
6505 out_err:
6506 mutex_unlock(&iter->mutex);
6507 goto out;
6508 }
6509
6510 static ssize_t
6511 tracing_entries_read(struct file *filp, char __user *ubuf,
6512 size_t cnt, loff_t *ppos)
6513 {
6514 struct inode *inode = file_inode(filp);
6515 struct trace_array *tr = inode->i_private;
6516 int cpu = tracing_get_cpu(inode);
6517 char buf[64];
6518 int r = 0;
6519 ssize_t ret;
6520
6521 mutex_lock(&trace_types_lock);
6522
6523 if (cpu == RING_BUFFER_ALL_CPUS) {
6524 int cpu, buf_size_same;
6525 unsigned long size;
6526
6527 size = 0;
6528 buf_size_same = 1;
6529 /* check if all cpu sizes are same */
6530 for_each_tracing_cpu(cpu) {
6531 /* fill in the size from first enabled cpu */
6532 if (size == 0)
6533 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6534 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6535 buf_size_same = 0;
6536 break;
6537 }
6538 }
6539
6540 if (buf_size_same) {
6541 if (!ring_buffer_expanded)
6542 r = sprintf(buf, "%lu (expanded: %lu)\n",
6543 size >> 10,
6544 trace_buf_size >> 10);
6545 else
6546 r = sprintf(buf, "%lu\n", size >> 10);
6547 } else
6548 r = sprintf(buf, "X\n");
6549 } else
6550 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6551
6552 mutex_unlock(&trace_types_lock);
6553
6554 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555 return ret;
6556 }
6557
6558 static ssize_t
6559 tracing_entries_write(struct file *filp, const char __user *ubuf,
6560 size_t cnt, loff_t *ppos)
6561 {
6562 struct inode *inode = file_inode(filp);
6563 struct trace_array *tr = inode->i_private;
6564 unsigned long val;
6565 int ret;
6566
6567 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6568 if (ret)
6569 return ret;
6570
6571 /* must have at least 1 entry */
6572 if (!val)
6573 return -EINVAL;
6574
6575 /* value is in KB */
6576 val <<= 10;
6577 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6578 if (ret < 0)
6579 return ret;
6580
6581 *ppos += cnt;
6582
6583 return cnt;
6584 }
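/*
 * The value written here is interpreted in kilobytes (val <<= 10) and
 * applied per CPU.  Illustrative usage, assuming this handler backs the
 * per-instance "buffer_size_kb" file:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb    # 4 MB per CPU
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   4096
 */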
6585
6586 static ssize_t
6587 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6588 size_t cnt, loff_t *ppos)
6589 {
6590 struct trace_array *tr = filp->private_data;
6591 char buf[64];
6592 int r, cpu;
6593 unsigned long size = 0, expanded_size = 0;
6594
6595 mutex_lock(&trace_types_lock);
6596 for_each_tracing_cpu(cpu) {
6597 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6598 if (!ring_buffer_expanded)
6599 expanded_size += trace_buf_size >> 10;
6600 }
6601 if (ring_buffer_expanded)
6602 r = sprintf(buf, "%lu\n", size);
6603 else
6604 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6605 mutex_unlock(&trace_types_lock);
6606
6607 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6608 }
6609
6610 static ssize_t
6611 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6612 size_t cnt, loff_t *ppos)
6613 {
6614 /*
6615 * There is no need to read what the user has written; this function
6616 * just makes sure that there is no error when "echo" is used.
6617 */
6618
6619 *ppos += cnt;
6620
6621 return cnt;
6622 }
6623
6624 static int
6625 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6626 {
6627 struct trace_array *tr = inode->i_private;
6628
6629 /* disable tracing ? */
6630 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6631 tracer_tracing_off(tr);
6632 /* resize the ring buffer to 0 */
6633 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6634
6635 trace_array_put(tr);
6636
6637 return 0;
6638 }
6639
6640 static ssize_t
6641 tracing_mark_write(struct file *filp, const char __user *ubuf,
6642 size_t cnt, loff_t *fpos)
6643 {
6644 struct trace_array *tr = filp->private_data;
6645 struct ring_buffer_event *event;
6646 enum event_trigger_type tt = ETT_NONE;
6647 struct trace_buffer *buffer;
6648 struct print_entry *entry;
6649 unsigned long irq_flags;
6650 ssize_t written;
6651 int size;
6652 int len;
6653
6654 /* Used in tracing_mark_raw_write() as well */
6655 #define FAULTED_STR "<faulted>"
6656 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6657
6658 if (tracing_disabled)
6659 return -EINVAL;
6660
6661 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6662 return -EINVAL;
6663
6664 if (cnt > TRACE_BUF_SIZE)
6665 cnt = TRACE_BUF_SIZE;
6666
6667 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6668
6669 local_save_flags(irq_flags);
6670 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6671
6672 /* If less than "<faulted>", then make sure we can still add that */
6673 if (cnt < FAULTED_SIZE)
6674 size += FAULTED_SIZE - cnt;
6675
6676 buffer = tr->array_buffer.buffer;
6677 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6678 irq_flags, preempt_count());
6679 if (unlikely(!event))
6680 /* Ring buffer disabled, return as if not open for write */
6681 return -EBADF;
6682
6683 entry = ring_buffer_event_data(event);
6684 entry->ip = _THIS_IP_;
6685
6686 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6687 if (len) {
6688 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6689 cnt = FAULTED_SIZE;
6690 written = -EFAULT;
6691 } else
6692 written = cnt;
6693
6694 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6695 /* do not add \n before testing triggers, but add \0 */
6696 entry->buf[cnt] = '\0';
6697 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6698 }
6699
6700 if (entry->buf[cnt - 1] != '\n') {
6701 entry->buf[cnt] = '\n';
6702 entry->buf[cnt + 1] = '\0';
6703 } else
6704 entry->buf[cnt] = '\0';
6705
6706 if (static_branch_unlikely(&trace_marker_exports_enabled))
6707 ftrace_exports(event, TRACE_EXPORT_MARKER);
6708 __buffer_unlock_commit(buffer, event);
6709
6710 if (tt)
6711 event_triggers_post_call(tr->trace_marker_file, tt);
6712
6713 if (written > 0)
6714 *fpos += written;
6715
6716 return written;
6717 }
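/*
 * Minimal userspace sketch for writing through this handler
 * (illustrative; assumes tracefs is mounted at /sys/kernel/tracing and
 * that this handler backs the "trace_marker" file):
 *
 *   #include <fcntl.h>
 *   #include <unistd.h>
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *   if (fd >= 0) {
 *           write(fd, "hello from userspace", 20);
 *           close(fd);
 *   }
 *
 * A trailing newline is appended by the kernel if the payload lacks one.
 */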
6718
6719 /* Limit it for now to 3K (including tag) */
6720 #define RAW_DATA_MAX_SIZE (1024*3)
6721
6722 static ssize_t
6723 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6724 size_t cnt, loff_t *fpos)
6725 {
6726 struct trace_array *tr = filp->private_data;
6727 struct ring_buffer_event *event;
6728 struct trace_buffer *buffer;
6729 struct raw_data_entry *entry;
6730 unsigned long irq_flags;
6731 ssize_t written;
6732 int size;
6733 int len;
6734
6735 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6736
6737 if (tracing_disabled)
6738 return -EINVAL;
6739
6740 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6741 return -EINVAL;
6742
6743 /* The marker must at least have a tag id */
6744 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6745 return -EINVAL;
6746
6747 if (cnt > TRACE_BUF_SIZE)
6748 cnt = TRACE_BUF_SIZE;
6749
6750 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6751
6752 local_save_flags(irq_flags);
6753 size = sizeof(*entry) + cnt;
6754 if (cnt < FAULT_SIZE_ID)
6755 size += FAULT_SIZE_ID - cnt;
6756
6757 buffer = tr->array_buffer.buffer;
6758 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6759 irq_flags, preempt_count());
6760 if (!event)
6761 /* Ring buffer disabled, return as if not open for write */
6762 return -EBADF;
6763
6764 entry = ring_buffer_event_data(event);
6765
6766 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6767 if (len) {
6768 entry->id = -1;
6769 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6770 written = -EFAULT;
6771 } else
6772 written = cnt;
6773
6774 __buffer_unlock_commit(buffer, event);
6775
6776 if (written > 0)
6777 *fpos += written;
6778
6779 return written;
6780 }
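/*
 * The raw variant expects a binary payload that starts with a 4-byte
 * tag id, as enforced by the "cnt < sizeof(unsigned int)" check above.
 * Userspace sketch (illustrative; assumes the "trace_marker_raw" file):
 *
 *   struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   if (fd >= 0) {
 *           write(fd, &rec, sizeof(rec));
 *           close(fd);
 *   }
 */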
6781
6782 static int tracing_clock_show(struct seq_file *m, void *v)
6783 {
6784 struct trace_array *tr = m->private;
6785 int i;
6786
6787 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6788 seq_printf(m,
6789 "%s%s%s%s", i ? " " : "",
6790 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6791 i == tr->clock_id ? "]" : "");
6792 seq_putc(m, '\n');
6793
6794 return 0;
6795 }
6796
6797 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6798 {
6799 int i;
6800
6801 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6802 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6803 break;
6804 }
6805 if (i == ARRAY_SIZE(trace_clocks))
6806 return -EINVAL;
6807
6808 mutex_lock(&trace_types_lock);
6809
6810 tr->clock_id = i;
6811
6812 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6813
6814 /*
6815 * New clock may not be consistent with the previous clock.
6816 * Reset the buffer so that it doesn't have incomparable timestamps.
6817 */
6818 tracing_reset_online_cpus(&tr->array_buffer);
6819
6820 #ifdef CONFIG_TRACER_MAX_TRACE
6821 if (tr->max_buffer.buffer)
6822 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6823 tracing_reset_online_cpus(&tr->max_buffer);
6824 #endif
6825
6826 mutex_unlock(&trace_types_lock);
6827
6828 return 0;
6829 }
6830
6831 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6832 size_t cnt, loff_t *fpos)
6833 {
6834 struct seq_file *m = filp->private_data;
6835 struct trace_array *tr = m->private;
6836 char buf[64];
6837 const char *clockstr;
6838 int ret;
6839
6840 if (cnt >= sizeof(buf))
6841 return -EINVAL;
6842
6843 if (copy_from_user(buf, ubuf, cnt))
6844 return -EFAULT;
6845
6846 buf[cnt] = 0;
6847
6848 clockstr = strstrip(buf);
6849
6850 ret = tracing_set_clock(tr, clockstr);
6851 if (ret)
6852 return ret;
6853
6854 *fpos += cnt;
6855
6856 return cnt;
6857 }
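/*
 * Illustrative interaction with the clock file served by the show/write
 * handlers above (assuming it is exposed as "trace_clock"; the exact
 * list of clocks shown depends on trace_clocks[]):
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * Changing the clock resets the buffers, since timestamps taken with
 * different clocks are not comparable.
 */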
6858
6859 static int tracing_clock_open(struct inode *inode, struct file *file)
6860 {
6861 struct trace_array *tr = inode->i_private;
6862 int ret;
6863
6864 ret = tracing_check_open_get_tr(tr);
6865 if (ret)
6866 return ret;
6867
6868 ret = single_open(file, tracing_clock_show, inode->i_private);
6869 if (ret < 0)
6870 trace_array_put(tr);
6871
6872 return ret;
6873 }
6874
6875 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6876 {
6877 struct trace_array *tr = m->private;
6878
6879 mutex_lock(&trace_types_lock);
6880
6881 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6882 seq_puts(m, "delta [absolute]\n");
6883 else
6884 seq_puts(m, "[delta] absolute\n");
6885
6886 mutex_unlock(&trace_types_lock);
6887
6888 return 0;
6889 }
6890
6891 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6892 {
6893 struct trace_array *tr = inode->i_private;
6894 int ret;
6895
6896 ret = tracing_check_open_get_tr(tr);
6897 if (ret)
6898 return ret;
6899
6900 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6901 if (ret < 0)
6902 trace_array_put(tr);
6903
6904 return ret;
6905 }
6906
6907 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6908 {
6909 int ret = 0;
6910
6911 mutex_lock(&trace_types_lock);
6912
6913 if (abs && tr->time_stamp_abs_ref++)
6914 goto out;
6915
6916 if (!abs) {
6917 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6918 ret = -EINVAL;
6919 goto out;
6920 }
6921
6922 if (--tr->time_stamp_abs_ref)
6923 goto out;
6924 }
6925
6926 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6927
6928 #ifdef CONFIG_TRACER_MAX_TRACE
6929 if (tr->max_buffer.buffer)
6930 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6931 #endif
6932 out:
6933 mutex_unlock(&trace_types_lock);
6934
6935 return ret;
6936 }
6937
6938 struct ftrace_buffer_info {
6939 struct trace_iterator iter;
6940 void *spare;
6941 unsigned int spare_cpu;
6942 unsigned int read;
6943 };
6944
6945 #ifdef CONFIG_TRACER_SNAPSHOT
6946 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6947 {
6948 struct trace_array *tr = inode->i_private;
6949 struct trace_iterator *iter;
6950 struct seq_file *m;
6951 int ret;
6952
6953 ret = tracing_check_open_get_tr(tr);
6954 if (ret)
6955 return ret;
6956
6957 if (file->f_mode & FMODE_READ) {
6958 iter = __tracing_open(inode, file, true);
6959 if (IS_ERR(iter))
6960 ret = PTR_ERR(iter);
6961 } else {
6962 /* Writes still need the seq_file to hold the private data */
6963 ret = -ENOMEM;
6964 m = kzalloc(sizeof(*m), GFP_KERNEL);
6965 if (!m)
6966 goto out;
6967 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6968 if (!iter) {
6969 kfree(m);
6970 goto out;
6971 }
6972 ret = 0;
6973
6974 iter->tr = tr;
6975 iter->array_buffer = &tr->max_buffer;
6976 iter->cpu_file = tracing_get_cpu(inode);
6977 m->private = iter;
6978 file->private_data = m;
6979 }
6980 out:
6981 if (ret < 0)
6982 trace_array_put(tr);
6983
6984 return ret;
6985 }
6986
6987 static ssize_t
6988 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6989 loff_t *ppos)
6990 {
6991 struct seq_file *m = filp->private_data;
6992 struct trace_iterator *iter = m->private;
6993 struct trace_array *tr = iter->tr;
6994 unsigned long val;
6995 int ret;
6996
6997 ret = tracing_update_buffers();
6998 if (ret < 0)
6999 return ret;
7000
7001 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002 if (ret)
7003 return ret;
7004
7005 mutex_lock(&trace_types_lock);
7006
7007 if (tr->current_trace->use_max_tr) {
7008 ret = -EBUSY;
7009 goto out;
7010 }
7011
7012 arch_spin_lock(&tr->max_lock);
7013 if (tr->cond_snapshot)
7014 ret = -EBUSY;
7015 arch_spin_unlock(&tr->max_lock);
7016 if (ret)
7017 goto out;
7018
7019 switch (val) {
7020 case 0:
7021 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7022 ret = -EINVAL;
7023 break;
7024 }
7025 if (tr->allocated_snapshot)
7026 free_snapshot(tr);
7027 break;
7028 case 1:
7029 /* Only allow per-cpu swap if the ring buffer supports it */
7030 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7031 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7032 ret = -EINVAL;
7033 break;
7034 }
7035 #endif
7036 if (tr->allocated_snapshot)
7037 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7038 &tr->array_buffer, iter->cpu_file);
7039 else
7040 ret = tracing_alloc_snapshot_instance(tr);
7041 if (ret < 0)
7042 break;
7043 local_irq_disable();
7044 /* Now, we're going to swap */
7045 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7046 update_max_tr(tr, current, smp_processor_id(), NULL);
7047 else
7048 update_max_tr_single(tr, current, iter->cpu_file);
7049 local_irq_enable();
7050 break;
7051 default:
7052 if (tr->allocated_snapshot) {
7053 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7054 tracing_reset_online_cpus(&tr->max_buffer);
7055 else
7056 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7057 }
7058 break;
7059 }
7060
7061 if (ret >= 0) {
7062 *ppos += cnt;
7063 ret = cnt;
7064 }
7065 out:
7066 mutex_unlock(&trace_types_lock);
7067 return ret;
7068 }
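/*
 * Summary of the values accepted by the switch above (illustrative, for
 * the "snapshot" file):
 *
 *   echo 0 > snapshot   # free the snapshot buffer (all-CPUs file only)
 *   echo 1 > snapshot   # allocate if needed and take a snapshot
 *   echo 2 > snapshot   # clear the snapshot contents, keep the buffer
 *
 * Any value greater than 1 behaves like 2 (the default case).
 */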
7069
7070 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7071 {
7072 struct seq_file *m = file->private_data;
7073 int ret;
7074
7075 ret = tracing_release(inode, file);
7076
7077 if (file->f_mode & FMODE_READ)
7078 return ret;
7079
7080 /* If write only, the seq_file is just a stub */
7081 if (m)
7082 kfree(m->private);
7083 kfree(m);
7084
7085 return 0;
7086 }
7087
7088 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7089 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7090 size_t count, loff_t *ppos);
7091 static int tracing_buffers_release(struct inode *inode, struct file *file);
7092 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7093 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7094
7095 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7096 {
7097 struct ftrace_buffer_info *info;
7098 int ret;
7099
7100 /* The following checks for tracefs lockdown */
7101 ret = tracing_buffers_open(inode, filp);
7102 if (ret < 0)
7103 return ret;
7104
7105 info = filp->private_data;
7106
7107 if (info->iter.trace->use_max_tr) {
7108 tracing_buffers_release(inode, filp);
7109 return -EBUSY;
7110 }
7111
7112 info->iter.snapshot = true;
7113 info->iter.array_buffer = &info->iter.tr->max_buffer;
7114
7115 return ret;
7116 }
7117
7118 #endif /* CONFIG_TRACER_SNAPSHOT */
7119
7120
7121 static const struct file_operations tracing_thresh_fops = {
7122 .open = tracing_open_generic,
7123 .read = tracing_thresh_read,
7124 .write = tracing_thresh_write,
7125 .llseek = generic_file_llseek,
7126 };
7127
7128 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7129 static const struct file_operations tracing_max_lat_fops = {
7130 .open = tracing_open_generic,
7131 .read = tracing_max_lat_read,
7132 .write = tracing_max_lat_write,
7133 .llseek = generic_file_llseek,
7134 };
7135 #endif
7136
7137 static const struct file_operations set_tracer_fops = {
7138 .open = tracing_open_generic,
7139 .read = tracing_set_trace_read,
7140 .write = tracing_set_trace_write,
7141 .llseek = generic_file_llseek,
7142 };
7143
7144 static const struct file_operations tracing_pipe_fops = {
7145 .open = tracing_open_pipe,
7146 .poll = tracing_poll_pipe,
7147 .read = tracing_read_pipe,
7148 .splice_read = tracing_splice_read_pipe,
7149 .release = tracing_release_pipe,
7150 .llseek = no_llseek,
7151 };
7152
7153 static const struct file_operations tracing_entries_fops = {
7154 .open = tracing_open_generic_tr,
7155 .read = tracing_entries_read,
7156 .write = tracing_entries_write,
7157 .llseek = generic_file_llseek,
7158 .release = tracing_release_generic_tr,
7159 };
7160
7161 static const struct file_operations tracing_total_entries_fops = {
7162 .open = tracing_open_generic_tr,
7163 .read = tracing_total_entries_read,
7164 .llseek = generic_file_llseek,
7165 .release = tracing_release_generic_tr,
7166 };
7167
7168 static const struct file_operations tracing_free_buffer_fops = {
7169 .open = tracing_open_generic_tr,
7170 .write = tracing_free_buffer_write,
7171 .release = tracing_free_buffer_release,
7172 };
7173
7174 static const struct file_operations tracing_mark_fops = {
7175 .open = tracing_open_generic_tr,
7176 .write = tracing_mark_write,
7177 .llseek = generic_file_llseek,
7178 .release = tracing_release_generic_tr,
7179 };
7180
7181 static const struct file_operations tracing_mark_raw_fops = {
7182 .open = tracing_open_generic_tr,
7183 .write = tracing_mark_raw_write,
7184 .llseek = generic_file_llseek,
7185 .release = tracing_release_generic_tr,
7186 };
7187
7188 static const struct file_operations trace_clock_fops = {
7189 .open = tracing_clock_open,
7190 .read = seq_read,
7191 .llseek = seq_lseek,
7192 .release = tracing_single_release_tr,
7193 .write = tracing_clock_write,
7194 };
7195
7196 static const struct file_operations trace_time_stamp_mode_fops = {
7197 .open = tracing_time_stamp_mode_open,
7198 .read = seq_read,
7199 .llseek = seq_lseek,
7200 .release = tracing_single_release_tr,
7201 };
7202
7203 #ifdef CONFIG_TRACER_SNAPSHOT
7204 static const struct file_operations snapshot_fops = {
7205 .open = tracing_snapshot_open,
7206 .read = seq_read,
7207 .write = tracing_snapshot_write,
7208 .llseek = tracing_lseek,
7209 .release = tracing_snapshot_release,
7210 };
7211
7212 static const struct file_operations snapshot_raw_fops = {
7213 .open = snapshot_raw_open,
7214 .read = tracing_buffers_read,
7215 .release = tracing_buffers_release,
7216 .splice_read = tracing_buffers_splice_read,
7217 .llseek = no_llseek,
7218 };
7219
7220 #endif /* CONFIG_TRACER_SNAPSHOT */
7221
7222 #define TRACING_LOG_ERRS_MAX 8
7223 #define TRACING_LOG_LOC_MAX 128
7224
7225 #define CMD_PREFIX " Command: "
7226
7227 struct err_info {
7228 const char **errs; /* ptr to loc-specific array of err strings */
7229 u8 type; /* index into errs -> specific err string */
7230 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7231 u64 ts;
7232 };
7233
7234 struct tracing_log_err {
7235 struct list_head list;
7236 struct err_info info;
7237 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7238 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7239 };
7240
7241 static DEFINE_MUTEX(tracing_err_log_lock);
7242
7243 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7244 {
7245 struct tracing_log_err *err;
7246
7247 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7248 err = kzalloc(sizeof(*err), GFP_KERNEL);
7249 if (!err)
7250 err = ERR_PTR(-ENOMEM);
7251 tr->n_err_log_entries++;
7252
7253 return err;
7254 }
7255
7256 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7257 list_del(&err->list);
7258
7259 return err;
7260 }
7261
7262 /**
7263 * err_pos - find the position of a string within a command for error careting
7264 * @cmd: The tracing command that caused the error
7265 * @str: The string to position the caret at within @cmd
7266 *
7267 * Finds the position of the first occurrence of @str within @cmd. The
7268 * return value can be passed to tracing_log_err() for caret placement
7269 * within @cmd.
7270 *
7271 * Returns the index within @cmd of the first occurrence of @str or 0
7272 * if @str was not found.
7273 */
7274 unsigned int err_pos(char *cmd, const char *str)
7275 {
7276 char *found;
7277
7278 if (WARN_ON(!strlen(cmd)))
7279 return 0;
7280
7281 found = strstr(cmd, str);
7282 if (found)
7283 return found - cmd;
7284
7285 return 0;
7286 }
7287
7288 /**
7289 * tracing_log_err - write an error to the tracing error log
7290 * @tr: The associated trace array for the error (NULL for top level array)
7291 * @loc: A string describing where the error occurred
7292 * @cmd: The tracing command that caused the error
7293 * @errs: The array of loc-specific static error strings
7294 * @type: The index into errs[], which produces the specific static err string
7295 * @pos: The position the caret should be placed in the cmd
7296 *
7297 * Writes an error into tracing/error_log of the form:
7298 *
7299 * <loc>: error: <text>
7300 * Command: <cmd>
7301 * ^
7302 *
7303 * tracing/error_log is a small log file containing the last
7304 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7305 * unless there has been a tracing error, and the error log can be
7306 * cleared and have its memory freed by writing the empty string in
7307 * truncation mode to it i.e. echo > tracing/error_log.
7308 *
7309 * NOTE: the @errs array along with the @type param are used to
7310 * produce a static error string - this string is not copied and saved
7311 * when the error is logged - only a pointer to it is saved. See
7312 * existing callers for examples of how static strings are typically
7313 * defined for use with tracing_log_err().
7314 */
7315 void tracing_log_err(struct trace_array *tr,
7316 const char *loc, const char *cmd,
7317 const char **errs, u8 type, u8 pos)
7318 {
7319 struct tracing_log_err *err;
7320
7321 if (!tr)
7322 tr = &global_trace;
7323
7324 mutex_lock(&tracing_err_log_lock);
7325 err = get_tracing_log_err(tr);
7326 if (PTR_ERR(err) == -ENOMEM) {
7327 mutex_unlock(&tracing_err_log_lock);
7328 return;
7329 }
7330
7331 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7332 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7333
7334 err->info.errs = errs;
7335 err->info.type = type;
7336 err->info.pos = pos;
7337 err->info.ts = local_clock();
7338
7339 list_add_tail(&err->list, &tr->err_log);
7340 mutex_unlock(&tracing_err_log_lock);
7341 }
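
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * pair err_pos() with tracing_log_err() using a static error-string array,
 * as described in the kerneldoc above. The array, the function name and
 * the "example: parse" location string below are hypothetical.
 */
static const char *example_errs[] = {
	"Unknown field name",
	"Missing value",
};

static void example_report_bad_field(struct trace_array *tr,
				     char *cmd, const char *field)
{
	/* Index 0 selects "Unknown field name"; the caret lands on @field. */
	tracing_log_err(tr, "example: parse", cmd, example_errs,
			0, err_pos(cmd, field));
}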
7342
7343 static void clear_tracing_err_log(struct trace_array *tr)
7344 {
7345 struct tracing_log_err *err, *next;
7346
7347 mutex_lock(&tracing_err_log_lock);
7348 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7349 list_del(&err->list);
7350 kfree(err);
7351 }
7352
7353 tr->n_err_log_entries = 0;
7354 mutex_unlock(&tracing_err_log_lock);
7355 }
7356
7357 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7358 {
7359 struct trace_array *tr = m->private;
7360
7361 mutex_lock(&tracing_err_log_lock);
7362
7363 return seq_list_start(&tr->err_log, *pos);
7364 }
7365
7366 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7367 {
7368 struct trace_array *tr = m->private;
7369
7370 return seq_list_next(v, &tr->err_log, pos);
7371 }
7372
7373 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7374 {
7375 mutex_unlock(&tracing_err_log_lock);
7376 }
7377
7378 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7379 {
7380 u8 i;
7381
7382 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7383 seq_putc(m, ' ');
7384 for (i = 0; i < pos; i++)
7385 seq_putc(m, ' ');
7386 seq_puts(m, "^\n");
7387 }
7388
7389 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7390 {
7391 struct tracing_log_err *err = v;
7392
7393 if (err) {
7394 const char *err_text = err->info.errs[err->info.type];
7395 u64 sec = err->info.ts;
7396 u32 nsec;
7397
7398 nsec = do_div(sec, NSEC_PER_SEC);
7399 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7400 err->loc, err_text);
7401 seq_printf(m, "%s", err->cmd);
7402 tracing_err_log_show_pos(m, err->info.pos);
7403 }
7404
7405 return 0;
7406 }
7407
7408 static const struct seq_operations tracing_err_log_seq_ops = {
7409 .start = tracing_err_log_seq_start,
7410 .next = tracing_err_log_seq_next,
7411 .stop = tracing_err_log_seq_stop,
7412 .show = tracing_err_log_seq_show
7413 };
7414
7415 static int tracing_err_log_open(struct inode *inode, struct file *file)
7416 {
7417 struct trace_array *tr = inode->i_private;
7418 int ret = 0;
7419
7420 ret = tracing_check_open_get_tr(tr);
7421 if (ret)
7422 return ret;
7423
7424 /* If this file was opened for write, then erase contents */
7425 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7426 clear_tracing_err_log(tr);
7427
7428 if (file->f_mode & FMODE_READ) {
7429 ret = seq_open(file, &tracing_err_log_seq_ops);
7430 if (!ret) {
7431 struct seq_file *m = file->private_data;
7432 m->private = tr;
7433 } else {
7434 trace_array_put(tr);
7435 }
7436 }
7437 return ret;
7438 }
7439
7440 static ssize_t tracing_err_log_write(struct file *file,
7441 const char __user *buffer,
7442 size_t count, loff_t *ppos)
7443 {
7444 return count;
7445 }
7446
7447 static int tracing_err_log_release(struct inode *inode, struct file *file)
7448 {
7449 struct trace_array *tr = inode->i_private;
7450
7451 trace_array_put(tr);
7452
7453 if (file->f_mode & FMODE_READ)
7454 seq_release(inode, file);
7455
7456 return 0;
7457 }
7458
7459 static const struct file_operations tracing_err_log_fops = {
7460 .open = tracing_err_log_open,
7461 .write = tracing_err_log_write,
7462 .read = seq_read,
7463 .llseek = seq_lseek,
7464 .release = tracing_err_log_release,
7465 };
7466
7467 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7468 {
7469 struct trace_array *tr = inode->i_private;
7470 struct ftrace_buffer_info *info;
7471 int ret;
7472
7473 ret = tracing_check_open_get_tr(tr);
7474 if (ret)
7475 return ret;
7476
7477 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7478 if (!info) {
7479 trace_array_put(tr);
7480 return -ENOMEM;
7481 }
7482
7483 mutex_lock(&trace_types_lock);
7484
7485 info->iter.tr = tr;
7486 info->iter.cpu_file = tracing_get_cpu(inode);
7487 info->iter.trace = tr->current_trace;
7488 info->iter.array_buffer = &tr->array_buffer;
7489 info->spare = NULL;
7490 /* Force reading ring buffer for first read */
7491 info->read = (unsigned int)-1;
7492
7493 filp->private_data = info;
7494
7495 tr->trace_ref++;
7496
7497 mutex_unlock(&trace_types_lock);
7498
7499 ret = nonseekable_open(inode, filp);
7500 if (ret < 0)
7501 trace_array_put(tr);
7502
7503 return ret;
7504 }
7505
7506 static __poll_t
7507 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7508 {
7509 struct ftrace_buffer_info *info = filp->private_data;
7510 struct trace_iterator *iter = &info->iter;
7511
7512 return trace_poll(iter, filp, poll_table);
7513 }
7514
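/*
 * Read raw ring-buffer pages for a single CPU (backs the per-cpu
 * trace_pipe_raw file). Data is first swapped into the preallocated
 * spare page and then copied out to user space.
 */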
7515 static ssize_t
7516 tracing_buffers_read(struct file *filp, char __user *ubuf,
7517 size_t count, loff_t *ppos)
7518 {
7519 struct ftrace_buffer_info *info = filp->private_data;
7520 struct trace_iterator *iter = &info->iter;
7521 ssize_t ret = 0;
7522 ssize_t size;
7523
7524 if (!count)
7525 return 0;
7526
7527 #ifdef CONFIG_TRACER_MAX_TRACE
7528 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7529 return -EBUSY;
7530 #endif
7531
7532 if (!info->spare) {
7533 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7534 iter->cpu_file);
7535 if (IS_ERR(info->spare)) {
7536 ret = PTR_ERR(info->spare);
7537 info->spare = NULL;
7538 } else {
7539 info->spare_cpu = iter->cpu_file;
7540 }
7541 }
7542 if (!info->spare)
7543 return ret;
7544
7545 /* Do we have previous read data to read? */
7546 if (info->read < PAGE_SIZE)
7547 goto read;
7548
7549 again:
7550 trace_access_lock(iter->cpu_file);
7551 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7552 &info->spare,
7553 count,
7554 iter->cpu_file, 0);
7555 trace_access_unlock(iter->cpu_file);
7556
7557 if (ret < 0) {
7558 if (trace_empty(iter)) {
7559 if ((filp->f_flags & O_NONBLOCK))
7560 return -EAGAIN;
7561
7562 ret = wait_on_pipe(iter, 0);
7563 if (ret)
7564 return ret;
7565
7566 goto again;
7567 }
7568 return 0;
7569 }
7570
7571 info->read = 0;
7572 read:
7573 size = PAGE_SIZE - info->read;
7574 if (size > count)
7575 size = count;
7576
7577 ret = copy_to_user(ubuf, info->spare + info->read, size);
7578 if (ret == size)
7579 return -EFAULT;
7580
7581 size -= ret;
7582
7583 *ppos += size;
7584 info->read += size;
7585
7586 return size;
7587 }
7588
7589 static int tracing_buffers_release(struct inode *inode, struct file *file)
7590 {
7591 struct ftrace_buffer_info *info = file->private_data;
7592 struct trace_iterator *iter = &info->iter;
7593
7594 mutex_lock(&trace_types_lock);
7595
7596 iter->tr->trace_ref--;
7597
7598 __trace_array_put(iter->tr);
7599
7600 if (info->spare)
7601 ring_buffer_free_read_page(iter->array_buffer->buffer,
7602 info->spare_cpu, info->spare);
7603 kvfree(info);
7604
7605 mutex_unlock(&trace_types_lock);
7606
7607 return 0;
7608 }
7609
7610 struct buffer_ref {
7611 struct trace_buffer *buffer;
7612 void *page;
7613 int cpu;
7614 refcount_t refcount;
7615 };
7616
7617 static void buffer_ref_release(struct buffer_ref *ref)
7618 {
7619 if (!refcount_dec_and_test(&ref->refcount))
7620 return;
7621 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7622 kfree(ref);
7623 }
7624
7625 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7626 struct pipe_buffer *buf)
7627 {
7628 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7629
7630 buffer_ref_release(ref);
7631 buf->private = 0;
7632 }
7633
7634 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7635 struct pipe_buffer *buf)
7636 {
7637 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7638
7639 if (refcount_read(&ref->refcount) > INT_MAX/2)
7640 return false;
7641
7642 refcount_inc(&ref->refcount);
7643 return true;
7644 }
7645
7646 /* Pipe buffer operations for a buffer. */
7647 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7648 .release = buffer_pipe_buf_release,
7649 .get = buffer_pipe_buf_get,
7650 };
7651
7652 /*
7653 * Callback from splice_to_pipe(), if we need to release some pages
7654 * at the end of the spd in case we errored out while filling the pipe.
7655 */
7656 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7657 {
7658 struct buffer_ref *ref =
7659 (struct buffer_ref *)spd->partial[i].private;
7660
7661 buffer_ref_release(ref);
7662 spd->partial[i].private = 0;
7663 }
7664
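/*
 * Splice ring-buffer pages directly into a pipe (the zero-copy path for
 * trace_pipe_raw). Each page is wrapped in a buffer_ref so that it is
 * handed back to the ring buffer only once the pipe side releases it.
 */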
7665 static ssize_t
7666 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7667 struct pipe_inode_info *pipe, size_t len,
7668 unsigned int flags)
7669 {
7670 struct ftrace_buffer_info *info = file->private_data;
7671 struct trace_iterator *iter = &info->iter;
7672 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7673 struct page *pages_def[PIPE_DEF_BUFFERS];
7674 struct splice_pipe_desc spd = {
7675 .pages = pages_def,
7676 .partial = partial_def,
7677 .nr_pages_max = PIPE_DEF_BUFFERS,
7678 .ops = &buffer_pipe_buf_ops,
7679 .spd_release = buffer_spd_release,
7680 };
7681 struct buffer_ref *ref;
7682 int entries, i;
7683 ssize_t ret = 0;
7684
7685 #ifdef CONFIG_TRACER_MAX_TRACE
7686 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7687 return -EBUSY;
7688 #endif
7689
7690 if (*ppos & (PAGE_SIZE - 1))
7691 return -EINVAL;
7692
7693 if (len & (PAGE_SIZE - 1)) {
7694 if (len < PAGE_SIZE)
7695 return -EINVAL;
7696 len &= PAGE_MASK;
7697 }
7698
7699 if (splice_grow_spd(pipe, &spd))
7700 return -ENOMEM;
7701
7702 again:
7703 trace_access_lock(iter->cpu_file);
7704 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7705
7706 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7707 struct page *page;
7708 int r;
7709
7710 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7711 if (!ref) {
7712 ret = -ENOMEM;
7713 break;
7714 }
7715
7716 refcount_set(&ref->refcount, 1);
7717 ref->buffer = iter->array_buffer->buffer;
7718 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7719 if (IS_ERR(ref->page)) {
7720 ret = PTR_ERR(ref->page);
7721 ref->page = NULL;
7722 kfree(ref);
7723 break;
7724 }
7725 ref->cpu = iter->cpu_file;
7726
7727 r = ring_buffer_read_page(ref->buffer, &ref->page,
7728 len, iter->cpu_file, 1);
7729 if (r < 0) {
7730 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7731 ref->page);
7732 kfree(ref);
7733 break;
7734 }
7735
7736 page = virt_to_page(ref->page);
7737
7738 spd.pages[i] = page;
7739 spd.partial[i].len = PAGE_SIZE;
7740 spd.partial[i].offset = 0;
7741 spd.partial[i].private = (unsigned long)ref;
7742 spd.nr_pages++;
7743 *ppos += PAGE_SIZE;
7744
7745 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7746 }
7747
7748 trace_access_unlock(iter->cpu_file);
7749 spd.nr_pages = i;
7750
7751 /* did we read anything? */
7752 if (!spd.nr_pages) {
7753 if (ret)
7754 goto out;
7755
7756 ret = -EAGAIN;
7757 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7758 goto out;
7759
7760 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7761 if (ret)
7762 goto out;
7763
7764 goto again;
7765 }
7766
7767 ret = splice_to_pipe(pipe, &spd);
7768 out:
7769 splice_shrink_spd(&spd);
7770
7771 return ret;
7772 }
7773
7774 static const struct file_operations tracing_buffers_fops = {
7775 .open = tracing_buffers_open,
7776 .read = tracing_buffers_read,
7777 .poll = tracing_buffers_poll,
7778 .release = tracing_buffers_release,
7779 .splice_read = tracing_buffers_splice_read,
7780 .llseek = no_llseek,
7781 };
7782
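/*
 * Back the per-cpu "stats" file: report ring-buffer statistics (entries,
 * overruns, bytes, timestamps, dropped and read events) for one CPU.
 */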
7783 static ssize_t
7784 tracing_stats_read(struct file *filp, char __user *ubuf,
7785 size_t count, loff_t *ppos)
7786 {
7787 struct inode *inode = file_inode(filp);
7788 struct trace_array *tr = inode->i_private;
7789 struct array_buffer *trace_buf = &tr->array_buffer;
7790 int cpu = tracing_get_cpu(inode);
7791 struct trace_seq *s;
7792 unsigned long cnt;
7793 unsigned long long t;
7794 unsigned long usec_rem;
7795
7796 s = kmalloc(sizeof(*s), GFP_KERNEL);
7797 if (!s)
7798 return -ENOMEM;
7799
7800 trace_seq_init(s);
7801
7802 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7803 trace_seq_printf(s, "entries: %ld\n", cnt);
7804
7805 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7806 trace_seq_printf(s, "overrun: %ld\n", cnt);
7807
7808 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7809 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7810
7811 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7812 trace_seq_printf(s, "bytes: %ld\n", cnt);
7813
7814 if (trace_clocks[tr->clock_id].in_ns) {
7815 /* local or global for trace_clock */
7816 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7817 usec_rem = do_div(t, USEC_PER_SEC);
7818 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7819 t, usec_rem);
7820
7821 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7822 usec_rem = do_div(t, USEC_PER_SEC);
7823 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7824 } else {
7825 /* counter or tsc mode for trace_clock */
7826 trace_seq_printf(s, "oldest event ts: %llu\n",
7827 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7828
7829 trace_seq_printf(s, "now ts: %llu\n",
7830 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7831 }
7832
7833 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7834 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7835
7836 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7837 trace_seq_printf(s, "read events: %ld\n", cnt);
7838
7839 count = simple_read_from_buffer(ubuf, count, ppos,
7840 s->buffer, trace_seq_used(s));
7841
7842 kfree(s);
7843
7844 return count;
7845 }
7846
7847 static const struct file_operations tracing_stats_fops = {
7848 .open = tracing_open_generic_tr,
7849 .read = tracing_stats_read,
7850 .llseek = generic_file_llseek,
7851 .release = tracing_release_generic_tr,
7852 };
7853
7854 #ifdef CONFIG_DYNAMIC_FTRACE
7855
7856 static ssize_t
7857 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7858 size_t cnt, loff_t *ppos)
7859 {
7860 ssize_t ret;
7861 char *buf;
7862 int r;
7863
7864 /* 256 should be plenty to hold the amount needed */
7865 buf = kmalloc(256, GFP_KERNEL);
7866 if (!buf)
7867 return -ENOMEM;
7868
7869 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7870 ftrace_update_tot_cnt,
7871 ftrace_number_of_pages,
7872 ftrace_number_of_groups);
7873
7874 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7875 kfree(buf);
7876 return ret;
7877 }
7878
7879 static const struct file_operations tracing_dyn_info_fops = {
7880 .open = tracing_open_generic,
7881 .read = tracing_read_dyn_info,
7882 .llseek = generic_file_llseek,
7883 };
7884 #endif /* CONFIG_DYNAMIC_FTRACE */
7885
7886 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7887 static void
7888 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7889 struct trace_array *tr, struct ftrace_probe_ops *ops,
7890 void *data)
7891 {
7892 tracing_snapshot_instance(tr);
7893 }
7894
7895 static void
7896 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7897 struct trace_array *tr, struct ftrace_probe_ops *ops,
7898 void *data)
7899 {
7900 struct ftrace_func_mapper *mapper = data;
7901 long *count = NULL;
7902
7903 if (mapper)
7904 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7905
7906 if (count) {
7907
7908 if (*count <= 0)
7909 return;
7910
7911 (*count)--;
7912 }
7913
7914 tracing_snapshot_instance(tr);
7915 }
7916
7917 static int
7918 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7919 struct ftrace_probe_ops *ops, void *data)
7920 {
7921 struct ftrace_func_mapper *mapper = data;
7922 long *count = NULL;
7923
7924 seq_printf(m, "%ps:", (void *)ip);
7925
7926 seq_puts(m, "snapshot");
7927
7928 if (mapper)
7929 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7930
7931 if (count)
7932 seq_printf(m, ":count=%ld\n", *count);
7933 else
7934 seq_puts(m, ":unlimited\n");
7935
7936 return 0;
7937 }
7938
7939 static int
7940 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7941 unsigned long ip, void *init_data, void **data)
7942 {
7943 struct ftrace_func_mapper *mapper = *data;
7944
7945 if (!mapper) {
7946 mapper = allocate_ftrace_func_mapper();
7947 if (!mapper)
7948 return -ENOMEM;
7949 *data = mapper;
7950 }
7951
7952 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7953 }
7954
7955 static void
7956 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7957 unsigned long ip, void *data)
7958 {
7959 struct ftrace_func_mapper *mapper = data;
7960
7961 if (!ip) {
7962 if (!mapper)
7963 return;
7964 free_ftrace_func_mapper(mapper, NULL);
7965 return;
7966 }
7967
7968 ftrace_func_mapper_remove_ip(mapper, ip);
7969 }
7970
7971 static struct ftrace_probe_ops snapshot_probe_ops = {
7972 .func = ftrace_snapshot,
7973 .print = ftrace_snapshot_print,
7974 };
7975
7976 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7977 .func = ftrace_count_snapshot,
7978 .print = ftrace_snapshot_print,
7979 .init = ftrace_snapshot_init,
7980 .free = ftrace_snapshot_free,
7981 };
7982
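/*
 * Implements the "snapshot" command for set_ftrace_filter, e.g.
 *
 *   echo '<function>:snapshot' > set_ftrace_filter
 *   echo '<function>:snapshot:5' > set_ftrace_filter
 *
 * The first form triggers a snapshot every time <function> is hit; the
 * second stops after 5 hits (the count is kept in the probe's data field,
 * see ftrace_count_snapshot() above).
 */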
7983 static int
7984 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7985 char *glob, char *cmd, char *param, int enable)
7986 {
7987 struct ftrace_probe_ops *ops;
7988 void *count = (void *)-1;
7989 char *number;
7990 int ret;
7991
7992 if (!tr)
7993 return -ENODEV;
7994
7995 /* hash funcs only work with set_ftrace_filter */
7996 if (!enable)
7997 return -EINVAL;
7998
7999 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8000
8001 if (glob[0] == '!')
8002 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8003
8004 if (!param)
8005 goto out_reg;
8006
8007 number = strsep(&param, ":");
8008
8009 if (!strlen(number))
8010 goto out_reg;
8011
8012 /*
8013 * We use the callback data field (which is a pointer)
8014 * as our counter.
8015 */
8016 ret = kstrtoul(number, 0, (unsigned long *)&count);
8017 if (ret)
8018 return ret;
8019
8020 out_reg:
8021 ret = tracing_alloc_snapshot_instance(tr);
8022 if (ret < 0)
8023 goto out;
8024
8025 ret = register_ftrace_function_probe(glob, tr, ops, count);
8026
8027 out:
8028 return ret < 0 ? ret : 0;
8029 }
8030
8031 static struct ftrace_func_command ftrace_snapshot_cmd = {
8032 .name = "snapshot",
8033 .func = ftrace_trace_snapshot_callback,
8034 };
8035
8036 static __init int register_snapshot_cmd(void)
8037 {
8038 return register_ftrace_command(&ftrace_snapshot_cmd);
8039 }
8040 #else
8041 static inline __init int register_snapshot_cmd(void) { return 0; }
8042 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8043
8044 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8045 {
8046 if (WARN_ON(!tr->dir))
8047 return ERR_PTR(-ENODEV);
8048
8049 /* Top directory uses NULL as the parent */
8050 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8051 return NULL;
8052
8053 /* All sub buffers have a descriptor */
8054 return tr->dir;
8055 }
8056
8057 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8058 {
8059 struct dentry *d_tracer;
8060
8061 if (tr->percpu_dir)
8062 return tr->percpu_dir;
8063
8064 d_tracer = tracing_get_dentry(tr);
8065 if (IS_ERR(d_tracer))
8066 return NULL;
8067
8068 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8069
8070 MEM_FAIL(!tr->percpu_dir,
8071 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8072
8073 return tr->percpu_dir;
8074 }
8075
8076 static struct dentry *
8077 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8078 void *data, long cpu, const struct file_operations *fops)
8079 {
8080 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8081
8082 if (ret) /* See tracing_get_cpu() */
8083 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8084 return ret;
8085 }
8086
8087 static void
8088 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8089 {
8090 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8091 struct dentry *d_cpu;
8092 char cpu_dir[30]; /* 30 characters should be more than enough */
8093
8094 if (!d_percpu)
8095 return;
8096
8097 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8098 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8099 if (!d_cpu) {
8100 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8101 return;
8102 }
8103
8104 /* per cpu trace_pipe */
8105 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8106 tr, cpu, &tracing_pipe_fops);
8107
8108 /* per cpu trace */
8109 trace_create_cpu_file("trace", 0644, d_cpu,
8110 tr, cpu, &tracing_fops);
8111
8112 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8113 tr, cpu, &tracing_buffers_fops);
8114
8115 trace_create_cpu_file("stats", 0444, d_cpu,
8116 tr, cpu, &tracing_stats_fops);
8117
8118 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8119 tr, cpu, &tracing_entries_fops);
8120
8121 #ifdef CONFIG_TRACER_SNAPSHOT
8122 trace_create_cpu_file("snapshot", 0644, d_cpu,
8123 tr, cpu, &snapshot_fops);
8124
8125 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8126 tr, cpu, &snapshot_raw_fops);
8127 #endif
8128 }
8129
8130 #ifdef CONFIG_FTRACE_SELFTEST
8131 /* Let selftest have access to static functions in this file */
8132 #include "trace_selftest.c"
8133 #endif
8134
8135 static ssize_t
8136 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8137 loff_t *ppos)
8138 {
8139 struct trace_option_dentry *topt = filp->private_data;
8140 char *buf;
8141
8142 if (topt->flags->val & topt->opt->bit)
8143 buf = "1\n";
8144 else
8145 buf = "0\n";
8146
8147 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8148 }
8149
8150 static ssize_t
8151 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8152 loff_t *ppos)
8153 {
8154 struct trace_option_dentry *topt = filp->private_data;
8155 unsigned long val;
8156 int ret;
8157
8158 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8159 if (ret)
8160 return ret;
8161
8162 if (val != 0 && val != 1)
8163 return -EINVAL;
8164
8165 if (!!(topt->flags->val & topt->opt->bit) != val) {
8166 mutex_lock(&trace_types_lock);
8167 ret = __set_tracer_option(topt->tr, topt->flags,
8168 topt->opt, !val);
8169 mutex_unlock(&trace_types_lock);
8170 if (ret)
8171 return ret;
8172 }
8173
8174 *ppos += cnt;
8175
8176 return cnt;
8177 }
8178
8179
8180 static const struct file_operations trace_options_fops = {
8181 .open = tracing_open_generic,
8182 .read = trace_options_read,
8183 .write = trace_options_write,
8184 .llseek = generic_file_llseek,
8185 };
8186
8187 /*
8188 * In order to pass in both the trace_array descriptor as well as the index
8189 * to the flag that the trace option file represents, the trace_array
8190 * has a character array of trace_flags_index[], which holds the index
8191 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8192 * The address of this character array is passed to the flag option file
8193 * read/write callbacks.
8194 *
8195 * In order to extract both the index and the trace_array descriptor,
8196 * get_tr_index() uses the following algorithm.
8197 *
8198 * idx = *ptr;
8199 *
8200 * The pointer itself is the address of one of the index[] entries, and
8201 * each entry stores its own position (remember index[1] == 1).
8202 *
8203 * Then to get the trace_array descriptor, by subtracting that index
8204 * from the ptr, we get to the start of the index itself.
8205 *
8206 * ptr - idx == &index[0]
8207 *
8208 * Then a simple container_of() from that pointer gets us to the
8209 * trace_array descriptor.
8210 */
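/*
 * For example, if @data points at tr->trace_flags_index[3], then
 * *pindex == 3 and (data - 3) == &tr->trace_flags_index[0], from which
 * container_of() recovers the trace_array.
 */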
8211 static void get_tr_index(void *data, struct trace_array **ptr,
8212 unsigned int *pindex)
8213 {
8214 *pindex = *(unsigned char *)data;
8215
8216 *ptr = container_of(data - *pindex, struct trace_array,
8217 trace_flags_index);
8218 }
8219
8220 static ssize_t
8221 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8222 loff_t *ppos)
8223 {
8224 void *tr_index = filp->private_data;
8225 struct trace_array *tr;
8226 unsigned int index;
8227 char *buf;
8228
8229 get_tr_index(tr_index, &tr, &index);
8230
8231 if (tr->trace_flags & (1 << index))
8232 buf = "1\n";
8233 else
8234 buf = "0\n";
8235
8236 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8237 }
8238
8239 static ssize_t
8240 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8241 loff_t *ppos)
8242 {
8243 void *tr_index = filp->private_data;
8244 struct trace_array *tr;
8245 unsigned int index;
8246 unsigned long val;
8247 int ret;
8248
8249 get_tr_index(tr_index, &tr, &index);
8250
8251 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8252 if (ret)
8253 return ret;
8254
8255 if (val != 0 && val != 1)
8256 return -EINVAL;
8257
8258 mutex_lock(&event_mutex);
8259 mutex_lock(&trace_types_lock);
8260 ret = set_tracer_flag(tr, 1 << index, val);
8261 mutex_unlock(&trace_types_lock);
8262 mutex_unlock(&event_mutex);
8263
8264 if (ret < 0)
8265 return ret;
8266
8267 *ppos += cnt;
8268
8269 return cnt;
8270 }
8271
8272 static const struct file_operations trace_options_core_fops = {
8273 .open = tracing_open_generic,
8274 .read = trace_options_core_read,
8275 .write = trace_options_core_write,
8276 .llseek = generic_file_llseek,
8277 };
8278
8279 struct dentry *trace_create_file(const char *name,
8280 umode_t mode,
8281 struct dentry *parent,
8282 void *data,
8283 const struct file_operations *fops)
8284 {
8285 struct dentry *ret;
8286
8287 ret = tracefs_create_file(name, mode, parent, data, fops);
8288 if (!ret)
8289 pr_warn("Could not create tracefs '%s' entry\n", name);
8290
8291 return ret;
8292 }
8293
8294
8295 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8296 {
8297 struct dentry *d_tracer;
8298
8299 if (tr->options)
8300 return tr->options;
8301
8302 d_tracer = tracing_get_dentry(tr);
8303 if (IS_ERR(d_tracer))
8304 return NULL;
8305
8306 tr->options = tracefs_create_dir("options", d_tracer);
8307 if (!tr->options) {
8308 pr_warn("Could not create tracefs directory 'options'\n");
8309 return NULL;
8310 }
8311
8312 return tr->options;
8313 }
8314
8315 static void
8316 create_trace_option_file(struct trace_array *tr,
8317 struct trace_option_dentry *topt,
8318 struct tracer_flags *flags,
8319 struct tracer_opt *opt)
8320 {
8321 struct dentry *t_options;
8322
8323 t_options = trace_options_init_dentry(tr);
8324 if (!t_options)
8325 return;
8326
8327 topt->flags = flags;
8328 topt->opt = opt;
8329 topt->tr = tr;
8330
8331 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8332 &trace_options_fops);
8333
8334 }
8335
8336 static void
8337 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8338 {
8339 struct trace_option_dentry *topts;
8340 struct trace_options *tr_topts;
8341 struct tracer_flags *flags;
8342 struct tracer_opt *opts;
8343 int cnt;
8344 int i;
8345
8346 if (!tracer)
8347 return;
8348
8349 flags = tracer->flags;
8350
8351 if (!flags || !flags->opts)
8352 return;
8353
8354 /*
8355 * If this is an instance, only create flags for tracers
8356 * the instance may have.
8357 */
8358 if (!trace_ok_for_array(tracer, tr))
8359 return;
8360
8361 for (i = 0; i < tr->nr_topts; i++) {
8362 /* Make sure there are no duplicate flags. */
8363 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8364 return;
8365 }
8366
8367 opts = flags->opts;
8368
8369 for (cnt = 0; opts[cnt].name; cnt++)
8370 ;
8371
8372 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8373 if (!topts)
8374 return;
8375
8376 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8377 GFP_KERNEL);
8378 if (!tr_topts) {
8379 kfree(topts);
8380 return;
8381 }
8382
8383 tr->topts = tr_topts;
8384 tr->topts[tr->nr_topts].tracer = tracer;
8385 tr->topts[tr->nr_topts].topts = topts;
8386 tr->nr_topts++;
8387
8388 for (cnt = 0; opts[cnt].name; cnt++) {
8389 create_trace_option_file(tr, &topts[cnt], flags,
8390 &opts[cnt]);
8391 MEM_FAIL(topts[cnt].entry == NULL,
8392 "Failed to create trace option: %s",
8393 opts[cnt].name);
8394 }
8395 }
8396
8397 static struct dentry *
8398 create_trace_option_core_file(struct trace_array *tr,
8399 const char *option, long index)
8400 {
8401 struct dentry *t_options;
8402
8403 t_options = trace_options_init_dentry(tr);
8404 if (!t_options)
8405 return NULL;
8406
8407 return trace_create_file(option, 0644, t_options,
8408 (void *)&tr->trace_flags_index[index],
8409 &trace_options_core_fops);
8410 }
8411
8412 static void create_trace_options_dir(struct trace_array *tr)
8413 {
8414 struct dentry *t_options;
8415 bool top_level = tr == &global_trace;
8416 int i;
8417
8418 t_options = trace_options_init_dentry(tr);
8419 if (!t_options)
8420 return;
8421
8422 for (i = 0; trace_options[i]; i++) {
8423 if (top_level ||
8424 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8425 create_trace_option_core_file(tr, trace_options[i], i);
8426 }
8427 }
8428
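/*
 * rb_simple_read()/rb_simple_write() back the per-instance "tracing_on"
 * file: reading reports whether the ring buffer is recording, and writing
 * 0 or 1 stops or (re)starts it.
 */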
8429 static ssize_t
8430 rb_simple_read(struct file *filp, char __user *ubuf,
8431 size_t cnt, loff_t *ppos)
8432 {
8433 struct trace_array *tr = filp->private_data;
8434 char buf[64];
8435 int r;
8436
8437 r = tracer_tracing_is_on(tr);
8438 r = sprintf(buf, "%d\n", r);
8439
8440 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8441 }
8442
8443 static ssize_t
8444 rb_simple_write(struct file *filp, const char __user *ubuf,
8445 size_t cnt, loff_t *ppos)
8446 {
8447 struct trace_array *tr = filp->private_data;
8448 struct trace_buffer *buffer = tr->array_buffer.buffer;
8449 unsigned long val;
8450 int ret;
8451
8452 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8453 if (ret)
8454 return ret;
8455
8456 if (buffer) {
8457 mutex_lock(&trace_types_lock);
8458 if (!!val == tracer_tracing_is_on(tr)) {
8459 val = 0; /* do nothing */
8460 } else if (val) {
8461 tracer_tracing_on(tr);
8462 if (tr->current_trace->start)
8463 tr->current_trace->start(tr);
8464 } else {
8465 tracer_tracing_off(tr);
8466 if (tr->current_trace->stop)
8467 tr->current_trace->stop(tr);
8468 }
8469 mutex_unlock(&trace_types_lock);
8470 }
8471
8472 (*ppos)++;
8473
8474 return cnt;
8475 }
8476
8477 static const struct file_operations rb_simple_fops = {
8478 .open = tracing_open_generic_tr,
8479 .read = rb_simple_read,
8480 .write = rb_simple_write,
8481 .release = tracing_release_generic_tr,
8482 .llseek = default_llseek,
8483 };
8484
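/*
 * buffer_percent_read()/buffer_percent_write() back the "buffer_percent"
 * file: how full the ring buffer should be before a blocked reader of
 * trace_pipe_raw is woken up (see the wait_on_pipe() call in
 * tracing_buffers_splice_read()).
 */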
8485 static ssize_t
8486 buffer_percent_read(struct file *filp, char __user *ubuf,
8487 size_t cnt, loff_t *ppos)
8488 {
8489 struct trace_array *tr = filp->private_data;
8490 char buf[64];
8491 int r;
8492
8493 r = tr->buffer_percent;
8494 r = sprintf(buf, "%d\n", r);
8495
8496 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8497 }
8498
8499 static ssize_t
8500 buffer_percent_write(struct file *filp, const char __user *ubuf,
8501 size_t cnt, loff_t *ppos)
8502 {
8503 struct trace_array *tr = filp->private_data;
8504 unsigned long val;
8505 int ret;
8506
8507 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8508 if (ret)
8509 return ret;
8510
8511 if (val > 100)
8512 return -EINVAL;
8513
8514 if (!val)
8515 val = 1;
8516
8517 tr->buffer_percent = val;
8518
8519 (*ppos)++;
8520
8521 return cnt;
8522 }
8523
8524 static const struct file_operations buffer_percent_fops = {
8525 .open = tracing_open_generic_tr,
8526 .read = buffer_percent_read,
8527 .write = buffer_percent_write,
8528 .release = tracing_release_generic_tr,
8529 .llseek = default_llseek,
8530 };
8531
8532 static struct dentry *trace_instance_dir;
8533
8534 static void
8535 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8536
8537 static int
8538 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8539 {
8540 enum ring_buffer_flags rb_flags;
8541
8542 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8543
8544 buf->tr = tr;
8545
8546 buf->buffer = ring_buffer_alloc(size, rb_flags);
8547 if (!buf->buffer)
8548 return -ENOMEM;
8549
8550 buf->data = alloc_percpu(struct trace_array_cpu);
8551 if (!buf->data) {
8552 ring_buffer_free(buf->buffer);
8553 buf->buffer = NULL;
8554 return -ENOMEM;
8555 }
8556
8557 /* Allocate the first page for all buffers */
8558 set_buffer_entries(&tr->array_buffer,
8559 ring_buffer_size(tr->array_buffer.buffer, 0));
8560
8561 return 0;
8562 }
8563
8564 static int allocate_trace_buffers(struct trace_array *tr, int size)
8565 {
8566 int ret;
8567
8568 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8569 if (ret)
8570 return ret;
8571
8572 #ifdef CONFIG_TRACER_MAX_TRACE
8573 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8574 allocate_snapshot ? size : 1);
8575 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8576 ring_buffer_free(tr->array_buffer.buffer);
8577 tr->array_buffer.buffer = NULL;
8578 free_percpu(tr->array_buffer.data);
8579 tr->array_buffer.data = NULL;
8580 return -ENOMEM;
8581 }
8582 tr->allocated_snapshot = allocate_snapshot;
8583
8584 /*
8585 * Only the top level trace array gets its snapshot allocated
8586 * from the kernel command line.
8587 */
8588 allocate_snapshot = false;
8589 #endif
8590
8591 return 0;
8592 }
8593
8594 static void free_trace_buffer(struct array_buffer *buf)
8595 {
8596 if (buf->buffer) {
8597 ring_buffer_free(buf->buffer);
8598 buf->buffer = NULL;
8599 free_percpu(buf->data);
8600 buf->data = NULL;
8601 }
8602 }
8603
8604 static void free_trace_buffers(struct trace_array *tr)
8605 {
8606 if (!tr)
8607 return;
8608
8609 free_trace_buffer(&tr->array_buffer);
8610
8611 #ifdef CONFIG_TRACER_MAX_TRACE
8612 free_trace_buffer(&tr->max_buffer);
8613 #endif
8614 }
8615
8616 static void init_trace_flags_index(struct trace_array *tr)
8617 {
8618 int i;
8619
8620 /* Used by the trace options files */
8621 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8622 tr->trace_flags_index[i] = i;
8623 }
8624
8625 static void __update_tracer_options(struct trace_array *tr)
8626 {
8627 struct tracer *t;
8628
8629 for (t = trace_types; t; t = t->next)
8630 add_tracer_options(tr, t);
8631 }
8632
8633 static void update_tracer_options(struct trace_array *tr)
8634 {
8635 mutex_lock(&trace_types_lock);
8636 __update_tracer_options(tr);
8637 mutex_unlock(&trace_types_lock);
8638 }
8639
8640 /* Must have trace_types_lock held */
8641 struct trace_array *trace_array_find(const char *instance)
8642 {
8643 struct trace_array *tr, *found = NULL;
8644
8645 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8646 if (tr->name && strcmp(tr->name, instance) == 0) {
8647 found = tr;
8648 break;
8649 }
8650 }
8651
8652 return found;
8653 }
8654
8655 struct trace_array *trace_array_find_get(const char *instance)
8656 {
8657 struct trace_array *tr;
8658
8659 mutex_lock(&trace_types_lock);
8660 tr = trace_array_find(instance);
8661 if (tr)
8662 tr->ref++;
8663 mutex_unlock(&trace_types_lock);
8664
8665 return tr;
8666 }
8667
8668 static int trace_array_create_dir(struct trace_array *tr)
8669 {
8670 int ret;
8671
8672 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8673 if (!tr->dir)
8674 return -EINVAL;
8675
8676 ret = event_trace_add_tracer(tr->dir, tr);
8677 if (ret)
8678 tracefs_remove(tr->dir);
8679
8680 init_tracer_tracefs(tr, tr->dir);
8681 __update_tracer_options(tr);
8682
8683 return ret;
8684 }
8685
8686 static struct trace_array *trace_array_create(const char *name)
8687 {
8688 struct trace_array *tr;
8689 int ret;
8690
8691 ret = -ENOMEM;
8692 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8693 if (!tr)
8694 return ERR_PTR(ret);
8695
8696 tr->name = kstrdup(name, GFP_KERNEL);
8697 if (!tr->name)
8698 goto out_free_tr;
8699
8700 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8701 goto out_free_tr;
8702
8703 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8704
8705 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8706
8707 raw_spin_lock_init(&tr->start_lock);
8708
8709 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8710
8711 tr->current_trace = &nop_trace;
8712
8713 INIT_LIST_HEAD(&tr->systems);
8714 INIT_LIST_HEAD(&tr->events);
8715 INIT_LIST_HEAD(&tr->hist_vars);
8716 INIT_LIST_HEAD(&tr->err_log);
8717
8718 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8719 goto out_free_tr;
8720
8721 if (ftrace_allocate_ftrace_ops(tr) < 0)
8722 goto out_free_tr;
8723
8724 ftrace_init_trace_array(tr);
8725
8726 init_trace_flags_index(tr);
8727
8728 if (trace_instance_dir) {
8729 ret = trace_array_create_dir(tr);
8730 if (ret)
8731 goto out_free_tr;
8732 } else
8733 __trace_early_add_events(tr);
8734
8735 list_add(&tr->list, &ftrace_trace_arrays);
8736
8737 tr->ref++;
8738
8739 return tr;
8740
8741 out_free_tr:
8742 ftrace_free_ftrace_ops(tr);
8743 free_trace_buffers(tr);
8744 free_cpumask_var(tr->tracing_cpumask);
8745 kfree(tr->name);
8746 kfree(tr);
8747
8748 return ERR_PTR(ret);
8749 }
8750
8751 static int instance_mkdir(const char *name)
8752 {
8753 struct trace_array *tr;
8754 int ret;
8755
8756 mutex_lock(&event_mutex);
8757 mutex_lock(&trace_types_lock);
8758
8759 ret = -EEXIST;
8760 if (trace_array_find(name))
8761 goto out_unlock;
8762
8763 tr = trace_array_create(name);
8764
8765 ret = PTR_ERR_OR_ZERO(tr);
8766
8767 out_unlock:
8768 mutex_unlock(&trace_types_lock);
8769 mutex_unlock(&event_mutex);
8770 return ret;
8771 }
8772
8773 /**
8774 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8775 * @name: The name of the trace array to be looked up/created.
8776 *
8777 * Returns a pointer to the trace array with the given name, or NULL
8778 * if it cannot be created.
8779 *
8780 * NOTE: This function increments the reference counter associated with the
8781 * trace array returned. This makes sure it cannot be freed while in use.
8782 * Use trace_array_put() once the trace array is no longer needed.
8783 * If the trace_array is to be freed, trace_array_destroy() needs to
8784 * be called after the trace_array_put(), or simply let user space delete
8785 * it from the tracefs instances directory. But until the
8786 * trace_array_put() is called, user space cannot delete it.
8787 *
8788 */
8789 struct trace_array *trace_array_get_by_name(const char *name)
8790 {
8791 struct trace_array *tr;
8792
8793 mutex_lock(&event_mutex);
8794 mutex_lock(&trace_types_lock);
8795
8796 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8797 if (tr->name && strcmp(tr->name, name) == 0)
8798 goto out_unlock;
8799 }
8800
8801 tr = trace_array_create(name);
8802
8803 if (IS_ERR(tr))
8804 tr = NULL;
8805 out_unlock:
8806 if (tr)
8807 tr->ref++;
8808
8809 mutex_unlock(&trace_types_lock);
8810 mutex_unlock(&event_mutex);
8811 return tr;
8812 }
8813 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
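
/*
 * Illustrative sketch (not part of the original file): typical use of the
 * instance API documented above from module code. The instance name
 * "example" and the helper's name are hypothetical.
 */
static int example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");
	if (!tr)
		return -ENOMEM;

	/* ... write events into the instance, e.g. via trace_array_printk() ... */

	trace_array_put(tr);	/* drop the reference taken above */
	return 0;
}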
8814
8815 static int __remove_instance(struct trace_array *tr)
8816 {
8817 int i;
8818
8819 /* Reference counter for a newly created trace array = 1. */
8820 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8821 return -EBUSY;
8822
8823 list_del(&tr->list);
8824
8825 /* Disable all the flags that were enabled coming in */
8826 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8827 if ((1 << i) & ZEROED_TRACE_FLAGS)
8828 set_tracer_flag(tr, 1 << i, 0);
8829 }
8830
8831 tracing_set_nop(tr);
8832 clear_ftrace_function_probes(tr);
8833 event_trace_del_tracer(tr);
8834 ftrace_clear_pids(tr);
8835 ftrace_destroy_function_files(tr);
8836 tracefs_remove(tr->dir);
8837 free_trace_buffers(tr);
8838
8839 for (i = 0; i < tr->nr_topts; i++) {
8840 kfree(tr->topts[i].topts);
8841 }
8842 kfree(tr->topts);
8843
8844 free_cpumask_var(tr->tracing_cpumask);
8845 kfree(tr->name);
8846 kfree(tr);
8847
8848 return 0;
8849 }
8850
8851 int trace_array_destroy(struct trace_array *this_tr)
8852 {
8853 struct trace_array *tr;
8854 int ret;
8855
8856 if (!this_tr)
8857 return -EINVAL;
8858
8859 mutex_lock(&event_mutex);
8860 mutex_lock(&trace_types_lock);
8861
8862 ret = -ENODEV;
8863
8864 /* Make sure the trace array exists before destroying it. */
8865 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8866 if (tr == this_tr) {
8867 ret = __remove_instance(tr);
8868 break;
8869 }
8870 }
8871
8872 mutex_unlock(&trace_types_lock);
8873 mutex_unlock(&event_mutex);
8874
8875 return ret;
8876 }
8877 EXPORT_SYMBOL_GPL(trace_array_destroy);
8878
8879 static int instance_rmdir(const char *name)
8880 {
8881 struct trace_array *tr;
8882 int ret;
8883
8884 mutex_lock(&event_mutex);
8885 mutex_lock(&trace_types_lock);
8886
8887 ret = -ENODEV;
8888 tr = trace_array_find(name);
8889 if (tr)
8890 ret = __remove_instance(tr);
8891
8892 mutex_unlock(&trace_types_lock);
8893 mutex_unlock(&event_mutex);
8894
8895 return ret;
8896 }
8897
8898 static __init void create_trace_instances(struct dentry *d_tracer)
8899 {
8900 struct trace_array *tr;
8901
8902 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8903 instance_mkdir,
8904 instance_rmdir);
8905 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8906 return;
8907
8908 mutex_lock(&event_mutex);
8909 mutex_lock(&trace_types_lock);
8910
8911 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8912 if (!tr->name)
8913 continue;
8914 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8915 "Failed to create instance directory\n"))
8916 break;
8917 }
8918
8919 mutex_unlock(&trace_types_lock);
8920 mutex_unlock(&event_mutex);
8921 }
8922
8923 static void
8924 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8925 {
8926 struct trace_event_file *file;
8927 int cpu;
8928
8929 trace_create_file("available_tracers", 0444, d_tracer,
8930 tr, &show_traces_fops);
8931
8932 trace_create_file("current_tracer", 0644, d_tracer,
8933 tr, &set_tracer_fops);
8934
8935 trace_create_file("tracing_cpumask", 0644, d_tracer,
8936 tr, &tracing_cpumask_fops);
8937
8938 trace_create_file("trace_options", 0644, d_tracer,
8939 tr, &tracing_iter_fops);
8940
8941 trace_create_file("trace", 0644, d_tracer,
8942 tr, &tracing_fops);
8943
8944 trace_create_file("trace_pipe", 0444, d_tracer,
8945 tr, &tracing_pipe_fops);
8946
8947 trace_create_file("buffer_size_kb", 0644, d_tracer,
8948 tr, &tracing_entries_fops);
8949
8950 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8951 tr, &tracing_total_entries_fops);
8952
8953 trace_create_file("free_buffer", 0200, d_tracer,
8954 tr, &tracing_free_buffer_fops);
8955
8956 trace_create_file("trace_marker", 0220, d_tracer,
8957 tr, &tracing_mark_fops);
8958
8959 file = __find_event_file(tr, "ftrace", "print");
8960 if (file && file->dir)
8961 trace_create_file("trigger", 0644, file->dir, file,
8962 &event_trigger_fops);
8963 tr->trace_marker_file = file;
8964
8965 trace_create_file("trace_marker_raw", 0220, d_tracer,
8966 tr, &tracing_mark_raw_fops);
8967
8968 trace_create_file("trace_clock", 0644, d_tracer, tr,
8969 &trace_clock_fops);
8970
8971 trace_create_file("tracing_on", 0644, d_tracer,
8972 tr, &rb_simple_fops);
8973
8974 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8975 &trace_time_stamp_mode_fops);
8976
8977 tr->buffer_percent = 50;
8978
8979 trace_create_file("buffer_percent", 0444, d_tracer,
8980 tr, &buffer_percent_fops);
8981
8982 create_trace_options_dir(tr);
8983
8984 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8985 trace_create_maxlat_file(tr, d_tracer);
8986 #endif
8987
8988 if (ftrace_create_function_files(tr, d_tracer))
8989 MEM_FAIL(1, "Could not allocate function filter files");
8990
8991 #ifdef CONFIG_TRACER_SNAPSHOT
8992 trace_create_file("snapshot", 0644, d_tracer,
8993 tr, &snapshot_fops);
8994 #endif
8995
8996 trace_create_file("error_log", 0644, d_tracer,
8997 tr, &tracing_err_log_fops);
8998
8999 for_each_tracing_cpu(cpu)
9000 tracing_init_tracefs_percpu(tr, cpu);
9001
9002 ftrace_init_tracefs(tr, d_tracer);
9003 }
9004
9005 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9006 {
9007 struct vfsmount *mnt;
9008 struct file_system_type *type;
9009
9010 /*
9011 * To maintain backward compatibility for tools that mount
9012 * debugfs to get to the tracing facility, tracefs is automatically
9013 * mounted to the debugfs/tracing directory.
9014 */
9015 type = get_fs_type("tracefs");
9016 if (!type)
9017 return NULL;
9018 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9019 put_filesystem(type);
9020 if (IS_ERR(mnt))
9021 return NULL;
9022 mntget(mnt);
9023
9024 return mnt;
9025 }
9026
9027 /**
9028 * tracing_init_dentry - initialize top level trace array
9029 *
9030 * This is called when creating files or directories in the tracing
9031 * directory. It is called via fs_initcall() by any of the boot up code
9032 * and returns 0 on success once the top level tracing directory is set up.
9033 */
9034 int tracing_init_dentry(void)
9035 {
9036 struct trace_array *tr = &global_trace;
9037
9038 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9039 pr_warn("Tracing disabled due to lockdown\n");
9040 return -EPERM;
9041 }
9042
9043 /* The top level trace array uses NULL as parent */
9044 if (tr->dir)
9045 return 0;
9046
9047 if (WARN_ON(!tracefs_initialized()))
9048 return -ENODEV;
9049
9050 /*
9051 * As there may still be users that expect the tracing
9052 * files to exist in debugfs/tracing, we must automount
9053 * the tracefs file system there, so older tools still
9054 * work with the newer kernel.
9055 */
9056 tr->dir = debugfs_create_automount("tracing", NULL,
9057 trace_automount, NULL);
9058
9059 return 0;
9060 }
9061
9062 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9063 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9064
9065 static void __init trace_eval_init(void)
9066 {
9067 int len;
9068
9069 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9070 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9071 }
9072
9073 #ifdef CONFIG_MODULES
9074 static void trace_module_add_evals(struct module *mod)
9075 {
9076 if (!mod->num_trace_evals)
9077 return;
9078
9079 /*
9080 * Modules with bad taint do not have events created; do
9081 * not bother with enums either.
9082 */
9083 if (trace_module_has_bad_taint(mod))
9084 return;
9085
9086 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9087 }
9088
9089 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9090 static void trace_module_remove_evals(struct module *mod)
9091 {
9092 union trace_eval_map_item *map;
9093 union trace_eval_map_item **last = &trace_eval_maps;
9094
9095 if (!mod->num_trace_evals)
9096 return;
9097
9098 mutex_lock(&trace_eval_mutex);
9099
9100 map = trace_eval_maps;
9101
9102 while (map) {
9103 if (map->head.mod == mod)
9104 break;
9105 map = trace_eval_jmp_to_tail(map);
9106 last = &map->tail.next;
9107 map = map->tail.next;
9108 }
9109 if (!map)
9110 goto out;
9111
9112 *last = trace_eval_jmp_to_tail(map)->tail.next;
9113 kfree(map);
9114 out:
9115 mutex_unlock(&trace_eval_mutex);
9116 }
9117 #else
9118 static inline void trace_module_remove_evals(struct module *mod) { }
9119 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9120
9121 static int trace_module_notify(struct notifier_block *self,
9122 unsigned long val, void *data)
9123 {
9124 struct module *mod = data;
9125
9126 switch (val) {
9127 case MODULE_STATE_COMING:
9128 trace_module_add_evals(mod);
9129 break;
9130 case MODULE_STATE_GOING:
9131 trace_module_remove_evals(mod);
9132 break;
9133 }
9134
9135 return NOTIFY_OK;
9136 }
9137
9138 static struct notifier_block trace_module_nb = {
9139 .notifier_call = trace_module_notify,
9140 .priority = 0,
9141 };
9142 #endif /* CONFIG_MODULES */
9143
9144 static __init int tracer_init_tracefs(void)
9145 {
9146 int ret;
9147
9148 trace_access_lock_init();
9149
9150 ret = tracing_init_dentry();
9151 if (ret)
9152 return 0;
9153
9154 event_trace_init();
9155
9156 init_tracer_tracefs(&global_trace, NULL);
9157 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9158
9159 trace_create_file("tracing_thresh", 0644, NULL,
9160 &global_trace, &tracing_thresh_fops);
9161
9162 trace_create_file("README", 0444, NULL,
9163 NULL, &tracing_readme_fops);
9164
9165 trace_create_file("saved_cmdlines", 0444, NULL,
9166 NULL, &tracing_saved_cmdlines_fops);
9167
9168 trace_create_file("saved_cmdlines_size", 0644, NULL,
9169 NULL, &tracing_saved_cmdlines_size_fops);
9170
9171 trace_create_file("saved_tgids", 0444, NULL,
9172 NULL, &tracing_saved_tgids_fops);
9173
9174 trace_eval_init();
9175
9176 trace_create_eval_file(NULL);
9177
9178 #ifdef CONFIG_MODULES
9179 register_module_notifier(&trace_module_nb);
9180 #endif
9181
9182 #ifdef CONFIG_DYNAMIC_FTRACE
9183 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9184 NULL, &tracing_dyn_info_fops);
9185 #endif
9186
9187 create_trace_instances(NULL);
9188
9189 update_tracer_options(&global_trace);
9190
9191 return 0;
9192 }
9193
9194 static int trace_panic_handler(struct notifier_block *this,
9195 unsigned long event, void *unused)
9196 {
9197 if (ftrace_dump_on_oops)
9198 ftrace_dump(ftrace_dump_on_oops);
9199 return NOTIFY_OK;
9200 }
9201
9202 static struct notifier_block trace_panic_notifier = {
9203 .notifier_call = trace_panic_handler,
9204 .next = NULL,
9205 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9206 };
9207
9208 static int trace_die_handler(struct notifier_block *self,
9209 unsigned long val,
9210 void *data)
9211 {
9212 switch (val) {
9213 case DIE_OOPS:
9214 if (ftrace_dump_on_oops)
9215 ftrace_dump(ftrace_dump_on_oops);
9216 break;
9217 default:
9218 break;
9219 }
9220 return NOTIFY_OK;
9221 }
9222
9223 static struct notifier_block trace_die_notifier = {
9224 .notifier_call = trace_die_handler,
9225 .priority = 200
9226 };
9227
9228 /*
9229 * printk is set to a max of 1024; we really don't need it that big.
9230 * Nothing should be printing 1000 characters anyway.
9231 */
9232 #define TRACE_MAX_PRINT 1000
9233
9234 /*
9235 * Define here KERN_TRACE so that we have one place to modify
9236 * it if we decide to change what log level the ftrace dump
9237 * should be at.
9238 */
9239 #define KERN_TRACE KERN_EMERG
9240
9241 void
9242 trace_printk_seq(struct trace_seq *s)
9243 {
9244 /* Probably should print a warning here. */
9245 if (s->seq.len >= TRACE_MAX_PRINT)
9246 s->seq.len = TRACE_MAX_PRINT;
9247
9248 /*
9249 * More paranoid code. Although the buffer size is set to
9250 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9251 * an extra layer of protection.
9252 */
9253 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9254 s->seq.len = s->seq.size - 1;
9255
9256 /* should be zero ended, but we are paranoid. */
9257 s->buffer[s->seq.len] = 0;
9258
9259 printk(KERN_TRACE "%s", s->buffer);
9260
9261 trace_seq_init(s);
9262 }
9263
9264 void trace_init_global_iter(struct trace_iterator *iter)
9265 {
9266 iter->tr = &global_trace;
9267 iter->trace = iter->tr->current_trace;
9268 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9269 iter->array_buffer = &global_trace.array_buffer;
9270
9271 if (iter->trace && iter->trace->open)
9272 iter->trace->open(iter);
9273
9274 /* Annotate start of buffers if we had overruns */
9275 if (ring_buffer_overruns(iter->array_buffer->buffer))
9276 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9277
9278 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9279 if (trace_clocks[iter->tr->clock_id].in_ns)
9280 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9281 }
9282
9283 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9284 {
9285 /* use static because iter can be a bit big for the stack */
9286 static struct trace_iterator iter;
9287 static atomic_t dump_running;
9288 struct trace_array *tr = &global_trace;
9289 unsigned int old_userobj;
9290 unsigned long flags;
9291 int cnt = 0, cpu;
9292
9293 /* Only allow one dump user at a time. */
9294 if (atomic_inc_return(&dump_running) != 1) {
9295 atomic_dec(&dump_running);
9296 return;
9297 }
9298
9299 /*
9300 * Always turn off tracing when we dump.
9301 * We don't need to show trace output of what happens
9302 * between multiple crashes.
9303 *
9304 * If the user does a sysrq-z, then they can re-enable
9305 * tracing with echo 1 > tracing_on.
9306 */
9307 tracing_off();
9308
9309 local_irq_save(flags);
9310 printk_nmi_direct_enter();
9311
9312 /* Simulate the iterator */
9313 trace_init_global_iter(&iter);
9314 /* Can not use kmalloc for iter.temp */
9315 iter.temp = static_temp_buf;
9316 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9317
9318 for_each_tracing_cpu(cpu) {
9319 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9320 }
9321
9322 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9323
9324 /* don't look at user memory in panic mode */
9325 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9326
9327 switch (oops_dump_mode) {
9328 case DUMP_ALL:
9329 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9330 break;
9331 case DUMP_ORIG:
9332 iter.cpu_file = raw_smp_processor_id();
9333 break;
9334 case DUMP_NONE:
9335 goto out_enable;
9336 default:
9337 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9338 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9339 }
9340
9341 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9342
9343 /* Did function tracer already get disabled? */
9344 if (ftrace_is_dead()) {
9345 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9346 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9347 }
9348
9349 /*
9350 * We need to stop all tracing on all CPUS to read
9351 * the next buffer. This is a bit expensive, but is
9352 * not done often. We fill all what we can read,
9353 * and then release the locks again.
9354 */
9355
9356 while (!trace_empty(&iter)) {
9357
9358 if (!cnt)
9359 printk(KERN_TRACE "---------------------------------\n");
9360
9361 cnt++;
9362
9363 trace_iterator_reset(&iter);
9364 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9365
9366 if (trace_find_next_entry_inc(&iter) != NULL) {
9367 int ret;
9368
9369 ret = print_trace_line(&iter);
9370 if (ret != TRACE_TYPE_NO_CONSUME)
9371 trace_consume(&iter);
9372 }
9373 touch_nmi_watchdog();
9374
9375 trace_printk_seq(&iter.seq);
9376 }
9377
9378 if (!cnt)
9379 printk(KERN_TRACE " (ftrace buffer empty)\n");
9380 else
9381 printk(KERN_TRACE "---------------------------------\n");
9382
9383 out_enable:
9384 tr->trace_flags |= old_userobj;
9385
9386 for_each_tracing_cpu(cpu) {
9387 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9388 }
9389 atomic_dec(&dump_running);
9390 printk_nmi_direct_exit();
9391 local_irq_restore(flags);
9392 }
9393 EXPORT_SYMBOL_GPL(ftrace_dump);
9394
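/*
 * A minimal sketch of a direct caller (the condition and its name are
 * hypothetical): a subsystem that detects an unrecoverable internal error
 * can dump the trace buffers before giving up.
 *
 *	if (WARN_ON(broken_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG restricts the dump to the CPU that triggered it, and
 * DUMP_NONE makes the call a no-op.
 */
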
int trace_run_command(const char *buf, int (*createfn)(int, char **))
{
	char **argv;
	int argc, ret;

	argc = 0;
	ret = 0;
	argv = argv_split(GFP_KERNEL, buf, &argc);
	if (!argv)
		return -ENOMEM;

	if (argc)
		ret = createfn(argc, argv);

	argv_free(argv);

	return ret;
}

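/*
 * A minimal sketch of a createfn callback (the "myprobe" command and its
 * register_myprobe() helper are hypothetical): the callback receives the
 * whitespace-split words of a single command line.
 *
 *	static int create_myprobe(int argc, char **argv)
 *	{
 *		if (argc < 3)
 *			return -EINVAL;
 *		return register_myprobe(argv[1], argv[2]);
 *	}
 *
 *	trace_run_command("myprobe foo 0x1234", create_myprobe);
 */
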
#define WRITE_BUFSIZE  4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(int, char **))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = trace_run_command(buf, createfn);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}

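/*
 * A minimal sketch of how a tracefs file's write handler can forward user
 * input to trace_parse_run_command(), which takes care of buffering, line
 * splitting and '#' comments ("myprobe_write" and "create_myprobe" are
 * hypothetical names):
 *
 *	static ssize_t myprobe_write(struct file *file, const char __user *ubuf,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       create_myprobe);
 *	}
 */
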
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer.  We
	 * don't free the buffer if the CPU goes down.  If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer.  The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything.  This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();
}

void __init trace_init(void)
{
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The default bootup tracer name is kept in an init-section buffer.
	 * This function is called at late_initcall time; if the boot tracer
	 * was never registered, clear the pointer out to prevent a later
	 * registration from accessing the buffer that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall_sync(clear_boot_tracer);

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static int tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return -EPERM;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}

	return 0;
}
late_initcall_sync(tracing_set_default_clock);
#endif
