1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019 Intel Corporation. All rights reserved.
4  *
5  * Author: Marcin Maka <marcin.maka@linux.intel.com>
6  */
7 
8 /**
9  * \file xtos/include/sof/lib/perf_cnt.h
10  * \brief Simple performance counters
11  * \author Marcin Maka <marcin.maka@linux.intel.com>
12  */
13 
14 #ifndef __SOF_LIB_PERF_CNT_H__
15 #define __SOF_LIB_PERF_CNT_H__
16 
17 #include <rtos/timer.h>
18 
/**
 * \brief Performance counters data.
 *
 * Holds the reference timestamps captured by perf_cnt_init() together with
 * the deltas, peaks and running sum maintained by perf_cnt_stamp() and
 * perf_cnt_average().
 */
struct perf_cnt_data {
	uint32_t plat_ts;		/**< reference platform timestamp */
	uint32_t cpu_ts;		/**< reference cpu timestamp */
	uint32_t plat_delta_last;	/**< most recent platform delta */
	uint32_t plat_delta_peak;	/**< largest platform delta seen */
	uint32_t cpu_delta_last;	/**< most recent cpu delta */
	uint32_t cpu_delta_peak;	/**< largest cpu delta seen */
	uint32_t cpu_delta_sum;		/**< cpu deltas summed over the window */
	uint32_t sample_cnt;		/**< samples accumulated in the window */
};
29 
30 #if CONFIG_PERFORMANCE_COUNTERS
31 
/** \brief Traces the last and peak deltas of both timers via tr_info().
 *
 *  \param ctx Trace context passed to tr_info() as its first argument.
 *  \param pcd Performance counters data.
 */
#define perf_cnt_trace(ctx, pcd)					\
	tr_info(ctx, "perf plat last %u peak %u cpu last %u, peak %u",	\
		(uint32_t)(pcd)->plat_delta_last,			\
		(uint32_t)(pcd)->plat_delta_peak,			\
		(uint32_t)(pcd)->cpu_delta_last,			\
		(uint32_t)(pcd)->cpu_delta_peak)
38 
/** \brief Zeroes all fields of the performance counters data.
 *
 *  \param pcd Performance counters data to reset.
 */
#define perf_cnt_clear(pcd) \
	memset((pcd), 0, sizeof(struct perf_cnt_data))
41 
/* Selects the source of the "cpu" timestamp.
 *
 * NOTE: Zephyr's arch_timing_counter_get() might not be implemented
 * for a particular platform, so it is only used when
 * CONFIG_TIMING_FUNCTIONS is defined. Otherwise the Zephyr build falls
 * back to sof_cycle_get_64(), the same call that provides the
 * "platform" timestamp, so both timestamps come from one source.
 * Outside Zephyr the system value of the cpu timer is read.
 */
#if !defined(__ZEPHYR__)
	#define perf_cnt_get_cpu_ts() timer_get_system(cpu_timer_get())
#elif defined(CONFIG_TIMING_FUNCTIONS)
	#define perf_cnt_get_cpu_ts arch_timing_counter_get
#else
	#define perf_cnt_get_cpu_ts sof_cycle_get_64
#endif	/* !__ZEPHYR__ */
56 
/** \brief Captures the current timer values as the reference timestamps.
 *
 *  Both readings are truncated to the 32-bit width of the pcd fields.
 *
 *  \param pcd Performance counters data whose plat_ts and cpu_ts are set.
 */
#define perf_cnt_init(pcd) do {						\
		(pcd)->plat_ts = (uint32_t)sof_cycle_get_64();		\
		(pcd)->cpu_ts = (uint32_t)perf_cnt_get_cpu_ts();	\
	} while (0)
62 
63 /* Trace macros that can be used as trace_m argument of the perf_cnt_stamp()
64  * to trace PCD values if the last arch timer reading exceeds the previous
65  * peak value.
66  *
67  * arg passed to perf_cnt_stamp() is forwarded to the trace_m() macro
68  * as the second argument.
69  */
70 
/** \brief Empty trace macro: expands to nothing, so no trace is emitted
 *  when a peak is detected and neither argument is evaluated.
 */
#define perf_trace_null(pcd, arg)
73 
/** \brief Simple trace that prints all values through perf_cnt_trace().
 *
 *  \param pcd Performance counters data.
 *  \param arg Address of the tr_ctx to trace with.
 */
#define perf_trace_simple(pcd, arg) \
	perf_cnt_trace(arg, pcd)
77 
/* log2 of the perf measurement window: averages are taken over
 * 2^PERF_CNT_CHECK_WINDOW_SIZE samples.
 */
#define PERF_CNT_CHECK_WINDOW_SIZE	10
/** \brief Traces the cpu delta sum and peak of a task via tr_info().
 *
 *  Meant to be used as the trace_m argument of task_perf_cnt_avg(), which
 *  converts cpu_delta_sum into the window average before invoking it.
 *
 *  \param pcd Performance counters data.
 *  \param task_p Trace context passed to tr_info() as its first argument.
 *  \param class Object that is printed as a pointer along with its uid member.
 */
#define task_perf_avg_info(pcd, task_p, class)				\
	tr_info(task_p,							\
		"perf_cycle task %p, %pU cpu avg %u peak %u",		\
		class, (class)->uid,					\
		(uint32_t)(pcd)->cpu_delta_sum,				\
		(uint32_t)(pcd)->cpu_delta_peak)
/** \brief Accumulates cpu timer delta samples on behalf of a task.
 *
 *  Adds cpu_delta_last to the running sum. Once the sample count reaches
 *  2^PERF_CNT_CHECK_WINDOW_SIZE, the sum is replaced by the average,
 *  trace_m is run, and the sum, the sample count and both peaks are reset.
 *  \param pcd Performance counters data.
 *  \param trace_m Trace function or macro invoked as trace_m(pcd, arg, class).
 *  \param arg Argument passed to trace_m as its second argument.
 *  \param class Argument passed to trace_m as its third argument.
 */
#define task_perf_cnt_avg(pcd, trace_m, arg, class) do {			\
		(pcd)->cpu_delta_sum += (pcd)->cpu_delta_last;			\
		(pcd)->sample_cnt++;						\
		if ((pcd)->sample_cnt == (1 << PERF_CNT_CHECK_WINDOW_SIZE)) {	\
			(pcd)->cpu_delta_sum >>= PERF_CNT_CHECK_WINDOW_SIZE;	\
			trace_m(pcd, arg, class);				\
			(pcd)->plat_delta_peak = 0;				\
			(pcd)->cpu_delta_peak = 0;				\
			(pcd)->cpu_delta_sum = 0;				\
			(pcd)->sample_cnt = 0;					\
		}								\
	} while (0)
96 
/** \brief Accumulates the cpu timer deltas produced by perf_cnt_stamp().
 *
 *  Adds cpu_delta_last to the running sum. Once the sample count reaches
 *  2^PERF_CNT_CHECK_WINDOW_SIZE, the sum is replaced by the average,
 *  trace_m is run, and the sum, the sample count and both peaks are reset.
 *  \param pcd Performance counters data.
 *  \param trace_m Trace function trace_m(pcd, arg) or trace macro if a
 *         more precise line number is desired in the logs.
 *  \param arg Argument passed to trace_m as arg.
 */
#define perf_cnt_average(pcd, trace_m, arg) do {				\
		(pcd)->cpu_delta_sum += (pcd)->cpu_delta_last;			\
		(pcd)->sample_cnt++;						\
		if ((pcd)->sample_cnt == (1 << PERF_CNT_CHECK_WINDOW_SIZE)) {	\
			(pcd)->cpu_delta_sum >>= PERF_CNT_CHECK_WINDOW_SIZE;	\
			trace_m(pcd, arg);					\
			(pcd)->plat_delta_peak = 0;				\
			(pcd)->cpu_delta_peak = 0;				\
			(pcd)->cpu_delta_sum = 0;				\
			(pcd)->sample_cnt = 0;					\
		}								\
	} while (0)
116 
/** \brief Reads the timers and computes delta to the previous readings.
 *
 *  The deltas are taken against the reference timestamps stored in pcd
 *  (see perf_cnt_init()); those reference timestamps are not modified here.
 *  Deltas use modular 32-bit subtraction, which gives the exact distance
 *  across a single counter wrap-around and 0 for identical readings.
 *
 *  If current arch delta exceeds the previous peak value, trace_m is run.
 *  \param pcd Performance counters data.
 *  \param trace_m Trace function trace_m(pcd, arg) or trace macro if a
 *         more precise line number is desired in the logs.
 *  \param arg Argument passed to trace_m as arg.
 */
#define perf_cnt_stamp(pcd, trace_m, arg) do {					\
		uint32_t perf_plat_now_ =					\
			(uint32_t)sof_cycle_get_64();				\
		uint32_t perf_cpu_now_ =					\
			(uint32_t)perf_cnt_get_cpu_ts();			\
		(pcd)->plat_delta_last =					\
			(uint32_t)(perf_plat_now_ - (pcd)->plat_ts);		\
		(pcd)->cpu_delta_last =						\
			(uint32_t)(perf_cpu_now_ - (pcd)->cpu_ts);		\
		if ((pcd)->plat_delta_last > (pcd)->plat_delta_peak)		\
			(pcd)->plat_delta_peak = (pcd)->plat_delta_last;	\
		if ((pcd)->cpu_delta_last > (pcd)->cpu_delta_peak) {		\
			(pcd)->cpu_delta_peak = (pcd)->cpu_delta_last;		\
			trace_m(pcd, arg);					\
		}								\
	} while (0)
147 
148 /**
149  * For simple performance measurement and optimization in development stage,
 * tic-toc api is provided. Performance data are traced at each toc call,
151  * to allow fast clocks usage deviation estimation. Example:
152  *
153  * \code{.c}
154  * void foo(struct comp_dev *dev) {
155  *	static struct perf_cnt_data pcd;
156  *
157  *	perf_tic(&pcd);
158  *	bar();
159  *	perf_toc(&pcd, dev);
160  * }
161  * \endcode
162  */
163 
/** \brief Stores the start timestamps in the pcd structure.
 *
 *  Thin alias of perf_cnt_init().
 *
 * \param pcd Performance counters data.
 */
#define perf_tic(pcd) perf_cnt_init(pcd)
170 
/** \brief Computes the deltas since perf_tic() and traces them.
 *
 * Runs perf_cnt_stamp() with peak tracing disabled, then always traces
 * all values through perf_trace_simple().
 *
 * \param pcd Performance counters data.
 * \param comp Component used to get corresponding trace context.
 */
#define perf_toc(pcd, comp) do {					\
		perf_cnt_stamp(pcd, perf_trace_null, NULL);		\
		perf_trace_simple(pcd, trace_comp_get_tr_ctx(comp));	\
	} while (0)
180 
181 #else
/* No-op fallbacks used when CONFIG_PERFORMANCE_COUNTERS is disabled, so
 * instrumented code builds without its own #if guards. Each macro expands
 * to nothing and none of the arguments are evaluated.
 */
#define perf_cnt_clear(pcd)
#define perf_cnt_init(pcd)
#define perf_cnt_stamp(pcd, trace_m, arg)
#define perf_cnt_average(pcd, trace_m, arg)
#define task_perf_cnt_avg(pcd, trace_m, arg, class)
#define perf_tic(pcd)
#define perf_toc(pcd, comp)
186 #endif
187 
188 #endif /* __SOF_LIB_PERF_CNT_H__ */
189