1 /* SPDX-License-Identifier: BSD-3-Clause 2 * 3 * Copyright(c) 2019 Intel Corporation. All rights reserved. 4 * 5 * Author: Marcin Maka <marcin.maka@linux.intel.com> 6 */ 7 8 /** 9 * \file xtos/include/sof/lib/perf_cnt.h 10 * \brief Simple performance counters 11 * \author Marcin Maka <marcin.maka@linux.intel.com> 12 */ 13 14 #ifndef __SOF_LIB_PERF_CNT_H__ 15 #define __SOF_LIB_PERF_CNT_H__ 16 17 #include <rtos/timer.h> 18 19 struct perf_cnt_data { 20 uint32_t plat_ts; 21 uint32_t cpu_ts; 22 uint32_t plat_delta_last; 23 uint32_t plat_delta_peak; 24 uint32_t cpu_delta_last; 25 uint32_t cpu_delta_peak; 26 uint32_t cpu_delta_sum; 27 uint32_t sample_cnt; 28 }; 29 30 #if CONFIG_PERFORMANCE_COUNTERS 31 32 #define perf_cnt_trace(ctx, pcd) \ 33 tr_info(ctx, "perf plat last %u peak %u cpu last %u, peak %u", \ 34 (uint32_t)((pcd)->plat_delta_last), \ 35 (uint32_t)((pcd)->plat_delta_peak), \ 36 (uint32_t)((pcd)->cpu_delta_last), \ 37 (uint32_t)((pcd)->cpu_delta_peak)) 38 39 /** \brief Clears performance counters data. */ 40 #define perf_cnt_clear(pcd) memset((pcd), 0, sizeof(struct perf_cnt_data)) 41 42 /* NOTE: Zephyr's arch_timing_counter_get() might not be implemented 43 * for a particular platform. In this case let's fallback to use 44 * Zephyr's k_cycle_get_64(). This will result in both "platform" and 45 * "cpu" timestamps to be equal. 46 */ 47 #ifdef __ZEPHYR__ 48 #ifdef CONFIG_TIMING_FUNCTIONS 49 #define perf_cnt_get_cpu_ts arch_timing_counter_get 50 #else 51 #define perf_cnt_get_cpu_ts sof_cycle_get_64 52 #endif /* CONFIG_TIMING_FUNCTIONS */ 53 #else 54 #define perf_cnt_get_cpu_ts() timer_get_system(cpu_timer_get()) 55 #endif /* __ZEPHYR__ */ 56 57 /** \brief Initializes timestamps with current timer values. */ 58 #define perf_cnt_init(pcd) do { \ 59 (pcd)->plat_ts = sof_cycle_get_64(); \ 60 (pcd)->cpu_ts = perf_cnt_get_cpu_ts(); \ 61 } while (0) 62 63 /* Trace macros that can be used as trace_m argument of the perf_cnt_stamp() 64 * to trace PCD values if the last arch timer reading exceeds the previous 65 * peak value. 66 * 67 * arg passed to perf_cnt_stamp() is forwarded to the trace_m() macro 68 * as the second argument. 69 */ 70 71 /** \brief No trace when detecting peak value. */ 72 #define perf_trace_null(pcd, arg) 73 74 /** \brief Simple trace, all values are printed, arg should be a tr_ctx address. 75 */ 76 #define perf_trace_simple(pcd, arg) perf_cnt_trace(arg, pcd) 77 78 /* perf measurement windows size 2^x */ 79 #define PERF_CNT_CHECK_WINDOW_SIZE 10 80 #define task_perf_avg_info(pcd, task_p, class) \ 81 tr_info(task_p, "perf_cycle task %p, %pU cpu avg %u peak %u",\ 82 class, (class)->uid, \ 83 (uint32_t)((pcd)->cpu_delta_sum), \ 84 (uint32_t)((pcd)->cpu_delta_peak)) 85 #define task_perf_cnt_avg(pcd, trace_m, arg, class) do { \ 86 (pcd)->cpu_delta_sum += (pcd)->cpu_delta_last; \ 87 if (++(pcd)->sample_cnt == 1 << PERF_CNT_CHECK_WINDOW_SIZE) { \ 88 (pcd)->cpu_delta_sum >>= PERF_CNT_CHECK_WINDOW_SIZE; \ 89 trace_m(pcd, arg, class); \ 90 (pcd)->cpu_delta_sum = 0; \ 91 (pcd)->sample_cnt = 0; \ 92 (pcd)->plat_delta_peak = 0; \ 93 (pcd)->cpu_delta_peak = 0; \ 94 } \ 95 } while (0) 96 97 /** \brief Accumulates cpu timer delta samples calculated by perf_cnt_stamp(). 98 * 99 * If current sample count reaches the window size, compute the average and run trace_m. 100 * \param pcd Performance counters data. 101 * \param trace_m Trace function trace_m(pcd, arg) or trace macro if a 102 * more precise line number is desired in the logs. 103 * \param arg Argument passed to trace_m as arg. 104 */ 105 #define perf_cnt_average(pcd, trace_m, arg) do { \ 106 (pcd)->cpu_delta_sum += (pcd)->cpu_delta_last; \ 107 if (++(pcd)->sample_cnt == 1 << PERF_CNT_CHECK_WINDOW_SIZE) {\ 108 (pcd)->cpu_delta_sum >>= PERF_CNT_CHECK_WINDOW_SIZE; \ 109 trace_m(pcd, arg); \ 110 (pcd)->cpu_delta_sum = 0; \ 111 (pcd)->sample_cnt = 0; \ 112 (pcd)->plat_delta_peak = 0; \ 113 (pcd)->cpu_delta_peak = 0; \ 114 } \ 115 } while (0) 116 117 /** \brief Reads the timers and computes delta to the previous readings. 118 * 119 * If current arch delta exceeds the previous peak value, trace_m is run. 120 * \param pcd Performance counters data. 121 * \param trace_m Trace function trace_m(pcd, arg) or trace macro if a 122 * more precise line number is desired in the logs. 123 * \param arg Argument passed to trace_m as arg. 124 */ 125 #define perf_cnt_stamp(pcd, trace_m, arg) do { \ 126 uint32_t plat_ts = \ 127 (uint32_t)sof_cycle_get_64(); \ 128 uint32_t cpu_ts = \ 129 (uint32_t)perf_cnt_get_cpu_ts(); \ 130 if (plat_ts > (pcd)->plat_ts) \ 131 (pcd)->plat_delta_last = plat_ts - (pcd)->plat_ts; \ 132 else \ 133 (pcd)->plat_delta_last = UINT32_MAX - (pcd)->plat_ts \ 134 + plat_ts; \ 135 if (cpu_ts > (pcd)->cpu_ts) \ 136 (pcd)->cpu_delta_last = cpu_ts - (pcd)->cpu_ts; \ 137 else \ 138 (pcd)->cpu_delta_last = UINT32_MAX - (pcd)->cpu_ts \ 139 + cpu_ts;\ 140 if ((pcd)->plat_delta_last > (pcd)->plat_delta_peak) \ 141 (pcd)->plat_delta_peak = (pcd)->plat_delta_last; \ 142 if ((pcd)->cpu_delta_last > (pcd)->cpu_delta_peak) { \ 143 (pcd)->cpu_delta_peak = (pcd)->cpu_delta_last; \ 144 trace_m(pcd, arg); \ 145 } \ 146 } while (0) 147 148 /** 149 * For simple performance measurement and optimization in development stage, 150 * tic-toc api is provided. Performance data are traced at each tok call, 151 * to allow fast clocks usage deviation estimation. Example: 152 * 153 * \code{.c} 154 * void foo(struct comp_dev *dev) { 155 * static struct perf_cnt_data pcd; 156 * 157 * perf_tic(&pcd); 158 * bar(); 159 * perf_toc(&pcd, dev); 160 * } 161 * \endcode 162 */ 163 164 /** \brief Save start timestamp in pcd structure 165 * 166 * \param pcd Performance counters data. 167 */ 168 #define perf_tic(pcd) \ 169 perf_cnt_init(pcd) 170 171 /** \brief Save start timestamp in pcd structure 172 * 173 * \param pcd Performance counters data. 174 * \param comp Component used to get corresponding trace context. 175 */ 176 #define perf_toc(pcd, comp) do { \ 177 perf_cnt_stamp(pcd, perf_trace_null, NULL); \ 178 perf_trace_simple(pcd, trace_comp_get_tr_ctx(comp)); \ 179 } while (0) 180 181 #else 182 #define perf_cnt_clear(pcd) 183 #define perf_cnt_init(pcd) 184 #define perf_cnt_stamp(pcd, trace_m, arg) 185 #define perf_cnt_average(pcd, trace_m, arg) 186 #endif 187 188 #endif /* __SOF_LIB_PERF_CNT_H__ */ 189