1 // SPDX-License-Identifier: BSD-3-Clause
2 //
3 // Copyright(c) 2017 Intel Corporation. All rights reserved.
4 //
5 // Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
6
/*
 * System Agent - a simple FW monitor that can notify host drivers in the
 * event of any FW errors. The SA checks that the DSP is still responsive and
 * verifies the stability of the system by measuring the time elapsed between
 * consecutive timer ticks. If the elapsed time exceeds the expected period by
 * more than 5%, the SA emits a warning trace. If it exceeds the period by
 * more than 100%, a panic is raised (when CONFIG_AGENT_PANIC_ON_DELAY is
 * enabled and panic_on_delay is set).
 */
15
16 #include <rtos/timer.h>
17 #include <sof/lib/agent.h>
18 #include <rtos/alloc.h>
19 #include <rtos/clk.h>
20 #include <sof/lib/memory.h>
21 #include <sof/lib/uuid.h>
22 #include <rtos/panic.h>
23 #include <sof/platform.h>
24 #include <sof/schedule/ll_schedule.h>
25 #include <sof/schedule/schedule.h>
26 #include <rtos/task.h>
27 #include <rtos/sof.h>
28 #include <sof/trace/trace.h>
29 #include <ipc/topology.h>
30 #include <ipc/trace.h>
31 #include <user/trace.h>
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stddef.h>
35 #include <stdint.h>
36 #include <rtos/kernel.h>
37
38 LOG_MODULE_REGISTER(sa, CONFIG_SOF_LOG_LEVEL);
39
40 /* 5276b491-5b64-464e-8984-dc228ef9e6a1 */
41 DECLARE_SOF_UUID("sa", sa_uuid, 0x5276b491, 0x5b64, 0x464e,
42 0x89, 0x84, 0xdc, 0x22, 0x8e, 0xf9, 0xe6, 0xa1);
43
44 DECLARE_TR_CTX(sa_tr, SOF_UUID(sa_uuid), LOG_LEVEL_INFO);
45
46 /* c63c4e75-8f61-4420-9319-1395932efa9e */
47 DECLARE_SOF_UUID("agent-work", agent_work_task_uuid, 0xc63c4e75, 0x8f61, 0x4420,
48 0x93, 0x19, 0x13, 0x95, 0x93, 0x2e, 0xfa, 0x9e);
49
50 #if CONFIG_PERFORMANCE_COUNTERS
perf_sa_trace(struct perf_cnt_data * pcd,int ignored)51 static void perf_sa_trace(struct perf_cnt_data *pcd, int ignored)
52 {
53 tr_info(&sa_tr, "perf sys_load peak plat %u cpu %u",
54 (uint32_t)((pcd)->plat_delta_peak),
55 (uint32_t)((pcd)->cpu_delta_peak));
56 }
57
perf_avg_sa_trace(struct perf_cnt_data * pcd,int ignored)58 static void perf_avg_sa_trace(struct perf_cnt_data *pcd, int ignored)
59 {
60 tr_info(&sa_tr, "perf sys_load cpu avg %u (current peak %u)",
61 (uint32_t)((pcd)->cpu_delta_sum),
62 (uint32_t)((pcd)->cpu_delta_peak));
63 }
64
65 #endif
66
validate(void * data)67 static enum task_state validate(void *data)
68 {
69 struct sa *sa = data;
70 uint64_t current;
71 uint64_t delta;
72
73 current = sof_cycle_get_64();
74 delta = current - sa->last_check;
75
76 perf_cnt_stamp(&sa->pcd, perf_sa_trace, 0 /* ignored */);
77 perf_cnt_average(&sa->pcd, perf_avg_sa_trace, 0 /* ignored */);
78
79 #if CONFIG_AGENT_PANIC_ON_DELAY
80 /* panic timeout */
81 if (sa->panic_on_delay && delta > sa->panic_timeout)
82 sof_panic(SOF_IPC_PANIC_IDLE);
83 #endif
84
85 /* warning timeout */
86 if (delta > sa->warn_timeout) {
87 if (delta > UINT_MAX)
88 tr_warn(&sa_tr, "validate(), ll drift detected, delta > %u", UINT_MAX);
89 else
90 tr_warn(&sa_tr, "validate(), ll drift detected, delta = %u",
91 (unsigned int)delta);
92 }
93
94 /* update last_check to current */
95 sa->last_check = current;
96
97 return SOF_TASK_STATE_RESCHEDULE;
98 }
99
sa_init(struct sof * sof,uint64_t timeout)100 void sa_init(struct sof *sof, uint64_t timeout)
101 {
102 uint64_t ticks;
103
104 if (timeout > UINT_MAX)
105 tr_warn(&sa_tr, "sa_init(), timeout > %u", UINT_MAX);
106 else
107 tr_info(&sa_tr, "sa_init(), timeout = %u", (unsigned int)timeout);
108
109 sof->sa = rzalloc(SOF_MEM_ZONE_SYS_SHARED, 0, SOF_MEM_CAPS_RAM, sizeof(*sof->sa));
110
111 /* set default timeouts */
112 ticks = k_us_to_cyc_ceil64(timeout);
113
114 /* TODO: change values after minimal drifts will be assured */
115 sof->sa->panic_timeout = 2 * ticks; /* 100% delay */
116 sof->sa->warn_timeout = ticks + ticks / 20; /* 5% delay */
117
118 atomic_init(&sof->sa->panic_cnt, 0);
119 sof->sa->panic_on_delay = true;
120
121 if (ticks > UINT_MAX || sof->sa->warn_timeout > UINT_MAX ||
122 sof->sa->panic_timeout > UINT_MAX)
123 tr_info(&sa_tr,
124 "sa_init(), some of the values are > %u", UINT_MAX);
125 else
126 tr_info(&sa_tr,
127 "sa_init(), ticks = %u, sof->sa->warn_timeout = %u, sof->sa->panic_timeout = %u",
128 (unsigned int)ticks, (unsigned int)sof->sa->warn_timeout,
129 (unsigned int)sof->sa->panic_timeout);
130
131 schedule_task_init_ll(&sof->sa->work, SOF_UUID(agent_work_task_uuid),
132 SOF_SCHEDULE_LL_TIMER,
133 SOF_TASK_PRI_HIGH, validate, sof->sa, 0, 0);
134
135 schedule_task(&sof->sa->work, 0, timeout);
136
137 /* set last check time to now to give time for boot completion */
138 sof->sa->last_check = sof_cycle_get_64();
139
140 }
141
sa_exit(struct sof * sof)142 void sa_exit(struct sof *sof)
143 {
144 schedule_task_cancel(&sof->sa->work);
145 }
146