1 // SPDX-License-Identifier: BSD-3-Clause
2 //
3 // Copyright(c) 2017 Intel Corporation. All rights reserved.
4 //
5 // Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
6 
/*
 * System Agent (SA) - a simple FW monitor that can notify host drivers in the
 * event of any FW errors. The SA checks that the DSP is still responsive and
 * verifies the stability of the system by measuring the time elapsed between
 * consecutive timer ticks. If the elapsed time exceeds the expected period by
 * more than 5%, the SA emits a warning trace. If it exceeds the expected
 * period by more than 100%, a panic is raised (when
 * CONFIG_AGENT_PANIC_ON_DELAY is enabled).
 */
15 
16 #include <rtos/timer.h>
17 #include <sof/lib/agent.h>
18 #include <rtos/alloc.h>
19 #include <rtos/clk.h>
20 #include <sof/lib/memory.h>
21 #include <sof/lib/uuid.h>
22 #include <rtos/panic.h>
23 #include <sof/platform.h>
24 #include <sof/schedule/ll_schedule.h>
25 #include <sof/schedule/schedule.h>
26 #include <rtos/task.h>
27 #include <rtos/sof.h>
28 #include <sof/trace/trace.h>
29 #include <ipc/topology.h>
30 #include <ipc/trace.h>
31 #include <user/trace.h>
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stddef.h>
35 #include <stdint.h>
36 #include <rtos/kernel.h>
37 
38 LOG_MODULE_REGISTER(sa, CONFIG_SOF_LOG_LEVEL);
39 
40 /* 5276b491-5b64-464e-8984-dc228ef9e6a1 */
41 DECLARE_SOF_UUID("sa", sa_uuid, 0x5276b491, 0x5b64, 0x464e,
42 		 0x89, 0x84, 0xdc, 0x22, 0x8e, 0xf9, 0xe6, 0xa1);
43 
44 DECLARE_TR_CTX(sa_tr, SOF_UUID(sa_uuid), LOG_LEVEL_INFO);
45 
46 /* c63c4e75-8f61-4420-9319-1395932efa9e */
47 DECLARE_SOF_UUID("agent-work", agent_work_task_uuid, 0xc63c4e75, 0x8f61, 0x4420,
48 		 0x93, 0x19, 0x13, 0x95, 0x93, 0x2e, 0xfa, 0x9e);
49 
50 #if CONFIG_PERFORMANCE_COUNTERS
perf_sa_trace(struct perf_cnt_data * pcd,int ignored)51 static void perf_sa_trace(struct perf_cnt_data *pcd, int ignored)
52 {
53 	tr_info(&sa_tr, "perf sys_load peak plat %u cpu %u",
54 		(uint32_t)((pcd)->plat_delta_peak),
55 		(uint32_t)((pcd)->cpu_delta_peak));
56 }
57 
perf_avg_sa_trace(struct perf_cnt_data * pcd,int ignored)58 static void perf_avg_sa_trace(struct perf_cnt_data *pcd, int ignored)
59 {
60 	tr_info(&sa_tr, "perf sys_load cpu avg %u (current peak %u)",
61 		(uint32_t)((pcd)->cpu_delta_sum),
62 		(uint32_t)((pcd)->cpu_delta_peak));
63 }
64 
65 #endif
66 
validate(void * data)67 static enum task_state validate(void *data)
68 {
69 	struct sa *sa = data;
70 	uint64_t current;
71 	uint64_t delta;
72 
73 	current = sof_cycle_get_64();
74 	delta = current - sa->last_check;
75 
76 	perf_cnt_stamp(&sa->pcd, perf_sa_trace, 0 /* ignored */);
77 	perf_cnt_average(&sa->pcd, perf_avg_sa_trace, 0 /* ignored */);
78 
79 #if CONFIG_AGENT_PANIC_ON_DELAY
80 	/* panic timeout */
81 	if (sa->panic_on_delay && delta > sa->panic_timeout)
82 		sof_panic(SOF_IPC_PANIC_IDLE);
83 #endif
84 
85 	/* warning timeout */
86 	if (delta > sa->warn_timeout) {
87 		if (delta > UINT_MAX)
88 			tr_warn(&sa_tr, "validate(), ll drift detected, delta > %u", UINT_MAX);
89 		else
90 			tr_warn(&sa_tr, "validate(), ll drift detected, delta = %u",
91 				(unsigned int)delta);
92 	}
93 
94 	/* update last_check to current */
95 	sa->last_check = current;
96 
97 	return SOF_TASK_STATE_RESCHEDULE;
98 }
99 
sa_init(struct sof * sof,uint64_t timeout)100 void sa_init(struct sof *sof, uint64_t timeout)
101 {
102 	uint64_t ticks;
103 
104 	if (timeout > UINT_MAX)
105 		tr_warn(&sa_tr, "sa_init(), timeout > %u", UINT_MAX);
106 	else
107 		tr_info(&sa_tr, "sa_init(), timeout = %u", (unsigned int)timeout);
108 
109 	sof->sa = rzalloc(SOF_MEM_ZONE_SYS_SHARED, 0, SOF_MEM_CAPS_RAM, sizeof(*sof->sa));
110 
111 	/* set default timeouts */
112 	ticks = k_us_to_cyc_ceil64(timeout);
113 
114 	/* TODO: change values after minimal drifts will be assured */
115 	sof->sa->panic_timeout = 2 * ticks;	/* 100% delay */
116 	sof->sa->warn_timeout = ticks + ticks / 20;	/* 5% delay */
117 
118 	atomic_init(&sof->sa->panic_cnt, 0);
119 	sof->sa->panic_on_delay = true;
120 
121 	if (ticks > UINT_MAX || sof->sa->warn_timeout > UINT_MAX ||
122 	    sof->sa->panic_timeout > UINT_MAX)
123 		tr_info(&sa_tr,
124 			"sa_init(), some of the values are > %u", UINT_MAX);
125 	else
126 		tr_info(&sa_tr,
127 			"sa_init(), ticks = %u, sof->sa->warn_timeout = %u, sof->sa->panic_timeout = %u",
128 			(unsigned int)ticks, (unsigned int)sof->sa->warn_timeout,
129 			(unsigned int)sof->sa->panic_timeout);
130 
131 	schedule_task_init_ll(&sof->sa->work, SOF_UUID(agent_work_task_uuid),
132 			      SOF_SCHEDULE_LL_TIMER,
133 			      SOF_TASK_PRI_HIGH, validate, sof->sa, 0, 0);
134 
135 	schedule_task(&sof->sa->work, 0, timeout);
136 
137 	/* set last check time to now to give time for boot completion */
138 	sof->sa->last_check = sof_cycle_get_64();
139 
140 }
141 
sa_exit(struct sof * sof)142 void sa_exit(struct sof *sof)
143 {
144 	schedule_task_cancel(&sof->sa->work);
145 }
146