/*
 * Copyright (c) 2018 Intel Corporation
 *
 * Author: Sathish Kuttan <sathish.k.kuttan@intel.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/** @file
 * @brief Intel GNA device driver
 *
 * Device driver implementation for Intel's
 * Gaussian Mixture Model and Neural Network Accelerator (GNA)
 */

#define DT_DRV_COMPAT intel_gna

#include <kernel.h>
#include <string.h>
#include <device.h>
#include <drivers/gna.h>

#include <memory.h>
#include "intel_gna.h"

#define LOG_LEVEL CONFIG_NEURAL_NET_LOG_LEVEL
#include <logging/log.h>
LOG_MODULE_REGISTER(neural_net);

#define DEV_NAME(dev) ((dev)->name)
#define DEV_CFG(dev) \
	((const struct intel_gna_config *const)(dev)->config)
#define DEV_DATA(dev) \
	((struct intel_gna_data *const)(dev)->data)

#if LOG_LEVEL >= LOG_LEVEL_DBG
static void intel_gna_regs_dump(const struct device *dev);
static void intel_gna_config_desc_dump(const struct device *dev);
#define INTEL_GNA_REGS_DUMP(dev)	intel_gna_regs_dump((dev))
#define INTEL_GNA_CONFIG_DESC_DUMP(dev)	intel_gna_config_desc_dump((dev))
#else
#define INTEL_GNA_REGS_DUMP(dev)
#define INTEL_GNA_CONFIG_DESC_DUMP(dev)
#endif

#define GNA_MODEL_VIRT_BASE_DEFAULT	0

static struct intel_gna_config_desc __aligned(GNA_PG_SIZE_IN_BYTES)
	gna_config_desc;
static struct intel_gna_page_table __aligned(GNA_PG_SIZE_IN_BYTES)
	gna_page_table[GNA_NUM_PG_TABLES_NEEDED];

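/**
 * @brief GNA interrupt service routine
 *
 * Translates the status register bits of the completed operation into a
 * response code, copies the inference output from the model buffer into the
 * caller's buffer, queues the response and submits the work item that
 * delivers the callback, then clears the operation and disables the
 * interrupt.
 */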
static void intel_gna_interrupt_handler(const struct device *dev)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);

	volatile struct intel_gna_regs *regs = gna->regs;
	struct intel_gna_pending_resp pending_resp;
	struct intel_gna_pending_req pending_req;

	/* check for generic / virtual address out of range error */
	if (regs->gnasts & (GNA_STS_VIRT_ADDR_OOR | GNA_STS_ERROR)) {
		pending_resp.response.result = GNA_RESULT_GENERIC_ERROR;
	}

	/* check for parameter out of range error */
	if (regs->gnasts & GNA_STS_PARAM_OOR) {
		pending_resp.response.result =
			GNA_RESULT_PARAM_OUT_OF_RANGE_ERROR;
	}

	/* check for output buffer full error */
	if (regs->gnasts & GNA_STS_BUFFER_FULL) {
		pending_resp.response.result =
			GNA_RESULT_OUTPUT_BUFFER_FULL_ERROR;
	}

	/* check for scoring completion */
	if (regs->gnasts & GNA_STS_SCORE_COMPL) {
		pending_resp.response.result = GNA_RESULT_INFERENCE_COMPLETE;
	}

	if (k_msgq_get(&gna->request_queue, &pending_req, K_NO_WAIT) != 0) {
		LOG_ERR("Pending request queue is empty");
	} else {
		SOC_DCACHE_INVALIDATE(pending_req.model->output,
				pending_req.output_len);
		/* copy output from the model buffer to application buffer */
		memcpy(pending_req.output, pending_req.model->output,
				pending_req.output_len);
		pending_resp.response.output = pending_req.output;
		pending_resp.response.output_len = pending_req.output_len;
		pending_resp.callback = pending_req.callback;

		pending_resp.response.stats.cycles_per_sec = 200000000U;
		if (regs->gnasts & GNA_STS_STATS_VALID) {
			pending_resp.response.stats.total_cycles = regs->gnaptc;
			pending_resp.response.stats.stall_cycles = regs->gnasc;
		} else {
			pending_resp.response.stats.total_cycles = 0U;
			pending_resp.response.stats.stall_cycles = 0U;
		}

		k_msgq_put(&gna->response_queue, &pending_resp, K_NO_WAIT);

		k_work_submit(&gna->gna_work);
	}

	/* clear GNA operation and disable interrupt */
	regs->gnactrl |= GNA_CTRL_INTR_DISABLE | GNA_CTRL_ABORT_CLEAR;
	gna->state = GNA_STATE_IDLE;
}

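/**
 * @brief Work queue handler that delivers inference responses
 *
 * Drains the response queue and invokes the application callback for each
 * pending response outside of interrupt context.
 */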
static void gna_work_handler(struct k_work *work)
{
	/* recover the driver data from the embedded work item */
	struct intel_gna_data *gna =
		CONTAINER_OF(work, struct intel_gna_data, gna_work);
	struct intel_gna_pending_resp resp;

	while (k_msgq_get(&gna->response_queue, &resp, K_NO_WAIT) == 0) {
		resp.callback(&resp.response);
	}
}

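/**
 * @brief Populate GNA page table entries for a contiguous buffer
 *
 * Maps @a size bytes starting at @a physical (which must lie within L2 SRAM)
 * into the GNA virtual address space starting at @a virtual, one
 * GNA_PG_SIZE_IN_BYTES page at a time.
 */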
static int intel_gna_setup_page_table(void *physical, size_t size,
		void *virtual)
{
	uint32_t page;
	uint32_t dir_index;
	uint32_t table_index;
	uint32_t virt_addr = (uint32_t)virtual;
	uint32_t phys_addr = (uint32_t)physical;

	LOG_DBG("physical %p size %u virtual %p", physical, size, virtual);

	if (((phys_addr + size - L2_SRAM_BASE) > L2_SRAM_SIZE) ||
			(phys_addr < L2_SRAM_BASE)) {
		LOG_ERR("model at %p of size %u exceeds L2 SRAM space",
				physical, size);
		return -EINVAL;
	}

	dir_index = GNA_VA_PG_DIR(virtual);
	table_index = GNA_VA_PG_TABLE(virtual);

	if (dir_index >= GNA_NUM_PG_TABLES_NEEDED) {
		LOG_ERR("virtual addr %p is in page dir %u (max %u)",
				virtual, dir_index,
				(uint32_t)GNA_NUM_PG_TABLES_NEEDED);
		return -EINVAL;
	}

	for (page = 0U; page < GNA_NUM_PAGES(size); page++) {
		dir_index = GNA_VA_PG_DIR(virt_addr);
		table_index = GNA_VA_PG_TABLE(virt_addr);
		gna_page_table[dir_index].entry[table_index] =
			GNA_PG_TABLE_ENTRY(phys_addr);

		LOG_DBG("di %u tb %u @ %p va %08x pa %08x ent %08x",
				dir_index, table_index,
				&gna_page_table[dir_index].entry[table_index],
				virt_addr, phys_addr,
				gna_page_table[dir_index].entry[table_index]);
		phys_addr += GNA_PG_SIZE_IN_BYTES;
		virt_addr += GNA_PG_SIZE_IN_BYTES;
	}

	return 0;
}

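/**
 * @brief Driver initialization function
 *
 * Sets up the request/response message queues, the model memory slab and
 * the work item, fills in the configuration descriptor's page directory,
 * flushes the descriptor to memory and hooks up the GNA interrupt.
 */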
static int intel_gna_initialize(const struct device *dev)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	uint32_t page_dir_entry;

	k_msgq_init(&gna->request_queue, (char *)gna->requests,
			sizeof(struct intel_gna_pending_req),
			GNA_REQUEST_QUEUE_LEN);

	k_msgq_init(&gna->response_queue, (char *)gna->responses,
			sizeof(struct intel_gna_pending_resp),
			GNA_REQUEST_QUEUE_LEN);

	k_mem_slab_init(&gna->model_slab, (char *)gna->models,
			sizeof(struct intel_gna_model), GNA_MAX_NUM_MODELS);

	k_work_init(&gna->gna_work, gna_work_handler);

	/* initialize the configuration descriptor's page directory table */
	for (int page = 0; page < GNA_CONFIG_DESC_PG_DIR_SIZE; page++) {
		page_dir_entry = (page < GNA_NUM_PG_TABLES_NEEDED) ?
			GNA_PG_DIR_ENTRY(&gna_page_table[page]) : (uint32_t)-1;
		gna_config_desc.pagedir[page] = page_dir_entry;
		LOG_DBG("%s: page %u pagetable %08x",
			DEV_NAME(dev), page, gna_config_desc.pagedir[page]);
	}
	gna_config_desc.vamaxaddr = GNA_ADDRESSABLE_MEM_SIZE;
	LOG_DBG("%s: max virtual address %08x",
			DEV_NAME(dev), gna_config_desc.vamaxaddr);

	/* flush cache */
	SOC_DCACHE_FLUSH((void *)&gna_config_desc, sizeof(gna_config_desc));

	LOG_INF("%s: initialized (max %u models & max %u pending requests)",
			DEV_NAME(dev), GNA_MAX_NUM_MODELS,
			GNA_REQUEST_QUEUE_LEN);
	LOG_INF("%s: max addressable memory %u MB",
			DEV_NAME(dev), GNA_ADDRESSABLE_MEM_SIZE >> 20);
	LOG_INF("%s: %u page table(s) at %p and %u bytes",
			DEV_NAME(dev), (uint32_t)GNA_NUM_PG_TABLES_NEEDED,
			gna_page_table, sizeof(gna_page_table));
	LOG_INF("%s: configuration descriptor at %p",
			DEV_NAME(dev), &gna_config_desc);

	/* register interrupt handler */
	IRQ_CONNECT(DT_INST_IRQN(0), DT_INST_IRQ(0, priority),
			intel_gna_interrupt_handler, DEVICE_DT_INST_GET(0), 0);
	/* enable interrupt */
	irq_enable(INTEL_GNA_IRQ_ID);

	gna->state = GNA_STATE_INITIALIZED;
	return 0;
}

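/**
 * @brief Implement the configure operation of the GNA driver API
 *
 * Stores the application-supplied configuration, programs the operation
 * mode, interrupt enables and power mode override bits, and points the
 * device at the configuration descriptor.
 */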
static int intel_gna_configure(const struct device *dev,
			       struct gna_config *cfg)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	volatile struct intel_gna_regs *regs = gna->regs;

	if (gna->state != GNA_STATE_INITIALIZED) {
		LOG_ERR("Configuration attempt in invalid state (%u)",
			gna->state);
		return -EINVAL;
	}

	if (cfg == NULL) {
		LOG_ERR("Config pointer is NULL");
		return -EINVAL;
	}

	gna->config = *cfg;

	regs->gnactrl |= GNA_CTRL_OPER_MODEL_XNN |
		GNA_CTRL_ERR_INTR_ENABLE | GNA_CTRL_COMPL_INTR_ENABLE;

	switch (CONFIG_INTEL_GNA_POWER_MODE) {
	case GNA_POWER_MODE_ALWAYS_ON:
		regs->gnactrl |= GNA_CTRL_PM_OVRIDE_CLK_ON |
			GNA_CTRL_PM_OVRIDE_PWR_ON;
		break;

	case GNA_POWER_MODE_CLOCK_GATED:
		regs->gnactrl |= GNA_CTRL_PM_OVRIDE_PWR_ON;
		break;

	case GNA_POWER_MODE_POWER_GATED:
	case GNA_POWER_MODE_ALWAYS_OFF:
		break;

	default:
		LOG_ERR("Invalid config CONFIG_INTEL_GNA_POWER_MODE (%u)",
				CONFIG_INTEL_GNA_POWER_MODE);
		break;
	}

	/* assign the configuration descriptor address as the base */
	regs->gnadesbase = GNA_PHYS_ADDR_TO_PAGE(&gna_config_desc);

	INTEL_GNA_CONFIG_DESC_DUMP(dev);

	LOG_INF("Device %s (version %u.%u) configured with power mode %u",
			DEV_NAME(dev), regs->gnaversion >> 1,
			(uint32_t)(regs->gnaversion & BIT(0)),
			CONFIG_INTEL_GNA_POWER_MODE);

	gna->state = GNA_STATE_IDLE;
	return 0;
}

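/**
 * @brief Register a model with the driver
 *
 * Validates the model pointers and 64-byte alignment, allocates a model
 * slot from the slab, builds page table entries for the read-write and
 * read-only regions, flushes them to memory and resolves the input/output
 * buffer addresses from the offsets in the model header.
 */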
static int intel_gna_register_model(const struct device *dev,
				    struct gna_model_info *model,
				    void **model_handle)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	struct intel_gna_model *gna_model;
	struct gna_model_header *header;
	uint32_t ro_size, rw_size = 0;
	void *virtual_base;
	void *ro_region;

	if ((gna->state != GNA_STATE_IDLE) &&
			(gna->state != GNA_STATE_ACTIVE)) {
		LOG_ERR("Invalid state (%u)", gna->state);
		return -EINVAL;
	}

	if ((model_handle == NULL) || (model == NULL)) {
		LOG_ERR("model and/or model_handle is NULL");
		return -EINVAL;
	}

	if ((model->header == NULL) || (model->rw_region == NULL)) {
		LOG_ERR("model header or rw_region is NULL");
		return -EINVAL;
	}

	/* check for 64B alignment */
	if (((uint32_t)model->rw_region & BIT_MASK(6)) ||
			((uint32_t)model->ro_region & BIT_MASK(6))) {
		LOG_ERR("rw_region / ro_region not aligned to 64B");
		return -EINVAL;
	}

	if (k_mem_slab_alloc(&gna->model_slab, (void **)&gna_model,
				K_NO_WAIT)) {
		LOG_ERR("No memory to register model");
		return -ENOMEM;
	}

	LOG_INF("model header: %p rw: %p ro: %p", model->header,
			model->rw_region, model->ro_region);

	header = model->header;
	virtual_base = (void *)GNA_MODEL_VIRT_BASE_DEFAULT;

	LOG_INF("model_size: %u rw_region_size: %u", header->model_size,
			header->rw_region_size);

	/* setup page table entries for RW region */
	if (model->rw_region && header->rw_region_size) {
		/* calculate layer descriptor size */
		rw_size = header->layer_count *
			sizeof(struct intel_gna_layer_desc);
		/* round up to page boundary */
		rw_size = GNA_PAGES_TO_BYTES(GNA_NUM_PAGES(rw_size));
		/* add the input rw_region_size to get total rw_region_size */
		rw_size += header->rw_region_size;

		intel_gna_setup_page_table(model->rw_region, rw_size,
				virtual_base);
		SOC_DCACHE_FLUSH(model->rw_region, rw_size);
	}

	if (model->ro_region == NULL) {
		ro_region = (void *)((uint32_t)model->rw_region + rw_size);
	} else {
		ro_region = model->ro_region;
	}

	ro_size = header->model_size - rw_size;

	LOG_INF("rw_region: %p (%u) ro_region: %p (%u)",
			model->rw_region, rw_size, ro_region, ro_size);

	/* setup page table entries for RO region */
	intel_gna_setup_page_table(ro_region, ro_size,
			(void *)((uint32_t)virtual_base + rw_size));

	SOC_DCACHE_FLUSH(ro_region, ro_size);
	SOC_DCACHE_FLUSH(gna_page_table, sizeof(gna_page_table));

	/* copy the model pointers */
	gna_model->model = *model;
	gna_model->vabase = virtual_base;
	gna_model->input = (void *)((uint32_t)model->rw_region +
			*(uint32_t *)((uint32_t)model->rw_region +
				header->input_ptr_offset));
	gna_model->output = (void *)((uint32_t)model->rw_region +
			*(uint32_t *)((uint32_t)model->rw_region +
				header->output_ptr_offset));
	gna_model->registered = true;

	LOG_INF("model->rw_region: %p", model->rw_region);
	LOG_INF("input offset: %u",
		*(uint32_t *)((uint32_t)model->rw_region + header->input_ptr_offset));
	LOG_INF("gna_model->input: %p", gna_model->input);
	LOG_INF("output offset: %u",
		*(uint32_t *)((uint32_t)model->rw_region +
			header->output_ptr_offset));
	LOG_INF("gna_model->output: %p", gna_model->output);
	LOG_DBG("returning model handle: %p", gna_model);
	*model_handle = (void *)gna_model;
	return 0;
}

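/**
 * @brief Deregister a previously registered model
 *
 * Marks the model as unregistered and returns its slot to the model slab.
 */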
static int intel_gna_deregister_model(const struct device *dev,
				      void *model_handle)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	struct intel_gna_model *gna_model;

	if (model_handle == NULL) {
		LOG_ERR("model_handle is NULL");
		return -EINVAL;
	}

	gna_model = (struct intel_gna_model *)model_handle;
	gna_model->registered = false;
	k_mem_slab_free(&gna->model_slab, &model_handle);

	return 0;
}

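/**
 * @brief Start an inference on a registered model
 *
 * Validates the request, queues it as a pending request, copies the input
 * into the model buffer, points the configuration descriptor at the model's
 * layer descriptors and starts the accelerator. The result is delivered
 * through the callback from the work queue handler.
 */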
static int intel_gna_infer(const struct device *dev,
			   struct gna_inference_req *req,
			   gna_callback callback)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	volatile struct intel_gna_regs *regs = gna->regs;
	struct intel_gna_pending_req pending_req;
	struct gna_model_header *header;
	struct intel_gna_model *handle;
	struct gna_model_info *model;
	size_t input_size;
	int ret;

	LOG_DBG("device %p", dev);
	if (req == NULL) {
		LOG_ERR("Invalid request pointer");
		return -EINVAL;
	}

	if (callback == NULL) {
		LOG_ERR("Invalid callback function pointer");
		return -EINVAL;
	}

	handle = (struct intel_gna_model *)req->model_handle;

	if (handle->registered != true) {
		LOG_ERR("Model is not registered. Handle %p", handle);
		return -EINVAL;
	}

	if (req->input == NULL) {
		LOG_ERR("Invalid input buffer");
		return -EINVAL;
	}

	if (req->output == NULL) {
		LOG_ERR("Invalid output buffer");
		return -EINVAL;
	}

	model = &handle->model;
	header = model->header;
	input_size = header->bytes_per_input * header->num_input_nodes;

	pending_req.model = handle;
	pending_req.output = req->output;
	pending_req.output_len = header->bytes_per_output *
		header->num_output_nodes;
	pending_req.callback = callback;

	ret = k_msgq_put(&gna->request_queue, &pending_req, K_NO_WAIT);
	if (ret) {
		LOG_ERR("Unable to queue request (code %d)", ret);
		return ret;
	}

	if (gna->state != GNA_STATE_IDLE) {
		/* multiple pending requests are not yet supported */
		return -EBUSY;
	}

	/* copy input */
	memcpy(handle->input, req->input, input_size);
	SOC_DCACHE_FLUSH(handle->input, input_size);

	/* assign layer descriptor base address to configuration descriptor */
	gna_config_desc.labase = (uint32_t)handle->vabase;
	gna_config_desc.lacnt = (uint16_t)header->layer_count;
	SOC_DCACHE_FLUSH(&gna_config_desc, sizeof(gna_config_desc));

	gna->state = GNA_STATE_ACTIVE;
	regs->gnactrl = (regs->gnactrl & ~GNA_CTRL_INTR_DISABLE) |
		GNA_CTRL_ACCEL_START | GNA_CTRL_STATS_ENABLE_STALL;

	return 0;
}

#if LOG_LEVEL >= LOG_LEVEL_DBG
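/** @brief Dump the GNA memory-mapped registers to the debug log */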
static void intel_gna_regs_dump(const struct device *dev)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	volatile struct intel_gna_regs *regs = gna->regs;

	LOG_DBG("gnasts     :%08x", regs->gnasts);
	LOG_DBG("gnactrl    :%08x", regs->gnactrl);
	LOG_DBG("gnamctl    :%08x", regs->gnamctl);
	LOG_DBG("gnaptc     :%08x", regs->gnaptc);
	LOG_DBG("gnasc      :%08x", regs->gnasc);
	LOG_DBG("gnaisi     :%08x", regs->gnaisi);
	LOG_DBG("gnais_low  :%08x", regs->gnais_low);
	LOG_DBG("gnais_high :%08x", regs->gnais_high);
	LOG_DBG("gnabp_low  :%08x", regs->gnabp_low);
	LOG_DBG("gnabp_high :%08x", regs->gnabp_high);
	LOG_DBG("gnadesbase :%08x", regs->gnadesbase);
	LOG_DBG("gnaibuffs  :%08x", regs->gnaibuffs);
	LOG_DBG("ovrcfgctl  :%08x", regs->gnaibuffs);
	LOG_DBG("gnaversion :%08x", regs->gnaversion);
}

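/** @brief Dump the configuration descriptor contents to the debug log */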
static void intel_gna_config_desc_dump(const struct device *dev)
{
	struct intel_gna_data *const gna = DEV_DATA(dev);
	volatile struct intel_gna_regs *regs = gna->regs;

	LOG_DBG("gnadesbase :%08x", regs->gnadesbase);
	LOG_DBG("labase     :%08x", gna_config_desc.labase);
	LOG_DBG("lacnt      :%u", gna_config_desc.lacnt);
}
#endif

static const struct gna_driver_api gna_driver_api = {
	.configure		= intel_gna_configure,
	.register_model		= intel_gna_register_model,
	.deregister_model	= intel_gna_deregister_model,
	.infer			= intel_gna_infer,
};

static struct intel_gna_data intel_gna_driver_data = {
	.regs = (volatile struct intel_gna_regs *)DT_INST_REG_ADDR(0),
};

DEVICE_DT_INST_DEFINE(0, intel_gna_initialize,
		      NULL,
		      (void *)&intel_gna_driver_data, NULL,
		      POST_KERNEL, CONFIG_INTEL_GNA_INIT_PRIORITY,
		      &gna_driver_api);