/*
 * Copyright (c) 2018 Intel Corporation
 *
 * Author: Sathish Kuttan <sathish.k.kuttan@intel.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/** @file
 * @brief Intel GNA device driver
 *
 * Device driver implementation for Intel's
 * Gaussian Mixture Model and Neural Network Accelerator (GNA)
 */
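
/*
 * Illustrative application-level usage of the public GNA API served by this
 * driver (a sketch only: the device binding name, model blob and buffer
 * names below are assumptions, not part of this driver):
 *
 *   const struct device *gna_dev = device_get_binding("GNA_0");
 *   struct gna_config cfg = { ... };
 *   struct gna_model_info model_info = { ... };
 *   struct gna_inference_req req = { ... };
 *   void *model_handle;
 *
 *   gna_configure(gna_dev, &cfg);
 *   gna_register_model(gna_dev, &model_info, &model_handle);
 *   req.model_handle = model_handle;
 *   gna_infer(gna_dev, &req, inference_done_cb);
 */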

#define DT_DRV_COMPAT intel_gna

#include <zephyr/kernel.h>
#include <string.h>
#include <zephyr/device.h>
#include <zephyr/cache.h>
#include <zephyr/drivers/gna.h>

#include <memory.h>
#include "intel_gna.h"

#define LOG_LEVEL CONFIG_NEURAL_NET_LOG_LEVEL
#include <zephyr/logging/log.h>
#include <zephyr/irq.h>
LOG_MODULE_REGISTER(neural_net);

#if LOG_LEVEL >= LOG_LEVEL_DBG
static void intel_gna_regs_dump(const struct device *dev);
static void intel_gna_config_desc_dump(const struct device *dev);
#define INTEL_GNA_REGS_DUMP(dev)        intel_gna_regs_dump((dev))
#define INTEL_GNA_CONFIG_DESC_DUMP(dev) intel_gna_config_desc_dump((dev))
#else
#define INTEL_GNA_REGS_DUMP(dev)
#define INTEL_GNA_CONFIG_DESC_DUMP(dev)
#endif

#define GNA_MODEL_VIRT_BASE_DEFAULT     0

DEVICE_DECLARE(gna);
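/*
 * The configuration descriptor and the page tables are read directly by the
 * GNA hardware, hence the page-size alignment requirement on both.
 */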
static struct intel_gna_config_desc __aligned(GNA_PG_SIZE_IN_BYTES)
        gna_config_desc;
static struct intel_gna_page_table __aligned(GNA_PG_SIZE_IN_BYTES)
        gna_page_table[GNA_NUM_PG_TABLES_NEEDED];

static void intel_gna_interrupt_handler(const struct device *dev)
{
        struct intel_gna_data *const gna = dev->data;

        volatile struct intel_gna_regs *regs = gna->regs;
        struct intel_gna_pending_resp pending_resp;
        struct intel_gna_pending_req pending_req;

        /* check for generic / virtual address out of range error */
        if (regs->gnasts & (GNA_STS_VIRT_ADDR_OOR | GNA_STS_ERROR)) {
                pending_resp.response.result = GNA_RESULT_GENERIC_ERROR;
        }

        /* check for parameter out of range error */
        if (regs->gnasts & GNA_STS_PARAM_OOR) {
                pending_resp.response.result =
                        GNA_RESULT_PARAM_OUT_OF_RANGE_ERROR;
        }

        /* check for output buffer full error */
        if (regs->gnasts & GNA_STS_BUFFER_FULL) {
                pending_resp.response.result =
                        GNA_RESULT_OUTPUT_BUFFER_FULL_ERROR;
        }

        /* check for scoring completion */
        if (regs->gnasts & GNA_STS_SCORE_COMPL) {
                pending_resp.response.result = GNA_RESULT_INFERENCE_COMPLETE;
        }

        if (k_msgq_get(&gna->request_queue, &pending_req, K_NO_WAIT) != 0) {
                LOG_ERR("Pending request queue is empty");
        } else {
                sys_cache_data_invd_range(pending_req.model->output,
                                          pending_req.output_len);
                /* copy output from the model buffer to application buffer */
                memcpy(pending_req.output, pending_req.model->output,
                       pending_req.output_len);
                pending_resp.response.output = pending_req.output;
                pending_resp.response.output_len = pending_req.output_len;
                pending_resp.callback = pending_req.callback;

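                /*
                 * Performance counters are reported against a fixed 200 MHz
                 * reference clock assumed by this driver.
                 */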
                pending_resp.response.stats.cycles_per_sec = 200000000U;
                if (regs->gnasts & GNA_STS_STATS_VALID) {
                        pending_resp.response.stats.total_cycles = regs->gnaptc;
                        pending_resp.response.stats.stall_cycles = regs->gnasc;
                } else {
                        pending_resp.response.stats.total_cycles = 0U;
                        pending_resp.response.stats.stall_cycles = 0U;
                }

                k_msgq_put(&gna->response_queue, &pending_resp, K_NO_WAIT);

                k_work_submit(&gna->gna_work);
        }

        /* clear GNA operation and disable interrupt */
        regs->gnactrl |= GNA_CTRL_INTR_DISABLE | GNA_CTRL_ABORT_CLEAR;
        gna->state = GNA_STATE_IDLE;
}

static void gna_work_handler(struct k_work *work)
{
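        /*
         * The work item is assumed to be the first member of
         * struct intel_gna_data, which allows the k_work pointer to be cast
         * back to the containing driver data; CONTAINER_OF() would be the
         * more general alternative.
         */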
        struct intel_gna_data *gna = (struct intel_gna_data *)work;
        struct intel_gna_pending_resp resp;

        while (k_msgq_get(&gna->response_queue, &resp, K_NO_WAIT) == 0) {
                resp.callback(&resp.response);
        }
}

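/*
 * Populate GNA page table entries that map a physically contiguous model
 * region (which must lie within L2 SRAM) to the GNA-visible virtual address
 * used in the layer descriptors.
 */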
static int intel_gna_setup_page_table(void *physical, size_t size,
                                      void *virtual)
{
        uint32_t page;
        uint32_t dir_index;
        uint32_t table_index;
        uint32_t virt_addr = (uint32_t)virtual;
        uint32_t phys_addr = (uint32_t)physical;

        LOG_DBG("physical %p size %u virtual %p", physical, size, virtual);

        if (((phys_addr + size - L2_SRAM_BASE) > L2_SRAM_SIZE) ||
            (phys_addr < L2_SRAM_BASE)) {
                LOG_ERR("model at %p of size %u exceeds L2 SRAM space",
                        physical, size);
                return -EINVAL;
        }

        dir_index = GNA_VA_PG_DIR(virtual);
        table_index = GNA_VA_PG_TABLE(virtual);

        if (dir_index >= GNA_NUM_PG_TABLES_NEEDED) {
                LOG_ERR("virtual addr %p is in page dir %u (max %u)",
                        virtual, dir_index,
                        (uint32_t)GNA_NUM_PG_TABLES_NEEDED);
                return -EINVAL;
        }

        for (page = 0U; page < GNA_NUM_PAGES(size); page++) {
                dir_index = GNA_VA_PG_DIR(virt_addr);
                table_index = GNA_VA_PG_TABLE(virt_addr);
                gna_page_table[dir_index].entry[table_index] =
                        GNA_PG_TABLE_ENTRY(phys_addr);

                LOG_DBG("di %u tb %u @ %p va %08x pa %08x ent %08x",
                        dir_index, table_index,
                        &gna_page_table[dir_index].entry[table_index],
                        virt_addr, phys_addr,
                        gna_page_table[dir_index].entry[table_index]);
                phys_addr += GNA_PG_SIZE_IN_BYTES;
                virt_addr += GNA_PG_SIZE_IN_BYTES;
        }

        return 0;
}

static int intel_gna_initialize(const struct device *dev)
{
        struct intel_gna_data *const gna = dev->data;
        uint32_t page_dir_entry;

        k_msgq_init(&gna->request_queue, (char *)gna->requests,
                    sizeof(struct intel_gna_pending_req),
                    GNA_REQUEST_QUEUE_LEN);

        k_msgq_init(&gna->response_queue, (char *)gna->responses,
                    sizeof(struct intel_gna_pending_resp),
                    GNA_REQUEST_QUEUE_LEN);

        k_mem_slab_init(&gna->model_slab, (char *)gna->models,
                        sizeof(struct intel_gna_model), GNA_MAX_NUM_MODELS);

        k_work_init(&gna->gna_work, gna_work_handler);

        /* initialize the configuration descriptor's page directory table */
        for (int page = 0; page < GNA_CONFIG_DESC_PG_DIR_SIZE; page++) {
                page_dir_entry = (page < GNA_NUM_PG_TABLES_NEEDED) ?
                        GNA_PG_DIR_ENTRY(&gna_page_table[page]) : (uint32_t)-1;
                gna_config_desc.pagedir[page] = page_dir_entry;
                LOG_DBG("%s: page %u pagetable %08x",
                        dev->name, page, gna_config_desc.pagedir[page]);
        }
        gna_config_desc.vamaxaddr = GNA_ADDRESSABLE_MEM_SIZE;
        LOG_DBG("%s: max virtual address %08x",
                dev->name, gna_config_desc.vamaxaddr);

        /* flush cache */
        sys_cache_data_flush_range((void *)&gna_config_desc, sizeof(gna_config_desc));

        LOG_INF("%s: initialized (max %u models & max %u pending requests)",
                dev->name, GNA_MAX_NUM_MODELS,
                GNA_REQUEST_QUEUE_LEN);
        LOG_INF("%s: max addressable memory %u MB",
                dev->name, GNA_ADDRESSABLE_MEM_SIZE >> 20);
        LOG_INF("%s: %u page table(s) at %p and %u bytes",
                dev->name, (uint32_t)GNA_NUM_PG_TABLES_NEEDED,
                gna_page_table, sizeof(gna_page_table));
        LOG_INF("%s: configuration descriptor at %p",
                dev->name, &gna_config_desc);

        /* register interrupt handler */
        IRQ_CONNECT(DT_INST_IRQN(0), DT_INST_IRQ(0, priority),
                    intel_gna_interrupt_handler, DEVICE_GET(gna), 0);
        /* enable interrupt */
        irq_enable(INTEL_GNA_IRQ_ID);

        gna->state = GNA_STATE_INITIALIZED;
        return 0;
}

static int intel_gna_configure(const struct device *dev,
                               struct gna_config *cfg)
{
        struct intel_gna_data *const gna = dev->data;
        volatile struct intel_gna_regs *regs = gna->regs;

        if (gna->state != GNA_STATE_INITIALIZED) {
                LOG_ERR("Configuration attempt in invalid state (%u)",
                        gna->state);
                return -EINVAL;
        }

        if (cfg == NULL) {
                LOG_ERR("Config pointer is NULL");
                return -EINVAL;
        }

        gna->config = *cfg;

        regs->gnactrl |= GNA_CTRL_OPER_MODEL_XNN |
                GNA_CTRL_ERR_INTR_ENABLE | GNA_CTRL_COMPL_INTR_ENABLE;

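        /* apply the Kconfig-selected power mode via the PM override bits */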
        switch (CONFIG_INTEL_GNA_POWER_MODE) {
        case GNA_POWER_MODE_ALWAYS_ON:
                regs->gnactrl |= GNA_CTRL_PM_OVRIDE_CLK_ON |
                        GNA_CTRL_PM_OVRIDE_PWR_ON;
                break;

        case GNA_POWER_MODE_CLOCK_GATED:
                regs->gnactrl |= GNA_CTRL_PM_OVRIDE_PWR_ON;
                break;

        case GNA_POWER_MODE_POWER_GATED:
        case GNA_POWER_MODE_ALWAYS_OFF:
                break;

        default:
                LOG_ERR("Invalid config CONFIG_INTEL_GNA_POWER_MODE (%u)",
                        CONFIG_INTEL_GNA_POWER_MODE);
                break;
        }

        /* assign the configuration descriptor address as the base */
        regs->gnadesbase = GNA_PHYS_ADDR_TO_PAGE(&gna_config_desc);

        INTEL_GNA_CONFIG_DESC_DUMP(dev);

        LOG_INF("Device %s (version %u.%u) configured with power mode %u",
                dev->name, regs->gnaversion >> 1,
                (uint32_t)(regs->gnaversion & BIT(0)),
                CONFIG_INTEL_GNA_POWER_MODE);

        gna->state = GNA_STATE_IDLE;
        return 0;
}

static int intel_gna_register_model(const struct device *dev,
                                    struct gna_model_info *model,
                                    void **model_handle)
{
        struct intel_gna_data *const gna = dev->data;
        struct intel_gna_model *gna_model;
        struct gna_model_header *header;
        uint32_t ro_size, rw_size = 0;
        void *virtual_base;
        void *ro_region;

        if ((gna->state != GNA_STATE_IDLE) &&
            (gna->state != GNA_STATE_ACTIVE)) {
                LOG_ERR("Invalid state (%u)", gna->state);
                return -EINVAL;
        }

        if ((model_handle == NULL) || (model == NULL)) {
                LOG_ERR("model and/or model_handle is NULL");
                return -EINVAL;
        }

        if ((model->header == NULL) || (model->rw_region == NULL)) {
                LOG_ERR("model header / rw_region is/are NULL");
                return -EINVAL;
        }

        /* check for 64B alignment */
        if (((uint32_t)model->rw_region & BIT_MASK(6)) ||
            ((uint32_t)model->ro_region & BIT_MASK(6))) {
                LOG_ERR("rw_region / ro_region not aligned to 64B");
                return -EINVAL;
        }

        if (k_mem_slab_alloc(&gna->model_slab, (void **)&gna_model,
                             K_NO_WAIT)) {
                LOG_ERR("No memory to register model");
                return -ENOMEM;
        }

        LOG_INF("model header: %p rw: %p ro: %p", model->header,
                model->rw_region, model->ro_region);

        header = model->header;
        virtual_base = (void *)GNA_MODEL_VIRT_BASE_DEFAULT;

        LOG_INF("model_size: %u rw_region_size: %u", header->model_size,
                header->rw_region_size);

        /* setup page table entries for RW region */
        if (model->rw_region && header->rw_region_size) {
                /* calculate layer descriptor size */
                rw_size = header->layer_count *
                        sizeof(struct intel_gna_layer_desc);
                /* round up to page boundary */
                rw_size = GNA_PAGES_TO_BYTES(GNA_NUM_PAGES(rw_size));
                /* add the input rw_region_size to get total rw_region_size */
                rw_size += header->rw_region_size;

                intel_gna_setup_page_table(model->rw_region, rw_size,
                                           virtual_base);
                sys_cache_data_flush_range(model->rw_region, rw_size);
        }

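        /*
         * When no separate read-only region is supplied, the read-only data
         * is assumed to follow the read/write region contiguously in memory.
         */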
        if (model->ro_region == NULL) {
                ro_region = (void *)((uint32_t)model->rw_region + rw_size);
        } else {
                ro_region = model->ro_region;
        }

        ro_size = header->model_size - rw_size;

        LOG_INF("rw_region: %p (%u) ro_region: %p (%u)",
                model->rw_region, rw_size, ro_region, ro_size);

        /* setup page table entries for RO region */
        intel_gna_setup_page_table(ro_region, ro_size,
                                   (void *)((uint32_t)virtual_base + rw_size));

        sys_cache_data_flush_range(ro_region, ro_size);
        sys_cache_data_flush_range(gna_page_table, sizeof(gna_page_table));

        /* copy the model pointers */
        gna_model->model = *model;
        gna_model->vabase = virtual_base;
        gna_model->input = (void *)((uint32_t)model->rw_region +
                        *(uint32_t *)((uint32_t)model->rw_region +
                                      header->input_ptr_offset));
        gna_model->output = (void *)((uint32_t)model->rw_region +
                        *(uint32_t *)((uint32_t)model->rw_region +
                                      header->output_ptr_offset));
        gna_model->registered = true;

        LOG_INF("model->rw_region: %p", model->rw_region);
        LOG_INF("input offset: %u",
                *(uint32_t *)((uint32_t)model->rw_region +
                              header->input_ptr_offset));
        LOG_INF("gna_model->input: %p", gna_model->input);
        LOG_INF("output offset: %u",
                *(uint32_t *)((uint32_t)model->rw_region +
                              header->output_ptr_offset));
        LOG_INF("gna_model->output: %p", gna_model->output);
        LOG_DBG("returning model handle: %p", gna_model);
        *model_handle = (void *)gna_model;
        return 0;
}

static int intel_gna_deregister_model(const struct device *dev,
                                      void *model_handle)
{
        struct intel_gna_data *const gna = dev->data;
        struct intel_gna_model *gna_model;

        if (model_handle == NULL) {
                LOG_ERR("model_handle is NULL");
                return -EINVAL;
        }

        gna_model = (struct intel_gna_model *)model_handle;
        gna_model->registered = false;
        k_mem_slab_free(&gna->model_slab, &model_handle);

        return 0;
}

static int intel_gna_infer(const struct device *dev,
                           struct gna_inference_req *req,
                           gna_callback callback)
{
        struct intel_gna_data *const gna = dev->data;
        volatile struct intel_gna_regs *regs = gna->regs;
        struct intel_gna_pending_req pending_req;
        struct gna_model_header *header;
        struct intel_gna_model *handle;
        struct gna_model_info *model;
        size_t input_size;
        int ret;

        LOG_DBG("device %p", dev);
        if (req == NULL) {
                LOG_ERR("Invalid request pointer");
                return -EINVAL;
        }

        if (callback == NULL) {
                LOG_ERR("Invalid callback function pointer");
                return -EINVAL;
        }

        handle = (struct intel_gna_model *)req->model_handle;

        if (handle->registered != true) {
                LOG_ERR("Model is not registered. Handle %p", handle);
                return -EINVAL;
        }

        if (req->input == NULL) {
                LOG_ERR("Invalid input buffer");
                return -EINVAL;
        }

        if (req->output == NULL) {
                LOG_ERR("Invalid output buffer");
                return -EINVAL;
        }

        model = &handle->model;
        header = model->header;
        input_size = header->bytes_per_input * header->num_input_nodes;

        pending_req.model = handle;
        pending_req.output = req->output;
        pending_req.output_len = header->bytes_per_output *
                header->num_output_nodes;
        pending_req.callback = callback;

        ret = k_msgq_put(&gna->request_queue, &pending_req, K_NO_WAIT);
        if (ret) {
                LOG_ERR("Unable to queue request (code %d)", ret);
                return ret;
        }

        if (gna->state != GNA_STATE_IDLE) {
                /* multiple pending requests are not yet supported */
                return -EBUSY;
        }

        /* copy input */
        memcpy(handle->input, req->input, input_size);
        sys_cache_data_flush_range(handle->input, input_size);

        /* assign layer descriptor base address to configuration descriptor */
        gna_config_desc.labase = (uint32_t)handle->vabase;
        gna_config_desc.lacnt = (uint16_t)header->layer_count;
        sys_cache_data_flush_range(&gna_config_desc, sizeof(gna_config_desc));

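        /*
         * Start scoring: clear the interrupt-disable bit, set the accelerator
         * start bit and enable stall cycle statistics collection.
         */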
        gna->state = GNA_STATE_ACTIVE;
        regs->gnactrl = (regs->gnactrl & ~GNA_CTRL_INTR_DISABLE) |
                GNA_CTRL_ACCEL_START | GNA_CTRL_STATS_ENABLE_STALL;

        return 0;
}

#if LOG_LEVEL >= LOG_LEVEL_DBG
static void intel_gna_regs_dump(const struct device *dev)
{
        struct intel_gna_data *const gna = dev->data;
        volatile struct intel_gna_regs *regs = gna->regs;

        LOG_DBG("gnasts     :%08x", regs->gnasts);
        LOG_DBG("gnactrl    :%08x", regs->gnactrl);
        LOG_DBG("gnamctl    :%08x", regs->gnamctl);
        LOG_DBG("gnaptc     :%08x", regs->gnaptc);
        LOG_DBG("gnasc      :%08x", regs->gnasc);
        LOG_DBG("gnaisi     :%08x", regs->gnaisi);
        LOG_DBG("gnais_low  :%08x", regs->gnais_low);
        LOG_DBG("gnais_high :%08x", regs->gnais_high);
        LOG_DBG("gnabp_low  :%08x", regs->gnabp_low);
        LOG_DBG("gnabp_high :%08x", regs->gnabp_high);
        LOG_DBG("gnadesbase :%08x", regs->gnadesbase);
        LOG_DBG("gnaibuffs  :%08x", regs->gnaibuffs);
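        /* note: the value printed under the ovrcfgctl label below is read
         * from gnaibuffs; no separate ovrcfgctl register is accessed here.
         */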
        LOG_DBG("ovrcfgctl  :%08x", regs->gnaibuffs);
        LOG_DBG("gnaversion :%08x", regs->gnaversion);
}

static void intel_gna_config_desc_dump(const struct device *dev)
{
        struct intel_gna_data *const gna = dev->data;
        volatile struct intel_gna_regs *regs = gna->regs;

        LOG_DBG("gnadesbase :%08x", regs->gnadesbase);
        LOG_DBG("labase     :%08x", gna_config_desc.labase);
        LOG_DBG("lacnt      :%u", gna_config_desc.lacnt);
}
#endif

static const struct gna_driver_api gna_driver_api = {
        .configure = intel_gna_configure,
        .register_model = intel_gna_register_model,
        .deregister_model = intel_gna_deregister_model,
        .infer = intel_gna_infer,
};

static struct intel_gna_data intel_gna_driver_data = {
        .regs = (volatile struct intel_gna_regs *)DT_INST_REG_ADDR(0),
};

DEVICE_DT_INST_DEFINE(0, intel_gna_initialize,
                      NULL,
                      (void *)&intel_gna_driver_data, NULL,
                      POST_KERNEL, CONFIG_INTEL_GNA_INIT_PRIORITY,
                      &gna_driver_api);