/*
 * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/******************************************************************************
 * Includes
 ******************************************************************************/

#include "ethosu_driver.h"
#include "ethosu_device.h"
#include "ethosu_log.h"

#ifdef ETHOSU55
#include "ethosu_config_u55.h"
#else
#include "ethosu_config_u65.h"
#endif

#include <assert.h>
#include <cmsis_compiler.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // memset() in ethosu_reset_job()

/******************************************************************************
 * Defines
 ******************************************************************************/

#define UNUSED(x) ((void)x)

#define BYTES_IN_32_BITS 4
#define MASK_16_BYTE_ALIGN (0xF)
#define OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD 2
#define DRIVER_ACTION_LENGTH_32_BIT_WORD 1
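// 32-bit magic word marking the start of a custom operator payload. On a
// little-endian core the bytes read "COP1" in memory ("Custom Operator Payload 1").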
#define ETHOSU_FOURCC ('1' << 24 | 'P' << 16 | 'O' << 8 | 'C') // "Custom Operator Payload 1"

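// Index into base_addr[]/base_addr_size[] that ethosu_invoke_async() redirects
// to the fast memory (scratch) area given to ethosu_init(), when one is configured.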
#define FAST_MEMORY_BASE_ADDR_INDEX 2

/******************************************************************************
 * Types
 ******************************************************************************/

// Driver actions
enum DRIVER_ACTION_e
{
    RESERVED = 0,
    OPTIMIZER_CONFIG = 1,
    COMMAND_STREAM = 2,
    NOP = 5,
};

// Custom operator payload data struct
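// Each payload entry starts with one 32-bit driver action word: a command byte,
// a reserved byte and 16 bits of command-specific data (or, read as a whole,
// the 32-bit 'word' member used for the FOURCC check).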
struct cop_data_s
{
    union
    {
        // Driver action data
        struct
        {
            uint8_t driver_action_command; // (valid values in DRIVER_ACTION_e)
            uint8_t reserved;

            // Driver action data
            union
            {
                // DA_CMD_OPT_CFG
                struct
                {
                    uint16_t rel_nbr : 4;
                    uint16_t patch_nbr : 4;
                    uint16_t opt_cfg_reserved : 8;
                };

                // DA_CMD_CMSTRM
                struct
                {
                    uint16_t length;
                };

                uint16_t driver_action_data;
            };
        };

        uint32_t word;
    };
};

// Optimizer config struct
struct opt_cfg_s
{
    struct cop_data_s da_data;
    uint32_t cfg;
    uint32_t id;
};

/******************************************************************************
 * Variables
 ******************************************************************************/

// Registered drivers linked list HEAD
static struct ethosu_driver *registered_drivers = NULL;

/******************************************************************************
 * Weak functions - Cache
 *
 * Default NOP operations. Override if available on the targeted device.
 ******************************************************************************/

/*
 * Flush/clean the data cache by address and size. Passing NULL as the p
 * argument flushes the whole cache.
 */
void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes)
{
    UNUSED(p);
    UNUSED(bytes);
}

/*
 * Invalidate the data cache by address and size. Passing NULL as the p
 * argument invalidates the whole cache.
 */
void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes)
{
    UNUSED(p);
    UNUSED(bytes);
}

/******************************************************************************
 * Weak functions - Semaphore/Mutex for multi NPU
 *
 * The following section provides a minimal semaphore and mutex implementation
 * for baremetal applications. The weak symbols are overridden by RTOS
 * definitions, which implement true thread-safety (in the application layer).
 ******************************************************************************/

struct ethosu_semaphore_t
{
    uint8_t count;
};

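// Mutex guarding the global list of registered drivers, and a counting
// semaphore tracking how many registered drivers are available for reservation.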
static void *ethosu_mutex;
static void *ethosu_semaphore;

void *__attribute__((weak)) ethosu_mutex_create(void)
{
    static uint8_t mutex_placeholder;
    return &mutex_placeholder;
}

void __attribute__((weak)) ethosu_mutex_destroy(void *mutex)
{
    UNUSED(mutex);
}

int __attribute__((weak)) ethosu_mutex_lock(void *mutex)
{
    UNUSED(mutex);
    return 0;
}

int __attribute__((weak)) ethosu_mutex_unlock(void *mutex)
{
    UNUSED(mutex);
    return 0;
}

// Baremetal implementation of creating a semaphore
void *__attribute__((weak)) ethosu_semaphore_create(void)
{
    struct ethosu_semaphore_t *sem = malloc(sizeof(*sem));
    if (sem != NULL)
    {
        sem->count = 0;
    }
    return sem;
}

void __attribute__((weak)) ethosu_semaphore_destroy(void *sem)
{
    free((struct ethosu_semaphore_t *)sem);
}

// Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrinsics
int __attribute__((weak)) ethosu_semaphore_take(void *sem, uint64_t timeout)
{
    UNUSED(timeout);
    // Baremetal pseudo-example on how to trigger a timeout:
    // if (timeout != ETHOSU_SEMAPHORE_WAIT_FOREVER) {
    //     setup_a_timer_to_call_SEV_after_time(timeout);
    // }
    struct ethosu_semaphore_t *s = sem;
    while (s->count == 0)
    {
        __WFE();
        // Baremetal pseudo-example check if timeout triggered:
        // if (SEV_timer_triggered()) {
        //     return -1;
        // }
    }
    s->count--;
    return 0;
}

// Baremetal simulation of giving a semaphore and waking up processes using intrinsics
int __attribute__((weak)) ethosu_semaphore_give(void *sem)
{
    struct ethosu_semaphore_t *s = sem;
    s->count++;
    __SEV();
    return 0;
}

/******************************************************************************
 * Weak functions - Inference begin/end callbacks
 ******************************************************************************/

void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg)
{
    UNUSED(user_arg);
    UNUSED(drv);
}

void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, void *user_arg)
{
    UNUSED(user_arg);
    UNUSED(drv);
}

/******************************************************************************
 * Static functions
 ******************************************************************************/
static void ethosu_register_driver(struct ethosu_driver *drv)
{
    ethosu_mutex_lock(ethosu_mutex);
    drv->next = registered_drivers;
    registered_drivers = drv;
    ethosu_mutex_unlock(ethosu_mutex);

    ethosu_semaphore_give(ethosu_semaphore);

    LOG_INFO("New NPU driver registered (handle: 0x%p, NPU: 0x%p)", drv, drv->dev->reg);
}

static int ethosu_deregister_driver(struct ethosu_driver *drv)
{
    struct ethosu_driver *curr;
    struct ethosu_driver **prev;

    ethosu_mutex_lock(ethosu_mutex);
    curr = registered_drivers;
    prev = &registered_drivers;

    while (curr != NULL)
    {
        if (curr == drv)
        {
            *prev = curr->next;
            LOG_INFO("NPU driver handle %p deregistered.", drv);
            ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER);
            break;
        }

        prev = &curr->next;
        curr = curr->next;
    }

    ethosu_mutex_unlock(ethosu_mutex);

    if (curr == NULL)
    {
        LOG_ERR("No NPU driver handle registered at address %p.", drv);
        return -1;
    }

    return 0;
}

static void ethosu_reset_job(struct ethosu_driver *drv)
{
    memset(&drv->job, 0, sizeof(struct ethosu_job));
}

static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s const *opt_cfg_p)
{
    LOG_INFO("Optimizer release nbr: %u patch: %u", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr);

    if (ethosu_dev_verify_optimizer_config(drv->dev, opt_cfg_p->cfg, opt_cfg_p->id) != true)
    {
        return -1;
    }

    return 0;
}

static int handle_command_stream(struct ethosu_driver *drv, const uint8_t *cmd_stream, const int cms_length)
{
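    // cms_length is given in 32-bit words; convert to bytes for cache maintenance
    // and for the device command stream API.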
    uint32_t cms_bytes = cms_length * BYTES_IN_32_BITS;
    ptrdiff_t cmd_stream_ptr = (ptrdiff_t)cmd_stream;

    LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d", cmd_stream, cms_length);

    if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN))
    {
        LOG_ERR("Command stream addr %p not aligned to 16 bytes", cmd_stream);
        return -1;
    }

    // Verify 16 byte alignment for base addresses
    for (int i = 0; i < drv->job.num_base_addr; i++)
    {
        if (0 != (drv->job.base_addr[i] & MASK_16_BYTE_ALIGN))
        {
            LOG_ERR("Base addr %d: 0x%llx not aligned to 16 bytes", i, drv->job.base_addr[i]);
            return -1;
        }
    }

    // Flush the cache if available on the CPU.
    // The cast to uint32_t* is ok since the pointer is never dereferenced.
    // base_addr_size is NULL when invoked from APIs prior to invoke_v2; in that
    // case the whole cache is flushed.

    if (drv->job.base_addr_size != NULL)
    {
        ethosu_flush_dcache((uint32_t *)cmd_stream_ptr, cms_bytes);
        for (int i = 0; i < drv->job.num_base_addr; i++)
        {
            ethosu_flush_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
        }
    }
    else
    {
        ethosu_flush_dcache(NULL, 0);
    }

    // Request power gating disabled during inference run
    if (ethosu_request_power(drv))
    {
        LOG_ERR("Failed to request power");
        return -1;
    }

    drv->job.state = ETHOSU_JOB_RUNNING;

    // Inference begin callback
    ethosu_inference_begin(drv, drv->job.user_arg);

    // Execute the command stream
    ethosu_dev_run_command_stream(drv->dev, cmd_stream, cms_bytes, drv->job.base_addr, drv->job.num_base_addr);

    return 0;
}

/******************************************************************************
 * Weak functions - Interrupt handler
 ******************************************************************************/
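// Default NPU interrupt handler. The application is expected to install an ISR
// for the NPU interrupt that calls this function (or an overriding
// implementation) with the driver handle of the interrupting NPU.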
void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv)
{
    LOG_DEBUG("Got interrupt from Ethos-U");

    // Prevent race condition where interrupt triggered after a timeout waiting
    // for semaphore, but before NPU is reset.
    if (drv->job.result == ETHOSU_JOB_RESULT_TIMEOUT)
    {
        return;
    }

    drv->job.state = ETHOSU_JOB_DONE;
    drv->job.result = ethosu_dev_handle_interrupt(drv->dev) ? ETHOSU_JOB_RESULT_OK : ETHOSU_JOB_RESULT_ERROR;
    ethosu_semaphore_give(drv->semaphore);
}

/******************************************************************************
 * Functions API
 ******************************************************************************/

int ethosu_init(struct ethosu_driver *drv,
                void *const base_address,
                const void *fast_memory,
                const size_t fast_memory_size,
                uint32_t secure_enable,
                uint32_t privilege_enable)
{
    LOG_INFO("Initializing NPU: base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32
             ", privileged=%" PRIu32,
             base_address,
             fast_memory,
             fast_memory_size,
             secure_enable,
             privilege_enable);

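    // Lazily create the global mutex and driver-availability semaphore shared by
    // all driver instances; they are only set up on the first ethosu_init() call.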
    if (!ethosu_mutex)
    {
        ethosu_mutex = ethosu_mutex_create();
        if (!ethosu_mutex)
        {
            LOG_ERR("Failed to create global driver mutex");
            return -1;
        }
    }

    if (!ethosu_semaphore)
    {
        ethosu_semaphore = ethosu_semaphore_create();
        if (!ethosu_semaphore)
        {
            LOG_ERR("Failed to create global driver semaphore");
            return -1;
        }
    }

    drv->fast_memory = (uint32_t)fast_memory;
    drv->fast_memory_size = fast_memory_size;
    drv->power_request_counter = 0;

    // Initialize the device and set requested security state and privilege mode
    drv->dev = ethosu_dev_init(base_address, secure_enable, privilege_enable);

    if (drv->dev == NULL)
    {
        LOG_ERR("Failed to initialize Ethos-U device");
        return -1;
    }

    drv->semaphore = ethosu_semaphore_create();
    if (!drv->semaphore)
    {
        LOG_ERR("Failed to create driver semaphore");
        ethosu_dev_deinit(drv->dev);
        drv->dev = NULL;
        return -1;
    }

    ethosu_reset_job(drv);
    ethosu_register_driver(drv);

    return 0;
}

void ethosu_deinit(struct ethosu_driver *drv)
{
    ethosu_deregister_driver(drv);
    ethosu_semaphore_destroy(drv->semaphore);
    ethosu_dev_deinit(drv->dev);
    drv->dev = NULL;
}

int ethosu_soft_reset(struct ethosu_driver *drv)
{
    // Soft reset the NPU
    if (ethosu_dev_soft_reset(drv->dev) != ETHOSU_SUCCESS)
    {
        LOG_ERR("Failed to soft-reset NPU");
        return -1;
    }

    // Update power and clock gating after the soft reset
    ethosu_dev_set_clock_and_power(drv->dev,
                                   drv->power_request_counter > 0 ? ETHOSU_CLOCK_Q_DISABLE : ETHOSU_CLOCK_Q_ENABLE,
                                   drv->power_request_counter > 0 ? ETHOSU_POWER_Q_DISABLE : ETHOSU_POWER_Q_ENABLE);

    return 0;
}

int ethosu_request_power(struct ethosu_driver *drv)
{
    // Check if this is the first power request, increase counter
    if (drv->power_request_counter++ == 0)
    {
        // Always reset to a known state, which also applies the requested
        // security state/privilege mode if necessary.
        if (ethosu_soft_reset(drv))
        {
            LOG_ERR("Failed to request power for Ethos-U");
            drv->power_request_counter--;
            return -1;
        }
    }
    return 0;
}

void ethosu_release_power(struct ethosu_driver *drv)
{
    if (drv->power_request_counter == 0)
    {
        LOG_WARN("No power request left to release, reference counter is 0");
    }
    else
    {
        // Decrement ref counter and enable power gating if no requests remain
        if (--drv->power_request_counter == 0)
        {
            ethosu_dev_set_clock_and_power(drv->dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
        }
    }
}

void ethosu_get_driver_version(struct ethosu_driver_version *ver)
{
    assert(ver != NULL);
    ver->major = ETHOSU_DRIVER_VERSION_MAJOR;
    ver->minor = ETHOSU_DRIVER_VERSION_MINOR;
    ver->patch = ETHOSU_DRIVER_VERSION_PATCH;
}

void ethosu_get_hw_info(struct ethosu_driver *drv, struct ethosu_hw_info *hw)
{
    assert(hw != NULL);
    ethosu_dev_get_hw_info(drv->dev, hw);
}

int ethosu_wait(struct ethosu_driver *drv, bool block)
{
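    // Return value: 0 = inference finished successfully, 1 = still running
    // (non-blocking call), -1 = error or timeout, -2 = no job was running.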
    int ret = 0;

    switch (drv->job.state)
    {
    case ETHOSU_JOB_IDLE:
        LOG_ERR("Inference job not running...");
        ret = -2;
        break;
    case ETHOSU_JOB_RUNNING:
        if (!block)
        {
            // Inference still running, do not block
            ret = 1;
            break;
        }
        // fall through
    case ETHOSU_JOB_DONE:
        // Wait for interrupt in blocking mode. In non-blocking mode
        // the interrupt has already triggered
        ret = ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
        if (ret < 0)
        {
            drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT;

            // There's a race where the NPU interrupt can have fired between the semaphore
            // timing out and the result being set above (checked in the interrupt handler).
            // By checking if the job state has changed (only set to DONE by the interrupt
            // handler), we know whether the interrupt handler has run; if so, decrement the
            // semaphore count by one (it was given in the interrupt handler).
            if (drv->job.state == ETHOSU_JOB_DONE)
            {
                drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT; // Reset back to timeout
                ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
            }
        }

        // Inference done callback - always called even in case of timeout
        ethosu_inference_end(drv, drv->job.user_arg);

        // Release power gating disabled requirement
        ethosu_release_power(drv);

        // Check NPU and interrupt status
        if (drv->job.result)
        {
            if (drv->job.result == ETHOSU_JOB_RESULT_ERROR)
            {
                LOG_ERR("NPU error(s) occurred during inference.");
                ethosu_dev_print_err_status(drv->dev);
            }
            else
            {
                LOG_ERR("NPU inference timed out.");
            }

            // Reset the NPU
            (void)ethosu_soft_reset(drv);

            ret = -1;
        }
        else
        {
            // Invalidate cache
            if (drv->job.base_addr_size != NULL)
            {
                for (int i = 0; i < drv->job.num_base_addr; i++)
                {
                    ethosu_invalidate_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
                }
            }
            else
            {
                ethosu_invalidate_dcache(NULL, 0);
            }

            LOG_DEBUG("Inference finished successfully...");
            ret = 0;
        }

        // Reset internal job (state resets to IDLE)
        ethosu_reset_job(drv);
        break;

    default:
        LOG_ERR("Unexpected job state");
        ethosu_reset_job(drv);
        ret = -1;
        break;
    }

    // Return inference job status
    return ret;
}

int ethosu_invoke_async(struct ethosu_driver *drv,
                        const void *custom_data_ptr,
                        const int custom_data_size,
                        uint64_t *const base_addr,
                        const size_t *base_addr_size,
                        const int num_base_addr,
                        void *user_arg)
{

    const struct cop_data_s *data_ptr = custom_data_ptr;
    const struct cop_data_s *data_end = (struct cop_data_s *)((ptrdiff_t)custom_data_ptr + custom_data_size);

    // Make sure an inference is not already running
    if (drv->job.state != ETHOSU_JOB_IDLE)
    {
        LOG_ERR("Inference already running, or waiting to be cleared...");
        return -1;
    }

    drv->job.state = ETHOSU_JOB_IDLE;
    drv->job.custom_data_ptr = custom_data_ptr;
    drv->job.custom_data_size = custom_data_size;
    drv->job.base_addr = base_addr;
    drv->job.base_addr_size = base_addr_size;
    drv->job.num_base_addr = num_base_addr;
    drv->job.user_arg = user_arg;

    // First word in custom_data_ptr should contain "Custom Operator Payload 1"
    if (data_ptr->word != ETHOSU_FOURCC)
    {
        LOG_ERR("Custom Operator Payload: %" PRIx32 " is not correct, expected %x", data_ptr->word, ETHOSU_FOURCC);
        goto err;
    }

    // Custom data length must be a multiple of 32 bits
    if ((custom_data_size % BYTES_IN_32_BITS) != 0)
    {
        LOG_ERR("custom_data_size=0x%x not a multiple of 4", (unsigned)custom_data_size);
        goto err;
    }

    data_ptr++;

    // Adjust base address to fast memory area
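    // Base address index 2 (FAST_MEMORY_BASE_ADDR_INDEX) is redirected to the
    // dedicated fast memory area given to ethosu_init(), provided the configured
    // area is large enough for what the network expects there.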
    if (drv->fast_memory != 0 && num_base_addr >= FAST_MEMORY_BASE_ADDR_INDEX)
    {

        if (base_addr_size != NULL && base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size)
        {
            LOG_ERR("Fast memory area too small. fast_memory_size=%zu, base_addr_size=%zu",
                    drv->fast_memory_size,
                    base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]);
            goto err;
        }

        base_addr[FAST_MEMORY_BASE_ADDR_INDEX] = drv->fast_memory;
    }

    // Parse Custom Operator Payload data
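    // Each entry starts with one driver action word. OPTIMIZER_CONFIG is followed
    // by OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD words of config data, COMMAND_STREAM
    // by 'length' words of command stream, and NOP carries no payload.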
    while (data_ptr < data_end)
    {
        switch (data_ptr->driver_action_command)
        {
        case OPTIMIZER_CONFIG:
            LOG_DEBUG("OPTIMIZER_CONFIG");
            struct opt_cfg_s const *opt_cfg_p = (const struct opt_cfg_s *)data_ptr;

            if (handle_optimizer_config(drv, opt_cfg_p) < 0)
            {
                goto err;
            }
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD;
            break;
        case COMMAND_STREAM:
            // Vela only supports putting one COMMAND_STREAM per op
            LOG_DEBUG("COMMAND_STREAM");
            const uint8_t *command_stream = (const uint8_t *)(data_ptr + 1);
            int cms_length = (data_ptr->reserved << 16) | data_ptr->length;

            if (handle_command_stream(drv, command_stream, cms_length) < 0)
            {
                goto err;
            }
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length;
            break;
        case NOP:
            LOG_DEBUG("NOP");
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
            break;
        default:
            LOG_ERR("UNSUPPORTED driver_action_command: %u", data_ptr->driver_action_command);
            goto err;
            break;
        }
    }

    return 0;
err:
    LOG_ERR("Failed to invoke inference.");
    ethosu_reset_job(drv);
    return -1;
}

int ethosu_invoke_v3(struct ethosu_driver *drv,
                     const void *custom_data_ptr,
                     const int custom_data_size,
                     uint64_t *const base_addr,
                     const size_t *base_addr_size,
                     const int num_base_addr,
                     void *user_arg)
{
    if (ethosu_invoke_async(
            drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, user_arg) < 0)
    {
        return -1;
    }

    return ethosu_wait(drv, true);
}

struct ethosu_driver *ethosu_reserve_driver(void)
{
    struct ethosu_driver *drv = NULL;

    LOG_INFO("Acquiring NPU driver handle");
    ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER); // This is meant to block until available

    ethosu_mutex_lock(ethosu_mutex);
    drv = registered_drivers;

    while (drv != NULL)
    {
        if (!drv->reserved)
        {
            drv->reserved = true;
            LOG_DEBUG("NPU driver handle %p reserved", drv);
            break;
        }
        drv = drv->next;
    }
    ethosu_mutex_unlock(ethosu_mutex);

    if (!drv)
    {
        LOG_ERR("No NPU driver handle available, but semaphore taken");
    }

    return drv;
}

void ethosu_release_driver(struct ethosu_driver *drv)
{
    ethosu_mutex_lock(ethosu_mutex);
    if (drv != NULL && drv->reserved)
    {
        if (drv->job.state == ETHOSU_JOB_RUNNING || drv->job.state == ETHOSU_JOB_DONE)
        {
            // Give the inference one shot to complete or force kill the job
            if (ethosu_wait(drv, false) == 1)
            {
                // Still running, soft reset the NPU and reset driver
                drv->power_request_counter = 0;
                ethosu_soft_reset(drv);
                ethosu_reset_job(drv);
            }
        }

        drv->reserved = false;
        LOG_DEBUG("NPU driver handle %p released", drv);
        ethosu_semaphore_give(ethosu_semaphore);
    }
    ethosu_mutex_unlock(ethosu_mutex);
}