1 /*
2 * SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the License); you may
6 * not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /******************************************************************************
19 * Includes
20 ******************************************************************************/
21
22 #include "ethosu_driver.h"
23 #include "ethosu_device.h"
24 #include "ethosu_log.h"
25
26 #if defined(ETHOSU55)
27 #include "ethosu_config_u55.h"
28 #elif defined(ETHOSU65)
29 #include "ethosu_config_u65.h"
30 #elif defined(ETHOSU85)
31 #include "ethosu_config_u85.h"
32 #else
33 #error Missing device type macro
34 #endif
35
#include <assert.h>
#include <cmsis_compiler.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
43
44 /******************************************************************************
45 * Defines
46 ******************************************************************************/
47
48 #define UNUSED(x) ((void)x)
49
50 #define BYTES_IN_32_BITS 4
51 #define MASK_16_BYTE_ALIGN (0xF)
52 #define OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD 2
53 #define DRIVER_ACTION_LENGTH_32_BIT_WORD 1
54 #define ETHOSU_FOURCC ('1' << 24 | 'P' << 16 | 'O' << 8 | 'C') // "Custom Operator Payload 1"
55
56 #define SCRATCH_BASE_ADDR_INDEX 1
57 #define FAST_MEMORY_BASE_ADDR_INDEX 2
58
59 /******************************************************************************
60 * Types
61 ******************************************************************************/
62
63 // Driver actions
// Driver actions
//
// Command codes found in the low byte of each driver-action word in the
// custom operator payload (see struct cop_data_s). Parsed by
// ethosu_invoke_async().
enum DRIVER_ACTION_e
{
    RESERVED         = 0,
    OPTIMIZER_CONFIG = 1, // Followed by an opt_cfg_s payload (2 extra words)
    COMMAND_STREAM   = 2, // Followed by the NPU command stream ('length' words)
    NOP              = 5, // Single-word padding/no-op
};
71
72 // Custom operator payload data struct
// Custom operator payload data struct
//
// One 32-bit word of the custom operator payload. The first word of the
// payload holds the FOURCC magic (accessed via 'word'); subsequent words are
// driver actions: a command byte, a reserved byte and 16 bits of
// command-specific data.
struct cop_data_s
{
    union
    {
        // Driver action data
        struct
        {
            uint8_t driver_action_command; // (valid values in DRIVER_ACTION_e)
            uint8_t reserved;              // For COMMAND_STREAM: high 16 bits of the length

            // Driver action data
            union
            {
                // DA_CMD_OPT_CFG: optimizer version info
                struct
                {
                    uint16_t rel_nbr : 4;          // Optimizer release number
                    uint16_t patch_nbr : 4;        // Optimizer patch number
                    uint16_t opt_cfg_reserved : 8;
                };

                // DA_CMD_CMSTRM: command stream length in 32-bit words (low 16 bits)
                struct
                {
                    uint16_t length;
                };

                uint16_t driver_action_data;
            };
        };

        // Raw 32-bit view, used to check the FOURCC magic word
        uint32_t word;
    };
};
107
108 // optimizer config struct
// optimizer config struct
//
// Payload of an OPTIMIZER_CONFIG driver action: the action word followed by
// the NPU configuration and version words, verified against the hardware in
// handle_optimizer_config().
struct opt_cfg_s
{
    struct cop_data_s da_data; // Driver action word (rel_nbr/patch_nbr)
    uint32_t cfg;              // NPU configuration the payload was compiled for
    uint32_t id;               // NPU version/id the payload was compiled for
};
115
116 /******************************************************************************
117 * Variables
118 ******************************************************************************/
119
120 // Registered drivers linked list HEAD
121 static struct ethosu_driver *registered_drivers = NULL;
122
123 /******************************************************************************
124 * Weak functions - Cache
125 *
126 * Default NOP operations. Override if available on the targeted device.
127 ******************************************************************************/
128
129 /*
130 * Flush/clean the data cache by address and size. Passing NULL as p argument
131 * expects the whole cache to be flushed.
132 */
/*
 * Weak default: flush/clean the data cache for the address range [p, p+bytes).
 * Passing NULL as p means the whole cache. No-op unless the platform overrides
 * it with a real cache-maintenance implementation.
 */
void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes)
{
    (void)p;
    (void)bytes;
}
138
139 /*
140 * Invalidate the data cache by address and size. Passing NULL as p argument
141 * expects the whole cache to be invalidated.
142 */
/*
 * Weak default: invalidate the data cache for the address range [p, p+bytes).
 * Passing NULL as p means the whole cache. No-op unless the platform overrides
 * it with a real cache-maintenance implementation.
 */
void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes)
{
    (void)p;
    (void)bytes;
}
148
149 /******************************************************************************
150 * Weak functions - Semaphore/Mutex for multi NPU
151 *
152 * Following section handles the minimal sempahore and mutex implementation in
153 * case of baremetal applications. Weak symbols will be overridden by RTOS
154 * definitions and implement true thread-safety (in application layer).
155 ******************************************************************************/
156
// Minimal counting semaphore for the baremetal default implementation below.
// RTOS ports override the weak semaphore functions and never use this type.
struct ethosu_semaphore_t
{
    uint8_t count; // Number of available "tokens"
};
161
162 static void *ethosu_mutex;
163 static void *ethosu_semaphore;
164
/*
 * Baremetal mutex creation: with a single thread of execution no real lock is
 * needed, so a static byte serves as a non-NULL placeholder handle.
 */
void *__attribute__((weak)) ethosu_mutex_create(void)
{
    static uint8_t dummy_mutex;

    return &dummy_mutex;
}
170
/*
 * Baremetal mutex destruction: the placeholder handle from
 * ethosu_mutex_create() is static storage, so nothing to release.
 */
void __attribute__((weak)) ethosu_mutex_destroy(void *mutex)
{
    (void)mutex;
}
175
/*
 * Baremetal mutex lock: single-threaded, so locking always succeeds.
 * Returns 0 (success).
 */
int __attribute__((weak)) ethosu_mutex_lock(void *mutex)
{
    (void)mutex;

    return 0;
}
181
/*
 * Baremetal mutex unlock: single-threaded, so unlocking always succeeds.
 * Returns 0 (success).
 */
int __attribute__((weak)) ethosu_mutex_unlock(void *mutex)
{
    (void)mutex;

    return 0;
}
187
188 // Baremetal implementation of creating a semaphore
ethosu_semaphore_create(void)189 void *__attribute__((weak)) ethosu_semaphore_create(void)
190 {
191 struct ethosu_semaphore_t *sem = malloc(sizeof(*sem));
192 if (sem != NULL)
193 {
194 sem->count = 0;
195 }
196 return sem;
197 }
198
/*
 * Baremetal implementation of destroying a semaphore created by
 * ethosu_semaphore_create(). Safe to call with NULL.
 */
void __attribute__((weak)) ethosu_semaphore_destroy(void *sem)
{
    free(sem); // free() takes void *, no cast needed
}
203
204 // Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrisics
ethosu_semaphore_take(void * sem,uint64_t timeout)205 int __attribute__((weak)) ethosu_semaphore_take(void *sem, uint64_t timeout)
206 {
207 UNUSED(timeout);
208 // Baremetal pseudo-example on how to trigger a timeout:
209 // if (timeout != ETHOSU_SEMAPHORE_WAIT_FOREVER) {
210 // setup_a_timer_to_call_SEV_after_time(timeout);
211 // }
212 struct ethosu_semaphore_t *s = sem;
213 while (s->count == 0)
214 {
215 __WFE();
216 // Baremetal pseudo-example check if timeout triggered:
217 // if (SEV_timer_triggered()) {
218 // return -1;
219 // }
220 }
221 s->count--;
222 return 0;
223 }
224
225 // Baremetal simulation of giving a semaphore and waking up processes using intrinsics
ethosu_semaphore_give(void * sem)226 int __attribute__((weak)) ethosu_semaphore_give(void *sem)
227 {
228 struct ethosu_semaphore_t *s = sem;
229 s->count++;
230 __SEV();
231 return 0;
232 }
233
234 /******************************************************************************
235 * Weak functions - Inference begin/end callbacks
236 ******************************************************************************/
237
/*
 * Weak callback invoked right before a command stream is executed.
 * Default is a no-op; override in the application to e.g. start profiling.
 */
void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg)
{
    (void)drv;
    (void)user_arg;
}
243
/*
 * Weak callback invoked when an inference finishes (also on timeout).
 * Default is a no-op; override in the application to e.g. stop profiling.
 */
void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, void *user_arg)
{
    (void)drv;
    (void)user_arg;
}
249
250 /******************************************************************************
251 * Static functions
252 ******************************************************************************/
/*
 * Add a driver handle to the head of the global linked list of registered
 * drivers and give the global semaphore once, making the NPU available to
 * ethosu_reserve_driver().
 */
static void ethosu_register_driver(struct ethosu_driver *drv)
{
    ethosu_mutex_lock(ethosu_mutex);
    // Push onto the head of the singly linked list
    drv->next          = registered_drivers;
    registered_drivers = drv;
    ethosu_mutex_unlock(ethosu_mutex);

    // The global semaphore count tracks the number of available NPUs
    ethosu_semaphore_give(ethosu_semaphore);

    LOG_INFO("New NPU driver registered (handle: 0x%p, NPU: 0x%p)", drv, drv->dev.reg);
}
264
ethosu_deregister_driver(struct ethosu_driver * drv)265 static int ethosu_deregister_driver(struct ethosu_driver *drv)
266 {
267 struct ethosu_driver *curr;
268 struct ethosu_driver **prev;
269
270 ethosu_mutex_lock(ethosu_mutex);
271 curr = registered_drivers;
272 prev = ®istered_drivers;
273
274 while (curr != NULL)
275 {
276 if (curr == drv)
277 {
278 *prev = curr->next;
279 LOG_INFO("NPU driver handle %p deregistered.", drv);
280 ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER);
281 break;
282 }
283
284 prev = &curr->next;
285 curr = curr->next;
286 }
287
288 ethosu_mutex_unlock(ethosu_mutex);
289
290 if (curr == NULL)
291 {
292 LOG_ERR("No NPU driver handle registered at address %p.", drv);
293 return -1;
294 }
295
296 return 0;
297 }
298
ethosu_reset_job(struct ethosu_driver * drv)299 static void ethosu_reset_job(struct ethosu_driver *drv)
300 {
301 memset(&drv->job, 0, sizeof(struct ethosu_job));
302 }
303
handle_optimizer_config(struct ethosu_driver * drv,struct opt_cfg_s const * opt_cfg_p)304 static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s const *opt_cfg_p)
305 {
306 LOG_INFO("Optimizer release nbr: %u patch: %u", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr);
307
308 if (ethosu_dev_verify_optimizer_config(&drv->dev, opt_cfg_p->cfg, opt_cfg_p->id) != true)
309 {
310 return -1;
311 }
312
313 return 0;
314 }
315
handle_command_stream(struct ethosu_driver * drv,const uint8_t * cmd_stream,const int cms_length)316 static int handle_command_stream(struct ethosu_driver *drv, const uint8_t *cmd_stream, const int cms_length)
317 {
318 uint32_t cms_bytes = cms_length * BYTES_IN_32_BITS;
319
320 LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d", cmd_stream, cms_length);
321
322 if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN))
323 {
324 LOG_ERR("Command stream addr %p not aligned to 16 bytes", cmd_stream);
325 return -1;
326 }
327
328 // Verify minimum 16 byte alignment for base address'
329 for (int i = 0; i < drv->job.num_base_addr; i++)
330 {
331 if (0 != (drv->job.base_addr[i] & MASK_16_BYTE_ALIGN))
332 {
333 LOG_ERR("Base addr %d: 0x%" PRIx64 "not aligned to 16 bytes", i, drv->job.base_addr[i]);
334 return -1;
335 }
336 }
337
338 // DEPRECATION WARNING:
339 // It is advised against letting the driver handle flushing/cleaning of the cache, as this will
340 // be done for every invokation. It is up to the application code to ensure cache coherency
341 // before invoking an inference.
342 //
343 // The cache flush call below will flush/clean every base pointer marked in the flush mask.
344 // Typically only the scratch tensor contains RW data shared between the CPU and NPU, and needs
345 // to be flushed/cleaned before invoking an inference.
346 //
347 // It is recommended to not implement/override the default empty ethosu_flush_dcache() weak
348 // function.
349 //
350 // NOTE: It is required that any base pointer marked for cache flush/clean is aligned to the
351 // cache line size.
352
353 // Flush/clean the cache for base pointers marked in the mask
354 for (int i = 0; i < drv->job.num_base_addr; i++)
355 {
356 if (drv->basep_flush_mask & (1 << i))
357 {
358 ethosu_flush_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
359 }
360 }
361
362 // Request power gating disabled during inference run
363 if (ethosu_request_power(drv))
364 {
365 LOG_ERR("Failed to request power");
366 return -1;
367 }
368
369 drv->job.state = ETHOSU_JOB_RUNNING;
370
371 // Inference begin callback
372 ethosu_inference_begin(drv, drv->job.user_arg);
373
374 // Execute the command stream
375 ethosu_dev_run_command_stream(&drv->dev, cmd_stream, cms_bytes, drv->job.base_addr, drv->job.num_base_addr);
376
377 return 0;
378 }
379
380 /******************************************************************************
381 * Weak functions - Interrupt handler
382 ******************************************************************************/
/*
 * Default NPU interrupt handler: marks the job as done, records whether the
 * device reported success, and wakes any thread blocked in ethosu_wait() by
 * giving the per-driver semaphore. Weak so applications can wrap/override it.
 */
void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv)
{
    // Prevent race condition where interrupt triggered after a timeout waiting
    // for semaphore, but before NPU is reset. ethosu_wait() marks the job
    // result as TIMEOUT in that window; once set, the late interrupt must not
    // overwrite the result or give the semaphore.
    if (drv->job.result == ETHOSU_JOB_RESULT_TIMEOUT)
    {
        return;
    }

    // Order matters: state is set to DONE before the result so ethosu_wait()
    // can detect (via state) that this handler ran after its timeout.
    drv->job.state = ETHOSU_JOB_DONE;
    drv->job.result = ethosu_dev_handle_interrupt(&drv->dev) ? ETHOSU_JOB_RESULT_OK : ETHOSU_JOB_RESULT_ERROR;
    ethosu_semaphore_give(drv->semaphore);
}
396
397 /******************************************************************************
398 * Functions API
399 ******************************************************************************/
400
ethosu_set_basep_cache_mask(struct ethosu_driver * drv,uint8_t flush_mask,uint8_t invalidate_mask)401 void ethosu_set_basep_cache_mask(struct ethosu_driver *drv, uint8_t flush_mask, uint8_t invalidate_mask)
402 {
403 drv->basep_flush_mask = flush_mask;
404 drv->basep_invalidate_mask = invalidate_mask;
405 }
406
/*
 * Initialize a driver instance for the NPU at base_address.
 *
 * Lazily creates the process-wide mutex and semaphore shared by all driver
 * instances, stores the optional fast-memory region, sets the default cache
 * maintenance masks, initializes the device with the requested security state
 * and privilege mode, creates the per-driver completion semaphore, and
 * finally registers the handle in the global driver list.
 *
 * drv              Driver handle to initialize (storage owned by the caller).
 * base_address     NPU register base address.
 * fast_memory      Optional fast memory region (NULL/0 to disable remapping).
 * fast_memory_size Size of the fast memory region in bytes.
 * secure_enable    Requested security state (passed through to the device).
 * privilege_enable Requested privilege mode (passed through to the device).
 *
 * Returns 0 on success, -1 on failure.
 */
int ethosu_init(struct ethosu_driver *drv,
                void *const base_address,
                const void *fast_memory,
                const size_t fast_memory_size,
                uint32_t secure_enable,
                uint32_t privilege_enable)
{
    LOG_INFO("Initializing NPU: base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32
             ", privileged=%" PRIu32,
             base_address,
             fast_memory,
             fast_memory_size,
             secure_enable,
             privilege_enable);

    // Create the global mutex on first init (shared across all driver handles)
    if (!ethosu_mutex)
    {
        ethosu_mutex = ethosu_mutex_create();
        if (!ethosu_mutex)
        {
            LOG_ERR("Failed to create global driver mutex");
            return -1;
        }
    }

    // Create the global "available NPUs" semaphore on first init
    if (!ethosu_semaphore)
    {
        ethosu_semaphore = ethosu_semaphore_create();
        if (!ethosu_semaphore)
        {
            LOG_ERR("Failed to create global driver semaphore");
            return -1;
        }
    }

    drv->fast_memory = (uintptr_t)fast_memory;
    drv->fast_memory_size = fast_memory_size;
    drv->power_request_counter = 0;

    // Set default cache flush/clean and invalidate base pointer masks to invalidate the scratch
    // base pointer where Vela for TFLM is placing the scratch buffer (tensor arena)
    ethosu_set_basep_cache_mask(drv, (1 << SCRATCH_BASE_ADDR_INDEX), (1 << SCRATCH_BASE_ADDR_INDEX));

    // Initialize the device and set requested security state and privilege mode
    if (!ethosu_dev_init(&drv->dev, base_address, secure_enable, privilege_enable))
    {
        LOG_ERR("Failed to initialize Ethos-U device");
        return -1;
    }

    // Per-driver semaphore, given by the IRQ handler when an inference completes
    drv->semaphore = ethosu_semaphore_create();
    if (!drv->semaphore)
    {
        LOG_ERR("Failed to create driver semaphore");
        return -1;
    }

    ethosu_reset_job(drv);
    ethosu_register_driver(drv);

    return 0;
}
469
ethosu_deinit(struct ethosu_driver * drv)470 void ethosu_deinit(struct ethosu_driver *drv)
471 {
472 ethosu_deregister_driver(drv);
473 ethosu_semaphore_destroy(drv->semaphore);
474 }
475
ethosu_soft_reset(struct ethosu_driver * drv)476 int ethosu_soft_reset(struct ethosu_driver *drv)
477 {
478 // Soft reset the NPU
479 if (ethosu_dev_soft_reset(&drv->dev) != ETHOSU_SUCCESS)
480 {
481 LOG_ERR("Failed to soft-reset NPU");
482 return -1;
483 }
484
485 // Update power and clock gating after the soft reset
486 ethosu_dev_set_clock_and_power(&drv->dev,
487 drv->power_request_counter > 0 ? ETHOSU_CLOCK_Q_DISABLE : ETHOSU_CLOCK_Q_ENABLE,
488 drv->power_request_counter > 0 ? ETHOSU_POWER_Q_DISABLE : ETHOSU_POWER_Q_ENABLE);
489
490 return 0;
491 }
492
ethosu_request_power(struct ethosu_driver * drv)493 int ethosu_request_power(struct ethosu_driver *drv)
494 {
495 // Check if this is the first power request, increase counter
496 if (drv->power_request_counter++ == 0)
497 {
498 // Always reset to a known state. Changes to requested
499 // security state/privilege mode if necessary.
500 if (ethosu_soft_reset(drv))
501 {
502 LOG_ERR("Failed to request power for Ethos-U");
503 drv->power_request_counter--;
504 return -1;
505 }
506 }
507 return 0;
508 }
509
ethosu_release_power(struct ethosu_driver * drv)510 void ethosu_release_power(struct ethosu_driver *drv)
511 {
512 if (drv->power_request_counter == 0)
513 {
514 LOG_WARN("No power request left to release, reference counter is 0");
515 }
516 else
517 {
518 // Decrement ref counter and enable power gating if no requests remain
519 if (--drv->power_request_counter == 0)
520 {
521 ethosu_dev_set_clock_and_power(&drv->dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
522 }
523 }
524 }
525
/*
 * Fill in the compile-time driver version (major/minor/patch).
 * ver must be non-NULL (asserted).
 */
void ethosu_get_driver_version(struct ethosu_driver_version *ver)
{
    assert(ver != NULL);
    ver->major = ETHOSU_DRIVER_VERSION_MAJOR;
    ver->minor = ETHOSU_DRIVER_VERSION_MINOR;
    ver->patch = ETHOSU_DRIVER_VERSION_PATCH;
}
533
/*
 * Query hardware information from the device layer into *hw.
 * hw must be non-NULL (asserted).
 */
void ethosu_get_hw_info(struct ethosu_driver *drv, struct ethosu_hw_info *hw)
{
    assert(hw != NULL);
    ethosu_dev_get_hw_info(&drv->dev, hw);
}
539
/*
 * Wait for an inference started with ethosu_invoke_async() to finish.
 *
 * block == false: returns 1 immediately if the job is still running.
 * block == true:  sleeps on the per-driver semaphore (given by the IRQ
 *                 handler) until completion or the inference timeout.
 *
 * Returns:  0 inference finished successfully
 *           1 still running (non-blocking call only)
 *          -1 NPU error or timeout (NPU is soft-reset in that case)
 *          -2 no inference job was running
 */
int ethosu_wait(struct ethosu_driver *drv, bool block)
{
    int ret = 0;

    switch (drv->job.state)
    {
    case ETHOSU_JOB_IDLE:
        LOG_ERR("Inference job not running...");
        ret = -2;
        break;
    case ETHOSU_JOB_RUNNING:
        if (!block)
        {
            // Inference still running, do not block
            ret = 1;
            break;
        }
        // fall through
    case ETHOSU_JOB_DONE:
        // Invalidate cache for base pointers marked to be invalidated, typically the
        // scratch tensor base pointer containing the tensor arena.
        // NOTE: Requires the base pointers to be cache line size aligned.
        for (int i = 0; i < drv->job.num_base_addr; i++)
        {
            if (drv->basep_invalidate_mask & (1 << i))
            {
                ethosu_invalidate_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
            }
        }

        // Wait for interrupt in blocking mode. In non-blocking mode
        // the interrupt has already triggered
        ret = ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
        if (ret < 0)
        {
            // Mark the job as timed out; also gates the IRQ handler from
            // completing the job from here on (checked at handler entry).
            drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT;

            // There's a race where the NPU interrupt can have fired between semaphore
            // timing out and setting the result above (checked in interrupt handler).
            // By checking if the job state has been changed (only set to DONE by interrupt
            // handler), we know if the interrupt handler has run, if so decrement the
            // semaphore count by one (given in interrupt handler).
            if (drv->job.state == ETHOSU_JOB_DONE)
            {
                drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT; // Keep reporting timeout even though the IRQ completed
                ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
            }
        }

        // Inference done callback - always called even in case of timeout
        ethosu_inference_end(drv, drv->job.user_arg);

        // Release power gating disabled requirement
        ethosu_release_power(drv);

        // Check NPU and interrupt status
        if (drv->job.result)
        {
            if (drv->job.result == ETHOSU_JOB_RESULT_ERROR)
            {
                LOG_ERR("NPU error(s) occured during inference.");
                ethosu_dev_print_err_status(&drv->dev);
            }
            else
            {
                LOG_ERR("NPU inference timed out.");
            }

            // Reset the NPU to a known state after error/timeout
            (void)ethosu_soft_reset(drv);

            ret = -1;
        }
        else
        {
            LOG_DEBUG("Inference finished successfully...");
            ret = 0;
        }

        // Reset internal job (state resets to IDLE)
        ethosu_reset_job(drv);
        break;

    default:
        LOG_ERR("Unexpected job state");
        ethosu_reset_job(drv);
        ret = -1;
        break;
    }

    // Return inference job status
    return ret;
}
633
/*
 * Start an inference without waiting for completion (pair with ethosu_wait()).
 *
 * Validates the custom operator payload (FOURCC magic, 32-bit-word sized),
 * optionally redirects the fast-memory base pointer, then walks the payload
 * and dispatches each driver action (optimizer config verification, command
 * stream execution, NOP).
 *
 * drv              Reserved driver handle.
 * custom_data_ptr  Custom operator payload (Vela output), 16-byte aligned
 *                  command stream inside.
 * custom_data_size Payload size in bytes; must be a multiple of 4.
 * base_addr        Base addresses referenced by the command stream (may be
 *                  rewritten for the fast-memory index).
 * base_addr_size   Sizes in bytes of each base address region.
 * num_base_addr    Number of entries in base_addr/base_addr_size.
 * user_arg         Opaque pointer passed to the begin/end callbacks.
 *
 * Returns 0 if the inference was started, -1 on any error (job is reset).
 */
int ethosu_invoke_async(struct ethosu_driver *drv,
                        const void *custom_data_ptr,
                        const int custom_data_size,
                        uint64_t *const base_addr,
                        const size_t *base_addr_size,
                        const int num_base_addr,
                        void *user_arg)
{
    assert(custom_data_ptr != NULL);
    assert(base_addr != NULL);
    assert(base_addr_size != NULL);

    // Iterate the payload as a sequence of 32-bit driver-action words
    const struct cop_data_s *data_ptr = custom_data_ptr;
    const struct cop_data_s *data_end = (struct cop_data_s *)((ptrdiff_t)custom_data_ptr + custom_data_size);

    // Make sure an inference is not already running
    if (drv->job.state != ETHOSU_JOB_IDLE)
    {
        LOG_ERR("Inference already running, or waiting to be cleared...");
        return -1;
    }

    // Record the job parameters; state stays IDLE until the command stream
    // actually starts (set to RUNNING in handle_command_stream())
    drv->job.state = ETHOSU_JOB_IDLE;
    drv->job.custom_data_ptr = custom_data_ptr;
    drv->job.custom_data_size = custom_data_size;
    drv->job.base_addr = base_addr;
    drv->job.base_addr_size = base_addr_size;
    drv->job.num_base_addr = num_base_addr;
    drv->job.user_arg = user_arg;

    // First word in custom_data_ptr should contain "Custom Operator Payload 1"
    if (data_ptr->word != ETHOSU_FOURCC)
    {
        LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x", data_ptr->word, ETHOSU_FOURCC);
        goto err;
    }

    // Custom data length must be a multiple of 32 bits
    if ((custom_data_size % BYTES_IN_32_BITS) != 0)
    {
        LOG_ERR("custom_data_size=0x%x not a multiple of 4", (unsigned)custom_data_size);
        goto err;
    }

    // Skip the FOURCC word
    data_ptr++;

    // Adjust base address to fast memory area
    if (drv->fast_memory != 0 && num_base_addr > FAST_MEMORY_BASE_ADDR_INDEX)
    {
        if (base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size)
        {
            LOG_ERR("Fast memory area too small. fast_memory_size=%zu, base_addr_size=%zu",
                    drv->fast_memory_size,
                    base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]);
            goto err;
        }

        base_addr[FAST_MEMORY_BASE_ADDR_INDEX] = drv->fast_memory;
    }

    // Parse Custom Operator Payload data
    while (data_ptr < data_end)
    {
        switch (data_ptr->driver_action_command)
        {
        case OPTIMIZER_CONFIG:
            LOG_DEBUG("OPTIMIZER_CONFIG");
            struct opt_cfg_s const *opt_cfg_p = (const struct opt_cfg_s *)data_ptr;

            if (handle_optimizer_config(drv, opt_cfg_p) < 0)
            {
                goto err;
            }
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD;
            break;
        case COMMAND_STREAM:
            // Vela only supports putting one COMMAND_STREAM per op
            LOG_DEBUG("COMMAND_STREAM");
            const uint8_t *command_stream = (const uint8_t *)(data_ptr + 1);
            // 24-bit length: high byte in 'reserved', low 16 bits in 'length'
            int cms_length = (data_ptr->reserved << 16) | data_ptr->length;

            if (handle_command_stream(drv, command_stream, cms_length) < 0)
            {
                goto err;
            }
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length;
            break;
        case NOP:
            LOG_DEBUG("NOP");
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
            break;
        default:
            LOG_ERR("UNSUPPORTED driver_action_command: %u", data_ptr->driver_action_command);
            goto err;
            break;
        }
    }

    return 0;
err:
    LOG_ERR("Failed to invoke inference.");
    ethosu_reset_job(drv);
    return -1;
}
738
ethosu_invoke_v3(struct ethosu_driver * drv,const void * custom_data_ptr,const int custom_data_size,uint64_t * const base_addr,const size_t * base_addr_size,const int num_base_addr,void * user_arg)739 int ethosu_invoke_v3(struct ethosu_driver *drv,
740 const void *custom_data_ptr,
741 const int custom_data_size,
742 uint64_t *const base_addr,
743 const size_t *base_addr_size,
744 const int num_base_addr,
745 void *user_arg)
746 {
747 if (ethosu_invoke_async(
748 drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, user_arg) < 0)
749 {
750 return -1;
751 }
752
753 return ethosu_wait(drv, true);
754 }
755
ethosu_reserve_driver(void)756 struct ethosu_driver *ethosu_reserve_driver(void)
757 {
758 struct ethosu_driver *drv = NULL;
759
760 LOG_INFO("Acquiring NPU driver handle");
761 ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER); // This is meant to block until available
762
763 ethosu_mutex_lock(ethosu_mutex);
764 drv = registered_drivers;
765
766 while (drv != NULL)
767 {
768 if (!drv->reserved)
769 {
770 drv->reserved = true;
771 LOG_DEBUG("NPU driver handle %p reserved", drv);
772 break;
773 }
774 drv = drv->next;
775 }
776 ethosu_mutex_unlock(ethosu_mutex);
777
778 if (!drv)
779 {
780 LOG_ERR("No NPU driver handle available, but semaphore taken");
781 }
782
783 return drv;
784 }
785
/*
 * Release a driver handle previously obtained from ethosu_reserve_driver().
 * If an inference is still in flight it is given one non-blocking chance to
 * complete; otherwise the NPU is soft-reset and the job is discarded. The
 * global semaphore is given back so the NPU becomes available again.
 * Safe no-op for NULL or unreserved handles.
 */
void ethosu_release_driver(struct ethosu_driver *drv)
{
    ethosu_mutex_lock(ethosu_mutex);
    if (drv != NULL && drv->reserved)
    {
        if (drv->job.state == ETHOSU_JOB_RUNNING || drv->job.state == ETHOSU_JOB_DONE)
        {
            // Give the inference one shot to complete or force kill the job
            if (ethosu_wait(drv, false) == 1)
            {
                // Still running, soft reset the NPU and reset driver.
                // Power counter is zeroed so the reset re-enables gating.
                drv->power_request_counter = 0;
                ethosu_soft_reset(drv);
                ethosu_reset_job(drv);
            }
        }

        drv->reserved = false;
        LOG_DEBUG("NPU driver handle %p released", drv);
        // Hand the "available NPU" token back to ethosu_reserve_driver() waiters
        ethosu_semaphore_give(ethosu_semaphore);
    }
    ethosu_mutex_unlock(ethosu_mutex);
}
809