1 /*
2  * SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
3  * SPDX-License-Identifier: Apache-2.0
4  *
5  * Licensed under the Apache License, Version 2.0 (the License); you may
6  * not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
13  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /******************************************************************************
19  * Includes
20  ******************************************************************************/
21 
22 #include "ethosu_driver.h"
23 #include "ethosu_device.h"
24 #include "ethosu_log.h"
25 
26 #if defined(ETHOSU55)
27 #include "ethosu_config_u55.h"
28 #elif defined(ETHOSU65)
29 #include "ethosu_config_u65.h"
30 #elif defined(ETHOSU85)
31 #include "ethosu_config_u85.h"
32 #else
33 #error Missing device type macro
34 #endif
35 
#include <assert.h>
#include <cmsis_compiler.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
43 
44 /******************************************************************************
45  * Defines
46  ******************************************************************************/
47 
48 #define UNUSED(x) ((void)x)
49 
50 #define BYTES_IN_32_BITS 4
51 #define MASK_16_BYTE_ALIGN (0xF)
52 #define OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD 2
53 #define DRIVER_ACTION_LENGTH_32_BIT_WORD 1
54 #define ETHOSU_FOURCC ('1' << 24 | 'P' << 16 | 'O' << 8 | 'C') // "Custom Operator Payload 1"
55 
56 #define SCRATCH_BASE_ADDR_INDEX 1
57 #define FAST_MEMORY_BASE_ADDR_INDEX 2
58 
59 /******************************************************************************
60  * Types
61  ******************************************************************************/
62 
63 // Driver actions
64 enum DRIVER_ACTION_e
65 {
66     RESERVED         = 0,
67     OPTIMIZER_CONFIG = 1,
68     COMMAND_STREAM   = 2,
69     NOP              = 5,
70 };
71 
72 // Custom operator payload data struct
73 struct cop_data_s
74 {
75     union
76     {
77         // Driver action data
78         struct
79         {
80             uint8_t driver_action_command; // (valid values in DRIVER_ACTION_e)
81             uint8_t reserved;
82 
83             // Driver action data
84             union
85             {
86                 // DA_CMD_OPT_CFG
87                 struct
88                 {
89                     uint16_t rel_nbr : 4;
90                     uint16_t patch_nbr : 4;
91                     uint16_t opt_cfg_reserved : 8;
92                 };
93 
94                 // DA_CMD_CMSTRM
95                 struct
96                 {
97                     uint16_t length;
98                 };
99 
100                 uint16_t driver_action_data;
101             };
102         };
103 
104         uint32_t word;
105     };
106 };
107 
108 // optimizer config struct
109 struct opt_cfg_s
110 {
111     struct cop_data_s da_data;
112     uint32_t cfg;
113     uint32_t id;
114 };
115 
116 /******************************************************************************
117  * Variables
118  ******************************************************************************/
119 
120 // Registered drivers linked list HEAD
121 static struct ethosu_driver *registered_drivers = NULL;
122 
123 /******************************************************************************
124  * Weak functions - Cache
125  *
126  * Default NOP operations. Override if available on the targeted device.
127  ******************************************************************************/
128 
129 /*
130  * Flush/clean the data cache by address and size. Passing NULL as p argument
131  * expects the whole cache to be flushed.
132  */
void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes)
{
    // Default no-op. Override on targets with a data cache to flush/clean
    // [p, p + bytes); p == NULL means the whole cache.
    (void)p;
    (void)bytes;
}
138 
139 /*
140  * Invalidate the data cache by address and size. Passing NULL as p argument
141  * expects the whole cache to be invalidated.
142  */
void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes)
{
    // Default no-op. Override on targets with a data cache to invalidate
    // [p, p + bytes); p == NULL means the whole cache.
    (void)p;
    (void)bytes;
}
148 
149 /******************************************************************************
150  * Weak functions - Semaphore/Mutex for multi NPU
151  *
152  * Following section handles the minimal semaphore and mutex implementation in
153  * case of baremetal applications. Weak symbols will be overridden by RTOS
154  * definitions and implement true thread-safety (in application layer).
155  ******************************************************************************/
156 
// Minimal counting-semaphore state used by the baremetal weak
// semaphore functions below. RTOS ports override those functions and
// never touch this struct.
struct ethosu_semaphore_t
{
    uint8_t count;
};

// Global mutex guarding the registered_drivers list; lazily created on
// first ethosu_init() call.
static void *ethosu_mutex;
// Global counting semaphore tracking the number of available
// (registered, unreserved) driver handles; lazily created on first
// ethosu_init() call.
static void *ethosu_semaphore;
164 
// Baremetal mutex stand-in: hands back the address of a static placeholder.
// No real locking is needed in a single-threaded environment; RTOS ports
// override this with a genuine mutex.
void *__attribute__((weak)) ethosu_mutex_create(void)
{
    static uint8_t dummy_mutex;
    void *handle = &dummy_mutex;
    return handle;
}
170 
void __attribute__((weak)) ethosu_mutex_destroy(void *mutex)
{
    // Nothing to release for the baremetal placeholder mutex.
    (void)mutex;
}
175 
int __attribute__((weak)) ethosu_mutex_lock(void *mutex)
{
    // Baremetal: no contention possible, always succeeds.
    (void)mutex;
    return 0;
}
181 
int __attribute__((weak)) ethosu_mutex_unlock(void *mutex)
{
    // Baremetal: nothing to unlock, always succeeds.
    (void)mutex;
    return 0;
}
187 
188 // Baremetal implementation of creating a semaphore
ethosu_semaphore_create(void)189 void *__attribute__((weak)) ethosu_semaphore_create(void)
190 {
191     struct ethosu_semaphore_t *sem = malloc(sizeof(*sem));
192     if (sem != NULL)
193     {
194         sem->count = 0;
195     }
196     return sem;
197 }
198 
// Baremetal semaphore destructor. free(NULL) is a no-op, so no guard is
// needed and the cast is unnecessary.
void __attribute__((weak)) ethosu_semaphore_destroy(void *sem)
{
    free(sem);
}
203 
204 // Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrinsics
ethosu_semaphore_take(void * sem,uint64_t timeout)205 int __attribute__((weak)) ethosu_semaphore_take(void *sem, uint64_t timeout)
206 {
207     UNUSED(timeout);
208     // Baremetal pseudo-example on how to trigger a timeout:
209     // if (timeout != ETHOSU_SEMAPHORE_WAIT_FOREVER) {
210     //     setup_a_timer_to_call_SEV_after_time(timeout);
211     // }
212     struct ethosu_semaphore_t *s = sem;
213     while (s->count == 0)
214     {
215         __WFE();
216         // Baremetal pseudo-example check if timeout triggered:
217         // if (SEV_timer_triggered()) {
218         //     return -1;
219         // }
220     }
221     s->count--;
222     return 0;
223 }
224 
225 // Baremetal simulation of giving a semaphore and waking up processes using intrinsics
ethosu_semaphore_give(void * sem)226 int __attribute__((weak)) ethosu_semaphore_give(void *sem)
227 {
228     struct ethosu_semaphore_t *s = sem;
229     s->count++;
230     __SEV();
231     return 0;
232 }
233 
234 /******************************************************************************
235  * Weak functions - Inference begin/end callbacks
236  ******************************************************************************/
237 
// Default no-op callback invoked right before a command stream is started.
// Override to hook profiling/tracing; user_arg is the pointer passed to
// ethosu_invoke_async().
void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg)
{
    (void)drv;
    (void)user_arg;
}
243 
// Default no-op callback invoked after an inference finishes (also on
// timeout - see ethosu_wait). Override to hook profiling/tracing.
void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, void *user_arg)
{
    (void)drv;
    (void)user_arg;
}
249 
250 /******************************************************************************
251  * Static functions
252  ******************************************************************************/
// Link a newly initialized driver handle into the global driver list and
// signal its availability to ethosu_reserve_driver() waiters.
static void ethosu_register_driver(struct ethosu_driver *drv)
{
    ethosu_mutex_lock(ethosu_mutex);
    // Push onto the head of the singly linked list
    drv->next          = registered_drivers;
    registered_drivers = drv;
    ethosu_mutex_unlock(ethosu_mutex);

    // One more driver handle available for reservation
    ethosu_semaphore_give(ethosu_semaphore);

    LOG_INFO("New NPU driver registered (handle: 0x%p, NPU: 0x%p)", drv, drv->dev.reg);
}
264 
ethosu_deregister_driver(struct ethosu_driver * drv)265 static int ethosu_deregister_driver(struct ethosu_driver *drv)
266 {
267     struct ethosu_driver *curr;
268     struct ethosu_driver **prev;
269 
270     ethosu_mutex_lock(ethosu_mutex);
271     curr = registered_drivers;
272     prev = &registered_drivers;
273 
274     while (curr != NULL)
275     {
276         if (curr == drv)
277         {
278             *prev = curr->next;
279             LOG_INFO("NPU driver handle %p deregistered.", drv);
280             ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER);
281             break;
282         }
283 
284         prev = &curr->next;
285         curr = curr->next;
286     }
287 
288     ethosu_mutex_unlock(ethosu_mutex);
289 
290     if (curr == NULL)
291     {
292         LOG_ERR("No NPU driver handle registered at address %p.", drv);
293         return -1;
294     }
295 
296     return 0;
297 }
298 
ethosu_reset_job(struct ethosu_driver * drv)299 static void ethosu_reset_job(struct ethosu_driver *drv)
300 {
301     memset(&drv->job, 0, sizeof(struct ethosu_job));
302 }
303 
handle_optimizer_config(struct ethosu_driver * drv,struct opt_cfg_s const * opt_cfg_p)304 static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s const *opt_cfg_p)
305 {
306     LOG_INFO("Optimizer release nbr: %u patch: %u", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr);
307 
308     if (ethosu_dev_verify_optimizer_config(&drv->dev, opt_cfg_p->cfg, opt_cfg_p->id) != true)
309     {
310         return -1;
311     }
312 
313     return 0;
314 }
315 
/*
 * Validate alignment requirements, flush marked base pointers, request
 * power and hand the command stream to the NPU. cms_length is the stream
 * length in 32-bit words. Returns 0 when the stream has been started
 * (completion is asynchronous - collected in ethosu_wait()), -1 on
 * validation or power request failure.
 *
 * NOTE: the ordering below is deliberate: power is requested before the
 * job state changes to RUNNING, and the begin-callback fires before the
 * hardware is started.
 */
static int handle_command_stream(struct ethosu_driver *drv, const uint8_t *cmd_stream, const int cms_length)
{
    uint32_t cms_bytes = cms_length * BYTES_IN_32_BITS;

    LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d", cmd_stream, cms_length);

    // The command stream itself must be 16 byte aligned
    if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN))
    {
        LOG_ERR("Command stream addr %p not aligned to 16 bytes", cmd_stream);
        return -1;
    }

    // Verify minimum 16 byte alignment for base addresses
    for (int i = 0; i < drv->job.num_base_addr; i++)
    {
        if (0 != (drv->job.base_addr[i] & MASK_16_BYTE_ALIGN))
        {
            LOG_ERR("Base addr %d: 0x%" PRIx64 "not aligned to 16 bytes", i, drv->job.base_addr[i]);
            return -1;
        }
    }

    // DEPRECATION WARNING:
    // It is advised against letting the driver handle flushing/cleaning of the cache, as this will
    // be done for every invocation. It is up to the application code to ensure cache coherency
    // before invoking an inference.
    //
    // The cache flush call below will flush/clean every base pointer marked in the flush mask.
    // Typically only the scratch tensor contains RW data shared between the CPU and NPU, and needs
    // to be flushed/cleaned before invoking an inference.
    //
    // It is recommended to not implement/override the default empty ethosu_flush_dcache() weak
    // function.
    //
    // NOTE: It is required that any base pointer marked for cache flush/clean is aligned to the
    // cache line size.

    // Flush/clean the cache for base pointers marked in the mask
    for (int i = 0; i < drv->job.num_base_addr; i++)
    {
        if (drv->basep_flush_mask & (1 << i))
        {
            ethosu_flush_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
        }
    }

    // Request power gating disabled during inference run
    if (ethosu_request_power(drv))
    {
        LOG_ERR("Failed to request power");
        return -1;
    }

    // From here on the job is owned by the hardware/IRQ handler
    drv->job.state = ETHOSU_JOB_RUNNING;

    // Inference begin callback
    ethosu_inference_begin(drv, drv->job.user_arg);

    // Execute the command stream
    ethosu_dev_run_command_stream(&drv->dev, cmd_stream, cms_bytes, drv->job.base_addr, drv->job.num_base_addr);

    return 0;
}
379 
380 /******************************************************************************
381  * Weak functions - Interrupt handler
382  ******************************************************************************/
/*
 * Default NPU interrupt top-half. Must be invoked from the device's
 * interrupt context (or an RTOS-wrapped equivalent) with the driver
 * handle that owns the interrupting NPU.
 */
void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv)
{
    // Prevent race condition where interrupt triggered after a timeout waiting
    // for semaphore, but before NPU is reset.
    if (drv->job.result == ETHOSU_JOB_RESULT_TIMEOUT)
    {
        return;
    }

    // Mark the job done and record whether the device reported an error.
    // ethosu_wait() inspects both fields after being woken below.
    drv->job.state  = ETHOSU_JOB_DONE;
    drv->job.result = ethosu_dev_handle_interrupt(&drv->dev) ? ETHOSU_JOB_RESULT_OK : ETHOSU_JOB_RESULT_ERROR;
    // Wake the thread blocked in ethosu_wait()
    ethosu_semaphore_give(drv->semaphore);
}
396 
397 /******************************************************************************
398  * Functions API
399  ******************************************************************************/
400 
// Configure which base pointers the driver flushes before an inference
// (flush_mask, used in handle_command_stream) and invalidates afterwards
// (invalidate_mask, used in ethosu_wait). Bit i of each mask corresponds
// to base_addr[i] of the job.
void ethosu_set_basep_cache_mask(struct ethosu_driver *drv, uint8_t flush_mask, uint8_t invalidate_mask)
{
    drv->basep_flush_mask      = flush_mask;
    drv->basep_invalidate_mask = invalidate_mask;
}
406 
ethosu_init(struct ethosu_driver * drv,void * const base_address,const void * fast_memory,const size_t fast_memory_size,uint32_t secure_enable,uint32_t privilege_enable)407 int ethosu_init(struct ethosu_driver *drv,
408                 void *const base_address,
409                 const void *fast_memory,
410                 const size_t fast_memory_size,
411                 uint32_t secure_enable,
412                 uint32_t privilege_enable)
413 {
414     LOG_INFO("Initializing NPU: base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32
415              ", privileged=%" PRIu32,
416              base_address,
417              fast_memory,
418              fast_memory_size,
419              secure_enable,
420              privilege_enable);
421 
422     if (!ethosu_mutex)
423     {
424         ethosu_mutex = ethosu_mutex_create();
425         if (!ethosu_mutex)
426         {
427             LOG_ERR("Failed to create global driver mutex");
428             return -1;
429         }
430     }
431 
432     if (!ethosu_semaphore)
433     {
434         ethosu_semaphore = ethosu_semaphore_create();
435         if (!ethosu_semaphore)
436         {
437             LOG_ERR("Failed to create global driver semaphore");
438             return -1;
439         }
440     }
441 
442     drv->fast_memory           = (uintptr_t)fast_memory;
443     drv->fast_memory_size      = fast_memory_size;
444     drv->power_request_counter = 0;
445 
446     // Set default cache flush/clean and invalidate base pointer masks to invalidate the scratch
447     // base pointer where Vela for TFLM is placing the scratch buffer (tensor arena)
448     ethosu_set_basep_cache_mask(drv, (1 << SCRATCH_BASE_ADDR_INDEX), (1 << SCRATCH_BASE_ADDR_INDEX));
449 
450     // Initialize the device and set requested security state and privilege mode
451     if (!ethosu_dev_init(&drv->dev, base_address, secure_enable, privilege_enable))
452     {
453         LOG_ERR("Failed to initialize Ethos-U device");
454         return -1;
455     }
456 
457     drv->semaphore = ethosu_semaphore_create();
458     if (!drv->semaphore)
459     {
460         LOG_ERR("Failed to create driver semaphore");
461         return -1;
462     }
463 
464     ethosu_reset_job(drv);
465     ethosu_register_driver(drv);
466 
467     return 0;
468 }
469 
ethosu_deinit(struct ethosu_driver * drv)470 void ethosu_deinit(struct ethosu_driver *drv)
471 {
472     ethosu_deregister_driver(drv);
473     ethosu_semaphore_destroy(drv->semaphore);
474 }
475 
ethosu_soft_reset(struct ethosu_driver * drv)476 int ethosu_soft_reset(struct ethosu_driver *drv)
477 {
478     // Soft reset the NPU
479     if (ethosu_dev_soft_reset(&drv->dev) != ETHOSU_SUCCESS)
480     {
481         LOG_ERR("Failed to soft-reset NPU");
482         return -1;
483     }
484 
485     // Update power and clock gating after the soft reset
486     ethosu_dev_set_clock_and_power(&drv->dev,
487                                    drv->power_request_counter > 0 ? ETHOSU_CLOCK_Q_DISABLE : ETHOSU_CLOCK_Q_ENABLE,
488                                    drv->power_request_counter > 0 ? ETHOSU_POWER_Q_DISABLE : ETHOSU_POWER_Q_ENABLE);
489 
490     return 0;
491 }
492 
ethosu_request_power(struct ethosu_driver * drv)493 int ethosu_request_power(struct ethosu_driver *drv)
494 {
495     // Check if this is the first power request, increase counter
496     if (drv->power_request_counter++ == 0)
497     {
498         // Always reset to a known state. Changes to requested
499         // security state/privilege mode if necessary.
500         if (ethosu_soft_reset(drv))
501         {
502             LOG_ERR("Failed to request power for Ethos-U");
503             drv->power_request_counter--;
504             return -1;
505         }
506     }
507     return 0;
508 }
509 
ethosu_release_power(struct ethosu_driver * drv)510 void ethosu_release_power(struct ethosu_driver *drv)
511 {
512     if (drv->power_request_counter == 0)
513     {
514         LOG_WARN("No power request left to release, reference counter is 0");
515     }
516     else
517     {
518         // Decrement ref counter and enable power gating if no requests remain
519         if (--drv->power_request_counter == 0)
520         {
521             ethosu_dev_set_clock_and_power(&drv->dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
522         }
523     }
524 }
525 
ethosu_get_driver_version(struct ethosu_driver_version * ver)526 void ethosu_get_driver_version(struct ethosu_driver_version *ver)
527 {
528     assert(ver != NULL);
529     ver->major = ETHOSU_DRIVER_VERSION_MAJOR;
530     ver->minor = ETHOSU_DRIVER_VERSION_MINOR;
531     ver->patch = ETHOSU_DRIVER_VERSION_PATCH;
532 }
533 
// Query hardware information from the NPU owned by drv and store it in
// *hw. hw must not be NULL.
void ethosu_get_hw_info(struct ethosu_driver *drv, struct ethosu_hw_info *hw)
{
    assert(hw != NULL);
    ethosu_dev_get_hw_info(&drv->dev, hw);
}
539 
ethosu_wait(struct ethosu_driver * drv,bool block)540 int ethosu_wait(struct ethosu_driver *drv, bool block)
541 {
542     int ret = 0;
543 
544     switch (drv->job.state)
545     {
546     case ETHOSU_JOB_IDLE:
547         LOG_ERR("Inference job not running...");
548         ret = -2;
549         break;
550     case ETHOSU_JOB_RUNNING:
551         if (!block)
552         {
553             // Inference still running, do not block
554             ret = 1;
555             break;
556         }
557         // fall through
558     case ETHOSU_JOB_DONE:
559         // Invalidate cache for base pointers marked to be invalidated, typically the
560         // scratch tensor base pointer containing the tensor arena.
561         // NOTE: Requires the base pointers to be cache line size aligned.
562         for (int i = 0; i < drv->job.num_base_addr; i++)
563         {
564             if (drv->basep_invalidate_mask & (1 << i))
565             {
566                 ethosu_invalidate_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
567             }
568         }
569 
570         // Wait for interrupt in blocking mode. In non-blocking mode
571         // the interrupt has already triggered
572         ret = ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
573         if (ret < 0)
574         {
575             drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT;
576 
577             // There's a race where the NPU interrupt can have fired between semaphore
578             // timing out and setting the result above (checked in interrupt handler).
579             // By checking if the job state has been changed (only set to DONE by interrupt
580             // handler), we know if the interrupt handler has run, if so decrement the
581             // semaphore count by one (given in interrupt handler).
582             if (drv->job.state == ETHOSU_JOB_DONE)
583             {
584                 drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT; // Reset back to timeout
585                 ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
586             }
587         }
588 
589         // Inference done callback - always called even in case of timeout
590         ethosu_inference_end(drv, drv->job.user_arg);
591 
592         // Release power gating disabled requirement
593         ethosu_release_power(drv);
594 
595         // Check NPU and interrupt status
596         if (drv->job.result)
597         {
598             if (drv->job.result == ETHOSU_JOB_RESULT_ERROR)
599             {
600                 LOG_ERR("NPU error(s) occured during inference.");
601                 ethosu_dev_print_err_status(&drv->dev);
602             }
603             else
604             {
605                 LOG_ERR("NPU inference timed out.");
606             }
607 
608             // Reset the NPU
609             (void)ethosu_soft_reset(drv);
610 
611             ret = -1;
612         }
613         else
614         {
615             LOG_DEBUG("Inference finished successfully...");
616             ret = 0;
617         }
618 
619         // Reset internal job (state resets to IDLE)
620         ethosu_reset_job(drv);
621         break;
622 
623     default:
624         LOG_ERR("Unexpected job state");
625         ethosu_reset_job(drv);
626         ret = -1;
627         break;
628     }
629 
630     // Return inference job status
631     return ret;
632 }
633 
/*
 * Parse the custom operator payload and start an inference asynchronously.
 *
 * The payload (custom_data_ptr, custom_data_size bytes) is a sequence of
 * 32-bit words: a "COP1" magic word followed by driver actions (see
 * DRIVER_ACTION_e). Base pointer arrays describe the tensors/arena used
 * by the command stream. Returns 0 once the command stream has been
 * handed to the NPU - completion must be collected with ethosu_wait() -
 * or -1 on any validation failure (the job is reset).
 */
int ethosu_invoke_async(struct ethosu_driver *drv,
                        const void *custom_data_ptr,
                        const int custom_data_size,
                        uint64_t *const base_addr,
                        const size_t *base_addr_size,
                        const int num_base_addr,
                        void *user_arg)
{
    assert(custom_data_ptr != NULL);
    assert(base_addr != NULL);
    assert(base_addr_size != NULL);

    // Walk the payload as a sequence of 32-bit words
    const struct cop_data_s *data_ptr = custom_data_ptr;
    const struct cop_data_s *data_end = (struct cop_data_s *)((ptrdiff_t)custom_data_ptr + custom_data_size);

    // Make sure an inference is not already running
    if (drv->job.state != ETHOSU_JOB_IDLE)
    {
        LOG_ERR("Inference already running, or waiting to be cleared...");
        return -1;
    }

    // Record job parameters; state moves to RUNNING in handle_command_stream()
    drv->job.state            = ETHOSU_JOB_IDLE;
    drv->job.custom_data_ptr  = custom_data_ptr;
    drv->job.custom_data_size = custom_data_size;
    drv->job.base_addr        = base_addr;
    drv->job.base_addr_size   = base_addr_size;
    drv->job.num_base_addr    = num_base_addr;
    drv->job.user_arg         = user_arg;

    // First word in custom_data_ptr should contain "Custom Operator Payload 1"
    if (data_ptr->word != ETHOSU_FOURCC)
    {
        LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x", data_ptr->word, ETHOSU_FOURCC);
        goto err;
    }

    // Custom data length must be a multiple of 32 bits
    if ((custom_data_size % BYTES_IN_32_BITS) != 0)
    {
        LOG_ERR("custom_data_size=0x%x not a multiple of 4", (unsigned)custom_data_size);
        goto err;
    }

    // Skip the magic word
    data_ptr++;

    // Adjust base address to fast memory area
    if (drv->fast_memory != 0 && num_base_addr > FAST_MEMORY_BASE_ADDR_INDEX)
    {
        if (base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size)
        {
            LOG_ERR("Fast memory area too small. fast_memory_size=%zu, base_addr_size=%zu",
                    drv->fast_memory_size,
                    base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]);
            goto err;
        }

        base_addr[FAST_MEMORY_BASE_ADDR_INDEX] = drv->fast_memory;
    }

    // Parse Custom Operator Payload data
    while (data_ptr < data_end)
    {
        switch (data_ptr->driver_action_command)
        {
        case OPTIMIZER_CONFIG:
            LOG_DEBUG("OPTIMIZER_CONFIG");
            struct opt_cfg_s const *opt_cfg_p = (const struct opt_cfg_s *)data_ptr;

            if (handle_optimizer_config(drv, opt_cfg_p) < 0)
            {
                goto err;
            }
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD;
            break;
        case COMMAND_STREAM:
            // Vela only supports putting one COMMAND_STREAM per op
            LOG_DEBUG("COMMAND_STREAM");
            const uint8_t *command_stream = (const uint8_t *)(data_ptr + 1);
            // 24-bit stream length in words: high byte in 'reserved', low 16 bits in 'length'
            int cms_length                = (data_ptr->reserved << 16) | data_ptr->length;

            if (handle_command_stream(drv, command_stream, cms_length) < 0)
            {
                goto err;
            }
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length;
            break;
        case NOP:
            LOG_DEBUG("NOP");
            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
            break;
        default:
            LOG_ERR("UNSUPPORTED driver_action_command: %u", data_ptr->driver_action_command);
            goto err;
            break;
        }
    }

    return 0;
err:
    LOG_ERR("Failed to invoke inference.");
    ethosu_reset_job(drv);
    return -1;
}
738 
ethosu_invoke_v3(struct ethosu_driver * drv,const void * custom_data_ptr,const int custom_data_size,uint64_t * const base_addr,const size_t * base_addr_size,const int num_base_addr,void * user_arg)739 int ethosu_invoke_v3(struct ethosu_driver *drv,
740                      const void *custom_data_ptr,
741                      const int custom_data_size,
742                      uint64_t *const base_addr,
743                      const size_t *base_addr_size,
744                      const int num_base_addr,
745                      void *user_arg)
746 {
747     if (ethosu_invoke_async(
748             drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, user_arg) < 0)
749     {
750         return -1;
751     }
752 
753     return ethosu_wait(drv, true);
754 }
755 
ethosu_reserve_driver(void)756 struct ethosu_driver *ethosu_reserve_driver(void)
757 {
758     struct ethosu_driver *drv = NULL;
759 
760     LOG_INFO("Acquiring NPU driver handle");
761     ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER); // This is meant to block until available
762 
763     ethosu_mutex_lock(ethosu_mutex);
764     drv = registered_drivers;
765 
766     while (drv != NULL)
767     {
768         if (!drv->reserved)
769         {
770             drv->reserved = true;
771             LOG_DEBUG("NPU driver handle %p reserved", drv);
772             break;
773         }
774         drv = drv->next;
775     }
776     ethosu_mutex_unlock(ethosu_mutex);
777 
778     if (!drv)
779     {
780         LOG_ERR("No NPU driver handle available, but semaphore taken");
781     }
782 
783     return drv;
784 }
785 
ethosu_release_driver(struct ethosu_driver * drv)786 void ethosu_release_driver(struct ethosu_driver *drv)
787 {
788     ethosu_mutex_lock(ethosu_mutex);
789     if (drv != NULL && drv->reserved)
790     {
791         if (drv->job.state == ETHOSU_JOB_RUNNING || drv->job.state == ETHOSU_JOB_DONE)
792         {
793             // Give the inference one shot to complete or force kill the job
794             if (ethosu_wait(drv, false) == 1)
795             {
796                 // Still running, soft reset the NPU and reset driver
797                 drv->power_request_counter = 0;
798                 ethosu_soft_reset(drv);
799                 ethosu_reset_job(drv);
800             }
801         }
802 
803         drv->reserved = false;
804         LOG_DEBUG("NPU driver handle %p released", drv);
805         ethosu_semaphore_give(ethosu_semaphore);
806     }
807     ethosu_mutex_unlock(ethosu_mutex);
808 }
809