/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace internal {

// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    SimpleMemoryAllocator* allocator, bool allocate_temp,
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result);

// Holds placeholder information for a scratch buffer request from a kernel.
// This struct is only used during the model prepare stage. Each request from a
// kernel is stored in the head section. During the prepare stage, the head
// section will hold at least kMaxScratchBuffersPerOp requests plus any
// requests from previously prepared kernels.
//
// When the memory plan is finalized, these structs are no longer used in favor
// of a sequential array of ScratchBufferHandle allocations in the tail
// section. These allocations are indexed by the request API defined in the
// TfLiteContext struct.
typedef struct {
  // Number of bytes required by the buffer. The actual allocated size might be
  // greater than `bytes` due to buffer alignment.
  size_t bytes;
  // Node for which the buffer is allocated. This provides useful information
  // to determine the lifetime of the buffer. In AllocationInfo, this buffer
  // will have `before` = node_idx and `after` = node_idx.
  int node_idx;
} ScratchBufferRequest;

}  // namespace internal

typedef struct {
  TfLiteNode node;
  const TfLiteRegistration* registration;
} NodeAndRegistration;

// Holds a pointer to a buffer for a scratch buffer requested by a kernel during
// the model prepare stage. This struct is allocated in-place and allows for
// quick pointer-indexed lookup for speed during model inference.
typedef struct {
  // Pointer to location of the scratch buffer:
  uint8_t* data;
} ScratchBufferHandle;

// Stores all per-subgraph allocations. This includes the node and registration
// array, tensor list and scratch buffer handles for each subgraph.
typedef struct {
  NodeAndRegistration* node_and_registrations;
  TfLiteEvalTensor* tensors;
} SubgraphAllocations;
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies
// on an additional allocator - SimpleMemoryAllocator - for all allocations
// from an arena. These allocations are divided into head (non-persistent) and
// tail (persistent) regions:
//
// Memory layout to help understand how it works (this may change in future
// versions):
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
//                                               - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance.
  // Note: Make sure tensor_arena is 16-byte aligned (e.g. with alignas(16));
  // otherwise some head room will be wasted.
  // TODO(b/157615197): Cleanup constructor + factory usage.
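  //
  // Example usage (a minimal sketch; `kArenaSize` is an illustrative constant
  // and MicroErrorReporter comes from micro_error_reporter.h, which is not
  // included by this header):
  //
  //   alignas(16) static uint8_t tensor_arena[kArenaSize];
  //   static tflite::MicroErrorReporter micro_error_reporter;
  //   tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
  //       tensor_arena, kArenaSize, &micro_error_reporter);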
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);

  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance. This allocator instance will use the SimpleMemoryAllocator
  // instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                ErrorReporter* error_reporter);

  // Allocates internal resources required for model inference for each
  // subgraph from the arena.
  //
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed by a call to FinishModelAllocation(). Returns a
  // pointer to an array of SubgraphAllocations (also stored in the tail of the
  // arena) where each index corresponds to a different subgraph in the model.
  // Returns nullptr if the allocations fail.
  SubgraphAllocations* StartModelAllocation(const Model* model);

  // Finishes allocating internal resources required for model inference.
  //
  // -Plan the memory for activation tensors and scratch buffers.
  // -Update eval tensors for each subgraph based on planned offsets.
  // -Allocate scratch buffer handles array and update based on planned offsets.
  //
  // This method should be called after assigning model resources
  // in StartModelAllocation(). The subgraph_allocations pointer should be the
  // value passed into this class during StartModelAllocation(). Scratch buffer
  // handles are stored in the out-param `scratch_buffer_handles` array, which
  // is allocated in this method. This value will be used in the
  // `GetScratchBuffer` call to retrieve scratch buffers.
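  //
  // Typical flow (a minimal sketch; `model` is assumed to be obtained via
  // tflite::GetModel(), and kernel init/prepare runs between the two calls):
  //
  //   SubgraphAllocations* allocations =
  //       allocator->StartModelAllocation(model);
  //   ScratchBufferHandle* scratch_buffer_handles = nullptr;
  //   TfLiteStatus status = allocator->FinishModelAllocation(
  //       model, allocations, &scratch_buffer_handles);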
  TfLiteStatus FinishModelAllocation(
      const Model* model, SubgraphAllocations* subgraph_allocations,
      ScratchBufferHandle** scratch_buffer_handles);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is valid for the lifetime of the application.
  // The subgraph_allocations pointer should be the value passed into this
  // class during StartModelAllocation() and contains the source-of-truth for
  // buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, const SubgraphAllocations* subgraph_allocations,
      int tensor_index, int subgraph_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only valid until a call is made to
  // ResetTempAllocations(). The subgraph_allocations parameter contains the
  // array of TfLiteEvalTensors. If the newly allocated temp tensor at the
  // specified subgraph and tensor index is already present in the
  // TfLiteEvalTensor array, its data buffer will be re-used.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(
      const Model* model, const SubgraphAllocations* subgraph_allocations,
      int tensor_index, int subgraph_index);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. a chain of TfLiteTensor objects allocated
  // via AllocateTempTfLiteTensor()).
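  //
  // Example pattern (a minimal sketch; `model` and `subgraph_allocations` are
  // assumed to be set up as described above):
  //
  //   TfLiteTensor* tensor = allocator->AllocateTempTfLiteTensor(
  //       model, subgraph_allocations, /*tensor_index=*/0,
  //       /*subgraph_index=*/0);
  //   // ... read tensor properties ...
  //   allocator->ResetTempAllocations();  // `tensor` is now invalid.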
  virtual void ResetTempAllocations();

  // Allocates a persistent buffer with the same lifetime as the allocator.
  // The memory is immediately available and is allocated from the tail of the
  // arena.
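  //
  // Example (a minimal sketch; kernels normally reach this through the
  // TfLiteContext hooks rather than calling the allocator directly):
  //
  //   int32_t* op_data = static_cast<int32_t*>(
  //       allocator->AllocatePersistentBuffer(4 * sizeof(int32_t)));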
  virtual void* AllocatePersistentBuffer(size_t bytes);

  // Registers a scratch buffer of size `bytes` for the node currently being
  // prepared. This method only requests a buffer with a given size to be used
  // after a model has finished allocation via FinishModelAllocation(). All
  // requested buffers will be accessible by the out-param in that method.
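  //
  // Example (a minimal sketch of a prepare-stage request; the returned index
  // is later resolved to a pointer once the memory plan is finalized):
  //
  //   int buffer_idx = -1;
  //   TfLiteStatus status = allocator->RequestScratchBufferInArena(
  //       /*bytes=*/1024, /*subgraph_idx=*/0, &buffer_idx);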
  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx,
                                           int* buffer_idx);

  // Finishes allocating a specific NodeAndRegistration prepare block (kernel
  // entry for a model) with a given node ID. This call ensures that any scratch
  // buffer requests and temporary allocations are handled and ready for the
  // next node prepare block.
  TfLiteStatus FinishPrepareNodeAllocations(int node_id);

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

  // Converts a flatbuffer int32_t array to a TfLiteIntArray, accounting for
  // endianness.
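  //
  // Example (a minimal sketch; `flatbuffer_tensor` is an illustrative
  // tflite::Tensor pointer from the model):
  //
  //   TfLiteIntArray* shape = nullptr;
  //   TfLiteStatus status = allocator->FlatBufferVectorToTfLiteTypeArray(
  //       flatbuffer_tensor->shape(), &shape);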
  TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
      const flatbuffers::Vector<int32_t>* flatbuffer_array,
      TfLiteIntArray** result);

  BuiltinDataAllocator* GetBuiltinDataAllocator();

 protected:
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold the node and registration
  // pointers required to represent the inference graph of the model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, SubgraphAllocations* subgraph_allocations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of truth
  // for all tensor buffers.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, SubgraphAllocations* subgraph_allocations);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // Allocates and returns a persistent TfLiteTensor.
  // TODO(b/162311891): Drop this method when the interpreter has an API for
  // accessing TfLiteEvalTensor structs.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal();

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
                                                          TfLiteTensor* tensor,
                                                          int tensor_index,
                                                          int subgraph_idx,
                                                          bool allocate_temp);

  ErrorReporter* error_reporter() const;

 private:
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list of
  // pre-allocated TfLiteEvalTensor structs that will point to the buffers that
  // will be allocated into the head section in this function call. The
  // scratch_buffer_handles pointer is the array of pre-allocated
  // ScratchBufferHandle structs that will point to allocated buffers also in
  // the head section.
  virtual TfLiteStatus CommitStaticMemoryPlan(
      const Model* model, TfLiteEvalTensor* eval_tensors,
      ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx);

  // Allocates an array of ScratchBufferHandle structs in the tail section for a
  // given number of handles.
  virtual TfLiteStatus AllocateScratchBufferHandles(
      ScratchBufferHandle** scratch_buffer_handles, size_t handle_count);

  // Clears all internal scratch buffer request counts and resets the head
  // section to prepare for kernel scratch buffer requests during model
  // preparation.
  TfLiteStatus InitScratchBufferData();

  // Returns the pointer to the array of ScratchBufferRequest allocations in
  // the head section.
  internal::ScratchBufferRequest* GetScratchBufferRequests();

  // A simple memory allocator that always allocates from the arena tail or
  // head.
  SimpleMemoryAllocator* memory_allocator_;

  // Allocator used to allocate persistent builtin data.
  BuiltinDataAllocator* builtin_data_allocator_;

  ErrorReporter* error_reporter_;
  bool model_is_allocating_;

  // Holds the number of ScratchBufferRequest instances stored in the head
  // section when a model is allocating.
  size_t scratch_buffer_request_count_ = 0;

  // Holds the byte length of the memory plan with the largest head usage. Used
  // to ensure that multi-tenant allocations can share the head for buffers.
  size_t max_head_buffer_usage_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_