/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace internal {

// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    SimpleMemoryAllocator* allocator, bool allocate_temp,
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result);

// Holds placeholder information for a scratch buffer request from a kernel.
// This struct is only used during the model prepare stage. Each request from
// a kernel is stored in the head section. During the prepare stage, the head
// section will hold at least kMaxScratchBuffersPerOp requests plus any
// requests from previously prepared kernels.
//
// When the memory plan is finalized, these structs are no longer used; they
// are replaced by a sequential array of ScratchBufferHandle allocations in
// the tail section. Those allocations are indexed by the request API defined
// in the TfLiteContext struct.
typedef struct {
  // Number of bytes required by the buffer. The actual allocated size might
  // be greater than `bytes` due to buffer alignment.
  size_t bytes;
  // Node for which the buffer is allocated. This provides useful information
  // to determine the lifetime of the buffer. In AllocationInfo, this buffer
  // will have `before` = node_idx and `after` = node_idx.
  int node_idx;
} ScratchBufferRequest;

}  // namespace internal

typedef struct {
  TfLiteNode node;
  const TfLiteRegistration* registration;
} NodeAndRegistration;

// Holds a pointer to a buffer for a scratch buffer requested by a kernel
// during the model prepare stage. This struct is allocated in-place and
// allows for quick pointer-indexed lookup for speed during model inference.
typedef struct {
  // Pointer to the location of the scratch buffer:
  uint8_t* data;
} ScratchBufferHandle;
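// A minimal kernel-side sketch of the scratch buffer lifecycle described
// above. The OpData/Prepare/Eval names and the 1024-byte size are
// illustrative, not part of this header; RequestScratchBufferInArena and
// GetScratchBuffer are the TfLiteContext entry points that ultimately index
// into the structs defined here.
//
//   struct OpData {
//     int scratch_idx;  // Index handed back by RequestScratchBufferInArena.
//   };
//
//   TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
//     // node->user_data is assumed to be set up in Init() via
//     // context->AllocatePersistentBuffer().
//     auto* data = static_cast<OpData*>(node->user_data);
//     // Only the requested size is recorded here; no buffer exists yet.
//     return context->RequestScratchBufferInArena(context, 1024,
//                                                 &data->scratch_idx);
//   }
//
//   TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
//     auto* data = static_cast<OpData*>(node->user_data);
//     // Quick pointer-indexed lookup against the ScratchBufferHandle array.
//     uint8_t* scratch = static_cast<uint8_t*>(
//         context->GetScratchBuffer(context, data->scratch_idx));
//     // ... use `scratch` as working memory for this invocation ...
//     return kTfLiteOk;
//   }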
// Stores all per-subgraph allocations. This includes the node and
// registration array, the tensor list, and the scratch buffer handles for
// each subgraph.
typedef struct {
  NodeAndRegistration* node_and_registrations;
  TfLiteEvalTensor* tensors;
} SubgraphAllocations;

// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are
// required to stand up a model for inference in TF Micro. This class
// currently relies on an additional allocator - SimpleMemoryAllocator - for
// all allocations from an arena. These allocations are divided into head
// (non-persistent) and tail (persistent) regions:
//
// Memory layout to help understand how it works.
// This layout may change in future versions.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
//                                               - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance.
  // Note: Please use __declspec(align(16)) to make sure tensor_arena is
  // 16-byte aligned; otherwise some head room will be wasted.
  // TODO(b/157615197): Cleanup constructor + factory usage.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);

  // Creates a MicroAllocator instance using the provided
  // SimpleMemoryAllocator instance. This allocator instance will use the
  // SimpleMemoryAllocator instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                ErrorReporter* error_reporter);

  // Allocates internal resources required for model inference for each
  // subgraph from the arena.
  //
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed by a call to FinishModelAllocation(). Returns a
  // pointer to an array of SubgraphAllocations (also stored in the tail of
  // the arena) where each index corresponds to a different subgraph in the
  // model. Returns nullptr if the allocations failed.
  SubgraphAllocations* StartModelAllocation(const Model* model);

  // Finishes allocating internal resources required for model inference.
  //
  // - Plans the memory for activation tensors and scratch buffers.
  // - Updates eval tensors for each subgraph based on planned offsets.
  // - Allocates the scratch buffer handles array and updates it based on
  //   planned offsets.
  //
  // This method should be called after assigning model resources
  // in StartModelAllocation(). The subgraph_allocations pointer should be the
  // value passed into this class during StartModelAllocation(). Scratch
  // buffer handles are stored in the out-param `scratch_buffer_handles`
  // array, which is allocated in this method. This value will be used in
  // `GetScratchBuffer` calls to retrieve scratch buffers.
  TfLiteStatus FinishModelAllocation(
      const Model* model, SubgraphAllocations* subgraph_allocations,
      ScratchBufferHandle** scratch_buffer_handles);
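  // A minimal sketch of the two-phase allocation flow (the arena size, the
  // `model_buffer` contents and the kernel prepare step are assumptions
  // supplied by the application; MicroErrorReporter comes from
  // tensorflow/lite/micro/micro_error_reporter.h):
  //
  //   alignas(16) static uint8_t arena[kArenaSize];  // kArenaSize: app-chosen
  //   tflite::MicroErrorReporter reporter;
  //   tflite::MicroAllocator* allocator =
  //       tflite::MicroAllocator::Create(arena, kArenaSize, &reporter);
  //
  //   const tflite::Model* model = tflite::GetModel(model_buffer);
  //   tflite::SubgraphAllocations* allocations =
  //       allocator->StartModelAllocation(model);
  //   // ... kernels are prepared here; scratch buffer requests accumulate
  //   // in the head section ...
  //   tflite::ScratchBufferHandle* scratch_handles = nullptr;
  //   TfLiteStatus status = allocator->FinishModelAllocation(
  //       model, allocations, &scratch_handles);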
  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is guaranteed for the lifetime of the
  // application. The subgraph_allocations pointer should be the value passed
  // into this class during StartModelAllocation() and contains the
  // source-of-truth for buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, const SubgraphAllocations* subgraph_allocations,
      int tensor_index, int subgraph_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
  // ResetTempAllocations(). The subgraph_allocations parameter contains the
  // array of TfLiteEvalTensors. If the newly allocated temp tensor at the
  // specified subgraph and tensor index is already present in the
  // TfLiteEvalTensor array, its data buffer will be re-used.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(
      const Model* model, const SubgraphAllocations* subgraph_allocations,
      int tensor_index, int subgraph_index);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. a chain of TfLiteTensor objects allocated
  // via AllocateTempTfLiteTensor()).
  virtual void ResetTempAllocations();
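  // A minimal sketch of the temp allocation pattern (reusing `allocator`,
  // `model` and `allocations` from the sketch above; the indices are
  // illustrative):
  //
  //   // The returned structs live in temp arena memory.
  //   TfLiteTensor* t0 = allocator->AllocateTempTfLiteTensor(
  //       model, allocations, /*tensor_index=*/0, /*subgraph_index=*/0);
  //   TfLiteTensor* t1 = allocator->AllocateTempTfLiteTensor(
  //       model, allocations, /*tensor_index=*/1, /*subgraph_index=*/0);
  //   // ... inspect shapes / quantization params on t0 and t1 ...
  //   // Invalidate the whole chain of temp structs in one call:
  //   allocator->ResetTempAllocations();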
  // Allocates a persistent buffer which has the same lifetime as the
  // allocator. The memory is immediately available and is allocated from the
  // tail of the arena.
  virtual void* AllocatePersistentBuffer(size_t bytes);

  // Registers a scratch buffer of size `bytes` for the node with `node_id`.
  // This method only requests a buffer with a given size to be used after a
  // model has finished allocation via FinishModelAllocation(). All requested
  // buffers will be accessible by the out-param in that method.
  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx,
                                           int* buffer_idx);

  // Finishes allocating a specific NodeAndRegistration prepare block (kernel
  // entry for a model) with a given node ID. This call ensures that any
  // scratch buffer requests and temporary allocations are handled and ready
  // for the next node prepare block.
  TfLiteStatus FinishPrepareNodeAllocations(int node_id);

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

  // Converts a flatbuffer int32_t array to a TfLiteIntArray, accounting for
  // endianness.
  TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
      const flatbuffers::Vector<int32_t>* flatbuffer_array,
      TfLiteIntArray** result);

  BuiltinDataAllocator* GetBuiltinDataAllocator();

 protected:
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold pointers to the node and
  // registration pointers required to represent the inference graph of the
  // model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, SubgraphAllocations* subgraph_allocations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of
  // truth for all tensor buffers.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, SubgraphAllocations* subgraph_allocations);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // Allocates and returns a persistent TfLiteTensor.
  // TODO(b/162311891): Drop this method when the interpreter has an API for
  // accessing TfLiteEvalTensor structs.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal();

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
                                                          TfLiteTensor* tensor,
                                                          int tensor_index,
                                                          int subgraph_idx,
                                                          bool allocate_temp);

  ErrorReporter* error_reporter() const;

 private:
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list
  // of pre-allocated TfLiteEvalTensor structs that will point to the buffers
  // that will be allocated into the head section in this function call. The
  // scratch_buffer_handles pointer is the array of pre-allocated
  // ScratchBufferHandle structs that will point to allocated buffers also in
  // the head section.
  virtual TfLiteStatus CommitStaticMemoryPlan(
      const Model* model, TfLiteEvalTensor* eval_tensors,
      ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx);

  // Allocates an array of ScratchBufferHandle structs in the tail section for
  // a given number of handles.
  virtual TfLiteStatus AllocateScratchBufferHandles(
      ScratchBufferHandle** scratch_buffer_handles, size_t handle_count);

  // Clears all internal scratch buffer request counts and resets the head to
  // prepare for kernels to request scratch buffer data when a model is
  // preparing.
  TfLiteStatus InitScratchBufferData();

  // Returns the pointer to the array of ScratchBufferRequest allocations in
  // the head section.
  internal::ScratchBufferRequest* GetScratchBufferRequests();

  // A simple memory allocator that always allocates from the arena tail or
  // head.
  SimpleMemoryAllocator* memory_allocator_;

  // Allocator used to allocate persistent builtin data.
  BuiltinDataAllocator* builtin_data_allocator_;

  ErrorReporter* error_reporter_;
  bool model_is_allocating_;

  // Holds the number of ScratchBufferRequest instances stored in the head
  // section when a model is allocating.
  size_t scratch_buffer_request_count_ = 0;

  // Holds the byte length of the memory plan with the largest head usage.
  // Used to ensure that multi-tenant allocations can share the head for
  // buffers.
  size_t max_head_buffer_usage_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_