1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
17
18 #include "tensorflow/lite/micro/micro_error_reporter.h"
19 #include "tensorflow/lite/micro/micro_string.h"
20
21 namespace tflite {
22
namespace {

// Maps a buffer index to a single printable character for
// GreedyMemoryPlanner::PrintMemoryPlan(): indices 0-9 map to '0'-'9',
// 10-35 to 'a'-'z', 36-61 to 'A'-'Z', and anything beyond that to '*'.
char GetOrdinalCharacter(int i) {
  if (i < 10) {
    return static_cast<char>('0' + i);
  }
  if (i < 36) {
    return static_cast<char>('a' + (i - 10));
  }
  if (i < 62) {
    return static_cast<char>('A' + (i - 36));
  }
  return '*';
}

}  // namespace
39
// Simple stable in-place sort into descending order (bubble sort), keeping
// `values` and `ids` in lockstep. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
void ReverseSortInPlace(int* values, int* ids, int size) {
  bool swapped_any = true;
  while (swapped_any) {
    swapped_any = false;
    for (int idx = 1; idx < size; ++idx) {
      // Only exchange on a strict inversion so equal values keep their
      // original relative order (stability).
      if (values[idx - 1] >= values[idx]) {
        continue;
      }
      const int tmp_value = values[idx];
      values[idx] = values[idx - 1];
      values[idx - 1] = tmp_value;
      const int tmp_id = ids[idx];
      ids[idx] = ids[idx - 1];
      ids[idx - 1] = tmp_id;
      swapped_any = true;
    }
  }
}
60
GreedyMemoryPlanner(unsigned char * scratch_buffer,int scratch_buffer_size)61 GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
62 int scratch_buffer_size)
63 : buffer_count_(0), need_to_calculate_offsets_(true) {
64 // Allocate the arrays we need within the scratch buffer arena.
65 max_buffer_count_ = scratch_buffer_size / per_buffer_size();
66
67 unsigned char* next_free = scratch_buffer;
68 requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
69 next_free += sizeof(BufferRequirements) * max_buffer_count_;
70
71 buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
72 next_free += sizeof(int) * max_buffer_count_;
73
74 buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
75 next_free += sizeof(int) * max_buffer_count_;
76
77 buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
78 next_free += sizeof(ListEntry) * max_buffer_count_;
79
80 buffer_offsets_ = reinterpret_cast<int*>(next_free);
81 }
82
~GreedyMemoryPlanner()83 GreedyMemoryPlanner::~GreedyMemoryPlanner() {
84 // We don't own the scratch buffer, so don't deallocate anything.
85 }
86
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used)87 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
88 tflite::ErrorReporter* error_reporter, int size, int first_time_used,
89 int last_time_used) {
90 if (buffer_count_ >= max_buffer_count_) {
91 TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
92 max_buffer_count_);
93 return kTfLiteError;
94 }
95 BufferRequirements* current = &requirements_[buffer_count_];
96 current->size = size;
97 current->first_time_used = first_time_used;
98 current->last_time_used = last_time_used;
99 current->offline_offset = kOnlinePlannedBuffer;
100 ++buffer_count_;
101 need_to_calculate_offsets_ = true;
102 return kTfLiteOk;
103 }
104
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used,int offline_offset)105 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
106 tflite::ErrorReporter* error_reporter, int size, int first_time_used,
107 int last_time_used, int offline_offset) {
108 BufferRequirements* current = &requirements_[buffer_count_];
109 if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
110 kTfLiteOk) {
111 return kTfLiteError;
112 }
113 current->offline_offset = offline_offset;
114 return kTfLiteOk;
115 }
116
DoesEntryOverlapInTime(const GreedyMemoryPlanner::ListEntry * entry,const int first_time_used,const int last_time_used) const117 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
118 const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
119 const int last_time_used) const {
120 const BufferRequirements* entry_requirements =
121 &requirements_[entry->requirements_index];
122 if (entry_requirements->first_time_used > last_time_used) {
123 return false;
124 }
125 if (first_time_used > entry_requirements->last_time_used) {
126 return false;
127 }
128 return true;
129 }
130
131 GreedyMemoryPlanner::ListEntry*
NextSimultaneouslyActiveBuffer(const GreedyMemoryPlanner::ListEntry * start,const int first_time_used,const int last_time_used)132 GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
133 const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
134 const int last_time_used) {
135 ListEntry* result = nullptr;
136 ListEntry* candidate_next_entry;
137 if (start == nullptr) {
138 candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
139 } else {
140 if (start->next_entry_index == -1) {
141 return nullptr;
142 }
143 candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
144 }
145 do {
146 if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
147 last_time_used)) {
148 result = candidate_next_entry;
149 break;
150 }
151 if (candidate_next_entry->next_entry_index == -1) {
152 break;
153 }
154 candidate_next_entry =
155 &buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
156 } while (true);
157 return result;
158 }
159
// Computes an arena offset for every registered buffer using a greedy
// first-fit strategy, keeping the placed buffers in a singly-linked list
// ordered by offset. Results land in buffer_offsets_[]. Runs lazily: a no-op
// unless AddBuffer() has been called since the last plan.
void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
  if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
    return;
  }
  need_to_calculate_offsets_ = false;

  // Start off by ordering the buffers in descending order of size.
  // This helps find a more compact layout. Intuitively, you can think
  // about putting the large buffers in place first, and then the
  // smaller buffers can fit in the gaps, rather than fragmenting the
  // gaps with small buffers at the beginning. Add offline planned offsets
  // first in the list, since they have a predetermined offset.
  //
  // Partition: offline-planned buffers fill the sorted arrays from the head
  // (in registration order), online ones fill from the tail.
  int idx_from_tail = buffer_count_;
  int idx_from_head = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
      idx_from_tail--;
      buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_tail] = i;
      // -1 marks "not yet placed"; filled in by the greedy pass below.
      buffer_offsets_[i] = -1;
    } else {
      buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_head] = i;
      buffer_offsets_[i] = requirements_[i].offline_offset;
      idx_from_head++;
    }
  }

  // This sorting algorithm is naive, and may end up taking a very long time
  // with hundreds of buffers. Do not sort the offline planned offsets.
  // Only the online-planned tail section is sorted (descending by size).
  ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
                     &buffer_ids_sorted_[idx_from_head],
                     buffer_count_ - idx_from_head);

  // Initialize the first entry to the first buffer in
  // buffer_ids_sorted_.
  // - If there are no offline planned offsets, the largest buffer will be
  //   first, and the buffers will be handled in size order.
  // - If offline offsets are present, these will be handled first in order
  //   for the greedy algorithm to utilized gaps in the offline plan.
  first_entry_index_ = 0;
  next_free_entry_ = 1;
  ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
  first_entry->next_entry_index = -1;  // to mark the entry as end of list
  int buffer_id = buffer_ids_sorted_[0];
  first_entry->requirements_index = buffer_id;
  if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
    // No offline buffers at all: the largest buffer simply starts at zero.
    buffer_offsets_[buffer_id] = 0;
  }
  first_entry->offset = buffer_offsets_[buffer_id];

  // Work through the rest of the buffers to find a good gap to place each one.
  for (int i = 1; i < buffer_count_; ++i) {
    // The id is the order the buffer was originally added by the client.
    buffer_id = buffer_ids_sorted_[i];
    // Look at what size and time range the buffer needs to be active.
    BufferRequirements* wanted_requirements = &requirements_[buffer_id];
    const int wanted_size = wanted_requirements->size;
    const int wanted_first_time_used = wanted_requirements->first_time_used;
    const int wanted_last_time_used = wanted_requirements->last_time_used;

    // Find the first buffer that's active in our time range. All placed
    // buffers are stored in the order of their starting position in the arena
    // so that it's easy to find the next buffer in memory, and so the gap.
    // The candidate_entry variable holds the buffer that we're considering
    // placing the current buffer after.

    int candidate_offset = 0;
    // Loop through the offset-ordered list of buffers, looking for gaps.
    if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
      ListEntry* prior_entry = nullptr;
      while (true) {
        // Find out what the next active buffer is.
        ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
            prior_entry, wanted_first_time_used, wanted_last_time_used);

        if (prior_entry) {
          // The candidate offset must be at least the end of the prior
          // time-overlapping buffer.
          BufferRequirements* candidate_requirements =
              &requirements_[prior_entry->requirements_index];
          const int prior_entry_offset =
              prior_entry->offset + candidate_requirements->size;
          if (prior_entry_offset > candidate_offset) {
            candidate_offset = prior_entry_offset;
          }
        }
        if (next_entry == nullptr) {
          // We're at the end of the list, so we can always append the buffer
          // here.
          break;
        }
        // Find out how much space there is between us and the next buffer.
        const int gap = next_entry->offset - candidate_offset;
        if (gap >= wanted_size) {
          // This entry has a big enough gap between it and the next, so
          // use it!
          break;
        }
        // The gap wasn't big enough, so move on to another candidate.
        prior_entry = next_entry;
      }
    } else {
      // Offline planned offset are to be considered constant
      candidate_offset = wanted_requirements->offline_offset;
    }
    // At this point, we've either found a gap (possibly at the end of the
    // list) and want to place the buffer there, or there are no other active
    // buffers in this time range and so we can put it at offset zero.
    // Record the buffer's offset in our plan.
    buffer_offsets_[buffer_id] = candidate_offset;
    // Add the newly-placed buffer to our offset-ordered list, so that
    // subsequent passes can fit in their buffers around it.
    ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
    new_entry->offset = candidate_offset;
    new_entry->requirements_index = buffer_id;
    const int new_entry_index = next_free_entry_;
    ++next_free_entry_;

    if (first_entry->offset > candidate_offset) {
      // The new entry offset is smaller than the first entry offset =>
      // replace the first entry
      first_entry = new_entry;
      first_entry->next_entry_index = first_entry_index_;
      first_entry_index_ = new_entry_index;
    } else {
      ListEntry* current_entry = first_entry;
      // Make sure that we insert the buffer at the correct place in the
      // buffer-offset-ordered list
      while (true) {
        const int next_entry_index = current_entry->next_entry_index;
        if (next_entry_index == -1) {
          // We're at the end of the list, so just add the new entry here.
          current_entry->next_entry_index = new_entry_index;
          new_entry->next_entry_index = -1;
          break;
        }
        // not at the end of the list -> take a look at next entry
        ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
        if (next_entry->offset > candidate_offset) {
          // We're at the right spot to do an insertion and retain the sorting
          // order, so place the new entry here.
          new_entry->next_entry_index = current_entry->next_entry_index;
          current_entry->next_entry_index = new_entry_index;
          break;
        }
        current_entry = next_entry;
      }
    }
  }
}
309
GetMaximumMemorySize()310 size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
311 CalculateOffsetsIfNeeded();
312 if (buffer_count_ == 0) {
313 return 0;
314 }
315 ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
316 size_t max_size = 0;
317 while (entry) {
318 BufferRequirements* requirements =
319 &requirements_[entry->requirements_index];
320 const size_t current_size = entry->offset + requirements->size;
321 if (current_size > max_size) {
322 max_size = current_size;
323 }
324 if (entry->next_entry_index == -1) {
325 break;
326 }
327 entry = &buffers_sorted_by_offset_[entry->next_entry_index];
328 }
329 return max_size;
330 }
331
// Logs a human-readable view of the plan: one summary line per buffer,
// followed by an ASCII chart with one row per time step and 80 columns
// scaled to the arena size. Each buffer is drawn with its ordinal character;
// '!' marks cells where two buffers collide.
void GreedyMemoryPlanner::PrintMemoryPlan() {
  CalculateOffsetsIfNeeded();

  // Per-buffer summary: sizes, planned offsets, and lifetimes.
  for (int i = 0; i < buffer_count_; ++i) {
    MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
                GetOrdinalCharacter(i), i, requirements_[i].size,
                buffer_offsets_[i], requirements_[i].first_time_used,
                requirements_[i].last_time_used);
  }

  constexpr int kLineWidth = 80;
  // Find the scale for the chart: the largest end address (at least
  // kLineWidth so small plans map to one byte per column) and the last
  // time step used by any buffer.
  int max_size = kLineWidth;
  int max_time = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    BufferRequirements* requirements = &requirements_[i];
    const int offset = buffer_offsets_[i];
    const int last_time_used = requirements->last_time_used;
    const int size = offset + requirements->size;
    if (size > max_size) {
      max_size = size;
    }
    if (last_time_used > max_time) {
      max_time = last_time_used;
    }
  }

  // Render one chart row per time step; '.' means unused arena space.
  char line[kLineWidth + 1];
  for (int t = 0; t <= max_time; ++t) {
    for (int c = 0; c < kLineWidth; ++c) {
      line[c] = '.';
    }
    int memory_use = 0;
    for (int i = 0; i < buffer_count_; ++i) {
      BufferRequirements* requirements = &requirements_[i];
      // Skip buffers that aren't live at time t.
      if ((t < requirements->first_time_used) ||
          (t > requirements->last_time_used)) {
        continue;
      }
      const int offset = buffer_offsets_[i];
      // -1 means the buffer was never placed; nothing to draw.
      if (offset == -1) {
        continue;
      }
      const int size = requirements->size;
      memory_use += size;
      // Scale the buffer's byte range down to chart columns.
      const int line_start = (offset * kLineWidth) / max_size;
      const int line_end = ((offset + size) * kLineWidth) / max_size;
      for (int n = line_start; n < line_end; ++n) {
        if (line[n] == '.') {
          line[n] = GetOrdinalCharacter(i);
        } else {
          // Another buffer already claimed this cell: flag the collision.
          line[n] = '!';
        }
      }
    }
    line[kLineWidth] = 0;

    // Right-align single-digit time steps; report usage rounded up to KB.
    MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
                (memory_use + 1023) / 1024);
  }
}
392
GetBufferCount()393 int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
394
GetOffsetForBuffer(tflite::ErrorReporter * error_reporter,int buffer_index,int * offset)395 TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
396 tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
397 CalculateOffsetsIfNeeded();
398 if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
399 TF_LITE_REPORT_ERROR(error_reporter,
400 "buffer index %d is outside range 0 to %d",
401 buffer_index, buffer_count_);
402 return kTfLiteError;
403 }
404 *offset = buffer_offsets_[buffer_index];
405 return kTfLiteOk;
406 }
407
DoAnyBuffersOverlap(ErrorReporter * error_reporter)408 bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
409 CalculateOffsetsIfNeeded();
410 bool were_overlaps_found = false;
411 for (int i = 0; i < buffer_count_; ++i) {
412 BufferRequirements* a_requirements = &requirements_[i];
413 const int a_start_offset = buffer_offsets_[i];
414 const int a_first_time_used = a_requirements->first_time_used;
415 const int a_last_time_used = a_requirements->last_time_used;
416 const int a_end_offset = a_start_offset + a_requirements->size;
417 for (int j = 0; j < buffer_count_; ++j) {
418 if (i == j) {
419 continue;
420 }
421 BufferRequirements* b_requirements = &requirements_[j];
422 const int b_start_offset = buffer_offsets_[j];
423 const int b_first_time_used = b_requirements->first_time_used;
424 const int b_last_time_used = b_requirements->last_time_used;
425 const int b_end_offset = b_start_offset + b_requirements->size;
426 if ((a_first_time_used > b_last_time_used) ||
427 (b_first_time_used > a_last_time_used)) {
428 // Buffers don't overlap in time.
429 continue;
430 }
431 if ((a_start_offset >= b_end_offset) ||
432 (b_start_offset >= a_end_offset)) {
433 // No overlap in memory.
434 continue;
435 }
436 were_overlaps_found = true;
437 TF_LITE_REPORT_ERROR(
438 error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
439 i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
440 j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
441 }
442 }
443 return were_overlaps_found;
444 }
445
446 } // namespace tflite
447