1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
17 
18 #include "tensorflow/lite/micro/micro_error_reporter.h"
19 #include "tensorflow/lite/micro/micro_string.h"
20 
21 namespace tflite {
22 
23 namespace {
24 
// Maps a buffer index to a single printable label character for
// GreedyMemoryPlanner::PrintMemoryPlan(): 0-9, then a-z, then A-Z,
// and '*' for anything beyond the 62 distinct labels.
char GetOrdinalCharacter(int i) {
  if (i < 10) {
    return static_cast<char>('0' + i);
  }
  if (i < 36) {
    return static_cast<char>('a' + (i - 10));
  }
  if (i < 62) {
    return static_cast<char>('A' + (i - 36));
  }
  return '*';
}
37 
38 }  // namespace
39 
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
//
// Sorts `values` into descending order via bubble sort, applying the same
// swaps to `ids` so that ids[i] keeps tracking its original value.
void ReverseSortInPlace(int* values, int* ids, int size) {
  for (bool swapped_this_pass = true; swapped_this_pass;) {
    swapped_this_pass = false;
    for (int i = 1; i < size; ++i) {
      // Only strictly-smaller predecessors are swapped, keeping the sort
      // stable for equal values.
      if (values[i - 1] >= values[i]) {
        continue;
      }
      const int swapped_value = values[i];
      values[i] = values[i - 1];
      values[i - 1] = swapped_value;
      const int swapped_id = ids[i];
      ids[i] = ids[i - 1];
      ids[i - 1] = swapped_id;
      swapped_this_pass = true;
    }
  }
}
60 
// Carves the caller-owned scratch arena into the planner's internal arrays.
// No heap allocation occurs: the planner can track at most
// scratch_buffer_size / per_buffer_size() buffers.
// NOTE(review): assumes scratch_buffer is suitably aligned for
// BufferRequirements/ListEntry/int before the reinterpret_casts below —
// confirm at the call sites.
GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
                                         int scratch_buffer_size)
    : buffer_count_(0), need_to_calculate_offsets_(true) {
  // Allocate the arrays we need within the scratch buffer arena.
  max_buffer_count_ = scratch_buffer_size / per_buffer_size();

  unsigned char* next_free = scratch_buffer;
  // Per-buffer size/lifetime/offline-offset records, indexed by buffer id.
  requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
  next_free += sizeof(BufferRequirements) * max_buffer_count_;

  // Parallel arrays of sizes and matching buffer ids, ordered during
  // planning (offline-planned first, then descending by size).
  buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
  next_free += sizeof(int) * max_buffer_count_;

  buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
  next_free += sizeof(int) * max_buffer_count_;

  // Linked-list nodes for placed buffers, kept sorted by arena offset.
  buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
  next_free += sizeof(ListEntry) * max_buffer_count_;

  // Final calculated arena offset for each buffer id.
  buffer_offsets_ = reinterpret_cast<int*>(next_free);
}
82 
~GreedyMemoryPlanner()83 GreedyMemoryPlanner::~GreedyMemoryPlanner() {
84   // We don't own the scratch buffer, so don't deallocate anything.
85 }
86 
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used)87 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
88     tflite::ErrorReporter* error_reporter, int size, int first_time_used,
89     int last_time_used) {
90   if (buffer_count_ >= max_buffer_count_) {
91     TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
92                          max_buffer_count_);
93     return kTfLiteError;
94   }
95   BufferRequirements* current = &requirements_[buffer_count_];
96   current->size = size;
97   current->first_time_used = first_time_used;
98   current->last_time_used = last_time_used;
99   current->offline_offset = kOnlinePlannedBuffer;
100   ++buffer_count_;
101   need_to_calculate_offsets_ = true;
102   return kTfLiteOk;
103 }
104 
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used,int offline_offset)105 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
106     tflite::ErrorReporter* error_reporter, int size, int first_time_used,
107     int last_time_used, int offline_offset) {
108   BufferRequirements* current = &requirements_[buffer_count_];
109   if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
110       kTfLiteOk) {
111     return kTfLiteError;
112   }
113   current->offline_offset = offline_offset;
114   return kTfLiteOk;
115 }
116 
DoesEntryOverlapInTime(const GreedyMemoryPlanner::ListEntry * entry,const int first_time_used,const int last_time_used) const117 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
118     const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
119     const int last_time_used) const {
120   const BufferRequirements* entry_requirements =
121       &requirements_[entry->requirements_index];
122   if (entry_requirements->first_time_used > last_time_used) {
123     return false;
124   }
125   if (first_time_used > entry_requirements->last_time_used) {
126     return false;
127   }
128   return true;
129 }
130 
131 GreedyMemoryPlanner::ListEntry*
NextSimultaneouslyActiveBuffer(const GreedyMemoryPlanner::ListEntry * start,const int first_time_used,const int last_time_used)132 GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
133     const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
134     const int last_time_used) {
135   ListEntry* result = nullptr;
136   ListEntry* candidate_next_entry;
137   if (start == nullptr) {
138     candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
139   } else {
140     if (start->next_entry_index == -1) {
141       return nullptr;
142     }
143     candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
144   }
145   do {
146     if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
147                                last_time_used)) {
148       result = candidate_next_entry;
149       break;
150     }
151     if (candidate_next_entry->next_entry_index == -1) {
152       break;
153     }
154     candidate_next_entry =
155         &buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
156   } while (true);
157   return result;
158 }
159 
// Runs the greedy placement algorithm, filling buffer_offsets_ for every
// buffer added so far, and rebuilding the offset-ordered linked list in
// buffers_sorted_by_offset_. No-op when the existing plan is still valid.
void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
  if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
    return;
  }
  need_to_calculate_offsets_ = false;

  // Start off by ordering the buffers in descending order of size.
  // This helps find a more compact layout. Intuitively, you can think
  // about putting the large buffers in place first, and then the
  // smaller buffers can fit in the gaps, rather than fragmenting the
  // gaps with small buffers at the beginning. Add offline planned offsets
  // first in the list, since they have a predetermined offset.
  int idx_from_tail = buffer_count_;
  int idx_from_head = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
      // Online-planned buffers fill the arrays from the tail; their offsets
      // are unknown until placed (-1 sentinel).
      idx_from_tail--;
      buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_tail] = i;
      buffer_offsets_[i] = -1;
    } else {
      // Offline-planned buffers fill from the head, in their original order,
      // and keep their predetermined offsets.
      buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_head] = i;
      buffer_offsets_[i] = requirements_[i].offline_offset;
      idx_from_head++;
    }
  }

  // This sorting algorithm is naive, and may end up taking a very long time
  // with hundreds of buffers. Do not sort the offline planned offsets.
  // Only the online-planned tail [idx_from_head, buffer_count_) is sorted,
  // descending by size.
  ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
                     &buffer_ids_sorted_[idx_from_head],
                     buffer_count_ - idx_from_head);

  // Initialize the first entry to the first buffer in
  // buffer_ids_sorted_.
  //   - If there are no offline planned offsets, the largest buffer will be
  //     first, and the buffers will be handled in size order.
  //   - If offline offsets are present, these will be handled first in order
  //     for the greedy algorithm to utilized gaps in the offline plan.
  first_entry_index_ = 0;
  next_free_entry_ = 1;
  ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
  first_entry->next_entry_index = -1;  // to mark the entry as end of list
  int buffer_id = buffer_ids_sorted_[0];
  first_entry->requirements_index = buffer_id;
  if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
    // First online-planned buffer goes at the start of the arena.
    buffer_offsets_[buffer_id] = 0;
  }
  first_entry->offset = buffer_offsets_[buffer_id];

  // Work through the rest of the buffers to find a good gap to place each one.
  for (int i = 1; i < buffer_count_; ++i) {
    // The id is the order the buffer was originally added by the client.
    buffer_id = buffer_ids_sorted_[i];
    // Look at what size and time range the buffer needs to be active.
    BufferRequirements* wanted_requirements = &requirements_[buffer_id];
    const int wanted_size = wanted_requirements->size;
    const int wanted_first_time_used = wanted_requirements->first_time_used;
    const int wanted_last_time_used = wanted_requirements->last_time_used;

    // Find the first buffer that's active in our time range. All placed
    // buffers are stored in the order of their starting position in the arena
    // so that it's easy to find the next buffer in memory, and so the gap.
    // The candidate_entry variable holds the buffer that we're considering
    // placing the current buffer after.

    int candidate_offset = 0;
    // Loop through the offset-ordered list of buffers, looking for gaps.
    if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
      ListEntry* prior_entry = nullptr;
      while (true) {
        // Find out what the next active buffer is.
        ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
            prior_entry, wanted_first_time_used, wanted_last_time_used);

        if (prior_entry) {
          // The new buffer must start at or after the end of the buffer we
          // are considering placing it behind.
          BufferRequirements* candidate_requirements =
              &requirements_[prior_entry->requirements_index];
          const int prior_entry_offset =
              prior_entry->offset + candidate_requirements->size;
          if (prior_entry_offset > candidate_offset) {
            candidate_offset = prior_entry_offset;
          }
        }
        if (next_entry == nullptr) {
          // We're at the end of the list, so we can always append the buffer
          // here.
          break;
        }
        // Find out how much space there is between us and the next buffer.
        const int gap = next_entry->offset - candidate_offset;
        if (gap >= wanted_size) {
          // This entry has a big enough gap between it and the next, so
          // use it!
          break;
        }
        // The gap wasn't big enough, so move on to another candidate.
        prior_entry = next_entry;
      }
    } else {
      // Offline planned offset are to be considered constant
      candidate_offset = wanted_requirements->offline_offset;
    }
    // At this point, we've either found a gap (possibly at the end of the
    // list) and want to place the buffer there, or there are no other active
    // buffers in this time range and so we can put it at offset zero.
    // Record the buffer's offset in our plan.
    buffer_offsets_[buffer_id] = candidate_offset;
    // Add the newly-placed buffer to our offset-ordered list, so that
    // subsequent passes can fit in their buffers around it.
    ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
    new_entry->offset = candidate_offset;
    new_entry->requirements_index = buffer_id;
    const int new_entry_index = next_free_entry_;
    ++next_free_entry_;

    if (first_entry->offset > candidate_offset) {
      // The new entry offset is smaller than the first entry offset =>
      // replace the first entry
      first_entry = new_entry;
      first_entry->next_entry_index = first_entry_index_;
      first_entry_index_ = new_entry_index;
    } else {
      ListEntry* current_entry = first_entry;
      // Make sure that we insert the buffer at the correct place in the
      // buffer-offset-ordered list
      while (true) {
        const int next_entry_index = current_entry->next_entry_index;
        if (next_entry_index == -1) {
          // We're at the end of the list, so just add the new entry here.
          current_entry->next_entry_index = new_entry_index;
          new_entry->next_entry_index = -1;
          break;
        }
        // not at the end of the list -> take a look at next entry
        ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
        if (next_entry->offset > candidate_offset) {
          // We're at the right spot to do an insertion and retain the sorting
          // order, so place the new entry here.
          new_entry->next_entry_index = current_entry->next_entry_index;
          current_entry->next_entry_index = new_entry_index;
          break;
        }
        current_entry = next_entry;
      }
    }
  }
}
309 
GetMaximumMemorySize()310 size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
311   CalculateOffsetsIfNeeded();
312   if (buffer_count_ == 0) {
313     return 0;
314   }
315   ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
316   size_t max_size = 0;
317   while (entry) {
318     BufferRequirements* requirements =
319         &requirements_[entry->requirements_index];
320     const size_t current_size = entry->offset + requirements->size;
321     if (current_size > max_size) {
322       max_size = current_size;
323     }
324     if (entry->next_entry_index == -1) {
325       break;
326     }
327     entry = &buffers_sorted_by_offset_[entry->next_entry_index];
328   }
329   return max_size;
330 }
331 
// Dumps a human-readable view of the plan via MicroPrintf: one summary line
// per buffer, followed by an ASCII chart with one row per time step showing
// which buffers occupy which portion of the arena ('.' = free, '!' = two
// buffers mapped to the same column after scaling).
void GreedyMemoryPlanner::PrintMemoryPlan() {
  CalculateOffsetsIfNeeded();

  for (int i = 0; i < buffer_count_; ++i) {
    MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
                GetOrdinalCharacter(i), i, requirements_[i].size,
                buffer_offsets_[i], requirements_[i].first_time_used,
                requirements_[i].last_time_used);
  }

  constexpr int kLineWidth = 80;
  // Find the arena extent (for horizontal scaling) and the last time step
  // (the number of chart rows). max_size starts at kLineWidth so tiny plans
  // aren't stretched to fill the row.
  int max_size = kLineWidth;
  int max_time = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    BufferRequirements* requirements = &requirements_[i];
    const int offset = buffer_offsets_[i];
    const int last_time_used = requirements->last_time_used;
    const int size = offset + requirements->size;
    if (size > max_size) {
      max_size = size;
    }
    if (last_time_used > max_time) {
      max_time = last_time_used;
    }
  }

  char line[kLineWidth + 1];
  for (int t = 0; t <= max_time; ++t) {
    // Start each row as all-free.
    for (int c = 0; c < kLineWidth; ++c) {
      line[c] = '.';
    }
    int memory_use = 0;
    for (int i = 0; i < buffer_count_; ++i) {
      BufferRequirements* requirements = &requirements_[i];
      // Skip buffers that are not alive at time t.
      if ((t < requirements->first_time_used) ||
          (t > requirements->last_time_used)) {
        continue;
      }
      const int offset = buffer_offsets_[i];
      // -1 means the buffer was never assigned an offset; leave it out.
      if (offset == -1) {
        continue;
      }
      const int size = requirements->size;
      memory_use += size;
      // Scale the buffer's [offset, offset + size) byte range onto the
      // kLineWidth-character row.
      const int line_start = (offset * kLineWidth) / max_size;
      const int line_end = ((offset + size) * kLineWidth) / max_size;
      for (int n = line_start; n < line_end; ++n) {
        if (line[n] == '.') {
          line[n] = GetOrdinalCharacter(i);
        } else {
          // Another buffer already claimed this column: mark the collision.
          line[n] = '!';
        }
      }
    }
    line[kLineWidth] = 0;

    // Leading space pads single-digit time steps so rows stay aligned.
    MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
                (memory_use + 1023) / 1024);
  }
}
392 
GetBufferCount()393 int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
394 
GetOffsetForBuffer(tflite::ErrorReporter * error_reporter,int buffer_index,int * offset)395 TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
396     tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
397   CalculateOffsetsIfNeeded();
398   if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
399     TF_LITE_REPORT_ERROR(error_reporter,
400                          "buffer index %d is outside range 0 to %d",
401                          buffer_index, buffer_count_);
402     return kTfLiteError;
403   }
404   *offset = buffer_offsets_[buffer_index];
405   return kTfLiteOk;
406 }
407 
DoAnyBuffersOverlap(ErrorReporter * error_reporter)408 bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
409   CalculateOffsetsIfNeeded();
410   bool were_overlaps_found = false;
411   for (int i = 0; i < buffer_count_; ++i) {
412     BufferRequirements* a_requirements = &requirements_[i];
413     const int a_start_offset = buffer_offsets_[i];
414     const int a_first_time_used = a_requirements->first_time_used;
415     const int a_last_time_used = a_requirements->last_time_used;
416     const int a_end_offset = a_start_offset + a_requirements->size;
417     for (int j = 0; j < buffer_count_; ++j) {
418       if (i == j) {
419         continue;
420       }
421       BufferRequirements* b_requirements = &requirements_[j];
422       const int b_start_offset = buffer_offsets_[j];
423       const int b_first_time_used = b_requirements->first_time_used;
424       const int b_last_time_used = b_requirements->last_time_used;
425       const int b_end_offset = b_start_offset + b_requirements->size;
426       if ((a_first_time_used > b_last_time_used) ||
427           (b_first_time_used > a_last_time_used)) {
428         // Buffers don't overlap in time.
429         continue;
430       }
431       if ((a_start_offset >= b_end_offset) ||
432           (b_start_offset >= a_end_offset)) {
433         // No overlap in memory.
434         continue;
435       }
436       were_overlaps_found = true;
437       TF_LITE_REPORT_ERROR(
438           error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
439           i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
440           j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
441     }
442   }
443   return were_overlaps_found;
444 }
445 
446 }  // namespace tflite
447