1 //
2 // Copyright (c) 2010-2024 Antmicro
3 // Copyright (c) 2011-2015 Realtime Embedded
4 //
5 // This file is licensed under the MIT License.
6 // Full license text is available in 'licenses/MIT.txt'.
7 //
8 using System;
9 using System.Collections.Generic;
10 using System.Linq;
11 
12 using ELFSharp.ELF;
13 using ELFSharp.ELF.Sections;
14 
15 using Antmicro.Renode.Utilities.Collections;
16 using System.Collections;
17 using Antmicro.Renode.Utilities;
18 
19 using Antmicro.Renode.Peripherals.Bus;
20 
21 namespace Antmicro.Renode.Core
22 {
23     public class SymbolLookup : IDisposable
24     {
SymbolLookup()25         public SymbolLookup()
26         {
27             symbolsByName = new MultiValueDictionary<string, Symbol>();
28             symbols = new SortedIntervals(this);
29             allSymbolSets.Add(this);
30         }
31 
Dispose()32         public void Dispose()
33         {
34             allSymbolSets.Remove(this);
35         }
36 
37         /// <summary>
38         /// Loads the symbols from ELF and puts the symbols into the SymbolLookup.
39         /// </summary>
40         /// <param name="elf">Elf.</param>
41         /// <param name="useVirtualAddress">Use the virtual address of the symbols, default is physical.</param>
42         /// <param name="textAddress">Override the starting address of the text section (actually the lowest-address loaded segment).</param>
LoadELF(IELF elf, bool useVirtualAddress = false, ulong? textAddress = null)43         public void LoadELF(IELF elf, bool useVirtualAddress = false, ulong? textAddress = null)
44         {
45             if(elf is ELF<uint> elf32)
46             {
47                 LoadELF(elf32, useVirtualAddress, textAddress);
48             }
49             else if(elf is ELF<ulong> elf64)
50             {
51                 LoadELF(elf64, useVirtualAddress, textAddress);
52             }
53             else
54             {
55                 throw new ArgumentException("Unsupported ELF format - only 32 and 64-bit ELFs are supported");
56             }
57         }
58 
59         /// <summary>
60         /// Inserts one symbol into the lookup structure.
61         /// </summary>
62         /// <param name="symbol">Symbol.</param>
InsertSymbol(Symbol symbol)63         public void InsertSymbol(Symbol symbol)
64         {
65             var symbolToAdd = GetUnique(symbol);
66             symbols.Add(symbolToAdd);
67             AddSymbolToNameLookup(symbol);
68             maxLoadAddress = SymbolAddress.Max(maxLoadAddress, symbol.End);
69         }
70 
71         /// <summary>
72         /// Inserts a new symbol with defined parameters into the lookup structure.
73         /// </summary>
74         /// <param name="name">Name.</param>
75         /// <param name="start">Start.</param>
76         /// <param name="size">Size.</param>
77         /// <param name="type">SymbolType.</param>
78         /// <param name="binding">Symbol binding</param>
79         /// <param name="isThumb">If set to <c>true</c>, symbol is marked as a thumb symbol.</param>
InsertSymbol(string name, SymbolAddress start, SymbolAddress size, SymbolType type = SymbolType.NotSpecified, SymbolBinding binding = SymbolBinding.Global, bool isThumb = false)80         public void InsertSymbol(string name, SymbolAddress start, SymbolAddress size, SymbolType type = SymbolType.NotSpecified, SymbolBinding binding = SymbolBinding.Global, bool isThumb = false)
81         {
82             var symbol = new Symbol(start, start + size, name, type, binding, isThumb);
83             InsertSymbol(symbol);
84         }
85 
86         /// <summary>
87         /// Inserts a batch of symbols.
88         /// </summary>
89         /// <param name="symbols">Symbols.</param>
InsertSymbols(IEnumerable<Symbol> symbols)90         public void InsertSymbols(IEnumerable<Symbol> symbols)
91         {
92             var symbolsToAdd = new List<Symbol>();
93 
94             // deduplicate symbols
95             foreach(var symbol in symbols)
96             {
97                 if(symbol.Name == "")
98                 {
99                     continue;
100                 }
101                 symbolsToAdd.Add(GetUnique(symbol));
102             }
103 
104             // Add symbols to name map
105             foreach(var symbolToAdd in symbolsToAdd)
106             {
107                 AddSymbolToNameLookup(symbolToAdd);
108                 maxLoadAddress = SymbolAddress.Max(maxLoadAddress, symbolToAdd.End);
109             }
110 
111             // Add symbols to interval set
112             this.symbols.Add(symbolsToAdd);
113         }
114 
TryGetSymbolsByName(string name, out IReadOnlyCollection<Symbol> symbols)115         public bool TryGetSymbolsByName(string name, out IReadOnlyCollection<Symbol> symbols)
116         {
117             return symbolsByName.TryGetValue(name, out symbols);
118         }
119 
GetSymbolsByName(string name)120         public IReadOnlyCollection<Symbol> GetSymbolsByName(string name)
121         {
122             IReadOnlyCollection<Symbol> candidates;
123             symbolsByName.TryGetValue(name, out candidates);
124             return candidates;
125         }
126 
127         /// <summary>
128         /// Tries to get the innermost symbol which contains the specified address.
129         /// </summary>
130         /// <returns><c>true</c>, if a symbol was found, <c>false</c> when no symbol contains specified address.</returns>
131         /// <param name="offset">Offset.</param>
132         /// <param name="symbol">Symbol.</param>
TryGetSymbolByAddress(SymbolAddress offset, out Symbol symbol)133         public bool TryGetSymbolByAddress(SymbolAddress offset, out Symbol symbol)
134         {
135             if(offset > maxLoadAddress)
136             {
137                 symbol = default(Symbol);
138                 return false;
139             }
140             return symbols.TryGet(offset, out symbol);
141         }
142 
143         /// <summary>
144         /// Gets the innermost symbol which contains the specified address.
145         /// </summary>
146         /// <returns>The symbol by address.</returns>
147         /// <param name="offset">Offset.</param>
GetSymbolByAddress(SymbolAddress offset)148         public Symbol GetSymbolByAddress(SymbolAddress offset)
149         {
150             Symbol symbol;
151             if(!TryGetSymbolByAddress(offset, out symbol))
152             {
153                 throw new KeyNotFoundException("No symbol for given address [" + offset + "] found.");
154             }
155             return symbol;
156         }
157 
158         public SymbolAddress? EntryPoint
159         {
160             get;
161             private set;
162         }
163 
164         public ulong? FirstNotNullSectionAddress
165         {
166             get;
167             private set;
168         }
169 
170         /// <summary>
171         /// All SymbolLookup objects.
172         /// </summary>
173         static private readonly HashSet<SymbolLookup> allSymbolSets = new HashSet<SymbolLookup>();
174         // Those symbols have a special meaning
175         // $a - marks ARM code segments
176         // $d - marks data segments
177         // $t - marks THUMB code segments
178         // $x - marks RISC-V code segments
179         // Sources: https://simplemachines.it/doc/aaelf.pdf section 4.5.6, https://www.mail-archive.com/bug-binutils@gnu.org/msg38960.html
180         // All of those symbols can be used by the disassembler when reading the binary and are not needed during the execution
181         static private readonly string[] excludedSymbolNames = { "$a", "$d", "$t", "$x" };
182         static private readonly SymbolType[] excludedSymbolTypes = { SymbolType.File };
183 
184         private void LoadELF<T>(ELF<T> elf, bool useVirtualAddress = false, ulong? textAddress = null) where T : struct
185         {
186             if(!elf.TryGetSection(".symtab", out var symtabSection))
187             {
188                 return;
189             }
190 
191             var segments = elf.Segments.Where(x => x.Type == ELFSharp.ELF.Segments.SegmentType.Load).OfType<ELFSharp.ELF.Segments.Segment<T>>();
192             foreach(var segment in segments)
193             {
194                 var loadAddress = useVirtualAddress ? segment.GetSegmentAddress() : segment.GetSegmentPhysicalAddress();
195                 minLoadAddress = SymbolAddress.Min(minLoadAddress, loadAddress);
196                 maxLoadAddress = SymbolAddress.Max(maxLoadAddress, loadAddress + segment.GetSegmentSize());
197             }
198 
199             var offset = (T)Convert.ChangeType(textAddress != null ? textAddress.Value - minLoadAddress.RawValue : 0, typeof(T));
200             var thumb = elf.Machine == ELFSharp.ELF.Machine.ARM;
201             var symtab = (SymbolTable<T>)symtabSection;
202 
203             // All names on the excluded list are valid C identifiers, so someone may name their function like that
204             // To guard against it we also check if the type of this symbol is not specified
205             var elfSymbols = symtab.Entries.Where(x => !(excludedSymbolNames.Contains(x.Name) && x.Type == SymbolType.NotSpecified))
206                                 .Where(x => !excludedSymbolTypes.Contains(x.Type))
207                                 .Where(x => x.PointedSectionIndex != (uint)SpecialSectionIndex.Undefined)
208                                 .Select(x => new Symbol(x.OffsetBy(offset), thumb));
InsertSymbolsAntmicro.Renode.Core.SymbolLookup.__anon1209             InsertSymbols(elfSymbols);
210             EntryPoint = elf.GetEntryPoint();
211             FirstNotNullSectionAddress  = elf.Sections
212                                     .Where(x => x.Type != SectionType.Null && x.Flags.HasFlag(SectionFlags.Allocatable))
213                                     .Select(x => x.GetSectionPhysicalAddress())
214                                     .Cast<ulong?>()
215                                     .Min();
216         }
217 
TryGetSymbol(Symbol symbol, out Symbol outSymbol)218         private bool TryGetSymbol(Symbol symbol, out Symbol outSymbol)
219         {
220             IReadOnlyCollection<Symbol> candidates;
221             outSymbol = null;
222             // Try getting cadidates via name, then if there are any present check whether they fit actual symbol.
223             if(TryGetSymbolsByName(symbol.Name, out candidates) && candidates.Count > 0)
224             {
225                 outSymbol = candidates.FirstOrDefault(s => s.Equals(symbol));
226             }
227             return outSymbol != null;
228         }
229 
230         /// <summary>
231         /// Removes the symbol from the local name look up. This is used internally when some symbol has to change, and needs to be evicted.
232         /// </summary>
RemoveSymbolFromNameLookup(Symbol symbol)233         private void RemoveSymbolFromNameLookup(Symbol symbol)
234         {
235             Symbol candidate;
236             if(TryGetSymbol(symbol, out candidate))
237             {
238                 symbolsByName.Remove(symbol.Name, symbol);
239             }
240         }
241 
242         /// <summary>
243         /// Adds the symbol to the name lookup. If the symbol with the same name exists, the one which
244         /// starts earlier will is chosen to be present in the name lookup/view.
245         /// </summary>
246         /// <param name="symbol">Symbol.</param>
AddSymbolToNameLookup(Symbol symbol)247         private void AddSymbolToNameLookup(Symbol symbol)
248         {
249             symbolsByName.Add(symbol.Name, symbol);
250 
251             var dotIndex = symbol.Name.IndexOf('.');
252             if(dotIndex >= 0)
253             {
254                 // NOTE: Add alias for the symbol if it contains clone suffix
255                 var withoutCloneSuffix = symbol.Name.Substring(0, dotIndex);
256                 symbolsByName.Add(withoutCloneSuffix, symbol);
257             }
258         }
259 
260         /// <summary>
261         /// Returs a unique reference for the equivalent of given <paramref name="symbol"/>. If there exists a duplicate, a reference
262         /// to the existing symbol is given, if not the argument is passed back.
263         /// </summary>
264         /// <description>
265         /// The algorithm for each known symbolSet works as follows:
266         /// 1. Check if the symbol is in the name set (this is O(1)), if it does it's our `candidate`.
267         ///  1.1. If not we are sure there is no matching symbol in the current set.
268         /// 2. Compare `candidate` to symbol. (It might be different, since symbols can have same names and different intervals.)
269         ///  2.1. If it is the same, return candidate.
270         ///  2.2. Otherwise, try to find the symbol via interval.
271         /// 3. If we didn't find a match in all sets return original reference.
272         /// </description>
273         /// <returns>The reference to the original symbol.</returns>
274         /// <param name="symbol">Symbol.</param>
GetUnique(Symbol symbol)275         static private Symbol GetUnique(Symbol symbol)
276         {
277             Symbol outSymbol;
278             foreach(var symbolSet in allSymbolSets)
279             {
280                 if(symbolSet.TryGetSymbol(symbol, out outSymbol))
281                 {
282                     return outSymbol;
283                 }
284             }
285             return symbol;
286         }
287 
288         /// <summary>
289         /// Interval to symbol mapping.
290         /// </summary>
291         private readonly SortedIntervals symbols;
292 
293         /// <summary>
294         /// Name to symbol mapping.
295         /// </summary>
296         private MultiValueDictionary<string, Symbol> symbolsByName;
297 
298         private SymbolAddress minLoadAddress = SymbolAddress.MaxValue;
299         private SymbolAddress maxLoadAddress;
300 
301         private class SortedIntervals : IEnumerable
302         {
SortedIntervals(SymbolLookup owner)303             public SortedIntervals(SymbolLookup owner)
304             {
305                 this.owner = owner;
306                 symbols = new Symbol[0];
307                 indexOfEnclosingInterval = new int[0];
308                 Count = 0;
309             }
310 
311             /// <summary>
312             /// Adds the intervals to the containers in batch mode.
313             /// </summary>
314             /// <param name="newSymbols">Intervals.</param>
Add(IEnumerable<Symbol> newSymbols)315             public void Add(IEnumerable<Symbol> newSymbols)
316             {
317                 var sortedIntervals = SelectDistinctViaImportance(newSymbols).ToArray();
318                 Array.Sort(sortedIntervals, comparer);
319                 sortedIntervals = DropUnusedLabels(sortedIntervals).ToArray();
320 
321                 if(Count > 0)
322                 {
323                     List<Symbol> createdSymbols;
324                     List<Symbol> deletedSymbols;
325                     MergeInSymbolArray(sortedIntervals, out createdSymbols, out deletedSymbols);
326                     foreach(var deletedSymbol in deletedSymbols)
327                     {
328                         owner.RemoveSymbolFromNameLookup(deletedSymbol);
329                     }
330                     foreach(var createdSymbol in createdSymbols)
331                     {
332                         owner.AddSymbolToNameLookup(SymbolLookup.GetUnique(createdSymbol));
333                     }
334                 }
335                 else
336                 {
337                     symbols = sortedIntervals;
338                 }
339 
340                 Count = symbols.Length;
341                 RebuildEnclosingIntervals();
342             }
343 
344             /// <summary>
345             /// Add the specified interval. NOTE: This is a version for single element. It's logic is complicated compared to the batch version,
346             /// but it should have better performance when it comes to constants with complexities.
347             /// Our typical use-cases do not use it very much, so if the maintain cost becomes too high, the implementation should be swapped
348             /// for to use range add. "Too high" is when you ask yourself a question "Is it too high?".
349             /// </summary>
350             /// <param name="symbol">Symbol.</param>
Add(Symbol symbol)351             public void Add(Symbol symbol)
352             {
353                 Add(new[] { symbol });
354             }
355 
356             /// <summary>
357             /// Tries to get the innermost symbol containing the specified scalar. If no symbol contains the scalar the trial fails.
358             /// </summary>
359             /// <returns><c>true</c>, if the interval was found, <c>false</c> when such interval does not exist.</returns>
360             /// <param name="scalar">Scalar.</param>
361             /// <param name="interval">Found interval.</param>
TryGet(SymbolAddress scalar, out Symbol interval)362             public bool TryGet(SymbolAddress scalar, out Symbol interval)
363             {
364                 interval = default(Symbol);
365                 var marker = new MarkerSymbol(scalar);
366                 var index = Array.BinarySearch<Symbol>(symbols, marker, comparer);
367                 if(index < 0)
368                 {
369                     index = ~index;
370                     // we are behind last element or before 1st
371                     if(index == 0)
372                     {
373                         return false;
374                     }
375                     // move back one element because search will always point to the 1st larger element than the marker
376                     index--;
377                 }
378 
379                 /***
380                 * We might be between two towers, that lie on the common ground (we will be between the top of the left tower and
381                 * before the base of the next one). `index` points to the left top symbol, we check if any enclosing symbol also contains the
382                 * scalar. If not, there is no such symbol (the common ground) and scalar just lies between two towers. If it exists scalar
383                 * is between two symbol lying on the same base.
384                 ****/
385                 index = FindEnclosingInterval(index, scalar);
386                 if(index != NoEnclosingInterval)
387                 {
388                     interval = symbols[index];
389                     return true;
390                 }
391 
392                 return false;
393             }
394 
395             /// <summary>
396             /// Tries to retrieve a Symbol with the given interval from the container.
397             /// </summary>
398             /// <returns><c>true</c>, if such container existed and was found, <c>false</c> otherwise.</returns>
399             /// <param name="other">Interval to look for.</param>
400             /// <param name="interval">Interval.</param>
TryGet(Symbol other, out Symbol interval)401             public bool TryGet(Symbol other, out Symbol interval)
402             {
403                 interval = null;
404                 var index = Array.BinarySearch<Symbol>(symbols, other, comparer);
405                 if(index > 0)
406                 {
407                     interval = symbols[index];
408                 }
409                 return interval != null;
410             }
411 
GetEnumerator()412             public IEnumerator GetEnumerator()
413             {
414                 return symbols.GetEnumerator();
415             }
416 
417             /// <summary>
418             /// Tells whether an exact interval is contained in the container.
419             /// </summary>
420             /// <param name="interval">Interval.</param>
Contains(Symbol interval)421             public bool Contains(Symbol interval)
422             {
423                 var index = Array.BinarySearch<Symbol>(symbols, interval, comparer);
424                 return index > 0;
425             }
426 
427             /// <summary>
428             /// Total number of elements.
429             /// </summary>
430             /// <value>The count.</value>
431             public int Count { get; private set; }
432 
433             /// <summary>
434             /// Filters out the symbols that are just labels and are also covered by non-label symbols.
435             /// </summary>
436             /// <param name="symbols">Sorted collection of symbols.</param>
DropUnusedLabels(IEnumerable<Symbol> symbols)437             private IEnumerable<Symbol> DropUnusedLabels(IEnumerable<Symbol> symbols)
438             {
439                 Symbol currentSymbol = null;
440                 foreach(var symbol in symbols)
441                 {
442                     // only include labels that are not part of other symbols
443                     if(!symbol.IsLabel)
444                     {
445                         if(currentSymbol == null || !currentSymbol.Contains(symbol))
446                         {
447                             // we advance currentSymbol only if we're in topmost (largest) symbol of the stack.
448                             // The reason is that it will definitely contain all smaller ones and we will not miss this situation:
449                             //
450                             //  aaaaaa  i
451                             // xxxxxxxxxxxxx
452                             //
453                             // in which `i` is not contained by `a`, but is contained by `x`.
454                             currentSymbol = symbol;
455                         }
456                         yield return symbol;
457                     }
458                     else
459                     {
460                         // label
461                         if(currentSymbol == null || !currentSymbol.Contains(symbol))
462                         {
463                             yield return symbol;
464                         }
465                     }
466                 }
467             }
468 
469             /// <summary>
470             /// Consumes whole cake from provider
471             /// </summary>
472             /// <param name="destination">Destination.</param>
473             /// <param name="source">Source.</param>
CopyCake(ICollection<Symbol> destination, ISymbolProvider source)474             private static void CopyCake(ICollection<Symbol> destination, ISymbolProvider source)
475             {
476                 Symbol cakeBase = source.Current;
477                 destination.Add(source.Consume()); // copy base
478                 // "move cake" - while current old symbol is contained in base copy it
479                 while(!source.Empty && cakeBase.Contains(source.Current))
480                 {
481                     destination.Add(source.Consume());
482                 }
483             }
484 
485             /// <summary>
486             /// Consumes all symbols from provider copying them to the destination.
487             /// </summary>
488             /// <param name="destination">Destination.</param>
489             /// <param name="source">Source.</param>
CopyRest(ICollection<Symbol> destination, ISymbolProvider source)490             private static void CopyRest(ICollection<Symbol> destination, ISymbolProvider source)
491             {
492                 while(!source.Empty)
493                 {
494                     destination.Add(source.Consume());
495                 }
496             }
497 
498             /// <summary>
499             /// This function filters out symbols with the same intervals perserving the most important ones.
500             /// </summary>
SelectDistinctViaImportance(IEnumerable<Symbol> symbolSet)501             private IEnumerable<Symbol> SelectDistinctViaImportance(IEnumerable<Symbol> symbolSet)
502             {
503                 var distinctSymbols = new MultiValueDictionary<int, Symbol>();
504                 IReadOnlyCollection<Symbol> symbolsWithSameHash;
505                 int hashCode;
506                 foreach(var symbol in symbolSet)
507                 {
508                     hashCode = comparer.GetHashCode(symbol);
509                     if(distinctSymbols.TryGetValue(hashCode, out symbolsWithSameHash))
510                     {
511                         // Replace all exisiting copies (interval-wise) of the symbol which that are less important.
512                         // There should be only one and we have to break after modification of the collection.
513                         foreach(var existingSymbol in symbolsWithSameHash)
514                         {
515                             if(comparer.Equals(symbol, existingSymbol) && symbol.IsMoreImportantThan(existingSymbol))
516                             {
517                                 distinctSymbols.Remove(hashCode, existingSymbol);
518                                 distinctSymbols.Add(hashCode, symbol);
519                                 break;
520                             }
521                         }
522                     }
523                     else
524                     {
525                         distinctSymbols.Add(hashCode, symbol);
526                     }
527                 }
528                 return distinctSymbols;
529             }
530 
531 
532             /// <summary>
533             /// Consumes a symbol from oldSymbols, and puts it into destination and/or tails of oldSymbols. Handles overlapping with newSymbols.
534             /// The head of a symbol is the part from start of the *old* symbol till the start of the *new* one.
535             /// The tail of the symbol is the part from the end of the *new* symbol till the end of the *old*.
536             /// This function copies over the head to the destination and adds a tail to the tail set (of course
537             /// when it makes sense).
538             /// </summary>
539             /// <param name="newSymbols">Provider of new symbols.</param>
540             /// <param name="destination">Destination container.</param>
541             /// <param name="oldSymbols">OldSymbolsProvider, symbols provider.</param>
542             /// <param name="createdSymbols">Container with the symbols that got created during merging.</param>
543             /// <param name="deletedSymbols">Container with the symbols that got got replaced/deleted during merging.</param>
AddSymbolWithHashing(OldSymbolProvider oldSymbols, ISymbolProvider newSymbols, ICollection<Symbol> destination, ICollection<Symbol> createdSymbols, ICollection<Symbol> deletedSymbols)544             private void AddSymbolWithHashing(OldSymbolProvider oldSymbols, ISymbolProvider newSymbols, ICollection<Symbol> destination, ICollection<Symbol> createdSymbols, ICollection<Symbol> deletedSymbols)
545             {
546                 var consumedSymbolType = oldSymbols.CurrentType;
547                 var symbolToAdd = oldSymbols.Consume();
548 
549                 // If there is no symbol to potentially overlap just copy the orignal one
550                 if(newSymbols.Empty)
551                 {
552                     destination.Add(symbolToAdd);
553                     return;
554                 }
555 
556                 if(consumedSymbolType == OldSymbolProvider.SymbolType.Original)
557                 {
558                     deletedSymbols.Add(symbolToAdd);
559                 }
560 
561                 // If the symbol to add starts before the new one, add its head to the destination.
562                 if(symbolToAdd.Start < newSymbols.Current.Start)
563                 {
564                     Symbol symbolHead;
565                     symbolToAdd.TryGetRightTrimmed(newSymbols.Current.Start, out symbolHead);
566                     // If there are symbols on the tail list, check if the trimming would produce the same interval.
567                     // If so, we skip the current addition. We do so, because in such case we want to preserve the top (most inner-one) former symbol.
568                     // We need, however, to process the possible remainder, a new tail might be generated.
569                     Symbol trimmedTail;
570                     if(!oldSymbols.HasNextTail ||
571                        !oldSymbols.NextTail.TryGetRightTrimmed(newSymbols.Current.Start, out trimmedTail) ||
572                        comparer.Compare(symbolHead, trimmedTail) != 0)
573                     {
574                         destination.Add(symbolHead);
575                         createdSymbols.Add(symbolHead);
576                     }
577                 }
578 
579                 if(newSymbols.Current.Length == 0 && symbolToAdd.Start == newSymbols.Current.Start)
580                 {
581                     destination.Add(symbolToAdd);
582                     return;
583                 }
584 
585                 // if there is something behind it add it to the new symbols list
586                 if(symbolToAdd.End > newSymbols.Current.End)
587                 {
588                     Symbol symbolTail;
589                     symbolToAdd.TryGetLeftTrimmed(newSymbols.Current.End, out symbolTail);
590                     oldSymbols.AddTail(symbolTail);
591 #if DEBUG
592                     DebugAssert.AssertFalse(
593                         newSymbols.Current.Length > 0 && newSymbols.Current.Overlaps(symbolTail),
594                         "New symbol overlaps with created tail! Old and new symbols should not overlap."
595                     );
596 #endif
597                     return;
598                 }
599 
600                 // There is one another case. Where the overlapping candidate completely overshadows the symbol.
601                 // Don't do anything, the overshadowed symbol got deleted anyways and no new symbol gets created.
602             }
603 
MergeInSymbolArray(Symbol[] symbolsToAdd, out List<Symbol> createdSymbols, out List<Symbol> deletedSymbols)604             private void MergeInSymbolArray(Symbol[] symbolsToAdd, out List<Symbol> createdSymbols, out List<Symbol> deletedSymbols)
605             {
606                 int capacity = symbols.Length + symbolsToAdd.Length;
607                 // We don't know exact numbers of the new symbols, because some might get split, and some might get removed.
608                 // Thus we use a list, and chosen initial capacity makes some sense.
609                 var mergedIntervals = new List<Symbol>(capacity);
610                 var oldSymbols = new OldSymbolProvider(comparer, symbols);
611                 var newSymbols = new SymbolProvider(symbolsToAdd);
612                 createdSymbols = new List<Symbol>();
613                 deletedSymbols = new List<Symbol>();
614 
615                 // While new and old symbols might interleave
616                 while(!oldSymbols.Empty && !newSymbols.Empty)
617                 {
618                     // While they do not overlap
619                     while(!oldSymbols.Empty && !newSymbols.Empty && !oldSymbols.Current.Overlaps(newSymbols.Current))
620                     {
621                         // Symbols do not overlap here so it is sufficient to check only beginnings to find out which comes 1st.
622                         // In case of copying new symbol, we can move whole cake, because it will never be cached.
623                         if(oldSymbols.Current.Start < newSymbols.Current.Start)
624                         {
625                             mergedIntervals.Add(oldSymbols.Consume());
626                         }
627                         else
628                         {
629                             CopyCake(mergedIntervals, newSymbols);
630                         }
631                     }
632                     if(oldSymbols.Empty || newSymbols.Empty)
633                     {
634                         break;
635                     }
636                     // Here we are sure that current old and new symbols overlap somehow
637                     //
638                     // While we are not behind the current new symbol, keep adding old symbols and hash them if needed.
639                     // Also handles zero length symbols. It is done with such an ugly condition, be cause otherwise,
640                     // Two loops would be needed, one before overlaps, and one after, for different placements of 0-length
641                     // symbols.
642                     while(!oldSymbols.Empty && (
643                               oldSymbols.Current.Start < newSymbols.Current.End || (
644                                   newSymbols.Current.Length == 0 &&
645                                   oldSymbols.Current.Start == newSymbols.Current.End
646                               )))
647                     {
648                         AddSymbolWithHashing(oldSymbols, newSymbols, mergedIntervals, createdSymbols, deletedSymbols);
649                     }
650                     // Copy in the new symbol cake
651                     CopyCake(mergedIntervals, newSymbols);
652                 }
653 
654                 // Copy rest of the symbols. If the symbol comes from temporary list, it should be "materialized"
655                 while(!oldSymbols.Empty)
656                 {
657                     if(oldSymbols.CurrentType == OldSymbolProvider.SymbolType.Temporary)
658                     {
659                         createdSymbols.Add(oldSymbols.Current);
660                     }
661                     mergedIntervals.Add(oldSymbols.Consume());
662                 }
663 
664                 // Copy rest of the newSymbols
665                 CopyRest(mergedIntervals, newSymbols);
666                 symbols = mergedIntervals.ToArray();
667             }
668 
669             private interface ISymbolProvider
670             {
671                 Symbol Current { get; }
672 
673                 bool Empty { get; }
674 
Consume()675                 Symbol Consume();
676             }
677 
678             private class OldSymbolProvider : ISymbolProvider
679             {
OldSymbolProvider(IComparer<Symbol> comparer, Symbol[] oldSymbols)680                 public OldSymbolProvider(IComparer<Symbol> comparer, Symbol[] oldSymbols)
681                 {
682                     intervalComparer = comparer;
683                     symbols = new SymbolProvider(oldSymbols);
684                     tails = new Queue<Symbol>();
685                     currentsSynced = false;
686                 }
687 
688                 public Symbol Current
689                 {
690                     get
691                     {
692                         UpdateCurrentSymbolAndType();
693                         return currentSymbol;
694                     }
695                 }
696 
697                 public SymbolType CurrentType
698                 {
699                     get
700                     {
701                         UpdateCurrentSymbolAndType();
702                         return currentSymbolType;
703                     }
704                 }
705 
706                 public bool Empty
707                 {
708                     get
709                     {
710                         UpdateCurrentSymbolAndType();
711                         return currentSymbol == null;
712                     }
713                 }
714 
715                 public bool HasNextTail { get { return tails.Count != 0; } }
716 
717                 public Symbol NextTail { get { return tails.Peek(); } }
718 
AddTail(Symbol tail)719                 public void AddTail(Symbol tail)
720                 {
721                     tails.Enqueue(tail);
722                     currentsSynced = false;
723                 }
724 
Consume()725                 public Symbol Consume()
726                 {
727                     Symbol ret = Current;
728                     DiscardCurrentSymbol();
729                     return ret;
730                 }
731 
732                 public enum SymbolType
733                 {
734                     NoSymbol,
735                     Original,
736                     Temporary
737                 }
738 
739                 /// <summary>
740                 /// Discards the current symbol. It consumes original symbol. Why the temporary symbols are not consumed here
741                 /// is described in updateCurrentSymbolAndType description.
742                 /// </summary>
DiscardCurrentSymbol()743                 private void DiscardCurrentSymbol()
744                 {
745                     switch(CurrentType)
746                     {
747                     case SymbolType.Original:
748                         symbols.Consume();
749                         break;
750                     case SymbolType.Temporary:
751                     case SymbolType.NoSymbol:
752                         break;
753                     }
754                     currentsSynced = false;
755                 }
756 
757                 /// <summary>
758                 /// Updates current symbol and its type. If current symbol is a temporary it is consumed,
759                 /// when it's original it is not consumed.
760                 /// </summary>
761                 /// <description>
762                 /// There can exists redundant temporary symbols. I.e. when trimming would produce the same intervals for two symbols.
763                 /// Everything but last symbol with the same interval is important, rest must be ignored.
764                 /// This is why temporary symbols are consumed upon update. We cannot remove redundant symbols upon addition,
765                 /// but we can ignore them upon update. We also cannot peek more than one symbol ahed, so we need to consume one and
766                 /// check if the next is the same.
767                 /// Original symbol cannot be consumed when set as current, because a new tail might will be placed before current
768                 /// it would need to be put back. Vice versa situation do not exists, because original symbols are immutable.
769                 /// Also tails are added in a monotonic manner, so adding a new tail will also not result in eventual put back.
770                 /// Original symbols are consumed in DiscardCurrentSymbol.
771                 /// </description>
UpdateCurrentSymbolAndType()772                 private void UpdateCurrentSymbolAndType()
773                 {
774                     if(currentsSynced)
775                     {
776                         return;
777                     }
778                     currentsSynced = true;
779 
780                     if(tails.Count == 0 && symbols.Empty)
781                     {
782                         currentSymbol = null;
783                         currentSymbolType = SymbolType.NoSymbol;
784                         return;
785                     }
786                     if(tails.Count == 0)
787                     {
788                         currentSymbol = symbols.Current;
789                         currentSymbolType = SymbolType.Original;
790                         return;
791                     }
792                     if(symbols.Empty)
793                     {
794                         currentSymbol = DequeueLastCopy(tails);
795                         currentSymbolType = SymbolType.Temporary;
796                         return;
797                     }
798                     if(intervalComparer.Compare(symbols.Current, tails.Peek()) < 0)
799                     {
800                         currentSymbol = symbols.Current;
801                         currentSymbolType = SymbolType.Original;
802                         return;
803                     }
804                     // We will get last original copy of the symbol from the tails queue.
805                     currentSymbol = DequeueLastCopy(tails);
806                     currentSymbolType = SymbolType.Temporary;
807                     // If it's exactly the same as the symbol in the old list, we prefer to ignore tail, because it
808                     // for sure started before the original symbol, thus would not be the innermost.
809                     if(intervalComparer.Compare(symbols.Current, currentSymbol) == 0)
810                     {
811                         currentSymbol = symbols.Current;
812                         currentSymbolType = SymbolType.Original;
813                     }
814                 }
815 
816                 /// <summary>
817                 /// Dequeues a tail symbol returning the last equivalent copy. This ensures that if there are tails of exact same interval,
818                 /// we will get last one, as comming from the more inner one original. This might happen when, two symbol end at the same
819                 /// address and both will get their heads trimmed.
820                 /// </summary>
821                 /// <returns>The last copy.</returns>
822                 /// <param name="symbolTails">Symbol tails.</param>
DequeueLastCopy(Queue<Symbol> symbolTails)823                 private Symbol DequeueLastCopy(Queue<Symbol> symbolTails)
824                 {
825                     var symbol = symbolTails.Dequeue();
826                     while(symbolTails.Count > 0 && intervalComparer.Compare(symbol, symbolTails.Peek()) == 0)
827                     {
828                         symbol = symbolTails.Dequeue();
829                     }
830                     return symbol;
831                 }
832 
833                 private Symbol currentSymbol;
834                 private SymbolType currentSymbolType;
835                 private bool currentsSynced;
836                 private readonly IComparer<Symbol> intervalComparer;
837                 private readonly Queue<Symbol> tails;
838                 private readonly SymbolProvider symbols;
839             }
840 
841             private class SymbolProvider : ISymbolProvider
842             {
843                 public int Length { get { return array.Length - iterator; } }
844 
845                 public Symbol Current { get { return Peek(); } }
846 
847                 public bool Empty { get { return !HasSymbolsLeft(); } }
848 
849 
SymbolProvider(Symbol[] symbols)850                 public SymbolProvider(Symbol[] symbols)
851                 {
852                     array = symbols;
853                 }
854 
Consume()855                 public Symbol Consume()
856                 {
857                     return array[iterator++];
858                 }
859 
HasSymbolsLeft()860                 private bool HasSymbolsLeft()
861                 {
862                     return iterator < array.Length;
863                 }
864 
Peek()865                 private Symbol Peek()
866                 {
867                     return array[iterator];
868                 }
869 
870                 private int iterator;
871                 private readonly Symbol[] array;
872             }
873 
874             /// <summary>
875             /// Rebuilds the helper table - `indexOfEnclosingInterval` from the intervals table in O(n). From fast back-of-the-envelope analysis the
876             /// the pessimistic run time is O(2n).
877             ///
878             /// The algorithms works by constructing a stack of enclosing intervals, and iterating over intervals.
879             /// 1. At the beginning the guard is created, meaning there is no enclosing interval. We also set it for the 1st element (it
880             /// obviously cannot be containted in anything else).
881             /// 2. We iterate over the remaining intervals.
882             ///   3. We check whether the current interval is enclosed in the previous one.
883             ///   4. If yes, it belong to the "tower", so we add it to the stack.
884             ///   5. If not, we descend the stack poping elements until we are enclosed by one or we hit the guard.
885             /// 6. The parent is top element from the stack.
886             /// </summary>
RebuildEnclosingIntervals()887             private void RebuildEnclosingIntervals()
888             {
889                 indexOfEnclosingInterval = new int[Count];
890                 if(Count < 1)
891                 {
892                     return;
893                 }
894                 var parents = new Stack<int>();
895 
896                 // 1.
897                 parents.Push(NoEnclosingInterval);
898                 indexOfEnclosingInterval[0] = NoEnclosingInterval;
899 
900                 // 2.
901                 for(int i = 1; i < Count; ++i)
902                 {
903                     var interval = symbols[i];
904                     // 3.
905                     if(symbols[i - 1].Contains(interval))
906                     {
907                         // 4.
908                         parents.Push(i - 1);
909                     }
910                     else
911                     {
912                         // 5.
913                         while(parents.Peek() != NoEnclosingInterval && !symbols[parents.Peek()].Contains(interval))
914                         {
915                             parents.Pop();
916                         }
917                     }
918                     // 6.
919                     indexOfEnclosingInterval[i] = parents.Peek();
920                 }
921             }
922 
923             /// <summary>
924             /// Finds the next sibling. NOTE: Orphans do not have siblings.
925             /// </summary>
926             /// <returns>The next sibling or <see cref="NoRightSibling"/> special value.</returns>
927             /// <param name="index">Index.</param>
FindNextSibling(int index)928             private int FindNextSibling(int index)
929             {
930                 var parentIdx = indexOfEnclosingInterval[index];
931 
932                 if(parentIdx == NoEnclosingInterval)
933                 {
934                     return NoRightSibling;
935                 }
936 
937                 var parentEnd = symbols[parentIdx].End;
938                 index++;
939                 // as long as there are any elements and we're not past our parent's end
940                 while(index < symbols.Length && symbols[index].End > parentEnd)
941                 {
942                     // check if the element has the same parent
943                     if(indexOfEnclosingInterval[index] == parentIdx)
944                     {
945                         return index;
946                     }
947                     index++;
948                 }
949                 return NoRightSibling;
950             }
951 
952             /// <summary>
953             /// Checks if the given <paramref name="interval"/> is enclosed by the one under <paramref name="index"/> or one of it's "parents".
954             /// </summary>
955             /// <returns>The enclosing interval index or NoEnclosingInterval.</returns>
956             /// <param name="index">Index of the input interval.</param>
957             /// <param name="interval">Interval.</param>
FindEnclosingInterval(int index, Symbol interval)958             private int FindEnclosingInterval(int index, Symbol interval)
959             {
960                 while(index != NoEnclosingInterval && !symbols[index].Contains(interval))
961                 {
962                     index = indexOfEnclosingInterval[index];
963                 }
964                 return index;
965             }
966 
967             /// <summary>
968             /// Checks if the given <paramref name="scalar"/> is enclosed by the one under <paramref name="index"/> or one of it's "parents".
969             /// </summary>
970             /// <returns>The enclosing interval index or NoEnclosingInterval.</returns>
971             /// <param name="index">Index of the input interval.</param>
972             /// <param name = "scalar"></param>
FindEnclosingInterval(int index, SymbolAddress scalar)973             private int FindEnclosingInterval(int index, SymbolAddress scalar)
974             {
975                 var original = index;
976                 while(index != NoEnclosingInterval && !symbols[index].Contains(scalar))
977                 {
978                     index = indexOfEnclosingInterval[index];
979                 }
980                 // A special case when we hit after a 0-length symbol thas does not have any parent.
981                 // We do not need to check it in the loop, as such a symbol would never enclose other symbols.
982                 // This gives an approximate result.
983                 if(index == NoEnclosingInterval)
984                 {
985                     if(symbols[original].Start <= scalar && symbols[original].End == symbols[original].Start && (symbols.Length == original + 1 || symbols[original + 1].Start > scalar))
986                     {
987                         index = original;
988                     }
989                 }
990                 return index;
991             }
992 
993             private SymbolLookup owner;
994             private const int NoEnclosingInterval = -1;
995             private const int NoRightSibling = -1;
996             private readonly MarkerComparer<SymbolAddress> comparer = new MarkerComparer<SymbolAddress>();
997 
998             private Symbol[] symbols;
999 
1000             /// <summary>
1001             /// Internal container holding mapping from interval index to its enclosing interval index,
1002             /// if such exists, otherwise a special value <see cref="NoEnclosingInterval"/> is used.
1003             /// It allows to get all the enclosing intervals in fast manner.
1004             /// indexOfEnclosingInterval[i] says where the ith's "parent" is.
1005             /// </summary>
1006             private int[] indexOfEnclosingInterval;
1007 
1008             /// <summary>
1009             /// This class is used with marker symbols, to find the innermost intervals which the marker (representing scalar) belongs to.
1010             /// It basically says if the symbol starts earlier or later than the marker.
1011             /// </summary>
1012             private class MarkerComparer<TScalar> : IntervalComparer<TScalar> where TScalar : struct, IComparable<TScalar>
1013             {
1014                 static TScalar MarkerValue = (TScalar)typeof(TScalar).GetField("MaxValue", System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Static).GetValue(null);
1015 
GetMarkerValue()1016                 static public TScalar GetMarkerValue()
1017                 {
1018                     return MarkerValue;
1019                 }
1020 
Compare(IInterval<TScalar> lhs, IInterval<TScalar> rhs)1021                 public override int Compare(IInterval<TScalar> lhs, IInterval<TScalar> rhs)
1022                 {
1023                     var markerValue = GetMarkerValue();
1024                     if(lhs.End.CompareTo(markerValue) != 0 && rhs.End.CompareTo(markerValue) != 0)
1025                     {
1026                         return base.Compare(lhs, rhs);
1027                     }
1028 
1029                     IInterval<TScalar> marker;
1030                     IInterval<TScalar> symbol;
1031                     bool markerIsLhs;
1032                     if(lhs.End.CompareTo(markerValue) == 0)
1033                     {
1034                         marker = lhs;
1035                         symbol = rhs;
1036                         markerIsLhs = true;
1037                     }
1038                     else
1039                     {
1040                         marker = rhs;
1041                         symbol = lhs;
1042                         markerIsLhs = false;
1043                     }
1044 
1045                     /**
1046                     If the marker finds the symbol exactly matching the start, explicitly make the symbol smaller.
1047                     This will ensure the property, that marker cannot be exactly matched to the symbol.
1048                     This way, symbol starting directly at the pointer will be treated the same as the symbol
1049                     Starting before it, which makes sense. It both cases it will contain it depending on the end position.
1050                     In other words this defines that inclusive start boundary of comparison overrides exclusive end.
1051                     **/
1052                     var cmp = symbol.Start.CompareTo(marker.Start) == 0 ? marker.Start.CompareTo(symbol.End) : symbol.Start.CompareTo(marker.Start);
1053                     // If marker was passed as left hand side argument, we need to invert the comparison result.
1054                     return markerIsLhs ? -cmp : cmp;
1055                 }
1056             }
1057 
1058             /// <summary>
1059             /// This is a convienice class, that makes a marker symbol, which is treated differently by MarkerComparer.
1060             /// It is a special symbol that wraps a scalar value in the Symbol object. Its Start represents the scalar
1061             /// value, and its End is a special "magic value" that marks it as a marker symbol.
1062             /// </summary>
1063             private class MarkerSymbol : Symbol
1064             {
MarkerSymbol(SymbolAddress value)1065                 public MarkerSymbol(SymbolAddress value) : base(value, MarkerComparer<SymbolAddress>.GetMarkerValue())
1066                 {
1067                 }
1068             }
1069         }
1070     }
1071 }
1072