1 // 2 // Copyright (c) 2010-2024 Antmicro 3 // Copyright (c) 2011-2015 Realtime Embedded 4 // 5 // This file is licensed under the MIT License. 6 // Full license text is available in 'licenses/MIT.txt'. 7 // 8 using System; 9 using System.Collections.Generic; 10 using System.Linq; 11 12 using ELFSharp.ELF; 13 using ELFSharp.ELF.Sections; 14 15 using Antmicro.Renode.Utilities.Collections; 16 using System.Collections; 17 using Antmicro.Renode.Utilities; 18 19 using Antmicro.Renode.Peripherals.Bus; 20 21 namespace Antmicro.Renode.Core 22 { 23 public class SymbolLookup : IDisposable 24 { SymbolLookup()25 public SymbolLookup() 26 { 27 symbolsByName = new MultiValueDictionary<string, Symbol>(); 28 symbols = new SortedIntervals(this); 29 allSymbolSets.Add(this); 30 } 31 Dispose()32 public void Dispose() 33 { 34 allSymbolSets.Remove(this); 35 } 36 37 /// <summary> 38 /// Loads the symbols from ELF and puts the symbols into the SymbolLookup. 39 /// </summary> 40 /// <param name="elf">Elf.</param> 41 /// <param name="useVirtualAddress">Use the virtual address of the symbols, default is physical.</param> 42 /// <param name="textAddress">Override the starting address of the text section (actually the lowest-address loaded segment).</param> LoadELF(IELF elf, bool useVirtualAddress = false, ulong? textAddress = null)43 public void LoadELF(IELF elf, bool useVirtualAddress = false, ulong? textAddress = null) 44 { 45 if(elf is ELF<uint> elf32) 46 { 47 LoadELF(elf32, useVirtualAddress, textAddress); 48 } 49 else if(elf is ELF<ulong> elf64) 50 { 51 LoadELF(elf64, useVirtualAddress, textAddress); 52 } 53 else 54 { 55 throw new ArgumentException("Unsupported ELF format - only 32 and 64-bit ELFs are supported"); 56 } 57 } 58 59 /// <summary> 60 /// Inserts one symbol into the lookup structure. 61 /// </summary> 62 /// <param name="symbol">Symbol.</param> InsertSymbol(Symbol symbol)63 public void InsertSymbol(Symbol symbol) 64 { 65 var symbolToAdd = GetUnique(symbol); 66 symbols.Add(symbolToAdd); 67 AddSymbolToNameLookup(symbol); 68 maxLoadAddress = SymbolAddress.Max(maxLoadAddress, symbol.End); 69 } 70 71 /// <summary> 72 /// Inserts a new symbol with defined parameters into the lookup structure. 73 /// </summary> 74 /// <param name="name">Name.</param> 75 /// <param name="start">Start.</param> 76 /// <param name="size">Size.</param> 77 /// <param name="type">SymbolType.</param> 78 /// <param name="binding">Symbol binding</param> 79 /// <param name="isThumb">If set to <c>true</c>, symbol is marked as a thumb symbol.</param> InsertSymbol(string name, SymbolAddress start, SymbolAddress size, SymbolType type = SymbolType.NotSpecified, SymbolBinding binding = SymbolBinding.Global, bool isThumb = false)80 public void InsertSymbol(string name, SymbolAddress start, SymbolAddress size, SymbolType type = SymbolType.NotSpecified, SymbolBinding binding = SymbolBinding.Global, bool isThumb = false) 81 { 82 var symbol = new Symbol(start, start + size, name, type, binding, isThumb); 83 InsertSymbol(symbol); 84 } 85 86 /// <summary> 87 /// Inserts a batch of symbols. 88 /// </summary> 89 /// <param name="symbols">Symbols.</param> InsertSymbols(IEnumerable<Symbol> symbols)90 public void InsertSymbols(IEnumerable<Symbol> symbols) 91 { 92 var symbolsToAdd = new List<Symbol>(); 93 94 // deduplicate symbols 95 foreach(var symbol in symbols) 96 { 97 if(symbol.Name == "") 98 { 99 continue; 100 } 101 symbolsToAdd.Add(GetUnique(symbol)); 102 } 103 104 // Add symbols to name map 105 foreach(var symbolToAdd in symbolsToAdd) 106 { 107 AddSymbolToNameLookup(symbolToAdd); 108 maxLoadAddress = SymbolAddress.Max(maxLoadAddress, symbolToAdd.End); 109 } 110 111 // Add symbols to interval set 112 this.symbols.Add(symbolsToAdd); 113 } 114 TryGetSymbolsByName(string name, out IReadOnlyCollection<Symbol> symbols)115 public bool TryGetSymbolsByName(string name, out IReadOnlyCollection<Symbol> symbols) 116 { 117 return symbolsByName.TryGetValue(name, out symbols); 118 } 119 GetSymbolsByName(string name)120 public IReadOnlyCollection<Symbol> GetSymbolsByName(string name) 121 { 122 IReadOnlyCollection<Symbol> candidates; 123 symbolsByName.TryGetValue(name, out candidates); 124 return candidates; 125 } 126 127 /// <summary> 128 /// Tries to get the innermost symbol which contains the specified address. 129 /// </summary> 130 /// <returns><c>true</c>, if a symbol was found, <c>false</c> when no symbol contains specified address.</returns> 131 /// <param name="offset">Offset.</param> 132 /// <param name="symbol">Symbol.</param> TryGetSymbolByAddress(SymbolAddress offset, out Symbol symbol)133 public bool TryGetSymbolByAddress(SymbolAddress offset, out Symbol symbol) 134 { 135 if(offset > maxLoadAddress) 136 { 137 symbol = default(Symbol); 138 return false; 139 } 140 return symbols.TryGet(offset, out symbol); 141 } 142 143 /// <summary> 144 /// Gets the innermost symbol which contains the specified address. 145 /// </summary> 146 /// <returns>The symbol by address.</returns> 147 /// <param name="offset">Offset.</param> GetSymbolByAddress(SymbolAddress offset)148 public Symbol GetSymbolByAddress(SymbolAddress offset) 149 { 150 Symbol symbol; 151 if(!TryGetSymbolByAddress(offset, out symbol)) 152 { 153 throw new KeyNotFoundException("No symbol for given address [" + offset + "] found."); 154 } 155 return symbol; 156 } 157 158 public SymbolAddress? EntryPoint 159 { 160 get; 161 private set; 162 } 163 164 public ulong? FirstNotNullSectionAddress 165 { 166 get; 167 private set; 168 } 169 170 /// <summary> 171 /// All SymbolLookup objects. 172 /// </summary> 173 static private readonly HashSet<SymbolLookup> allSymbolSets = new HashSet<SymbolLookup>(); 174 // Those symbols have a special meaning 175 // $a - marks ARM code segments 176 // $d - marks data segments 177 // $t - marks THUMB code segments 178 // $x - marks RISC-V code segments 179 // Sources: https://simplemachines.it/doc/aaelf.pdf section 4.5.6, https://www.mail-archive.com/bug-binutils@gnu.org/msg38960.html 180 // All of those symbols can be used by the disassembler when reading the binary and are not needed during the execution 181 static private readonly string[] excludedSymbolNames = { "$a", "$d", "$t", "$x" }; 182 static private readonly SymbolType[] excludedSymbolTypes = { SymbolType.File }; 183 184 private void LoadELF<T>(ELF<T> elf, bool useVirtualAddress = false, ulong? textAddress = null) where T : struct 185 { 186 if(!elf.TryGetSection(".symtab", out var symtabSection)) 187 { 188 return; 189 } 190 191 var segments = elf.Segments.Where(x => x.Type == ELFSharp.ELF.Segments.SegmentType.Load).OfType<ELFSharp.ELF.Segments.Segment<T>>(); 192 foreach(var segment in segments) 193 { 194 var loadAddress = useVirtualAddress ? segment.GetSegmentAddress() : segment.GetSegmentPhysicalAddress(); 195 minLoadAddress = SymbolAddress.Min(minLoadAddress, loadAddress); 196 maxLoadAddress = SymbolAddress.Max(maxLoadAddress, loadAddress + segment.GetSegmentSize()); 197 } 198 199 var offset = (T)Convert.ChangeType(textAddress != null ? textAddress.Value - minLoadAddress.RawValue : 0, typeof(T)); 200 var thumb = elf.Machine == ELFSharp.ELF.Machine.ARM; 201 var symtab = (SymbolTable<T>)symtabSection; 202 203 // All names on the excluded list are valid C identifiers, so someone may name their function like that 204 // To guard against it we also check if the type of this symbol is not specified 205 var elfSymbols = symtab.Entries.Where(x => !(excludedSymbolNames.Contains(x.Name) && x.Type == SymbolType.NotSpecified)) 206 .Where(x => !excludedSymbolTypes.Contains(x.Type)) 207 .Where(x => x.PointedSectionIndex != (uint)SpecialSectionIndex.Undefined) 208 .Select(x => new Symbol(x.OffsetBy(offset), thumb)); InsertSymbolsAntmicro.Renode.Core.SymbolLookup.__anon1209 InsertSymbols(elfSymbols); 210 EntryPoint = elf.GetEntryPoint(); 211 FirstNotNullSectionAddress = elf.Sections 212 .Where(x => x.Type != SectionType.Null && x.Flags.HasFlag(SectionFlags.Allocatable)) 213 .Select(x => x.GetSectionPhysicalAddress()) 214 .Cast<ulong?>() 215 .Min(); 216 } 217 TryGetSymbol(Symbol symbol, out Symbol outSymbol)218 private bool TryGetSymbol(Symbol symbol, out Symbol outSymbol) 219 { 220 IReadOnlyCollection<Symbol> candidates; 221 outSymbol = null; 222 // Try getting cadidates via name, then if there are any present check whether they fit actual symbol. 223 if(TryGetSymbolsByName(symbol.Name, out candidates) && candidates.Count > 0) 224 { 225 outSymbol = candidates.FirstOrDefault(s => s.Equals(symbol)); 226 } 227 return outSymbol != null; 228 } 229 230 /// <summary> 231 /// Removes the symbol from the local name look up. This is used internally when some symbol has to change, and needs to be evicted. 232 /// </summary> RemoveSymbolFromNameLookup(Symbol symbol)233 private void RemoveSymbolFromNameLookup(Symbol symbol) 234 { 235 Symbol candidate; 236 if(TryGetSymbol(symbol, out candidate)) 237 { 238 symbolsByName.Remove(symbol.Name, symbol); 239 } 240 } 241 242 /// <summary> 243 /// Adds the symbol to the name lookup. If the symbol with the same name exists, the one which 244 /// starts earlier will is chosen to be present in the name lookup/view. 245 /// </summary> 246 /// <param name="symbol">Symbol.</param> AddSymbolToNameLookup(Symbol symbol)247 private void AddSymbolToNameLookup(Symbol symbol) 248 { 249 symbolsByName.Add(symbol.Name, symbol); 250 251 var dotIndex = symbol.Name.IndexOf('.'); 252 if(dotIndex >= 0) 253 { 254 // NOTE: Add alias for the symbol if it contains clone suffix 255 var withoutCloneSuffix = symbol.Name.Substring(0, dotIndex); 256 symbolsByName.Add(withoutCloneSuffix, symbol); 257 } 258 } 259 260 /// <summary> 261 /// Returs a unique reference for the equivalent of given <paramref name="symbol"/>. If there exists a duplicate, a reference 262 /// to the existing symbol is given, if not the argument is passed back. 263 /// </summary> 264 /// <description> 265 /// The algorithm for each known symbolSet works as follows: 266 /// 1. Check if the symbol is in the name set (this is O(1)), if it does it's our `candidate`. 267 /// 1.1. If not we are sure there is no matching symbol in the current set. 268 /// 2. Compare `candidate` to symbol. (It might be different, since symbols can have same names and different intervals.) 269 /// 2.1. If it is the same, return candidate. 270 /// 2.2. Otherwise, try to find the symbol via interval. 271 /// 3. If we didn't find a match in all sets return original reference. 272 /// </description> 273 /// <returns>The reference to the original symbol.</returns> 274 /// <param name="symbol">Symbol.</param> GetUnique(Symbol symbol)275 static private Symbol GetUnique(Symbol symbol) 276 { 277 Symbol outSymbol; 278 foreach(var symbolSet in allSymbolSets) 279 { 280 if(symbolSet.TryGetSymbol(symbol, out outSymbol)) 281 { 282 return outSymbol; 283 } 284 } 285 return symbol; 286 } 287 288 /// <summary> 289 /// Interval to symbol mapping. 290 /// </summary> 291 private readonly SortedIntervals symbols; 292 293 /// <summary> 294 /// Name to symbol mapping. 295 /// </summary> 296 private MultiValueDictionary<string, Symbol> symbolsByName; 297 298 private SymbolAddress minLoadAddress = SymbolAddress.MaxValue; 299 private SymbolAddress maxLoadAddress; 300 301 private class SortedIntervals : IEnumerable 302 { SortedIntervals(SymbolLookup owner)303 public SortedIntervals(SymbolLookup owner) 304 { 305 this.owner = owner; 306 symbols = new Symbol[0]; 307 indexOfEnclosingInterval = new int[0]; 308 Count = 0; 309 } 310 311 /// <summary> 312 /// Adds the intervals to the containers in batch mode. 313 /// </summary> 314 /// <param name="newSymbols">Intervals.</param> Add(IEnumerable<Symbol> newSymbols)315 public void Add(IEnumerable<Symbol> newSymbols) 316 { 317 var sortedIntervals = SelectDistinctViaImportance(newSymbols).ToArray(); 318 Array.Sort(sortedIntervals, comparer); 319 sortedIntervals = DropUnusedLabels(sortedIntervals).ToArray(); 320 321 if(Count > 0) 322 { 323 List<Symbol> createdSymbols; 324 List<Symbol> deletedSymbols; 325 MergeInSymbolArray(sortedIntervals, out createdSymbols, out deletedSymbols); 326 foreach(var deletedSymbol in deletedSymbols) 327 { 328 owner.RemoveSymbolFromNameLookup(deletedSymbol); 329 } 330 foreach(var createdSymbol in createdSymbols) 331 { 332 owner.AddSymbolToNameLookup(SymbolLookup.GetUnique(createdSymbol)); 333 } 334 } 335 else 336 { 337 symbols = sortedIntervals; 338 } 339 340 Count = symbols.Length; 341 RebuildEnclosingIntervals(); 342 } 343 344 /// <summary> 345 /// Add the specified interval. NOTE: This is a version for single element. It's logic is complicated compared to the batch version, 346 /// but it should have better performance when it comes to constants with complexities. 347 /// Our typical use-cases do not use it very much, so if the maintain cost becomes too high, the implementation should be swapped 348 /// for to use range add. "Too high" is when you ask yourself a question "Is it too high?". 349 /// </summary> 350 /// <param name="symbol">Symbol.</param> Add(Symbol symbol)351 public void Add(Symbol symbol) 352 { 353 Add(new[] { symbol }); 354 } 355 356 /// <summary> 357 /// Tries to get the innermost symbol containing the specified scalar. If no symbol contains the scalar the trial fails. 358 /// </summary> 359 /// <returns><c>true</c>, if the interval was found, <c>false</c> when such interval does not exist.</returns> 360 /// <param name="scalar">Scalar.</param> 361 /// <param name="interval">Found interval.</param> TryGet(SymbolAddress scalar, out Symbol interval)362 public bool TryGet(SymbolAddress scalar, out Symbol interval) 363 { 364 interval = default(Symbol); 365 var marker = new MarkerSymbol(scalar); 366 var index = Array.BinarySearch<Symbol>(symbols, marker, comparer); 367 if(index < 0) 368 { 369 index = ~index; 370 // we are behind last element or before 1st 371 if(index == 0) 372 { 373 return false; 374 } 375 // move back one element because search will always point to the 1st larger element than the marker 376 index--; 377 } 378 379 /*** 380 * We might be between two towers, that lie on the common ground (we will be between the top of the left tower and 381 * before the base of the next one). `index` points to the left top symbol, we check if any enclosing symbol also contains the 382 * scalar. If not, there is no such symbol (the common ground) and scalar just lies between two towers. If it exists scalar 383 * is between two symbol lying on the same base. 384 ****/ 385 index = FindEnclosingInterval(index, scalar); 386 if(index != NoEnclosingInterval) 387 { 388 interval = symbols[index]; 389 return true; 390 } 391 392 return false; 393 } 394 395 /// <summary> 396 /// Tries to retrieve a Symbol with the given interval from the container. 397 /// </summary> 398 /// <returns><c>true</c>, if such container existed and was found, <c>false</c> otherwise.</returns> 399 /// <param name="other">Interval to look for.</param> 400 /// <param name="interval">Interval.</param> TryGet(Symbol other, out Symbol interval)401 public bool TryGet(Symbol other, out Symbol interval) 402 { 403 interval = null; 404 var index = Array.BinarySearch<Symbol>(symbols, other, comparer); 405 if(index > 0) 406 { 407 interval = symbols[index]; 408 } 409 return interval != null; 410 } 411 GetEnumerator()412 public IEnumerator GetEnumerator() 413 { 414 return symbols.GetEnumerator(); 415 } 416 417 /// <summary> 418 /// Tells whether an exact interval is contained in the container. 419 /// </summary> 420 /// <param name="interval">Interval.</param> Contains(Symbol interval)421 public bool Contains(Symbol interval) 422 { 423 var index = Array.BinarySearch<Symbol>(symbols, interval, comparer); 424 return index > 0; 425 } 426 427 /// <summary> 428 /// Total number of elements. 429 /// </summary> 430 /// <value>The count.</value> 431 public int Count { get; private set; } 432 433 /// <summary> 434 /// Filters out the symbols that are just labels and are also covered by non-label symbols. 435 /// </summary> 436 /// <param name="symbols">Sorted collection of symbols.</param> DropUnusedLabels(IEnumerable<Symbol> symbols)437 private IEnumerable<Symbol> DropUnusedLabels(IEnumerable<Symbol> symbols) 438 { 439 Symbol currentSymbol = null; 440 foreach(var symbol in symbols) 441 { 442 // only include labels that are not part of other symbols 443 if(!symbol.IsLabel) 444 { 445 if(currentSymbol == null || !currentSymbol.Contains(symbol)) 446 { 447 // we advance currentSymbol only if we're in topmost (largest) symbol of the stack. 448 // The reason is that it will definitely contain all smaller ones and we will not miss this situation: 449 // 450 // aaaaaa i 451 // xxxxxxxxxxxxx 452 // 453 // in which `i` is not contained by `a`, but is contained by `x`. 454 currentSymbol = symbol; 455 } 456 yield return symbol; 457 } 458 else 459 { 460 // label 461 if(currentSymbol == null || !currentSymbol.Contains(symbol)) 462 { 463 yield return symbol; 464 } 465 } 466 } 467 } 468 469 /// <summary> 470 /// Consumes whole cake from provider 471 /// </summary> 472 /// <param name="destination">Destination.</param> 473 /// <param name="source">Source.</param> CopyCake(ICollection<Symbol> destination, ISymbolProvider source)474 private static void CopyCake(ICollection<Symbol> destination, ISymbolProvider source) 475 { 476 Symbol cakeBase = source.Current; 477 destination.Add(source.Consume()); // copy base 478 // "move cake" - while current old symbol is contained in base copy it 479 while(!source.Empty && cakeBase.Contains(source.Current)) 480 { 481 destination.Add(source.Consume()); 482 } 483 } 484 485 /// <summary> 486 /// Consumes all symbols from provider copying them to the destination. 487 /// </summary> 488 /// <param name="destination">Destination.</param> 489 /// <param name="source">Source.</param> CopyRest(ICollection<Symbol> destination, ISymbolProvider source)490 private static void CopyRest(ICollection<Symbol> destination, ISymbolProvider source) 491 { 492 while(!source.Empty) 493 { 494 destination.Add(source.Consume()); 495 } 496 } 497 498 /// <summary> 499 /// This function filters out symbols with the same intervals perserving the most important ones. 500 /// </summary> SelectDistinctViaImportance(IEnumerable<Symbol> symbolSet)501 private IEnumerable<Symbol> SelectDistinctViaImportance(IEnumerable<Symbol> symbolSet) 502 { 503 var distinctSymbols = new MultiValueDictionary<int, Symbol>(); 504 IReadOnlyCollection<Symbol> symbolsWithSameHash; 505 int hashCode; 506 foreach(var symbol in symbolSet) 507 { 508 hashCode = comparer.GetHashCode(symbol); 509 if(distinctSymbols.TryGetValue(hashCode, out symbolsWithSameHash)) 510 { 511 // Replace all exisiting copies (interval-wise) of the symbol which that are less important. 512 // There should be only one and we have to break after modification of the collection. 513 foreach(var existingSymbol in symbolsWithSameHash) 514 { 515 if(comparer.Equals(symbol, existingSymbol) && symbol.IsMoreImportantThan(existingSymbol)) 516 { 517 distinctSymbols.Remove(hashCode, existingSymbol); 518 distinctSymbols.Add(hashCode, symbol); 519 break; 520 } 521 } 522 } 523 else 524 { 525 distinctSymbols.Add(hashCode, symbol); 526 } 527 } 528 return distinctSymbols; 529 } 530 531 532 /// <summary> 533 /// Consumes a symbol from oldSymbols, and puts it into destination and/or tails of oldSymbols. Handles overlapping with newSymbols. 534 /// The head of a symbol is the part from start of the *old* symbol till the start of the *new* one. 535 /// The tail of the symbol is the part from the end of the *new* symbol till the end of the *old*. 536 /// This function copies over the head to the destination and adds a tail to the tail set (of course 537 /// when it makes sense). 538 /// </summary> 539 /// <param name="newSymbols">Provider of new symbols.</param> 540 /// <param name="destination">Destination container.</param> 541 /// <param name="oldSymbols">OldSymbolsProvider, symbols provider.</param> 542 /// <param name="createdSymbols">Container with the symbols that got created during merging.</param> 543 /// <param name="deletedSymbols">Container with the symbols that got got replaced/deleted during merging.</param> AddSymbolWithHashing(OldSymbolProvider oldSymbols, ISymbolProvider newSymbols, ICollection<Symbol> destination, ICollection<Symbol> createdSymbols, ICollection<Symbol> deletedSymbols)544 private void AddSymbolWithHashing(OldSymbolProvider oldSymbols, ISymbolProvider newSymbols, ICollection<Symbol> destination, ICollection<Symbol> createdSymbols, ICollection<Symbol> deletedSymbols) 545 { 546 var consumedSymbolType = oldSymbols.CurrentType; 547 var symbolToAdd = oldSymbols.Consume(); 548 549 // If there is no symbol to potentially overlap just copy the orignal one 550 if(newSymbols.Empty) 551 { 552 destination.Add(symbolToAdd); 553 return; 554 } 555 556 if(consumedSymbolType == OldSymbolProvider.SymbolType.Original) 557 { 558 deletedSymbols.Add(symbolToAdd); 559 } 560 561 // If the symbol to add starts before the new one, add its head to the destination. 562 if(symbolToAdd.Start < newSymbols.Current.Start) 563 { 564 Symbol symbolHead; 565 symbolToAdd.TryGetRightTrimmed(newSymbols.Current.Start, out symbolHead); 566 // If there are symbols on the tail list, check if the trimming would produce the same interval. 567 // If so, we skip the current addition. We do so, because in such case we want to preserve the top (most inner-one) former symbol. 568 // We need, however, to process the possible remainder, a new tail might be generated. 569 Symbol trimmedTail; 570 if(!oldSymbols.HasNextTail || 571 !oldSymbols.NextTail.TryGetRightTrimmed(newSymbols.Current.Start, out trimmedTail) || 572 comparer.Compare(symbolHead, trimmedTail) != 0) 573 { 574 destination.Add(symbolHead); 575 createdSymbols.Add(symbolHead); 576 } 577 } 578 579 if(newSymbols.Current.Length == 0 && symbolToAdd.Start == newSymbols.Current.Start) 580 { 581 destination.Add(symbolToAdd); 582 return; 583 } 584 585 // if there is something behind it add it to the new symbols list 586 if(symbolToAdd.End > newSymbols.Current.End) 587 { 588 Symbol symbolTail; 589 symbolToAdd.TryGetLeftTrimmed(newSymbols.Current.End, out symbolTail); 590 oldSymbols.AddTail(symbolTail); 591 #if DEBUG 592 DebugAssert.AssertFalse( 593 newSymbols.Current.Length > 0 && newSymbols.Current.Overlaps(symbolTail), 594 "New symbol overlaps with created tail! Old and new symbols should not overlap." 595 ); 596 #endif 597 return; 598 } 599 600 // There is one another case. Where the overlapping candidate completely overshadows the symbol. 601 // Don't do anything, the overshadowed symbol got deleted anyways and no new symbol gets created. 602 } 603 MergeInSymbolArray(Symbol[] symbolsToAdd, out List<Symbol> createdSymbols, out List<Symbol> deletedSymbols)604 private void MergeInSymbolArray(Symbol[] symbolsToAdd, out List<Symbol> createdSymbols, out List<Symbol> deletedSymbols) 605 { 606 int capacity = symbols.Length + symbolsToAdd.Length; 607 // We don't know exact numbers of the new symbols, because some might get split, and some might get removed. 608 // Thus we use a list, and chosen initial capacity makes some sense. 609 var mergedIntervals = new List<Symbol>(capacity); 610 var oldSymbols = new OldSymbolProvider(comparer, symbols); 611 var newSymbols = new SymbolProvider(symbolsToAdd); 612 createdSymbols = new List<Symbol>(); 613 deletedSymbols = new List<Symbol>(); 614 615 // While new and old symbols might interleave 616 while(!oldSymbols.Empty && !newSymbols.Empty) 617 { 618 // While they do not overlap 619 while(!oldSymbols.Empty && !newSymbols.Empty && !oldSymbols.Current.Overlaps(newSymbols.Current)) 620 { 621 // Symbols do not overlap here so it is sufficient to check only beginnings to find out which comes 1st. 622 // In case of copying new symbol, we can move whole cake, because it will never be cached. 623 if(oldSymbols.Current.Start < newSymbols.Current.Start) 624 { 625 mergedIntervals.Add(oldSymbols.Consume()); 626 } 627 else 628 { 629 CopyCake(mergedIntervals, newSymbols); 630 } 631 } 632 if(oldSymbols.Empty || newSymbols.Empty) 633 { 634 break; 635 } 636 // Here we are sure that current old and new symbols overlap somehow 637 // 638 // While we are not behind the current new symbol, keep adding old symbols and hash them if needed. 639 // Also handles zero length symbols. It is done with such an ugly condition, be cause otherwise, 640 // Two loops would be needed, one before overlaps, and one after, for different placements of 0-length 641 // symbols. 642 while(!oldSymbols.Empty && ( 643 oldSymbols.Current.Start < newSymbols.Current.End || ( 644 newSymbols.Current.Length == 0 && 645 oldSymbols.Current.Start == newSymbols.Current.End 646 ))) 647 { 648 AddSymbolWithHashing(oldSymbols, newSymbols, mergedIntervals, createdSymbols, deletedSymbols); 649 } 650 // Copy in the new symbol cake 651 CopyCake(mergedIntervals, newSymbols); 652 } 653 654 // Copy rest of the symbols. If the symbol comes from temporary list, it should be "materialized" 655 while(!oldSymbols.Empty) 656 { 657 if(oldSymbols.CurrentType == OldSymbolProvider.SymbolType.Temporary) 658 { 659 createdSymbols.Add(oldSymbols.Current); 660 } 661 mergedIntervals.Add(oldSymbols.Consume()); 662 } 663 664 // Copy rest of the newSymbols 665 CopyRest(mergedIntervals, newSymbols); 666 symbols = mergedIntervals.ToArray(); 667 } 668 669 private interface ISymbolProvider 670 { 671 Symbol Current { get; } 672 673 bool Empty { get; } 674 Consume()675 Symbol Consume(); 676 } 677 678 private class OldSymbolProvider : ISymbolProvider 679 { OldSymbolProvider(IComparer<Symbol> comparer, Symbol[] oldSymbols)680 public OldSymbolProvider(IComparer<Symbol> comparer, Symbol[] oldSymbols) 681 { 682 intervalComparer = comparer; 683 symbols = new SymbolProvider(oldSymbols); 684 tails = new Queue<Symbol>(); 685 currentsSynced = false; 686 } 687 688 public Symbol Current 689 { 690 get 691 { 692 UpdateCurrentSymbolAndType(); 693 return currentSymbol; 694 } 695 } 696 697 public SymbolType CurrentType 698 { 699 get 700 { 701 UpdateCurrentSymbolAndType(); 702 return currentSymbolType; 703 } 704 } 705 706 public bool Empty 707 { 708 get 709 { 710 UpdateCurrentSymbolAndType(); 711 return currentSymbol == null; 712 } 713 } 714 715 public bool HasNextTail { get { return tails.Count != 0; } } 716 717 public Symbol NextTail { get { return tails.Peek(); } } 718 AddTail(Symbol tail)719 public void AddTail(Symbol tail) 720 { 721 tails.Enqueue(tail); 722 currentsSynced = false; 723 } 724 Consume()725 public Symbol Consume() 726 { 727 Symbol ret = Current; 728 DiscardCurrentSymbol(); 729 return ret; 730 } 731 732 public enum SymbolType 733 { 734 NoSymbol, 735 Original, 736 Temporary 737 } 738 739 /// <summary> 740 /// Discards the current symbol. It consumes original symbol. Why the temporary symbols are not consumed here 741 /// is described in updateCurrentSymbolAndType description. 742 /// </summary> DiscardCurrentSymbol()743 private void DiscardCurrentSymbol() 744 { 745 switch(CurrentType) 746 { 747 case SymbolType.Original: 748 symbols.Consume(); 749 break; 750 case SymbolType.Temporary: 751 case SymbolType.NoSymbol: 752 break; 753 } 754 currentsSynced = false; 755 } 756 757 /// <summary> 758 /// Updates current symbol and its type. If current symbol is a temporary it is consumed, 759 /// when it's original it is not consumed. 760 /// </summary> 761 /// <description> 762 /// There can exists redundant temporary symbols. I.e. when trimming would produce the same intervals for two symbols. 763 /// Everything but last symbol with the same interval is important, rest must be ignored. 764 /// This is why temporary symbols are consumed upon update. We cannot remove redundant symbols upon addition, 765 /// but we can ignore them upon update. We also cannot peek more than one symbol ahed, so we need to consume one and 766 /// check if the next is the same. 767 /// Original symbol cannot be consumed when set as current, because a new tail might will be placed before current 768 /// it would need to be put back. Vice versa situation do not exists, because original symbols are immutable. 769 /// Also tails are added in a monotonic manner, so adding a new tail will also not result in eventual put back. 770 /// Original symbols are consumed in DiscardCurrentSymbol. 771 /// </description> UpdateCurrentSymbolAndType()772 private void UpdateCurrentSymbolAndType() 773 { 774 if(currentsSynced) 775 { 776 return; 777 } 778 currentsSynced = true; 779 780 if(tails.Count == 0 && symbols.Empty) 781 { 782 currentSymbol = null; 783 currentSymbolType = SymbolType.NoSymbol; 784 return; 785 } 786 if(tails.Count == 0) 787 { 788 currentSymbol = symbols.Current; 789 currentSymbolType = SymbolType.Original; 790 return; 791 } 792 if(symbols.Empty) 793 { 794 currentSymbol = DequeueLastCopy(tails); 795 currentSymbolType = SymbolType.Temporary; 796 return; 797 } 798 if(intervalComparer.Compare(symbols.Current, tails.Peek()) < 0) 799 { 800 currentSymbol = symbols.Current; 801 currentSymbolType = SymbolType.Original; 802 return; 803 } 804 // We will get last original copy of the symbol from the tails queue. 805 currentSymbol = DequeueLastCopy(tails); 806 currentSymbolType = SymbolType.Temporary; 807 // If it's exactly the same as the symbol in the old list, we prefer to ignore tail, because it 808 // for sure started before the original symbol, thus would not be the innermost. 809 if(intervalComparer.Compare(symbols.Current, currentSymbol) == 0) 810 { 811 currentSymbol = symbols.Current; 812 currentSymbolType = SymbolType.Original; 813 } 814 } 815 816 /// <summary> 817 /// Dequeues a tail symbol returning the last equivalent copy. This ensures that if there are tails of exact same interval, 818 /// we will get last one, as comming from the more inner one original. This might happen when, two symbol end at the same 819 /// address and both will get their heads trimmed. 820 /// </summary> 821 /// <returns>The last copy.</returns> 822 /// <param name="symbolTails">Symbol tails.</param> DequeueLastCopy(Queue<Symbol> symbolTails)823 private Symbol DequeueLastCopy(Queue<Symbol> symbolTails) 824 { 825 var symbol = symbolTails.Dequeue(); 826 while(symbolTails.Count > 0 && intervalComparer.Compare(symbol, symbolTails.Peek()) == 0) 827 { 828 symbol = symbolTails.Dequeue(); 829 } 830 return symbol; 831 } 832 833 private Symbol currentSymbol; 834 private SymbolType currentSymbolType; 835 private bool currentsSynced; 836 private readonly IComparer<Symbol> intervalComparer; 837 private readonly Queue<Symbol> tails; 838 private readonly SymbolProvider symbols; 839 } 840 841 private class SymbolProvider : ISymbolProvider 842 { 843 public int Length { get { return array.Length - iterator; } } 844 845 public Symbol Current { get { return Peek(); } } 846 847 public bool Empty { get { return !HasSymbolsLeft(); } } 848 849 SymbolProvider(Symbol[] symbols)850 public SymbolProvider(Symbol[] symbols) 851 { 852 array = symbols; 853 } 854 Consume()855 public Symbol Consume() 856 { 857 return array[iterator++]; 858 } 859 HasSymbolsLeft()860 private bool HasSymbolsLeft() 861 { 862 return iterator < array.Length; 863 } 864 Peek()865 private Symbol Peek() 866 { 867 return array[iterator]; 868 } 869 870 private int iterator; 871 private readonly Symbol[] array; 872 } 873 874 /// <summary> 875 /// Rebuilds the helper table - `indexOfEnclosingInterval` from the intervals table in O(n). From fast back-of-the-envelope analysis the 876 /// the pessimistic run time is O(2n). 877 /// 878 /// The algorithms works by constructing a stack of enclosing intervals, and iterating over intervals. 879 /// 1. At the beginning the guard is created, meaning there is no enclosing interval. We also set it for the 1st element (it 880 /// obviously cannot be containted in anything else). 881 /// 2. We iterate over the remaining intervals. 882 /// 3. We check whether the current interval is enclosed in the previous one. 883 /// 4. If yes, it belong to the "tower", so we add it to the stack. 884 /// 5. If not, we descend the stack poping elements until we are enclosed by one or we hit the guard. 885 /// 6. The parent is top element from the stack. 886 /// </summary> RebuildEnclosingIntervals()887 private void RebuildEnclosingIntervals() 888 { 889 indexOfEnclosingInterval = new int[Count]; 890 if(Count < 1) 891 { 892 return; 893 } 894 var parents = new Stack<int>(); 895 896 // 1. 897 parents.Push(NoEnclosingInterval); 898 indexOfEnclosingInterval[0] = NoEnclosingInterval; 899 900 // 2. 901 for(int i = 1; i < Count; ++i) 902 { 903 var interval = symbols[i]; 904 // 3. 905 if(symbols[i - 1].Contains(interval)) 906 { 907 // 4. 908 parents.Push(i - 1); 909 } 910 else 911 { 912 // 5. 913 while(parents.Peek() != NoEnclosingInterval && !symbols[parents.Peek()].Contains(interval)) 914 { 915 parents.Pop(); 916 } 917 } 918 // 6. 919 indexOfEnclosingInterval[i] = parents.Peek(); 920 } 921 } 922 923 /// <summary> 924 /// Finds the next sibling. NOTE: Orphans do not have siblings. 925 /// </summary> 926 /// <returns>The next sibling or <see cref="NoRightSibling"/> special value.</returns> 927 /// <param name="index">Index.</param> FindNextSibling(int index)928 private int FindNextSibling(int index) 929 { 930 var parentIdx = indexOfEnclosingInterval[index]; 931 932 if(parentIdx == NoEnclosingInterval) 933 { 934 return NoRightSibling; 935 } 936 937 var parentEnd = symbols[parentIdx].End; 938 index++; 939 // as long as there are any elements and we're not past our parent's end 940 while(index < symbols.Length && symbols[index].End > parentEnd) 941 { 942 // check if the element has the same parent 943 if(indexOfEnclosingInterval[index] == parentIdx) 944 { 945 return index; 946 } 947 index++; 948 } 949 return NoRightSibling; 950 } 951 952 /// <summary> 953 /// Checks if the given <paramref name="interval"/> is enclosed by the one under <paramref name="index"/> or one of it's "parents". 954 /// </summary> 955 /// <returns>The enclosing interval index or NoEnclosingInterval.</returns> 956 /// <param name="index">Index of the input interval.</param> 957 /// <param name="interval">Interval.</param> FindEnclosingInterval(int index, Symbol interval)958 private int FindEnclosingInterval(int index, Symbol interval) 959 { 960 while(index != NoEnclosingInterval && !symbols[index].Contains(interval)) 961 { 962 index = indexOfEnclosingInterval[index]; 963 } 964 return index; 965 } 966 967 /// <summary> 968 /// Checks if the given <paramref name="scalar"/> is enclosed by the one under <paramref name="index"/> or one of it's "parents". 969 /// </summary> 970 /// <returns>The enclosing interval index or NoEnclosingInterval.</returns> 971 /// <param name="index">Index of the input interval.</param> 972 /// <param name = "scalar"></param> FindEnclosingInterval(int index, SymbolAddress scalar)973 private int FindEnclosingInterval(int index, SymbolAddress scalar) 974 { 975 var original = index; 976 while(index != NoEnclosingInterval && !symbols[index].Contains(scalar)) 977 { 978 index = indexOfEnclosingInterval[index]; 979 } 980 // A special case when we hit after a 0-length symbol thas does not have any parent. 981 // We do not need to check it in the loop, as such a symbol would never enclose other symbols. 982 // This gives an approximate result. 983 if(index == NoEnclosingInterval) 984 { 985 if(symbols[original].Start <= scalar && symbols[original].End == symbols[original].Start && (symbols.Length == original + 1 || symbols[original + 1].Start > scalar)) 986 { 987 index = original; 988 } 989 } 990 return index; 991 } 992 993 private SymbolLookup owner; 994 private const int NoEnclosingInterval = -1; 995 private const int NoRightSibling = -1; 996 private readonly MarkerComparer<SymbolAddress> comparer = new MarkerComparer<SymbolAddress>(); 997 998 private Symbol[] symbols; 999 1000 /// <summary> 1001 /// Internal container holding mapping from interval index to its enclosing interval index, 1002 /// if such exists, otherwise a special value <see cref="NoEnclosingInterval"/> is used. 1003 /// It allows to get all the enclosing intervals in fast manner. 1004 /// indexOfEnclosingInterval[i] says where the ith's "parent" is. 1005 /// </summary> 1006 private int[] indexOfEnclosingInterval; 1007 1008 /// <summary> 1009 /// This class is used with marker symbols, to find the innermost intervals which the marker (representing scalar) belongs to. 1010 /// It basically says if the symbol starts earlier or later than the marker. 1011 /// </summary> 1012 private class MarkerComparer<TScalar> : IntervalComparer<TScalar> where TScalar : struct, IComparable<TScalar> 1013 { 1014 static TScalar MarkerValue = (TScalar)typeof(TScalar).GetField("MaxValue", System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Static).GetValue(null); 1015 GetMarkerValue()1016 static public TScalar GetMarkerValue() 1017 { 1018 return MarkerValue; 1019 } 1020 Compare(IInterval<TScalar> lhs, IInterval<TScalar> rhs)1021 public override int Compare(IInterval<TScalar> lhs, IInterval<TScalar> rhs) 1022 { 1023 var markerValue = GetMarkerValue(); 1024 if(lhs.End.CompareTo(markerValue) != 0 && rhs.End.CompareTo(markerValue) != 0) 1025 { 1026 return base.Compare(lhs, rhs); 1027 } 1028 1029 IInterval<TScalar> marker; 1030 IInterval<TScalar> symbol; 1031 bool markerIsLhs; 1032 if(lhs.End.CompareTo(markerValue) == 0) 1033 { 1034 marker = lhs; 1035 symbol = rhs; 1036 markerIsLhs = true; 1037 } 1038 else 1039 { 1040 marker = rhs; 1041 symbol = lhs; 1042 markerIsLhs = false; 1043 } 1044 1045 /** 1046 If the marker finds the symbol exactly matching the start, explicitly make the symbol smaller. 1047 This will ensure the property, that marker cannot be exactly matched to the symbol. 1048 This way, symbol starting directly at the pointer will be treated the same as the symbol 1049 Starting before it, which makes sense. It both cases it will contain it depending on the end position. 1050 In other words this defines that inclusive start boundary of comparison overrides exclusive end. 1051 **/ 1052 var cmp = symbol.Start.CompareTo(marker.Start) == 0 ? marker.Start.CompareTo(symbol.End) : symbol.Start.CompareTo(marker.Start); 1053 // If marker was passed as left hand side argument, we need to invert the comparison result. 1054 return markerIsLhs ? -cmp : cmp; 1055 } 1056 } 1057 1058 /// <summary> 1059 /// This is a convienice class, that makes a marker symbol, which is treated differently by MarkerComparer. 1060 /// It is a special symbol that wraps a scalar value in the Symbol object. Its Start represents the scalar 1061 /// value, and its End is a special "magic value" that marks it as a marker symbol. 1062 /// </summary> 1063 private class MarkerSymbol : Symbol 1064 { MarkerSymbol(SymbolAddress value)1065 public MarkerSymbol(SymbolAddress value) : base(value, MarkerComparer<SymbolAddress>.GetMarkerValue()) 1066 { 1067 } 1068 } 1069 } 1070 } 1071 } 1072