#!/usr/bin/env python3
#
# Copyright (c) 2016, 2020-2024 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Based on a script by:
#       Chereau, Fabien <fabien.chereau@intel.com>

"""
Process an ELF file to generate size report on RAM and ROM.
"""

import argparse
import locale
import os
import sys
import re
from pathlib import Path
import json

from packaging import version

from colorama import init, Fore, Style

from anytree import RenderTree, NodeMixin, findall_by_attr
from anytree.exporter import DictExporter

import elftools
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
from elftools.dwarf.descriptions import describe_form_class
from elftools.dwarf.descriptions import (
    describe_DWARF_expr, set_global_machine_arch)
from elftools.dwarf.locationlists import (
    LocationExpr, LocationParser)

if version.parse(elftools.__version__) < version.parse('0.24'):
    sys.exit("pyelftools is out of date, need version 0.24 or later")


# ELF section flags
SHF_WRITE = 0x1
SHF_ALLOC = 0x2
SHF_EXEC = 0x4
SHF_WRITE_ALLOC = SHF_WRITE | SHF_ALLOC
SHF_ALLOC_EXEC = SHF_ALLOC | SHF_EXEC

# Matches the textual form of a location expression holding a single address
DT_LOCATION = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)\)")

# Node names ending with one of these are highlighted as source files
SRC_FILE_EXT = ('.h', '.c', '.hpp', '.cpp', '.hxx', '.cxx', '.c++')


def get_symbol_addr(sym):
    """Get the address of a symbol"""
    return sym['st_value']


def get_symbol_size(sym):
    """Get the size of a symbol"""
    return sym['st_size']


def is_symbol_in_ranges(sym, ranges):
    """
    Given a list of start/end addresses, test if the symbol
    lies within any of these address ranges.

    Returns the first matching range (a dict with 'start', 'end' and
    'name' keys; both bounds are inclusive) or None if there is no match.
    """
    addr = sym['st_value']
    for bound in ranges:
        if bound['start'] <= addr <= bound['end']:
            return bound

    return None


def get_die_mapped_address(die, parser, dwarfinfo):
    """
    Get the bounding addresses from a DIE variable or subprogram.

    Returns a (low, high) tuple where 'high' is exclusive. Both values
    are None when the DIE does not map to an address range that can be
    determined, so callers only need to skip such DIEs.
    """
    low = None
    high = None

    if die.tag == 'DW_TAG_variable':
        if 'DW_AT_location' in die.attributes:
            loc_attr = die.attributes['DW_AT_location']
            if parser.attribute_has_location(loc_attr, die.cu['version']):
                loc = parser.parse_from_attribute(loc_attr, die.cu['version'], die)
                if isinstance(loc, LocationExpr):
                    addr = describe_DWARF_expr(loc.loc_expr,
                                               dwarfinfo.structs)

                    matcher = DT_LOCATION.match(addr)
                    if matcher:
                        low = int(matcher.group(1), 16)
                        high = low + 1

    elif die.tag == 'DW_TAG_subprogram':
        # Both attributes are needed to describe a range. A DIE carrying
        # only DW_AT_low_pc used to raise KeyError here.
        if ('DW_AT_low_pc' in die.attributes
                and 'DW_AT_high_pc' in die.attributes):
            low_pc = die.attributes['DW_AT_low_pc'].value

            high_pc = die.attributes['DW_AT_high_pc']
            high_pc_class = describe_form_class(high_pc.form)
            if high_pc_class == 'address':
                # DW_AT_high_pc is an absolute address
                low = low_pc
                high = high_pc.value
            elif high_pc_class == 'constant':
                # DW_AT_high_pc is an offset from DW_AT_low_pc
                low = low_pc
                high = low_pc + high_pc.value
            # Any other form class leaves (low, high) as (None, None)
            # instead of returning a half-filled range.

    return low, high


def match_symbol_address(symlist, die, parser, dwarfinfo):
    """
    Find the symbol from a symbol list
    where it matches the address in DIE variable,
    or within the range of a DIE subprogram.

    Returns the matching symbol entry, or None if there is none.
    """
    low, high = get_die_mapped_address(die, parser, dwarfinfo)

    if low is None or high is None:
        return None

    for sym in symlist:
        if low <= sym['symbol']['st_value'] < high:
            return sym

    return None


def get_symbols(elf, addr_ranges):
    """
    Fetch the symbols from the symbol table and put them
    into ROM, RAM, unassigned buckets.

    Returns a dict with 'rom', 'ram', 'all' and 'unassigned' keys. Each
    value maps a symbol name to a list of symbol entries, since several
    symbols may share one name.
    """
    rom_syms = {}
    ram_syms = {}
    all_syms = {}
    unassigned_syms = {}

    def entry(sym, loc, section):
        """
        Factory function for a symbol entry.
        """
        return {'name': sym.name,
                'symbol': sym,
                'section': section,
                'loc': loc,
                'mapped_files': set()}

    rom_addr_ranges = addr_ranges['rom']
    ram_addr_ranges = addr_ranges['ram']
    unassigned_addr_ranges = addr_ranges['unassigned']

    for section in elf.iter_sections():
        if not isinstance(section, SymbolTableSection):
            continue

        for sym in section.iter_symbols():
            # Ignore symbols with size == 0
            if get_symbol_size(sym) == 0:
                continue

            ram_sym = is_symbol_in_ranges(sym, ram_addr_ranges)
            rom_sym = is_symbol_in_ranges(sym, rom_addr_ranges)

            # Determine the location(s) for this symbol.
            loc = []
            if ram_sym:
                loc.append('ram')
            if rom_sym:
                loc.append('rom')

            # Each bucket gets its own entry so that the file mappings
            # collected for one target do not leak into another.
            if rom_sym:
                rom_syms.setdefault(sym.name, []).append(
                    entry(sym, loc, rom_sym['name']))

            if ram_sym:
                ram_syms.setdefault(sym.name, []).append(
                    entry(sym, loc, ram_sym['name']))

            # If symbol is in either area add to "all" list.
            if ram_sym or rom_sym:
                section_name = ram_sym['name'] if ram_sym else rom_sym['name']
                all_syms.setdefault(sym.name, []).append(
                    entry(sym, loc, section_name))
            else:
                bound = is_symbol_in_ranges(sym, unassigned_addr_ranges)
                unassigned_syms.setdefault(sym.name, []).append(
                    entry(sym, ["unassigned"], bound['name'] if bound else None))

    return {'rom': rom_syms,
            'ram': ram_syms,
            'all': all_syms,
            'unassigned': unassigned_syms}


def print_section_info(section, descr=""):
    """Print address range, size, type and flags of a section (verbose mode only)"""
    if args.verbose:
        sec_size = section['sh_size']
        sec_start = section['sh_addr']
        sec_end = sec_start + (sec_size - 1 if sec_size else 0)
        print(f"DEBUG: "
              f"0x{sec_start:08x}-0x{sec_end:08x} "
              f"{descr} '{section.name}': size={sec_size}, "
              f"{section['sh_type']}, 0x{section['sh_flags']:08x}")


def get_section_ranges(elf):
    """
    Parse ELF header to find out the address ranges of ROM or RAM sections
    and their total sizes.

    Returns a dict holding, for each of 'rom', 'ram', 'unassigned' and
    'all', the list of address ranges plus a matching '<name>_total_size'.
    """
    rom_addr_ranges = []
    ram_addr_ranges = []
    unassigned_addr_ranges = []

    rom_size = 0
    ram_size = 0
    unassigned_size = 0
    total_size = 0

    xip = any(section.get_symbol_by_name('CONFIG_XIP')
              for section in elf.iter_sections('SHT_SYMTAB'))
    if args.verbose:
        print(f'INFO: XIP={xip}')

    for section in elf.iter_sections():
        size = section['sh_size']
        sec_start = section['sh_addr']
        sec_end = sec_start + (size - 1 if size else 0)
        bound = {'start': sec_start, 'end': sec_end, 'name': section.name}
        is_assigned = False

        if section['sh_type'] == 'SHT_NOBITS':
            # BSS and noinit sections
            ram_addr_ranges.append(bound)
            ram_size += size
            total_size += size
            is_assigned = True
            print_section_info(section, "RAM bss section")

        elif section['sh_type'] == 'SHT_PROGBITS':
            # Sections to be in flash or memory
            flags = section['sh_flags']
            if (flags & SHF_ALLOC_EXEC) == SHF_ALLOC_EXEC:
                # Text section
                rom_addr_ranges.append(bound)
                rom_size += size
                total_size += size
                is_assigned = True
                print_section_info(section, "ROM txt section")

            elif (flags & SHF_WRITE_ALLOC) == SHF_WRITE_ALLOC:
                # Read/write data
                if xip:
                    # For XIP, the data section occupies both ROM and RAM
                    # since at boot, content is copied from ROM to RAM
                    rom_addr_ranges.append(bound)
                    rom_size += size
                ram_addr_ranges.append(bound)
                ram_size += size
                total_size += size
                is_assigned = True
                print_section_info(section, "DATA r/w section")

            elif (flags & SHF_ALLOC) == SHF_ALLOC:
                # Read only data
                rom_addr_ranges.append(bound)
                rom_size += size
                total_size += size
                is_assigned = True
                print_section_info(section, "ROM r/o section")

        if not is_assigned:
            print_section_info(section, "unassigned section")
            unassigned_addr_ranges.append(bound)
            unassigned_size += size

    return {'rom': rom_addr_ranges,
            'rom_total_size': rom_size,
            'ram': ram_addr_ranges,
            'ram_total_size': ram_size,
            'unassigned': unassigned_addr_ranges,
            'unassigned_total_size': unassigned_size,
            'all': ram_addr_ranges + rom_addr_ranges,
            'all_total_size': total_size}


def get_die_filename(die, lineprog):
    """Get the source code filename associated with a DIE"""
    file_index = die.attributes['DW_AT_decl_file'].value
    file_entry = lineprog['file_entry'][file_index - 1]

    dir_index = file_entry['dir_index']
    if dir_index == 0:
        filename = file_entry.name
    else:
        directory = lineprog.header['include_directory'][dir_index - 1]
        filename = os.path.join(directory, file_entry.name)

    path = Path(filename.decode(locale.getpreferredencoding()))

    # Prepend output path to relative path
    if not path.is_absolute():
        path = Path(args.output).joinpath(path)

    # Normalize the path (resolves symlinks and '..' components)
    try:
        path = path.resolve()
    except OSError:
        # built-ins can't be resolved, so it's not an issue
        if '<built-in>' not in str(path):
            raise

    return path


def _record_newly_mapped(processed, newly_mapped_syms):
    """
    Move the given symbol names from the unmapped set to the mapped set.

    New set objects are stored in 'processed', so a caller still
    iterating over the previous 'unmapped_symbols' set is not disturbed.
    """
    processed['mapped_symbols'] = \
        processed['mapped_symbols'].union(newly_mapped_syms)
    processed['unmapped_symbols'] = \
        processed['unmapped_symbols'].difference(newly_mapped_syms)


def do_simple_name_matching(dwarfinfo, symbol_dict, processed):
    """
    Sequentially process DIEs in compiler units with direct file mappings
    within the DIEs themselves, and do simple matching between DIE names
    and symbol names.

    DIEs that could not be mapped are stored under
    processed['unmapped_dies'] for do_address_range_matching().
    """
    mapped_symbols = processed['mapped_symbols']
    mapped_addresses = processed['mapped_addr']
    newly_mapped_syms = set()

    location_lists = dwarfinfo.location_lists()
    location_parser = LocationParser(location_lists)

    unmapped_dies = set()

    # Loop through all compile units
    for compile_unit in dwarfinfo.iter_CUs():
        lineprog = dwarfinfo.line_program_for_CU(compile_unit)
        if lineprog is None:
            continue

        # Loop through each DIE and find variables and
        # subprograms (i.e. functions)
        for die in compile_unit.iter_DIEs():
            sym_name = None

            # Process variables
            if die.tag == 'DW_TAG_variable':
                # having 'DW_AT_location' means this maps
                # to an actual address (e.g. not an extern)
                if 'DW_AT_location' in die.attributes:
                    sym_name = die.get_full_path()

            # Process subprograms (i.e. functions) if they are valid
            if die.tag == 'DW_TAG_subprogram':
                # Refer to another DIE for name
                if ('DW_AT_abstract_origin' in die.attributes) or (
                        'DW_AT_specification' in die.attributes):
                    unmapped_dies.add(die)

                # having 'DW_AT_low_pc' means it maps to
                # an actual address
                elif 'DW_AT_low_pc' in die.attributes:
                    # DW_AT_low_pc == 0 is a weak function
                    # which has been overridden
                    if die.attributes['DW_AT_low_pc'].value != 0:
                        sym_name = die.get_full_path()

                    # For mangled function names, the linkage name
                    # is what appears in the symbol list
                    if 'DW_AT_linkage_name' in die.attributes:
                        linkage = die.attributes['DW_AT_linkage_name']
                        sym_name = linkage.value.decode()

            if sym_name is None:
                continue

            # Skip DIE with no reference back to a file
            if 'DW_AT_decl_file' not in die.attributes:
                continue

            is_die_mapped = False
            if sym_name in symbol_dict:
                mapped_symbols.add(sym_name)
                symlist = symbol_dict[sym_name]
                symbol = match_symbol_address(symlist, die,
                                              location_parser,
                                              dwarfinfo)

                if symbol is not None:
                    symaddr = symbol['symbol']['st_value']
                    if symaddr not in mapped_addresses:
                        is_die_mapped = True
                        path = get_die_filename(die, lineprog)
                        symbol['mapped_files'].add(path)
                        mapped_addresses.add(symaddr)
                        newly_mapped_syms.add(sym_name)

            if not is_die_mapped:
                unmapped_dies.add(die)

    _record_newly_mapped(processed, newly_mapped_syms)
    processed['unmapped_dies'] = unmapped_dies


def mark_address_aliases(symbol_dict, processed):
    """
    Mark symbol aliases as already mapped to prevent
    double counting.

    There are functions and variables which are aliases to
    other functions/variables. So this marks them as mapped
    so they will not get counted again when a tree is being
    built for display.
    """
    mapped_addresses = processed['mapped_addr']

    already_mapped_syms = {
        ums
        for ums in processed['unmapped_symbols']
        if any(one_sym['symbol']['st_value'] in mapped_addresses
               for one_sym in symbol_dict[ums])
    }

    _record_newly_mapped(processed, already_mapped_syms)


def _resolve_die_path(die, lineprog, offset_map):
    """
    Find the source file of a DIE, following indirect references.

    The DIE's own DW_AT_decl_file is used when present. Otherwise the
    DW_AT_abstract_origin / DW_AT_specification chain is followed within
    the compile unit ('offset_map' maps DIE offsets to DIEs) until a DIE
    with a file reference is found. Returns None if there is none.
    """
    path = None

    # Has direct reference to file, so use it
    if 'DW_AT_decl_file' in die.attributes:
        path = get_die_filename(die, lineprog)

    # Loop through indirect reference until a direct
    # reference to file is found
    if ('DW_AT_abstract_origin' in die.attributes) or (
            'DW_AT_specification' in die.attributes):
        die_ptr = die
        while path is None:
            if die_ptr.tag != 'DW_TAG_subprogram':
                break

            if 'DW_AT_abstract_origin' in die_ptr.attributes:
                ofname = 'DW_AT_abstract_origin'
            elif 'DW_AT_specification' in die_ptr.attributes:
                ofname = 'DW_AT_specification'
            else:
                # End of the reference chain
                break

            # The attribute value is relative to the start of the CU
            offset = die_ptr.attributes[ofname].value
            offset += die_ptr.cu.cu_offset

            # There is nothing to reference so no need to continue
            if offset not in offset_map:
                break

            die_ptr = offset_map[offset]
            if 'DW_AT_decl_file' in die_ptr.attributes:
                path = get_die_filename(die_ptr, lineprog)

    return path


def do_address_range_matching(dwarfinfo, symbol_dict, processed):
    """
    Match symbols indirectly using address ranges.

    This uses the address ranges of DIEs and map them to symbols
    residing within those ranges, and works on DIEs that have not
    been mapped in previous steps. This works on symbol names
    that do not match the names in DIEs, e.g. "<func>" in DIE,
    but "<func>.constprop.*" in symbol name list. This also
    helps with mapping the mangled function names in C++,
    since the names in DIE are actual function names in source
    code and not mangled version of them.
    """
    if 'unmapped_dies' not in processed:
        return

    mapped_addresses = processed['mapped_addr']
    unmapped_symbols = processed['unmapped_symbols']
    newly_mapped_syms = set()

    location_lists = dwarfinfo.location_lists()
    location_parser = LocationParser(location_lists)

    # Group DIEs by compile units
    cu_list = {}
    for die in processed['unmapped_dies']:
        cu_list.setdefault(die.cu, {'dies': set()})['dies'].add(die)

    # Loop through all compile units
    for cu, cu_info in cu_list.items():
        lineprog = dwarfinfo.line_program_for_CU(cu)

        # Map offsets from DIEs
        offset_map = {die.offset: die for die in cu.iter_DIEs()}

        for die in cu_info['dies']:
            if die.tag not in ('DW_TAG_subprogram', 'DW_TAG_variable'):
                continue

            path = _resolve_die_path(die, lineprog, offset_map)

            # Nothing to map
            if path is None:
                continue

            low, high = get_die_mapped_address(die, location_parser,
                                               dwarfinfo)
            if low is None:
                continue

            # A function is matched using its address range, a variable
            # using a single address ('high' is 'low + 1' for a variable).
            is_function = die.tag == 'DW_TAG_subprogram'

            for ums in unmapped_symbols:
                for one_sym in symbol_dict[ums]:
                    symaddr = one_sym['symbol']['st_value']

                    if symaddr in mapped_addresses:
                        continue

                    if is_function:
                        matched = low <= symaddr < high
                    else:
                        matched = low == symaddr

                    if matched:
                        one_sym['mapped_files'].add(path)
                        mapped_addresses.add(symaddr)
                        newly_mapped_syms.add(ums)

    _record_newly_mapped(processed, newly_mapped_syms)


def set_root_path_for_unmapped_symbols(symbol_dict, addr_range, processed):
    """
    Set root path for unmapped symbols.

    Any unmapped symbols are added under the root node if those
    symbols reside within the desired memory address ranges
    (e.g. ROM or RAM).
    """
    mapped_addresses = processed['mapped_addr']
    newly_mapped_syms = set()

    for ums in processed['unmapped_symbols']:
        for one_sym in symbol_dict[ums]:
            symbol = one_sym['symbol']
            symaddr = symbol['st_value']

            if is_symbol_in_ranges(symbol, addr_range):
                if symaddr not in mapped_addresses:
                    # ':' is the identifier of the "(no paths)" tree node
                    one_sym['mapped_files'].add(Path(':'))
                    mapped_addresses.add(symaddr)
                    newly_mapped_syms.add(ums)

    _record_newly_mapped(processed, newly_mapped_syms)


def find_common_path_prefix(symbol_dict):
    """
    Find the common path prefix of all mapped files.
    Must be called before set_root_path_for_unmapped_symbols().

    Returns None when no common prefix can be computed, e.g. when
    no file has been mapped at all.
    """
    paths = [file
             for sym in symbol_dict.values()
             for symbol in sym
             for file in symbol['mapped_files']]

    try:
        return os.path.commonpath(paths)
    except ValueError:
        return None


class TreeNode(NodeMixin):
    """
    A symbol node.

    The 'address' and 'section' attributes are only set on nodes that
    represent a symbol. Their absence marks a file or directory node.
    """

    def __init__(self, name, identifier, size=0, parent=None, children=None, address=None, section=None):
        super().__init__()
        self._name = name
        self._size = size
        self.parent = parent
        self._identifier = identifier
        if address is not None:
            self.address = address
        if section is not None:
            self.section = section
        if children:
            self.children = children
        self.loc = []

    def __repr__(self):
        return self._name


def sum_node_children_size(node):
    """
    Calculate the sum of symbol size of all direct children.
    """
    return sum(child._size for child in node.children)


def generate_any_tree(symbol_dict, total_size, path_prefix):
    """
    Generate a symbol tree for output.

    Returns the root node. Its size is 'total_size'; the part of it
    not covered by any symbol is reported under a "(hidden)" node.
    """
    root = TreeNode('Root', "root")
    node_no_paths = TreeNode('(no paths)', ":", parent=root)

    if path_prefix and Path(path_prefix) == Path(args.zephyrbase):
        # All source files are under ZEPHYR_BASE so there is
        # no need for another level.
        node_zephyr_base = root
        node_output_dir = root
        node_workspace = root
        node_others = root
    else:
        node_zephyr_base = TreeNode('ZEPHYR_BASE', args.zephyrbase)
        node_output_dir = TreeNode('OUTPUT_DIR', args.output)
        node_others = TreeNode("/", "/")

        if args.workspace:
            node_workspace = TreeNode('WORKSPACE', args.workspace)
        else:
            node_workspace = node_others

    # A helper function for building a simple tree with a path-like
    # hierarchy.
    def _insert_one_elem(root, path, size, addr, section, loc):
        cur = None
        node = None
        parent = root
        for part in path.parts:
            if cur is None:
                cur = part
            else:
                cur = str(Path(cur, part))

            results = findall_by_attr(root, cur, name="_identifier")
            if results:
                item = results[0]
                if not hasattr(item, 'address'):
                    # Passing down through a non-terminal parent node.
                    parent = item
                    parent._size += size
                else:
                    # Another symbol node here with the same name; stick to its parent as well.
                    parent = item.parent
                    node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent)
            else:
                # There is no such terminal symbol in the tree yet; let's add it.
                if node:
                    parent = node
                node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent)
        if node:
            # Set memory block address and section name properties only for terminal symbol nodes.
            # Don't do it on file- and directory- level parent nodes.
            node.address = addr
            node.section = section
            node.loc = loc
        else:
            # normally this shouldn't happen; just to detect data or logic errors.
            print(f"ERROR: no end node created for {root}, {path}, 0x{addr:08x}+{size}@{section}")

    # Mapping paths to tree nodes
    path_node_map = [
        [Path(args.zephyrbase), node_zephyr_base],
        [Path(args.output), node_output_dir],
    ]

    if args.workspace:
        path_node_map.append(
            [Path(args.workspace), node_workspace]
        )

    for name, sym in symbol_dict.items():
        for symbol in sym:
            size = get_symbol_size(symbol['symbol'])
            addr = get_symbol_addr(symbol['symbol'])
            section = symbol['section']
            for file in symbol['mapped_files']:
                path = Path(file, name)
                if path.is_absolute():
                    # Paths outside every known base go under "others"
                    dest_node = node_others

                    for base_path, base_node in path_node_map:
                        if base_path in path.parents:
                            path = path.relative_to(base_path)
                            dest_node = base_node
                            break
                else:
                    dest_node = node_no_paths

                _insert_one_elem(dest_node, path, size, addr, section, symbol['loc'])

    if node_zephyr_base is not root:
        # ZEPHYR_BASE and OUTPUT_DIR nodes don't have sum of symbol size
        # so calculate them here.
        node_zephyr_base._size = sum_node_children_size(node_zephyr_base)
        node_output_dir._size = sum_node_children_size(node_output_dir)

        # Find out which nodes need to be in the tree.
        # "(no path)", ZEPHYR_BASE nodes are essential.
        children = [node_no_paths, node_zephyr_base]
        if node_output_dir.height != 0:
            # OUTPUT_DIR may be under ZEPHYR_BASE.
            children.append(node_output_dir)
        if node_others.height != 0:
            # Only include "others" node if there is something.
            children.append(node_others)

        if args.workspace:
            node_workspace._size = sum_node_children_size(node_workspace)
            if node_workspace.height != 0:
                children.append(node_workspace)

        root.children = children

    root._size = total_size

    # Need to account for code and data where there are not emitted
    # symbols associated with them.
    node_hidden_syms = TreeNode('(hidden)', "(hidden)", parent=root)
    node_no_paths._size = sum_node_children_size(node_no_paths)
    node_hidden_syms._size = root._size - sum_node_children_size(root)

    return root


def node_sort(items):
    """
    Node sorting used with RenderTree.
    """
    return sorted(items, key=lambda item: item._name)


def print_any_tree(root, total_size, depth, header=None):
    """
    Print the symbol tree.

    'depth' limits how many tree levels are rendered (None for all).
    Percentages are relative to 'total_size' and are reported as 0
    when 'total_size' is 0.
    """
    if header:
        print(f"{Fore.WHITE}{Style.BRIGHT}{header}")
        print('-' * 110 + f"{Fore.RESET}{Style.RESET_ALL}")

    print('{:98s} {:>7s} {:>7s} {:11s} {:16s}'.format(
        Fore.YELLOW + "Path", "Size", "%", " Address", "Section" + Fore.RESET))
    print('=' * 138)
    for row in RenderTree(root, childiter=node_sort, maxlevel=depth):
        f = len(row.pre) + len(row.node._name)
        s = str(row.node._size).rjust(100-f)
        # An empty target (total_size == 0) must not abort the report
        if total_size:
            percent = 100 * float(row.node._size) / float(total_size)
        else:
            percent = 0.0

        hex_addr = "-"
        section_name = ""
        cc = cr = ""
        if not row.node.children:
            if hasattr(row.node, 'section'):
                section_name = row.node.section
            if hasattr(row.node, 'address'):
                hex_addr = "0x{:08x}".format(row.node.address)
                cc = Fore.CYAN
                cr = Fore.RESET
        elif row.node._name.endswith(SRC_FILE_EXT):
            cc = Fore.GREEN
            cr = Fore.RESET

        print(f"{row.pre}{cc}{row.node._name} {s} {cr}{Fore.BLUE}{percent:6.2f}%{Fore.RESET} {hex_addr} {section_name}")
    print('=' * 138)
    print(f'{total_size:>101}')


def parse_args():
    """
    Parse command line arguments.

    The result is stored in the module-level 'args' variable.
    """
    global args

    parser = argparse.ArgumentParser(allow_abbrev=False)

    parser.add_argument("-k", "--kernel", required=True,
                        help="Zephyr ELF binary")
    parser.add_argument("-z", "--zephyrbase", required=True,
                        help="Zephyr base path")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Do not output anything on the screen.")
    parser.add_argument("-o", "--output", required=True,
                        help="Output path")
    parser.add_argument("-w", "--workspace", default=None,
                        help="Workspace path (Usually the same as WEST_TOPDIR)")
    parser.add_argument("target", choices=['rom', 'ram', 'all'], nargs="+")
    parser.add_argument("-d", "--depth", dest="depth",
                        type=int, default=None,
                        help="How deep should we go into the tree",
                        metavar="DEPTH")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Print extra debugging information")
    parser.add_argument("--json", help='Store results in the given JSON file '
                                       '(a "{target}" string in the filename will '
                                       'be replaced by "ram", "rom", or "all").')
    args = parser.parse_args()


def main():
    """
    Main program.

    Exits with an error message if the ELF file does not exist or
    carries no DWARF information.
    """
    parse_args()

    sys.stdout.reconfigure(encoding='utf-8')

    # Init colorama
    init()

    # Explicit checks instead of assert, which is stripped with "python -O"
    if not os.path.exists(args.kernel):
        sys.exit("{0} does not exist.".format(args.kernel))
    targets = args.target

    # The file is opened in its own context so that it is closed even
    # if ELFFile() rejects it. It has to stay open while DWARF data is
    # being read.
    with open(args.kernel, "rb") as elf_file:
        elf = ELFFile(elf_file)
        if not elf.has_dwarf_info():
            sys.exit("ELF file has no DWARF information")

        set_global_machine_arch(elf.get_machine_arch())
        addr_ranges = get_section_ranges(elf)
        dwarfinfo = elf.get_dwarf_info()

        symbols = get_symbols(elf, addr_ranges)
        for sym in symbols['unassigned'].values():
            for sym_entry in sym:
                print(f"WARN: Symbol '{sym_entry['name']}' section '{sym_entry['section']}' "
                      "is not in RAM or ROM.")

        for t in targets:
            if args.json:
                jsonout = args.json.replace('{target}', t)
            else:
                jsonout = os.path.join(args.output, f'{t}.json')

            symbol_dict = symbols[t]
            symsize = addr_ranges[f'{t}_total_size']
            ranges = addr_ranges[t]

            if symbol_dict is None:
                continue

            processed = {"mapped_symbols": set(),
                         "mapped_addr": set(),
                         "unmapped_symbols": set(symbol_dict.keys())}

            do_simple_name_matching(dwarfinfo, symbol_dict, processed)
            mark_address_aliases(symbol_dict, processed)
            do_address_range_matching(dwarfinfo, symbol_dict, processed)
            mark_address_aliases(symbol_dict, processed)
            # Must be computed before the ':' placeholder paths are added
            common_path_prefix = find_common_path_prefix(symbol_dict)
            set_root_path_for_unmapped_symbols(symbol_dict, ranges, processed)

            if args.verbose:
                for sym in processed['unmapped_symbols']:
                    print("INFO: Unmapped symbol: {0}".format(sym))

            root = generate_any_tree(symbol_dict, symsize, common_path_prefix)
            if not args.quiet:
                header = f"{t.upper()} Report" if len(targets) > 1 else None
                print_any_tree(root, symsize, args.depth, header)

            # Drop the leading underscore of private attributes on export
            exporter = DictExporter(
                attriter=lambda attrs: [(k.lstrip('_'), v) for k, v in attrs]
            )
            data = {
                "symbols": exporter.export(root),
                "total_size": symsize,
            }
            with open(jsonout, "w") as fp:
                json.dump(data, fp, indent=4)


if __name__ == "__main__":
    main()