1#!/usr/bin/env python3
2#
3# Copyright (c) 2016, 2020-2024 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7# Based on a script by:
8#       Chereau, Fabien <fabien.chereau@intel.com>
9
10"""
11Process an ELF file to generate size report on RAM and ROM.
12"""
13
14import argparse
15import locale
16import os
17import sys
18import re
19from pathlib import Path
20import json
21
22from packaging import version
23
24from colorama import init, Fore
25
26from anytree import RenderTree, NodeMixin, findall_by_attr
27from anytree.exporter import DictExporter
28
29import elftools
30from elftools.elf.elffile import ELFFile
31from elftools.elf.sections import SymbolTableSection
32from elftools.dwarf.descriptions import describe_form_class
33from elftools.dwarf.descriptions import (
34    describe_DWARF_expr, set_global_machine_arch)
35from elftools.dwarf.locationlists import (
36    LocationExpr, LocationParser)
37
# Refuse to run with a pyelftools release older than 0.24.
if version.parse(elftools.__version__) < version.parse('0.24'):
    sys.exit("pyelftools is out of date, need version 0.24 or later")


# ELF section flags
SHF_WRITE = 0x1
SHF_ALLOC = 0x2
SHF_EXEC = 0x4
# Combined masks that are tested against a section's sh_flags value.
SHF_WRITE_ALLOC = SHF_WRITE | SHF_ALLOC
SHF_ALLOC_EXEC = SHF_ALLOC | SHF_EXEC

# Captures the hexadecimal address out of a described DWARF location
# expression of the form "(DW_OP_addr: <hex>)".
DT_LOCATION = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)\)")

# Node names ending in one of these suffixes are treated as source files
# when the tree is printed.
SRC_FILE_EXT = ('.h', '.c', '.hpp', '.cpp', '.hxx', '.cxx', '.c++')
52
53
def get_symbol_addr(sym):
    """Return the address held in the symbol's ``st_value`` field."""
    address = sym['st_value']
    return address
57
58
def get_symbol_size(sym):
    """Return the size held in the symbol's ``st_size`` field."""
    size = sym['st_size']
    return size
62
63
def is_symbol_in_ranges(sym, ranges):
    """
    Look up the address range that contains a symbol.

    Each entry of ``ranges`` is a dict with 'start' and 'end' keys; both
    ends are inclusive. The first entry covering the symbol's
    ``st_value`` is returned, or None when no entry covers it.
    """
    return next(
        (candidate for candidate in ranges
         if candidate['start'] <= sym['st_value'] <= candidate['end']),
        None)
74
75
def get_die_mapped_address(die, parser, dwarfinfo):
    """
    Get the bounding addresses from a DIE variable or subprogram.

    For a ``DW_TAG_variable`` whose location is a single expression that
    describes as ``(DW_OP_addr: <hex>)``, the range is
    ``[addr, addr + 1)``.

    For a ``DW_TAG_subprogram`` the range is ``[low_pc, high)``, where
    ``high`` is ``DW_AT_high_pc`` itself when that attribute is of the
    'address' form class, or ``low_pc + DW_AT_high_pc`` when it is of
    the 'constant' form class.

    Args:
        die: the DIE to inspect.
        parser: LocationParser used to decode ``DW_AT_location``.
        dwarfinfo: DWARF info object whose ``structs`` are needed to
            describe a location expression.

    Returns:
        A ``(low, high)`` tuple. Both values are None when the DIE does
        not yield a complete range, so callers only ever see either a
        usable pair or ``(None, None)``.
    """
    low = None
    high = None

    if die.tag == 'DW_TAG_variable':
        if 'DW_AT_location' in die.attributes:
            loc_attr = die.attributes['DW_AT_location']
            if parser.attribute_has_location(loc_attr, die.cu['version']):
                loc = parser.parse_from_attribute(loc_attr, die.cu['version'], die)
                if isinstance(loc, LocationExpr):
                    addr = describe_DWARF_expr(loc.loc_expr,
                                               dwarfinfo.structs)

                    matcher = DT_LOCATION.match(addr)
                    if matcher:
                        low = int(matcher.group(1), 16)
                        high = low + 1

    elif die.tag == 'DW_TAG_subprogram':
        # Both ends are needed to form a range. A DIE carrying only
        # DW_AT_low_pc is treated as unmapped instead of raising KeyError.
        if ('DW_AT_low_pc' in die.attributes
                and 'DW_AT_high_pc' in die.attributes):
            start = die.attributes['DW_AT_low_pc'].value

            high_pc = die.attributes['DW_AT_high_pc']
            high_pc_class = describe_form_class(high_pc.form)
            if high_pc_class == 'address':
                low, high = start, high_pc.value
            elif high_pc_class == 'constant':
                low, high = start, start + high_pc.value
            # Any other form class cannot be interpreted here: leave both
            # bounds as None so callers never compare against a None high.

    return low, high
107
108
def match_symbol_address(symlist, die, parser, dwarfinfo):
    """
    Pick the symbol entry that belongs to a DIE.

    The DIE's address range is obtained from get_die_mapped_address();
    the first entry of ``symlist`` whose symbol address falls inside
    ``[low, high)`` is returned. None is returned when the DIE has no
    address or when no entry matches.
    """
    start, stop = get_die_mapped_address(die, parser, dwarfinfo)
    if start is None:
        return None

    return next(
        (entry for entry in symlist
         if start <= entry['symbol']['st_value'] < stop),
        None)
125
126
def get_symbols(elf, addr_ranges):
    """
    Sort the sized symbols of every symbol table into buckets.

    A symbol lands in the 'rom' and/or 'ram' bucket when its address is
    inside one of the corresponding ranges of ``addr_ranges``; a symbol
    found in neither goes to 'unassigned'. Each bucket maps a symbol
    name to a list of entries, and every entry records the name, the
    symbol itself, an (initially empty) set of mapped files and the
    name of the section it was found in (None if unknown).
    """
    rom_ranges, ram_ranges, leftover_ranges = (
        addr_ranges[key] for key in ('rom', 'ram', 'unassigned'))
    placed_ranges = (('rom', rom_ranges), ('ram', ram_ranges))

    buckets = {'rom': {}, 'ram': {}, 'unassigned': {}}

    symtabs = (sec for sec in elf.iter_sections()
               if isinstance(sec, SymbolTableSection))
    for symtab in symtabs:
        for sym in symtab.iter_symbols():
            # Symbols without a size take no room in the report.
            if get_symbol_size(sym) == 0:
                continue

            entry = {'name': sym.name,
                     'symbol': sym,
                     'mapped_files': set(),
                     'section': None}

            # A symbol may sit in both ROM and RAM ranges, so both
            # areas are always checked (ROM first, then RAM).
            placed = False
            for area, ranges in placed_ranges:
                hit = is_symbol_in_ranges(sym, ranges)
                if hit:
                    entry['section'] = hit['name']
                    buckets[area].setdefault(sym.name, []).append(entry)
                    placed = True

            if placed:
                continue

            # Neither ROM nor RAM: keep it, with a section name if one
            # of the unassigned ranges happens to cover it.
            hit = is_symbol_in_ranges(sym, leftover_ranges)
            if hit:
                entry['section'] = hit['name']
            buckets['unassigned'].setdefault(sym.name, []).append(entry)

    return buckets
183
184
def print_section_info(section, descr=""):
    """
    Print a one-line debug summary of an ELF section.

    Nothing is printed unless verbose output was requested on the
    command line. The line shows the first and last address of the
    section, the given description, and the section's name, size,
    type and flags.
    """
    if not args.verbose:
        return

    size = section['sh_size']
    first = section['sh_addr']
    # An empty section is shown as starting and ending at the same address.
    last = first + size - 1 if size else first
    summary = (f"DEBUG: "
               f"0x{first:08x}-0x{last:08x} "
               f"{descr} '{section.name}': size={size}, "
               f"{section['sh_type']}, 0x{section['sh_flags']:08x}")
    print(summary)
194#
195
196
def get_section_ranges(elf):
    """
    Classify every ELF section as ROM, RAM or unassigned.

    Returns a dict holding, for each of 'rom', 'ram' and 'unassigned',
    the list of address ranges (dicts with inclusive 'start'/'end' and
    the section 'name') plus the summed section size under the
    matching '<area>_total_size' key.
    """
    ranges = {'rom': [], 'ram': [], 'unassigned': []}
    totals = {'rom': 0, 'ram': 0, 'unassigned': 0}

    # The build is considered XIP when any symbol table carries a
    # CONFIG_XIP symbol.
    symtabs = elf.iter_sections('SHT_SYMTAB')
    xip = any(symtab.get_symbol_by_name('CONFIG_XIP') for symtab in symtabs)
    if args.verbose:
        print(f'INFO: XIP={xip}')

    for section in elf.iter_sections():
        size = section['sh_size']
        first = section['sh_addr']
        last = first + size - 1 if size else first
        bound = {'start': first, 'end': last, 'name': section.name}

        # Work out where the section belongs first, then record it.
        areas = ('unassigned',)
        descr = "unassigned section"

        sec_type = section['sh_type']
        if sec_type == 'SHT_NOBITS':
            # BSS and noinit sections
            areas = ('ram',)
            descr = "RAM bss section"
        elif sec_type == 'SHT_PROGBITS':
            # Sections to be in flash or memory
            flags = section['sh_flags']
            if (flags & SHF_ALLOC_EXEC) == SHF_ALLOC_EXEC:
                # Text section
                areas = ('rom',)
                descr = "ROM txt section"
            elif (flags & SHF_WRITE_ALLOC) == SHF_WRITE_ALLOC:
                # Read/write data. For XIP it occupies both ROM and RAM
                # since at boot, content is copied from ROM to RAM.
                areas = ('rom', 'ram') if xip else ('ram',)
                descr = "DATA r/w section"
            elif (flags & SHF_ALLOC) == SHF_ALLOC:
                # Read only data
                areas = ('rom',)
                descr = "ROM r/o section"

        for area in areas:
            ranges[area].append(bound)
            totals[area] += size
        print_section_info(section, descr)

    return {'rom': ranges['rom'],
            'rom_total_size': totals['rom'],
            'ram': ranges['ram'],
            'ram_total_size': totals['ram'],
            'unassigned': ranges['unassigned'],
            'unassigned_total_size': totals['unassigned']}
270
271
def get_die_filename(die, lineprog):
    """
    Return the path of the source file a DIE was declared in.

    The DIE's ``DW_AT_decl_file`` value selects an entry of the line
    program's file table; unless the entry's directory index is 0 the
    file name is joined onto the matching include directory. Relative
    results are placed under the output directory given on the command
    line, and the final path is resolved.
    """
    decl_file = die.attributes['DW_AT_decl_file'].value
    entry = lineprog['file_entry'][decl_file - 1]

    dir_index = entry['dir_index']
    if dir_index != 0:
        parent_dir = lineprog.header['include_directory'][dir_index - 1]
        raw_name = os.path.join(parent_dir, entry.name)
    else:
        raw_name = entry.name

    path = Path(raw_name.decode(locale.getpreferredencoding()))

    # Relative paths are taken to be relative to the output directory.
    if not path.is_absolute():
        path = Path(args.output).joinpath(path)

    try:
        path = path.resolve()
    except OSError:
        # built-ins can't be resolved, so it's not an issue;
        # anything else is a genuine failure.
        if '<built-in>' not in str(path):
            raise

    return path
300
301
def do_simple_name_matching(dwarfinfo, symbol_dict, processed):
    """
    Sequentially process DIEs in compile units with direct file mappings
    within the DIEs themselves, and do simple matching between DIE names
    and symbol names.

    Args:
        dwarfinfo: DWARF information object; its iter_CUs(),
            line_program_for_CU() and location_lists() are used.
        symbol_dict: dict mapping a symbol name to a list of symbol
            entries (dicts with at least 'symbol' and 'mapped_files').
        processed: bookkeeping dict holding the 'mapped_symbols',
            'mapped_addr' and 'unmapped_symbols' sets. It is updated
            in place and gains an 'unmapped_dies' entry with the DIEs
            that could not be mapped by this pass.
    """
    mapped_symbols = processed['mapped_symbols']
    mapped_addresses = processed['mapped_addr']
    unmapped_symbols = processed['unmapped_symbols']
    newly_mapped_syms = set()

    location_lists = dwarfinfo.location_lists()
    location_parser = LocationParser(location_lists)

    unmapped_dies = set()

    # Loop through all compile units
    for compile_unit in dwarfinfo.iter_CUs():
        lineprog = dwarfinfo.line_program_for_CU(compile_unit)
        # Without a line program there is no file table to map against.
        if lineprog is None:
            continue

        # Loop through each DIE and find variables and
        # subprograms (i.e. functions)
        for die in compile_unit.iter_DIEs():
            sym_name = None

            # Process variables
            if die.tag == 'DW_TAG_variable':
                # DW_AT_declaration

                # having 'DW_AT_location' means this maps
                # to an actual address (e.g. not an extern)
                if 'DW_AT_location' in die.attributes:
                    sym_name = die.get_full_path()

            # Process subprograms (i.e. functions) if they are valid
            if die.tag == 'DW_TAG_subprogram':
                # Refer to another DIE for name
                if ('DW_AT_abstract_origin' in die.attributes) or (
                        'DW_AT_specification' in die.attributes):
                    unmapped_dies.add(die)

                # having 'DW_AT_low_pc' means it maps to
                # an actual address
                elif 'DW_AT_low_pc' in die.attributes:
                    # DW_AT_low_pc == 0 is a weak function
                    # which has been overridden
                    if die.attributes['DW_AT_low_pc'].value != 0:
                        sym_name = die.get_full_path()

                # For mangled function names, the linkage name
                # is what appears in the symbol list
                # (this replaces any name chosen above).
                if 'DW_AT_linkage_name' in die.attributes:
                    linkage = die.attributes['DW_AT_linkage_name']
                    sym_name = linkage.value.decode()

            if sym_name is not None:
                # Skip DIE with no reference back to a file
                if not 'DW_AT_decl_file' in die.attributes:
                    continue

                is_die_mapped = False
                if sym_name in symbol_dict:
                    # The name is recorded as mapped as soon as it is
                    # known to the symbol table, before the address check.
                    mapped_symbols.add(sym_name)
                    symlist = symbol_dict[sym_name]
                    symbol = match_symbol_address(symlist, die,
                                                  location_parser,
                                                  dwarfinfo)

                    if symbol is not None:
                        symaddr = symbol['symbol']['st_value']
                        # Each address is attributed to a file only once.
                        if symaddr not in mapped_addresses:
                            is_die_mapped = True
                            path = get_die_filename(die, lineprog)
                            symbol['mapped_files'].add(path)
                            mapped_addresses.add(symaddr)
                            newly_mapped_syms.add(sym_name)

                # Keep the DIE around for the address range matching pass.
                if not is_die_mapped:
                    unmapped_dies.add(die)

    mapped_symbols = mapped_symbols.union(newly_mapped_syms)
    unmapped_symbols = unmapped_symbols.difference(newly_mapped_syms)

    processed['mapped_symbols'] = mapped_symbols
    processed['mapped_addr'] = mapped_addresses
    processed['unmapped_symbols'] = unmapped_symbols
    processed['unmapped_dies'] = unmapped_dies
391
392
def mark_address_aliases(symbol_dict, processed):
    """
    Flag aliases of already-mapped symbols as mapped.

    Some functions and variables are merely aliases of other
    functions/variables. Any still-unmapped symbol name that has an
    entry at an address which is already mapped is moved from the
    'unmapped_symbols' set to the 'mapped_symbols' set of
    ``processed``, so it is not counted a second time when the
    display tree is built.
    """
    known_names = processed['mapped_symbols']
    known_addresses = processed['mapped_addr']
    pending = processed['unmapped_symbols']

    aliases = {
        name
        for name in pending
        for entry in symbol_dict[name]
        if entry['symbol']['st_value'] in known_addresses
    }

    processed['mapped_symbols'] = known_names.union(aliases)
    processed['mapped_addr'] = known_addresses
    processed['unmapped_symbols'] = pending.difference(aliases)
420
421
def do_address_range_matching(dwarfinfo, symbol_dict, processed):
    """
    Match symbols indirectly using address ranges.

    This uses the address ranges of DIEs and map them to symbols
    residing within those ranges, and works on DIEs that have not
    been mapped in previous steps. This works on symbol names
    that do not match the names in DIEs, e.g. "<func>" in DIE,
    but "<func>.constprop.*" in symbol name list. This also
    helps with mapping the mangled function names in C++,
    since the names in DIE are actual function names in source
    code and not mangled version of them.

    Args:
        dwarfinfo: DWARF information object; its line_program_for_CU()
            and location_lists() are used.
        symbol_dict: dict mapping a symbol name to a list of symbol
            entries (dicts with at least 'symbol' and 'mapped_files').
        processed: bookkeeping dict holding the 'mapped_symbols',
            'mapped_addr' and 'unmapped_symbols' sets plus the
            'unmapped_dies' set to work on. It is updated in place;
            nothing is done when 'unmapped_dies' is absent.
    """
    if 'unmapped_dies' not in processed:
        return

    mapped_symbols = processed['mapped_symbols']
    mapped_addresses = processed['mapped_addr']
    unmapped_symbols = processed['unmapped_symbols']
    newly_mapped_syms = set()

    location_lists = dwarfinfo.location_lists()
    location_parser = LocationParser(location_lists)

    unmapped_dies = processed['unmapped_dies']

    # Group DIEs by compile units
    cu_list = dict()

    for die in unmapped_dies:
        cu = die.cu
        if cu not in cu_list:
            cu_list[cu] = {'dies': set()}
        cu_list[cu]['dies'].add(die)

    # Loop through all compile units
    for cu in cu_list:
        lineprog = dwarfinfo.line_program_for_CU(cu)

        # Map offsets from DIEs
        offset_map = dict()
        for die in cu.iter_DIEs():
            offset_map[die.offset] = die

        for die in cu_list[cu]['dies']:
            # Only subprograms are handled by this pass.
            if not die.tag == 'DW_TAG_subprogram':
                continue

            path = None

            # Has direct reference to file, so use it
            if 'DW_AT_decl_file' in die.attributes:
                path = get_die_filename(die, lineprog)

            # Loop through indirect reference until a direct
            # reference to file is found
            if ('DW_AT_abstract_origin' in die.attributes) or (
                    'DW_AT_specification' in die.attributes):
                die_ptr = die
                while path is None:
                    # Stop once the chain leaves subprograms or has no
                    # further reference to follow.
                    if not (die_ptr.tag == 'DW_TAG_subprogram') or not (
                            ('DW_AT_abstract_origin' in die_ptr.attributes) or
                            ('DW_AT_specification' in die_ptr.attributes)):
                        break

                    # DW_AT_abstract_origin takes precedence when both
                    # attributes are present.
                    if 'DW_AT_abstract_origin' in die_ptr.attributes:
                        ofname = 'DW_AT_abstract_origin'
                    elif 'DW_AT_specification' in die_ptr.attributes:
                        ofname = 'DW_AT_specification'

                    # The attribute value is treated as an offset relative
                    # to the start of the referencing DIE's compile unit.
                    offset = die_ptr.attributes[ofname].value
                    offset += die_ptr.cu.cu_offset

                    # There is nothing to reference so no need to continue
                    if offset not in offset_map:
                        break

                    die_ptr = offset_map[offset]
                    if 'DW_AT_decl_file' in die_ptr.attributes:
                        path = get_die_filename(die_ptr, lineprog)

            # Only DIEs for which a file was found can be mapped
            if path is not None:
                low, high = get_die_mapped_address(die, location_parser,
                                                   dwarfinfo)
                if low is None:
                    continue

                # Attribute every still-unmapped symbol whose address lies
                # in [low, high) to the file found above.
                for ums in unmapped_symbols:
                    for one_sym in symbol_dict[ums]:
                        symbol = one_sym['symbol']
                        symaddr = symbol['st_value']

                        if symaddr not in mapped_addresses:
                            if low <= symaddr < high:
                                one_sym['mapped_files'].add(path)
                                mapped_addresses.add(symaddr)
                                newly_mapped_syms.add(ums)

    mapped_symbols = mapped_symbols.union(newly_mapped_syms)
    unmapped_symbols = unmapped_symbols.difference(newly_mapped_syms)

    processed['mapped_symbols'] = mapped_symbols
    processed['mapped_addr'] = mapped_addresses
    processed['unmapped_symbols'] = unmapped_symbols
527
528
def set_root_path_for_unmapped_symbols(symbol_dict, addr_range, processed):
    """
    Give the remaining unmapped symbols the root placeholder path.

    Every still-unmapped symbol that lies inside one of the wanted
    address ranges (e.g. ROM or RAM) and whose address has not been
    mapped yet gets the path ':' added to its mapped files, so that
    it shows up directly under the root node. The bookkeeping sets
    in ``processed`` are updated to match.
    """
    known_names = processed['mapped_symbols']
    known_addresses = processed['mapped_addr']
    pending = processed['unmapped_symbols']
    rooted = set()

    for sym_name in pending:
        for entry in symbol_dict[sym_name]:
            sym = entry['symbol']
            address = sym['st_value']

            # Outside the wanted memory area: not our concern.
            if not is_symbol_in_ranges(sym, addr_range):
                continue
            # Address already accounted for by another symbol.
            if address in known_addresses:
                continue

            entry['mapped_files'].add(Path(':'))
            known_addresses.add(address)
            rooted.add(sym_name)

    processed['mapped_symbols'] = known_names.union(rooted)
    processed['mapped_addr'] = known_addresses
    processed['unmapped_symbols'] = pending.difference(rooted)
560
def find_common_path_prefix(symbol_dict):
    """
    Return the longest path shared by all mapped files.

    Must be called before set_root_path_for_unmapped_symbols().
    None is returned when os.path.commonpath() rejects the collected
    paths with a ValueError (for example when there are none).
    """
    mapped = [
        path
        for entries in symbol_dict.values()
        for entry in entries
        for path in entry['mapped_files']
    ]

    try:
        return os.path.commonpath(mapped)
    except ValueError:
        return None
577
class TreeNode(NodeMixin):
    """
    A symbol node.

    A node carries a display name (``_name``), an accumulated size
    (``_size``) and an identifier (``_identifier``) used to find the
    node again. The ``address`` and ``section`` attributes only exist
    on nodes for which a value was supplied; other code in this file
    uses ``hasattr()`` on them to tell such nodes apart.
    """

    def __init__(self, name, identifier, size=0, parent=None, children=None, address=None, section=None):
        """
        Create a node and optionally attach it to a parent / children.

        Args:
            name: text returned by repr() and shown for the node.
            identifier: value stored as ``_identifier``.
            size: initial size of the node.
            parent: parent node, or None.
            children: child nodes to attach; ignored when empty or None.
            address: stored as ``address`` only when not None.
            section: stored as ``section`` only when not None.
        """
        super().__init__()
        # NOTE(review): the order of these assignments may determine the
        # attribute order seen by the tree exporter -- confirm before
        # reordering.
        self._name = name
        self._size = size
        self.parent = parent
        self._identifier = identifier
        if address is not None:
            self.address = address
        if section is not None:
            self.section = section
        if children:
            self.children = children

    def __repr__(self):
        """Return the node's display name."""
        return self._name
598
599
def sum_node_children_size(node):
    """
    Add up the ``_size`` of every direct child of ``node``.

    Only one level is visited; a node without children yields 0.
    """
    return sum(child._size for child in node.children)
610
611
def generate_any_tree(symbol_dict, total_size, path_prefix):
    """
    Generate a symbol tree for output.

    Args:
        symbol_dict: dict mapping a symbol name to a list of symbol
            entries (dicts with 'symbol', 'section' and 'mapped_files').
        total_size: size assigned to the root node; whatever is not
            covered by the root's children is reported under a
            "(hidden)" node.
        path_prefix: common prefix of all mapped files (may be None).
            When it equals the Zephyr base path, symbols are placed
            directly under the root instead of under separate
            ZEPHYR_BASE / OUTPUT_DIR / WORKSPACE / "/" nodes.

    Returns:
        The root TreeNode of the generated tree.
    """
    root = TreeNode('Root', "root")
    node_no_paths = TreeNode('(no paths)', ":", parent=root)

    if path_prefix and Path(path_prefix) == Path(args.zephyrbase):
        # All source files are under ZEPHYR_BASE so there is
        # no need for another level.
        node_zephyr_base = root
        node_output_dir = root
        node_workspace = root
        node_others = root
    else:
        # These nodes are created detached; they are attached to the
        # root further down, and only if they are needed.
        node_zephyr_base = TreeNode('ZEPHYR_BASE', args.zephyrbase)
        node_output_dir = TreeNode('OUTPUT_DIR', args.output)
        node_others = TreeNode("/", "/")

        if args.workspace:
            node_workspace = TreeNode('WORKSPACE', args.workspace)
        else:
            node_workspace = node_others

    # A set of helper function for building a simple tree with a path-like
    # hierarchy.
    def _insert_one_elem(root, path, size, addr, section):
        # 'cur' is the path accumulated so far and doubles as the node
        # identifier; 'node' is the most recently created node, if any.
        cur = None
        node = None
        parent = root
        for part in path.parts:
            if cur is None:
                cur = part
            else:
                cur = str(Path(cur, part))

            results = findall_by_attr(root, cur, name="_identifier")
            if results:
                item = results[0]
                if not hasattr(item, 'address'):
                    # Passing down through a non-terminal parent node.
                    parent = item
                    parent._size += size
                else:
                    # Another symbol node here with the same name; stick to its parent as well.
                    parent = item.parent
                    node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent)
            else:
                # There is no such terminal symbol in the tree yet; let's add it.
                if node:
                    parent = node
                node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent)
        if node:
            # Set memory block address and section name properties only for terminal symbol nodes.
            # Don't do it on file- and directory- level parent nodes.
            node.address = addr
            node.section = section
        else:
            # normally this shouldn't happen; just to detect data or logic errors.
            print(f"ERROR: no end node created for {root}, {path}, 0x{addr:08x}+{size}@{section}")
    #

    # Mapping paths to tree nodes
    # (checked in this order; the first matching base path wins)
    path_node_map = [
        [Path(args.zephyrbase), node_zephyr_base],
        [Path(args.output), node_output_dir],
    ]

    if args.workspace:
        path_node_map.append(
            [Path(args.workspace), node_workspace]
        )

    for name, sym in symbol_dict.items():
        for symbol in sym:
            size = get_symbol_size(symbol['symbol'])
            addr = get_symbol_addr(symbol['symbol'])
            section = symbol['section']
            for file in symbol['mapped_files']:
                # The symbol name becomes the last component of the path.
                path = Path(file, name)
                if path.is_absolute():
                    has_node = False

                    for one_path in path_node_map:
                        if one_path[0] in path.parents:
                            path = path.relative_to(one_path[0])
                            dest_node = one_path[1]
                            has_node = True
                            break

                    if not has_node:
                        dest_node = node_others
                else:
                    # Relative paths go under the "(no paths)" node.
                    dest_node = node_no_paths

                _insert_one_elem(dest_node, path, size, addr, section)


    if node_zephyr_base is not root:
        # ZEPHYR_BASE and OUTPUT_DIR nodes don't have sum of symbol size
        # so calculate them here.
        node_zephyr_base._size = sum_node_children_size(node_zephyr_base)
        node_output_dir._size = sum_node_children_size(node_output_dir)

        # Find out which nodes need to be in the tree.
        # "(no path)", ZEPHYR_BASE nodes are essential.
        children = [node_no_paths, node_zephyr_base]
        if node_output_dir.height != 0:
            # OUTPUT_DIR may be under ZEPHYR_BASE.
            children.append(node_output_dir)
        if node_others.height != 0:
            # Only include "others" node if there is something.
            children.append(node_others)

        if args.workspace:
            node_workspace._size = sum_node_children_size(node_workspace)
            if node_workspace.height != 0:
                children.append(node_workspace)

        root.children = children

    root._size = total_size

    # Need to account for code and data where there are no emitted
    # symbols associated with them.
    node_hidden_syms = TreeNode('(hidden)', "(hidden)", parent=root)
    node_hidden_syms._size = root._size - sum_node_children_size(root)

    return root
741
742
def node_sort(items):
    """
    Order sibling nodes by their display name.

    Used as the ``childiter`` callback of RenderTree; returns a new
    list and leaves ``items`` untouched.
    """
    def _display_name(node):
        return node._name

    ordered = list(items)
    ordered.sort(key=_display_name)
    return ordered
748
749
def print_any_tree(root, total_size, depth):
    """
    Print the symbol tree.

    One row is printed per node with its size, its share of
    ``total_size`` in percent and, for leaf nodes that carry them, the
    address and section name.

    Args:
        root: root node of the tree to render.
        total_size: size the percentages are relative to. When it is 0
            every percentage is reported as 0.00 instead of failing
            with a ZeroDivisionError.
        depth: passed to RenderTree as ``maxlevel``.
    """
    print('{:98s} {:>7s} {:>7s} {:11s} {:16s}'.format(
        Fore.YELLOW + "Path", "Size", "%", " Address", "Section" + Fore.RESET))
    print('=' * 138)
    for row in RenderTree(root, childiter=node_sort, maxlevel=depth):
        # Right-align the size so that the columns line up regardless
        # of how deep the node is indented.
        f = len(row.pre) + len(row.node._name)
        s = str(row.node._size).rjust(100-f)
        if total_size:
            percent = 100 * float(row.node._size) / float(total_size)
        else:
            # Nothing to compare against (e.g. no section of this kind).
            percent = 0.0

        hex_addr = "-"
        section_name = ""
        cc = cr = ""
        if not row.node.children:
            # Leaf node: show section/address when the node has them.
            if hasattr(row.node, 'section'):
                section_name = row.node.section
            if hasattr(row.node, 'address'):
                hex_addr = "0x{:08x}".format(row.node.address)
                cc = Fore.CYAN
                cr = Fore.RESET
        elif row.node._name.endswith(SRC_FILE_EXT):
            # Source file nodes are highlighted.
            cc = Fore.GREEN
            cr = Fore.RESET

        print(f"{row.pre}{cc}{row.node._name} {s} {cr}{Fore.BLUE}{percent:6.2f}%{Fore.RESET}  {hex_addr} {section_name}")
    print('=' * 138)
    print(f'{total_size:>101}')
779
780
def parse_args():
    """
    Read the command line and publish the result.

    The parsed options are stored in the module-level ``args``
    variable, which the rest of the script reads.
    """
    global args

    cli = argparse.ArgumentParser(allow_abbrev=False)

    # Required inputs
    cli.add_argument(
        "-k", "--kernel", required=True, help="Zephyr ELF binary")
    cli.add_argument(
        "-z", "--zephyrbase", required=True, help="Zephyr base path")
    cli.add_argument(
        "-q", "--quiet", action="store_true",
        help="Do not output anything on the screen.")
    cli.add_argument(
        "-o", "--output", required=True, help="Output path")
    cli.add_argument(
        "-w", "--workspace", default=None,
        help="Workspace path (Usually the same as WEST_TOPDIR)")

    # What to report on
    cli.add_argument("target", choices=['rom', 'ram', 'all'])

    # Presentation options
    cli.add_argument(
        "-d", "--depth", dest="depth", type=int, default=None,
        help="How deep should we go into the tree", metavar="DEPTH")
    cli.add_argument(
        "-v", "--verbose", action="store_true",
        help="Print extra debugging information")
    cli.add_argument("--json", help="store results in a JSON file.")

    args = cli.parse_args()
808
809
def main():
    """
    Main program.

    Parses the command line, reads the ELF file and, for each requested
    target ('rom' and/or 'ram'), maps symbols to source files, prints
    the resulting tree (unless --quiet) and writes it to a JSON file.

    Exits with an error message when the ELF file does not exist or
    carries no DWARF information.
    """
    parse_args()

    sys.stdout.reconfigure(encoding='utf-8')

    # Init colorama
    init()

    # Explicit checks instead of assert, so that the validation is
    # still performed when Python runs with -O.
    if not os.path.exists(args.kernel):
        sys.exit("{0} does not exist.".format(args.kernel))

    # The argument parser only lets 'rom', 'ram' and 'all' through.
    if args.target == 'all':
        targets = ['rom', 'ram']
    else:
        targets = [args.target]

    # The ELF object reads from the stream on demand, so the file has
    # to stay open for the whole processing; the with block makes sure
    # it is closed afterwards.
    with open(args.kernel, "rb") as elf_file:
        elf = ELFFile(elf_file)
        if not elf.has_dwarf_info():
            sys.exit("ELF file has no DWARF information")

        set_global_machine_arch(elf.get_machine_arch())
        addr_ranges = get_section_ranges(elf)
        dwarfinfo = elf.get_dwarf_info()

        for t in targets:

            # Fetched anew for every target: the entries collect the
            # mapped files of the target currently being processed.
            symbols = get_symbols(elf, addr_ranges)

            for sym in symbols['unassigned'].values():
                for sym_entry in sym:
                    print(f"WARN: Symbol '{sym_entry['name']}' section '{sym_entry['section']}' "
                          "is not in RAM or ROM.")

            # NOTE: with --json and target 'all' both reports go to the
            # same file, so the second one overwrites the first.
            if args.json:
                jsonout = args.json
            else:
                jsonout = os.path.join(args.output, f'{t}.json')

            symbol_dict = symbols[t]
            symsize = addr_ranges[f'{t}_total_size']
            ranges = addr_ranges[t]

            processed = {"mapped_symbols": set(),
                         "mapped_addr": set(),
                         "unmapped_symbols": set(symbol_dict.keys())}

            do_simple_name_matching(dwarfinfo, symbol_dict, processed)
            mark_address_aliases(symbol_dict, processed)
            do_address_range_matching(dwarfinfo, symbol_dict, processed)
            mark_address_aliases(symbol_dict, processed)
            # Has to happen before the root placeholder paths are added.
            common_path_prefix = find_common_path_prefix(symbol_dict)
            set_root_path_for_unmapped_symbols(symbol_dict, ranges, processed)

            if args.verbose:
                for sym in processed['unmapped_symbols']:
                    print("INFO: Unmapped symbol: {0}".format(sym))

            root = generate_any_tree(symbol_dict, symsize, common_path_prefix)
            if not args.quiet:
                print_any_tree(root, symsize, args.depth)

            # Strip the leading underscores of the node attributes so
            # the exported keys read 'name', 'size', 'identifier'.
            exporter = DictExporter(attriter=lambda attrs: [(k.lstrip('_'), v) for k, v in attrs])
            data = dict()
            data["symbols"] = exporter.export(root)
            data["total_size"] = symsize
            with open(jsonout, "w") as fp:
                json.dump(data, fp, indent=4)
880
881
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
884