1#!/usr/bin/env python3
2#
3# Copyright (c) 2016, 2020-2024 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7# Based on a script by:
8#       Chereau, Fabien <fabien.chereau@intel.com>
9
10"""
11Process an ELF file to generate size report on RAM and ROM.
12"""
13
14import argparse
15import locale
16import os
17import sys
18import re
19from pathlib import Path
20import json
21
22from packaging import version
23
24from colorama import init, Fore, Style
25
26from anytree import RenderTree, NodeMixin, findall_by_attr
27from anytree.exporter import DictExporter
28
29import elftools
30from elftools.elf.elffile import ELFFile
31from elftools.elf.sections import SymbolTableSection
32from elftools.dwarf.descriptions import describe_form_class
33from elftools.dwarf.descriptions import (
34    describe_DWARF_expr, set_global_machine_arch)
35from elftools.dwarf.locationlists import (
36    LocationExpr, LocationParser)
37
# Abort immediately when the installed pyelftools is older than 0.24.
if version.parse(elftools.__version__) < version.parse('0.24'):
    sys.exit("pyelftools is out of date, need version 0.24 or later")


# ELF section flags (bits of a section header's sh_flags field)
SHF_WRITE = 0x1
SHF_ALLOC = 0x2
SHF_EXEC = 0x4
# Combined masks used by get_section_ranges() to classify sections.
SHF_WRITE_ALLOC = SHF_WRITE | SHF_ALLOC
SHF_ALLOC_EXEC = SHF_ALLOC | SHF_EXEC

# Matches a textual location description of the form "(DW_OP_addr: <hex>)"
# and captures the lower-case hexadecimal address.
DT_LOCATION = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)\)")

# File name suffixes that print_any_tree() highlights as source files.
SRC_FILE_EXT = ('.h', '.c', '.hpp', '.cpp', '.hxx', '.cxx', '.c++')
52
53
def get_symbol_addr(sym):
    """Return the 'st_value' field (the address) of a symbol entry."""
    address = sym['st_value']
    return address
57
58
def get_symbol_size(sym):
    """Return the 'st_size' field (the size in bytes) of a symbol entry."""
    size = sym['st_size']
    return size
62
63
def is_symbol_in_ranges(sym, ranges):
    """
    Look up the address range that contains a symbol.

    Each element of ranges is a mapping with 'start' and 'end'
    addresses, both inclusive. The first range (in list order) that
    contains the symbol's 'st_value' is returned; None is returned
    when no range contains it.
    """
    return next(
        (candidate for candidate in ranges
         if candidate['start'] <= sym['st_value'] <= candidate['end']),
        None)
74
75
def get_die_mapped_address(die, parser, dwarfinfo):
    """
    Get the bounding addresses from a DIE variable or subprogram.

    Returns a (low, high) tuple:

    - For a DW_TAG_variable whose location is a plain
      "(DW_OP_addr: <hex>)" expression, low is that address and
      high is low + 1.
    - For a DW_TAG_subprogram with DW_AT_low_pc, high is taken from
      DW_AT_high_pc, which is either an absolute address or an offset
      from low depending on its form class. When DW_AT_high_pc is
      absent the DIE is treated as occupying the single address low,
      so high is low + 1 (same convention as for variables).

    (None, None) is returned whenever no usable range can be determined,
    so callers only have to test low against None.
    """
    low = None
    high = None

    if die.tag == 'DW_TAG_variable':
        if 'DW_AT_location' in die.attributes:
            loc_attr = die.attributes['DW_AT_location']
            if parser.attribute_has_location(loc_attr, die.cu['version']):
                loc = parser.parse_from_attribute(loc_attr, die.cu['version'], die)
                # Only single location expressions are handled here.
                if isinstance(loc, LocationExpr):
                    addr = describe_DWARF_expr(loc.loc_expr,
                                               dwarfinfo.structs)

                    matcher = DT_LOCATION.match(addr)
                    if matcher:
                        low = int(matcher.group(1), 16)
                        high = low + 1

    elif die.tag == 'DW_TAG_subprogram':
        if 'DW_AT_low_pc' in die.attributes:
            low = die.attributes['DW_AT_low_pc'].value

            if 'DW_AT_high_pc' not in die.attributes:
                # Only a start address is known: treat the DIE as a
                # single-address entity instead of raising KeyError.
                high = low + 1
            else:
                high_pc = die.attributes['DW_AT_high_pc']
                high_pc_class = describe_form_class(high_pc.form)
                if high_pc_class == 'address':
                    high = high_pc.value
                elif high_pc_class == 'constant':
                    high = low + high_pc.value
                else:
                    # Unknown encoding of the end address: report "no
                    # range" rather than a half-filled (low, None) pair
                    # that callers cannot compare against.
                    low = None

    return low, high
107
108
def match_symbol_address(symlist, die, parser, dwarfinfo):
    """
    Pick the entry of symlist that belongs to a DIE.

    The DIE's address range is obtained from get_die_mapped_address().
    The first entry whose symbol address lies in [low, high) is
    returned. None is returned when the DIE has no address or when
    no entry falls inside the range.
    """
    low, high = get_die_mapped_address(die, parser, dwarfinfo)
    if low is None:
        return None

    return next(
        (candidate for candidate in symlist
         if low <= candidate['symbol']['st_value'] < high),
        None)
125
126
def get_symbols(elf, addr_ranges):
    """
    Sort the symbols of every symbol table into buckets.

    Returns a dict with the keys 'rom', 'ram', 'all' and 'unassigned'.
    Each value maps a symbol name to a list of entries, one per symbol
    table row with that name. Symbols of size zero are skipped. A symbol
    may land in both 'rom' and 'ram'; anything in either of them is also
    put in 'all', everything else goes to 'unassigned'.
    """
    rom_addr_ranges = addr_ranges['rom']
    ram_addr_ranges = addr_ranges['ram']
    unassigned_addr_ranges = addr_ranges['unassigned']

    buckets = {'rom': {}, 'ram': {}, 'all': {}, 'unassigned': {}}

    def record(bucket, sym, loc, section):
        # Every bucket gets its own entry dict (and its own
        # 'mapped_files' set) even when the symbol is shared.
        new_entry = {'name': sym.name,
                     'symbol': sym,
                     'section': section,
                     'loc': loc,
                     'mapped_files': set()}
        buckets[bucket].setdefault(sym.name, []).append(new_entry)

    symbol_tables = (sec for sec in elf.iter_sections()
                     if isinstance(sec, SymbolTableSection))

    for table in symbol_tables:
        for sym in table.iter_symbols():
            # Zero-sized symbols do not contribute to the report.
            if get_symbol_size(sym) == 0:
                continue

            ram_bound = is_symbol_in_ranges(sym, ram_addr_ranges)
            rom_bound = is_symbol_in_ranges(sym, rom_addr_ranges)

            # Location tags of this symbol, 'ram' first.
            loc = [tag
                   for tag, hit in (('ram', ram_bound), ('rom', rom_bound))
                   if hit]

            if rom_bound:
                record('rom', sym, loc, rom_bound['name'])

            if ram_bound:
                record('ram', sym, loc, ram_bound['name'])

            if ram_bound or rom_bound:
                # The RAM section name wins when the symbol is in both.
                owner = ram_bound if ram_bound else rom_bound
                record('all', sym, loc, owner['name'])
            else:
                other = is_symbol_in_ranges(sym, unassigned_addr_ranges)
                record('unassigned', sym, ["unassigned"],
                       other['name'] if other else None)

    return buckets
196
197
def print_section_info(section, descr=""):
    """
    Print a one-line debug summary of an ELF section.

    Nothing is printed unless the global args.verbose is set. The line
    shows the section's address span, the given description, the section
    name, its size, its type and its flags.
    """
    if not args.verbose:
        return

    size = section['sh_size']
    first = section['sh_addr']
    # An empty section is shown as a span that ends where it starts.
    last = first + (size - 1 if size else 0)

    addr_span = f"0x{first:08x}-0x{last:08x}"
    details = (f"{descr} '{section.name}': size={size}, "
               f"{section['sh_type']}, 0x{section['sh_flags']:08x}")
    print(f"DEBUG: {addr_span} {details}")
207#
208
209
def get_section_ranges(elf):
    """
    Classify every ELF section as ROM, RAM or unassigned.

    Returns a dict holding, for each of 'rom', 'ram' and 'unassigned',
    the list of address bounds ({'start', 'end', 'name'}, end inclusive)
    and a '<kind>_total_size' byte count. 'all' is the RAM list followed
    by the ROM list, and 'all_total_size' counts every assigned section
    once.
    """
    rom_bounds = []
    ram_bounds = []
    unassigned_bounds = []
    totals = {'rom': 0, 'ram': 0, 'unassigned': 0, 'all': 0}

    # The build is considered XIP when any symbol table defines CONFIG_XIP.
    xip = any(symtab.get_symbol_by_name('CONFIG_XIP')
              for symtab in elf.iter_sections('SHT_SYMTAB'))
    if args.verbose:
        print(f'INFO: XIP={xip}')

    for section in elf.iter_sections():
        size = section['sh_size']
        first = section['sh_addr']
        last = first + (size - 1 if size else 0)
        bound = {'start': first, 'end': last, 'name': section.name}

        # Step 1: decide where the section lives. descr stays None for
        # sections that belong to neither ROM nor RAM.
        in_rom = False
        in_ram = False
        descr = None

        sec_type = section['sh_type']
        if sec_type == 'SHT_NOBITS':
            # BSS and noinit sections
            in_ram = True
            descr = "RAM bss section"
        elif sec_type == 'SHT_PROGBITS':
            # Sections to be in flash or memory
            flags = section['sh_flags']
            if (flags & SHF_ALLOC_EXEC) == SHF_ALLOC_EXEC:
                # Text section
                in_rom = True
                descr = "ROM txt section"
            elif (flags & SHF_WRITE_ALLOC) == SHF_WRITE_ALLOC:
                # Read/write data. With XIP the section occupies both
                # ROM and RAM since its content is copied from ROM to
                # RAM at boot.
                in_rom = xip
                in_ram = True
                descr = "DATA r/w section"
            elif (flags & SHF_ALLOC) == SHF_ALLOC:
                # Read only data
                in_rom = True
                descr = "ROM r/o section"

        # Step 2: record it.
        if descr is None:
            print_section_info(section, "unassigned section")
            unassigned_bounds.append(bound)
            totals['unassigned'] += size
            continue

        if in_rom:
            rom_bounds.append(bound)
            totals['rom'] += size
        if in_ram:
            ram_bounds.append(bound)
            totals['ram'] += size
        totals['all'] += size
        print_section_info(section, descr)

    return {'rom': rom_bounds,
            'rom_total_size': totals['rom'],
            'ram': ram_bounds,
            'ram_total_size': totals['ram'],
            'unassigned': unassigned_bounds,
            'unassigned_total_size': totals['unassigned'],
            'all': ram_bounds + rom_bounds,
            'all_total_size': totals['all']}
290
291
def get_die_filename(die, lineprog):
    """
    Return the path of the source file a DIE was declared in.

    The DIE's DW_AT_decl_file value selects an entry of the line
    program's file table; a non-zero directory index of that entry
    selects the include directory the file name is joined to. Relative
    paths are taken relative to the output directory (args.output) and
    the result is resolved to an absolute pathlib.Path.
    """
    # NOTE(review): both indices are used as 1-based here; confirm this
    # also holds for the DWARF versions the toolchain emits.
    decl_file = die.attributes['DW_AT_decl_file'].value
    entry = lineprog['file_entry'][decl_file - 1]

    dir_index = entry['dir_index']
    if dir_index != 0:
        include_dirs = lineprog.header['include_directory']
        raw_name = os.path.join(include_dirs[dir_index - 1], entry.name)
    else:
        raw_name = entry.name

    # File names are stored as bytes in the line program.
    src_path = Path(raw_name.decode(locale.getpreferredencoding()))

    # Anchor relative paths at the output path
    if not src_path.is_absolute():
        src_path = Path(args.output).joinpath(src_path)

    # Normalise the path
    try:
        src_path = src_path.resolve()
    except OSError as e:
        # built-ins can't be resolved, so it's not an issue
        if '<built-in>' not in str(src_path):
            raise e

    return src_path
320
321
def do_simple_name_matching(dwarfinfo, symbol_dict, processed):
    """
    Map symbols to source files by comparing DIE names to symbol names.

    Every compile unit that has a line program is walked DIE by DIE.
    Variables and subprograms that name a symbol present in symbol_dict,
    and whose address matches one of that symbol's entries, get the
    DIE's source file added to the entry's 'mapped_files'.

    processed is updated in place: 'mapped_symbols', 'mapped_addr' and
    'unmapped_symbols' are refreshed and 'unmapped_dies' is set to the
    DIEs that could not be mapped here.
    """
    mapped_symbols = processed['mapped_symbols']
    mapped_addresses = processed['mapped_addr']
    unmapped_symbols = processed['unmapped_symbols']
    newly_mapped_syms = set()

    location_parser = LocationParser(dwarfinfo.location_lists())

    unmapped_dies = set()

    for compile_unit in dwarfinfo.iter_CUs():
        lineprog = dwarfinfo.line_program_for_CU(compile_unit)
        if lineprog is None:
            continue

        # Only variables and subprograms (i.e. functions) can
        # yield a symbol name.
        for die in compile_unit.iter_DIEs():
            attrs = die.attributes
            sym_name = None

            if die.tag == 'DW_TAG_variable':
                # A 'DW_AT_location' means the variable maps to an
                # actual address (e.g. it is not an extern).
                if 'DW_AT_location' in attrs:
                    sym_name = die.get_full_path()

            elif die.tag == 'DW_TAG_subprogram':
                refers_elsewhere = ('DW_AT_abstract_origin' in attrs
                                    or 'DW_AT_specification' in attrs)
                if refers_elsewhere:
                    # The name lives in another DIE; leave it for later.
                    unmapped_dies.add(die)
                elif 'DW_AT_low_pc' in attrs:
                    # DW_AT_low_pc == 0 is a weak function which has
                    # been overridden, so it has no name of its own.
                    if attrs['DW_AT_low_pc'].value != 0:
                        sym_name = die.get_full_path()

                # For mangled function names, the linkage name
                # is what appears in the symbol list.
                if 'DW_AT_linkage_name' in attrs:
                    sym_name = attrs['DW_AT_linkage_name'].value.decode()

            # Nothing to do without a name or a reference back to a file.
            if sym_name is None or 'DW_AT_decl_file' not in attrs:
                continue

            matched = None
            if sym_name in symbol_dict:
                mapped_symbols.add(sym_name)
                matched = match_symbol_address(symbol_dict[sym_name], die,
                                               location_parser, dwarfinfo)

            if matched is not None:
                symaddr = matched['symbol']['st_value']
                if symaddr not in mapped_addresses:
                    matched['mapped_files'].add(
                        get_die_filename(die, lineprog))
                    mapped_addresses.add(symaddr)
                    newly_mapped_syms.add(sym_name)
                    continue

            # Not mapped in this pass.
            unmapped_dies.add(die)

    processed['mapped_symbols'] = mapped_symbols.union(newly_mapped_syms)
    processed['mapped_addr'] = mapped_addresses
    processed['unmapped_symbols'] = unmapped_symbols.difference(
        newly_mapped_syms)
    processed['unmapped_dies'] = unmapped_dies
411
412
def mark_address_aliases(symbol_dict, processed):
    """
    Treat aliases of already mapped symbols as mapped.

    A still unmapped symbol name counts as an alias when any of its
    entries sits at an address that has already been mapped. Such names
    are moved from 'unmapped_symbols' to 'mapped_symbols' in processed,
    so they are not counted a second time when the tree is built.
    """
    known_names = processed['mapped_symbols']
    known_addrs = processed['mapped_addr']
    pending = processed['unmapped_symbols']

    aliases = {
        name
        for name in pending
        for entry in symbol_dict[name]
        if entry['symbol']['st_value'] in known_addrs
    }

    processed['mapped_symbols'] = known_names.union(aliases)
    processed['mapped_addr'] = known_addrs
    processed['unmapped_symbols'] = pending.difference(aliases)
440
441
def do_address_range_matching(dwarfinfo, symbol_dict, processed):
    """
    Match symbols indirectly using address ranges.

    This uses the address ranges of DIEs and maps them to symbols
    residing within those ranges, and works on DIEs that have not
    been mapped in previous steps. This works on symbol names
    that do not match the names in DIEs, e.g. "<func>" in DIE,
    but "<func>.constprop.*" in symbol name list. This also
    helps with mapping the mangled function names in C++,
    since the names in DIE are actual function names in source
    code and not mangled versions of them.

    Only the DIEs stored under processed['unmapped_dies'] are examined;
    the function returns immediately when that key is missing.
    On return 'mapped_symbols', 'mapped_addr' and 'unmapped_symbols'
    in processed reflect the symbols mapped here, and the source path is
    added to the 'mapped_files' set of every matched symbol entry.
    """
    if 'unmapped_dies' not in processed:
        return

    mapped_symbols = processed['mapped_symbols']
    mapped_addresses = processed['mapped_addr']
    unmapped_symbols = processed['unmapped_symbols']
    newly_mapped_syms = set()

    location_lists = dwarfinfo.location_lists()
    location_parser = LocationParser(location_lists)

    unmapped_dies = processed['unmapped_dies']

    # Group DIEs by compile units
    cu_list = dict()

    for die in unmapped_dies:
        cu = die.cu
        if cu not in cu_list:
            cu_list[cu] = {'dies': set()}
        cu_list[cu]['dies'].add(die)

    # Loop through all compile units
    for cu in cu_list:
        lineprog = dwarfinfo.line_program_for_CU(cu)

        # Index every DIE of this compile unit by its offset so that
        # references between DIEs can be followed below.
        offset_map = dict()
        for die in cu.iter_DIEs():
            offset_map[die.offset] = die

        for die in cu_list[cu]['dies']:
            # Only functions and variables can be matched to symbols.
            if not (die.tag == 'DW_TAG_subprogram' or die.tag == 'DW_TAG_variable'):
                continue

            path = None

            # Has direct reference to file, so use it
            if 'DW_AT_decl_file' in die.attributes:
                path = get_die_filename(die, lineprog)

            # Loop through indirect reference until a direct
            # reference to file is found
            if ('DW_AT_abstract_origin' in die.attributes) or (
                    'DW_AT_specification' in die.attributes):
                die_ptr = die
                while path is None:
                    # Stop when the current DIE is not a subprogram or
                    # does not refer to any other DIE.
                    if not (die_ptr.tag == 'DW_TAG_subprogram') or not (
                            ('DW_AT_abstract_origin' in die_ptr.attributes) or
                            ('DW_AT_specification' in die_ptr.attributes)):
                        break

                    # DW_AT_abstract_origin takes precedence when both
                    # attributes are present.
                    if 'DW_AT_abstract_origin' in die_ptr.attributes:
                        ofname = 'DW_AT_abstract_origin'
                    elif 'DW_AT_specification' in die_ptr.attributes:
                        ofname = 'DW_AT_specification'

                    # The attribute value is treated as an offset relative
                    # to the start of the compile unit.
                    offset = die_ptr.attributes[ofname].value
                    offset += die_ptr.cu.cu_offset

                    # There is nothing to reference so no need to continue
                    if offset not in offset_map:
                        break

                    die_ptr = offset_map[offset]
                    if 'DW_AT_decl_file' in die_ptr.attributes:
                        path = get_die_filename(die_ptr, lineprog)

            # A source file was found, so try to map symbols to it
            if path is not None:
                low, high = get_die_mapped_address(die, location_parser,
                                                   dwarfinfo)
                # DIE without a usable address: nothing to match against
                if low is None:
                    continue

                # Case 1: Match for a function (using a range)
                if die.tag == 'DW_TAG_subprogram':
                    for ums in unmapped_symbols:
                        for one_sym in symbol_dict[ums]:
                            symbol = one_sym['symbol']
                            symaddr = symbol['st_value']

                            # Each address is mapped at most once
                            if symaddr not in mapped_addresses:
                                if low <= symaddr < high:
                                    one_sym['mapped_files'].add(path)
                                    mapped_addresses.add(symaddr)
                                    newly_mapped_syms.add(ums)

                # Case 2: Match for a variable (using a single address)
                elif die.tag == 'DW_TAG_variable':
                    for ums in unmapped_symbols:
                        for one_sym in symbol_dict[ums]:
                            symbol = one_sym['symbol']
                            symaddr = symbol['st_value']

                            # Each address is mapped at most once
                            if symaddr not in mapped_addresses:
                                # We expect the 'high' value to be 'low + 1' for a variable
                                if low == symaddr:
                                    one_sym['mapped_files'].add(path)
                                    mapped_addresses.add(symaddr)
                                    newly_mapped_syms.add(ums)

    # Move the names mapped in this pass from "unmapped" to "mapped"
    mapped_symbols = mapped_symbols.union(newly_mapped_syms)
    unmapped_symbols = unmapped_symbols.difference(newly_mapped_syms)

    processed['mapped_symbols'] = mapped_symbols
    processed['mapped_addr'] = mapped_addresses
    processed['unmapped_symbols'] = unmapped_symbols
563
564
def set_root_path_for_unmapped_symbols(symbol_dict, addr_range, processed):
    """
    Give the remaining unmapped symbols the placeholder path ':'.

    Every entry of a still unmapped symbol that lies inside one of the
    given address ranges (e.g. ROM or RAM), and whose address has not
    been mapped yet, gets Path(':') added to its 'mapped_files'. The
    bookkeeping sets in processed are updated accordingly.
    """
    mapped_symbols = processed['mapped_symbols']
    mapped_addresses = processed['mapped_addr']
    unmapped_symbols = processed['unmapped_symbols']
    rooted = set()

    for name in unmapped_symbols:
        for entry in symbol_dict[name]:
            sym = entry['symbol']
            addr = sym['st_value']

            if not is_symbol_in_ranges(sym, addr_range):
                continue
            if addr in mapped_addresses:
                continue

            entry['mapped_files'].add(Path(':'))
            mapped_addresses.add(addr)
            rooted.add(name)

    processed['mapped_symbols'] = mapped_symbols.union(rooted)
    processed['mapped_addr'] = mapped_addresses
    processed['unmapped_symbols'] = unmapped_symbols.difference(rooted)
596
def find_common_path_prefix(symbol_dict):
    """
    Return the longest common path of all mapped files.

    None is returned when os.path.commonpath() rejects the collected
    paths with ValueError, which includes the case of no mapped files.
    Must be called before set_root_path_for_unmapped_symbols().
    """
    mapped = [
        src
        for entries in symbol_dict.values()
        for entry in entries
        for src in entry['mapped_files']
    ]

    try:
        return os.path.commonpath(mapped)
    except ValueError:
        return None
613
class TreeNode(NodeMixin):
    """
    A node of the size report tree.

    A node is either a terminal symbol node or a grouping node
    (directory, file, or one of the top-level buckets). Terminal symbol
    nodes are recognised elsewhere in this file by the presence of an
    'address' attribute, so that attribute is only created when a value
    is supplied.

    NOTE(review): main() exports nodes with DictExporter; the order in
    which attributes are assigned below presumably determines the key
    order of the exported data - confirm before reordering.
    """

    def __init__(self, name, identifier, size=0, parent=None, children=None, address=None, section=None):
        """
        Create a node.

        name is the label shown in the report, identifier the string
        used to look the node up again, size the byte count attributed
        to the node. parent and children link the node into the tree.
        address and section are stored only when they are not None.
        """
        super().__init__()
        self._name = name
        self._size = size
        self.parent = parent
        self._identifier = identifier
        # Optional attributes: left undefined unless a value is given.
        if address is not None:
            self.address = address
        if section is not None:
            self.section = section
        if children:
            self.children = children
        # Location tags of the symbol; filled in by the tree builder.
        self.loc = []

    def __repr__(self):
        """Represent the node by its display name."""
        return self._name
635
636
def sum_node_children_size(node):
    """
    Add up the sizes of a node's direct children.

    Only the immediate children are considered; 0 is returned for a
    node without children.
    """
    return sum(child._size for child in node.children)
647
648
def generate_any_tree(symbol_dict, total_size, path_prefix):
    """
    Generate a symbol tree for output.

    symbol_dict maps symbol names to lists of symbol entries whose
    'mapped_files' have been filled in. total_size becomes the size of
    the root node. path_prefix is the common prefix of all mapped files
    (or None); when it equals the Zephyr base path, all nodes are placed
    directly under the root instead of under separate top-level nodes.

    Returns the root TreeNode. Sizes not covered by any symbol are
    accounted for by a '(hidden)' child of the root.
    """
    root = TreeNode('Root', "root")
    # Bucket for symbols whose mapped file is not an absolute path.
    node_no_paths = TreeNode('(no paths)', ":", parent=root)

    if path_prefix and Path(path_prefix) == Path(args.zephyrbase):
        # All source files are under ZEPHYR_BASE so there is
        # no need for another level.
        node_zephyr_base = root
        node_output_dir = root
        node_workspace = root
        node_others = root
    else:
        # Separate top-level nodes; they are attached to the root later,
        # once it is known which of them have any content.
        node_zephyr_base = TreeNode('ZEPHYR_BASE', args.zephyrbase)
        node_output_dir = TreeNode('OUTPUT_DIR', args.output)
        node_others = TreeNode("/", "/")

        if args.workspace:
            node_workspace = TreeNode('WORKSPACE', args.workspace)
        else:
            node_workspace = node_others

    # Helper for building a simple tree with a path-like hierarchy:
    # inserts one symbol, given as a path whose last part is the symbol
    # name, below the given root node.
    def _insert_one_elem(root, path, size, addr, section, loc):
        # cur: identifier built up from the path parts seen so far
        # node: most recently created node, None if none was created
        # parent: node under which the next new node is attached
        cur = None
        node = None
        parent = root
        for part in path.parts:
            if cur is None:
                cur = part
            else:
                cur = str(Path(cur, part))

            # Look for an existing node with this identifier under root.
            results = findall_by_attr(root, cur, name="_identifier")
            if results:
                item = results[0]
                if not hasattr(item, 'address'):
                    # Passing down through a non-terminal parent node.
                    parent = item
                    parent._size += size
                else:
                    # Another symbol node here with the same name; stick to its parent as well.
                    parent = item.parent
                    node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent)
            else:
                # There is no such terminal symbol in the tree yet; let's add it.
                if node:
                    parent = node
                node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent)
        if node:
            # Set memory block address and section name properties only for terminal symbol nodes.
            # Don't do it on file- and directory- level parent nodes.
            node.address = addr
            node.section = section
            node.loc = loc
        else:
            # normally this shouldn't happen; just to detect data or logic errors.
            print(f"ERROR: no end node created for {root}, {path}, 0x{addr:08x}+{size}@{section}")
    #

    # Mapping paths to tree nodes; the first entry that is a parent of
    # a symbol's path decides which top-level node receives the symbol.
    path_node_map = [
        [Path(args.zephyrbase), node_zephyr_base],
        [Path(args.output), node_output_dir],
    ]

    if args.workspace:
        path_node_map.append(
            [Path(args.workspace), node_workspace]
        )

    for name, sym in symbol_dict.items():
        for symbol in sym:
            size = get_symbol_size(symbol['symbol'])
            addr = get_symbol_addr(symbol['symbol'])
            section = symbol['section']
            for file in symbol['mapped_files']:
                # The symbol name is appended as the last path component.
                path = Path(file, name)
                if path.is_absolute():
                    has_node = False

                    for one_path in path_node_map:
                        if one_path[0] in path.parents:
                            # Store the path relative to the matching base.
                            path = path.relative_to(one_path[0])
                            dest_node = one_path[1]
                            has_node = True
                            break

                    if not has_node:
                        dest_node = node_others
                else:
                    dest_node = node_no_paths

                _insert_one_elem(dest_node, path, size, addr, section, symbol['loc'])


    if node_zephyr_base is not root:
        # ZEPHYR_BASE and OUTPUT_DIR nodes don't have sum of symbol size
        # so calculate them here.
        node_zephyr_base._size = sum_node_children_size(node_zephyr_base)
        node_output_dir._size = sum_node_children_size(node_output_dir)

        # Find out which nodes need to be in the tree.
        # "(no path)", ZEPHYR_BASE nodes are essential.
        children = [node_no_paths, node_zephyr_base]
        if node_output_dir.height != 0:
            # OUTPUT_DIR may be under ZEPHYR_BASE.
            children.append(node_output_dir)
        if node_others.height != 0:
            # Only include "others" node if there is something.
            children.append(node_others)

        if args.workspace:
            node_workspace._size = sum_node_children_size(node_workspace)
            if node_workspace.height != 0:
                children.append(node_workspace)

        root.children = children

    root._size = total_size

    # Need to account for code and data where there are no emitted
    # symbols associated with them: whatever part of the total is not
    # covered by the root's other children is reported as '(hidden)'.
    node_hidden_syms = TreeNode('(hidden)', "(hidden)", parent=root)
    node_no_paths._size = sum_node_children_size(node_no_paths)
    node_hidden_syms._size = root._size - sum_node_children_size(root)

    return root
780
781
def node_sort(items):
    """
    Order sibling nodes by display name.

    Passed to RenderTree as its child iterator; returns a new list and
    leaves the given iterable untouched.
    """
    ordered = list(items)
    ordered.sort(key=lambda node: node._name)
    return ordered
787
788
def print_any_tree(root, total_size, depth, header=None):
    """
    Print the symbol tree.

    root is the tree to render, total_size the byte count that every
    node's percentage is computed against, depth the maximum number of
    tree levels to show (None for no limit, passed to RenderTree as
    maxlevel). When header is given it is printed as a title above the
    table.

    A total_size of zero is reported as 0.00% for every node instead of
    failing with ZeroDivisionError.
    """
    if header:
        print(f"{Fore.WHITE}{Style.BRIGHT}{header}")
        print('-' * 110 + f"{Fore.RESET}{Style.RESET_ALL}")

    print('{:98s} {:>7s} {:>7s} {:11s} {:16s}'.format(
        Fore.YELLOW + "Path", "Size", "%", " Address", "Section" + Fore.RESET))
    print('=' * 138)
    for row in RenderTree(root, childiter=node_sort, maxlevel=depth):
        node = row.node
        # Right-align the size so that it ends in the same column
        # regardless of the tree prefix and the node name length.
        f = len(row.pre) + len(node._name)
        s = str(node._size).rjust(100-f)
        if total_size:
            percent = 100 * float(node._size) / float(total_size)
        else:
            # Empty target: nothing to take a share of.
            percent = 0.0

        hex_addr = "-"
        section_name = ""
        cc = cr = ""
        if not node.children:
            # Leaf: show section and address when the node carries them.
            if hasattr(node, 'section'):
                section_name = node.section
            if hasattr(node, 'address'):
                hex_addr = "0x{:08x}".format(node.address)
                cc = Fore.CYAN
                cr = Fore.RESET
        elif node._name.endswith(SRC_FILE_EXT):
            # Non-leaf node named like a source file.
            cc = Fore.GREEN
            cr = Fore.RESET

        print(f"{row.pre}{cc}{node._name} {s} {cr}{Fore.BLUE}{percent:6.2f}%{Fore.RESET}  {hex_addr} {section_name}")
    print('=' * 138)
    print(f'{total_size:>101}')
822
823
def parse_args():
    """
    Parse the command line into the module-level 'args' namespace.

    Option abbreviations are disabled. The options are registered in the
    order listed below, which is also the order used in the help output.
    """
    global args

    json_help = ('Store results in the given JSON file '
                 '(a "{target}" string in the filename will '
                 'be replaced by "ram", "rom", or "all").')

    # (names, keyword arguments) for each add_argument() call.
    argument_specs = (
        (("-k", "--kernel"),
         {"required": True, "help": "Zephyr ELF binary"}),
        (("-z", "--zephyrbase"),
         {"required": True, "help": "Zephyr base path"}),
        (("-q", "--quiet"),
         {"action": "store_true",
          "help": "Do not output anything on the screen."}),
        (("-o", "--output"),
         {"required": True, "help": "Output path"}),
        (("-w", "--workspace"),
         {"default": None,
          "help": "Workspace path (Usually the same as WEST_TOPDIR)"}),
        (("target",),
         {"choices": ['rom', 'ram', 'all'], "nargs": "+"}),
        (("-d", "--depth"),
         {"dest": "depth", "type": int, "default": None,
          "help": "How deep should we go into the tree",
          "metavar": "DEPTH"}),
        (("-v", "--verbose"),
         {"action": "store_true",
          "help": "Print extra debugging information"}),
        (("--json",),
         {"help": json_help}),
    )

    cli = argparse.ArgumentParser(allow_abbrev=False)
    for names, options in argument_specs:
        cli.add_argument(*names, **options)

    args = cli.parse_args()
853
854
def main():
    """
    Main program.

    Parses the command line, reads the ELF file given with --kernel and,
    for every requested target ('rom', 'ram', 'all'), maps symbols to
    source files, prints the size tree (unless --quiet) and writes the
    tree as JSON.

    Exits with an error message when the ELF file does not exist or
    carries no DWARF information.
    """
    parse_args()

    sys.stdout.reconfigure(encoding='utf-8')

    # Init colorama
    init()

    # Explicit checks rather than assert statements: assertions are
    # removed when Python runs with -O, which would let bad input through.
    if not os.path.exists(args.kernel):
        sys.exit(f"{args.kernel} does not exist.")
    targets = args.target

    # Strips leading underscores from attribute names on export
    # (e.g. '_size' becomes 'size'). Shared by all targets.
    exporter = DictExporter(
        attriter=lambda attrs: [(k.lstrip('_'), v) for k, v in attrs]
    )

    with ELFFile(open(args.kernel, "rb")) as elf:
        if not elf.has_dwarf_info():
            sys.exit("ELF file has no DWARF information")

        set_global_machine_arch(elf.get_machine_arch())
        addr_ranges = get_section_ranges(elf)
        dwarfinfo = elf.get_dwarf_info()

        symbols = get_symbols(elf, addr_ranges)
        for sym in symbols['unassigned'].values():
            for sym_entry in sym:
                print(f"WARN: Symbol '{sym_entry['name']}' section '{sym_entry['section']}' "
                      "is not in RAM or ROM.")

        for t in targets:
            # An explicit --json name may contain a "{target}" placeholder;
            # otherwise the report goes to <output>/<target>.json.
            if args.json:
                jsonout = args.json.replace('{target}', t)
            else:
                jsonout = os.path.join(args.output, f'{t}.json')

            symbol_dict = symbols[t]
            symsize = addr_ranges[f'{t}_total_size']
            ranges = addr_ranges[t]

            processed = {"mapped_symbols": set(),
                         "mapped_addr": set(),
                         "unmapped_symbols": set(symbol_dict.keys())}

            # Mapping passes, from the most direct to the least direct.
            # Aliases are marked after each matching pass so that an
            # address is never counted twice.
            do_simple_name_matching(dwarfinfo, symbol_dict, processed)
            mark_address_aliases(symbol_dict, processed)
            do_address_range_matching(dwarfinfo, symbol_dict, processed)
            mark_address_aliases(symbol_dict, processed)
            # Must be computed before the ':' placeholder paths are added.
            common_path_prefix = find_common_path_prefix(symbol_dict)
            set_root_path_for_unmapped_symbols(symbol_dict, ranges, processed)

            if args.verbose:
                for sym in processed['unmapped_symbols']:
                    print("INFO: Unmapped symbol: {0}".format(sym))

            root = generate_any_tree(symbol_dict, symsize, common_path_prefix)
            if not args.quiet:
                # A per-target title is only needed with several targets.
                header = f"{t.upper()} Report" if len(targets) > 1 else None
                print_any_tree(root, symsize, args.depth, header)

            data = dict()
            data["symbols"] = exporter.export(root)
            data["total_size"] = symsize
            with open(jsonout, "w") as fp:
                json.dump(data, fp, indent=4)
921
922
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
925