1#!/usr/bin/env python
3# Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
4# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
10#     http://www.apache.org/licenses/LICENSE-2.0
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
18import argparse
19import os
20import re
21from functools import partial
23import elftools
24from elftools.elf import elffile
try:
    from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
except ImportError:
    # These names are only used inside "# type:" comments in this file,
    # so a missing typing module does not affect runtime behavior.
    pass
# Matches a ";; Function <mangle> (<function>, ...)" header line of an RTL dump.
# The "function" group is the name later used to look up the ELF symbol.
FUNCTION_REGEX = re.compile(
    r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$'
)
# Matches a line containing a "(call ..." expression; "target" is the quoted name on that line.
CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
# Matches a line containing '(symbol_ref ... ("<name>")'; "target" is the quoted symbol name.
SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
class RtlFunction(object):
    """
    A function found in an RTL dump file, together with the names it calls
    and the symbol names it references.
    """

    def __init__(self, name, rtl_filename, tu_filename):
        # Both lists start empty; load_rtl_file fills them while parsing the dump.
        self.calls = []  # type: List[str]
        self.refs = []  # type: List[str]
        # Placeholder attribute; nothing in this file assigns it after construction.
        self.sym = None
        self.name, self.rtl_filename, self.tu_filename = name, rtl_filename, tu_filename
class SectionAddressRange(object):
    """
    A named address interval. The lower bound is inclusive and the upper bound
    (addr + size) is exclusive.
    """

    def __init__(self, name, addr, size):  # type: (str, int, int) -> None
        self.name = name
        self.low, self.high = addr, addr + size

    def __str__(self):
        template = '{}: 0x{:08x} - 0x{:08x}'
        return template.format(self.name, self.low, self.high)

    def contains_address(self, addr):
        """
        Return True if addr lies in [low, high).
        """
        return addr >= self.low and addr < self.high
# Extra address ranges per chip target. ElfInfo._load_sections appends the entry
# selected by the IDF_TARGET environment variable to the ranges read from the ELF file.
# NOTE(review): presumably these are the chip ROM regions, which the application
# ELF does not describe -- confirm the values against the chip memory maps.
TARGET_SECTIONS = {
    'esp32': [
        SectionAddressRange('.rom.text', 0x40000000, 0x70000),
        SectionAddressRange('.rom.rodata', 0x3ff96000, 0x9018)
    ],
    'esp32s2': [
        SectionAddressRange('.rom.text', 0x40000000, 0x1bed0),
        SectionAddressRange('.rom.rodata', 0x3ffac600, 0x392c)
    ],
    'esp32s3': [
        SectionAddressRange('.rom.text', 0x40000000, 0x568d0),
        SectionAddressRange('.rom.rodata', 0x3ff071c0, 0x8e30)
    ]
}  # type: Dict[str, List[SectionAddressRange]]
class Symbol(object):
    """
    An ELF symbol plus the reference edges recorded for it.

    filename is only set for local symbols (see ElfInfo._load_symbols);
    section is None when the address falls in no known section range.
    """

    def __init__(self, name, addr, local, filename, section):  # type: (str, int, bool, Optional[str], Optional[str]) -> None
        self.name, self.addr = name, addr
        self.local = local
        self.filename, self.section = filename, section
        # Edges in both directions; filled in by match_rtl_funcs_to_symbols.
        self.refers_to = []  # type: List[Symbol]
        self.referred_from = []  # type: List[Symbol]

    def __str__(self):
        section_label = self.section if self.section else 'unknown'
        locality = ' (local)' if self.local else ''
        return '{} @0x{:08x} [{}]{} {}'.format(
            self.name, self.addr, section_label, locality, self.filename
        )
class Reference(object):
    """
    A directed edge between two symbols: from_sym calls or references to_sym.
    """

    def __init__(self, from_sym, to_sym):  # type: (Symbol, Symbol) -> None
        self.from_sym, self.to_sym = from_sym, to_sym

    def __str__(self):
        src = self.from_sym
        dst = self.to_sym
        return '{} @0x{:08x} ({}) -> {} @0x{:08x} ({})'.format(
            src.name, src.addr, src.section,
            dst.name, dst.addr, dst.section
        )
class ElfInfo(object):
    """
    Section address ranges and symbols read from an open ELF file.
    Everything is loaded once, in the constructor.
    """

    def __init__(self, elf_file):  # type: (BinaryIO) -> None
        self.elf_file = elf_file
        self.elf_obj = elffile.ELFFile(self.elf_file)
        # Ranges must be loaded before symbols: _load_symbols resolves every
        # symbol address to a section name through section_for_addr.
        self.section_ranges = self._load_sections()
        self.symbols = self._load_symbols()

    def _load_symbols(self):  # type: () -> List[Symbol]
        """
        Collect the NOTYPE, FUNC and OBJECT symbols of every symbol table section.
        """
        wanted_types = ('STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT')
        collected = []
        symbol_tables = (
            section for section in self.elf_obj.iter_sections()
            if isinstance(section, elftools.elf.sections.SymbolTableSection)
        )
        for table in symbol_tables:
            # Name from the most recent STT_FILE entry of this table;
            # it is attached to the local symbols that come after it.
            current_file = None
            for elf_sym in table.iter_symbols():
                info = elf_sym.entry['st_info']
                kind = info['type']
                if kind == 'STT_FILE':
                    current_file = elf_sym.name
                elif kind in wanted_types:
                    is_local = info['bind'] == 'STB_LOCAL'
                    address = elf_sym.entry['st_value']
                    collected.append(
                        Symbol(
                            elf_sym.name,
                            address,
                            is_local,
                            current_file if is_local else None,
                            self.section_for_addr(address),
                        )
                    )
        return collected

    def _load_sections(self):  # type: () -> List[SectionAddressRange]
        """
        Build the address ranges of the sections that belong to PT_LOAD segments,
        followed by the extra ranges for the target named by IDF_TARGET, if known.
        """
        ranges = []
        for segment in self.elf_obj.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue
            ranges.extend(
                SectionAddressRange(section.name, section['sh_addr'], section['sh_size'])
                for section in self.elf_obj.iter_sections()
                if segment.section_in_segment(section)
            )

        idf_target = os.environ.get('IDF_TARGET')
        if idf_target in TARGET_SECTIONS:
            ranges.extend(TARGET_SECTIONS[idf_target])

        return ranges

    def symbols_by_name(self, name):  # type: (str) -> List[Symbol]
        """
        Return every loaded symbol whose name equals name, in load order.
        """
        return [candidate for candidate in self.symbols if candidate.name == name]

    def section_for_addr(self, sym_addr):  # type: (int) -> Optional[str]
        """
        Return the name of the first range containing sym_addr, or None.
        """
        matching_names = (
            section_range.name
            for section_range in self.section_ranges
            if section_range.contains_address(sym_addr)
        )
        return next(matching_names, None)
def load_rtl_file(rtl_filename, tu_filename, functions):  # type: (str, str, List[RtlFunction]) -> None
    """
    Parse one RTL dump file and append an RtlFunction to functions for every
    function header found in it.

    Lines following a header are scanned for call targets and symbol references,
    which are recorded (without duplicates, in order of first appearance) in the
    calls and refs lists of the most recently seen function. Lines that precede
    the first function header are ignored.
    """
    current = None  # type: Optional[RtlFunction]
    # The context manager closes the dump file even if parsing raises.
    with open(rtl_filename) as rtl_file:
        for line in rtl_file:
            # Find function definition
            header = FUNCTION_REGEX.match(line)
            if header:
                current = RtlFunction(header.group('function'), rtl_filename, tu_filename)
                functions.append(current)
                continue

            if current is None:
                continue

            # Find direct function calls. A line that matches as a call is not
            # additionally examined as a symbol reference.
            call = CALL_REGEX.match(line)
            if call:
                target = call.group('target')
                if target not in current.calls:
                    current.calls.append(target)
                continue

            # Find symbol references
            ref = SYMBOL_REF_REGEX.match(line)
            if ref:
                target = ref.group('target')
                if target not in current.refs:
                    current.refs.append(target)
def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename):  # type: (str, str) -> bool
    """
    Check whether the base name of an RTL file starts with the given symbol file name.
    """
    # Symbol file names (from the ELF symbol table) are short source file names
    # without a path, e.g. "cpu_start.c". RTL file names are paths relative to
    # the build directory, e.g.:
    # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"
    #
    # Comparing only the base-name prefix can give a false positive when two
    # files with the same name live in different directories. If that happens,
    # the asserts in find_symbol_by_name are expected to catch it.
    #
    # If this becomes an issue, consider also loading the .map file and using
    # it to figure out which object file each symbol came from; object file
    # names and RTL file names should be much easier to match.
    rtl_basename = os.path.basename(rtl_filename)
    return rtl_basename.startswith(symbol_filename)
class SymbolNotFound(RuntimeError):
    """
    Error type for a failed symbol lookup. Not raised by the code visible in this file.
    """
def find_symbol_by_name(name, elfinfo, local_func_matcher):  # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
    """
    Find an ELF symbol for the given name; returns None when there is none.
    local_func_matcher is a callback function which checks if a candidate local symbol is suitable.
    It is only consulted when more than one symbol carries the name.
    """
    candidates = elfinfo.symbols_by_name(name)
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # There are multiple symbols with a given name. Find the best fit.
    best_local = None
    best_global = None
    for candidate in candidates:
        if candidate.local:
            if local_func_matcher(candidate):
                assert not best_local  # can't have two symbols with the same name in a single file
                best_local = candidate
        else:
            assert not best_global  # can't have two global symbols with the same name
            best_global = candidate

    # If two symbols with the same name are defined, a global and a local one,
    # prefer the local symbol as the reference target.
    if best_local:
        return best_local
    return best_global
def match_local_source_func(rtl_filename, sym):  # type: (str, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols: decides whether the local symbol sym can be
    the reference source (caller) for a function read from the RTL file rtl_filename.
    """
    symbol_file = sym.filename
    assert symbol_file  # should be set for local functions
    return rtl_filename_matches_sym_filename(rtl_filename, symbol_file)
def match_local_target_func(rtl_filename, sym_from, sym):  # type: (str, Symbol, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols: decides whether the local symbol sym can be
    the reference target (callee or referenced data) of sym_from, using the source
    symbol itself and the RTL file name the source function was read from.
    """
    target_file = sym.filename
    assert target_file  # should be set for local functions
    if not sym_from.local:
        # global symbol referencing a local symbol;
        # source filename is not known, use RTL filename as a hint
        return rtl_filename_matches_sym_filename(rtl_filename, target_file)
    # local symbol referencing another local symbol: both must come from the same file
    return sym_from.filename == target_file
def match_rtl_funcs_to_symbols(rtl_functions, elfinfo):  # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
    """
    Resolve the functions parsed from RTL dumps, and the names they call or reference,
    to ELF symbols.

    Returns a tuple (symbols, refs):
    - symbols: every symbol that took part in a resolved reference or was the source
      function of an RTL entry, each listed once, in order of first appearance;
    - refs: one Reference per resolved (source, target) pair; this list is not de-duplicated.

    As a side effect, refers_to / referred_from of the involved symbols are extended.
    """
    symbols = []  # type: List[Symbol]
    refs = []  # type: List[Reference]
    # Identities of the objects already stored in `symbols`. Symbol defines no __eq__,
    # so the identity test is equivalent to scanning the list, without the linear cost per lookup.
    seen_ids = set()

    def remember(sym):  # type: (Symbol) -> None
        if id(sym) not in seen_ids:
            seen_ids.add(id(sym))
            symbols.append(sym)

    # General idea:
    # - iterate over RTL functions.
    #   - for each RTL function, find the corresponding symbol
    #   - iterate over the functions and variables referenced from this RTL function
    #     - find symbols corresponding to the references
    #     - record every pair (sym_from, sym_to) as a Reference object

    for source_rtl_func in rtl_functions:
        source_matcher = partial(match_local_source_func, source_rtl_func.rtl_filename)
        sym_from = find_symbol_by_name(source_rtl_func.name, elfinfo, source_matcher)
        if sym_from is None:
            # The RTL dump defines a function, but no symbol with that name is in the ELF file.
            # This means that the symbol was likely removed (or not included) at link time.
            # There is nothing we can do to check section placement in this case.
            continue

        remember(sym_from)

        target_matcher = partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from)
        for target_name in source_rtl_func.calls + source_rtl_func.refs:
            if '*.LC' in target_name:  # skip local labels
                continue

            sym_to = find_symbol_by_name(target_name, elfinfo, target_matcher)
            if sym_to is None:
                # This may happen for an extern reference in the RTL file, if the reference was later
                # removed by one of the optimization passes, and the external definition got
                # garbage-collected.
                # TODO: consider adding some sanity check that we are here not because of some bug in
                # find_symbol_by_name?..
                continue

            sym_from.refers_to.append(sym_to)
            sym_to.referred_from.append(sym_from)
            refs.append(Reference(sym_from, sym_to))
            remember(sym_to)

    return symbols, refs
def get_symbols_and_refs(rtl_list, elf_file):  # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
    """
    Load the ELF file, parse every RTL file named in rtl_list, and return the
    (symbols, references) pair produced by match_rtl_funcs_to_symbols.
    """
    # The ELF file is loaded first, before any RTL file is opened.
    elf_info = ElfInfo(elf_file)

    parsed_functions = []  # type: List[RtlFunction]
    for rtl_path in rtl_list:
        # The RTL file name is passed as the translation unit name as well.
        load_rtl_file(rtl_path, rtl_path, parsed_functions)

    return match_rtl_funcs_to_symbols(parsed_functions, elf_info)
def list_refs_from_to_sections(refs, from_sections, to_sections):  # type: (List[Reference], List[str], List[str]) -> int
    """
    Print every reference whose source section is in from_sections and whose target
    section is in to_sections, and return how many were printed.
    An empty section list places no restriction on that side.
    """
    matched = 0
    for reference in refs:
        if from_sections and reference.from_sym.section not in from_sections:
            continue
        if to_sections and reference.to_sym.section not in to_sections:
            continue
        print(str(reference))
        matched += 1
    return matched
def find_files_recursive(root_path, ext):  # type: (str, str) -> Generator[str, None, None]
    """
    Walk root_path recursively and yield the path of every file whose name ends with ext.
    """
    for directory, _subdirs, entries in os.walk(root_path):
        for entry in entries:
            if not entry.endswith(ext):
                continue
            yield os.path.join(directory, entry)
def main():
    """
    Command-line entry point.

    Collects the RTL file names (from --rtl-list, or by searching --rtl-dir for
    '.expand' files), resolves references against --elf-file, then runs the
    selected action:
    - find-refs: print references between the given section lists; with --exit-code,
      exit with status 1 if any were printed;
    - all-refs: print every reference.

    Raises RuntimeError when neither --rtl-list nor --rtl-dir is given, or when
    no RTL file names were obtained.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--rtl-list',
        help='File with the list of RTL files',
        type=argparse.FileType('r'),
    )
    parser.add_argument(
        '--rtl-dir', help='Directory where to look for RTL files, recursively'
    )
    parser.add_argument(
        '--elf-file',
        required=True,
        help='Program ELF file',
        type=argparse.FileType('rb'),
    )
    action_sub = parser.add_subparsers(dest='action')
    find_refs_parser = action_sub.add_parser(
        'find-refs',
        help='List the references coming from a given list of source sections '
             'to a given list of target sections.',
    )
    find_refs_parser.add_argument(
        '--from-sections', help='comma-separated list of source sections'
    )
    find_refs_parser.add_argument(
        '--to-sections', help='comma-separated list of target sections'
    )
    find_refs_parser.add_argument(
        '--exit-code',
        action='store_true',
        help='If set, exits with non-zero code when any references found',
    )
    action_sub.add_parser(
        'all-refs',
        help='Print the list of all references',
    )

    # Parse once: argparse.FileType opens the named files during parsing.
    args = parser.parse_args()
    if args.rtl_list:
        # args.rtl_list is already an open text file (argparse.FileType), so it is
        # read directly rather than passed to open() again.
        with args.rtl_list as rtl_list_file:
            stripped_lines = (line.strip() for line in rtl_list_file)
            # Blank lines are not file names; skip them.
            rtl_list = [file_name for file_name in stripped_lines if file_name]
    else:
        if not args.rtl_dir:
            raise RuntimeError('Either --rtl-list or --rtl-dir must be specified')
        rtl_list = list(find_files_recursive(args.rtl_dir, '.expand'))

    if not rtl_list:
        raise RuntimeError('No RTL files specified')

    # ElfInfo reads sections and symbols eagerly, so the ELF file can be closed
    # as soon as the references have been computed.
    with args.elf_file as elf_file:
        _, refs = get_symbols_and_refs(rtl_list, elf_file)

    if args.action == 'find-refs':
        from_sections = args.from_sections.split(',') if args.from_sections else []
        to_sections = args.to_sections.split(',') if args.to_sections else []
        found = list_refs_from_to_sections(
            refs, from_sections, to_sections
        )
        if args.exit_code and found:
            raise SystemExit(1)
    elif args.action == 'all-refs':
        for r in refs:
            print(str(r))


if __name__ == '__main__':
    main()