1#!/usr/bin/env python
2#
3# Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
4# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18import argparse
19import os
20import re
21from functools import partial
22
23import elftools
24from elftools.elf import elffile
25
26try:
27    from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
28except ImportError:
29    pass
30
# Header line that opens each function in an RTL dump (";; Function ...").
# 'mangle' is the text before the parenthesis, 'function' is the first token inside it.
FUNCTION_REGEX = re.compile(r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$')

# Line containing "(call" followed by a double-quoted name; 'target' is the quoted text.
CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')

# Line containing a "(symbol_ref ... ("name")" expression; 'target' is the quoted name.
SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
36
37
class RtlFunction(object):
    """
    A function found in an RTL dump file, together with the names it calls and references.
    """

    def __init__(self, name, rtl_filename, tu_filename):
        # Function name and the files it was loaded from
        self.name = name
        self.rtl_filename = rtl_filename
        self.tu_filename = tu_filename
        # Names collected from the function body; each instance gets its own lists
        self.calls = []  # type: List[str]
        self.refs = []  # type: List[str]
        # Starts out unset; nothing in the constructor assigns a symbol
        self.sym = None
46
47
class SectionAddressRange(object):
    """
    A named address interval; `low` is inclusive and `high` (= addr + size) is exclusive.
    """

    def __init__(self, name, addr, size):  # type: (str, int, int) -> None
        self.name = name
        self.low, self.high = addr, addr + size

    def __str__(self):
        template = '{}: 0x{:08x} - 0x{:08x}'
        return template.format(self.name, self.low, self.high)

    def contains_address(self, addr):
        """Return True if `addr` lies within [low, high)."""
        if addr < self.low:
            return False
        return addr < self.high
59
60
# Extra address ranges per target, keyed by the value of the IDF_TARGET environment variable.
# ElfInfo._load_sections appends the matching list to the ranges read from the ELF file, so
# that addresses inside these ranges resolve to '.rom.text' / '.rom.rodata'.
# Each entry is SectionAddressRange(name, start address, size).
TARGET_SECTIONS = {
    'esp32': [
        SectionAddressRange('.rom.text', 0x40000000, 0x70000),
        SectionAddressRange('.rom.rodata', 0x3ff96000, 0x9018)
    ],
    'esp32s2': [
        SectionAddressRange('.rom.text', 0x40000000, 0x1bed0),
        SectionAddressRange('.rom.rodata', 0x3ffac600, 0x392c)
    ],
    'esp32s3': [
        SectionAddressRange('.rom.text', 0x40000000, 0x568d0),
        SectionAddressRange('.rom.rodata', 0x3ff071c0, 0x8e30)
    ]
}  # type: Dict[str, List[SectionAddressRange]]
75
76
class Symbol(object):
    """
    An ELF symbol: name, address, binding (local or not), source file name and section name.

    `refers_to` and `referred_from` start empty and are filled in by match_rtl_funcs_to_symbols.
    """

    def __init__(self, name, addr, local, filename, section):  # type: (str, int, bool, Optional[str], Optional[str]) -> None
        self.name = name
        self.addr = addr
        self.local = local
        self.filename = filename
        self.section = section
        self.refers_to = []  # type: List[Symbol]
        self.referred_from = []  # type: List[Symbol]

    def __str__(self):
        # A missing section is shown as 'unknown'; only local symbols get the ' (local)' tag
        section_name = self.section or 'unknown'
        local_tag = ' (local)' if self.local else ''
        return '{} @0x{:08x} [{}]{} {}'.format(
            self.name, self.addr, section_name, local_tag, self.filename
        )
95
96
class Reference(object):
    """
    A directed reference from one symbol (`from_sym`) to another (`to_sym`).
    """

    def __init__(self, from_sym, to_sym):  # type: (Symbol, Symbol) -> None
        self.from_sym = from_sym
        self.to_sym = to_sym

    def __str__(self):
        src, dst = self.from_sym, self.to_sym
        return '{} @0x{:08x} ({}) -> {} @0x{:08x} ({})'.format(
            src.name, src.addr, src.section,
            dst.name, dst.addr, dst.section
        )
111
112
class ElfInfo(object):
    """
    Section address ranges and symbols loaded from an ELF file.

    `section_ranges` holds the sections that belong to PT_LOAD segments, followed by the
    TARGET_SECTIONS entries for the target named in the IDF_TARGET environment variable (if any).
    `symbols` holds the NOTYPE/FUNC/OBJECT symbols from every symbol table in the file.
    """

    def __init__(self, elf_file):  # type: (BinaryIO) -> None
        self.elf_file = elf_file
        self.elf_obj = elffile.ELFFile(self.elf_file)
        # Sections have to be loaded first: _load_symbols resolves the section of each
        # symbol via section_for_addr, which reads self.section_ranges.
        self.section_ranges = self._load_sections()
        self.symbols = self._load_symbols()

    def _load_symbols(self):  # type: () -> List[Symbol]
        """Collect Symbol objects from all symbol table sections of the ELF file."""
        wanted_types = ['STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT']
        collected = []
        for table in self.elf_obj.iter_sections():
            if not isinstance(table, elftools.elf.sections.SymbolTableSection):
                continue
            # Name taken from the most recent STT_FILE entry in this table; it is attached
            # to the local symbols that follow it.
            current_file = None
            for elf_sym in table.iter_symbols():
                info = elf_sym.entry['st_info']
                if info['type'] == 'STT_FILE':
                    current_file = elf_sym.name
                    continue
                if info['type'] not in wanted_types:
                    continue
                is_local = info['bind'] == 'STB_LOCAL'
                address = elf_sym.entry['st_value']
                collected.append(
                    Symbol(
                        elf_sym.name,
                        address,
                        is_local,
                        current_file if is_local else None,
                        self.section_for_addr(address),
                    )
                )
        return collected

    def _load_sections(self):  # type: () -> List[SectionAddressRange]
        """Build the list of address ranges used to map an address to a section name."""
        ranges = []
        for segment in self.elf_obj.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue
            ranges.extend(
                SectionAddressRange(section.name, section['sh_addr'], section['sh_size'])
                for section in self.elf_obj.iter_sections()
                if segment.section_in_segment(section)
            )

        # Append the fixed per-target ranges, if IDF_TARGET names a known target
        ranges.extend(TARGET_SECTIONS.get(os.environ.get('IDF_TARGET'), []))
        return ranges

    def symbols_by_name(self, name):  # type: (str) -> List[Symbol]
        """Return all loaded symbols whose name equals `name`, in load order."""
        return [candidate for candidate in self.symbols if candidate.name == name]

    def section_for_addr(self, sym_addr):  # type: (int) -> Optional[str]
        """Return the name of the first range containing `sym_addr`, or None if there is none."""
        hits = (sar.name for sar in self.section_ranges if sar.contains_address(sym_addr))
        return next(hits, None)
175
176
def load_rtl_file(rtl_filename, tu_filename, functions):  # type: (str, str, List[RtlFunction]) -> None
    """
    Parse one RTL dump file and record the functions it defines.

    For every function header line found, a new RtlFunction is appended to `functions`.
    Call targets and symbol references on the following lines are added (without duplicates,
    in order of first appearance) to that function's `calls` and `refs` lists.
    Lines that precede the first function header are ignored.

    :param rtl_filename: path of the RTL dump file to read
    :param tu_filename: stored as-is in each created RtlFunction
    :param functions: list that is extended in place with the functions found
    """
    last_function = None  # type: Optional[RtlFunction]
    # Open via a context manager so the handle is closed as soon as parsing finishes,
    # instead of being left for the garbage collector (this is called once per RTL file).
    with open(rtl_filename) as rtl_file:
        for line in rtl_file:
            # Find function definition
            match = FUNCTION_REGEX.match(line)
            if match:
                last_function = RtlFunction(match.group('function'), rtl_filename, tu_filename)
                functions.append(last_function)
                continue

            if last_function is None:
                continue

            # Find direct function calls. A line is classified as a call first, so a
            # call line is never also recorded as a symbol reference.
            match = CALL_REGEX.match(line)
            if match:
                target = match.group('target')
                if target not in last_function.calls:
                    last_function.calls.append(target)
                continue

            # Find symbol references
            match = SYMBOL_REF_REGEX.match(line)
            if match:
                target = match.group('target')
                if target not in last_function.refs:
                    last_function.refs.append(target)
204
205
def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename):  # type: (str, str) -> bool
    """
    Check whether the base name of an RTL file starts with the given symbol file name.

    Symbol file names (from ELF debug info) are short source file names, without path: "cpu_start.c".
    RTL file names are paths relative to the build directory, e.g.:
    "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"

    This check may give a false positive if there are two files with the same name in
    different directories. This doesn't seem to happen in IDF now, but if it does happen,
    an assert in find_symbol_by_name should catch this.

    If this becomes an issue, consider also loading the .map file and using it to figure out
    which object file was used as the source of each symbol. Names of the object files and RTL files
    should be much easier to match.
    """
    rtl_basename = os.path.basename(rtl_filename)
    return rtl_basename.startswith(symbol_filename)
219
220
class SymbolNotFound(RuntimeError):
    """Error type for a failed symbol lookup; adds nothing to RuntimeError."""
223
224
def find_symbol_by_name(name, elfinfo, local_func_matcher):  # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
    """
    Find an ELF symbol for the given name.

    Returns None when no symbol has this name, and the only symbol when there is exactly one
    (without consulting the matcher). When several symbols share the name, the local symbol
    accepted by local_func_matcher is preferred over the global one.

    local_func_matcher is a callback function which checks if the candidate local symbol is suitable.
    It is only called for local symbols.
    """
    candidates = elfinfo.symbols_by_name(name)
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # There are multiple symbols with a given name. Find the best fit.
    best_local = None
    best_global = None
    for candidate in candidates:
        if candidate.local:
            if local_func_matcher(candidate):
                assert not best_local  # can't have two symbols with the same name in a single file
                best_local = candidate
        else:
            assert not best_global  # can't have two global symbols with the same name
            best_global = candidate

    # If two symbols with the same name are defined, a global and a local one,
    # prefer the local symbol as the reference target.
    return best_local or best_global
250
251
def match_local_source_func(rtl_filename, sym):  # type: (str, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols: decides whether the local symbol `sym` can be the
    reference source (caller) for a function loaded from the RTL file `rtl_filename`.
    The decision is based only on comparing the RTL file name with the symbol's file name.
    """
    source_name = sym.filename
    assert source_name  # should be set for local functions
    return rtl_filename_matches_sym_filename(rtl_filename, source_name)
259
260
def match_local_target_func(rtl_filename, sym_from, sym):  # type: (str, Symbol, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols: decides whether the local symbol `sym` can be the
    reference target (callee or referenced data) of `sym_from`, using the source symbol
    and the RTL file name the source function was loaded from.
    """
    assert sym.filename  # should be set for local functions
    if not sym_from.local:
        # global symbol referencing a local symbol;
        # source filename is not known, use RTL filename as a hint
        return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)

    # local symbol referencing another local symbol: both must come from the same file
    return sym_from.filename == sym.filename
275
276
def match_rtl_funcs_to_symbols(rtl_functions, elfinfo):  # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
    """
    Resolve RTL functions and the names they use to ELF symbols.

    General idea:
    - iterate over RTL functions.
      - for each RTL function, find the corresponding symbol
      - iterate over the functions and variables referenced from this RTL function
        - find symbols corresponding to the references
        - record every pair (sym_from, sym_to) as a Reference object

    As a side effect, the `refers_to` / `referred_from` lists of the involved symbols are extended.

    :param rtl_functions: functions loaded from the RTL files
    :param elfinfo: symbol information of the linked ELF file
    :return: tuple (symbols, refs): every symbol that took part in a reference or was the
             source function, each listed once in order of first appearance, and the list of
             Reference objects
    """
    symbols = []  # type: List[Symbol]
    refs = []  # type: List[Reference]
    # Mirrors the contents of `symbols`, so the "already listed?" check is O(1) instead of a
    # linear scan of a list that grows with every new symbol. Symbol defines neither __eq__
    # nor __hash__, so set membership is by identity, same as the list membership test was.
    seen = set()

    for source_rtl_func in rtl_functions:
        sym_from = find_symbol_by_name(
            source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename)
        )
        if sym_from is None:
            # RTL references a symbol, but the symbol is not defined in the generated object file.
            # This means that the symbol was likely removed (or not included) at link time.
            # There is nothing we can do to check section placement in this case.
            continue

        if sym_from not in seen:
            seen.add(sym_from)
            symbols.append(sym_from)

        # The target matcher depends only on the source function, so build it once per function
        target_matcher = partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from)

        for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
            if '*.LC' in target_rtl_func_name:  # skip local labels
                continue

            sym_to = find_symbol_by_name(target_rtl_func_name, elfinfo, target_matcher)
            if not sym_to:
                # This may happen for a extern reference in the RTL file, if the reference was later removed
                # by one of the optimization passes, and the external definition got garbage-collected.
                # TODO: consider adding some sanity check that we are here not because of some bug in
                # find_symbol_by_name?..
                continue

            sym_from.refers_to.append(sym_to)
            sym_to.referred_from.append(sym_from)
            refs.append(Reference(sym_from, sym_to))
            if sym_to not in seen:
                seen.add(sym_to)
                symbols.append(sym_to)

    return symbols, refs
320
321
def get_symbols_and_refs(rtl_list, elf_file):  # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
    """
    Load the ELF file and all listed RTL files, then match RTL functions to ELF symbols.

    The ELF file is parsed before any RTL file is read. Each RTL path is passed to
    load_rtl_file as both the RTL file name and the translation unit file name.
    Returns the (symbols, references) tuple produced by match_rtl_funcs_to_symbols.
    """
    elfinfo = ElfInfo(elf_file)

    loaded_functions = []  # type: List[RtlFunction]
    for rtl_path in rtl_list:
        load_rtl_file(rtl_path, rtl_path, loaded_functions)

    symbols_and_refs = match_rtl_funcs_to_symbols(loaded_functions, elfinfo)
    return symbols_and_refs
330
331
def list_refs_from_to_sections(refs, from_sections, to_sections):  # type: (List[Reference], List[str], List[str]) -> int
    """
    Print every reference whose source and target sections pass the given filters.

    An empty `from_sections` or `to_sections` list means "no restriction" on that side.
    Returns the number of references printed.
    """
    printed = 0
    for ref in refs:
        if from_sections and ref.from_sym.section not in from_sections:
            continue
        if to_sections and ref.to_sym.section not in to_sections:
            continue
        print(str(ref))
        printed += 1
    return printed
340
341
def find_files_recursive(root_path, ext):  # type: (str, str) -> Generator[str, None, None]
    """
    Walk `root_path` recursively and yield the path of every file whose name ends with `ext`.

    Paths are built by joining the directory reported by os.walk with the file name.
    """
    for dirpath, _, names in os.walk(root_path):
        matching_names = (name for name in names if name.endswith(ext))
        for name in matching_names:
            yield os.path.join(dirpath, name)
348
349
def main():
    """
    Command-line entry point.

    Collects the RTL files (from the --rtl-list file, otherwise by searching --rtl-dir for
    '*.expand' files), matches them against the symbols of --elf-file, and then runs the
    selected action:
    - find-refs: print references between the given source and target sections; with
      --exit-code, exit with status 1 if any reference was printed
    - all-refs: print every reference

    Raises RuntimeError if neither --rtl-list nor --rtl-dir is given, or if no RTL files
    were found.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--rtl-list',
        help='File with the list of RTL files',
        type=argparse.FileType('r'),
    )
    parser.add_argument(
        '--rtl-dir', help='Directory where to look for RTL files, recursively'
    )
    parser.add_argument(
        '--elf-file',
        required=True,
        help='Program ELF file',
        type=argparse.FileType('rb'),
    )
    action_sub = parser.add_subparsers(dest='action')
    find_refs_parser = action_sub.add_parser(
        'find-refs',
        help='List the references coming from a given list of source sections '
             'to a given list of target sections.',
    )
    find_refs_parser.add_argument(
        '--from-sections', help='comma-separated list of source sections'
    )
    find_refs_parser.add_argument(
        '--to-sections', help='comma-separated list of target sections'
    )
    find_refs_parser.add_argument(
        '--exit-code',
        action='store_true',
        help='If set, exits with non-zero code when any references found',
    )
    action_sub.add_parser(
        'all-refs',
        help='Print the list of all references',
    )

    args = parser.parse_args()
    if args.rtl_list:
        # argparse.FileType has already opened the file, so args.rtl_list is a file object
        # (not a path): read it directly and close it when done.
        with args.rtl_list as rtl_list_file:
            stripped_lines = [line.strip() for line in rtl_list_file]
        # Blank lines (e.g. a trailing newline) are not file names
        rtl_list = [name for name in stripped_lines if name]
    else:
        if not args.rtl_dir:
            raise RuntimeError('Either --rtl-list or --rtl-dir must be specified')
        rtl_list = list(find_files_recursive(args.rtl_dir, '.expand'))

    if not rtl_list:
        raise RuntimeError('No RTL files specified')

    _, refs = get_symbols_and_refs(rtl_list, args.elf_file)

    if args.action == 'find-refs':
        from_sections = args.from_sections.split(',') if args.from_sections else []
        to_sections = args.to_sections.split(',') if args.to_sections else []
        found = list_refs_from_to_sections(
            refs, from_sections, to_sections
        )
        if args.exit_code and found:
            raise SystemExit(1)
    elif args.action == 'all-refs':
        for r in refs:
            print(str(r))
415
416
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
419