#!/usr/bin/env python
#
# Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
List references between symbols of a program.

The references are collected from RTL dump files (function calls and symbol_ref entries)
and matched against the symbols of the program ELF file, so that every reference can be
reported together with the sections of its source and target symbols.
"""

import argparse
import os
import re
from functools import partial

import elftools
from elftools.elf import elffile

try:
    from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
except ImportError:
    pass

# ";; Function <mangled name> (<function name>[, ...])" header which starts each function in an RTL file
FUNCTION_REGEX = re.compile(
    r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$'
)
# RTL "(call ..." expression; the target is the quoted name
CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
# RTL "(symbol_ref ... ("name") ..." expression; the target is the quoted name
SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')


class RtlFunction(object):
    """
    A function found in an RTL file, together with the names it calls and references.
    """

    def __init__(self, name, rtl_filename, tu_filename):  # type: (str, str, str) -> None
        self.name = name
        self.rtl_filename = rtl_filename
        self.tu_filename = tu_filename
        self.calls = list()  # type: List[str]
        self.refs = list()  # type: List[str]
        self.sym = None


class SectionAddressRange(object):
    """
    Named half-open address range [low, high) occupied by a section.
    """

    def __init__(self, name, addr, size):  # type: (str, int, int) -> None
        self.name = name
        self.low = addr
        self.high = addr + size

    def __str__(self):  # type: () -> str
        return '{}: 0x{:08x} - 0x{:08x}'.format(self.name, self.low, self.high)

    def contains_address(self, addr):  # type: (int) -> bool
        """
        Return True if addr is inside the range; the upper bound is exclusive.
        """
        return self.low <= addr < self.high


# Extra address ranges, keyed by the value of the IDF_TARGET environment variable.
# ElfInfo appends them to the ranges obtained from the ELF file itself.
TARGET_SECTIONS = {
    'esp32': [
        SectionAddressRange('.rom.text', 0x40000000, 0x70000),
        SectionAddressRange('.rom.rodata', 0x3ff96000, 0x9018)
    ],
    'esp32s2': [
        SectionAddressRange('.rom.text', 0x40000000, 0x1bed0),
        SectionAddressRange('.rom.rodata', 0x3ffac600, 0x392c)
    ],
    'esp32s3': [
        SectionAddressRange('.rom.text', 0x40000000, 0x568d0),
        SectionAddressRange('.rom.rodata', 0x3ff071c0, 0x8e30)
    ]
}  # type: Dict[str, List[SectionAddressRange]]


class Symbol(object):
    """
    ELF symbol along with the symbols it refers to and is referred from.
    """

    def __init__(self, name, addr, local, filename, section):  # type: (str, int, bool, Optional[str], Optional[str]) -> None
        self.name = name
        self.addr = addr
        self.local = local
        self.filename = filename
        self.section = section
        self.refers_to = list()  # type: List[Symbol]
        self.referred_from = list()  # type: List[Symbol]

    def __str__(self):  # type: () -> str
        return '{} @0x{:08x} [{}]{} {}'.format(
            self.name,
            self.addr,
            self.section or 'unknown',
            ' (local)' if self.local else '',
            self.filename
        )


class Reference(object):
    """
    A single reference from one symbol to another.
    """

    def __init__(self, from_sym, to_sym):  # type: (Symbol, Symbol) -> None
        self.from_sym = from_sym
        self.to_sym = to_sym

    def __str__(self):  # type: () -> str
        return '{} @0x{:08x} ({}) -> {} @0x{:08x} ({})'.format(
            self.from_sym.name,
            self.from_sym.addr,
            self.from_sym.section,
            self.to_sym.name,
            self.to_sym.addr,
            self.to_sym.section
        )


class ElfInfo(object):
    """
    Section address ranges and symbols loaded from an ELF file.
    """

    def __init__(self, elf_file):  # type: (BinaryIO) -> None
        self.elf_file = elf_file
        self.elf_obj = elffile.ELFFile(self.elf_file)
        # Sections have to be loaded first: _load_symbols uses section_for_addr.
        self.section_ranges = self._load_sections()
        self.symbols = self._load_symbols()

    def _load_symbols(self):  # type: () -> List[Symbol]
        """
        Collect STT_NOTYPE, STT_FUNC and STT_OBJECT symbols from all the symbol tables.
        """
        symbols = []
        for s in self.elf_obj.iter_sections():
            if not isinstance(s, elftools.elf.sections.SymbolTableSection):
                continue
            # Name taken from the most recent STT_FILE symbol; recorded for local symbols only.
            filename = None
            for sym in s.iter_symbols():
                sym_type = sym.entry['st_info']['type']
                if sym_type == 'STT_FILE':
                    filename = sym.name
                if sym_type in ['STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT']:
                    local = sym.entry['st_info']['bind'] == 'STB_LOCAL'
                    addr = sym.entry['st_value']
                    symbols.append(
                        Symbol(
                            sym.name,
                            addr,
                            local,
                            filename if local else None,
                            self.section_for_addr(addr),
                        )
                    )
        return symbols

    def _load_sections(self):  # type: () -> List[SectionAddressRange]
        """
        Collect address ranges of the sections which belong to PT_LOAD segments,
        plus the TARGET_SECTIONS entries for the target given in IDF_TARGET, if any.
        """
        result = []
        for segment in self.elf_obj.iter_segments():
            if segment['p_type'] == 'PT_LOAD':
                for section in self.elf_obj.iter_sections():
                    if not segment.section_in_segment(section):
                        continue
                    result.append(
                        SectionAddressRange(
                            section.name, section['sh_addr'], section['sh_size']
                        )
                    )

        target = os.environ.get('IDF_TARGET')
        if target in TARGET_SECTIONS:
            result += TARGET_SECTIONS[target]

        return result

    def symbols_by_name(self, name):  # type: (str) -> List[Symbol]
        """
        Return all the symbols with the given name, in the order they were loaded.
        """
        return [sym for sym in self.symbols if sym.name == name]

    def section_for_addr(self, sym_addr):  # type: (int) -> Optional[str]
        """
        Return the name of the first known section containing sym_addr, or None.
        """
        for sar in self.section_ranges:
            if sar.contains_address(sym_addr):
                return sar.name
        return None


def load_rtl_file(rtl_filename, tu_filename, functions):  # type: (str, str, List[RtlFunction]) -> None
    """
    Parse an RTL file and append an RtlFunction to 'functions' for every function found in it.
    Calls and symbol references are recorded (without duplicates) for the function they follow.
    """
    last_function = None  # type: Optional[RtlFunction]
    # Use a context manager so that the file is closed once parsing is done
    with open(rtl_filename) as rtl_file:
        for line in rtl_file:
            # Find function definition
            match = FUNCTION_REGEX.match(line)
            if match:
                function_name = match.group('function')
                last_function = RtlFunction(function_name, rtl_filename, tu_filename)
                functions.append(last_function)
                continue

            if not last_function:
                # Lines before the first function header can not be attributed to any function
                continue

            # Find direct function calls
            match = CALL_REGEX.match(line)
            if match:
                target = match.group('target')
                if target not in last_function.calls:
                    last_function.calls.append(target)
                continue

            # Find symbol references
            match = SYMBOL_REF_REGEX.match(line)
            if match:
                target = match.group('target')
                if target not in last_function.refs:
                    last_function.refs.append(target)


def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename):  # type: (str, str) -> bool
    # Symbol file names (from ELF debug info) are short source file names, without path: "cpu_start.c".
    # RTL file names are paths relative to the build directory, e.g.:
    # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"
    #
    # The check below may give a false positive if there are two files with the same name in
    # different directories. This doesn't seem to happen in IDF now, but if it does happen,
    # an assert in find_symbol_by_name should catch this.
    #
    # If this becomes an issue, consider also loading the .map file and using it to figure out
    # which object file was used as the source of each symbol. Names of the object files and RTL files
    # should be much easier to match.
    return os.path.basename(rtl_filename).startswith(symbol_filename)


class SymbolNotFound(RuntimeError):
    pass


def find_symbol_by_name(name, elfinfo, local_func_matcher):  # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
    """
    Find an ELF symbol for the given name.
    local_func_matcher is a callback function which checks if the candidate local symbol is suitable.
    Returns None if there is no symbol with such name.
    """
    syms = elfinfo.symbols_by_name(name)
    if not syms:
        return None
    if len(syms) == 1:
        return syms[0]

    # There are multiple symbols with a given name. Find the best fit.
    local_candidate = None
    global_candidate = None
    for sym in syms:
        if not sym.local:
            assert not global_candidate  # can't have two global symbols with the same name
            global_candidate = sym
        elif local_func_matcher(sym):
            assert not local_candidate  # can't have two symbols with the same name in a single file
            local_candidate = sym

    # If two symbols with the same name are defined, a global and a local one,
    # prefer the local symbol as the reference target.
    return local_candidate or global_candidate


def match_local_source_func(rtl_filename, sym):  # type: (str, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
    reference source (caller), based on the RTL file name.
    """
    assert sym.filename  # should be set for local functions
    return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)


def match_local_target_func(rtl_filename, sym_from, sym):  # type: (str, Symbol, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
    reference target (callee or referenced data), based on RTL filename of the source symbol
    and the source symbol itself.
    """
    assert sym.filename  # should be set for local functions
    if sym_from.local:
        # local symbol referencing another local symbol
        return sym_from.filename == sym.filename
    else:
        # global symbol referencing a local symbol;
        # source filename is not known, use RTL filename as a hint
        return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)


def match_rtl_funcs_to_symbols(rtl_functions, elfinfo):  # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
    """
    Match the functions loaded from RTL files, and the names they reference, to ELF symbols.
    Returns the list of symbols which take part in at least one lookup as a source or a target,
    and the list of references between them.
    """
    symbols = []  # type: List[Symbol]
    refs = []  # type: List[Reference]

    # General idea:
    # - iterate over RTL functions.
    #   - for each RTL function, find the corresponding symbol
    #   - iterate over the functions and variables referenced from this RTL function
    #     - find symbols corresponding to the references
    #     - record every pair (sym_from, sym_to) as a Reference object

    for source_rtl_func in rtl_functions:
        maybe_sym_from = find_symbol_by_name(source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename))
        if maybe_sym_from is None:
            # RTL references a symbol, but the symbol is not defined in the generated object file.
            # This means that the symbol was likely removed (or not included) at link time.
            # There is nothing we can do to check section placement in this case.
            continue
        sym_from = maybe_sym_from

        if sym_from not in symbols:
            symbols.append(sym_from)

        for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
            if '*.LC' in target_rtl_func_name:  # skip local labels
                continue

            maybe_sym_to = find_symbol_by_name(target_rtl_func_name, elfinfo, partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from))
            if not maybe_sym_to:
                # This may happen for an extern reference in the RTL file, if the reference was later removed
                # by one of the optimization passes, and the external definition got garbage-collected.
                # TODO: consider adding some sanity check that we are here not because of some bug in
                # find_symbol_by_name?..
                continue
            sym_to = maybe_sym_to

            sym_from.refers_to.append(sym_to)
            sym_to.referred_from.append(sym_from)
            refs.append(Reference(sym_from, sym_to))
            if sym_to not in symbols:
                symbols.append(sym_to)

    return symbols, refs


def get_symbols_and_refs(rtl_list, elf_file):  # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
    """
    Load the ELF file and all the RTL files from rtl_list, return the matched symbols and references.
    """
    elfinfo = ElfInfo(elf_file)

    rtl_functions = []  # type: List[RtlFunction]
    for file_name in rtl_list:
        # The RTL file name is passed as the translation unit file name as well
        load_rtl_file(file_name, file_name, rtl_functions)

    return match_rtl_funcs_to_symbols(rtl_functions, elfinfo)


def list_refs_from_to_sections(refs, from_sections, to_sections):  # type: (List[Reference], List[str], List[str]) -> int
    """
    Print the references going from one of from_sections to one of to_sections.
    An empty list of sections matches any section. Returns the number of references printed.
    """
    found = 0
    for ref in refs:
        if (not from_sections or ref.from_sym.section in from_sections) and \
           (not to_sections or ref.to_sym.section in to_sections):
            print(str(ref))
            found += 1
    return found


def find_files_recursive(root_path, ext):  # type: (str, str) -> Generator[str, None, None]
    """
    Yield paths of the files under root_path whose names end with ext.
    """
    for root, _, files in os.walk(root_path):
        for basename in files:
            if basename.endswith(ext):
                filename = os.path.join(root, basename)
                yield filename


def main():  # type: () -> None
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--rtl-list',
        help='File with the list of RTL files',
        type=argparse.FileType('r'),
    )
    parser.add_argument(
        '--rtl-dir', help='Directory where to look for RTL files, recursively'
    )
    parser.add_argument(
        '--elf-file',
        required=True,
        help='Program ELF file',
        type=argparse.FileType('rb'),
    )
    action_sub = parser.add_subparsers(dest='action')
    find_refs_parser = action_sub.add_parser(
        'find-refs',
        help='List the references coming from a given list of source sections '
             'to a given list of target sections.',
    )
    find_refs_parser.add_argument(
        '--from-sections', help='comma-separated list of source sections'
    )
    find_refs_parser.add_argument(
        '--to-sections', help='comma-separated list of target sections'
    )
    find_refs_parser.add_argument(
        '--exit-code',
        action='store_true',
        help='If set, exits with non-zero code when any references found',
    )
    action_sub.add_parser(
        'all-refs',
        help='Print the list of all references',
    )

    args = parser.parse_args()
    if args.rtl_list:
        # argparse.FileType has already opened the file, so args.rtl_list is a file object
        # (not a path) and must not be passed to open() again.
        with args.rtl_list as rtl_list_file:
            # One RTL file name per line; surrounding whitespace and blank lines are dropped.
            rtl_list = [line.strip() for line in rtl_list_file if line.strip()]
    else:
        if not args.rtl_dir:
            raise RuntimeError('Either --rtl-list or --rtl-dir must be specified')
        rtl_list = list(find_files_recursive(args.rtl_dir, '.expand'))

    if not rtl_list:
        raise RuntimeError('No RTL files specified')

    _, refs = get_symbols_and_refs(rtl_list, args.elf_file)

    if args.action == 'find-refs':
        from_sections = args.from_sections.split(',') if args.from_sections else []
        to_sections = args.to_sections.split(',') if args.to_sections else []
        found = list_refs_from_to_sections(
            refs, from_sections, to_sections
        )
        if args.exit_code and found:
            raise SystemExit(1)
    elif args.action == 'all-refs':
        for r in refs:
            print(str(r))


if __name__ == '__main__':
    main()