1#!/usr/bin/env python3 2# 3# Copyright (c) 2020 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Database Generator 9 10This takes the built Zephyr ELF binary and produces a JSON database 11file for dictionary-based logging. This database is used together 12with the parser to decode binary log messages. 13""" 14 15import argparse 16import logging 17import os 18import re 19import string 20import struct 21import sys 22 23import dictionary_parser.log_database 24import elftools 25from dictionary_parser.log_database import LogDatabase 26from dictionary_parser.utils import extract_one_string_in_section, find_string_in_mappings 27from elftools.dwarf.descriptions import describe_DWARF_expr 28from elftools.dwarf.locationlists import LocationExpr, LocationParser 29from elftools.elf.constants import SH_FLAGS 30from elftools.elf.descriptions import describe_ei_data 31from elftools.elf.elffile import ELFFile 32from elftools.elf.sections import SymbolTableSection 33 34LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s" 35logger = logging.getLogger(os.path.basename(sys.argv[0])) 36 37 38# Sections that contains static strings 39STATIC_STRING_SECTIONS = [ 40 'rodata', 41 '.rodata', 42 'pinned.rodata', 43] 44 45# Sections that contains static strings but are not part of the binary (allocable). 46REMOVED_STRING_SECTIONS = ['log_strings'] 47 48 49# Regulation expression to match DWARF location 50DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)") 51 52 53# Format string for pointers (default for 32-bit pointers) 54PTR_FMT = '0x%08x' 55 56 57# Potential string encodings. Add as needed. 58STR_ENCODINGS = [ 59 'ascii', 60 'iso-8859-1', 61] 62 63 64# List of acceptable escape character 65ACCEPTABLE_ESCAPE_CHARS = [ 66 b'\r', 67 b'\n', 68] 69 70 71def parse_args(): 72 """Parse command line arguments""" 73 argparser = argparse.ArgumentParser(allow_abbrev=False) 74 75 argparser.add_argument("elffile", help="Zephyr ELF binary") 76 argparser.add_argument("--build", help="Build ID") 77 argparser.add_argument("--build-header", help="Header file containing BUILD_VERSION define") 78 argparser.add_argument("--debug", action="store_true", help="Print extra debugging information") 79 argparser.add_argument("-v", "--verbose", action="store_true", help="Print more information") 80 81 outfile_grp = argparser.add_mutually_exclusive_group(required=True) 82 outfile_grp.add_argument("--json", help="Output Dictionary Logging Database file in JSON") 83 outfile_grp.add_argument("--syst", help="Output MIPI Sys-T Collateral XML file") 84 85 return argparser.parse_args() 86 87 88def extract_elf_code_data_sections(elf, wildcards=None): 89 """Find all sections in ELF file""" 90 sections = {} 91 92 for sect in elf.iter_sections(): 93 # Only Allocated sections with PROGBITS are included 94 # since they actually have code/data. 95 # 96 # On contrary, BSS is allocated but NOBITS. 97 if ((wildcards is not None) and (sect.name in wildcards)) or ( 98 (sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC 99 and sect['sh_type'] == 'SHT_PROGBITS' 100 ): 101 sections[sect.name] = { 102 'name': sect.name, 103 'size': sect['sh_size'], 104 'start': sect['sh_addr'], 105 'end': sect['sh_addr'] + sect['sh_size'] - 1, 106 'data': sect.data(), 107 } 108 109 return sections 110 111 112def find_elf_sections(elf, sh_name): 113 """Find all sections in ELF file""" 114 for section in elf.iter_sections(): 115 if section.name == sh_name: 116 ret = { 117 'name': section.name, 118 'size': section['sh_size'], 119 'start': section['sh_addr'], 120 'end': section['sh_addr'] + section['sh_size'] - 1, 121 'data': section.data(), 122 } 123 124 return ret 125 126 return None 127 128 129def get_kconfig_symbols(elf): 130 """Get kconfig symbols from the ELF file""" 131 for section in elf.iter_sections(): 132 if isinstance(section, SymbolTableSection) and section['sh_type'] != 'SHT_DYNSYM': 133 return { 134 sym.name: sym.entry.st_value 135 for sym in section.iter_symbols() 136 if sym.name.startswith("CONFIG_") 137 } 138 139 raise LookupError("Could not find symbol table") 140 141 142def find_log_const_symbols(elf): 143 """Extract all "log_const_*" symbols from ELF file""" 144 symbol_tables = [ 145 s for s in elf.iter_sections() if isinstance(s, elftools.elf.sections.SymbolTableSection) 146 ] 147 148 ret_list = [] 149 150 for section in symbol_tables: 151 if not isinstance(section, elftools.elf.sections.SymbolTableSection): 152 continue 153 154 if section['sh_entsize'] == 0: 155 continue 156 157 for symbol in section.iter_symbols(): 158 if symbol.name.startswith("log_const_") or symbol.name.startswith("_log_const_"): 159 ret_list.append(symbol) 160 161 return ret_list 162 163 164def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings): 165 """Find the log instances and map source IDs to names""" 166 if database.is_tgt_little_endian(): 167 formatter = "<" 168 else: 169 formatter = ">" 170 171 if database.is_tgt_64bit(): 172 # 64-bit pointer to string 173 formatter += "Q" 174 else: 175 # 32-bit pointer to string 176 formatter += "L" 177 178 # log instance level 179 formatter += "B" 180 181 datum_size = struct.calcsize(formatter) 182 183 # Get the address of first log instance 184 first_offset = log_const_symbols[0].entry['st_value'] 185 for sym in log_const_symbols: 186 if sym.entry['st_value'] < first_offset: 187 first_offset = sym.entry['st_value'] 188 189 first_offset -= log_const_area['start'] 190 191 # find all log_const_* 192 for sym in log_const_symbols: 193 # Find data offset in log_const_area for this symbol 194 offset = sym.entry['st_value'] - log_const_area['start'] 195 196 idx_s = offset 197 idx_e = offset + datum_size 198 199 datum = log_const_area['data'][idx_s:idx_e] 200 201 if len(datum) != datum_size: 202 # Not enough data to unpack 203 continue 204 205 if sym.entry['st_size'] == 0: 206 # Empty entry 207 continue 208 209 str_ptr, level = struct.unpack(formatter, datum) 210 211 # Offset to rodata section for string 212 instance_name = find_string_in_mappings(string_mappings, str_ptr) 213 if instance_name is None: 214 instance_name = "unknown" 215 216 logger.info("Found Log Instance: %s, level: %d", instance_name, level) 217 218 # source ID is simply the element index in the log instance array 219 source_id = int((offset - first_offset) / sym.entry['st_size']) 220 221 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value']) 222 223 224def extract_elf_information(elf, database): 225 """Extract information from ELF file and store in database""" 226 e_ident = elf.header['e_ident'] 227 elf_data = describe_ei_data(e_ident['EI_DATA']) 228 229 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']: 230 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN) 231 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']: 232 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN) 233 else: 234 logger.error("Cannot determine endianness from ELF file, exiting...") 235 sys.exit(1) 236 237 238def process_kconfigs(elf, database): 239 """Process kconfigs to extract information""" 240 kconfigs = get_kconfig_symbols(elf) 241 242 # 32 or 64-bit target 243 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32) 244 245 # Architecture 246 for name, arch in dictionary_parser.log_database.ARCHS.items(): 247 if arch['kconfig'] in kconfigs: 248 database.set_arch(name) 249 break 250 else: 251 logger.error("Did not found architecture") 252 sys.exit(1) 253 254 # Put some kconfigs into the database 255 # 256 # Use 32-bit timestamp? or 64-bit? 257 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs: 258 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", kconfigs['CONFIG_LOG_TIMESTAMP_64BIT']) 259 260 261def extract_logging_subsys_information(elf, database, string_mappings): 262 """ 263 Extract logging subsys related information and store in database. 264 265 For example, this extracts the list of log instances to establish 266 mapping from source ID to name. 267 """ 268 # Extract log constant section for module names 269 section_log_const = find_elf_sections(elf, "log_const_area") 270 if section_log_const is None: 271 # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas 272 section_log_const = find_elf_sections(elf, "log_static_section") 273 274 if section_log_const is None: 275 logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...") 276 sys.exit(1) 277 278 # Find all "log_const_*" symbols and parse them 279 log_const_symbols = find_log_const_symbols(elf) 280 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings) 281 282 283def is_die_attr_ref(attr): 284 """ 285 Returns True if the DIE attribute is a reference. 286 """ 287 return bool( 288 attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref') 289 ) 290 291 292def find_die_var_base_type(compile_unit, die, is_const): 293 """ 294 Finds the base type of a DIE and returns the name. 295 If DW_AT_type is a reference, it will recursively go through 296 the references to find the base type. Returns None is no 297 base type is found. 298 """ 299 # DIE is of base type. So extract the name. 300 if die.tag == 'DW_TAG_base_type': 301 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const 302 303 # Not a type, cannot continue 304 if 'DW_AT_type' not in die.attributes: 305 return None, None 306 307 if die.tag == 'DW_TAG_const_type': 308 is_const = True 309 310 # DIE is probably a reference to another. 311 # If so, check if the reference is a base type. 312 type_attr = die.attributes['DW_AT_type'] 313 if is_die_attr_ref(type_attr): 314 ref_addr = compile_unit.cu_offset + type_attr.raw_value 315 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr) 316 317 return find_die_var_base_type(compile_unit, ref_die, is_const) 318 319 # Not a base type, and not reference 320 return None, None 321 322 323def is_die_var_const_char(compile_unit, die): 324 """ 325 Returns True if DIE of type variable is const char. 326 """ 327 var_type, is_const = find_die_var_base_type(compile_unit, die, False) 328 329 return bool(var_type is not None and var_type.endswith('char') and is_const) 330 331 332def extract_string_variables(elf): 333 """ 334 Find all string variables (char) in all Compilation Units and 335 Debug information Entry (DIE) in ELF file. 336 """ 337 dwarf_info = elf.get_dwarf_info() 338 loc_lists = dwarf_info.location_lists() 339 loc_parser = LocationParser(loc_lists) 340 341 strings = [] 342 343 # Loop through all Compilation Units and 344 # Debug information Entry (DIE) to extract all string variables 345 for compile_unit in dwarf_info.iter_CUs(): 346 for die in compile_unit.iter_DIEs(): 347 # Only care about variables with location information 348 # and of type "char" 349 if die.tag == 'DW_TAG_variable' and ( 350 'DW_AT_type' in die.attributes 351 and 'DW_AT_location' in die.attributes 352 and is_die_var_const_char(compile_unit, die) 353 ): 354 # Extract location information, which is 355 # its address in memory. 356 loc_attr = die.attributes['DW_AT_location'] 357 if loc_parser.attribute_has_location(loc_attr, die.cu['version']): 358 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'], die) 359 if isinstance(loc, LocationExpr): 360 try: 361 addr = describe_DWARF_expr(loc.loc_expr, dwarf_info.structs) 362 363 matcher = DT_LOCATION_REGEX.match(addr) 364 if matcher: 365 addr = int(matcher.group(1), 16) 366 if addr > 0: 367 strings.append( 368 { 369 'name': die.attributes['DW_AT_name'].value, 370 'addr': addr, 371 'die': die, 372 } 373 ) 374 except KeyError: 375 pass 376 377 return strings 378 379 380def try_decode_string(str_maybe): 381 """Check if it is a printable string""" 382 for encoding in STR_ENCODINGS: 383 try: 384 return str_maybe.decode(encoding) 385 except UnicodeDecodeError: 386 pass 387 388 return None 389 390 391def is_printable(b): 392 # Check if string is printable according to Python 393 # since the parser (written in Python) will need to 394 # print the string. 395 # 396 # Note that '\r' and '\n' are not included in 397 # string.printable so they need to be checked separately. 398 return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS) 399 400 401def extract_strings_in_one_section(section, str_mappings): 402 """Extract NULL-terminated strings in one ELF section""" 403 data = section['data'] 404 start = None 405 for idx, x in enumerate(data): 406 if is_printable(chr(x)): 407 # Printable character, potential part of string 408 if start is None: 409 # Beginning of potential string 410 start = idx 411 elif x == 0: 412 # End of possible string 413 if start is not None: 414 # Found potential string 415 str_maybe = data[start:idx] 416 decoded_str = try_decode_string(str_maybe) 417 418 if decoded_str is not None: 419 addr = section['start'] + start 420 421 if addr not in str_mappings: 422 str_mappings[addr] = decoded_str 423 424 # Decoded string may contain un-printable characters 425 # (e.g. extended ASC-II characters) or control 426 # characters (e.g. '\r' or '\n'), so simply print 427 # the byte string instead. 428 logger.debug( 429 'Found string via extraction at ' + PTR_FMT + ': %s', addr, str_maybe 430 ) 431 432 # GCC-based toolchain will reuse the NULL character 433 # for empty strings. There is no way to know which 434 # one is being reused, so just treat all NULL character 435 # at the end of legitimate strings as empty strings. 436 null_addr = section['start'] + idx 437 str_mappings[null_addr] = '' 438 439 logger.debug('Found null string via extraction at ' + PTR_FMT, null_addr) 440 start = None 441 else: 442 # Non-printable byte, remove start location 443 start = None 444 445 return str_mappings 446 447 448def extract_static_strings(elf, database, section_extraction=False): 449 """ 450 Extract static strings from ELF file using DWARF, 451 and also extraction from binary data. 452 """ 453 string_mappings = {} 454 455 elf_sections = extract_elf_code_data_sections(elf, REMOVED_STRING_SECTIONS) 456 457 # Extract strings using ELF DWARF information 458 str_vars = extract_string_variables(elf) 459 for str_var in str_vars: 460 for _, sect in elf_sections.items(): 461 one_str = extract_one_string_in_section(sect, str_var['addr']) 462 if one_str is not None: 463 string_mappings[str_var['addr']] = one_str 464 logger.debug( 465 'Found string variable at ' + PTR_FMT + ': %s', str_var['addr'], one_str 466 ) 467 break 468 469 if section_extraction: 470 # Extract strings from ELF sections 471 string_sections = STATIC_STRING_SECTIONS 472 rawstr_map = {} 473 474 # Some architectures may put static strings into additional sections. 475 # So need to extract them too. 476 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()] 477 if "extra_string_section" in arch_data: 478 string_sections.extend(arch_data['extra_string_section']) 479 480 for sect_name in string_sections: 481 if sect_name in elf_sections: 482 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], rawstr_map) 483 484 for one_str in rawstr_map: 485 if one_str not in string_mappings: 486 string_mappings[one_str] = rawstr_map[one_str] 487 488 return string_mappings 489 490 491def main(): 492 """Main function of database generator""" 493 args = parse_args() 494 495 # Setup logging 496 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING) 497 if args.debug: 498 logger.setLevel(logging.DEBUG) 499 elif args.verbose: 500 logger.setLevel(logging.INFO) 501 502 elffile = open(args.elffile, "rb") # noqa: SIM115 503 if not elffile: 504 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile) 505 sys.exit(1) 506 507 logger.info("ELF file %s", args.elffile) 508 509 if args.json: 510 logger.info("JSON Database file %s", args.json) 511 section_extraction = True 512 513 if args.syst: 514 logger.info("MIPI Sys-T Collateral file %s", args.syst) 515 section_extraction = False 516 517 elf = ELFFile(elffile) 518 519 database = LogDatabase() 520 521 if args.build_header: 522 with open(args.build_header) as f: 523 for line in f: 524 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', line) 525 if match: 526 database.set_build_id(match.group(1)) 527 break 528 529 if args.build: 530 database.set_build_id(args.build) 531 logger.info("Build ID: %s", args.build) 532 533 extract_elf_information(elf, database) 534 535 process_kconfigs(elf, database) 536 537 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) 538 if database.is_tgt_little_endian(): 539 logger.info("Endianness: Little") 540 else: 541 logger.info("Endianness: Big") 542 543 if database.is_tgt_64bit(): 544 global PTR_FMT 545 PTR_FMT = '0x%016x' 546 547 # Extract strings from ELF files 548 string_mappings = extract_static_strings(elf, database, section_extraction) 549 if len(string_mappings) > 0: 550 database.set_string_mappings(string_mappings) 551 logger.info("Found %d strings", len(string_mappings)) 552 553 # Extract information related to logging subsystem 554 if not section_extraction: 555 # The logging subsys information (e.g. log module names) 556 # may require additional strings outside of those extracted 557 # via ELF DWARF variables. So generate a new string mappings 558 # with strings in various ELF sections. 559 string_mappings = extract_static_strings(elf, database, section_extraction=True) 560 561 extract_logging_subsys_information(elf, database, string_mappings) 562 563 # Write database file 564 if args.json and not LogDatabase.write_json_database(args.json, database): 565 logger.error("ERROR: Cannot open database file for write: %s, exiting...", args.json) 566 sys.exit(1) 567 568 if args.syst and not LogDatabase.write_syst_database(args.syst, database): 569 logger.error("ERROR: Cannot open database file for write: %s, exiting...", args.syst) 570 sys.exit(1) 571 572 elffile.close() 573 574 575if __name__ == "__main__": 576 main() 577