1#!/usr/bin/env python3 2# 3# Copyright (c) 2020 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Database Generator 9 10This takes the built Zephyr ELF binary and produces a JSON database 11file for dictionary-based logging. This database is used together 12with the parser to decode binary log messages. 13""" 14 15import argparse 16import logging 17import os 18import re 19import string 20import struct 21import sys 22 23import dictionary_parser.log_database 24from dictionary_parser.log_database import LogDatabase 25from dictionary_parser.utils import extract_one_string_in_section 26from dictionary_parser.utils import find_string_in_mappings 27 28import elftools 29from elftools.elf.constants import SH_FLAGS 30from elftools.elf.elffile import ELFFile 31from elftools.elf.descriptions import describe_ei_data 32from elftools.elf.sections import SymbolTableSection 33from elftools.dwarf.descriptions import ( 34 describe_DWARF_expr 35) 36from elftools.dwarf.locationlists import ( 37 LocationExpr, LocationParser 38) 39 40 41LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s" 42logger = logging.getLogger(os.path.basename(sys.argv[0])) 43 44 45# Sections that contains static strings 46STATIC_STRING_SECTIONS = [ 47 'rodata', 48 '.rodata', 49 'pinned.rodata', 50] 51 52# Sections that contains static strings but are not part of the binary (allocable). 53REMOVED_STRING_SECTIONS = [ 54 'log_strings' 55] 56 57 58# Regulation expression to match DWARF location 59DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)") 60 61 62# Format string for pointers (default for 32-bit pointers) 63PTR_FMT = '0x%08x' 64 65 66# Potential string encodings. Add as needed. 67STR_ENCODINGS = [ 68 'ascii', 69 'iso-8859-1', 70] 71 72 73# List of acceptable escape character 74ACCEPTABLE_ESCAPE_CHARS = [ 75 b'\r', 76 b'\n', 77] 78 79 80def parse_args(): 81 """Parse command line arguments""" 82 argparser = argparse.ArgumentParser(allow_abbrev=False) 83 84 argparser.add_argument("elffile", help="Zephyr ELF binary") 85 argparser.add_argument("--build", help="Build ID") 86 argparser.add_argument("--build-header", 87 help="Header file containing BUILD_VERSION define") 88 argparser.add_argument("--debug", action="store_true", 89 help="Print extra debugging information") 90 argparser.add_argument("-v", "--verbose", action="store_true", 91 help="Print more information") 92 93 outfile_grp = argparser.add_mutually_exclusive_group(required=True) 94 outfile_grp.add_argument("--json", 95 help="Output Dictionary Logging Database file in JSON") 96 outfile_grp.add_argument("--syst", 97 help="Output MIPI Sys-T Collateral XML file") 98 99 return argparser.parse_args() 100 101 102def extract_elf_code_data_sections(elf, wildcards = None): 103 """Find all sections in ELF file""" 104 sections = {} 105 106 for sect in elf.iter_sections(): 107 # Only Allocated sections with PROGBITS are included 108 # since they actually have code/data. 109 # 110 # On contrary, BSS is allocated but NOBITS. 111 if (((wildcards is not None) and (sect.name in wildcards)) or 112 ((sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC 113 and sect['sh_type'] == 'SHT_PROGBITS') 114 ): 115 sections[sect.name] = { 116 'name' : sect.name, 117 'size' : sect['sh_size'], 118 'start' : sect['sh_addr'], 119 'end' : sect['sh_addr'] + sect['sh_size'] - 1, 120 'data' : sect.data(), 121 } 122 123 return sections 124 125 126def find_elf_sections(elf, sh_name): 127 """Find all sections in ELF file""" 128 for section in elf.iter_sections(): 129 if section.name == sh_name: 130 ret = { 131 'name' : section.name, 132 'size' : section['sh_size'], 133 'start' : section['sh_addr'], 134 'end' : section['sh_addr'] + section['sh_size'] - 1, 135 'data' : section.data(), 136 } 137 138 return ret 139 140 return None 141 142 143def get_kconfig_symbols(elf): 144 """Get kconfig symbols from the ELF file""" 145 for section in elf.iter_sections(): 146 if isinstance(section, SymbolTableSection) and section['sh_type'] != 'SHT_DYNSYM': 147 return {sym.name: sym.entry.st_value 148 for sym in section.iter_symbols() 149 if sym.name.startswith("CONFIG_")} 150 151 raise LookupError("Could not find symbol table") 152 153 154def find_log_const_symbols(elf): 155 """Extract all "log_const_*" symbols from ELF file""" 156 symbol_tables = [s for s in elf.iter_sections() 157 if isinstance(s, elftools.elf.sections.SymbolTableSection)] 158 159 ret_list = [] 160 161 for section in symbol_tables: 162 if not isinstance(section, elftools.elf.sections.SymbolTableSection): 163 continue 164 165 if section['sh_entsize'] == 0: 166 continue 167 168 for symbol in section.iter_symbols(): 169 if symbol.name.startswith("log_const_"): 170 ret_list.append(symbol) 171 172 return ret_list 173 174 175def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings): 176 """Find the log instances and map source IDs to names""" 177 if database.is_tgt_little_endian(): 178 formatter = "<" 179 else: 180 formatter = ">" 181 182 if database.is_tgt_64bit(): 183 # 64-bit pointer to string 184 formatter += "Q" 185 else: 186 # 32-bit pointer to string 187 formatter += "L" 188 189 # log instance level 190 formatter += "B" 191 192 datum_size = struct.calcsize(formatter) 193 194 # Get the address of first log instance 195 first_offset = log_const_symbols[0].entry['st_value'] 196 for sym in log_const_symbols: 197 if sym.entry['st_value'] < first_offset: 198 first_offset = sym.entry['st_value'] 199 200 first_offset -= log_const_area['start'] 201 202 # find all log_const_* 203 for sym in log_const_symbols: 204 # Find data offset in log_const_area for this symbol 205 offset = sym.entry['st_value'] - log_const_area['start'] 206 207 idx_s = offset 208 idx_e = offset + datum_size 209 210 datum = log_const_area['data'][idx_s:idx_e] 211 212 if len(datum) != datum_size: 213 # Not enough data to unpack 214 continue 215 216 str_ptr, level = struct.unpack(formatter, datum) 217 218 # Offset to rodata section for string 219 instance_name = find_string_in_mappings(string_mappings, str_ptr) 220 if instance_name is None: 221 instance_name = "unknown" 222 223 logger.info("Found Log Instance: %s, level: %d", instance_name, level) 224 225 # source ID is simply the element index in the log instance array 226 source_id = int((offset - first_offset) / sym.entry['st_size']) 227 228 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value']) 229 230 231def extract_elf_information(elf, database): 232 """Extract information from ELF file and store in database""" 233 e_ident = elf.header['e_ident'] 234 elf_data = describe_ei_data(e_ident['EI_DATA']) 235 236 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']: 237 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN) 238 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']: 239 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN) 240 else: 241 logger.error("Cannot determine endianness from ELF file, exiting...") 242 sys.exit(1) 243 244 245def process_kconfigs(elf, database): 246 """Process kconfigs to extract information""" 247 kconfigs = get_kconfig_symbols(elf) 248 249 # 32 or 64-bit target 250 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32) 251 252 # Architecture 253 for name, arch in dictionary_parser.log_database.ARCHS.items(): 254 if arch['kconfig'] in kconfigs: 255 database.set_arch(name) 256 break 257 else: 258 logger.error("Did not found architecture") 259 sys.exit(1) 260 261 # Put some kconfigs into the database 262 # 263 # Use 32-bit timestamp? or 64-bit? 264 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs: 265 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", 266 kconfigs['CONFIG_LOG_TIMESTAMP_64BIT']) 267 268 269def extract_logging_subsys_information(elf, database, string_mappings): 270 """ 271 Extract logging subsys related information and store in database. 272 273 For example, this extracts the list of log instances to establish 274 mapping from source ID to name. 275 """ 276 # Extract log constant section for module names 277 section_log_const = find_elf_sections(elf, "log_const_area") 278 if section_log_const is None: 279 # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas 280 section_log_const = find_elf_sections(elf, "log_static_section") 281 282 if section_log_const is None: 283 logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...") 284 sys.exit(1) 285 286 # Find all "log_const_*" symbols and parse them 287 log_const_symbols = find_log_const_symbols(elf) 288 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings) 289 290 291def is_die_attr_ref(attr): 292 """ 293 Returns True if the DIE attribute is a reference. 294 """ 295 return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 296 'DW_FORM_ref4', 'DW_FORM_ref8', 297 'DW_FORM_ref')) 298 299 300def find_die_var_base_type(compile_unit, die, is_const): 301 """ 302 Finds the base type of a DIE and returns the name. 303 If DW_AT_type is a reference, it will recursively go through 304 the references to find the base type. Returns None is no 305 base type is found. 306 """ 307 # DIE is of base type. So extract the name. 308 if die.tag == 'DW_TAG_base_type': 309 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const 310 311 # Not a type, cannot continue 312 if not 'DW_AT_type' in die.attributes: 313 return None, None 314 315 if die.tag == 'DW_TAG_const_type': 316 is_const = True 317 318 # DIE is probably a reference to another. 319 # If so, check if the reference is a base type. 320 type_attr = die.attributes['DW_AT_type'] 321 if is_die_attr_ref(type_attr): 322 ref_addr = compile_unit.cu_offset + type_attr.raw_value 323 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr) 324 325 return find_die_var_base_type(compile_unit, ref_die, is_const) 326 327 # Not a base type, and not reference 328 return None, None 329 330 331def is_die_var_const_char(compile_unit, die): 332 """ 333 Returns True if DIE of type variable is const char. 334 """ 335 var_type, is_const = find_die_var_base_type(compile_unit, die, False) 336 337 if var_type is not None and var_type.endswith('char') and is_const: 338 return True 339 340 return False 341 342 343def extract_string_variables(elf): 344 """ 345 Find all string variables (char) in all Compilation Units and 346 Debug information Entry (DIE) in ELF file. 347 """ 348 dwarf_info = elf.get_dwarf_info() 349 loc_lists = dwarf_info.location_lists() 350 loc_parser = LocationParser(loc_lists) 351 352 strings = [] 353 354 # Loop through all Compilation Units and 355 # Debug information Entry (DIE) to extract all string variables 356 for compile_unit in dwarf_info.iter_CUs(): 357 for die in compile_unit.iter_DIEs(): 358 # Only care about variables with location information 359 # and of type "char" 360 if die.tag == 'DW_TAG_variable': 361 if ('DW_AT_type' in die.attributes 362 and 'DW_AT_location' in die.attributes 363 and is_die_var_const_char(compile_unit, die) 364 ): 365 # Extract location information, which is 366 # its address in memory. 367 loc_attr = die.attributes['DW_AT_location'] 368 if loc_parser.attribute_has_location(loc_attr, die.cu['version']): 369 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version']) 370 if isinstance(loc, LocationExpr): 371 try: 372 addr = describe_DWARF_expr(loc.loc_expr, 373 dwarf_info.structs) 374 375 matcher = DT_LOCATION_REGEX.match(addr) 376 if matcher: 377 addr = int(matcher.group(1), 16) 378 if addr > 0: 379 strings.append({ 380 'name': die.attributes['DW_AT_name'].value, 381 'addr': addr, 382 'die': die 383 }) 384 except KeyError: 385 pass 386 387 return strings 388 389def try_decode_string(str_maybe): 390 """Check if it is a printable string""" 391 for encoding in STR_ENCODINGS: 392 try: 393 return str_maybe.decode(encoding) 394 except UnicodeDecodeError: 395 pass 396 397 return None 398 399def is_printable(b): 400 # Check if string is printable according to Python 401 # since the parser (written in Python) will need to 402 # print the string. 403 # 404 # Note that '\r' and '\n' are not included in 405 # string.printable so they need to be checked separately. 406 return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS) 407 408def extract_strings_in_one_section(section, str_mappings): 409 """Extract NULL-terminated strings in one ELF section""" 410 data = section['data'] 411 idx = 0 412 start = None 413 for x in data: 414 if is_printable(chr(x)): 415 # Printable character, potential part of string 416 if start is None: 417 # Beginning of potential string 418 start = idx 419 elif x == 0: 420 # End of possible string 421 if start is not None: 422 # Found potential string 423 str_maybe = data[start : idx] 424 decoded_str = try_decode_string(str_maybe) 425 426 if decoded_str is not None: 427 addr = section['start'] + start 428 429 if addr not in str_mappings: 430 str_mappings[addr] = decoded_str 431 432 # Decoded string may contain un-printable characters 433 # (e.g. extended ASC-II characters) or control 434 # characters (e.g. '\r' or '\n'), so simply print 435 # the byte string instead. 436 logger.debug('Found string via extraction at ' + PTR_FMT + ': %s', 437 addr, str_maybe) 438 439 # GCC-based toolchain will reuse the NULL character 440 # for empty strings. There is no way to know which 441 # one is being reused, so just treat all NULL character 442 # at the end of legitimate strings as empty strings. 443 null_addr = section['start'] + idx 444 str_mappings[null_addr] = '' 445 446 logger.debug('Found null string via extraction at ' + PTR_FMT, 447 null_addr) 448 start = None 449 else: 450 # Non-printable byte, remove start location 451 start = None 452 idx += 1 453 454 return str_mappings 455 456 457def extract_static_strings(elf, database, section_extraction=False): 458 """ 459 Extract static strings from ELF file using DWARF, 460 and also extraction from binary data. 461 """ 462 string_mappings = {} 463 464 elf_sections = extract_elf_code_data_sections(elf, REMOVED_STRING_SECTIONS) 465 466 # Extract strings using ELF DWARF information 467 str_vars = extract_string_variables(elf) 468 for str_var in str_vars: 469 for _, sect in elf_sections.items(): 470 one_str = extract_one_string_in_section(sect, str_var['addr']) 471 if one_str is not None: 472 string_mappings[str_var['addr']] = one_str 473 logger.debug('Found string variable at ' + PTR_FMT + ': %s', 474 str_var['addr'], one_str) 475 break 476 477 if section_extraction: 478 # Extract strings from ELF sections 479 string_sections = STATIC_STRING_SECTIONS 480 rawstr_map = {} 481 482 # Some architectures may put static strings into additional sections. 483 # So need to extract them too. 484 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()] 485 if "extra_string_section" in arch_data: 486 string_sections.extend(arch_data['extra_string_section']) 487 488 for sect_name in string_sections: 489 if sect_name in elf_sections: 490 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], 491 rawstr_map) 492 493 for one_str in rawstr_map: 494 if one_str not in string_mappings: 495 string_mappings[one_str] = rawstr_map[one_str] 496 497 return string_mappings 498 499 500def main(): 501 """Main function of database generator""" 502 args = parse_args() 503 504 # Setup logging 505 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING) 506 if args.debug: 507 logger.setLevel(logging.DEBUG) 508 elif args.verbose: 509 logger.setLevel(logging.INFO) 510 511 elffile = open(args.elffile, "rb") 512 if not elffile: 513 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile) 514 sys.exit(1) 515 516 logger.info("ELF file %s", args.elffile) 517 518 if args.json: 519 logger.info("JSON Database file %s", args.json) 520 section_extraction = True 521 522 if args.syst: 523 logger.info("MIPI Sys-T Collateral file %s", args.syst) 524 section_extraction = False 525 526 elf = ELFFile(elffile) 527 528 database = LogDatabase() 529 530 if args.build_header: 531 with open(args.build_header) as f: 532 for l in f: 533 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', l) 534 if match: 535 database.set_build_id(match.group(1)) 536 break 537 538 if args.build: 539 database.set_build_id(args.build) 540 logger.info("Build ID: %s", args.build) 541 542 extract_elf_information(elf, database) 543 544 process_kconfigs(elf, database) 545 546 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) 547 if database.is_tgt_little_endian(): 548 logger.info("Endianness: Little") 549 else: 550 logger.info("Endianness: Big") 551 552 if database.is_tgt_64bit(): 553 global PTR_FMT 554 PTR_FMT = '0x%016x' 555 556 # Extract strings from ELF files 557 string_mappings = extract_static_strings(elf, database, section_extraction) 558 if len(string_mappings) > 0: 559 database.set_string_mappings(string_mappings) 560 logger.info("Found %d strings", len(string_mappings)) 561 562 # Extract information related to logging subsystem 563 if not section_extraction: 564 # The logging subsys information (e.g. log module names) 565 # may require additional strings outside of those extracted 566 # via ELF DWARF variables. So generate a new string mappings 567 # with strings in various ELF sections. 568 string_mappings = extract_static_strings(elf, database, section_extraction=True) 569 570 extract_logging_subsys_information(elf, database, string_mappings) 571 572 # Write database file 573 if args.json: 574 if not LogDatabase.write_json_database(args.json, database): 575 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 576 args.json) 577 sys.exit(1) 578 579 if args.syst: 580 if not LogDatabase.write_syst_database(args.syst, database): 581 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 582 args.syst) 583 sys.exit(1) 584 585 elffile.close() 586 587 588if __name__ == "__main__": 589 main() 590