1#!/usr/bin/env python3 2# 3# Copyright (c) 2020 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Database Generator 9 10This takes the built Zephyr ELF binary and produces a JSON database 11file for dictionary-based logging. This database is used together 12with the parser to decode binary log messages. 13""" 14 15import argparse 16import logging 17import os 18import re 19import string 20import struct 21import sys 22 23import dictionary_parser.log_database 24from dictionary_parser.log_database import LogDatabase 25from dictionary_parser.utils import extract_one_string_in_section 26from dictionary_parser.utils import find_string_in_mappings 27 28import elftools 29from elftools.elf.constants import SH_FLAGS 30from elftools.elf.elffile import ELFFile 31from elftools.elf.descriptions import describe_ei_data 32from elftools.elf.sections import SymbolTableSection 33from elftools.dwarf.descriptions import ( 34 describe_DWARF_expr 35) 36from elftools.dwarf.locationlists import ( 37 LocationExpr, LocationParser 38) 39 40 41LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s" 42logger = logging.getLogger(os.path.basename(sys.argv[0])) 43 44 45# Sections that contains static strings 46STATIC_STRING_SECTIONS = [ 47 'rodata', 48 '.rodata', 49 'pinned.rodata', 50] 51 52 53# Regulation expression to match DWARF location 54DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)") 55 56 57# Format string for pointers (default for 32-bit pointers) 58PTR_FMT = '0x%08x' 59 60 61# Potential string encodings. Add as needed. 62STR_ENCODINGS = [ 63 'ascii', 64 'iso-8859-1', 65] 66 67 68# List of acceptable escape character 69ACCEPTABLE_ESCAPE_CHARS = [ 70 b'\r', 71 b'\n', 72] 73 74 75def parse_args(): 76 """Parse command line arguments""" 77 argparser = argparse.ArgumentParser(allow_abbrev=False) 78 79 argparser.add_argument("elffile", help="Zephyr ELF binary") 80 argparser.add_argument("--build", help="Build ID") 81 argparser.add_argument("--build-header", 82 help="Header file containing BUILD_VERSION define") 83 argparser.add_argument("--debug", action="store_true", 84 help="Print extra debugging information") 85 argparser.add_argument("-v", "--verbose", action="store_true", 86 help="Print more information") 87 88 outfile_grp = argparser.add_mutually_exclusive_group(required=True) 89 outfile_grp.add_argument("--json", 90 help="Output Dictionary Logging Database file in JSON") 91 outfile_grp.add_argument("--syst", 92 help="Output MIPI Sys-T Collateral XML file") 93 94 return argparser.parse_args() 95 96 97def extract_elf_code_data_sections(elf): 98 """Find all sections in ELF file""" 99 sections = {} 100 101 for sect in elf.iter_sections(): 102 # Only Allocated sections with PROGBITS are included 103 # since they actually have code/data. 104 # 105 # On contrary, BSS is allocated but NOBITS. 106 if ( 107 (sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC 108 and sect['sh_type'] == 'SHT_PROGBITS' 109 ): 110 sections[sect.name] = { 111 'name' : sect.name, 112 'size' : sect['sh_size'], 113 'start' : sect['sh_addr'], 114 'end' : sect['sh_addr'] + sect['sh_size'] - 1, 115 'data' : sect.data(), 116 } 117 118 return sections 119 120 121def find_elf_sections(elf, sh_name): 122 """Find all sections in ELF file""" 123 for section in elf.iter_sections(): 124 if section.name == sh_name: 125 ret = { 126 'name' : section.name, 127 'size' : section['sh_size'], 128 'start' : section['sh_addr'], 129 'end' : section['sh_addr'] + section['sh_size'] - 1, 130 'data' : section.data(), 131 } 132 133 return ret 134 135 return None 136 137 138def get_kconfig_symbols(elf): 139 """Get kconfig symbols from the ELF file""" 140 for section in elf.iter_sections(): 141 if isinstance(section, SymbolTableSection): 142 return {sym.name: sym.entry.st_value 143 for sym in section.iter_symbols() 144 if sym.name.startswith("CONFIG_")} 145 146 raise LookupError("Could not find symbol table") 147 148 149def find_log_const_symbols(elf): 150 """Extract all "log_const_*" symbols from ELF file""" 151 symbol_tables = [s for s in elf.iter_sections() 152 if isinstance(s, elftools.elf.sections.SymbolTableSection)] 153 154 ret_list = [] 155 156 for section in symbol_tables: 157 if not isinstance(section, elftools.elf.sections.SymbolTableSection): 158 continue 159 160 if section['sh_entsize'] == 0: 161 continue 162 163 for symbol in section.iter_symbols(): 164 if symbol.name.startswith("log_const_"): 165 ret_list.append(symbol) 166 167 return ret_list 168 169 170def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings): 171 """Find the log instances and map source IDs to names""" 172 if database.is_tgt_little_endian(): 173 formatter = "<" 174 else: 175 formatter = ">" 176 177 if database.is_tgt_64bit(): 178 # 64-bit pointer to string 179 formatter += "Q" 180 else: 181 # 32-bit pointer to string 182 formatter += "L" 183 184 # log instance level 185 formatter += "B" 186 187 datum_size = struct.calcsize(formatter) 188 189 # Get the address of first log instance 190 first_offset = log_const_symbols[0].entry['st_value'] 191 for sym in log_const_symbols: 192 if sym.entry['st_value'] < first_offset: 193 first_offset = sym.entry['st_value'] 194 195 first_offset -= log_const_area['start'] 196 197 # find all log_const_* 198 for sym in log_const_symbols: 199 # Find data offset in log_const_area for this symbol 200 offset = sym.entry['st_value'] - log_const_area['start'] 201 202 idx_s = offset 203 idx_e = offset + datum_size 204 205 datum = log_const_area['data'][idx_s:idx_e] 206 207 if len(datum) != datum_size: 208 # Not enough data to unpack 209 continue 210 211 str_ptr, level = struct.unpack(formatter, datum) 212 213 # Offset to rodata section for string 214 instance_name = find_string_in_mappings(string_mappings, str_ptr) 215 if instance_name is None: 216 instance_name = "unknown" 217 218 logger.info("Found Log Instance: %s, level: %d", instance_name, level) 219 220 # source ID is simply the element index in the log instance array 221 source_id = int((offset - first_offset) / sym.entry['st_size']) 222 223 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value']) 224 225 226def extract_elf_information(elf, database): 227 """Extract information from ELF file and store in database""" 228 e_ident = elf.header['e_ident'] 229 elf_data = describe_ei_data(e_ident['EI_DATA']) 230 231 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']: 232 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN) 233 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']: 234 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN) 235 else: 236 logger.error("Cannot determine endianness from ELF file, exiting...") 237 sys.exit(1) 238 239 240def process_kconfigs(elf, database): 241 """Process kconfigs to extract information""" 242 kconfigs = get_kconfig_symbols(elf) 243 244 # 32 or 64-bit target 245 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32) 246 247 # Architecture 248 for name, arch in dictionary_parser.log_database.ARCHS.items(): 249 if arch['kconfig'] in kconfigs: 250 database.set_arch(name) 251 break 252 253 # Put some kconfigs into the database 254 # 255 # Use 32-bit timestamp? or 64-bit? 256 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs: 257 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", 258 kconfigs['CONFIG_LOG_TIMESTAMP_64BIT']) 259 260 261def extract_logging_subsys_information(elf, database, string_mappings): 262 """ 263 Extract logging subsys related information and store in database. 264 265 For example, this extracts the list of log instances to establish 266 mapping from source ID to name. 267 """ 268 # Extract log constant section for module names 269 section_log_const = find_elf_sections(elf, "log_const_area") 270 if section_log_const is None: 271 # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas 272 section_log_const = find_elf_sections(elf, "log_static_section") 273 274 if section_log_const is None: 275 logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...") 276 sys.exit(1) 277 278 # Find all "log_const_*" symbols and parse them 279 log_const_symbols = find_log_const_symbols(elf) 280 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings) 281 282 283def is_die_attr_ref(attr): 284 """ 285 Returns True if the DIE attribute is a reference. 286 """ 287 return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 288 'DW_FORM_ref4', 'DW_FORM_ref8', 289 'DW_FORM_ref')) 290 291 292def find_die_var_base_type(compile_unit, die, is_const): 293 """ 294 Finds the base type of a DIE and returns the name. 295 If DW_AT_type is a reference, it will recursively go through 296 the references to find the base type. Returns None is no 297 base type is found. 298 """ 299 # DIE is of base type. So extract the name. 300 if die.tag == 'DW_TAG_base_type': 301 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const 302 303 # Not a type, cannot continue 304 if not 'DW_AT_type' in die.attributes: 305 return None, None 306 307 if die.tag == 'DW_TAG_const_type': 308 is_const = True 309 310 # DIE is probably a reference to another. 311 # If so, check if the reference is a base type. 312 type_attr = die.attributes['DW_AT_type'] 313 if is_die_attr_ref(type_attr): 314 ref_addr = compile_unit.cu_offset + type_attr.raw_value 315 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr) 316 317 return find_die_var_base_type(compile_unit, ref_die, is_const) 318 319 # Not a base type, and not reference 320 return None, None 321 322 323def is_die_var_const_char(compile_unit, die): 324 """ 325 Returns True if DIE of type variable is const char. 326 """ 327 var_type, is_const = find_die_var_base_type(compile_unit, die, False) 328 329 if var_type is not None and var_type.endswith('char') and is_const: 330 return True 331 332 return False 333 334 335def extract_string_variables(elf): 336 """ 337 Find all string variables (char) in all Compilation Units and 338 Debug information Entry (DIE) in ELF file. 339 """ 340 dwarf_info = elf.get_dwarf_info() 341 loc_lists = dwarf_info.location_lists() 342 loc_parser = LocationParser(loc_lists) 343 344 strings = [] 345 346 # Loop through all Compilation Units and 347 # Debug information Entry (DIE) to extract all string variables 348 for compile_unit in dwarf_info.iter_CUs(): 349 for die in compile_unit.iter_DIEs(): 350 # Only care about variables with location information 351 # and of type "char" 352 if die.tag == 'DW_TAG_variable': 353 if ('DW_AT_type' in die.attributes 354 and 'DW_AT_location' in die.attributes 355 and is_die_var_const_char(compile_unit, die) 356 ): 357 # Extract location information, which is 358 # its address in memory. 359 loc_attr = die.attributes['DW_AT_location'] 360 if loc_parser.attribute_has_location(loc_attr, die.cu['version']): 361 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version']) 362 if isinstance(loc, LocationExpr): 363 try: 364 addr = describe_DWARF_expr(loc.loc_expr, 365 dwarf_info.structs) 366 367 matcher = DT_LOCATION_REGEX.match(addr) 368 if matcher: 369 addr = int(matcher.group(1), 16) 370 if addr > 0: 371 strings.append({ 372 'name': die.attributes['DW_AT_name'].value, 373 'addr': addr, 374 'die': die 375 }) 376 except KeyError: 377 pass 378 379 return strings 380 381 382def try_decode_string(str_maybe): 383 """Check if it is a printable string""" 384 for encoding in STR_ENCODINGS: 385 try: 386 decoded_str = str_maybe.decode(encoding) 387 388 # Check if string is printable according to Python 389 # since the parser (written in Python) will need to 390 # print the string. 391 # 392 # Note that '\r' and '\n' are not included in 393 # string.printable so they need to be checked separately. 394 printable = True 395 for one_char in decoded_str: 396 if (one_char not in string.printable 397 and one_char not in ACCEPTABLE_ESCAPE_CHARS): 398 printable = False 399 break 400 401 if printable: 402 return decoded_str 403 except UnicodeDecodeError: 404 pass 405 406 return None 407 408 409def extract_strings_in_one_section(section, str_mappings): 410 """Extract NULL-terminated strings in one ELF section""" 411 bindata = section['data'] 412 413 if len(bindata) < 2: 414 # Can't have a NULL-terminated string with fewer than 2 bytes. 415 return str_mappings 416 417 idx = 0 418 419 # If first byte is not NULL, it may be a string. 420 if bindata[0] == 0: 421 start = None 422 else: 423 start = 0 424 425 while idx < len(bindata): 426 if start is None: 427 if bindata[idx] == 0: 428 # Skip NULL bytes to find next string 429 idx += 1 430 else: 431 # Beginning of possible string 432 start = idx 433 idx += 1 434 else: 435 if bindata[idx] != 0: 436 # Skipping till next NULL byte for possible string 437 idx += 1 438 else: 439 # End of possible string 440 end = idx 441 442 if start != end: 443 str_maybe = bindata[start:end] 444 decoded_str = try_decode_string(str_maybe) 445 446 # Only store readable string 447 if decoded_str is not None: 448 addr = section['start'] + start 449 450 if addr not in str_mappings: 451 str_mappings[addr] = decoded_str 452 453 # Decoded string may contain un-printable characters 454 # (e.g. extended ASC-II characters) or control 455 # characters (e.g. '\r' or '\n'), so simply print 456 # the byte string instead. 457 logger.debug('Found string via extraction at ' + PTR_FMT + ': %s', 458 addr, str_maybe) 459 460 # GCC-based toolchain will reuse the NULL character 461 # for empty strings. There is no way to know which 462 # one is being reused, so just treat all NULL character 463 # at the end of legitimate strings as empty strings. 464 null_addr = section['start'] + end 465 str_mappings[null_addr] = '' 466 467 logger.debug('Found null string via extraction at ' + PTR_FMT, 468 null_addr) 469 470 start = None 471 idx += 1 472 473 return str_mappings 474 475 476def extract_static_strings(elf, database, section_extraction=False): 477 """ 478 Extract static strings from ELF file using DWARF, 479 and also extraction from binary data. 480 """ 481 string_mappings = {} 482 483 elf_sections = extract_elf_code_data_sections(elf) 484 485 # Extract strings using ELF DWARF information 486 str_vars = extract_string_variables(elf) 487 for str_var in str_vars: 488 for _, sect in elf_sections.items(): 489 one_str = extract_one_string_in_section(sect, str_var['addr']) 490 if one_str is not None: 491 string_mappings[str_var['addr']] = one_str 492 logger.debug('Found string variable at ' + PTR_FMT + ': %s', 493 str_var['addr'], one_str) 494 break 495 496 if section_extraction: 497 # Extract strings from ELF sections 498 string_sections = STATIC_STRING_SECTIONS 499 rawstr_map = {} 500 501 # Some architectures may put static strings into additional sections. 502 # So need to extract them too. 503 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()] 504 if "extra_string_section" in arch_data: 505 string_sections.extend(arch_data['extra_string_section']) 506 507 for sect_name in string_sections: 508 if sect_name in elf_sections: 509 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], 510 rawstr_map) 511 512 for one_str in rawstr_map: 513 if one_str not in string_mappings: 514 string_mappings[one_str] = rawstr_map[one_str] 515 516 return string_mappings 517 518 519def main(): 520 """Main function of database generator""" 521 args = parse_args() 522 523 # Setup logging 524 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING) 525 if args.debug: 526 logger.setLevel(logging.DEBUG) 527 elif args.verbose: 528 logger.setLevel(logging.INFO) 529 530 elffile = open(args.elffile, "rb") 531 if not elffile: 532 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile) 533 sys.exit(1) 534 535 logger.info("ELF file %s", args.elffile) 536 537 if args.json: 538 logger.info("JSON Database file %s", args.json) 539 section_extraction = True 540 541 if args.syst: 542 logger.info("MIPI Sys-T Collateral file %s", args.syst) 543 section_extraction = False 544 545 elf = ELFFile(elffile) 546 547 database = LogDatabase() 548 549 if args.build_header: 550 with open(args.build_header) as f: 551 for l in f: 552 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', l) 553 if match: 554 database.set_build_id(match.group(1)) 555 break 556 557 if args.build: 558 database.set_build_id(args.build) 559 logger.info("Build ID: %s", args.build) 560 561 extract_elf_information(elf, database) 562 563 process_kconfigs(elf, database) 564 565 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) 566 if database.is_tgt_little_endian(): 567 logger.info("Endianness: Little") 568 else: 569 logger.info("Endianness: Big") 570 571 if database.is_tgt_64bit(): 572 global PTR_FMT 573 PTR_FMT = '0x%016x' 574 575 # Extract strings from ELF files 576 string_mappings = extract_static_strings(elf, database, section_extraction) 577 if len(string_mappings) > 0: 578 database.set_string_mappings(string_mappings) 579 logger.info("Found %d strings", len(string_mappings)) 580 581 # Extract information related to logging subsystem 582 if not section_extraction: 583 # The logging subsys information (e.g. log module names) 584 # may require additional strings outside of those extracted 585 # via ELF DWARF variables. So generate a new string mappings 586 # with strings in various ELF sections. 587 string_mappings = extract_static_strings(elf, database, section_extraction=True) 588 589 extract_logging_subsys_information(elf, database, string_mappings) 590 591 # Write database file 592 if args.json: 593 if not LogDatabase.write_json_database(args.json, database): 594 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 595 args.json) 596 sys.exit(1) 597 598 if args.syst: 599 if not LogDatabase.write_syst_database(args.syst, database): 600 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 601 args.syst) 602 sys.exit(1) 603 604 elffile.close() 605 606 607if __name__ == "__main__": 608 main() 609