1#!/usr/bin/env python3 2# 3# Copyright (c) 2020 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Database Generator 9 10This takes the built Zephyr ELF binary and produces a JSON database 11file for dictionary-based logging. This database is used together 12with the parser to decode binary log messages. 13""" 14 15import argparse 16import logging 17import os 18import re 19import string 20import struct 21import sys 22 23import dictionary_parser.log_database 24from dictionary_parser.log_database import LogDatabase 25from dictionary_parser.utils import extract_one_string_in_section 26from dictionary_parser.utils import find_string_in_mappings 27 28import elftools 29from elftools.elf.constants import SH_FLAGS 30from elftools.elf.elffile import ELFFile 31from elftools.elf.descriptions import describe_ei_data 32from elftools.elf.sections import SymbolTableSection 33from elftools.dwarf.descriptions import ( 34 describe_DWARF_expr 35) 36from elftools.dwarf.locationlists import ( 37 LocationExpr, LocationParser 38) 39 40 41LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s" 42logger = logging.getLogger(os.path.basename(sys.argv[0])) 43 44 45# Sections that contains static strings 46STATIC_STRING_SECTIONS = [ 47 'rodata', 48 '.rodata', 49 'pinned.rodata', 50] 51 52# Sections that contains static strings but are not part of the binary (allocable). 53REMOVED_STRING_SECTIONS = [ 54 'log_strings' 55] 56 57 58# Regulation expression to match DWARF location 59DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)") 60 61 62# Format string for pointers (default for 32-bit pointers) 63PTR_FMT = '0x%08x' 64 65 66# Potential string encodings. Add as needed. 67STR_ENCODINGS = [ 68 'ascii', 69 'iso-8859-1', 70] 71 72 73# List of acceptable escape character 74ACCEPTABLE_ESCAPE_CHARS = [ 75 b'\r', 76 b'\n', 77] 78 79 80def parse_args(): 81 """Parse command line arguments""" 82 argparser = argparse.ArgumentParser(allow_abbrev=False) 83 84 argparser.add_argument("elffile", help="Zephyr ELF binary") 85 argparser.add_argument("--build", help="Build ID") 86 argparser.add_argument("--build-header", 87 help="Header file containing BUILD_VERSION define") 88 argparser.add_argument("--debug", action="store_true", 89 help="Print extra debugging information") 90 argparser.add_argument("-v", "--verbose", action="store_true", 91 help="Print more information") 92 93 outfile_grp = argparser.add_mutually_exclusive_group(required=True) 94 outfile_grp.add_argument("--json", 95 help="Output Dictionary Logging Database file in JSON") 96 outfile_grp.add_argument("--syst", 97 help="Output MIPI Sys-T Collateral XML file") 98 99 return argparser.parse_args() 100 101 102def extract_elf_code_data_sections(elf, wildcards = None): 103 """Find all sections in ELF file""" 104 sections = {} 105 106 for sect in elf.iter_sections(): 107 # Only Allocated sections with PROGBITS are included 108 # since they actually have code/data. 109 # 110 # On contrary, BSS is allocated but NOBITS. 111 if (((wildcards is not None) and (sect.name in wildcards)) or 112 ((sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC 113 and sect['sh_type'] == 'SHT_PROGBITS') 114 ): 115 sections[sect.name] = { 116 'name' : sect.name, 117 'size' : sect['sh_size'], 118 'start' : sect['sh_addr'], 119 'end' : sect['sh_addr'] + sect['sh_size'] - 1, 120 'data' : sect.data(), 121 } 122 123 return sections 124 125 126def find_elf_sections(elf, sh_name): 127 """Find all sections in ELF file""" 128 for section in elf.iter_sections(): 129 if section.name == sh_name: 130 ret = { 131 'name' : section.name, 132 'size' : section['sh_size'], 133 'start' : section['sh_addr'], 134 'end' : section['sh_addr'] + section['sh_size'] - 1, 135 'data' : section.data(), 136 } 137 138 return ret 139 140 return None 141 142 143def get_kconfig_symbols(elf): 144 """Get kconfig symbols from the ELF file""" 145 for section in elf.iter_sections(): 146 if isinstance(section, SymbolTableSection): 147 return {sym.name: sym.entry.st_value 148 for sym in section.iter_symbols() 149 if sym.name.startswith("CONFIG_")} 150 151 raise LookupError("Could not find symbol table") 152 153 154def find_log_const_symbols(elf): 155 """Extract all "log_const_*" symbols from ELF file""" 156 symbol_tables = [s for s in elf.iter_sections() 157 if isinstance(s, elftools.elf.sections.SymbolTableSection)] 158 159 ret_list = [] 160 161 for section in symbol_tables: 162 if not isinstance(section, elftools.elf.sections.SymbolTableSection): 163 continue 164 165 if section['sh_entsize'] == 0: 166 continue 167 168 for symbol in section.iter_symbols(): 169 if symbol.name.startswith("log_const_"): 170 ret_list.append(symbol) 171 172 return ret_list 173 174 175def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings): 176 """Find the log instances and map source IDs to names""" 177 if database.is_tgt_little_endian(): 178 formatter = "<" 179 else: 180 formatter = ">" 181 182 if database.is_tgt_64bit(): 183 # 64-bit pointer to string 184 formatter += "Q" 185 else: 186 # 32-bit pointer to string 187 formatter += "L" 188 189 # log instance level 190 formatter += "B" 191 192 datum_size = struct.calcsize(formatter) 193 194 # Get the address of first log instance 195 first_offset = log_const_symbols[0].entry['st_value'] 196 for sym in log_const_symbols: 197 if sym.entry['st_value'] < first_offset: 198 first_offset = sym.entry['st_value'] 199 200 first_offset -= log_const_area['start'] 201 202 # find all log_const_* 203 for sym in log_const_symbols: 204 # Find data offset in log_const_area for this symbol 205 offset = sym.entry['st_value'] - log_const_area['start'] 206 207 idx_s = offset 208 idx_e = offset + datum_size 209 210 datum = log_const_area['data'][idx_s:idx_e] 211 212 if len(datum) != datum_size: 213 # Not enough data to unpack 214 continue 215 216 str_ptr, level = struct.unpack(formatter, datum) 217 218 # Offset to rodata section for string 219 instance_name = find_string_in_mappings(string_mappings, str_ptr) 220 if instance_name is None: 221 instance_name = "unknown" 222 223 logger.info("Found Log Instance: %s, level: %d", instance_name, level) 224 225 # source ID is simply the element index in the log instance array 226 source_id = int((offset - first_offset) / sym.entry['st_size']) 227 228 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value']) 229 230 231def extract_elf_information(elf, database): 232 """Extract information from ELF file and store in database""" 233 e_ident = elf.header['e_ident'] 234 elf_data = describe_ei_data(e_ident['EI_DATA']) 235 236 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']: 237 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN) 238 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']: 239 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN) 240 else: 241 logger.error("Cannot determine endianness from ELF file, exiting...") 242 sys.exit(1) 243 244 245def process_kconfigs(elf, database): 246 """Process kconfigs to extract information""" 247 kconfigs = get_kconfig_symbols(elf) 248 249 # 32 or 64-bit target 250 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32) 251 252 # Architecture 253 for name, arch in dictionary_parser.log_database.ARCHS.items(): 254 if arch['kconfig'] in kconfigs: 255 database.set_arch(name) 256 break 257 258 # Put some kconfigs into the database 259 # 260 # Use 32-bit timestamp? or 64-bit? 261 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs: 262 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", 263 kconfigs['CONFIG_LOG_TIMESTAMP_64BIT']) 264 265 266def extract_logging_subsys_information(elf, database, string_mappings): 267 """ 268 Extract logging subsys related information and store in database. 269 270 For example, this extracts the list of log instances to establish 271 mapping from source ID to name. 272 """ 273 # Extract log constant section for module names 274 section_log_const = find_elf_sections(elf, "log_const_area") 275 if section_log_const is None: 276 # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas 277 section_log_const = find_elf_sections(elf, "log_static_section") 278 279 if section_log_const is None: 280 logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...") 281 sys.exit(1) 282 283 # Find all "log_const_*" symbols and parse them 284 log_const_symbols = find_log_const_symbols(elf) 285 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings) 286 287 288def is_die_attr_ref(attr): 289 """ 290 Returns True if the DIE attribute is a reference. 291 """ 292 return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 293 'DW_FORM_ref4', 'DW_FORM_ref8', 294 'DW_FORM_ref')) 295 296 297def find_die_var_base_type(compile_unit, die, is_const): 298 """ 299 Finds the base type of a DIE and returns the name. 300 If DW_AT_type is a reference, it will recursively go through 301 the references to find the base type. Returns None is no 302 base type is found. 303 """ 304 # DIE is of base type. So extract the name. 305 if die.tag == 'DW_TAG_base_type': 306 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const 307 308 # Not a type, cannot continue 309 if not 'DW_AT_type' in die.attributes: 310 return None, None 311 312 if die.tag == 'DW_TAG_const_type': 313 is_const = True 314 315 # DIE is probably a reference to another. 316 # If so, check if the reference is a base type. 317 type_attr = die.attributes['DW_AT_type'] 318 if is_die_attr_ref(type_attr): 319 ref_addr = compile_unit.cu_offset + type_attr.raw_value 320 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr) 321 322 return find_die_var_base_type(compile_unit, ref_die, is_const) 323 324 # Not a base type, and not reference 325 return None, None 326 327 328def is_die_var_const_char(compile_unit, die): 329 """ 330 Returns True if DIE of type variable is const char. 331 """ 332 var_type, is_const = find_die_var_base_type(compile_unit, die, False) 333 334 if var_type is not None and var_type.endswith('char') and is_const: 335 return True 336 337 return False 338 339 340def extract_string_variables(elf): 341 """ 342 Find all string variables (char) in all Compilation Units and 343 Debug information Entry (DIE) in ELF file. 344 """ 345 dwarf_info = elf.get_dwarf_info() 346 loc_lists = dwarf_info.location_lists() 347 loc_parser = LocationParser(loc_lists) 348 349 strings = [] 350 351 # Loop through all Compilation Units and 352 # Debug information Entry (DIE) to extract all string variables 353 for compile_unit in dwarf_info.iter_CUs(): 354 for die in compile_unit.iter_DIEs(): 355 # Only care about variables with location information 356 # and of type "char" 357 if die.tag == 'DW_TAG_variable': 358 if ('DW_AT_type' in die.attributes 359 and 'DW_AT_location' in die.attributes 360 and is_die_var_const_char(compile_unit, die) 361 ): 362 # Extract location information, which is 363 # its address in memory. 364 loc_attr = die.attributes['DW_AT_location'] 365 if loc_parser.attribute_has_location(loc_attr, die.cu['version']): 366 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version']) 367 if isinstance(loc, LocationExpr): 368 try: 369 addr = describe_DWARF_expr(loc.loc_expr, 370 dwarf_info.structs) 371 372 matcher = DT_LOCATION_REGEX.match(addr) 373 if matcher: 374 addr = int(matcher.group(1), 16) 375 if addr > 0: 376 strings.append({ 377 'name': die.attributes['DW_AT_name'].value, 378 'addr': addr, 379 'die': die 380 }) 381 except KeyError: 382 pass 383 384 return strings 385 386def try_decode_string(str_maybe): 387 """Check if it is a printable string""" 388 for encoding in STR_ENCODINGS: 389 try: 390 return str_maybe.decode(encoding) 391 except UnicodeDecodeError: 392 pass 393 394 return None 395 396def is_printable(b): 397 # Check if string is printable according to Python 398 # since the parser (written in Python) will need to 399 # print the string. 400 # 401 # Note that '\r' and '\n' are not included in 402 # string.printable so they need to be checked separately. 403 return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS) 404 405def extract_strings_in_one_section(section, str_mappings): 406 """Extract NULL-terminated strings in one ELF section""" 407 data = section['data'] 408 idx = 0 409 start = None 410 for x in data: 411 if is_printable(chr(x)): 412 # Printable character, potential part of string 413 if start is None: 414 # Beginning of potential string 415 start = idx 416 elif x == 0: 417 # End of possible string 418 if start is not None: 419 # Found potential string 420 str_maybe = data[start : idx] 421 decoded_str = try_decode_string(str_maybe) 422 423 if decoded_str is not None: 424 addr = section['start'] + start 425 426 if addr not in str_mappings: 427 str_mappings[addr] = decoded_str 428 429 # Decoded string may contain un-printable characters 430 # (e.g. extended ASC-II characters) or control 431 # characters (e.g. '\r' or '\n'), so simply print 432 # the byte string instead. 433 logger.debug('Found string via extraction at ' + PTR_FMT + ': %s', 434 addr, str_maybe) 435 436 # GCC-based toolchain will reuse the NULL character 437 # for empty strings. There is no way to know which 438 # one is being reused, so just treat all NULL character 439 # at the end of legitimate strings as empty strings. 440 null_addr = section['start'] + idx 441 str_mappings[null_addr] = '' 442 443 logger.debug('Found null string via extraction at ' + PTR_FMT, 444 null_addr) 445 start = None 446 else: 447 # Non-printable byte, remove start location 448 start = None 449 idx += 1 450 451 return str_mappings 452 453 454def extract_static_strings(elf, database, section_extraction=False): 455 """ 456 Extract static strings from ELF file using DWARF, 457 and also extraction from binary data. 458 """ 459 string_mappings = {} 460 461 elf_sections = extract_elf_code_data_sections(elf, REMOVED_STRING_SECTIONS) 462 463 # Extract strings using ELF DWARF information 464 str_vars = extract_string_variables(elf) 465 for str_var in str_vars: 466 for _, sect in elf_sections.items(): 467 one_str = extract_one_string_in_section(sect, str_var['addr']) 468 if one_str is not None: 469 string_mappings[str_var['addr']] = one_str 470 logger.debug('Found string variable at ' + PTR_FMT + ': %s', 471 str_var['addr'], one_str) 472 break 473 474 if section_extraction: 475 # Extract strings from ELF sections 476 string_sections = STATIC_STRING_SECTIONS 477 rawstr_map = {} 478 479 # Some architectures may put static strings into additional sections. 480 # So need to extract them too. 481 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()] 482 if "extra_string_section" in arch_data: 483 string_sections.extend(arch_data['extra_string_section']) 484 485 for sect_name in string_sections: 486 if sect_name in elf_sections: 487 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], 488 rawstr_map) 489 490 for one_str in rawstr_map: 491 if one_str not in string_mappings: 492 string_mappings[one_str] = rawstr_map[one_str] 493 494 return string_mappings 495 496 497def main(): 498 """Main function of database generator""" 499 args = parse_args() 500 501 # Setup logging 502 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING) 503 if args.debug: 504 logger.setLevel(logging.DEBUG) 505 elif args.verbose: 506 logger.setLevel(logging.INFO) 507 508 elffile = open(args.elffile, "rb") 509 if not elffile: 510 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile) 511 sys.exit(1) 512 513 logger.info("ELF file %s", args.elffile) 514 515 if args.json: 516 logger.info("JSON Database file %s", args.json) 517 section_extraction = True 518 519 if args.syst: 520 logger.info("MIPI Sys-T Collateral file %s", args.syst) 521 section_extraction = False 522 523 elf = ELFFile(elffile) 524 525 database = LogDatabase() 526 527 if args.build_header: 528 with open(args.build_header) as f: 529 for l in f: 530 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', l) 531 if match: 532 database.set_build_id(match.group(1)) 533 break 534 535 if args.build: 536 database.set_build_id(args.build) 537 logger.info("Build ID: %s", args.build) 538 539 extract_elf_information(elf, database) 540 541 process_kconfigs(elf, database) 542 543 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) 544 if database.is_tgt_little_endian(): 545 logger.info("Endianness: Little") 546 else: 547 logger.info("Endianness: Big") 548 549 if database.is_tgt_64bit(): 550 global PTR_FMT 551 PTR_FMT = '0x%016x' 552 553 # Extract strings from ELF files 554 string_mappings = extract_static_strings(elf, database, section_extraction) 555 if len(string_mappings) > 0: 556 database.set_string_mappings(string_mappings) 557 logger.info("Found %d strings", len(string_mappings)) 558 559 # Extract information related to logging subsystem 560 if not section_extraction: 561 # The logging subsys information (e.g. log module names) 562 # may require additional strings outside of those extracted 563 # via ELF DWARF variables. So generate a new string mappings 564 # with strings in various ELF sections. 565 string_mappings = extract_static_strings(elf, database, section_extraction=True) 566 567 extract_logging_subsys_information(elf, database, string_mappings) 568 569 # Write database file 570 if args.json: 571 if not LogDatabase.write_json_database(args.json, database): 572 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 573 args.json) 574 sys.exit(1) 575 576 if args.syst: 577 if not LogDatabase.write_syst_database(args.syst, database): 578 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 579 args.syst) 580 sys.exit(1) 581 582 elffile.close() 583 584 585if __name__ == "__main__": 586 main() 587