1#!/usr/bin/env python3 2# 3# Copyright (c) 2020 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Database Generator 9 10This takes the built Zephyr ELF binary and produces a JSON database 11file for dictionary-based logging. This database is used together 12with the parser to decode binary log messages. 13""" 14 15import argparse 16import logging 17import os 18import re 19import string 20import struct 21import sys 22 23import dictionary_parser.log_database 24from dictionary_parser.log_database import LogDatabase 25from dictionary_parser.utils import extract_one_string_in_section 26from dictionary_parser.utils import find_string_in_mappings 27 28import elftools 29from elftools.elf.constants import SH_FLAGS 30from elftools.elf.elffile import ELFFile 31from elftools.elf.descriptions import describe_ei_data 32from elftools.elf.sections import SymbolTableSection 33from elftools.dwarf.descriptions import ( 34 describe_DWARF_expr 35) 36from elftools.dwarf.locationlists import ( 37 LocationExpr, LocationParser 38) 39 40 41LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s" 42logger = logging.getLogger(os.path.basename(sys.argv[0])) 43 44 45# Sections that contains static strings 46STATIC_STRING_SECTIONS = [ 47 'rodata', 48 '.rodata', 49 'pinned.rodata', 50] 51 52 53# Regulation expression to match DWARF location 54DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)") 55 56 57# Format string for pointers (default for 32-bit pointers) 58PTR_FMT = '0x%08x' 59 60 61# Potential string encodings. Add as needed. 62STR_ENCODINGS = [ 63 'ascii', 64 'iso-8859-1', 65] 66 67 68# List of acceptable escape character 69ACCEPTABLE_ESCAPE_CHARS = [ 70 b'\r', 71 b'\n', 72] 73 74 75def parse_args(): 76 """Parse command line arguments""" 77 argparser = argparse.ArgumentParser(allow_abbrev=False) 78 79 argparser.add_argument("elffile", help="Zephyr ELF binary") 80 argparser.add_argument("--build", help="Build ID") 81 argparser.add_argument("--build-header", 82 help="Header file containing BUILD_VERSION define") 83 argparser.add_argument("--debug", action="store_true", 84 help="Print extra debugging information") 85 argparser.add_argument("-v", "--verbose", action="store_true", 86 help="Print more information") 87 88 outfile_grp = argparser.add_mutually_exclusive_group(required=True) 89 outfile_grp.add_argument("--json", 90 help="Output Dictionary Logging Database file in JSON") 91 outfile_grp.add_argument("--syst", 92 help="Output MIPI Sys-T Collateral XML file") 93 94 return argparser.parse_args() 95 96 97def extract_elf_code_data_sections(elf): 98 """Find all sections in ELF file""" 99 sections = {} 100 101 for sect in elf.iter_sections(): 102 # Only Allocated sections with PROGBITS are included 103 # since they actually have code/data. 104 # 105 # On contrary, BSS is allocated but NOBITS. 106 if ( 107 (sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC 108 and sect['sh_type'] == 'SHT_PROGBITS' 109 ): 110 sections[sect.name] = { 111 'name' : sect.name, 112 'size' : sect['sh_size'], 113 'start' : sect['sh_addr'], 114 'end' : sect['sh_addr'] + sect['sh_size'] - 1, 115 'data' : sect.data(), 116 } 117 118 return sections 119 120 121def find_elf_sections(elf, sh_name): 122 """Find all sections in ELF file""" 123 for section in elf.iter_sections(): 124 if section.name == sh_name: 125 ret = { 126 'name' : section.name, 127 'size' : section['sh_size'], 128 'start' : section['sh_addr'], 129 'end' : section['sh_addr'] + section['sh_size'] - 1, 130 'data' : section.data(), 131 } 132 133 return ret 134 135 return None 136 137 138def get_kconfig_symbols(elf): 139 """Get kconfig symbols from the ELF file""" 140 for section in elf.iter_sections(): 141 if isinstance(section, SymbolTableSection): 142 return {sym.name: sym.entry.st_value 143 for sym in section.iter_symbols() 144 if sym.name.startswith("CONFIG_")} 145 146 raise LookupError("Could not find symbol table") 147 148 149def find_log_const_symbols(elf): 150 """Extract all "log_const_*" symbols from ELF file""" 151 symbol_tables = [s for s in elf.iter_sections() 152 if isinstance(s, elftools.elf.sections.SymbolTableSection)] 153 154 ret_list = [] 155 156 for section in symbol_tables: 157 if not isinstance(section, elftools.elf.sections.SymbolTableSection): 158 continue 159 160 if section['sh_entsize'] == 0: 161 continue 162 163 for symbol in section.iter_symbols(): 164 if symbol.name.startswith("log_const_"): 165 ret_list.append(symbol) 166 167 return ret_list 168 169 170def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings): 171 """Find the log instances and map source IDs to names""" 172 if database.is_tgt_little_endian(): 173 formatter = "<" 174 else: 175 formatter = ">" 176 177 if database.is_tgt_64bit(): 178 # 64-bit pointer to string 179 formatter += "Q" 180 else: 181 # 32-bit pointer to string 182 formatter += "L" 183 184 # log instance level 185 formatter += "B" 186 187 datum_size = struct.calcsize(formatter) 188 189 # Get the address of first log instance 190 first_offset = log_const_symbols[0].entry['st_value'] 191 for sym in log_const_symbols: 192 if sym.entry['st_value'] < first_offset: 193 first_offset = sym.entry['st_value'] 194 195 first_offset -= log_const_area['start'] 196 197 # find all log_const_* 198 for sym in log_const_symbols: 199 # Find data offset in log_const_area for this symbol 200 offset = sym.entry['st_value'] - log_const_area['start'] 201 202 idx_s = offset 203 idx_e = offset + datum_size 204 205 datum = log_const_area['data'][idx_s:idx_e] 206 207 if len(datum) != datum_size: 208 # Not enough data to unpack 209 continue 210 211 str_ptr, level = struct.unpack(formatter, datum) 212 213 # Offset to rodata section for string 214 instance_name = find_string_in_mappings(string_mappings, str_ptr) 215 if instance_name is None: 216 instance_name = "unknown" 217 218 logger.info("Found Log Instance: %s, level: %d", instance_name, level) 219 220 # source ID is simply the element index in the log instance array 221 source_id = int((offset - first_offset) / sym.entry['st_size']) 222 223 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value']) 224 225 226def extract_elf_information(elf, database): 227 """Extract information from ELF file and store in database""" 228 e_ident = elf.header['e_ident'] 229 elf_data = describe_ei_data(e_ident['EI_DATA']) 230 231 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']: 232 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN) 233 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']: 234 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN) 235 else: 236 logger.error("Cannot determine endianness from ELF file, exiting...") 237 sys.exit(1) 238 239 240def process_kconfigs(elf, database): 241 """Process kconfigs to extract information""" 242 kconfigs = get_kconfig_symbols(elf) 243 244 # 32 or 64-bit target 245 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32) 246 247 # Architecture 248 for name, arch in dictionary_parser.log_database.ARCHS.items(): 249 if arch['kconfig'] in kconfigs: 250 database.set_arch(name) 251 break 252 253 # Put some kconfigs into the database 254 # 255 # Use 32-bit timestamp? or 64-bit? 256 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs: 257 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", 258 kconfigs['CONFIG_LOG_TIMESTAMP_64BIT']) 259 260 261def extract_logging_subsys_information(elf, database, string_mappings): 262 """ 263 Extract logging subsys related information and store in database. 264 265 For example, this extracts the list of log instances to establish 266 mapping from source ID to name. 267 """ 268 # Extract log constant section for module names 269 section_log_const = find_elf_sections(elf, "log_const_area") 270 if section_log_const is None: 271 # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas 272 section_log_const = find_elf_sections(elf, "log_static_section") 273 274 if section_log_const is None: 275 logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...") 276 sys.exit(1) 277 278 # Find all "log_const_*" symbols and parse them 279 log_const_symbols = find_log_const_symbols(elf) 280 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings) 281 282 283def is_die_attr_ref(attr): 284 """ 285 Returns True if the DIE attribute is a reference. 286 """ 287 return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 288 'DW_FORM_ref4', 'DW_FORM_ref8', 289 'DW_FORM_ref')) 290 291 292def find_die_var_base_type(compile_unit, die, is_const): 293 """ 294 Finds the base type of a DIE and returns the name. 295 If DW_AT_type is a reference, it will recursively go through 296 the references to find the base type. Returns None is no 297 base type is found. 298 """ 299 # DIE is of base type. So extract the name. 300 if die.tag == 'DW_TAG_base_type': 301 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const 302 303 # Not a type, cannot continue 304 if not 'DW_AT_type' in die.attributes: 305 return None, None 306 307 if die.tag == 'DW_TAG_const_type': 308 is_const = True 309 310 # DIE is probably a reference to another. 311 # If so, check if the reference is a base type. 312 type_attr = die.attributes['DW_AT_type'] 313 if is_die_attr_ref(type_attr): 314 ref_addr = compile_unit.cu_offset + type_attr.raw_value 315 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr) 316 317 return find_die_var_base_type(compile_unit, ref_die, is_const) 318 319 # Not a base type, and not reference 320 return None, None 321 322 323def is_die_var_const_char(compile_unit, die): 324 """ 325 Returns True if DIE of type variable is const char. 326 """ 327 var_type, is_const = find_die_var_base_type(compile_unit, die, False) 328 329 if var_type is not None and var_type.endswith('char') and is_const: 330 return True 331 332 return False 333 334 335def extract_string_variables(elf): 336 """ 337 Find all string variables (char) in all Compilation Units and 338 Debug information Entry (DIE) in ELF file. 339 """ 340 dwarf_info = elf.get_dwarf_info() 341 loc_lists = dwarf_info.location_lists() 342 loc_parser = LocationParser(loc_lists) 343 344 strings = [] 345 346 # Loop through all Compilation Units and 347 # Debug information Entry (DIE) to extract all string variables 348 for compile_unit in dwarf_info.iter_CUs(): 349 for die in compile_unit.iter_DIEs(): 350 # Only care about variables with location information 351 # and of type "char" 352 if die.tag == 'DW_TAG_variable': 353 if ('DW_AT_type' in die.attributes 354 and 'DW_AT_location' in die.attributes 355 and is_die_var_const_char(compile_unit, die) 356 ): 357 # Extract location information, which is 358 # its address in memory. 359 loc_attr = die.attributes['DW_AT_location'] 360 if loc_parser.attribute_has_location(loc_attr, die.cu['version']): 361 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version']) 362 if isinstance(loc, LocationExpr): 363 try: 364 addr = describe_DWARF_expr(loc.loc_expr, 365 dwarf_info.structs) 366 367 matcher = DT_LOCATION_REGEX.match(addr) 368 if matcher: 369 addr = int(matcher.group(1), 16) 370 if addr > 0: 371 strings.append({ 372 'name': die.attributes['DW_AT_name'].value, 373 'addr': addr, 374 'die': die 375 }) 376 except KeyError: 377 pass 378 379 return strings 380 381def try_decode_string(str_maybe): 382 """Check if it is a printable string""" 383 for encoding in STR_ENCODINGS: 384 try: 385 return str_maybe.decode(encoding) 386 except UnicodeDecodeError: 387 pass 388 389 return None 390 391def is_printable(b): 392 # Check if string is printable according to Python 393 # since the parser (written in Python) will need to 394 # print the string. 395 # 396 # Note that '\r' and '\n' are not included in 397 # string.printable so they need to be checked separately. 398 return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS) 399 400def extract_strings_in_one_section(section, str_mappings): 401 """Extract NULL-terminated strings in one ELF section""" 402 data = section['data'] 403 idx = 0 404 start = None 405 for x in data: 406 if is_printable(chr(x)): 407 # Printable character, potential part of string 408 if start is None: 409 # Beginning of potential string 410 start = idx 411 elif x == 0: 412 # End of possible string 413 if start is not None: 414 # Found potential string 415 str_maybe = data[start : idx] 416 decoded_str = try_decode_string(str_maybe) 417 418 if decoded_str is not None: 419 addr = section['start'] + start 420 421 if addr not in str_mappings: 422 str_mappings[addr] = decoded_str 423 424 # Decoded string may contain un-printable characters 425 # (e.g. extended ASC-II characters) or control 426 # characters (e.g. '\r' or '\n'), so simply print 427 # the byte string instead. 428 logger.debug('Found string via extraction at ' + PTR_FMT + ': %s', 429 addr, str_maybe) 430 431 # GCC-based toolchain will reuse the NULL character 432 # for empty strings. There is no way to know which 433 # one is being reused, so just treat all NULL character 434 # at the end of legitimate strings as empty strings. 435 null_addr = section['start'] + idx 436 str_mappings[null_addr] = '' 437 438 logger.debug('Found null string via extraction at ' + PTR_FMT, 439 null_addr) 440 start = None 441 else: 442 # Non-printable byte, remove start location 443 start = None 444 idx += 1 445 446 return str_mappings 447 448 449def extract_static_strings(elf, database, section_extraction=False): 450 """ 451 Extract static strings from ELF file using DWARF, 452 and also extraction from binary data. 453 """ 454 string_mappings = {} 455 456 elf_sections = extract_elf_code_data_sections(elf) 457 458 # Extract strings using ELF DWARF information 459 str_vars = extract_string_variables(elf) 460 for str_var in str_vars: 461 for _, sect in elf_sections.items(): 462 one_str = extract_one_string_in_section(sect, str_var['addr']) 463 if one_str is not None: 464 string_mappings[str_var['addr']] = one_str 465 logger.debug('Found string variable at ' + PTR_FMT + ': %s', 466 str_var['addr'], one_str) 467 break 468 469 if section_extraction: 470 # Extract strings from ELF sections 471 string_sections = STATIC_STRING_SECTIONS 472 rawstr_map = {} 473 474 # Some architectures may put static strings into additional sections. 475 # So need to extract them too. 476 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()] 477 if "extra_string_section" in arch_data: 478 string_sections.extend(arch_data['extra_string_section']) 479 480 for sect_name in string_sections: 481 if sect_name in elf_sections: 482 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], 483 rawstr_map) 484 485 for one_str in rawstr_map: 486 if one_str not in string_mappings: 487 string_mappings[one_str] = rawstr_map[one_str] 488 489 return string_mappings 490 491 492def main(): 493 """Main function of database generator""" 494 args = parse_args() 495 496 # Setup logging 497 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING) 498 if args.debug: 499 logger.setLevel(logging.DEBUG) 500 elif args.verbose: 501 logger.setLevel(logging.INFO) 502 503 elffile = open(args.elffile, "rb") 504 if not elffile: 505 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile) 506 sys.exit(1) 507 508 logger.info("ELF file %s", args.elffile) 509 510 if args.json: 511 logger.info("JSON Database file %s", args.json) 512 section_extraction = True 513 514 if args.syst: 515 logger.info("MIPI Sys-T Collateral file %s", args.syst) 516 section_extraction = False 517 518 elf = ELFFile(elffile) 519 520 database = LogDatabase() 521 522 if args.build_header: 523 with open(args.build_header) as f: 524 for l in f: 525 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', l) 526 if match: 527 database.set_build_id(match.group(1)) 528 break 529 530 if args.build: 531 database.set_build_id(args.build) 532 logger.info("Build ID: %s", args.build) 533 534 extract_elf_information(elf, database) 535 536 process_kconfigs(elf, database) 537 538 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) 539 if database.is_tgt_little_endian(): 540 logger.info("Endianness: Little") 541 else: 542 logger.info("Endianness: Big") 543 544 if database.is_tgt_64bit(): 545 global PTR_FMT 546 PTR_FMT = '0x%016x' 547 548 # Extract strings from ELF files 549 string_mappings = extract_static_strings(elf, database, section_extraction) 550 if len(string_mappings) > 0: 551 database.set_string_mappings(string_mappings) 552 logger.info("Found %d strings", len(string_mappings)) 553 554 # Extract information related to logging subsystem 555 if not section_extraction: 556 # The logging subsys information (e.g. log module names) 557 # may require additional strings outside of those extracted 558 # via ELF DWARF variables. So generate a new string mappings 559 # with strings in various ELF sections. 560 string_mappings = extract_static_strings(elf, database, section_extraction=True) 561 562 extract_logging_subsys_information(elf, database, string_mappings) 563 564 # Write database file 565 if args.json: 566 if not LogDatabase.write_json_database(args.json, database): 567 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 568 args.json) 569 sys.exit(1) 570 571 if args.syst: 572 if not LogDatabase.write_syst_database(args.syst, database): 573 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 574 args.syst) 575 sys.exit(1) 576 577 elffile.close() 578 579 580if __name__ == "__main__": 581 main() 582