1#!/usr/bin/env python3
2#
3# Copyright (c) 2020 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Database Generator
9
10This takes the built Zephyr ELF binary and produces a JSON database
11file for dictionary-based logging. This database is used together
12with the parser to decode binary log messages.
13"""
14
15import argparse
16import logging
17import os
18import re
19import string
20import struct
21import sys
22
23import dictionary_parser.log_database
24import elftools
25from dictionary_parser.log_database import LogDatabase
26from dictionary_parser.utils import extract_one_string_in_section, find_string_in_mappings
27from elftools.dwarf.descriptions import describe_DWARF_expr
28from elftools.dwarf.locationlists import LocationExpr, LocationParser
29from elftools.elf.constants import SH_FLAGS
30from elftools.elf.descriptions import describe_ei_data
31from elftools.elf.elffile import ELFFile
32from elftools.elf.sections import SymbolTableSection
33
34LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
35logger = logging.getLogger(os.path.basename(sys.argv[0]))
36
37
38# Sections that contains static strings
39STATIC_STRING_SECTIONS = [
40    'rodata',
41    '.rodata',
42    'pinned.rodata',
43]
44
45# Sections that contains static strings but are not part of the binary (allocable).
46REMOVED_STRING_SECTIONS = ['log_strings']
47
48
49# Regulation expression to match DWARF location
50DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)")
51
52
53# Format string for pointers (default for 32-bit pointers)
54PTR_FMT = '0x%08x'
55
56
57# Potential string encodings. Add as needed.
58STR_ENCODINGS = [
59    'ascii',
60    'iso-8859-1',
61]
62
63
64# List of acceptable escape character
65ACCEPTABLE_ESCAPE_CHARS = [
66    b'\r',
67    b'\n',
68]
69
70
71def parse_args():
72    """Parse command line arguments"""
73    argparser = argparse.ArgumentParser(allow_abbrev=False)
74
75    argparser.add_argument("elffile", help="Zephyr ELF binary")
76    argparser.add_argument("--build", help="Build ID")
77    argparser.add_argument("--build-header", help="Header file containing BUILD_VERSION define")
78    argparser.add_argument("--debug", action="store_true", help="Print extra debugging information")
79    argparser.add_argument("-v", "--verbose", action="store_true", help="Print more information")
80
81    outfile_grp = argparser.add_mutually_exclusive_group(required=True)
82    outfile_grp.add_argument("--json", help="Output Dictionary Logging Database file in JSON")
83    outfile_grp.add_argument("--syst", help="Output MIPI Sys-T Collateral XML file")
84
85    return argparser.parse_args()
86
87
88def extract_elf_code_data_sections(elf, wildcards=None):
89    """Find all sections in ELF file"""
90    sections = {}
91
92    for sect in elf.iter_sections():
93        # Only Allocated sections with PROGBITS are included
94        # since they actually have code/data.
95        #
96        # On contrary, BSS is allocated but NOBITS.
97        if ((wildcards is not None) and (sect.name in wildcards)) or (
98            (sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC
99            and sect['sh_type'] == 'SHT_PROGBITS'
100        ):
101            sections[sect.name] = {
102                'name': sect.name,
103                'size': sect['sh_size'],
104                'start': sect['sh_addr'],
105                'end': sect['sh_addr'] + sect['sh_size'] - 1,
106                'data': sect.data(),
107            }
108
109    return sections
110
111
112def find_elf_sections(elf, sh_name):
113    """Find all sections in ELF file"""
114    for section in elf.iter_sections():
115        if section.name == sh_name:
116            ret = {
117                'name': section.name,
118                'size': section['sh_size'],
119                'start': section['sh_addr'],
120                'end': section['sh_addr'] + section['sh_size'] - 1,
121                'data': section.data(),
122            }
123
124            return ret
125
126    return None
127
128
129def get_kconfig_symbols(elf):
130    """Get kconfig symbols from the ELF file"""
131    for section in elf.iter_sections():
132        if isinstance(section, SymbolTableSection) and section['sh_type'] != 'SHT_DYNSYM':
133            return {
134                sym.name: sym.entry.st_value
135                for sym in section.iter_symbols()
136                if sym.name.startswith("CONFIG_")
137            }
138
139    raise LookupError("Could not find symbol table")
140
141
142def find_log_const_symbols(elf):
143    """Extract all "log_const_*" symbols from ELF file"""
144    symbol_tables = [
145        s for s in elf.iter_sections() if isinstance(s, elftools.elf.sections.SymbolTableSection)
146    ]
147
148    ret_list = []
149
150    for section in symbol_tables:
151        if not isinstance(section, elftools.elf.sections.SymbolTableSection):
152            continue
153
154        if section['sh_entsize'] == 0:
155            continue
156
157        for symbol in section.iter_symbols():
158            if symbol.name.startswith("log_const_") or symbol.name.startswith("_log_const_"):
159                ret_list.append(symbol)
160
161    return ret_list
162
163
164def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings):
165    """Find the log instances and map source IDs to names"""
166    if database.is_tgt_little_endian():
167        formatter = "<"
168    else:
169        formatter = ">"
170
171    if database.is_tgt_64bit():
172        # 64-bit pointer to string
173        formatter += "Q"
174    else:
175        # 32-bit pointer to string
176        formatter += "L"
177
178    # log instance level
179    formatter += "B"
180
181    datum_size = struct.calcsize(formatter)
182
183    # Get the address of first log instance
184    first_offset = log_const_symbols[0].entry['st_value']
185    for sym in log_const_symbols:
186        if sym.entry['st_value'] < first_offset:
187            first_offset = sym.entry['st_value']
188
189    first_offset -= log_const_area['start']
190
191    # find all log_const_*
192    for sym in log_const_symbols:
193        # Find data offset in log_const_area for this symbol
194        offset = sym.entry['st_value'] - log_const_area['start']
195
196        idx_s = offset
197        idx_e = offset + datum_size
198
199        datum = log_const_area['data'][idx_s:idx_e]
200
201        if len(datum) != datum_size:
202            # Not enough data to unpack
203            continue
204
205        if sym.entry['st_size'] == 0:
206            # Empty entry
207            continue
208
209        str_ptr, level = struct.unpack(formatter, datum)
210
211        # Offset to rodata section for string
212        instance_name = find_string_in_mappings(string_mappings, str_ptr)
213        if instance_name is None:
214            instance_name = "unknown"
215
216        logger.info("Found Log Instance: %s, level: %d", instance_name, level)
217
218        # source ID is simply the element index in the log instance array
219        source_id = int((offset - first_offset) / sym.entry['st_size'])
220
221        database.add_log_instance(source_id, instance_name, level, sym.entry['st_value'])
222
223
224def extract_elf_information(elf, database):
225    """Extract information from ELF file and store in database"""
226    e_ident = elf.header['e_ident']
227    elf_data = describe_ei_data(e_ident['EI_DATA'])
228
229    if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']:
230        database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
231    elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']:
232        database.set_tgt_endianness(LogDatabase.BIG_ENDIAN)
233    else:
234        logger.error("Cannot determine endianness from ELF file, exiting...")
235        sys.exit(1)
236
237
238def process_kconfigs(elf, database):
239    """Process kconfigs to extract information"""
240    kconfigs = get_kconfig_symbols(elf)
241
242    # 32 or 64-bit target
243    database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32)
244
245    # Architecture
246    for name, arch in dictionary_parser.log_database.ARCHS.items():
247        if arch['kconfig'] in kconfigs:
248            database.set_arch(name)
249            break
250    else:
251        logger.error("Did not found architecture")
252        sys.exit(1)
253
254    # Put some kconfigs into the database
255    #
256    # Use 32-bit timestamp? or 64-bit?
257    if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs:
258        database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
259
260
261def extract_logging_subsys_information(elf, database, string_mappings):
262    """
263    Extract logging subsys related information and store in database.
264
265    For example, this extracts the list of log instances to establish
266    mapping from source ID to name.
267    """
268    # Extract log constant section for module names
269    section_log_const = find_elf_sections(elf, "log_const_area")
270    if section_log_const is None:
271        # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas
272        section_log_const = find_elf_sections(elf, "log_static_section")
273
274    if section_log_const is None:
275        logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...")
276        sys.exit(1)
277
278    # Find all "log_const_*" symbols and parse them
279    log_const_symbols = find_log_const_symbols(elf)
280    parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings)
281
282
283def is_die_attr_ref(attr):
284    """
285    Returns True if the DIE attribute is a reference.
286    """
287    return bool(
288        attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref')
289    )
290
291
292def find_die_var_base_type(compile_unit, die, is_const):
293    """
294    Finds the base type of a DIE and returns the name.
295    If DW_AT_type is a reference, it will recursively go through
296    the references to find the base type. Returns None is no
297    base type is found.
298    """
299    # DIE is of base type. So extract the name.
300    if die.tag == 'DW_TAG_base_type':
301        return die.attributes['DW_AT_name'].value.decode('ascii'), is_const
302
303    # Not a type, cannot continue
304    if 'DW_AT_type' not in die.attributes:
305        return None, None
306
307    if die.tag == 'DW_TAG_const_type':
308        is_const = True
309
310    # DIE is probably a reference to another.
311    # If so, check if the reference is a base type.
312    type_attr = die.attributes['DW_AT_type']
313    if is_die_attr_ref(type_attr):
314        ref_addr = compile_unit.cu_offset + type_attr.raw_value
315        ref_die = compile_unit.get_DIE_from_refaddr(ref_addr)
316
317        return find_die_var_base_type(compile_unit, ref_die, is_const)
318
319    # Not a base type, and not reference
320    return None, None
321
322
323def is_die_var_const_char(compile_unit, die):
324    """
325    Returns True if DIE of type variable is const char.
326    """
327    var_type, is_const = find_die_var_base_type(compile_unit, die, False)
328
329    return bool(var_type is not None and var_type.endswith('char') and is_const)
330
331
332def extract_string_variables(elf):
333    """
334    Find all string variables (char) in all Compilation Units and
335    Debug information Entry (DIE) in ELF file.
336    """
337    dwarf_info = elf.get_dwarf_info()
338    loc_lists = dwarf_info.location_lists()
339    loc_parser = LocationParser(loc_lists)
340
341    strings = []
342
343    # Loop through all Compilation Units and
344    # Debug information Entry (DIE) to extract all string variables
345    for compile_unit in dwarf_info.iter_CUs():
346        for die in compile_unit.iter_DIEs():
347            # Only care about variables with location information
348            # and of type "char"
349            if die.tag == 'DW_TAG_variable' and (
350                'DW_AT_type' in die.attributes
351                and 'DW_AT_location' in die.attributes
352                and is_die_var_const_char(compile_unit, die)
353            ):
354                # Extract location information, which is
355                # its address in memory.
356                loc_attr = die.attributes['DW_AT_location']
357                if loc_parser.attribute_has_location(loc_attr, die.cu['version']):
358                    loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'], die)
359                    if isinstance(loc, LocationExpr):
360                        try:
361                            addr = describe_DWARF_expr(loc.loc_expr, dwarf_info.structs)
362
363                            matcher = DT_LOCATION_REGEX.match(addr)
364                            if matcher:
365                                addr = int(matcher.group(1), 16)
366                                if addr > 0:
367                                    strings.append(
368                                        {
369                                            'name': die.attributes['DW_AT_name'].value,
370                                            'addr': addr,
371                                            'die': die,
372                                        }
373                                    )
374                        except KeyError:
375                            pass
376
377    return strings
378
379
380def try_decode_string(str_maybe):
381    """Check if it is a printable string"""
382    for encoding in STR_ENCODINGS:
383        try:
384            return str_maybe.decode(encoding)
385        except UnicodeDecodeError:
386            pass
387
388    return None
389
390
391def is_printable(b):
392    # Check if string is printable according to Python
393    # since the parser (written in Python) will need to
394    # print the string.
395    #
396    # Note that '\r' and '\n' are not included in
397    # string.printable so they need to be checked separately.
398    return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)
399
400
401def extract_strings_in_one_section(section, str_mappings):
402    """Extract NULL-terminated strings in one ELF section"""
403    data = section['data']
404    start = None
405    for idx, x in enumerate(data):
406        if is_printable(chr(x)):
407            # Printable character, potential part of string
408            if start is None:
409                # Beginning of potential string
410                start = idx
411        elif x == 0:
412            # End of possible string
413            if start is not None:
414                # Found potential string
415                str_maybe = data[start:idx]
416                decoded_str = try_decode_string(str_maybe)
417
418                if decoded_str is not None:
419                    addr = section['start'] + start
420
421                    if addr not in str_mappings:
422                        str_mappings[addr] = decoded_str
423
424                        # Decoded string may contain un-printable characters
425                        # (e.g. extended ASC-II characters) or control
426                        # characters (e.g. '\r' or '\n'), so simply print
427                        # the byte string instead.
428                        logger.debug(
429                            'Found string via extraction at ' + PTR_FMT + ': %s', addr, str_maybe
430                        )
431
432                        # GCC-based toolchain will reuse the NULL character
433                        # for empty strings. There is no way to know which
434                        # one is being reused, so just treat all NULL character
435                        # at the end of legitimate strings as empty strings.
436                        null_addr = section['start'] + idx
437                        str_mappings[null_addr] = ''
438
439                        logger.debug('Found null string via extraction at ' + PTR_FMT, null_addr)
440                start = None
441        else:
442            # Non-printable byte, remove start location
443            start = None
444
445    return str_mappings
446
447
448def extract_static_strings(elf, database, section_extraction=False):
449    """
450    Extract static strings from ELF file using DWARF,
451    and also extraction from binary data.
452    """
453    string_mappings = {}
454
455    elf_sections = extract_elf_code_data_sections(elf, REMOVED_STRING_SECTIONS)
456
457    # Extract strings using ELF DWARF information
458    str_vars = extract_string_variables(elf)
459    for str_var in str_vars:
460        for _, sect in elf_sections.items():
461            one_str = extract_one_string_in_section(sect, str_var['addr'])
462            if one_str is not None:
463                string_mappings[str_var['addr']] = one_str
464                logger.debug(
465                    'Found string variable at ' + PTR_FMT + ': %s', str_var['addr'], one_str
466                )
467                break
468
469    if section_extraction:
470        # Extract strings from ELF sections
471        string_sections = STATIC_STRING_SECTIONS
472        rawstr_map = {}
473
474        # Some architectures may put static strings into additional sections.
475        # So need to extract them too.
476        arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()]
477        if "extra_string_section" in arch_data:
478            string_sections.extend(arch_data['extra_string_section'])
479
480        for sect_name in string_sections:
481            if sect_name in elf_sections:
482                rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], rawstr_map)
483
484        for one_str in rawstr_map:
485            if one_str not in string_mappings:
486                string_mappings[one_str] = rawstr_map[one_str]
487
488    return string_mappings
489
490
491def main():
492    """Main function of database generator"""
493    args = parse_args()
494
495    # Setup logging
496    logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING)
497    if args.debug:
498        logger.setLevel(logging.DEBUG)
499    elif args.verbose:
500        logger.setLevel(logging.INFO)
501
502    elffile = open(args.elffile, "rb")  # noqa: SIM115
503    if not elffile:
504        logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile)
505        sys.exit(1)
506
507    logger.info("ELF file %s", args.elffile)
508
509    if args.json:
510        logger.info("JSON Database file %s", args.json)
511        section_extraction = True
512
513    if args.syst:
514        logger.info("MIPI Sys-T Collateral file %s", args.syst)
515        section_extraction = False
516
517    elf = ELFFile(elffile)
518
519    database = LogDatabase()
520
521    if args.build_header:
522        with open(args.build_header) as f:
523            for line in f:
524                match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', line)
525                if match:
526                    database.set_build_id(match.group(1))
527                    break
528
529    if args.build:
530        database.set_build_id(args.build)
531        logger.info("Build ID: %s", args.build)
532
533    extract_elf_information(elf, database)
534
535    process_kconfigs(elf, database)
536
537    logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
538    if database.is_tgt_little_endian():
539        logger.info("Endianness: Little")
540    else:
541        logger.info("Endianness: Big")
542
543    if database.is_tgt_64bit():
544        global PTR_FMT
545        PTR_FMT = '0x%016x'
546
547    # Extract strings from ELF files
548    string_mappings = extract_static_strings(elf, database, section_extraction)
549    if len(string_mappings) > 0:
550        database.set_string_mappings(string_mappings)
551        logger.info("Found %d strings", len(string_mappings))
552
553    # Extract information related to logging subsystem
554    if not section_extraction:
555        # The logging subsys information (e.g. log module names)
556        # may require additional strings outside of those extracted
557        # via ELF DWARF variables. So generate a new string mappings
558        # with strings in various ELF sections.
559        string_mappings = extract_static_strings(elf, database, section_extraction=True)
560
561    extract_logging_subsys_information(elf, database, string_mappings)
562
563    # Write database file
564    if args.json and not LogDatabase.write_json_database(args.json, database):
565        logger.error("ERROR: Cannot open database file for write: %s, exiting...", args.json)
566        sys.exit(1)
567
568    if args.syst and not LogDatabase.write_syst_database(args.syst, database):
569        logger.error("ERROR: Cannot open database file for write: %s, exiting...", args.syst)
570        sys.exit(1)
571
572    elffile.close()
573
574
575if __name__ == "__main__":
576    main()
577