1#!/usr/bin/env python3
2#
3# Copyright (c) 2020 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Database Generator
9
10This takes the built Zephyr ELF binary and produces a JSON database
11file for dictionary-based logging. This database is used together
12with the parser to decode binary log messages.
13"""
14
15import argparse
16import logging
17import os
18import re
19import string
20import struct
21import sys
22
23import dictionary_parser.log_database
24from dictionary_parser.log_database import LogDatabase
25from dictionary_parser.utils import extract_one_string_in_section
26from dictionary_parser.utils import find_string_in_mappings
27
28import elftools
29from elftools.elf.constants import SH_FLAGS
30from elftools.elf.elffile import ELFFile
31from elftools.elf.descriptions import describe_ei_data
32from elftools.elf.sections import SymbolTableSection
33from elftools.dwarf.descriptions import (
34    describe_DWARF_expr
35)
36from elftools.dwarf.locationlists import (
37    LocationExpr, LocationParser
38)
39
40
41LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
42logger = logging.getLogger(os.path.basename(sys.argv[0]))
43
44
45# Sections that contains static strings
46STATIC_STRING_SECTIONS = [
47    'rodata',
48    '.rodata',
49    'pinned.rodata',
50]
51
52
53# Regulation expression to match DWARF location
54DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)")
55
56
57# Format string for pointers (default for 32-bit pointers)
58PTR_FMT = '0x%08x'
59
60
61# Potential string encodings. Add as needed.
62STR_ENCODINGS = [
63    'ascii',
64    'iso-8859-1',
65]
66
67
68# List of acceptable escape character
69ACCEPTABLE_ESCAPE_CHARS = [
70    b'\r',
71    b'\n',
72]
73
74
75def parse_args():
76    """Parse command line arguments"""
77    argparser = argparse.ArgumentParser(allow_abbrev=False)
78
79    argparser.add_argument("elffile", help="Zephyr ELF binary")
80    argparser.add_argument("--build", help="Build ID")
81    argparser.add_argument("--build-header",
82                           help="Header file containing BUILD_VERSION define")
83    argparser.add_argument("--debug", action="store_true",
84                           help="Print extra debugging information")
85    argparser.add_argument("-v", "--verbose", action="store_true",
86                           help="Print more information")
87
88    outfile_grp = argparser.add_mutually_exclusive_group(required=True)
89    outfile_grp.add_argument("--json",
90                             help="Output Dictionary Logging Database file in JSON")
91    outfile_grp.add_argument("--syst",
92                             help="Output MIPI Sys-T Collateral XML file")
93
94    return argparser.parse_args()
95
96
97def extract_elf_code_data_sections(elf):
98    """Find all sections in ELF file"""
99    sections = {}
100
101    for sect in elf.iter_sections():
102        # Only Allocated sections with PROGBITS are included
103        # since they actually have code/data.
104        #
105        # On contrary, BSS is allocated but NOBITS.
106        if (
107            (sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC
108            and sect['sh_type'] == 'SHT_PROGBITS'
109        ):
110            sections[sect.name] = {
111                    'name'    : sect.name,
112                    'size'    : sect['sh_size'],
113                    'start'   : sect['sh_addr'],
114                    'end'     : sect['sh_addr'] + sect['sh_size'] - 1,
115                    'data'    : sect.data(),
116                }
117
118    return sections
119
120
121def find_elf_sections(elf, sh_name):
122    """Find all sections in ELF file"""
123    for section in elf.iter_sections():
124        if section.name == sh_name:
125            ret = {
126                'name'    : section.name,
127                'size'    : section['sh_size'],
128                'start'   : section['sh_addr'],
129                'end'     : section['sh_addr'] + section['sh_size'] - 1,
130                'data'    : section.data(),
131            }
132
133            return ret
134
135    return None
136
137
138def get_kconfig_symbols(elf):
139    """Get kconfig symbols from the ELF file"""
140    for section in elf.iter_sections():
141        if isinstance(section, SymbolTableSection):
142            return {sym.name: sym.entry.st_value
143                    for sym in section.iter_symbols()
144                       if sym.name.startswith("CONFIG_")}
145
146    raise LookupError("Could not find symbol table")
147
148
149def find_log_const_symbols(elf):
150    """Extract all "log_const_*" symbols from ELF file"""
151    symbol_tables = [s for s in elf.iter_sections()
152                     if isinstance(s, elftools.elf.sections.SymbolTableSection)]
153
154    ret_list = []
155
156    for section in symbol_tables:
157        if not isinstance(section, elftools.elf.sections.SymbolTableSection):
158            continue
159
160        if section['sh_entsize'] == 0:
161            continue
162
163        for symbol in section.iter_symbols():
164            if symbol.name.startswith("log_const_"):
165                ret_list.append(symbol)
166
167    return ret_list
168
169
170def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings):
171    """Find the log instances and map source IDs to names"""
172    if database.is_tgt_little_endian():
173        formatter = "<"
174    else:
175        formatter = ">"
176
177    if database.is_tgt_64bit():
178        # 64-bit pointer to string
179        formatter += "Q"
180    else:
181        # 32-bit pointer to string
182        formatter += "L"
183
184    # log instance level
185    formatter += "B"
186
187    datum_size = struct.calcsize(formatter)
188
189    # Get the address of first log instance
190    first_offset = log_const_symbols[0].entry['st_value']
191    for sym in log_const_symbols:
192        if sym.entry['st_value'] < first_offset:
193            first_offset = sym.entry['st_value']
194
195    first_offset -= log_const_area['start']
196
197    # find all log_const_*
198    for sym in log_const_symbols:
199        # Find data offset in log_const_area for this symbol
200        offset = sym.entry['st_value'] - log_const_area['start']
201
202        idx_s = offset
203        idx_e = offset + datum_size
204
205        datum = log_const_area['data'][idx_s:idx_e]
206
207        if len(datum) != datum_size:
208            # Not enough data to unpack
209            continue
210
211        str_ptr, level = struct.unpack(formatter, datum)
212
213        # Offset to rodata section for string
214        instance_name = find_string_in_mappings(string_mappings, str_ptr)
215        if instance_name is None:
216            instance_name = "unknown"
217
218        logger.info("Found Log Instance: %s, level: %d", instance_name, level)
219
220        # source ID is simply the element index in the log instance array
221        source_id = int((offset - first_offset) / sym.entry['st_size'])
222
223        database.add_log_instance(source_id, instance_name, level, sym.entry['st_value'])
224
225
226def extract_elf_information(elf, database):
227    """Extract information from ELF file and store in database"""
228    e_ident = elf.header['e_ident']
229    elf_data = describe_ei_data(e_ident['EI_DATA'])
230
231    if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']:
232        database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
233    elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']:
234        database.set_tgt_endianness(LogDatabase.BIG_ENDIAN)
235    else:
236        logger.error("Cannot determine endianness from ELF file, exiting...")
237        sys.exit(1)
238
239
240def process_kconfigs(elf, database):
241    """Process kconfigs to extract information"""
242    kconfigs = get_kconfig_symbols(elf)
243
244    # 32 or 64-bit target
245    database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32)
246
247    # Architecture
248    for name, arch in dictionary_parser.log_database.ARCHS.items():
249        if arch['kconfig'] in kconfigs:
250            database.set_arch(name)
251            break
252
253    # Put some kconfigs into the database
254    #
255    # Use 32-bit timestamp? or 64-bit?
256    if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs:
257        database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT",
258                             kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
259
260
261def extract_logging_subsys_information(elf, database, string_mappings):
262    """
263    Extract logging subsys related information and store in database.
264
265    For example, this extracts the list of log instances to establish
266    mapping from source ID to name.
267    """
268    # Extract log constant section for module names
269    section_log_const = find_elf_sections(elf, "log_const_area")
270    if section_log_const is None:
271        # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas
272        section_log_const = find_elf_sections(elf, "log_static_section")
273
274    if section_log_const is None:
275        logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...")
276        sys.exit(1)
277
278    # Find all "log_const_*" symbols and parse them
279    log_const_symbols = find_log_const_symbols(elf)
280    parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings)
281
282
283def is_die_attr_ref(attr):
284    """
285    Returns True if the DIE attribute is a reference.
286    """
287    return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2',
288                              'DW_FORM_ref4', 'DW_FORM_ref8',
289                              'DW_FORM_ref'))
290
291
292def find_die_var_base_type(compile_unit, die, is_const):
293    """
294    Finds the base type of a DIE and returns the name.
295    If DW_AT_type is a reference, it will recursively go through
296    the references to find the base type. Returns None is no
297    base type is found.
298    """
299    # DIE is of base type. So extract the name.
300    if die.tag == 'DW_TAG_base_type':
301        return die.attributes['DW_AT_name'].value.decode('ascii'), is_const
302
303    # Not a type, cannot continue
304    if not 'DW_AT_type' in die.attributes:
305        return None, None
306
307    if die.tag == 'DW_TAG_const_type':
308        is_const = True
309
310    # DIE is probably a reference to another.
311    # If so, check if the reference is a base type.
312    type_attr = die.attributes['DW_AT_type']
313    if is_die_attr_ref(type_attr):
314        ref_addr = compile_unit.cu_offset + type_attr.raw_value
315        ref_die = compile_unit.get_DIE_from_refaddr(ref_addr)
316
317        return find_die_var_base_type(compile_unit, ref_die, is_const)
318
319    # Not a base type, and not reference
320    return None, None
321
322
323def is_die_var_const_char(compile_unit, die):
324    """
325    Returns True if DIE of type variable is const char.
326    """
327    var_type, is_const = find_die_var_base_type(compile_unit, die, False)
328
329    if var_type is not None and var_type.endswith('char') and is_const:
330        return True
331
332    return False
333
334
335def extract_string_variables(elf):
336    """
337    Find all string variables (char) in all Compilation Units and
338    Debug information Entry (DIE) in ELF file.
339    """
340    dwarf_info = elf.get_dwarf_info()
341    loc_lists = dwarf_info.location_lists()
342    loc_parser = LocationParser(loc_lists)
343
344    strings = []
345
346    # Loop through all Compilation Units and
347    # Debug information Entry (DIE) to extract all string variables
348    for compile_unit in dwarf_info.iter_CUs():
349        for die in compile_unit.iter_DIEs():
350            # Only care about variables with location information
351            # and of type "char"
352            if die.tag == 'DW_TAG_variable':
353                if ('DW_AT_type' in die.attributes
354                    and 'DW_AT_location' in die.attributes
355                    and is_die_var_const_char(compile_unit, die)
356                ):
357                    # Extract location information, which is
358                    # its address in memory.
359                    loc_attr = die.attributes['DW_AT_location']
360                    if loc_parser.attribute_has_location(loc_attr, die.cu['version']):
361                        loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'])
362                        if isinstance(loc, LocationExpr):
363                            try:
364                                addr = describe_DWARF_expr(loc.loc_expr,
365                                                        dwarf_info.structs)
366
367                                matcher = DT_LOCATION_REGEX.match(addr)
368                                if matcher:
369                                    addr = int(matcher.group(1), 16)
370                                    if addr > 0:
371                                        strings.append({
372                                            'name': die.attributes['DW_AT_name'].value,
373                                            'addr': addr,
374                                            'die': die
375                                        })
376                            except KeyError:
377                                pass
378
379    return strings
380
381def try_decode_string(str_maybe):
382    """Check if it is a printable string"""
383    for encoding in STR_ENCODINGS:
384        try:
385            return str_maybe.decode(encoding)
386        except UnicodeDecodeError:
387            pass
388
389    return None
390
391def is_printable(b):
392    # Check if string is printable according to Python
393    # since the parser (written in Python) will need to
394    # print the string.
395    #
396    # Note that '\r' and '\n' are not included in
397    # string.printable so they need to be checked separately.
398    return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)
399
400def extract_strings_in_one_section(section, str_mappings):
401    """Extract NULL-terminated strings in one ELF section"""
402    data = section['data']
403    idx = 0
404    start = None
405    for x in data:
406        if is_printable(chr(x)):
407            # Printable character, potential part of string
408            if start is None:
409                # Beginning of potential string
410                start = idx
411        elif x == 0:
412            # End of possible string
413            if start is not None:
414                # Found potential string
415                str_maybe = data[start : idx]
416                decoded_str = try_decode_string(str_maybe)
417
418                if decoded_str is not None:
419                    addr = section['start'] + start
420
421                    if addr not in str_mappings:
422                        str_mappings[addr] = decoded_str
423
424                        # Decoded string may contain un-printable characters
425                        # (e.g. extended ASC-II characters) or control
426                        # characters (e.g. '\r' or '\n'), so simply print
427                        # the byte string instead.
428                        logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
429                                     addr, str_maybe)
430
431                        # GCC-based toolchain will reuse the NULL character
432                        # for empty strings. There is no way to know which
433                        # one is being reused, so just treat all NULL character
434                        # at the end of legitimate strings as empty strings.
435                        null_addr = section['start'] + idx
436                        str_mappings[null_addr] = ''
437
438                        logger.debug('Found null string via extraction at ' + PTR_FMT,
439                                     null_addr)
440                start = None
441        else:
442            # Non-printable byte, remove start location
443            start = None
444        idx += 1
445
446    return str_mappings
447
448
449def extract_static_strings(elf, database, section_extraction=False):
450    """
451    Extract static strings from ELF file using DWARF,
452    and also extraction from binary data.
453    """
454    string_mappings = {}
455
456    elf_sections = extract_elf_code_data_sections(elf)
457
458    # Extract strings using ELF DWARF information
459    str_vars = extract_string_variables(elf)
460    for str_var in str_vars:
461        for _, sect in elf_sections.items():
462            one_str = extract_one_string_in_section(sect, str_var['addr'])
463            if one_str is not None:
464                string_mappings[str_var['addr']] = one_str
465                logger.debug('Found string variable at ' + PTR_FMT + ': %s',
466                             str_var['addr'], one_str)
467                break
468
469    if section_extraction:
470        # Extract strings from ELF sections
471        string_sections = STATIC_STRING_SECTIONS
472        rawstr_map = {}
473
474        # Some architectures may put static strings into additional sections.
475        # So need to extract them too.
476        arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()]
477        if "extra_string_section" in arch_data:
478            string_sections.extend(arch_data['extra_string_section'])
479
480        for sect_name in string_sections:
481            if sect_name in elf_sections:
482                rawstr_map = extract_strings_in_one_section(elf_sections[sect_name],
483                                                                rawstr_map)
484
485        for one_str in rawstr_map:
486            if one_str not in string_mappings:
487                string_mappings[one_str] = rawstr_map[one_str]
488
489    return string_mappings
490
491
492def main():
493    """Main function of database generator"""
494    args = parse_args()
495
496    # Setup logging
497    logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING)
498    if args.debug:
499        logger.setLevel(logging.DEBUG)
500    elif args.verbose:
501        logger.setLevel(logging.INFO)
502
503    elffile = open(args.elffile, "rb")
504    if not elffile:
505        logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile)
506        sys.exit(1)
507
508    logger.info("ELF file %s", args.elffile)
509
510    if args.json:
511        logger.info("JSON Database file %s", args.json)
512        section_extraction = True
513
514    if args.syst:
515        logger.info("MIPI Sys-T Collateral file %s", args.syst)
516        section_extraction = False
517
518    elf = ELFFile(elffile)
519
520    database = LogDatabase()
521
522    if args.build_header:
523        with open(args.build_header) as f:
524            for l in f:
525                match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', l)
526                if match:
527                    database.set_build_id(match.group(1))
528                    break
529
530    if args.build:
531        database.set_build_id(args.build)
532        logger.info("Build ID: %s", args.build)
533
534    extract_elf_information(elf, database)
535
536    process_kconfigs(elf, database)
537
538    logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
539    if database.is_tgt_little_endian():
540        logger.info("Endianness: Little")
541    else:
542        logger.info("Endianness: Big")
543
544    if database.is_tgt_64bit():
545        global PTR_FMT
546        PTR_FMT = '0x%016x'
547
548    # Extract strings from ELF files
549    string_mappings = extract_static_strings(elf, database, section_extraction)
550    if len(string_mappings) > 0:
551        database.set_string_mappings(string_mappings)
552        logger.info("Found %d strings", len(string_mappings))
553
554    # Extract information related to logging subsystem
555    if not section_extraction:
556        # The logging subsys information (e.g. log module names)
557        # may require additional strings outside of those extracted
558        # via ELF DWARF variables. So generate a new string mappings
559        # with strings in various ELF sections.
560        string_mappings = extract_static_strings(elf, database, section_extraction=True)
561
562    extract_logging_subsys_information(elf, database, string_mappings)
563
564    # Write database file
565    if args.json:
566        if not LogDatabase.write_json_database(args.json, database):
567            logger.error("ERROR: Cannot open database file for write: %s, exiting...",
568                         args.json)
569            sys.exit(1)
570
571    if args.syst:
572        if not LogDatabase.write_syst_database(args.syst, database):
573            logger.error("ERROR: Cannot open database file for write: %s, exiting...",
574                         args.syst)
575            sys.exit(1)
576
577    elffile.close()
578
579
580if __name__ == "__main__":
581    main()
582