1#!/usr/bin/env python3
2#
3# Copyright (c) 2021 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Parser Version 1
9
10This contains the implementation of the parser for
11version 1 databases.
12"""
13
14import logging
15import math
16import struct
17import colorama
18from colorama import Fore
19
20from .log_parser import LogParser
21
22
23HEX_BYTES_IN_LINE = 16
24
25LOG_LEVELS = [
26    ('none', Fore.WHITE),
27    ('err', Fore.RED),
28    ('wrn', Fore.YELLOW),
29    ('inf', Fore.GREEN),
30    ('dbg', Fore.BLUE)
31]
32
33# Need to keep sync with struct log_dict_output_msg_hdr in
34# include/logging/log_output_dict.h.
35#
36# struct log_dict_output_normal_msg_hdr_t {
37#     uint8_t type;
38#     uint32_t domain:3;
39#     uint32_t level:3;
40#     uint32_t package_len:10;
41#     uint32_t data_len:12;
42#     uintptr_t source;
43#     log_timestamp_t timestamp;
44# } __packed;
45#
46# Note "type" and "timestamp" are encoded separately below.
47FMT_MSG_HDR_32 = "II"
48FMT_MSG_HDR_64 = "IQ"
49
50# Message type
51# 0: normal message
52# 1: number of dropped messages
53FMT_MSG_TYPE = "B"
54
55# Depends on CONFIG_LOG_TIMESTAMP_64BIT
56FMT_MSG_TIMESTAMP_32 = "I"
57FMT_MSG_TIMESTAMP_64 = "Q"
58
59# Keep message types in sync with include/logging/log_output_dict.h
60MSG_TYPE_NORMAL = 0
61MSG_TYPE_DROPPED = 1
62
63# Number of dropped messages
64FMT_DROPPED_CNT = "H"
65
66
67logger = logging.getLogger("parser")
68
69
70def get_log_level_str_color(lvl):
71    """Convert numeric log level to string"""
72    if lvl < 0 or lvl >= len(LOG_LEVELS):
73        return ("unk", Fore.WHITE)
74
75    return LOG_LEVELS[lvl]
76
77
78def formalize_fmt_string(fmt_str):
79    """Replace unsupported formatter"""
80    new_str = fmt_str
81
82    for spec in ['d', 'i', 'o', 'u', 'x', 'X']:
83        # Python doesn't support %ll for integer specifiers, so remove extra 'l'
84        new_str = new_str.replace("%ll" + spec, "%l" + spec)
85
86        # Python doesn't support %hh for integer specifiers, so remove extra 'h'
87        new_str = new_str.replace("%hh" + spec, "%h" + spec)
88
89    # No %p for pointer either, so use %x
90    new_str = new_str.replace("%p", "0x%x")
91
92    return new_str
93
94
95class DataTypes():
96    """Class regarding data types, their alignments and sizes"""
97    INT = 0
98    UINT = 1
99    LONG = 2
100    ULONG = 3
101    LONG_LONG = 4
102    ULONG_LONG = 5
103    PTR = 6
104    DOUBLE = 7
105    LONG_DOUBLE = 8
106    NUM_TYPES = 9
107
108    def __init__(self, database):
109        self.database = database
110        self.data_types = {}
111
112        if database.is_tgt_64bit():
113            self.add_data_type(self.LONG, "q")
114            self.add_data_type(self.LONG_LONG, "q")
115            self.add_data_type(self.PTR, "Q")
116        else:
117            self.add_data_type(self.LONG, "i")
118            self.add_data_type(self.LONG_LONG, "q")
119            self.add_data_type(self.PTR, "I")
120
121        self.add_data_type(self.INT, "i")
122        self.add_data_type(self.DOUBLE, "d")
123        self.add_data_type(self.LONG_DOUBLE, "d")
124
125
126    def add_data_type(self, data_type, fmt):
127        """Add one data type"""
128        if self.database.is_tgt_little_endian():
129            endianness = "<"
130        else:
131            endianness = ">"
132
133        formatter = endianness + fmt
134
135        self.data_types[data_type] = {}
136        self.data_types[data_type]['fmt'] = formatter
137
138        size = struct.calcsize(formatter)
139
140        if data_type == self.LONG_DOUBLE:
141            # Python doesn't have long double but we still
142            # need to skip correct number of bytes
143            size = 16
144
145        self.data_types[data_type]['sizeof'] = size
146
147        # Might need actual number for different architectures
148        # but these seem to work fine for now.
149        if self.database.is_tgt_64bit():
150            self.data_types[data_type]['align'] = 8
151        else:
152            self.data_types[data_type]['align'] = 4
153
154
155    def get_sizeof(self, data_type):
156        """Get sizeof() of a data type"""
157        return self.data_types[data_type]['sizeof']
158
159
160    def get_alignment(self, data_type):
161        """Get the alignment of a data type"""
162        return self.data_types[data_type]['align']
163
164
165    def get_formatter(self, data_type):
166        """Get the formatter for a data type"""
167        return self.data_types[data_type]['fmt']
168
169
170class LogParserV1(LogParser):
171    """Log Parser V1"""
172    def __init__(self, database):
173        super().__init__(database=database)
174
175        if self.database.is_tgt_little_endian():
176            endian = "<"
177        else:
178            endian = ">"
179
180        self.fmt_msg_type = endian + FMT_MSG_TYPE
181        self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
182
183        if self.database.is_tgt_64bit():
184            self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
185        else:
186            self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
187
188        if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
189            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
190        else:
191            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
192
193        self.data_types = DataTypes(self.database)
194
195
196    def __get_string(self, arg, arg_offset, string_tbl):
197        one_str = self.database.find_string(arg)
198        if one_str is not None:
199            ret = one_str
200        else:
201            # The index from the string table is basically
202            # the order in va_list. Need to add to the index
203            # to skip the packaged string header and
204            # the format string.
205            str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
206            str_idx /= self.data_types.get_sizeof(DataTypes.INT)
207
208            if int(str_idx) not in string_tbl:
209                ret = "<string@0x{0:x}>".format(arg)
210            else:
211                ret = string_tbl[int(str_idx)]
212
213        return ret
214
215
216    def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
217        """Parse the format string to extract arguments from
218        the binary arglist and return a tuple usable with
219        Python's string formatting"""
220        idx = 0
221        arg_offset = 0
222        is_parsing = False
223        do_extract = False
224
225        args = []
226
227        # Translated from cbvprintf_package()
228        for idx, fmt in enumerate(fmt_str):
229            if not is_parsing:
230                if fmt == '%':
231                    is_parsing = True
232                    arg_data_type = DataTypes.INT
233                    continue
234
235            elif fmt == '%':
236                # '%%' -> literal percentage sign
237                is_parsing = False
238                continue
239
240            elif fmt == '*':
241                pass
242
243            elif fmt.isdecimal() or str.lower(fmt) == 'l' \
244                or fmt in (' ', '#', '-', '+', '.', 'h'):
245                # formatting modifiers, just ignore
246                continue
247
248            elif fmt in ('j', 'z', 't'):
249                # intmax_t, size_t or ptrdiff_t
250                arg_data_type = DataTypes.LONG
251
252            elif fmt in ('c', 'd', 'i', 'o', 'u') or str.lower(fmt) == 'x':
253                if fmt_str[idx - 1] == 'l':
254                    if fmt_str[idx - 2] == 'l':
255                        arg_data_type = DataTypes.LONG_LONG
256                    else:
257                        arg_data_type = DataTypes.LONG
258                else:
259                    arg_data_type = DataTypes.INT
260
261                is_parsing = False
262                do_extract = True
263
264            elif fmt in ('s', 'p', 'n'):
265                arg_data_type = DataTypes.PTR
266
267                is_parsing = False
268                do_extract = True
269
270            elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
271                # Python doesn't do"long double".
272                #
273                # Parse it as double (probably incorrect), but
274                # still have to skip enough bytes.
275                if fmt_str[idx - 1] == 'L':
276                    arg_data_type = DataTypes.LONG_DOUBLE
277                else:
278                    arg_data_type = DataTypes.DOUBLE
279
280                is_parsing = False
281                do_extract = True
282
283            else:
284                is_parsing = False
285                continue
286
287            if do_extract:
288                do_extract = False
289
290                align = self.data_types.get_alignment(arg_data_type)
291                size = self.data_types.get_sizeof(arg_data_type)
292                unpack_fmt = self.data_types.get_formatter(arg_data_type)
293
294                one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
295
296                if fmt == 's':
297                    one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
298
299                args.append(one_arg)
300                arg_offset += size
301
302                # Align the offset
303                arg_offset = int((arg_offset + align - 1) / align) * align
304
305        return tuple(args)
306
307
308    @staticmethod
309    def extract_string_table(str_tbl):
310        """Extract string table in a packaged log message"""
311        tbl = {}
312
313        one_str = ""
314        next_new_string = True
315        # Translated from cbvprintf_package()
316        for one_ch in str_tbl:
317            if next_new_string:
318                str_idx = one_ch
319                next_new_string = False
320                continue
321
322            if one_ch == 0:
323                tbl[str_idx] = one_str
324                one_str = ""
325                next_new_string = True
326                continue
327
328            one_str += chr(one_ch)
329
330        return tbl
331
332
333    @staticmethod
334    def print_hexdump(hex_data, prefix_len, color):
335        """Print hex dump"""
336        hex_vals = ""
337        chr_vals = ""
338        chr_done = 0
339
340        for one_hex in hex_data:
341            hex_vals += "%x " % one_hex
342            chr_vals += chr(one_hex)
343            chr_done += 1
344
345            if chr_done == HEX_BYTES_IN_LINE / 2:
346                hex_vals += " "
347                chr_vals += " "
348
349            elif chr_done == HEX_BYTES_IN_LINE:
350                print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
351                      hex_vals, chr_vals))
352                hex_vals = ""
353                chr_vals = ""
354                chr_done = 0
355
356        if len(chr_vals) > 0:
357            hex_padding = "   " * (HEX_BYTES_IN_LINE - chr_done)
358            print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
359                  hex_vals, hex_padding, chr_vals))
360
361
362    def parse_one_normal_msg(self, logdata, offset):
363        """Parse one normal log message and print the encoded message"""
364        # Parse log message header
365        log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
366        offset += struct.calcsize(self.fmt_msg_hdr)
367
368        timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
369        offset += struct.calcsize(self.fmt_msg_timestamp)
370
371        # domain_id, level, pkg_len, data_len
372        domain_id = log_desc & 0x07
373        level = (log_desc >> 3) & 0x07
374        pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1)
375        data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1)
376
377        level_str, color = get_log_level_str_color(level)
378        source_id_str = self.database.get_log_source_string(domain_id, source_id)
379
380        # Skip over data to point to next message (save as return value)
381        next_msg_offset = offset + pkg_len + data_len
382
383        # Offset from beginning of cbprintf_packaged data to end of va_list arguments
384        offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
385        offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
386        offset_end_of_args += offset
387
388        # Extra data after packaged log
389        extra_data = logdata[(offset + pkg_len):next_msg_offset]
390
391        # Number of appended strings in package
392        num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0]
393
394        # Number of read-only string indexes
395        num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0]
396        offset_end_of_args += num_ro_str_indexes
397
398        # Number of read-write string indexes
399        num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0]
400        offset_end_of_args += num_rw_str_indexes
401
402        # Extract the string table in the packaged log message
403        string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)])
404
405        if len(string_tbl) != num_packed_strings:
406            logger.error("------ Error extracting string table")
407            return None
408
409        # Skip packaged string header
410        offset += self.data_types.get_sizeof(DataTypes.PTR)
411
412        # Grab the format string
413        #
414        # Note the negative offset to __get_string(). It is because
415        # the offset begins at 0 for va_list. However, the format string
416        # itself is before the va_list, so need to go back the width of
417        # a pointer.
418        fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR),
419                                         logdata, offset)[0]
420        fmt_str = self.__get_string(fmt_str_ptr,
421                                    -self.data_types.get_sizeof(DataTypes.PTR),
422                                    string_tbl)
423        offset += self.data_types.get_sizeof(DataTypes.PTR)
424
425        if not fmt_str:
426            logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
427            return None
428
429        args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
430
431        fmt_str = formalize_fmt_string(fmt_str)
432        log_msg = fmt_str % args
433
434        if level == 0:
435            print(f"{log_msg}", end='')
436        else:
437            log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
438            print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg))
439
440        if data_len > 0:
441            # Has hexdump data
442            self.print_hexdump(extra_data, len(log_prefix), color)
443
444        # Point to next message
445        return next_msg_offset
446
447
448    def parse_log_data(self, logdata, debug=False):
449        """Parse binary log data and print the encoded log messages"""
450        offset = 0
451
452        while offset < len(logdata):
453            # Get message type
454            msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
455            offset += struct.calcsize(self.fmt_msg_type)
456
457            if msg_type == MSG_TYPE_DROPPED:
458                num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)
459                offset += struct.calcsize(self.fmt_dropped_cnt)
460
461                print(f"--- {num_dropped} messages dropped ---")
462
463            elif msg_type == MSG_TYPE_NORMAL:
464                ret = self.parse_one_normal_msg(logdata, offset)
465                if ret is None:
466                    return False
467
468                offset = ret
469
470            else:
471                logger.error("------ Unknown message type: %s", msg_type)
472                return False
473
474        return True
475
476colorama.init()
477