1#!/usr/bin/env python3
2#
3# Copyright (c) 2021 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Parser Version 1
9
10This contains the implementation of the parser for
11version 1 databases.
12"""
13
14import logging
15import math
16import struct
17import colorama
18from colorama import Fore
19
20from .log_parser import (LogParser, get_log_level_str_color, formalize_fmt_string)
21from .data_types import DataTypes
22
23
24HEX_BYTES_IN_LINE = 16
25
26# Need to keep sync with struct log_dict_output_msg_hdr in
27# include/logging/log_output_dict.h.
28#
29# struct log_dict_output_normal_msg_hdr_t {
30#     uint8_t type;
31#     uint32_t domain:3;
32#     uint32_t level:3;
33#     uint32_t package_len:10;
34#     uint32_t data_len:12;
35#     uintptr_t source;
36#     log_timestamp_t timestamp;
37# } __packed;
38#
39# Note "type" and "timestamp" are encoded separately below.
40FMT_MSG_HDR_32 = "II"
41FMT_MSG_HDR_64 = "IQ"
42
43# Message type
44# 0: normal message
45# 1: number of dropped messages
46FMT_MSG_TYPE = "B"
47
48# Depends on CONFIG_LOG_TIMESTAMP_64BIT
49FMT_MSG_TIMESTAMP_32 = "I"
50FMT_MSG_TIMESTAMP_64 = "Q"
51
52# Keep message types in sync with include/logging/log_output_dict.h
53MSG_TYPE_NORMAL = 0
54MSG_TYPE_DROPPED = 1
55
56# Number of dropped messages
57FMT_DROPPED_CNT = "H"
58
59
60logger = logging.getLogger("parser")
61
62
63class LogParserV1(LogParser):
64    """Log Parser V1"""
65    def __init__(self, database):
66        super().__init__(database=database)
67
68        if self.database.is_tgt_little_endian():
69            endian = "<"
70        else:
71            endian = ">"
72
73        self.fmt_msg_type = endian + FMT_MSG_TYPE
74        self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
75
76        if self.database.is_tgt_64bit():
77            self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
78        else:
79            self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
80
81        if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
82            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
83        else:
84            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
85
86
87    def __get_string(self, arg, arg_offset, string_tbl):
88        one_str = self.database.find_string(arg)
89        if one_str is not None:
90            ret = one_str
91        else:
92            # The index from the string table is basically
93            # the order in va_list. Need to add to the index
94            # to skip the packaged string header and
95            # the format string.
96            str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
97            str_idx /= self.data_types.get_sizeof(DataTypes.INT)
98
99            if int(str_idx) not in string_tbl:
100                ret = "<string@0x{0:x}>".format(arg)
101            else:
102                ret = string_tbl[int(str_idx)]
103
104        return ret
105
106
107    def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
108        """Parse the format string to extract arguments from
109        the binary arglist and return a tuple usable with
110        Python's string formatting"""
111        idx = 0
112        arg_offset = 0
113        arg_data_type = None
114        is_parsing = False
115        do_extract = False
116
117        args = []
118
119        # Translated from cbvprintf_package()
120        for idx, fmt in enumerate(fmt_str):
121            if not is_parsing:
122                if fmt == '%':
123                    is_parsing = True
124                    arg_data_type = DataTypes.INT
125                    continue
126
127            elif fmt == '%':
128                # '%%' -> literal percentage sign
129                is_parsing = False
130                continue
131
132            elif fmt == '*':
133                pass
134
135            elif fmt.isdecimal() or str.lower(fmt) == 'l' \
136                or fmt in (' ', '#', '-', '+', '.', 'h'):
137                # formatting modifiers, just ignore
138                continue
139
140            elif fmt in ('j', 'z', 't'):
141                # intmax_t, size_t or ptrdiff_t
142                arg_data_type = DataTypes.LONG
143
144            elif fmt in ('c', 'd', 'i', 'o', 'u', 'x', 'X'):
145                unsigned = fmt in ('c', 'o', 'u', 'x', 'X')
146
147                if fmt_str[idx - 1] == 'l':
148                    if fmt_str[idx - 2] == 'l':
149                        arg_data_type = DataTypes.ULONG_LONG if unsigned else DataTypes.LONG_LONG
150                    else:
151                        arg_data_type = DataTypes.ULONG if unsigned else DataTypes.LONG
152                else:
153                    arg_data_type = DataTypes.UINT if unsigned else DataTypes.INT
154
155                is_parsing = False
156                do_extract = True
157
158            elif fmt in ('s', 'p', 'n'):
159                arg_data_type = DataTypes.PTR
160
161                is_parsing = False
162                do_extract = True
163
164            elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
165                # Python doesn't do"long double".
166                #
167                # Parse it as double (probably incorrect), but
168                # still have to skip enough bytes.
169                if fmt_str[idx - 1] == 'L':
170                    arg_data_type = DataTypes.LONG_DOUBLE
171                else:
172                    arg_data_type = DataTypes.DOUBLE
173
174                is_parsing = False
175                do_extract = True
176
177            else:
178                is_parsing = False
179                continue
180
181            if do_extract:
182                do_extract = False
183
184                align = self.data_types.get_alignment(arg_data_type)
185                size = self.data_types.get_sizeof(arg_data_type)
186                unpack_fmt = self.data_types.get_formatter(arg_data_type)
187
188                # Align the argument list by rounding up
189                stack_align = self.data_types.get_stack_alignment(arg_data_type)
190                if stack_align > 1:
191                    arg_offset = int((arg_offset + (align - 1)) / align) * align
192
193                one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
194
195                if fmt == 's':
196                    one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
197
198                args.append(one_arg)
199                arg_offset += size
200
201                # Align the offset
202                if stack_align > 1:
203                    arg_offset = int((arg_offset + align - 1) / align) * align
204
205        return tuple(args)
206
207
208    @staticmethod
209    def extract_string_table(str_tbl):
210        """Extract string table in a packaged log message"""
211        tbl = {}
212
213        one_str = ""
214        next_new_string = True
215        # Translated from cbvprintf_package()
216        for one_ch in str_tbl:
217            if next_new_string:
218                str_idx = one_ch
219                next_new_string = False
220                continue
221
222            if one_ch == 0:
223                tbl[str_idx] = one_str
224                one_str = ""
225                next_new_string = True
226                continue
227
228            one_str += chr(one_ch)
229
230        return tbl
231
232
233    @staticmethod
234    def print_hexdump(hex_data, prefix_len, color):
235        """Print hex dump"""
236        hex_vals = ""
237        chr_vals = ""
238        chr_done = 0
239
240        for one_hex in hex_data:
241            hex_vals += "%x " % one_hex
242            chr_vals += chr(one_hex)
243            chr_done += 1
244
245            if chr_done == HEX_BYTES_IN_LINE / 2:
246                hex_vals += " "
247                chr_vals += " "
248
249            elif chr_done == HEX_BYTES_IN_LINE:
250                print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
251                      hex_vals, chr_vals))
252                hex_vals = ""
253                chr_vals = ""
254                chr_done = 0
255
256        if len(chr_vals) > 0:
257            hex_padding = "   " * (HEX_BYTES_IN_LINE - chr_done)
258            print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
259                  hex_vals, hex_padding, chr_vals))
260
261
262    def parse_one_normal_msg(self, logdata, offset):
263        """Parse one normal log message and print the encoded message"""
264        # Parse log message header
265        log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
266        offset += struct.calcsize(self.fmt_msg_hdr)
267
268        timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
269        offset += struct.calcsize(self.fmt_msg_timestamp)
270
271        # domain_id, level, pkg_len, data_len
272        domain_id = log_desc & 0x07
273        level = (log_desc >> 3) & 0x07
274        pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1)
275        data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1)
276
277        level_str, color = get_log_level_str_color(level)
278        source_id_str = self.database.get_log_source_string(domain_id, source_id)
279
280        # Skip over data to point to next message (save as return value)
281        next_msg_offset = offset + pkg_len + data_len
282
283        # Offset from beginning of cbprintf_packaged data to end of va_list arguments
284        offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
285        offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
286        offset_end_of_args += offset
287
288        # Extra data after packaged log
289        extra_data = logdata[(offset + pkg_len):next_msg_offset]
290
291        # Number of appended strings in package
292        num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0]
293
294        # Number of read-only string indexes
295        num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0]
296        offset_end_of_args += num_ro_str_indexes
297
298        # Number of read-write string indexes
299        num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0]
300        offset_end_of_args += num_rw_str_indexes
301
302        # Extract the string table in the packaged log message
303        string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)])
304
305        if len(string_tbl) != num_packed_strings:
306            logger.error("------ Error extracting string table")
307            return None
308
309        # Skip packaged string header
310        offset += self.data_types.get_sizeof(DataTypes.PTR)
311
312        # Grab the format string
313        #
314        # Note the negative offset to __get_string(). It is because
315        # the offset begins at 0 for va_list. However, the format string
316        # itself is before the va_list, so need to go back the width of
317        # a pointer.
318        fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR),
319                                         logdata, offset)[0]
320        fmt_str = self.__get_string(fmt_str_ptr,
321                                    -self.data_types.get_sizeof(DataTypes.PTR),
322                                    string_tbl)
323        offset += self.data_types.get_sizeof(DataTypes.PTR)
324
325        if not fmt_str:
326            logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
327            return None
328
329        args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
330
331        fmt_str = formalize_fmt_string(fmt_str)
332        log_msg = fmt_str % args
333
334        if level == 0:
335            print(f"{log_msg}", end='')
336        else:
337            log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
338            print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg))
339
340        if data_len > 0:
341            # Has hexdump data
342            self.print_hexdump(extra_data, len(log_prefix), color)
343
344        # Point to next message
345        return next_msg_offset
346
347
348    def parse_log_data(self, logdata, debug=False):
349        """Parse binary log data and print the encoded log messages"""
350        offset = 0
351
352        while offset < len(logdata):
353            # Get message type
354            msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
355            offset += struct.calcsize(self.fmt_msg_type)
356
357            if msg_type == MSG_TYPE_DROPPED:
358                num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)
359                offset += struct.calcsize(self.fmt_dropped_cnt)
360
361                print(f"--- {num_dropped} messages dropped ---")
362
363            elif msg_type == MSG_TYPE_NORMAL:
364                ret = self.parse_one_normal_msg(logdata, offset)
365                if ret is None:
366                    return False
367
368                offset = ret
369
370            else:
371                logger.error("------ Unknown message type: %s", msg_type)
372                return False
373
374        return True
375
376colorama.init()
377