1#!/usr/bin/env python3
2#
3# Copyright (c) 2021 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Parser Version 1
9
10This contains the implementation of the parser for
11version 1 databases.
12"""
13
14import logging
15import math
16import struct
17
18import colorama
19from colorama import Fore
20
21from .data_types import DataTypes
22from .log_parser import LogParser, formalize_fmt_string, get_log_level_str_color
23
24HEX_BYTES_IN_LINE = 16
25
26# Need to keep sync with struct log_dict_output_msg_hdr in
27# include/logging/log_output_dict.h.
28#
29# struct log_dict_output_normal_msg_hdr_t {
30#     uint8_t type;
31#     uint32_t domain:3;
32#     uint32_t level:3;
33#     uint32_t package_len:10;
34#     uint32_t data_len:12;
35#     uintptr_t source;
36#     log_timestamp_t timestamp;
37# } __packed;
38#
39# Note "type" and "timestamp" are encoded separately below.
40FMT_MSG_HDR_32 = "II"
41FMT_MSG_HDR_64 = "IQ"
42
43# Message type
44# 0: normal message
45# 1: number of dropped messages
46FMT_MSG_TYPE = "B"
47
48# Depends on CONFIG_LOG_TIMESTAMP_64BIT
49FMT_MSG_TIMESTAMP_32 = "I"
50FMT_MSG_TIMESTAMP_64 = "Q"
51
52# Keep message types in sync with include/logging/log_output_dict.h
53MSG_TYPE_NORMAL = 0
54MSG_TYPE_DROPPED = 1
55
56# Number of dropped messages
57FMT_DROPPED_CNT = "H"
58
59
60logger = logging.getLogger("parser")
61
62
63class LogParserV1(LogParser):
64    """Log Parser V1"""
65
66    def __init__(self, database):
67        super().__init__(database=database)
68
69        if self.database.is_tgt_little_endian():
70            endian = "<"
71        else:
72            endian = ">"
73
74        self.fmt_msg_type = endian + FMT_MSG_TYPE
75        self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
76
77        if self.database.is_tgt_64bit():
78            self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
79        else:
80            self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
81
82        if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
83            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
84        else:
85            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
86
87    def __get_string(self, arg, arg_offset, string_tbl):
88        one_str = self.database.find_string(arg)
89        if one_str is not None:
90            ret = one_str
91        else:
92            # The index from the string table is basically
93            # the order in va_list. Need to add to the index
94            # to skip the packaged string header and
95            # the format string.
96            str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
97            str_idx /= self.data_types.get_sizeof(DataTypes.INT)
98
99            ret = string_tbl.get(int(str_idx), f"<string@0x{arg:x}>")
100
101        return ret
102
103    def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
104        """Parse the format string to extract arguments from
105        the binary arglist and return a tuple usable with
106        Python's string formatting"""
107        idx = 0
108        arg_offset = 0
109        arg_data_type = None
110        is_parsing = False
111        do_extract = False
112
113        args = []
114
115        # Translated from cbvprintf_package()
116        for idx, fmt in enumerate(fmt_str):
117            if not is_parsing:
118                if fmt == '%':
119                    is_parsing = True
120                    arg_data_type = DataTypes.INT
121                    continue
122
123            elif fmt == '%':
124                # '%%' -> literal percentage sign
125                is_parsing = False
126                continue
127
128            elif fmt == '*':
129                pass
130
131            elif fmt.isdecimal() or str.lower(fmt) == 'l' or fmt in (' ', '#', '-', '+', '.', 'h'):
132                # formatting modifiers, just ignore
133                continue
134
135            elif fmt in ('j', 'z', 't'):
136                # intmax_t, size_t or ptrdiff_t
137                arg_data_type = DataTypes.LONG
138
139            elif fmt in ('c', 'd', 'i', 'o', 'u', 'x', 'X'):
140                unsigned = fmt in ('c', 'o', 'u', 'x', 'X')
141
142                if fmt_str[idx - 1] == 'l':
143                    if fmt_str[idx - 2] == 'l':
144                        arg_data_type = DataTypes.ULONG_LONG if unsigned else DataTypes.LONG_LONG
145                    else:
146                        arg_data_type = DataTypes.ULONG if unsigned else DataTypes.LONG
147                else:
148                    arg_data_type = DataTypes.UINT if unsigned else DataTypes.INT
149
150                is_parsing = False
151                do_extract = True
152
153            elif fmt in ('s', 'p', 'n'):
154                arg_data_type = DataTypes.PTR
155
156                is_parsing = False
157                do_extract = True
158
159            elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
160                # Python doesn't do"long double".
161                #
162                # Parse it as double (probably incorrect), but
163                # still have to skip enough bytes.
164                if fmt_str[idx - 1] == 'L':
165                    arg_data_type = DataTypes.LONG_DOUBLE
166                else:
167                    arg_data_type = DataTypes.DOUBLE
168
169                is_parsing = False
170                do_extract = True
171
172            else:
173                is_parsing = False
174                continue
175
176            if do_extract:
177                do_extract = False
178
179                align = self.data_types.get_alignment(arg_data_type)
180                size = self.data_types.get_sizeof(arg_data_type)
181                unpack_fmt = self.data_types.get_formatter(arg_data_type)
182
183                # Align the argument list by rounding up
184                stack_align = self.data_types.get_stack_alignment(arg_data_type)
185                if stack_align > 1:
186                    arg_offset = int((arg_offset + (align - 1)) / align) * align
187
188                one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
189
190                if fmt == 's':
191                    one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
192
193                args.append(one_arg)
194                arg_offset += size
195
196                # Align the offset
197                if stack_align > 1:
198                    arg_offset = int((arg_offset + align - 1) / align) * align
199
200        return tuple(args)
201
202    @staticmethod
203    def extract_string_table(str_tbl):
204        """Extract string table in a packaged log message"""
205        tbl = {}
206
207        one_str = ""
208        next_new_string = True
209        # Translated from cbvprintf_package()
210        for one_ch in str_tbl:
211            if next_new_string:
212                str_idx = one_ch
213                next_new_string = False
214                continue
215
216            if one_ch == 0:
217                tbl[str_idx] = one_str
218                one_str = ""
219                next_new_string = True
220                continue
221
222            one_str += chr(one_ch)
223
224        return tbl
225
226    @staticmethod
227    def print_hexdump(hex_data, prefix_len, color):
228        """Print hex dump"""
229        hex_vals = ""
230        chr_vals = ""
231        chr_done = 0
232
233        for one_hex in hex_data:
234            hex_vals += f"{one_hex:x} "
235            chr_vals += chr(one_hex)
236            chr_done += 1
237
238            if chr_done == HEX_BYTES_IN_LINE / 2:
239                hex_vals += " "
240                chr_vals += " "
241
242            elif chr_done == HEX_BYTES_IN_LINE:
243                print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len), hex_vals, chr_vals))
244                hex_vals = ""
245                chr_vals = ""
246                chr_done = 0
247
248        if len(chr_vals) > 0:
249            hex_padding = "   " * (HEX_BYTES_IN_LINE - chr_done)
250            print(
251                f"{color}%s%s%s|%s{Fore.RESET}"
252                % ((" " * prefix_len), hex_vals, hex_padding, chr_vals)
253            )
254
255    def get_full_msg_hdr_size(self):
256        """Get the size of the full message header"""
257        return (
258            struct.calcsize(self.fmt_msg_type)
259            + struct.calcsize(self.fmt_msg_hdr)
260            + struct.calcsize(self.fmt_msg_timestamp)
261        )
262
263    def get_normal_msg_size(self, logdata, offset):
264        """Get the needed size of the normal log message at offset"""
265        log_desc, _ = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
266        pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1)
267        data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1)
268        return self.get_full_msg_hdr_size() + pkg_len + data_len
269
270    def parse_one_normal_msg(self, logdata, offset):
271        """Parse one normal log message and print the encoded message"""
272        # Parse log message header
273        log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
274        offset += struct.calcsize(self.fmt_msg_hdr)
275
276        timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
277        offset += struct.calcsize(self.fmt_msg_timestamp)
278
279        # domain_id, level, pkg_len, data_len
280        domain_id = log_desc & 0x07
281        level = (log_desc >> 3) & 0x07
282        pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1)
283        data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1)
284
285        level_str, color = get_log_level_str_color(level)
286        source_id_str = self.database.get_log_source_string(domain_id, source_id)
287
288        # Skip over data to point to next message (save as return value)
289        next_msg_offset = offset + pkg_len + data_len
290
291        # Offset from beginning of cbprintf_packaged data to end of va_list arguments
292        offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
293        offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
294        offset_end_of_args += offset
295
296        # Extra data after packaged log
297        extra_data = logdata[(offset + pkg_len) : next_msg_offset]
298
299        # Number of appended strings in package
300        num_packed_strings = struct.unpack_from("B", logdata, offset + 1)[0]
301
302        # Number of read-only string indexes
303        num_ro_str_indexes = struct.unpack_from("B", logdata, offset + 2)[0]
304        offset_end_of_args += num_ro_str_indexes
305
306        # Number of read-write string indexes
307        num_rw_str_indexes = struct.unpack_from("B", logdata, offset + 3)[0]
308        offset_end_of_args += num_rw_str_indexes
309
310        # Extract the string table in the packaged log message
311        string_tbl = self.extract_string_table(logdata[offset_end_of_args : (offset + pkg_len)])
312
313        if len(string_tbl) != num_packed_strings:
314            logger.error("------ Error extracting string table")
315            return None
316
317        # Skip packaged string header
318        offset += self.data_types.get_sizeof(DataTypes.PTR)
319
320        # Grab the format string
321        #
322        # Note the negative offset to __get_string(). It is because
323        # the offset begins at 0 for va_list. However, the format string
324        # itself is before the va_list, so need to go back the width of
325        # a pointer.
326        fmt_str_ptr = struct.unpack_from(
327            self.data_types.get_formatter(DataTypes.PTR), logdata, offset
328        )[0]
329        fmt_str = self.__get_string(
330            fmt_str_ptr, -self.data_types.get_sizeof(DataTypes.PTR), string_tbl
331        )
332        offset += self.data_types.get_sizeof(DataTypes.PTR)
333
334        if not fmt_str:
335            logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
336            return None
337
338        args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
339
340        fmt_str = formalize_fmt_string(fmt_str)
341        log_msg = fmt_str % args
342
343        if level == 0:
344            print(f"{log_msg}", end='')
345        else:
346            log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
347            print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg))
348
349        if data_len > 0:
350            # Has hexdump data
351            self.print_hexdump(extra_data, len(log_prefix), color)
352
353        # Point to next message
354        return next_msg_offset
355
356    def parse_one_msg(self, logdata, offset):
357        if offset + struct.calcsize(self.fmt_msg_type) > len(logdata):
358            return False, offset
359
360        # Get message type
361        msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
362
363        if msg_type == MSG_TYPE_DROPPED:
364            if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata):
365                return False, offset
366            offset += struct.calcsize(self.fmt_msg_type)
367
368            num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)[0]
369            offset += struct.calcsize(self.fmt_dropped_cnt)
370
371            print(f"--- {num_dropped} messages dropped ---")
372
373        elif msg_type == MSG_TYPE_NORMAL:
374            if (offset + self.get_full_msg_hdr_size() > len(logdata)) or (
375                offset + self.get_normal_msg_size(logdata, offset) > len(logdata)
376            ):
377                return False, offset
378
379            offset += struct.calcsize(self.fmt_msg_type)
380
381            ret = self.parse_one_normal_msg(logdata, offset)
382            if ret is None:
383                raise ValueError("Error parsing normal log message")
384
385            offset = ret
386
387        else:
388            logger.error("------ Unknown message type: %s", msg_type)
389            raise ValueError(f"Unknown message type: {msg_type}")
390
391        return True, offset
392
393    def parse_log_data(self, logdata, debug=False):
394        """Parse binary log data and print the encoded log messages"""
395        offset = 0
396        still_parsing = True
397
398        while offset < len(logdata) and still_parsing:
399            still_parsing, offset = self.parse_one_msg(logdata, offset)
400
401        return offset
402
403
404colorama.init()
405