1#!/usr/bin/env python3 2# 3# Copyright (c) 2021 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Parser Version 1 9 10This contains the implementation of the parser for 11version 1 databases. 12""" 13 14import logging 15import math 16import struct 17import colorama 18from colorama import Fore 19 20from .log_parser import LogParser 21 22 23HEX_BYTES_IN_LINE = 16 24 25LOG_LEVELS = [ 26 ('none', Fore.WHITE), 27 ('err', Fore.RED), 28 ('wrn', Fore.YELLOW), 29 ('inf', Fore.GREEN), 30 ('dbg', Fore.BLUE) 31] 32 33# Need to keep sync with struct log_dict_output_msg_hdr in 34# include/logging/log_output_dict.h. 35# 36# struct log_dict_output_normal_msg_hdr_t { 37# uint8_t type; 38# uint32_t domain:3; 39# uint32_t level:3; 40# uint32_t package_len:10; 41# uint32_t data_len:12; 42# uintptr_t source; 43# log_timestamp_t timestamp; 44# } __packed; 45# 46# Note "type" and "timestamp" are encoded separately below. 47FMT_MSG_HDR_32 = "II" 48FMT_MSG_HDR_64 = "IQ" 49 50# Message type 51# 0: normal message 52# 1: number of dropped messages 53FMT_MSG_TYPE = "B" 54 55# Depends on CONFIG_LOG_TIMESTAMP_64BIT 56FMT_MSG_TIMESTAMP_32 = "I" 57FMT_MSG_TIMESTAMP_64 = "Q" 58 59# Keep message types in sync with include/logging/log_output_dict.h 60MSG_TYPE_NORMAL = 0 61MSG_TYPE_DROPPED = 1 62 63# Number of dropped messages 64FMT_DROPPED_CNT = "H" 65 66 67logger = logging.getLogger("parser") 68 69 70def get_log_level_str_color(lvl): 71 """Convert numeric log level to string""" 72 if lvl < 0 or lvl >= len(LOG_LEVELS): 73 return ("unk", Fore.WHITE) 74 75 return LOG_LEVELS[lvl] 76 77 78def formalize_fmt_string(fmt_str): 79 """Replace unsupported formatter""" 80 new_str = fmt_str 81 82 for spec in ['d', 'i', 'o', 'u', 'x', 'X']: 83 # Python doesn't support %ll for integer specifiers, so remove extra 'l' 84 new_str = new_str.replace("%ll" + spec, "%l" + spec) 85 86 # Python doesn't support %hh for integer specifiers, so remove extra 'h' 87 new_str = new_str.replace("%hh" + spec, "%h" + spec) 88 89 # No %p for pointer either, so use %x 90 new_str = new_str.replace("%p", "0x%x") 91 92 return new_str 93 94 95class DataTypes(): 96 """Class regarding data types, their alignments and sizes""" 97 INT = 0 98 UINT = 1 99 LONG = 2 100 ULONG = 3 101 LONG_LONG = 4 102 ULONG_LONG = 5 103 PTR = 6 104 DOUBLE = 7 105 LONG_DOUBLE = 8 106 NUM_TYPES = 9 107 108 def __init__(self, database): 109 self.database = database 110 self.data_types = {} 111 112 if database.is_tgt_64bit(): 113 self.add_data_type(self.LONG, "q") 114 self.add_data_type(self.LONG_LONG, "q") 115 self.add_data_type(self.PTR, "Q") 116 else: 117 self.add_data_type(self.LONG, "i") 118 self.add_data_type(self.LONG_LONG, "q") 119 self.add_data_type(self.PTR, "I") 120 121 self.add_data_type(self.INT, "i") 122 self.add_data_type(self.DOUBLE, "d") 123 self.add_data_type(self.LONG_DOUBLE, "d") 124 125 126 def add_data_type(self, data_type, fmt): 127 """Add one data type""" 128 if self.database.is_tgt_little_endian(): 129 endianness = "<" 130 else: 131 endianness = ">" 132 133 formatter = endianness + fmt 134 135 self.data_types[data_type] = {} 136 self.data_types[data_type]['fmt'] = formatter 137 138 size = struct.calcsize(formatter) 139 140 if data_type == self.LONG_DOUBLE: 141 # Python doesn't have long double but we still 142 # need to skip correct number of bytes 143 size = 16 144 145 self.data_types[data_type]['sizeof'] = size 146 147 # Might need actual number for different architectures 148 # but these seem to work fine for now. 149 if self.database.is_tgt_64bit(): 150 self.data_types[data_type]['align'] = 8 151 else: 152 self.data_types[data_type]['align'] = 4 153 154 155 def get_sizeof(self, data_type): 156 """Get sizeof() of a data type""" 157 return self.data_types[data_type]['sizeof'] 158 159 160 def get_alignment(self, data_type): 161 """Get the alignment of a data type""" 162 return self.data_types[data_type]['align'] 163 164 165 def get_formatter(self, data_type): 166 """Get the formatter for a data type""" 167 return self.data_types[data_type]['fmt'] 168 169 170class LogParserV1(LogParser): 171 """Log Parser V1""" 172 def __init__(self, database): 173 super().__init__(database=database) 174 175 if self.database.is_tgt_little_endian(): 176 endian = "<" 177 else: 178 endian = ">" 179 180 self.fmt_msg_type = endian + FMT_MSG_TYPE 181 self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT 182 183 if self.database.is_tgt_64bit(): 184 self.fmt_msg_hdr = endian + FMT_MSG_HDR_64 185 else: 186 self.fmt_msg_hdr = endian + FMT_MSG_HDR_32 187 188 if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs(): 189 self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64 190 else: 191 self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32 192 193 self.data_types = DataTypes(self.database) 194 195 196 def __get_string(self, arg, arg_offset, string_tbl): 197 one_str = self.database.find_string(arg) 198 if one_str is not None: 199 ret = one_str 200 else: 201 # The index from the string table is basically 202 # the order in va_list. Need to add to the index 203 # to skip the packaged string header and 204 # the format string. 205 str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2 206 str_idx /= self.data_types.get_sizeof(DataTypes.INT) 207 208 if int(str_idx) not in string_tbl: 209 ret = "<string@0x{0:x}>".format(arg) 210 else: 211 ret = string_tbl[int(str_idx)] 212 213 return ret 214 215 216 def process_one_fmt_str(self, fmt_str, arg_list, string_tbl): 217 """Parse the format string to extract arguments from 218 the binary arglist and return a tuple usable with 219 Python's string formatting""" 220 idx = 0 221 arg_offset = 0 222 is_parsing = False 223 do_extract = False 224 225 args = [] 226 227 # Translated from cbvprintf_package() 228 for idx, fmt in enumerate(fmt_str): 229 if not is_parsing: 230 if fmt == '%': 231 is_parsing = True 232 arg_data_type = DataTypes.INT 233 continue 234 235 elif fmt == '%': 236 # '%%' -> literal percentage sign 237 is_parsing = False 238 continue 239 240 elif fmt == '*': 241 pass 242 243 elif fmt.isdecimal() or str.lower(fmt) == 'l' \ 244 or fmt in (' ', '#', '-', '+', '.', 'h'): 245 # formatting modifiers, just ignore 246 continue 247 248 elif fmt in ('j', 'z', 't'): 249 # intmax_t, size_t or ptrdiff_t 250 arg_data_type = DataTypes.LONG 251 252 elif fmt in ('c', 'd', 'i', 'o', 'u') or str.lower(fmt) == 'x': 253 if fmt_str[idx - 1] == 'l': 254 if fmt_str[idx - 2] == 'l': 255 arg_data_type = DataTypes.LONG_LONG 256 else: 257 arg_data_type = DataTypes.LONG 258 else: 259 arg_data_type = DataTypes.INT 260 261 is_parsing = False 262 do_extract = True 263 264 elif fmt in ('s', 'p', 'n'): 265 arg_data_type = DataTypes.PTR 266 267 is_parsing = False 268 do_extract = True 269 270 elif str.lower(fmt) in ('a', 'e', 'f', 'g'): 271 # Python doesn't do"long double". 272 # 273 # Parse it as double (probably incorrect), but 274 # still have to skip enough bytes. 275 if fmt_str[idx - 1] == 'L': 276 arg_data_type = DataTypes.LONG_DOUBLE 277 else: 278 arg_data_type = DataTypes.DOUBLE 279 280 is_parsing = False 281 do_extract = True 282 283 else: 284 is_parsing = False 285 continue 286 287 if do_extract: 288 do_extract = False 289 290 align = self.data_types.get_alignment(arg_data_type) 291 size = self.data_types.get_sizeof(arg_data_type) 292 unpack_fmt = self.data_types.get_formatter(arg_data_type) 293 294 one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0] 295 296 if fmt == 's': 297 one_arg = self.__get_string(one_arg, arg_offset, string_tbl) 298 299 args.append(one_arg) 300 arg_offset += size 301 302 # Align the offset 303 arg_offset = int((arg_offset + align - 1) / align) * align 304 305 return tuple(args) 306 307 308 @staticmethod 309 def extract_string_table(str_tbl): 310 """Extract string table in a packaged log message""" 311 tbl = {} 312 313 one_str = "" 314 next_new_string = True 315 # Translated from cbvprintf_package() 316 for one_ch in str_tbl: 317 if next_new_string: 318 str_idx = one_ch 319 next_new_string = False 320 continue 321 322 if one_ch == 0: 323 tbl[str_idx] = one_str 324 one_str = "" 325 next_new_string = True 326 continue 327 328 one_str += chr(one_ch) 329 330 return tbl 331 332 333 @staticmethod 334 def print_hexdump(hex_data, prefix_len, color): 335 """Print hex dump""" 336 hex_vals = "" 337 chr_vals = "" 338 chr_done = 0 339 340 for one_hex in hex_data: 341 hex_vals += "%x " % one_hex 342 chr_vals += chr(one_hex) 343 chr_done += 1 344 345 if chr_done == HEX_BYTES_IN_LINE / 2: 346 hex_vals += " " 347 chr_vals += " " 348 349 elif chr_done == HEX_BYTES_IN_LINE: 350 print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len), 351 hex_vals, chr_vals)) 352 hex_vals = "" 353 chr_vals = "" 354 chr_done = 0 355 356 if len(chr_vals) > 0: 357 hex_padding = " " * (HEX_BYTES_IN_LINE - chr_done) 358 print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len), 359 hex_vals, hex_padding, chr_vals)) 360 361 362 def parse_one_normal_msg(self, logdata, offset): 363 """Parse one normal log message and print the encoded message""" 364 # Parse log message header 365 log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset) 366 offset += struct.calcsize(self.fmt_msg_hdr) 367 368 timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0] 369 offset += struct.calcsize(self.fmt_msg_timestamp) 370 371 # domain_id, level, pkg_len, data_len 372 domain_id = log_desc & 0x07 373 level = (log_desc >> 3) & 0x07 374 pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1) 375 data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1) 376 377 level_str, color = get_log_level_str_color(level) 378 source_id_str = self.database.get_log_source_string(domain_id, source_id) 379 380 # Skip over data to point to next message (save as return value) 381 next_msg_offset = offset + pkg_len + data_len 382 383 # Offset from beginning of cbprintf_packaged data to end of va_list arguments 384 offset_end_of_args = struct.unpack_from("B", logdata, offset)[0] 385 offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT) 386 offset_end_of_args += offset 387 388 # Extra data after packaged log 389 extra_data = logdata[(offset + pkg_len):next_msg_offset] 390 391 # Number of appended strings in package 392 num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0] 393 394 # Number of read-only string indexes 395 num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0] 396 offset_end_of_args += num_ro_str_indexes 397 398 # Number of read-write string indexes 399 num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0] 400 offset_end_of_args += num_rw_str_indexes 401 402 # Extract the string table in the packaged log message 403 string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)]) 404 405 if len(string_tbl) != num_packed_strings: 406 logger.error("------ Error extracting string table") 407 return None 408 409 # Skip packaged string header 410 offset += self.data_types.get_sizeof(DataTypes.PTR) 411 412 # Grab the format string 413 # 414 # Note the negative offset to __get_string(). It is because 415 # the offset begins at 0 for va_list. However, the format string 416 # itself is before the va_list, so need to go back the width of 417 # a pointer. 418 fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR), 419 logdata, offset)[0] 420 fmt_str = self.__get_string(fmt_str_ptr, 421 -self.data_types.get_sizeof(DataTypes.PTR), 422 string_tbl) 423 offset += self.data_types.get_sizeof(DataTypes.PTR) 424 425 if not fmt_str: 426 logger.error("------ Error getting format string at 0x%x", fmt_str_ptr) 427 return None 428 429 args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl) 430 431 fmt_str = formalize_fmt_string(fmt_str) 432 log_msg = fmt_str % args 433 434 if level == 0: 435 print(f"{log_msg}", end='') 436 else: 437 log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: " 438 print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg)) 439 440 if data_len > 0: 441 # Has hexdump data 442 self.print_hexdump(extra_data, len(log_prefix), color) 443 444 # Point to next message 445 return next_msg_offset 446 447 448 def parse_log_data(self, logdata, debug=False): 449 """Parse binary log data and print the encoded log messages""" 450 offset = 0 451 452 while offset < len(logdata): 453 # Get message type 454 msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0] 455 offset += struct.calcsize(self.fmt_msg_type) 456 457 if msg_type == MSG_TYPE_DROPPED: 458 num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) 459 offset += struct.calcsize(self.fmt_dropped_cnt) 460 461 print(f"--- {num_dropped} messages dropped ---") 462 463 elif msg_type == MSG_TYPE_NORMAL: 464 ret = self.parse_one_normal_msg(logdata, offset) 465 if ret is None: 466 return False 467 468 offset = ret 469 470 else: 471 logger.error("------ Unknown message type: %s", msg_type) 472 return False 473 474 return True 475 476colorama.init() 477