# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Analyze the function call stack from GDB or Renode logs.

See the README for detailed usage.

Example usage:

python log_parser.py profile.txt --regex=gdb_regex.json --visualize --top=7

* To add a title to the graph, use the optional argument --title to set it

Example usage:

python log_parser.py profile.txt --regex=gdb_regex.json \
--visualize --top=7 --title=magic_wand

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import collections
import json
import os
import re
import matplotlib.pyplot as plt


def readlines(filename):
  """
  Arg:
    filename(str): path of the file to read

  Return:
    (list of str): lines of the file, without trailing newlines
  """
  with open(filename, "r") as f:
    content = f.read().splitlines()

  return content


def writelines(data, filename):
  # Write parsed log to file, one entry per line
  with open(filename, "w") as f:
    for line in data:
      f.write(line + "\n")


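# A minimal sketch of the regex JSON consumed by `load_regex_parser` below,
# assuming the layout implied by `gdb_log_parser`: a single "base" pattern used
# to scrub each line (e.g. ANSI color codes) and a "custom" list of patterns
# tried in order to extract function names. The patterns here are illustrative
# assumptions, not the contents of the actual gdb_regex.json:
#
#   {
#     "base": "\\x1b\\[[0-9;]*m",
#     "custom": ["#\\d+\\s+(.+?) \\("]
#   }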
def load_regex_parser(filename):
  """
  Arg:
    filename: string for the input json file containing the regex patterns

  Return:
    dict mapping each key to a compiled regex or a list of compiled regexes
  """
  assert filename is not None

  with open(filename, "r") as f:
    content = json.load(f)

  regex_parser = {}
  for key, val in content.items():
    if isinstance(val, list):
      regexs = []
      for pattern in val:
        regexs.append(re.compile(pattern))

      regex_parser[key] = regexs
    else:
      regex_parser[key] = re.compile(val)

  return regex_parser


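# `gdb_log_parser` below consumes GDB backtrace output, where each frame starts
# with `#<depth>` and a function may appear as `$ADDR in $NAME`. An
# illustrative snippet of assumed (not captured) input:
#
#   #0  0x40002998 in __addsf3 ()
#   #1  0x000081a4 in HypotheticalCaller ()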
def gdb_log_parser(data, output, re_file, ignore_list=None, full_trace=False):
  """
  Args:
    data: list of strings of logs from GDB
    output: string of output filename
    re_file: path to the regex *.json file
    ignore_list: list of strings (function names) to ignore
    full_trace: bool to generate the full stack trace of the log
  """
  regex_parser = load_regex_parser(re_file)

  trace = collections.defaultdict(list)
  stack = []
  processed = []
  for line in data:
    # Skip invalid lines
    if not line.startswith("#"):
      continue

    # Skip redundant lines
    if not full_trace and not line.startswith("#0"):
      continue

    # Remove ANSI color symbols
    # line = ANSI_CLEANER.sub("", line)
    line = regex_parser["base"].sub("", line)

    # Extract function names with regex
    find = None
    for r in regex_parser["custom"]:
      find = r.findall(line)

      if len(find) != 0:
        break

    if find is None or len(find) == 0:
      continue

    # Extract content from `re.findall` results
    target = find[0][0] if isinstance(find[0], tuple) else find[0]

    # Extract function name from `$ADDR in $NAME`, e.g.
    # `0x40002998 in __addsf3` -> `__addsf3`
    if " in " in target:
      target = target.split()[-1]

    # Remove leading/trailing spaces
    target = target.strip()

    if full_trace:
      if line.startswith("#0") and stack:
        # Encode the trace to string
        temp = "/".join(stack)
        trace[stack[0]].append(temp)

        # Clear up previous stack
        stack.clear()

      stack.append(target)

    if not line.startswith("#0"):
      continue

    if ignore_list and target in ignore_list:
      continue

    # Add the extracted function name to the parsed list
    processed.append(target)

  print("Extracted {} lines".format(len(processed)))

  # Write parsed log to file
  writelines(processed, output)

  if full_trace:
    content = {}
    for top, paths in trace.items():
      content[top] = []
      counter = collections.Counter(paths)

      for path, counts in counter.items():
        info = {"counts": counts, "path": path.split("/")}
        content[top].append(info)

    name = os.path.splitext(output)[0]
    with open(name + ".json", "w") as f:
      json.dump(content, f, sort_keys=True, indent=4)

  print("Parsed the log to `{}`".format(output))


def renode_log_parser(data, output, ignore_list=None):
  """
  Args:
    data: list of strings of logs from Renode
    output: string of output filename
    ignore_list: list of strings (function names) to ignore
  """
  message = "Entering function"
  extractor = re.compile(r"{} (.*) at".format(message))

  ignore_count = 0
  processed = []
  for idx, line in enumerate(data):
    print("Processing {:.2f}%".format((idx + 1) / len(data) * 100.), end="\r")

    if message not in line:
      continue

    find = extractor.findall(line)

    # Skip invalid find or unnamed functions
    if len(find) == 0 or len(find[0].split()) == 0:
      continue

    entry = find[0].split()[0]

    if ignore_list and entry in ignore_list:
      ignore_count += 1
      continue

    processed.append(entry)

  print("Extracted {} lines ({:.2f}%); {} lines are ignored ({:.2f}%)".format(
      len(processed),
      len(processed) / len(data) * 100., ignore_count,
      ignore_count / len(data) * 100.))

  # Write parsed log to file
  writelines(processed, output)

  print("Parsed the log to `{}`".format(output))


def parse_log(filename,
              output=None,
              re_file=None,
              source="gdb",
              ignore=None,
              full_trace=False):
  """
  Args:
    filename(str): path of the raw log file
    output(str): path of the parsed log file (default: `[NAME]-parsed.[EXT]`)
    re_file(str): path to the regex *.json file (GDB only)
    source(str): log source, either "gdb" or "renode"
    ignore(str): path to a file listing functions to ignore, one per line
    full_trace(bool): generate the full stack trace of the log (GDB only)
  """
  data = readlines(filename)
  print("Raw log: {} lines".format(len(data)))

  ignore_list = None
  if ignore is not None:
    ignore_list = set(readlines(ignore))
    print("* {} patterns in the ignore list".format(len(ignore_list)))

  name, ext = None, None
  if output is None:
    name, ext = os.path.splitext(filename)
    output = "{}-parsed{}".format(name, ext)

  if source == "gdb":
    gdb_log_parser(data, output, re_file, ignore_list, full_trace)
  elif source == "renode":
    renode_log_parser(data, output, ignore_list=ignore_list)
  else:
    raise NotImplementedError


def visualize_log(filename, top=None, title=None, show=False, save=True):
  """
  Args:
    filename(str): path of the parsed log file
    top(int): only visualize the top # most frequent functions
    title(str): title of the figure
    show(bool): display the figure
    save(bool): save the figure next to the parsed log
  """
  data = readlines(filename)
  print("Parsed log: {} lines".format(len(data)))

  x, y = get_frequency(data)

  if top is not None:
    top *= -1
    x, y = x[top:], y[top:]

  plt.figure(figsize=(3, 5))
  plt.barh(x, y)
  plt.xlabel("Frequency")

  if title:
    plt.title(title)

  if show:
    plt.show()

  if save:
    fig_name = "{}.png".format(os.path.splitext(filename)[0])
    plt.savefig(fname=fig_name, bbox_inches="tight", dpi=300)
    print("Figure saved in {}".format(fig_name))


def get_frequency(data):
  """
  Arg:
    data(list of str): parsed function names

  Return:
    keys(list of str): function names sorted by frequency (ascending)
    vals(list of int): corresponding counts
  """
  counter = collections.Counter(data)

  keys = [pair[0] for pair in sorted(counter.items(), key=lambda x: x[1])]
  vals = sorted(counter.values())

  return keys, vals


if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  parser.add_argument("input", type=str, help="Input raw log file.")
  parser.add_argument("--output",
                      type=str,
                      help="Parsed log file. Default: [NAME]-parsed.[EXT]")
  parser.add_argument("--regex",
                      type=str,
                      help="Path to the regex file for parsing the GDB log.")
  parser.add_argument("--visualize",
                      action="store_true",
                      help="Parse and visualize the log")
  parser.add_argument("--top", type=int, help="Top # to visualize")
  parser.add_argument("--source",
                      type=str,
                      default="gdb",
                      choices=["gdb", "renode"],
                      help="Source of where the log is captured")
  parser.add_argument(
      "--ignore",
      type=str,
      help="List of functions (one for each line in the file) to \
      ignore after parsing.")
  parser.add_argument("--full-trace",
                      action="store_true",
                      help="Generate the full stack trace (GDB only)")
  parser.add_argument("--title",
                      type=str,
                      help="Set title for the visualized image")

  args = parser.parse_args()

  if args.output is None:
    fname, extension = os.path.splitext(args.input)
    args.output = "{}-parsed{}".format(fname, extension)

  parse_log(args.input, args.output, args.regex, args.source, args.ignore,
            args.full_trace)

  if args.visualize:
    visualize_log(args.output, top=args.top, title=args.title)