1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15r"""Analyze function call stack from GDB or Renode
16
17See README for detail usage
18
19Example usage:
20
21python log_parser.py profile.txt --regex=gdb_regex.json --visualize --top=7
22
23* To add a title in the graph, use the optional argument --title to set it
24
25Example usage:
26
27python log_parser.py profile.txt --regex=gdb_regex.json \
28--visualize --top=7 --title=magic_wand
29
30"""
31
32from __future__ import absolute_import
33from __future__ import division
34from __future__ import print_function
35
36import argparse
37import collections
38import json
39import os
40import re
41import matplotlib.pyplot as plt
42
43
def readlines(filename):
  """Read a text file and return its lines without trailing newlines.

  Arg:
    filename(str): path of the file to read.

  Return:
    (list of str): one entry per line of the file.
  """
  with open(filename, "r") as f:
    return f.read().splitlines()
56
57
def writelines(data, filename):
  """Write each entry of `data` to `filename`, one entry per line."""
  with open(filename, "w") as f:
    # Generator keeps behavior identical for empty `data` (writes nothing)
    f.writelines(entry + "\n" for entry in data)
63
64
def load_regex_parser(filename):
  """Load and compile regex patterns from a json file.

  Arg:
    filename: string for the input json file containing regex. Each key
      maps to either a single pattern string or a list of pattern strings.

  Return:
    dict mapping each key to a compiled regex, or to a list of compiled
    regexes, mirroring the json structure.

  Raises:
    ValueError: if `filename` is None (e.g. `--regex` was not supplied).
  """
  # Explicit error instead of `assert`: asserts are stripped under
  # `python -O`, and the message points the user at the missing flag.
  if filename is None:
    raise ValueError("A regex json file is required (see --regex).")

  with open(filename, "r") as f:
    content = json.load(f)

  regex_parser = {}
  for key, val in content.items():
    if isinstance(val, list):
      regex_parser[key] = [re.compile(pattern) for pattern in val]
    else:
      regex_parser[key] = re.compile(val)

  return regex_parser
87
88
def gdb_log_parser(data, output, re_file, ignore_list=None, full_trace=False):
  """Parse a GDB backtrace log and write extracted function names.

  Args:
    data: list of strings of logs from GDB
    output: string of output filename
    re_file: path to the regex *.json file; key "base" strips unwanted
      symbols (e.g. ANSI colors), key "custom" is a list of patterns that
      capture the function name from a frame line
    ignore_list: list of string (functions) to ignore
    full_trace: bool to generate full stack trace of the log; also dumps
      per-top-frame path counts to `<output>.json`
  """
  regex_parser = load_regex_parser(re_file)

  trace = collections.defaultdict(list)
  stack = []
  processed = []
  for line in data:
    # Skip invalid lines: GDB frame lines start with `#<depth>`
    if not line.startswith("#"):
      continue

    # Skip redundant lines: only the innermost frame (#0) matters unless
    # a full stack trace was requested
    if not full_trace and not line.startswith("#0"):
      continue

    # Remove ANSI color symbols
    line = regex_parser["base"].sub("", line)

    # Extract function names with regex; first pattern that matches wins
    find = None
    for r in regex_parser["custom"]:
      find = r.findall(line)

      if len(find) != 0:
        break

    if find is None or len(find) == 0:
      continue

    # Extract content from `re.findall` results (tuple when the pattern
    # has multiple capture groups)
    target = find[0][0] if isinstance(find[0], tuple) else find[0]

    # Extract function name from `$ADDR in $NAME`, e.g.
    # `0x40002998 in __addsf3` -> `__addsf3`
    if " in " in target:
      target = target.split()[-1]

    # Remove leading/trailing spaces
    target = target.strip()

    if full_trace:
      # A new `#0` frame marks the start of the next backtrace: archive
      # the one collected so far, keyed by its innermost frame
      if line.startswith("#0") and stack:
        trace[stack[0]].append("/".join(stack))

        # Clear up previous stack
        stack.clear()

      stack.append(target)

    if not line.startswith("#0"):
      continue

    if ignore_list and target in ignore_list:
      continue

    # Strip the string before adding into parsed list
    processed.append(target)

  # Bug fix: flush the final backtrace — it has no following `#0` line to
  # trigger the archive step inside the loop, so it used to be dropped
  if full_trace and stack:
    trace[stack[0]].append("/".join(stack))
    stack.clear()

  print("Extracted {} lines".format(len(processed)))

  # Write parsed log to file
  writelines(processed, output)

  if full_trace:
    # Summarize each distinct call path and how often it occurred
    content = {}
    for top, paths in trace.items():
      content[top] = []
      counter = collections.Counter(paths)

      for path, counts in counter.items():
        info = {"counts": counts, "path": path.split("/")}
        content[top].append(info)

    name = os.path.splitext(output)[0]
    with open(name + ".json", "w") as f:
      json.dump(content, f, sort_keys=True, indent=4)

  print("Parsed the log to `{}`".format(output))
178
179
def renode_log_parser(data, output, ignore_list=None):
  """Parse a Renode function-trace log and write extracted function names.

  Args:
    data: list of strings of logs from Renode
    output: string of output filename
    ignore_list: list of string (functions) to ignore
  """
  message = "Entering function"
  extractor = re.compile(r"{} (.*) at".format(message))

  # Guard the percentage math below against empty logs: dividing by
  # `len(data)` used to raise ZeroDivisionError when `data` was empty
  total = max(len(data), 1)

  ignore_count = 0
  processed = []
  for idx, line in enumerate(data):
    print("Processing {:.2f}%".format((idx + 1) / total * 100.), end="\r")

    if message not in line:
      continue

    find = extractor.findall(line)

    # Skip invalid find or unnamed functions
    if len(find) == 0 or len(find[0].split()) == 0:
      continue

    entry = find[0].split()[0]

    if ignore_list and entry in ignore_list:
      ignore_count += 1
      continue

    processed.append(entry)

  print("Extracted {} lines ({:.2f}%); {} lines are ignored ({:.2f}%)".format(
      len(processed),
      len(processed) / total * 100., ignore_count,
      ignore_count / total * 100.))

  # Write parsed log to file
  writelines(processed, output)

  print("Parsed the log to `{}`".format(output))
221
222
def parse_log(filename,
              output=None,
              re_file=None,
              source="gdb",
              ignore=None,
              full_trace=False):
  """Dispatch a raw log file to the matching parser and write the result.

  Args:
    filename(str): raw log to parse.
    output(str): destination path; defaults to `[NAME]-parsed.[EXT]`.
    re_file(str): regex json file, forwarded to the GDB parser.
    ignore(str): file listing one function name per line to skip.
    full_trace(bool): forwarded to the GDB parser only.

  Raises:
    NotImplementedError: if `source` is neither "gdb" nor "renode".
  """
  data = readlines(filename)
  print("Raw log: {} lines".format(len(data)))

  ignore_list = None
  if ignore is not None:
    ignore_list = set(readlines(ignore))
    print("* {} patterns in the ignore list".format(len(ignore_list)))

  # Derive a default output path next to the input file
  if output is None:
    base, extension = os.path.splitext(filename)
    output = "{}-parsed{}".format(base, extension)

  if source == "gdb":
    gdb_log_parser(data, output, re_file, ignore_list, full_trace)
  elif source == "renode":
    renode_log_parser(data, output, ignore_list=ignore_list)
  else:
    raise NotImplementedError
253
254
def visualize_log(filename, top=None, title=None, show=False, save=True):
  """Plot a horizontal bar chart of function frequencies in a parsed log.

  Arg:
    filename(str): parsed log file, one function name per line.
  """
  data = readlines(filename)
  print("Parsed log: {} lines".format(len(data)))

  labels, counts = get_frequency(data)

  # Frequencies are sorted ascending, so the last `top` entries are the
  # most frequent ones
  if top is not None:
    labels, counts = labels[-top:], counts[-top:]

  plt.figure(figsize=(3, 5))
  plt.barh(labels, counts)
  plt.xlabel("Frequency")

  if title:
    plt.title(title)

  if show:
    plt.show()

  if save:
    fig_name = "{}.png".format(os.path.splitext(filename)[0])
    plt.savefig(fname=fig_name, bbox_inches="tight", dpi=300)
    print("Figure saved in {}".format(fig_name))
283
284
def get_frequency(data):
  """Count occurrences of each entry, ordered by ascending frequency.

  Arg:
    data(list of str): parsed function names.

  Return:
    keys(list of str): unique entries, least frequent first.
    vals(list of int): occurrence counts matching `keys`, ascending.
  """
  # Single stable sort keeps key/count pairs aligned, including ties
  ranked = sorted(collections.Counter(data).items(), key=lambda item: item[1])

  keys = [entry for entry, _ in ranked]
  vals = [count for _, count in ranked]

  return keys, vals
300
301
if __name__ == "__main__":
  # Command-line entry point: parse a GDB/Renode profiling log and
  # optionally visualize the resulting call frequencies.
  parser = argparse.ArgumentParser()
  parser.add_argument("input", type=str, help="Input raw log file.")
  parser.add_argument("--output",
                      type=str,
                      help="Parsed log file. Default: [NAME]-parsed.[EXT]")
  parser.add_argument("--regex",
                      type=str,
                      help="Path to the regex files for parsing GDB log.")
  parser.add_argument("--visualize",
                      action="store_true",
                      help="Parse and visualize")
  parser.add_argument("--top", type=int, help="Top # to visualize")
  parser.add_argument("--source",
                      type=str,
                      default="gdb",
                      choices=["gdb", "renode"],
                      help="Source of where the log is captured")
  parser.add_argument(
      "--ignore",
      type=str,
      help="List of functions (one for each line in the file) to \
                  ignore after parsing.")
  # `--full-trace` is only meaningful for the GDB source (dumps per-path
  # counts to a sibling .json file)
  parser.add_argument("--full-trace", action="store_true", help="")
  parser.add_argument("--title",
                      type=str,
                      help="Set title for the visualized image")

  args = parser.parse_args()

  # Default the output path to `[NAME]-parsed.[EXT]` next to the input,
  # so the visualization step below has a concrete file to read back
  if args.output is None:
    fname, extension = os.path.splitext(args.input)
    args.output = "{}-parsed{}".format(fname, extension)

  parse_log(args.input, args.output, args.regex, args.source, args.ignore,
            args.full_trace)

  # Visualization reads the parsed file from disk rather than reusing
  # in-memory results
  if args.visualize:
    visualize_log(args.output, top=args.top, title=args.title)
341