#!/usr/bin/env python3
#
# Aggregate and report call-stack propagated block-device operations
# from trace output.
#
# Example:
# ./scripts/bench.py -ttrace
# ./scripts/perfbd.py trace -j -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import subprocess as sp
import sys


OBJDUMP_PATH = ['objdump']
THRESHOLD = (0.5, 0.85)
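# note THRESHOLD is a (show, highlight) pair: annotate() shows lines with
# any op above the first fraction of the largest observed value, and
# highlights lines above the second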


# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

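    # a quick sketch of the accepted forms (hypothetical values):
    #   Int(42), Int('42'), Int('0x2a')  -> 42 (int(x, 0) infers the base)
    #   Int('∞'), Int('inf')             -> m.inf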
    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

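    # ratio reports relative change for diffs: 0.0 when nothing changed,
    # ±inf when a value appears from or disappears to infinity, and 1.0
    # (+100%) when a value appears where there was none before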
    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)

# perf results
class PerfBdResult(co.namedtuple('PerfBdResult', [
        'file', 'function', 'line',
        'readed', 'proged', 'erased',
        'children'])):
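    # class-level metadata used by fold/table/report: _by are the grouping
    # keys, _fields the measured values, _sort the default sort precedence,
    # and _types maps each field to its type for parsing and formatting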
    _by = ['file', 'function', 'line']
    _fields = ['readed', 'proged', 'erased']
    _sort = ['erased', 'proged', 'readed']
    _types = {'readed': Int, 'proged': Int, 'erased': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            readed=0, proged=0, erased=0,
            children=[]):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(readed), Int(proged), Int(erased),
            children)

    def __add__(self, other):
        return PerfBdResult(self.file, self.function, self.line,
            self.readed + other.readed,
            self.proged + other.proged,
            self.erased + other.erased,
            self.children + other.children)


def openio(path, mode='r', buffering=-1):
    # allow '-' for stdin/stdout
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)

def collect_syms_and_lines(obj_path, *,
        objdump_path=None,
        **args):
    symbol_pattern = re.compile(
        r'^(?P<addr>[0-9a-fA-F]+)'
            r'\s+.*'
            r'\s+(?P<size>[0-9a-fA-F]+)'
            r'\s+(?P<name>[^\s]+)\s*$')
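    # symbol_pattern matches `objdump -t` symbol lines; the columns vary a
    # bit across objdump versions, but a hypothetical matching line is:
    #   00012abc g     F .text  0000004c lfs_bd_read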
    line_pattern = re.compile(
        r'^\s+(?:'
            # matches dir/file table
            r'(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'\s+.*'
                r'\s+(?P<path>[^\s]+)'
            # matches line opcodes
            r'|' r'\[[^\]]*\]\s+'
                r'(?:'
                    r'(?P<op_special>Special)'
                    r'|' r'(?P<op_copy>Copy)'
                    r'|' r'(?P<op_end>End of Sequence)'
                    r'|' r'File .*?to (?:entry )?(?P<op_file>\d+)'
                    r'|' r'Line .*?to (?P<op_line>[0-9]+)'
                    r'|' r'(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    r'|' r'.' r')*'
            r')$', re.IGNORECASE)
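    # line_pattern matches `objdump --dwarf=rawline` output; the exact text
    # varies across objdump versions, but hypothetical examples of the two
    # shapes (dir/file-table entries and line opcodes) look like:
    #   1     /home/user/littlefs
    #   1     1     0     0     lfs.c
    #   [0x0000005a]  Advance Line by 5 to 424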

    # figure out symbol addresses
    syms = {}
    sym_at = []
    cmd = (objdump_path or OBJDUMP_PATH) + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
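    # these are the DWARF line-program register defaults (file=1, line=1,
    # address=0); End of Sequence resets them again below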
    cmd = (objdump_path or OBJDUMP_PATH) + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at


def collect_job(obj_path, path, start, stop,
        syms, sym_at, lines, line_at, *,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    trace_pattern = re.compile(
        r'^(?P<file>[^:]*):(?P<line>[0-9]+):trace:\s*(?P<prefix>[^\s]*?bd_)(?:'
            r'(?P<read>read)\('
                r'\s*(?P<read_ctx>\w+)' r'\s*,'
                r'\s*(?P<read_block>\w+)' r'\s*,'
                r'\s*(?P<read_off>\w+)' r'\s*,'
                r'\s*(?P<read_buffer>\w+)' r'\s*,'
                r'\s*(?P<read_size>\w+)' r'\s*\)'
            r'|' r'(?P<prog>prog)\('
                r'\s*(?P<prog_ctx>\w+)' r'\s*,'
                r'\s*(?P<prog_block>\w+)' r'\s*,'
                r'\s*(?P<prog_off>\w+)' r'\s*,'
                r'\s*(?P<prog_buffer>\w+)' r'\s*,'
                r'\s*(?P<prog_size>\w+)' r'\s*\)'
            r'|' r'(?P<erase>erase)\('
                r'\s*(?P<erase_ctx>\w+)' r'\s*,'
                r'\s*(?P<erase_block>\w+)'
                r'\s*\(\s*(?P<erase_size>\w+)\s*\)' r'\s*\)' r')\s*$')
    frame_pattern = re.compile(
        r'^\s+at (?P<addr>\w+)\s*$')
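    # trace_pattern matches bd trace lines, frame_pattern the tab-prefixed
    # backtrace frames that may follow them; hypothetical examples:
    #   lfs.c:2512:trace: lfs_bd_read(0x8000, 0x2, 0x400, 0x9000, 0x10)
    #   \tat 0x55f0
    #   lfs.c:1234:trace: lfs_bd_erase(0x8000, 0x4 (0x1000))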

    # parse all of the trace files for read/prog/erase operations
    last_filtered = False
    last_file = None
    last_line = None
    last_sym = None
    last_readed = 0
    last_proged = 0
    last_erased = 0
    last_stack = []
    last_delta = None
    at_cache = {}
    results = {}

    def commit():
        # fallback to just capturing top-level measurements
        if not last_stack:
            file = last_file
            sym = last_sym
            line = last_line

            # ignore filtered sources
            if sources is not None:
                if not any(
                        os.path.abspath(file)
                            == os.path.abspath(s)
                        for s in sources):
                    return
            else:
                # default to only cwd
                if not everything and not os.path.commonpath([
                        os.getcwd(),
                        os.path.abspath(file)]) == os.getcwd():
                    return

            # simplify path
            if os.path.commonpath([
                    os.getcwd(),
                    os.path.abspath(file)]) == os.getcwd():
                file = os.path.relpath(file)
            else:
                file = os.path.abspath(file)

            results[(file, sym, line)] = (
                last_readed,
                last_proged,
                last_erased,
                {})
        else:
            # tail-recursively propagate measurements
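            # for example (hypothetical), with last_stack = [C, B, A]
            # (innermost frame first) and depth=2, this credits the ops to
            # C, to B with child C, and to A with child B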
            for i in range(len(last_stack)):
                results_ = results
                for j in reversed(range(i+1)):
                    if i+1-j > depth:
                        break

                    # propagate
                    name = last_stack[j]
                    if name in results_:
                        r, p, e, children = results_[name]
                    else:
                        r, p, e, children = 0, 0, 0, {}
                    results_[name] = (
                        r+last_readed,
                        p+last_proged,
                        e+last_erased,
                        children)

                    # recurse
                    results_ = results_[name][-1]

    with openio(path) as f:
        # try to jump to the middle of the file? we need to step out of
        # the utf8-safe text wrapper and then resync with the next newline
        # to avoid parsing half a line
        if start is not None and start > 0:
            fd = f.fileno()
            os.lseek(fd, start, os.SEEK_SET)
            while os.read(fd, 1) not in {b'\n', b'\r', b''}:
                pass
            f = os.fdopen(fd)

        for line in f:
            # we have a lot of data, try to take a few shortcuts,
            # string search is much faster than regex so try to use
            # regex as late as possible.
            if not line.startswith('\t'):
                if last_filtered:
                    commit()
                last_filtered = False

                # done processing our slice?
                if stop is not None:
                    if os.lseek(f.fileno(), 0, os.SEEK_CUR) > stop:
                        break

                if 'trace' in line and 'bd' in line:
                    m = trace_pattern.match(line)
                    if m:
                        last_filtered = True
                        last_file = os.path.abspath(m.group('file'))
                        last_line = int(m.group('line'), 0)
                        last_sym = m.group('prefix')
                        last_readed = 0
                        last_proged = 0
                        last_erased = 0
                        last_stack = []
                        last_delta = None

                        if m.group('read'):
                            last_sym += m.group('read')
                            last_readed += int(m.group('read_size'), 0)
                        elif m.group('prog'):
                            last_sym += m.group('prog')
                            last_proged += int(m.group('prog_size'), 0)
                        elif m.group('erase'):
                            last_sym += m.group('erase')
                            last_erased += int(m.group('erase_size'), 0)

            elif last_filtered:
                m = frame_pattern.match(line)
                if m:
                    addr_ = int(m.group('addr'), 0)

                    # before we can do anything with addr, we need to
                    # reverse ASLR, fortunately we know the file+line of
                    # the first stack frame, so we can use that as a point
                    # of reference
                    if last_delta is None:
                        i = bisect.bisect(lines, (last_file, last_line),
                            key=lambda x: (x[0], x[1]))
                        if i > 0:
                            last_delta = lines[i-1][2] - addr_
                        else:
                            # can't reverse ASLR, give up on backtrace
                            commit()
                            last_filtered = False
                            continue

                    addr = addr_ + last_delta

                    # cached?
                    if addr in at_cache:
                        cached = at_cache[addr]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, sym, line = cached
                    else:
                        # find sym
                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
                        # check that we're actually in the sym's size
                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
                            _, sym, _ = sym_at[i-1]
                        else:
                            sym = hex(addr)

                        # filter out internal/unknown functions
                        if not everything and (
                                sym.startswith('__')
                                or sym.startswith('0')
                                or sym.startswith('-')
                                or sym == '_start'):
                            at_cache[addr] = None
                            continue

                        # find file+line
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        elif len(last_stack) == 0:
                            file, line = last_file, last_line
                        else:
                            file, line = re.sub(
                                r'(\.o)?$', '.c', obj_path, count=1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file)
                                        == os.path.abspath(s)
                                    for s in sources):
                                at_cache[addr] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[addr] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[addr] = file, sym, line

                    last_stack.append((file, sym, line))

                    # stop propagating?
                    if propagate and len(last_stack) >= propagate:
                        commit()
                        last_filtered = False
        if last_filtered:
            commit()

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, p, e, children) in results.items():
            results_.append(PerfBdResult(*name,
                r, p, e,
                children=to_results(children)))
        return results_

    return to_results(results)

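# starapply exists because mp.Pool.imap_unordered passes a single argument
# to its callable; packing (f, args, kwargs) tuples lets us ship arbitrary
# calls to the worker processes (everything involved must be picklable)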
def starapply(args):
    f, args, kwargs = args
    return f(*args, **kwargs)

def collect(obj_path, trace_paths, *,
        jobs=None,
        **args):
    # automatic job detection?
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    # find sym/line info to reverse ASLR
    syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)

    if jobs is not None:
        # try to split up files so that even single files can be processed
        # in parallel
        #
        # this looks naive, since we're splitting up text files by bytes, but
        # we do proper backtrace delimiting in collect_job
        trace_ranges = []
        for path in trace_paths:
            if path == '-':
                trace_ranges.append([(None, None)])
                continue

            size = os.path.getsize(path)
            if size == 0:
                trace_ranges.append([(None, None)])
                continue

            perjob = m.ceil(size / jobs)
            trace_ranges.append([(i, i+perjob) for i in range(0, size, perjob)])
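        # at this point trace_ranges holds (start, stop) byte ranges, e.g. a
        # hypothetical 1 MiB trace with jobs=4 becomes four 256 KiB slices;
        # collect_job resyncs each slice to the next newline so entries on
        # the boundaries aren't parsed twice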

        results = []
        with mp.Pool(jobs) as p:
            for results_ in p.imap_unordered(
                    starapply,
                    ((collect_job, (obj_path, path, start, stop,
                        syms, sym_at, lines, line_at),
                        args)
                        for path, ranges in zip(trace_paths, trace_ranges)
                        for start, stop in ranges)):
                results.extend(results_)

    else:
        results = []
        for path in trace_paths:
            results.extend(collect_job(obj_path, path, None, None,
                syms, sym_at, lines, line_at,
                **args))

    return results


def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results_ = []
        for r in results:
            if all(getattr(r, k) in vs for k, vs in defines):
                results_.append(r)
        results = results_

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    # fold recursively
    folded_ = []
    for r in folded:
        folded_.append(r._replace(children=fold(
            Result, r.children,
            by=by,
            defines=defines)))
    folded = folded_

    return folded

def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
            types[k].ratio(
                getattr(table.get(n), k, None),
                getattr(diff_table.get(n), k, None))
            for k in fields),
            reverse=True)
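    # sort keys are applied in reverse so the first -s/-S field wins; the
    # xor flips direction since numeric fields default to descending
    # (largest first) while the name key defaults to ascending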
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                key=lambda n: tuple(
                    (getattr(table[n], k),)
                    if getattr(table.get(n), k, None) is not None else ()
                    for k in ([k] if k else [
                        k for k in Result._sort if k in fields])),
                reverse=reverse ^ (not k or k in Result._fields))


    # build up our lines
    lines = []

    # header
    header = []
    header.append('%s%s' % (
        ','.join(by),
        ' (%d added, %d removed)' % (
            sum(1 for n in table if n not in diff_table),
            sum(1 for n in diff_table if n not in table))
            if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                        getattr(r, k, None),
                        getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and column -1
    # the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]
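    # this rounds each width up so that width+1 (the column plus its
    # separating space) is a multiple of 4, keeping columns on 4-char stops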

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s  %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))

    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(
                        key=lambda n: tuple(
                            (getattr(table_[n], k),)
                            if getattr(table_.get(n), k, None) is not None
                            else ()
                            for k in ([k] if k else [
                                k for k in Result._sort if k in fields])),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s  %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                         prefixes[2+is_last] + "'-> ",
                         prefixes[2+is_last] + "|   ",
                         prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s  %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                     "'-> ",
                     "|   ",
                     "    "))

    print('%-*s  %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))


def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        read_threshold=None,
        prog_threshold=None,
        erase_threshold=None,
        **args):
    # figure out the thresholds
    if threshold is None:
        threshold = THRESHOLD
    elif len(threshold) == 1:
        threshold = threshold[0], threshold[0]
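    # e.g. -T0.6 gives (0.6, 0.6), while -T0.5,0.9 shows lines above 0.5
    # and highlights lines above 0.9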

    if read_threshold is None:
        read_t0, read_t1 = threshold
    elif len(read_threshold) == 1:
        read_t0, read_t1 = read_threshold[0], read_threshold[0]
    else:
        read_t0, read_t1 = read_threshold
    read_t0, read_t1 = min(read_t0, read_t1), max(read_t0, read_t1)

    if prog_threshold is None:
        prog_t0, prog_t1 = threshold
    elif len(prog_threshold) == 1:
        prog_t0, prog_t1 = prog_threshold[0], prog_threshold[0]
    else:
        prog_t0, prog_t1 = prog_threshold
    prog_t0, prog_t1 = min(prog_t0, prog_t1), max(prog_t0, prog_t1)

    if erase_threshold is None:
        erase_t0, erase_t1 = threshold
    elif len(erase_threshold) == 1:
        erase_t0, erase_t1 = erase_threshold[0], erase_threshold[0]
    else:
        erase_t0, erase_t1 = erase_threshold
    erase_t0, erase_t1 = min(erase_t0, erase_t1), max(erase_t0, erase_t1)

    # find maxs
    max_readed = max(it.chain((float(r.readed) for r in results), [1]))
    max_proged = max(it.chain((float(r.proged) for r in results), [1]))
    max_erased = max(it.chain((float(r.erased) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if (float(r.readed) / max_readed >= read_t0
                        or float(r.proged) / max_proged >= prog_t0
                        or float(r.erased) / max_erased >= erase_t0):
                    if last is not None and line - last.stop <= args['context']:
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(fn for s, fn in spans if i+1 in s),
                        '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                if i+1 in table:
                    r = table[i+1]
                    line = '%-*s // %s readed, %s proged, %s erased' % (
                        args['width'],
                        line,
                        r.readed,
                        r.proged,
                        r.erased)

                    if args['color']:
                        if (float(r.readed) / max_readed >= read_t1
                                or float(r.proged) / max_proged >= prog_t1
                                or float(r.erased) / max_erased >= erase_t1):
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif (float(r.readed) / max_readed >= read_t0
                                or float(r.proged) / max_proged >= prog_t0
                                or float(r.erased) / max_erased >= erase_t0):
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)


def report(obj_path='', trace_paths=[], *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        **args):
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        results = collect(obj_path, trace_paths, **args)
    else:
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                if not any('perfbd_'+k in r and r['perfbd_'+k].strip()
                        for k in PerfBdResult._fields):
                    continue
                try:
                    results.append(PerfBdResult(
                        **{k: r[k] for k in PerfBdResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                            if 'perfbd_'+k in r and r['perfbd_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfBdResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(
                key=lambda r: tuple(
                    (getattr(r, k),) if getattr(r, k) is not None else ()
                    for k in ([k] if k else PerfBdResult._sort)),
                reverse=reverse ^ (not k or k in PerfBdResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfBdResult._by)
                + ['perfbd_'+k for k in (
                    fields if fields is not None else PerfBdResult._fields)])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k) for k in (
                        by if by is not None else PerfBdResult._by)}
                    | {'perfbd_'+k: getattr(r, k) for k in (
                        fields if fields is not None else PerfBdResult._fields)})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    if not any('perfbd_'+k in r and r['perfbd_'+k].strip()
                            for k in PerfBdResult._fields):
                        continue
                    try:
                        diff_results.append(PerfBdResult(
                            **{k: r[k] for k in PerfBdResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                                if 'perfbd_'+k in r
                                    and r['perfbd_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(PerfBdResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if (args.get('annotate')
                or args.get('threshold')
                or args.get('read_threshold')
                or args.get('prog_threshold')
                or args.get('erase_threshold')):
            # annotate sources
            annotate(PerfBdResult, results, **args)
        else:
            # print table
            table(PerfBdResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields,
                sort=sort,
                **args)


def main(**args):
    # this script only reports; there is no separate record mode
    return report(**args)


if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Aggregate and report call-stack propagated "
            "block-device operations from trace output.",
        allow_abbrev=False)
    parser.add_argument(
        'obj_path',
        nargs='?',
        help="Input executable for mapping addresses to symbols.")
    parser.add_argument(
        'trace_paths',
        nargs='*',
        help="Input *.trace files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfBdResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfBdResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, option == '-S'))
    parser.add_argument(
        '-s', '--sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates up "
            "to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may not "
            "terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with coverage info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with any ops above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--read-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with reads above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--prog-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with progs above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--erase-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with erases above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--objdump-path',
        type=lambda x: x.split(),
        default=OBJDUMP_PATH,
        help="Path to the objdump executable, may include flags. "
            "Defaults to %r." % OBJDUMP_PATH)
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))