1#!/usr/bin/env python3
2#
3# Script to aggregate and report Linux perf results.
4#
5# Example:
6# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
7# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
8#
9# Copyright (c) 2022, The littlefs authors.
10# SPDX-License-Identifier: BSD-3-Clause
11#
12
import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile
29
30# TODO support non-zip perf results?
31
32
33PERF_PATH = ['perf']
34PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
35PERF_FREQ = 100
36OBJDUMP_PATH = ['objdump']
37THRESHOLD = (0.5, 0.85)
38
39
# integer fields
class Int(co.namedtuple('Int', 'x')):
    """An integer measurement that also supports +-infinity.

    Strings are parsed with int(x, 0), so 0x/0o/0b prefixes work, and
    the spellings '∞'/'inf' (optionally signed) map to +-math.inf.
    Provides the table/diff/ratio hooks used by the report renderer,
    which may call them unbound with None in place of missing results.
    """
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                # (raw strings here, non-raw '\s' escapes are deprecated)
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        # infinity can't be represented as an int
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    # fixed-width renderings for table cells
    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        """Render the signed difference between two results.

        May be called unbound with self/other as None for added/removed
        entries, in which case the missing side counts as 0.
        """
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        """Return the fractional change from other (old) to self (new).

        Also callable unbound with None sides; a new value with no old
        value reports as +100%.
        """
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)
117
# perf results
class PerfResult(co.namedtuple('PerfResult', [
        'file', 'function', 'line',
        'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
        'children'])):
    """One aggregated perf measurement for a file/function/line.

    The _by/_fields/_sort/_types class attributes drive the generic
    fold/table machinery: _by names the identity fields, _fields the
    measured values, _sort the default sort priority, and _types maps
    each field to its value type.
    """
    _by = ['file', 'function', 'line']
    _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
    _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches']
    _types = {
        'cycles': Int,
        'bmisses': Int, 'branches': Int,
        'cmisses': Int, 'caches': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
            children=None):
        # use a None sentinel instead of a shared mutable []-default
        # for children
        return super().__new__(cls, file, function, int(Int(line)),
            Int(cycles), Int(bmisses), Int(branches), Int(cmisses), Int(caches),
            children if children is not None else [])

    def __add__(self, other):
        # merge two measurements of the same file/function/line,
        # summing counters and concatenating callchain children
        return PerfResult(self.file, self.function, self.line,
            self.cycles + other.cycles,
            self.bmisses + other.bmisses,
            self.branches + other.branches,
            self.cmisses + other.cmisses,
            self.caches + other.caches,
            self.children + other.children)
147
148
def openio(path, mode='r', buffering=-1):
    """Open path, treating '-' as stdin (read modes) or stdout.

    The real stdin/stdout file descriptor is duplicated first, so the
    returned file object can be closed without closing the process's
    actual stdin/stdout.
    """
    # allow '-' for stdin/stdout
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)
158
# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_path=PERF_PATH,
        **args):
    """Run command under perf-record and append its raw measurements
    to the zip file at output.

    command     -- argv list of the program to profile
    output      -- path of the zip file that collects per-run perf data
    perf_freq   -- sampling frequency (-F), ignored if perf_period given
    perf_period -- sampling period (-c), takes precedence over perf_freq
    perf_events -- comma-separated perf event list (-e)
    perf_path   -- argv prefix for invoking perf, may contain extra args

    Returns the profiled command's exit code.
    """
    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation, filter(None, ...) drops the
        # options that don't apply
        perf = perf_path + list(filter(None, [
            'record',
            '-F%s' % perf_freq
                if perf_freq is not None
                and perf_period is None else None,
            '-c%s' % perf_period
                if perf_period is not None else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)

        except KeyboardInterrupt:
            # report a nonzero error code on ctrl-c; EOWNERDEAD is
            # presumably just a distinctive sentinel value here
            err = errno.EOWNERDEAD

        # synchronize access, multiple runners may append to the same
        # zip file concurrently
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file, one entry per pid so
        # parallel runs don't clobber each other
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err
207
208
# try to only process each dso once
210#
211# note this only caches with the non-keyword arguments
def multiprocessing_cache(f):
    """Decorator that memoizes f across multiprocessing workers.

    Results are stored both in a per-process dict (lock-free fast path)
    and in a multiprocessing.Manager dict shared by all workers, so each
    unique tuple of positional arguments is only computed once globally.

    Note this only caches on the non-keyword arguments.
    """
    local_cache = {}
    manager = mp.Manager()
    global_cache = manager.dict()
    lock = mp.Lock()

    # preserve f's name/docstring on the wrapper
    @ft.wraps(f)
    def multiprocessing_cache(*args, **kwargs):
        # check local cache? no lock needed
        if args in local_cache:
            return local_cache[args]
        # check global cache? needs the shared lock
        with lock:
            if args in global_cache:
                v = global_cache[args]
                local_cache[args] = v
                return v
            # fall back to calling the function; we hold the lock so
            # concurrent workers can't duplicate the work
            v = f(*args, **kwargs)
            global_cache[args] = v
            local_cache[args] = v
            return v

    return multiprocessing_cache
235
@multiprocessing_cache
def collect_syms_and_lines(obj_path, *,
        objdump_path=None,
        **args):
    """Parse an object file's symbol table and dwarf line info.

    Returns (syms, sym_at, lines, line_at) where:
    syms    -- map of symbol name -> set of (addr, size) pairs
    sym_at  -- list of (addr, name, size) sorted by addr, deduplicated
    lines   -- list of (file, line, addr) sorted by file/line
    line_at -- list of (addr, file, line) sorted by addr

    Results are cached per obj_path via multiprocessing_cache.
    """
    symbol_pattern = re.compile(
        r'^(?P<addr>[0-9a-fA-F]+)'
            r'\s+.*'
            r'\s+(?P<size>[0-9a-fA-F]+)'
            r'\s+(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        r'^\s+(?:'
            # matches dir/file table
            r'(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'\s+.*'
                r'\s+(?P<path>[^\s]+)'
            # matches line opcodes
            '|' r'\[[^\]]*\]\s+'
                r'(?:'
                    r'(?P<op_special>Special)'
                    '|' r'(?P<op_copy>Copy)'
                    '|' r'(?P<op_end>End of Sequence)'
                    '|' r'File .*?to (?:entry )?(?P<op_file>\d+)'
                    '|' r'Line .*?to (?P<op_line>[0-9]+)'
                    '|' r'(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    '|' '.' r')*'
            r')$', re.IGNORECASE)

    # figure out symbol addresses and file+line ranges
    syms = {}
    sym_at = []
    cmd = objdump_path + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        # note the local m shadows the math alias, harmless since math
        # isn't used in this function
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir_ = int(m.group('dir'))
                if dir_ in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir_],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                # special/copy/end opcodes emit a row
                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                # end-of-sequence resets the state machine
                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates, note we compare against the
    # last _kept_ entry (lines_), this previously compared against the
    # input list by mistake
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at
385
386
def collect_decompressed(path, *,
        perf_path=PERF_PATH,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    """Parse one decompressed perf data file into PerfResults.

    path       -- path to a raw perf.data file
    perf_path  -- argv prefix for invoking perf, may contain extra args
    sources    -- optional list of source files to keep, others skipped
    everything -- if true, keep internal/kernel frames and files
                  outside the cwd
    propagate  -- stop propagating samples past this stack depth,
                  0 means unbounded
    depth      -- how many levels of callchain children to record

    Returns a list of PerfResults with nested children representing
    the callchain.
    """
    sample_pattern = re.compile(
        r'(?P<comm>\w+)'
        r'\s+(?P<pid>\w+)'
        r'\s+(?P<time>[\w.]+):'
        r'\s*(?P<period>\w+)'
        r'\s+(?P<event>[^:]+):')
    frame_pattern = re.compile(
        r'\s+(?P<addr>\w+)'
        r'\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
        r'\s+\((?P<dso>[^\)]+)\)')
    # map perf event names to PerfResult field names
    events = {
        'cycles':           'cycles',
        'branch-misses':    'bmisses',
        'branches':         'branches',
        'cache-misses':     'cmisses',
        'cache-references': 'caches'}

    # note perf_path may contain extra args
    cmd = perf_path + [
        'script',
        '-i%s' % path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)

    last_filtered = False
    last_event = ''
    last_period = 0
    last_stack = []
    deltas = co.defaultdict(lambda: {})
    syms_ = co.defaultdict(lambda: {})
    at_cache = {}
    results = {}

    def commit():
        # tail-recursively propagate the current sample into every
        # suffix of the stack, limited to depth levels of nesting
        for i in range(len(last_stack)):
            results_ = results
            for j in reversed(range(i+1)):
                if i+1-j > depth:
                    break

                # propagate
                name = last_stack[j]
                if name not in results_:
                    results_[name] = (co.defaultdict(lambda: 0), {})
                results_[name][0][last_event] += last_period

                # recurse
                results_ = results_[name][1]

    for line in proc.stdout:
        # we need to process a lot of data, so wait to use regex as late
        # as possible
        if not line.startswith('\t'):
            # a non-indented line starts a new sample, commit the
            # previous one first
            if last_filtered:
                commit()
            last_filtered = False

            if line:
                m = sample_pattern.match(line)
                if m and m.group('event') in events:
                    last_filtered = True
                    last_event = m.group('event')
                    last_period = int(m.group('period'), 0)
                    last_stack = []

        elif last_filtered:
            m = frame_pattern.match(line)
            if m:
                # filter out internal/kernel functions
                if not everything and (
                        m.group('sym').startswith('__')
                        or m.group('sym').startswith('0')
                        or m.group('sym').startswith('-')
                        or m.group('sym').startswith('[')
                        or m.group('dso').startswith('/usr/lib')):
                    continue

                dso = m.group('dso')
                sym = m.group('sym')
                off = int(m.group('off'), 0) if m.group('off') else 0
                addr_ = int(m.group('addr'), 16)

                # get the syms/lines for the dso, this is cached
                syms, sym_at, lines, line_at = collect_syms_and_lines(
                    dso,
                    **args)

                # ASLR is tricky, we have symbols+offsets, but static symbols
                # means we may have multiple options for each symbol.
                #
                # To try to solve this, we use previous seen symbols to build
                # confidence for the correct ASLR delta. This means we may
                # guess incorrectly for early symbols, but this will only affect
                # a few samples.
                if sym in syms:
                    sym_addr_ = addr_ - off

                    # track possible deltas?
                    for sym_addr, size in syms[sym]:
                        delta = sym_addr - sym_addr_
                        if delta not in deltas[dso]:
                            deltas[dso][delta] = sum(
                                abs(a_+delta - a)
                                for s, (a_, _) in syms_[dso].items()
                                for a, _ in syms[s])
                    # NOTE(review): sym_addr here is the last candidate
                    # bound by the loop above -- looks intentional as an
                    # error accumulator, but worth confirming
                    for delta in deltas[dso].keys():
                        deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
                    syms_[dso][sym] = sym_addr_, size

                    # guess the best delta, lowest accumulated error wins
                    delta, _ = min(deltas[dso].items(),
                        key=lambda x: (x[1], x[0]))
                    addr = addr_ + delta

                    # cached?
                    if (dso,addr) in at_cache:
                        cached = at_cache[(dso,addr)]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, line = cached
                    else:
                        # find file+line, note bisect's key arg needs
                        # python >= 3.10
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        else:
                            file, line = re.sub(r'(\.o)?$', '.c', dso,
                                count=1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file) == os.path.abspath(s)
                                    for s in sources):
                                at_cache[(dso,addr)] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[(dso,addr)] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[(dso,addr)] = file, line
                else:
                    # unknown symbol, guess the source file from the dso
                    file, line = re.sub(r'(\.o)?$', '.c', dso, count=1), 0

                last_stack.append((file, sym, line))

                # stop propagating?
                if propagate and len(last_stack) >= propagate:
                    commit()
                    last_filtered = False
    # commit the final sample
    if last_filtered:
        commit()

    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        sys.exit(-1)

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, children) in results.items():
            results_.append(PerfResult(*name,
                **{events[k]: v for k, v in r.items()},
                children=to_results(children)))
        return results_

    return to_results(results)
583
def collect_job(path, i, **args):
    """Collect one perf entry from the zip at path.

    perf can't read directly from a zip member, so the entry is first
    decompressed into a named temporary file.
    """
    with zipfile.ZipFile(path) as z, \
            z.open(i) as src, \
            tempfile.NamedTemporaryFile('wb') as dst:
        shutil.copyfileobj(src, dst)
        dst.flush()
        return collect_decompressed(dst.name, **args)
594
def starapply(args):
    """Unpack an (f, args, kwargs) triple and apply it.

    Lets multiprocessing map functions, which pass a single argument,
    dispatch arbitrary calls.
    """
    func, positional, keyword = args
    return func(*positional, **keyword)
598
def collect(perf_paths, *,
        jobs=None,
        **args):
    """Collect results from one or more .perf files.

    jobs selects parallelism: None runs serially, 0 uses every
    available core, any other value is the worker count.
    """
    # jobs == 0 means one worker per available core
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    # each .perf file is actually a zip file containing perf files from
    # multiple runs, flatten into (path, member) work items
    work = []
    for path in perf_paths:
        with zipfile.ZipFile(path) as z:
            for info in z.infolist():
                work.append((path, info))

    results = []
    if jobs is None:
        # serial collection
        for path, info in work:
            results.extend(collect_job(path, info, **args))
    else:
        # we're dealing with a lot of data but also surprisingly
        # parallelizable
        with mp.Pool(jobs) as pool:
            jobs_ = ((collect_job, (path, info), args)
                for path, info in work)
            for rs in pool.imap_unordered(starapply, jobs_):
                results.extend(rs)

    return results
628
629
def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    """Merge results that share the same identity fields.

    by      -- fields to group by, defaults to Result._by; an empty
               list merges everything into a single result
    defines -- optional (field, values) filters applied before folding

    Children are folded recursively with the same settings.
    """
    if by is None:
        by = Result._by

    # sanity-check the requested fields
    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results = [r for r in results
            if all(getattr(r, k) in vs for k, vs in defines)]

    # group results by their identity tuple, preserving first-seen order
    groups = co.OrderedDict()
    for r in results:
        key = tuple(getattr(r, k) for k in by)
        groups.setdefault(key, []).append(r)

    # merge each group via the result type's __add__, then fold the
    # merged children recursively
    folded = []
    for rs in groups.values():
        merged = ft.reduce(lambda a, b: a + b, rs)
        folded.append(merged._replace(children=fold(
            Result, merged.children,
            by=by,
            defines=defines)))

    return folded
673
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    """Print results as an aligned table, optionally diffed against a
    baseline and optionally expanded into a callchain tree.

    Result       -- result type, provides _by/_fields/_sort/_types
    results      -- list of results to render
    diff_results -- optional baseline results to diff against
    by           -- fields to name rows by, defaults to Result._by
    fields       -- value fields to show, defaults to Result._fields
    sort         -- list of (field, reverse) pairs for row ordering
    summary      -- if true, only print the TOTAL row
    all          -- if true, keep rows whose diff ratios are all zero
    percent      -- if true, show new values with percent changes only
    depth        -- number of children levels to print as a tree
    """
    # the all parameter shadows the builtin; recover the builtin here.
    # NOTE(review): __builtins__ is the builtins module when run as a
    # script, but a dict when imported -- works for script usage
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
            types[k].ratio(
                getattr(table.get(n), k, None),
                getattr(diff_table.get(n), k, None))
            for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                key=lambda n: tuple(
                    (getattr(table[n], k),)
                    if getattr(table.get(n), k, None) is not None else ()
                    for k in ([k] if k else [
                        k for k in Result._sort if k in fields])),
                reverse=reverse ^ (not k or k in Result._fields))


    # build up our lines
    lines = []

    # header row; column 0 is the name, column -1 the ratio suffix
    header = []
    header.append('%s%s' % (
        ','.join(by),
        ' (%d added, %d removed)' % (
            sum(1 for n in table if n not in diff_table),
            sum(1 for n in diff_table if n not in table))
            if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        # old/new/diff column triples
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    # render one table row; note the type hooks (table/diff_table/
    # diff_diff) are called unbound, so r/diff_r may be None
    def table_entry(name, r, diff_r=None, ratios=[]):
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                        getattr(r, k, None),
                        getattr(diff_r, k, None)))
        # trailing column holds the percent-change annotations
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                # skip unchanged rows unless --all
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total, fold with by=[] merges everything into one result
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and column -1
    # the ratios, so those are handled a bit differently
    # (widths are rounded up to a multiple of 4, minus the separator)
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s  %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))

    if not summary:
        # prefixes are the four tree-drawing strings: (mid-entry,
        # last-entry, mid-continuation, last-continuation)
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(
                        key=lambda n: tuple(
                            (getattr(table_[n], k),)
                            if getattr(table_.get(n), k, None) is not None
                            else ()
                            for k in ([k] if k else [
                                k for k in Result._sort if k in fields])),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s  %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                         prefixes[2+is_last] + "'-> ",
                         prefixes[2+is_last] + "|   ",
                         prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s  %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                     "'-> ",
                     "|   ",
                     "    "))

    # TOTAL row last
    print('%-*s  %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))
919
920
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        branches=False,
        caches=False,
        **args):
    """Print source files annotated with perf results.

    By default only "hot" lines — lines whose samples exceed the lower
    threshold — are shown, with args['context'] lines of surrounding
    context; annotate=True shows every line of every referenced file.

    Args:
        Result: The result type, provides per-line sample fields.
        results: List of results to annotate with.
        annotate: Show all lines, not just hot spans.
        threshold: Optional 1- or 2-tuple of thresholds (t0, t1) as a
            fraction of the hottest line; lines >= t0 are shown/colored,
            lines >= t1 are highlighted more aggressively. Defaults to
            THRESHOLD.
        branches: Report branches/branch misses instead of cycles.
        caches: Report cache accesses/misses instead of cycles.
        **args: Must provide 'context', 'width', and 'color'.
    """
    # figure out the threshold
    if threshold is None:
        t0, t1 = THRESHOLD
    elif len(threshold) == 1:
        t0, t1 = threshold[0], threshold[0]
    else:
        t0, t1 = threshold
    t0, t1 = min(t0, t1), max(t0, t1)

    # which event does the threshold apply to?
    if not branches and not caches:
        tk = 'cycles'
    elif branches:
        tk = 'bmisses'
    else:
        tk = 'cmisses'

    # find max cycles, note the chained [1] avoids a divide-by-zero below
    max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if float(getattr(r, tk)) / max_ >= t0:
                    if last is not None and line - last.stop <= args['context']:
                        # abuts/overlaps the previous span? extend it
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    # print a hunk header, labeled with the function of
                    # the span we're entering (i+1 is always inside some
                    # span here, since we didn't skip this line)
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next((f_ for s, f_ in spans if i+1 in s), None),
                        '\x1b[m' if args['color'] else ''))

                # build line, strip the trailing newline
                if line.endswith('\n'):
                    line = line[:-1]

                # append sample info to lines with any samples
                r = table.get(i+1)
                if r is not None and (
                        float(r.cycles) > 0
                        if not branches and not caches
                        else float(r.bmisses) > 0 or float(r.branches) > 0
                        if branches
                        else float(r.cmisses) > 0 or float(r.caches) > 0):
                    line = '%-*s // %s' % (
                        args['width'],
                        line,
                        '%s cycles' % r.cycles
                        if not branches and not caches
                        else '%s bmisses, %s branches' % (r.bmisses, r.branches)
                        if branches
                        else '%s cmisses, %s caches' % (r.cmisses, r.caches))

                    # color hot lines based on the thresholds
                    if args['color']:
                        if float(getattr(r, tk)) / max_ >= t1:
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif float(getattr(r, tk)) / max_ >= t0:
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
1016
1017
def report(perf_paths, *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        branches=False,
        caches=False,
        **args):
    """Aggregate perf results and report them.

    Results are collected from perf_paths (or read from a CSV file via
    --use), folded by the requested fields, optionally sorted, written
    to CSV (--output), diffed against a previous CSV (--diff), and
    finally rendered as either annotated source or a table.

    Args:
        perf_paths: Input *.perf files to collect results from.
        by: Fields to group results by.
        fields: Result fields to show.
        defines: Only include results where these fields match.
        sort: List of (field, reverse) pairs to sort by.
        branches: Show branches/branch misses instead of cycles.
        caches: Show cache accesses/misses instead of cycles.
        **args: Remaining CLI options (color, depth, use, output, diff,
            quiet, annotate, threshold, ...).
    """
    # figure out what color should be
    # NOTE: sys is bound module-globally by the import in the
    # __main__ block below, so this assumes script usage
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        results = collect(perf_paths, **args)
    else:
        # reparse results from a previously written CSV, perf fields are
        # prefixed with 'perf_' in the CSV, rows without any are skipped
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                if not any('perf_'+k in r and r['perf_'+k].strip()
                        for k in PerfResult._fields):
                    continue
                try:
                    results.append(PerfResult(
                        **{k: r[k] for k in PerfResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perf_'+k] for k in PerfResult._fields
                            if 'perf_'+k in r and r['perf_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            # note the inner comprehension deliberately shadows k when
            # falling back to the default sort fields
            results.sort(
                key=lambda r: tuple(
                    (getattr(r, k),) if getattr(r, k) is not None else ()
                    for k in ([k] if k else PerfResult._sort)),
                reverse=reverse ^ (not k or k in PerfResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfResult._by)
                + ['perf_'+k for k in (
                    fields if fields is not None else PerfResult._fields)])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k) for k in (
                        by if by is not None else PerfResult._by)}
                    | {'perf_'+k: getattr(r, k) for k in (
                        fields if fields is not None else PerfResult._fields)})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    if not any('perf_'+k in r and r['perf_'+k].strip()
                            for k in PerfResult._fields):
                        continue
                    try:
                        diff_results.append(PerfResult(
                            **{k: r[k] for k in PerfResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perf_'+k] for k in PerfResult._fields
                                if 'perf_'+k in r and r['perf_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            # missing diff file is treated as no previous results
            pass

        # fold
        diff_results = fold(PerfResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if args.get('annotate') or args.get('threshold'):
            # annotate sources
            annotate(PerfResult, results,
                branches=branches,
                caches=caches,
                **args)
        else:
            # print table
            table(PerfResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields if fields is not None
                    else ['cycles'] if not branches and not caches
                    else ['bmisses', 'branches'] if branches
                    else ['cmisses', 'caches'],
                sort=sort,
                **args)
1129
1130
def main(**args):
    """Entry point, runs either record mode (-R) or report mode."""
    # -R/--record runs a command under perf, otherwise we aggregate and
    # report previously recorded results
    mode = record if args.get('record') else report
    return mode(**args)
1136
1137
if __name__ == "__main__":
    import argparse
    import sys

    # bit of a hack, but parse_intermixed_args and REMAINDER are
    # incompatible, so we need to figure out what we want before running
    # argparse
    if '-R' in sys.argv or '--record' in sys.argv:
        nargs = argparse.REMAINDER
    else:
        nargs = '*'

    # monkey-patch argparse so conflict_handler='ignore' silently drops
    # duplicate flag definitions, this lets the record group below
    # redefine flags like -o/--perf-path with mode-specific help text
    argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
    argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
    parser = argparse.ArgumentParser(
        description="Aggregate and report Linux perf results.",
        allow_abbrev=False,
        conflict_handler='ignore')
    parser.add_argument(
        'perf_paths',
        nargs=nargs,
        help="Input *.perf files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        # parse "field=value,value,..." into (field, {value, ...})
        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    # accumulates (field, reverse) pairs so multiple -s/-S flags compose,
    # -S marks the field for reverse sorting
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, True if option == '-S' else False))
    parser.add_argument(
        '-s', '--sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '--branches',
        action='store_true',
        help="Show branches and branch misses.")
    parser.add_argument(
        '--caches',
        action='store_true',
        help="Show cache accesses and cache misses.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates up "
            "to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may not "
            "terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with coverage info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with samples above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--perf-path',
        type=lambda x: x.split(),
        help="Path to the perf executable, may include flags. "
            "Defaults to %r." % PERF_PATH)
    parser.add_argument(
        '--objdump-path',
        type=lambda x: x.split(),
        default=OBJDUMP_PATH,
        help="Path to the objdump executable, may include flags. "
            "Defaults to %r." % OBJDUMP_PATH)

    # record flags, these intentionally redefine some flags above (see
    # the conflict_handler='ignore' monkey-patch)
    record_parser = parser.add_argument_group('record options')
    record_parser.add_argument(
        'command',
        nargs=nargs,
        help="Command to run.")
    record_parser.add_argument(
        '-R', '--record',
        action='store_true',
        help="Run a command and aggregate perf measurements.")
    record_parser.add_argument(
        '-o', '--output',
        help="Output file. Uses flock to synchronize. This is stored as a "
            "zip-file of multiple perf results.")
    record_parser.add_argument(
        '--perf-freq',
        help="perf sampling frequency. This is passed directly to perf. "
            "Defaults to %r." % PERF_FREQ)
    record_parser.add_argument(
        '--perf-period',
        help="perf sampling period. This is passed directly to perf.")
    record_parser.add_argument(
        '--perf-events',
        help="perf events to record. This is passed directly to perf. "
            "Defaults to %r." % PERF_EVENTS)
    record_parser.add_argument(
        '--perf-path',
        type=lambda x: x.split(),
        help="Path to the perf executable, may include flags. "
            "Defaults to %r." % PERF_PATH)

    # avoid intermixed/REMAINDER conflict, see above
    if nargs == argparse.REMAINDER:
        args = parser.parse_args()
    else:
        args = parser.parse_intermixed_args()

    # perf_paths/command overlap, so need to do some munging here
    args.command = args.perf_paths
    if args.record:
        if not args.command:
            print('error: no command specified?')
            sys.exit(-1)
        if not args.output:
            print('error: no output file specified?')
            sys.exit(-1)

    # drop None args so function-level defaults take over
    sys.exit(main(**{k: v
        for k, v in vars(args).items()
        if v is not None}))
1345