#!/usr/bin/env python3
#
# Script to find data size at the function level. Basically just a thin wrapper
# around nm with some extra conveniences for comparing builds. Heavily inspired
# by Linux's Bloat-O-Meter.
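#
# Example (script name and paths are illustrative):
# ./data.py build/*.o -d previous.csv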
#

import os
import sys
import glob
import itertools as it
import subprocess as sp
import shlex
import re
import csv
import collections as co


OBJ_PATHS = ['*.o']

def collect(paths, **args):
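    # accumulate per-symbol data sizes by parsing nm output; returns a flat
    # list of (file, symbol, size) tuples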
    results = co.defaultdict(lambda: 0)
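    # matches nm --size-sort output lines: "<hex size> <type> <symbol>"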
    pattern = re.compile(
        '^(?P<size>[0-9a-fA-F]+)' +
        ' (?P<type>[%s])' % re.escape(args['type']) +
        ' (?P<func>.+?)$')
    for path in paths:
        # note nm-tool may contain extra args
        cmd = args['nm_tool'] + ['--size-sort', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not args.get('verbose') else None,
            universal_newlines=True,
            errors='replace')
        for line in proc.stdout:
            m = pattern.match(line)
            if m:
                results[(path, m.group('func'))] += int(m.group('size'), 16)
        proc.wait()
        if proc.returncode != 0:
            if not args.get('verbose'):
                for line in proc.stderr:
                    sys.stdout.write(line)
            sys.exit(-1)

    flat_results = []
    for (file, func), size in results.items():
        # map to source files
        if args.get('build_dir'):
            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
        # replace .o with .c; different scripts report .o/.c, so we need to
        # pick one if we want to deduplicate csv files
        file = re.sub(r'\.o$', '.c', file)
        # discard internal functions
        if not args.get('everything'):
            if func.startswith('__'):
                continue
        # discard numbered suffixes (e.g. .8449) created by the optimizer
        func = re.sub(r'\.[0-9]+', '', func)
        flat_results.append((file, func, size))

    return flat_results

def main(**args):
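    # open path for reading/writing, treating '-' as stdin/stdout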
    def openio(path, mode='r'):
        if path == '-':
            if 'r' in mode:
                return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
            else:
                return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
        else:
            return open(path, mode)

    # find sizes
    if not args.get('use', None):
        # find .o files
        paths = []
        for path in args['obj_paths']:
            if os.path.isdir(path):
                path = path + '/*.o'

            for path in glob.glob(path):
                paths.append(path)

        if not paths:
            print('no .o files found in %r?' % args['obj_paths'])
            sys.exit(-1)

        results = collect(paths, **args)
    else:
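        # read previously collected sizes; the CSV needs file, name, and
        # data_size columns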
        with openio(args['use']) as f:
            r = csv.DictReader(f)
            results = [
                (   result['file'],
                    result['name'],
                    int(result['data_size']))
                for result in r
                if result.get('data_size') not in {None, ''}]

    total = 0
    for _, _, size in results:
        total += size

    # find previous results?
    if args.get('diff'):
        try:
            with openio(args['diff']) as f:
                r = csv.DictReader(f)
                prev_results = [
                    (   result['file'],
                        result['name'],
                        int(result['data_size']))
                    for result in r
                    if result.get('data_size') not in {None, ''}]
        except FileNotFoundError:
            prev_results = []

        prev_total = 0
        for _, _, size in prev_results:
            prev_total += size

    # write results to CSV
    if args.get('output'):
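        # results are keyed by (file, name); other_fields keeps any extra
        # columns carried over from a merged CSV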
        merged_results = co.defaultdict(lambda: {})
        other_fields = []

        # merge?
        if args.get('merge'):
            try:
                with openio(args['merge']) as f:
                    r = csv.DictReader(f)
                    for result in r:
                        file = result.pop('file', '')
                        func = result.pop('name', '')
                        result.pop('data_size', None)
                        merged_results[(file, func)] = result
                        other_fields = result.keys()
            except FileNotFoundError:
                pass

        for file, func, size in results:
            merged_results[(file, func)]['data_size'] = size

        with openio(args['output'], 'w') as f:
            w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size'])
            w.writeheader()
            for (file, func), result in sorted(merged_results.items()):
                w.writerow({'file': file, 'name': func, **result})

    # print results
    def dedup_entries(results, by='name'):
        entries = co.defaultdict(lambda: 0)
        for file, func, size in results:
            entry = (file if by == 'file' else func)
            entries[entry] += size
        return entries

    def diff_entries(olds, news):
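        # each diff entry is (old size, new size, new-old, relative change)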
        diff = co.defaultdict(lambda: (0, 0, 0, 0))
        for name, new in news.items():
            diff[name] = (0, new, new, 1.0)
        for name, old in olds.items():
            _, new, _, _ = diff[name]
            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
        return diff

    def sorted_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1], x))
        else:
            return sorted(entries)

    def sorted_diff_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1][1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1][1], x))
        else:
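            # by default, sort diffs by relative change, largest first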
            return sorted(entries, key=lambda x: (-x[1][3], x))

    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s %7s' % (by, 'size'))
        else:
            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))

    def print_entry(name, size):
        print("%-36s %7d" % (name, size))

    def print_diff_entry(name, old, new, diff, ratio):
        print("%-36s %7s %7s %+7d%s" % (name,
            old or "-",
            new or "-",
            diff,
            ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    def print_entries(by='name'):
        entries = dedup_entries(results, by=by)

        if not args.get('diff'):
            print_header(by=by)
            for name, size in sorted_entries(entries.items()):
                print_entry(name, size)
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            diff = diff_entries(prev_entries, entries)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for old, _, _, _ in diff.values() if not old),
                sum(1 for _, new, _, _ in diff.values() if not new)))
            for name, (old, new, diff, ratio) in sorted_diff_entries(
                    diff.items()):
                if ratio or args.get('all'):
                    print_diff_entry(name, old, new, diff, ratio)

    def print_totals():
        if not args.get('diff'):
            print_entry('TOTAL', total)
        else:
            ratio = (0.0 if not prev_total and not total
                else 1.0 if not prev_total
                else (total-prev_total)/prev_total)
            print_diff_entry('TOTAL',
                prev_total, total,
                total-prev_total,
                ratio)

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='name')
        print_totals()

if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Find data size at the function level.")
    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory \
            or a list of paths. Defaults to %r." % OBJ_PATHS)
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-u', '--use',
        help="Don't compile and find data sizes, instead use this CSV file.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff data size against.")
    parser.add_argument('-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all functions, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument('-s', '--size-sort', action='store_true',
        help="Sort by size.")
    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
        help="Sort by size, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level data sizes. Note this does not include padding! "
            "So sizes may differ from other tools.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the total data size.")
    parser.add_argument('--type', default='dDbB',
        help="Type of symbols to report; this uses the same single-character "
            "type names emitted by nm. Defaults to %(default)r.")
    parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
        help="Path to the nm tool to use.")
    parser.add_argument('--build-dir',
        help="Specify the relative build directory. Used to map object files \
            to the correct source files.")
    sys.exit(main(**vars(parser.parse_args())))