#!/usr/bin/env python3
#
# Script to find data size at the function level. Basically just a big wrapper
# around nm with some extra conveniences for comparing builds. Heavily inspired
# by Linux's Bloat-O-Meter.
#
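# Example invocations (the script name, object files, and CSV names below are
# only illustrative):
#
#   ./data.py lfs.o lfs_util.o -s    # report largest data symbols first
#   ./data.py -o data.csv            # save results for later comparison
#   ./data.py -d data.csv -a         # diff against previously saved results
#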
import os
import sys
import glob
import itertools as it
import subprocess as sp
import shlex
import re
import csv
import collections as co


OBJ_PATHS = ['*.o']

# run nm over the given object files and accumulate data sizes per symbol
def collect(paths, **args):
    results = co.defaultdict(lambda: 0)
    # each line of nm --size-sort output is expected to look like
    # "<hex-size> <type> <name>"
    pattern = re.compile(
        '^(?P<size>[0-9a-fA-F]+)' +
        ' (?P<type>[%s])' % re.escape(args['type']) +
        ' (?P<func>.+?)$')
    for path in paths:
        # note nm-tool may contain extra args
        cmd = args['nm_tool'] + ['--size-sort', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not args.get('verbose') else None,
            universal_newlines=True,
            errors='replace')
        for line in proc.stdout:
            m = pattern.match(line)
            if m:
                results[(path, m.group('func'))] += int(m.group('size'), 16)
        proc.wait()
        if proc.returncode != 0:
            if not args.get('verbose'):
                for line in proc.stderr:
                    sys.stdout.write(line)
            sys.exit(-1)

    flat_results = []
    for (file, func), size in results.items():
        # map to source files
        if args.get('build_dir'):
            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
        # replace .o with .c, different scripts report .o/.c, we need to
        # choose one if we want to deduplicate csv files
        file = re.sub(r'\.o$', '.c', file)
        # discard internal functions
        if not args.get('everything'):
            if func.startswith('__'):
                continue
        # discard numbered suffixes (e.g. .8449) created by the optimizer
        func = re.sub(r'\.[0-9]+', '', func)
        flat_results.append((file, func, size))

    return flat_results

def main(**args):
    def openio(path, mode='r'):
        if path == '-':
            if 'r' in mode:
                return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
            else:
                return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
        else:
            return open(path, mode)

    # find sizes
    if not args.get('use', None):
        # find .o files
        paths = []
        for path in args['obj_paths']:
            if os.path.isdir(path):
                path = path + '/*.o'

            for path in glob.glob(path):
                paths.append(path)

        if not paths:
            print('no .o files found in %r?' % args['obj_paths'])
            sys.exit(-1)

        results = collect(paths, **args)
    else:
        with openio(args['use']) as f:
            r = csv.DictReader(f)
            results = [
                (   result['file'],
                    result['name'],
                    int(result['data_size']))
                for result in r
                if result.get('data_size') not in {None, ''}]

    total = 0
    for _, _, size in results:
        total += size

    # find previous results?
    if args.get('diff'):
        try:
            with openio(args['diff']) as f:
                r = csv.DictReader(f)
                prev_results = [
                    (   result['file'],
                        result['name'],
                        int(result['data_size']))
                    for result in r
                    if result.get('data_size') not in {None, ''}]
        except FileNotFoundError:
            prev_results = []

        prev_total = 0
        for _, _, size in prev_results:
            prev_total += size

    # write results to CSV
    if args.get('output'):
        merged_results = co.defaultdict(lambda: {})
        other_fields = []

        # merge?
        if args.get('merge'):
            try:
                with openio(args['merge']) as f:
                    r = csv.DictReader(f)
                    for result in r:
                        file = result.pop('file', '')
                        func = result.pop('name', '')
                        result.pop('data_size', None)
                        merged_results[(file, func)] = result
                        other_fields = result.keys()
            except FileNotFoundError:
                pass

        for file, func, size in results:
            merged_results[(file, func)]['data_size'] = size

        with openio(args['output'], 'w') as f:
            w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size'])
            w.writeheader()
            for (file, func), result in sorted(merged_results.items()):
                w.writerow({'file': file, 'name': func, **result})
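    # The CSV written above when -o/--output is given (and read back by
    # -u/--use and -d/--diff) has at least file, name, and data_size columns;
    # the rows below are purely illustrative:
    #
    #   file,name,data_size
    #   lfs.c,lfs_read_buffer,512
    #   lfs_util.c,lfs_crc_table,1024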
    # print results
    def dedup_entries(results, by='name'):
        entries = co.defaultdict(lambda: 0)
        for file, func, size in results:
            entry = (file if by == 'file' else func)
            entries[entry] += size
        return entries

    def diff_entries(olds, news):
        diff = co.defaultdict(lambda: (0, 0, 0, 0))
        for name, new in news.items():
            diff[name] = (0, new, new, 1.0)
        for name, old in olds.items():
            _, new, _, _ = diff[name]
            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
        return diff

    def sorted_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1], x))
        else:
            return sorted(entries)

    def sorted_diff_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1][1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1][1], x))
        else:
            return sorted(entries, key=lambda x: (-x[1][3], x))

    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s %7s' % (by, 'size'))
        else:
            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))

    def print_entry(name, size):
        print("%-36s %7d" % (name, size))

    def print_diff_entry(name, old, new, diff, ratio):
        print("%-36s %7s %7s %+7d%s" % (name,
            old or "-",
            new or "-",
            diff,
            ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    def print_entries(by='name'):
        entries = dedup_entries(results, by=by)

        if not args.get('diff'):
            print_header(by=by)
            for name, size in sorted_entries(entries.items()):
                print_entry(name, size)
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            diff = diff_entries(prev_entries, entries)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for old, _, _, _ in diff.values() if not old),
                sum(1 for _, new, _, _ in diff.values() if not new)))
            for name, (old, new, diff, ratio) in sorted_diff_entries(
                    diff.items()):
                if ratio or args.get('all'):
                    print_diff_entry(name, old, new, diff, ratio)

    def print_totals():
        if not args.get('diff'):
            print_entry('TOTAL', total)
        else:
            ratio = (0.0 if not prev_total and not total
                else 1.0 if not prev_total
                else (total-prev_total)/prev_total)
            print_diff_entry('TOTAL',
                prev_total, total,
                total-prev_total,
                ratio)

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='name')
        print_totals()

if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Find data size at the function level.")
    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory \
            or a list of paths. Defaults to %r." % OBJ_PATHS)
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-u', '--use',
        help="Don't compile and find data sizes, instead use this CSV file.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff data size against.")
    parser.add_argument('-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all functions, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument('-s', '--size-sort', action='store_true',
        help="Sort by size.")
    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
        help="Sort by size, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level data sizes. Note this does not include padding! "
            "So sizes may differ from other tools.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the total data size.")
    parser.add_argument('--type', default='dDbB',
        help="Type of symbols to report, this uses the same single-character "
            "type-names emitted by nm. Defaults to %(default)r.")
    parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
        help="Path to the nm tool to use.")
    parser.add_argument('--build-dir',
        help="Specify the relative build directory. Used to map object files \
            to the correct source files.")
    sys.exit(main(**vars(parser.parse_args())))