1#!/usr/bin/env python3 2# 3# Script to summarize the outputs of other scripts. Operates on CSV files. 4# 5# Example: 6# ./scripts/code.py lfs.o lfs_util.o -q -o lfs.code.csv 7# ./scripts/data.py lfs.o lfs_util.o -q -o lfs.data.csv 8# ./scripts/summary.py lfs.code.csv lfs.data.csv -q -o lfs.csv 9# ./scripts/summary.py -Y lfs.csv -f code=code_size,data=data_size 10# 11# Copyright (c) 2022, The littlefs authors. 12# SPDX-License-Identifier: BSD-3-Clause 13# 14 15import collections as co 16import csv 17import functools as ft 18import itertools as it 19import math as m 20import os 21import re 22 23 24# supported merge operations 25# 26# this is a terrible way to express these 27# 28OPS = { 29 'sum': lambda xs: sum(xs[1:], start=xs[0]), 30 'prod': lambda xs: m.prod(xs[1:], start=xs[0]), 31 'min': min, 32 'max': max, 33 'mean': lambda xs: Float(sum(float(x) for x in xs) / len(xs)), 34 'stddev': lambda xs: ( 35 lambda mean: Float( 36 m.sqrt(sum((float(x) - mean)**2 for x in xs) / len(xs))) 37 )(sum(float(x) for x in xs) / len(xs)), 38 'gmean': lambda xs: Float(m.prod(float(x) for x in xs)**(1/len(xs))), 39 'gstddev': lambda xs: ( 40 lambda gmean: Float( 41 m.exp(m.sqrt(sum(m.log(float(x)/gmean)**2 for x in xs) / len(xs))) 42 if gmean else m.inf) 43 )(m.prod(float(x) for x in xs)**(1/len(xs))), 44} 45 46 47# integer fields 48class Int(co.namedtuple('Int', 'x')): 49 __slots__ = () 50 def __new__(cls, x=0): 51 if isinstance(x, Int): 52 return x 53 if isinstance(x, str): 54 try: 55 x = int(x, 0) 56 except ValueError: 57 # also accept +-∞ and +-inf 58 if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x): 59 x = m.inf 60 elif re.match('^\s*-\s*(?:∞|inf)\s*$', x): 61 x = -m.inf 62 else: 63 raise 64 assert isinstance(x, int) or m.isinf(x), x 65 return super().__new__(cls, x) 66 67 def __str__(self): 68 if self.x == m.inf: 69 return '∞' 70 elif self.x == -m.inf: 71 return '-∞' 72 else: 73 return str(self.x) 74 75 def __int__(self): 76 assert not m.isinf(self.x) 77 return self.x 78 79 def __float__(self): 80 return float(self.x) 81 82 none = '%7s' % '-' 83 def table(self): 84 return '%7s' % (self,) 85 86 diff_none = '%7s' % '-' 87 diff_table = table 88 89 def diff_diff(self, other): 90 new = self.x if self else 0 91 old = other.x if other else 0 92 diff = new - old 93 if diff == +m.inf: 94 return '%7s' % '+∞' 95 elif diff == -m.inf: 96 return '%7s' % '-∞' 97 else: 98 return '%+7d' % diff 99 100 def ratio(self, other): 101 new = self.x if self else 0 102 old = other.x if other else 0 103 if m.isinf(new) and m.isinf(old): 104 return 0.0 105 elif m.isinf(new): 106 return +m.inf 107 elif m.isinf(old): 108 return -m.inf 109 elif not old and not new: 110 return 0.0 111 elif not old: 112 return 1.0 113 else: 114 return (new-old) / old 115 116 def __add__(self, other): 117 return self.__class__(self.x + other.x) 118 119 def __sub__(self, other): 120 return self.__class__(self.x - other.x) 121 122 def __mul__(self, other): 123 return self.__class__(self.x * other.x) 124 125# float fields 126class Float(co.namedtuple('Float', 'x')): 127 __slots__ = () 128 def __new__(cls, x=0.0): 129 if isinstance(x, Float): 130 return x 131 if isinstance(x, str): 132 try: 133 x = float(x) 134 except ValueError: 135 # also accept +-∞ and +-inf 136 if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x): 137 x = m.inf 138 elif re.match('^\s*-\s*(?:∞|inf)\s*$', x): 139 x = -m.inf 140 else: 141 raise 142 assert isinstance(x, float), x 143 return super().__new__(cls, x) 144 145 def __str__(self): 146 if self.x == m.inf: 147 return '∞' 148 elif self.x == -m.inf: 149 return '-∞' 150 else: 151 return '%.1f' % self.x 152 153 def __float__(self): 154 return float(self.x) 155 156 none = Int.none 157 table = Int.table 158 diff_none = Int.diff_none 159 diff_table = Int.diff_table 160 diff_diff = Int.diff_diff 161 ratio = Int.ratio 162 __add__ = Int.__add__ 163 __sub__ = Int.__sub__ 164 __mul__ = Int.__mul__ 165 166# fractional fields, a/b 167class Frac(co.namedtuple('Frac', 'a,b')): 168 __slots__ = () 169 def __new__(cls, a=0, b=None): 170 if isinstance(a, Frac) and b is None: 171 return a 172 if isinstance(a, str) and b is None: 173 a, b = a.split('/', 1) 174 if b is None: 175 b = a 176 return super().__new__(cls, Int(a), Int(b)) 177 178 def __str__(self): 179 return '%s/%s' % (self.a, self.b) 180 181 def __float__(self): 182 return float(self.a) 183 184 none = '%11s %7s' % ('-', '-') 185 def table(self): 186 t = self.a.x/self.b.x if self.b.x else 1.0 187 return '%11s %7s' % ( 188 self, 189 '∞%' if t == +m.inf 190 else '-∞%' if t == -m.inf 191 else '%.1f%%' % (100*t)) 192 193 diff_none = '%11s' % '-' 194 def diff_table(self): 195 return '%11s' % (self,) 196 197 def diff_diff(self, other): 198 new_a, new_b = self if self else (Int(0), Int(0)) 199 old_a, old_b = other if other else (Int(0), Int(0)) 200 return '%11s' % ('%s/%s' % ( 201 new_a.diff_diff(old_a).strip(), 202 new_b.diff_diff(old_b).strip())) 203 204 def ratio(self, other): 205 new_a, new_b = self if self else (Int(0), Int(0)) 206 old_a, old_b = other if other else (Int(0), Int(0)) 207 new = new_a.x/new_b.x if new_b.x else 1.0 208 old = old_a.x/old_b.x if old_b.x else 1.0 209 return new - old 210 211 def __add__(self, other): 212 return self.__class__(self.a + other.a, self.b + other.b) 213 214 def __sub__(self, other): 215 return self.__class__(self.a - other.a, self.b - other.b) 216 217 def __mul__(self, other): 218 return self.__class__(self.a * other.a, self.b + other.b) 219 220 def __lt__(self, other): 221 self_t = self.a.x/self.b.x if self.b.x else 1.0 222 other_t = other.a.x/other.b.x if other.b.x else 1.0 223 return (self_t, self.a.x) < (other_t, other.a.x) 224 225 def __gt__(self, other): 226 return self.__class__.__lt__(other, self) 227 228 def __le__(self, other): 229 return not self.__gt__(other) 230 231 def __ge__(self, other): 232 return not self.__lt__(other) 233 234# available types 235TYPES = co.OrderedDict([ 236 ('int', Int), 237 ('float', Float), 238 ('frac', Frac) 239]) 240 241 242def infer(results, *, 243 by=None, 244 fields=None, 245 types={}, 246 ops={}, 247 renames=[], 248 **_): 249 # if fields not specified, try to guess from data 250 if fields is None: 251 fields = co.OrderedDict() 252 for r in results: 253 for k, v in r.items(): 254 if (by is None or k not in by) and v.strip(): 255 types_ = [] 256 for t in fields.get(k, TYPES.values()): 257 try: 258 t(v) 259 types_.append(t) 260 except ValueError: 261 pass 262 fields[k] = types_ 263 fields = list(k for k, v in fields.items() if v) 264 265 # deduplicate fields 266 fields = list(co.OrderedDict.fromkeys(fields).keys()) 267 268 # if by not specified, guess it's anything not in fields and not a 269 # source of a rename 270 if by is None: 271 by = co.OrderedDict() 272 for r in results: 273 # also ignore None keys, these are introduced by csv.DictReader 274 # when header + row mismatch 275 by.update((k, True) for k in r.keys() 276 if k is not None 277 and k not in fields 278 and not any(k == old_k for _, old_k in renames)) 279 by = list(by.keys()) 280 281 # deduplicate fields 282 by = list(co.OrderedDict.fromkeys(by).keys()) 283 284 # find best type for all fields 285 types_ = {} 286 for k in fields: 287 if k in types: 288 types_[k] = types[k] 289 else: 290 for t in TYPES.values(): 291 for r in results: 292 if k in r and r[k].strip(): 293 try: 294 t(r[k]) 295 except ValueError: 296 break 297 else: 298 types_[k] = t 299 break 300 else: 301 print("error: no type matches field %r?" % k) 302 sys.exit(-1) 303 types = types_ 304 305 # does folding change the type? 306 types_ = {} 307 for k, t in types.items(): 308 types_[k] = ops.get(k, OPS['sum'])([t()]).__class__ 309 310 311 # create result class 312 def __new__(cls, **r): 313 return cls.__mro__[1].__new__(cls, 314 **{k: r.get(k, '') for k in by}, 315 **{k: r[k] if k in r and isinstance(r[k], list) 316 else [types[k](r[k])] if k in r 317 else [] 318 for k in fields}) 319 320 def __add__(self, other): 321 return self.__class__( 322 **{k: getattr(self, k) for k in by}, 323 **{k: object.__getattribute__(self, k) 324 + object.__getattribute__(other, k) 325 for k in fields}) 326 327 def __getattribute__(self, k): 328 if k in fields: 329 if object.__getattribute__(self, k): 330 return ops.get(k, OPS['sum'])(object.__getattribute__(self, k)) 331 else: 332 return None 333 return object.__getattribute__(self, k) 334 335 return type('Result', (co.namedtuple('Result', by + fields),), { 336 '__slots__': (), 337 '__new__': __new__, 338 '__add__': __add__, 339 '__getattribute__': __getattribute__, 340 '_by': by, 341 '_fields': fields, 342 '_sort': fields, 343 '_types': types_, 344 }) 345 346 347def fold(Result, results, *, 348 by=None, 349 defines=None, 350 **_): 351 if by is None: 352 by = Result._by 353 354 for k in it.chain(by or [], (k for k, _ in defines or [])): 355 if k not in Result._by and k not in Result._fields: 356 print("error: could not find field %r?" % k) 357 sys.exit(-1) 358 359 # filter by matching defines 360 if defines is not None: 361 results_ = [] 362 for r in results: 363 if all(getattr(r, k) in vs for k, vs in defines): 364 results_.append(r) 365 results = results_ 366 367 # organize results into conflicts 368 folding = co.OrderedDict() 369 for r in results: 370 name = tuple(getattr(r, k) for k in by) 371 if name not in folding: 372 folding[name] = [] 373 folding[name].append(r) 374 375 # merge conflicts 376 folded = [] 377 for name, rs in folding.items(): 378 folded.append(sum(rs[1:], start=rs[0])) 379 380 return folded 381 382def table(Result, results, diff_results=None, *, 383 by=None, 384 fields=None, 385 sort=None, 386 summary=False, 387 all=False, 388 percent=False, 389 **_): 390 all_, all = all, __builtins__.all 391 392 if by is None: 393 by = Result._by 394 if fields is None: 395 fields = Result._fields 396 types = Result._types 397 398 # fold again 399 results = fold(Result, results, by=by) 400 if diff_results is not None: 401 diff_results = fold(Result, diff_results, by=by) 402 403 # organize by name 404 table = { 405 ','.join(str(getattr(r, k) or '') for k in by): r 406 for r in results} 407 diff_table = { 408 ','.join(str(getattr(r, k) or '') for k in by): r 409 for r in diff_results or []} 410 names = list(table.keys() | diff_table.keys()) 411 412 # sort again, now with diff info, note that python's sort is stable 413 names.sort() 414 if diff_results is not None: 415 names.sort(key=lambda n: tuple( 416 types[k].ratio( 417 getattr(table.get(n), k, None), 418 getattr(diff_table.get(n), k, None)) 419 for k in fields), 420 reverse=True) 421 if sort: 422 for k, reverse in reversed(sort): 423 names.sort( 424 key=lambda n: tuple( 425 (getattr(table[n], k),) 426 if getattr(table.get(n), k, None) is not None else () 427 for k in ([k] if k else [ 428 k for k in Result._sort if k in fields])), 429 reverse=reverse ^ (not k or k in Result._fields)) 430 431 432 # build up our lines 433 lines = [] 434 435 # header 436 header = [] 437 header.append('%s%s' % ( 438 ','.join(by), 439 ' (%d added, %d removed)' % ( 440 sum(1 for n in table if n not in diff_table), 441 sum(1 for n in diff_table if n not in table)) 442 if diff_results is not None and not percent else '') 443 if not summary else '') 444 if diff_results is None: 445 for k in fields: 446 header.append(k) 447 elif percent: 448 for k in fields: 449 header.append(k) 450 else: 451 for k in fields: 452 header.append('o'+k) 453 for k in fields: 454 header.append('n'+k) 455 for k in fields: 456 header.append('d'+k) 457 header.append('') 458 lines.append(header) 459 460 def table_entry(name, r, diff_r=None, ratios=[]): 461 entry = [] 462 entry.append(name) 463 if diff_results is None: 464 for k in fields: 465 entry.append(getattr(r, k).table() 466 if getattr(r, k, None) is not None 467 else types[k].none) 468 elif percent: 469 for k in fields: 470 entry.append(getattr(r, k).diff_table() 471 if getattr(r, k, None) is not None 472 else types[k].diff_none) 473 else: 474 for k in fields: 475 entry.append(getattr(diff_r, k).diff_table() 476 if getattr(diff_r, k, None) is not None 477 else types[k].diff_none) 478 for k in fields: 479 entry.append(getattr(r, k).diff_table() 480 if getattr(r, k, None) is not None 481 else types[k].diff_none) 482 for k in fields: 483 entry.append(types[k].diff_diff( 484 getattr(r, k, None), 485 getattr(diff_r, k, None))) 486 if diff_results is None: 487 entry.append('') 488 elif percent: 489 entry.append(' (%s)' % ', '.join( 490 '+∞%' if t == +m.inf 491 else '-∞%' if t == -m.inf 492 else '%+.1f%%' % (100*t) 493 for t in ratios)) 494 else: 495 entry.append(' (%s)' % ', '.join( 496 '+∞%' if t == +m.inf 497 else '-∞%' if t == -m.inf 498 else '%+.1f%%' % (100*t) 499 for t in ratios 500 if t) 501 if any(ratios) else '') 502 return entry 503 504 # entries 505 if not summary: 506 for name in names: 507 r = table.get(name) 508 if diff_results is None: 509 diff_r = None 510 ratios = None 511 else: 512 diff_r = diff_table.get(name) 513 ratios = [ 514 types[k].ratio( 515 getattr(r, k, None), 516 getattr(diff_r, k, None)) 517 for k in fields] 518 if not all_ and not any(ratios): 519 continue 520 lines.append(table_entry(name, r, diff_r, ratios)) 521 522 # total 523 r = next(iter(fold(Result, results, by=[])), None) 524 if diff_results is None: 525 diff_r = None 526 ratios = None 527 else: 528 diff_r = next(iter(fold(Result, diff_results, by=[])), None) 529 ratios = [ 530 types[k].ratio( 531 getattr(r, k, None), 532 getattr(diff_r, k, None)) 533 for k in fields] 534 lines.append(table_entry('TOTAL', r, diff_r, ratios)) 535 536 # find the best widths, note that column 0 contains the names and column -1 537 # the ratios, so those are handled a bit differently 538 widths = [ 539 ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1 540 for w, i in zip( 541 it.chain([23], it.repeat(7)), 542 range(len(lines[0])-1))] 543 544 # print our table 545 for line in lines: 546 print('%-*s %s%s' % ( 547 widths[0], line[0], 548 ' '.join('%*s' % (w, x) 549 for w, x in zip(widths[1:], line[1:-1])), 550 line[-1])) 551 552 553def openio(path, mode='r', buffering=-1): 554 # allow '-' for stdin/stdout 555 if path == '-': 556 if mode == 'r': 557 return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering) 558 else: 559 return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering) 560 else: 561 return open(path, mode, buffering) 562 563def main(csv_paths, *, 564 by=None, 565 fields=None, 566 defines=None, 567 sort=None, 568 **args): 569 # separate out renames 570 renames = list(it.chain.from_iterable( 571 ((k, v) for v in vs) 572 for k, vs in it.chain(by or [], fields or []))) 573 if by is not None: 574 by = [k for k, _ in by] 575 if fields is not None: 576 fields = [k for k, _ in fields] 577 578 # figure out types 579 types = {} 580 for t in TYPES.keys(): 581 for k in args.get(t, []): 582 if k in types: 583 print("error: conflicting type for field %r?" % k) 584 sys.exit(-1) 585 types[k] = TYPES[t] 586 # rename types? 587 if renames: 588 types_ = {} 589 for new_k, old_k in renames: 590 if old_k in types: 591 types_[new_k] = types[old_k] 592 types.update(types_) 593 594 # figure out merge operations 595 ops = {} 596 for o in OPS.keys(): 597 for k in args.get(o, []): 598 if k in ops: 599 print("error: conflicting op for field %r?" % k) 600 sys.exit(-1) 601 ops[k] = OPS[o] 602 # rename ops? 603 if renames: 604 ops_ = {} 605 for new_k, old_k in renames: 606 if old_k in ops: 607 ops_[new_k] = ops[old_k] 608 ops.update(ops_) 609 610 # find CSV files 611 results = [] 612 for path in csv_paths: 613 try: 614 with openio(path) as f: 615 reader = csv.DictReader(f, restval='') 616 for r in reader: 617 # rename fields? 618 if renames: 619 # make a copy so renames can overlap 620 r_ = {} 621 for new_k, old_k in renames: 622 if old_k in r: 623 r_[new_k] = r[old_k] 624 r.update(r_) 625 626 results.append(r) 627 except FileNotFoundError: 628 pass 629 630 # homogenize 631 Result = infer(results, 632 by=by, 633 fields=fields, 634 types=types, 635 ops=ops, 636 renames=renames) 637 results_ = [] 638 for r in results: 639 if not any(k in r and r[k].strip() 640 for k in Result._fields): 641 continue 642 try: 643 results_.append(Result(**{ 644 k: r[k] for k in Result._by + Result._fields 645 if k in r and r[k].strip()})) 646 except TypeError: 647 pass 648 results = results_ 649 650 # fold 651 results = fold(Result, results, by=by, defines=defines) 652 653 # sort, note that python's sort is stable 654 results.sort() 655 if sort: 656 for k, reverse in reversed(sort): 657 results.sort( 658 key=lambda r: tuple( 659 (getattr(r, k),) if getattr(r, k) is not None else () 660 for k in ([k] if k else Result._sort)), 661 reverse=reverse ^ (not k or k in Result._fields)) 662 663 # write results to CSV 664 if args.get('output'): 665 with openio(args['output'], 'w') as f: 666 writer = csv.DictWriter(f, Result._by + Result._fields) 667 writer.writeheader() 668 for r in results: 669 # note we need to go through getattr to resolve lazy fields 670 writer.writerow({ 671 k: getattr(r, k) for k in Result._by + Result._fields}) 672 673 # find previous results? 674 if args.get('diff'): 675 diff_results = [] 676 try: 677 with openio(args['diff']) as f: 678 reader = csv.DictReader(f, restval='') 679 for r in reader: 680 # rename fields? 681 if renames: 682 # make a copy so renames can overlap 683 r_ = {} 684 for new_k, old_k in renames: 685 if old_k in r: 686 r_[new_k] = r[old_k] 687 r.update(r_) 688 689 if not any(k in r and r[k].strip() 690 for k in Result._fields): 691 continue 692 try: 693 diff_results.append(Result(**{ 694 k: r[k] for k in Result._by + Result._fields 695 if k in r and r[k].strip()})) 696 except TypeError: 697 pass 698 except FileNotFoundError: 699 pass 700 701 # fold 702 diff_results = fold(Result, diff_results, by=by, defines=defines) 703 704 # print table 705 if not args.get('quiet'): 706 table(Result, results, 707 diff_results if args.get('diff') else None, 708 by=by, 709 fields=fields, 710 sort=sort, 711 **args) 712 713 714if __name__ == "__main__": 715 import argparse 716 import sys 717 parser = argparse.ArgumentParser( 718 description="Summarize measurements in CSV files.", 719 allow_abbrev=False) 720 parser.add_argument( 721 'csv_paths', 722 nargs='*', 723 help="Input *.csv files.") 724 parser.add_argument( 725 '-q', '--quiet', 726 action='store_true', 727 help="Don't show anything, useful with -o.") 728 parser.add_argument( 729 '-o', '--output', 730 help="Specify CSV file to store results.") 731 parser.add_argument( 732 '-d', '--diff', 733 help="Specify CSV file to diff against.") 734 parser.add_argument( 735 '-a', '--all', 736 action='store_true', 737 help="Show all, not just the ones that changed.") 738 parser.add_argument( 739 '-p', '--percent', 740 action='store_true', 741 help="Only show percentage change, not a full diff.") 742 parser.add_argument( 743 '-b', '--by', 744 action='append', 745 type=lambda x: ( 746 lambda k,v=None: (k, v.split(',') if v is not None else ()) 747 )(*x.split('=', 1)), 748 help="Group by this field. Can rename fields with new_name=old_name.") 749 parser.add_argument( 750 '-f', '--field', 751 dest='fields', 752 action='append', 753 type=lambda x: ( 754 lambda k,v=None: (k, v.split(',') if v is not None else ()) 755 )(*x.split('=', 1)), 756 help="Show this field. Can rename fields with new_name=old_name.") 757 parser.add_argument( 758 '-D', '--define', 759 dest='defines', 760 action='append', 761 type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)), 762 help="Only include results where this field is this value. May include " 763 "comma-separated options.") 764 class AppendSort(argparse.Action): 765 def __call__(self, parser, namespace, value, option): 766 if namespace.sort is None: 767 namespace.sort = [] 768 namespace.sort.append((value, True if option == '-S' else False)) 769 parser.add_argument( 770 '-s', '--sort', 771 nargs='?', 772 action=AppendSort, 773 help="Sort by this field.") 774 parser.add_argument( 775 '-S', '--reverse-sort', 776 nargs='?', 777 action=AppendSort, 778 help="Sort by this field, but backwards.") 779 parser.add_argument( 780 '-Y', '--summary', 781 action='store_true', 782 help="Only show the total.") 783 parser.add_argument( 784 '--int', 785 action='append', 786 help="Treat these fields as ints.") 787 parser.add_argument( 788 '--float', 789 action='append', 790 help="Treat these fields as floats.") 791 parser.add_argument( 792 '--frac', 793 action='append', 794 help="Treat these fields as fractions.") 795 parser.add_argument( 796 '--sum', 797 action='append', 798 help="Add these fields (the default).") 799 parser.add_argument( 800 '--prod', 801 action='append', 802 help="Multiply these fields.") 803 parser.add_argument( 804 '--min', 805 action='append', 806 help="Take the minimum of these fields.") 807 parser.add_argument( 808 '--max', 809 action='append', 810 help="Take the maximum of these fields.") 811 parser.add_argument( 812 '--mean', 813 action='append', 814 help="Average these fields.") 815 parser.add_argument( 816 '--stddev', 817 action='append', 818 help="Find the standard deviation of these fields.") 819 parser.add_argument( 820 '--gmean', 821 action='append', 822 help="Find the geometric mean of these fields.") 823 parser.add_argument( 824 '--gstddev', 825 action='append', 826 help="Find the geometric standard deviation of these fields.") 827 sys.exit(main(**{k: v 828 for k, v in vars(parser.parse_intermixed_args()).items() 829 if v is not None})) 830