Reworked scripts to move field details into classes

These scripts can't easily share the common logic, but separating field details from the print/merge/csv logic should make the common part of these scripts much easier to create/modify going forward. This also tweaked the behavior of summary.py slightly.
2026-03-11 16:37:33 -07:00 · 2022-05-21 16:46:25 -05:00
parent 4a7e94fb15
commit 5b0a6d4747
8 changed files with 1102 additions and 898 deletions
@@ -24,6 +24,60 @@ def openio(path, mode='r'):
    else:
        return open(path, mode)

+class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
+    __slots__ = ()
+    def __new__(cls, struct_size=0):
+        return super().__new__(cls, int(struct_size))
+
+    def __add__(self, other):
+        return self.__class__(self.struct_size + other.struct_size)
+
+    def __sub__(self, other):
+        return StructsDiff(other, self)
+
+    def __rsub__(self, other):
+        return self.__class__.__sub__(other, self)
+
+    def key(self, **args):
+        if args.get('size_sort'):
+            return -self.struct_size
+        elif args.get('reverse_size_sort'):
+            return +self.struct_size
+        else:
+            return None
+
+    _header = '%7s' % 'size'
+    def __str__(self):
+        return '%7d' % self.struct_size
+
+class StructsDiff(co.namedtuple('StructsDiff',  'old,new')):
+    __slots__ = ()
+
+    def ratio(self):
+        old = self.old.struct_size if self.old is not None else 0
+        new = self.new.struct_size if self.new is not None else 0
+        return (new-old) / old if old else 1.0
+
+    def key(self, **args):
+        return (
+            self.new.key(**args) if self.new is not None else 0,
+            -self.ratio())
+
+    def __bool__(self):
+        return bool(self.ratio())
+
+    _header = '%7s %7s %7s' % ('old', 'new', 'diff')
+    def __str__(self):
+        old = self.old.struct_size if self.old is not None else 0
+        new = self.new.struct_size if self.new is not None else 0
+        diff = new - old
+        ratio = self.ratio()
+        return '%7s %7s %+7d%s' % (
+            old or "-",
+            new or "-",
+            diff,
+            ' (%+.1f%%)' % (100*ratio) if ratio else '')
+
 def collect(paths, **args):
    decl_pattern = re.compile(
        '^\s+(?P<no>[0-9]+)'
@@ -36,7 +90,7 @@ def collect(paths, **args):
            '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
            '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')

-    results = co.defaultdict(lambda: 0)
+    results = {}
    for path in paths:
        # find decl, we want to filter by structs in .h files
        decls = {}
@@ -84,8 +138,18 @@ def collect(paths, **args):
                    if (name is not None
                            and decl is not None
                            and size is not None):
-                        decl = decls.get(decl, '?')
-                        results[(decl, name)] = size
+                        file = decls.get(decl, '?')
+                        # map to source file
+                        file = re.sub('\.o$', '.c', file)
+                        if args.get('build_dir'):
+                            file = re.sub(
+                                '%s/*' % re.escape(args['build_dir']), '',
+                                file)
+                        # only include structs declared in header files in the
+                        # current directory, ignore internal-only structs (
+                        # these are represented in other measurements)
+                        if args.get('everything') or file.endswith('.h'):
+                            results[(file, name)] = StructsResult(size)
                    found = (m.group('tag') == 'structure_type')
                    name = None
                    decl = None
@@ -103,24 +167,7 @@ def collect(paths, **args):
                    sys.stdout.write(line)
            sys.exit(-1)

-    flat_results = []
-    for (file, struct), size in results.items():
-        # map to source files
-        if args.get('build_dir'):
-            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
-        # only include structs declared in header files in the current
-        # directory, ignore internal-only # structs (these are represented
-        # in other measurements)
-        if not args.get('everything'):
-            if not file.endswith('.h'):
-                continue
-        # replace .o with .c, different scripts report .o/.c, we need to
-        # choose one if we want to deduplicate csv files
-        file = re.sub('\.o$', '.c', file)
-
-        flat_results.append((file, struct, size))
-
-    return flat_results
+    return results


 def main(**args):
@@ -143,35 +190,27 @@ def main(**args):
    else:
        with openio(args['use']) as f:
            r = csv.DictReader(f)
-            results = [
-                (   result['file'],
-                    result['name'],
-                    int(result['struct_size']))
+            results = {
+                (result['file'], result['name']): StructsResult(
+                    *(result[f] for f in StructsResult._fields))
                for result in r
-                if result.get('struct_size') not in {None, ''}]
-
-    total = 0
-    for _, _, size in results:
-        total += size
+                if all(result.get(f) not in {None, ''}
+                    for f in StructsResult._fields)}

    # find previous results?
    if args.get('diff'):
        try:
            with openio(args['diff']) as f:
                r = csv.DictReader(f)
-                prev_results = [
-                    (   result['file'],
-                        result['name'],
-                        int(result['struct_size']))
+                prev_results = {
+                    (result['file'], result['name']): StructsResult(
+                        *(result[f] for f in StructsResult._fields))
                    for result in r
-                    if result.get('struct_size') not in {None, ''}]
+                    if all(result.get(f) not in {None, ''}
+                        for f in StructsResult._fields)}
        except FileNotFoundError:
            prev_results = []

-        prev_total = 0
-        for _, _, size in prev_results:
-            prev_total += size
-
    # write results to CSV
    if args.get('output'):
        merged_results = co.defaultdict(lambda: {})
@@ -184,112 +223,88 @@ def main(**args):
                    r = csv.DictReader(f)
                    for result in r:
                        file = result.pop('file', '')
-                        struct = result.pop('name', '')
-                        result.pop('struct_size', None)
-                        merged_results[(file, struct)] = result
+                        func = result.pop('name', '')
+                        for f in StructsResult._fields:
+                            result.pop(f, None)
+                        merged_results[(file, func)] = result
                        other_fields = result.keys()
            except FileNotFoundError:
                pass

-        for file, struct, size in results:
-            merged_results[(file, struct)]['struct_size'] = size
+        for (file, func), result in results.items():
+            merged_results[(file, func)] |= result._asdict()

        with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size'])
+            w = csv.DictWriter(f, ['file', 'name',
+                *other_fields, *StructsResult._fields])
            w.writeheader()
-            for (file, struct), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': struct, **result})
+            for (file, func), result in sorted(merged_results.items()):
+                w.writerow({'file': file, 'name': func, **result})

    # print results
-    def dedup_entries(results, by='name'):
-        entries = co.defaultdict(lambda: 0)
-        for file, struct, size in results:
-            entry = (file if by == 'file' else struct)
-            entries[entry] += size
-        return entries
-
-    def diff_entries(olds, news):
-        diff = co.defaultdict(lambda: (0, 0, 0, 0))
-        for name, new in news.items():
-            diff[name] = (0, new, new, 1.0)
-        for name, old in olds.items():
-            _, new, _, _ = diff[name]
-            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
-        return diff
-
-    def sorted_entries(entries):
-        if args.get('size_sort'):
-            return sorted(entries, key=lambda x: (-x[1], x))
-        elif args.get('reverse_size_sort'):
-            return sorted(entries, key=lambda x: (+x[1], x))
+    def print_header(by):
+        if by == 'total':
+            entry = lambda k: 'TOTAL'
+        elif by == 'file':
+            entry = lambda k: k[0]
        else:
-            return sorted(entries)
-
-    def sorted_diff_entries(entries):
-        if args.get('size_sort'):
-            return sorted(entries, key=lambda x: (-x[1][1], x))
-        elif args.get('reverse_size_sort'):
-            return sorted(entries, key=lambda x: (+x[1][1], x))
-        else:
-            return sorted(entries, key=lambda x: (-x[1][3], x))
-
-    def print_header(by=''):
-        if not args.get('diff'):
-            print('%-36s %7s' % (by, 'size'))
-        else:
-            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))
-
-    def print_entry(name, size):
-        print("%-36s %7d" % (name, size))
-
-    def print_diff_entry(name, old, new, diff, ratio):
-        print("%-36s %7s %7s %+7d%s" % (name,
-            old or "-",
-            new or "-",
-            diff,
-            ' (%+.1f%%)' % (100*ratio) if ratio else ''))
-
-    def print_entries(by='name'):
-        entries = dedup_entries(results, by=by)
+            entry = lambda k: k[1]

        if not args.get('diff'):
-            print_header(by=by)
-            for name, size in sorted_entries(entries.items()):
-                print_entry(name, size)
+            print('%-36s %s' % (by, StructsResult._header))
        else:
-            prev_entries = dedup_entries(prev_results, by=by)
-            diff = diff_entries(prev_entries, entries)
-            print_header(by='%s (%d added, %d removed)' % (by,
-                sum(1 for old, _, _, _ in diff.values() if not old),
-                sum(1 for _, new, _, _ in diff.values() if not new)))
-            for name, (old, new, diff, ratio) in sorted_diff_entries(
-                    diff.items()):
-                if ratio or args.get('all'):
-                    print_diff_entry(name, old, new, diff, ratio)
+            old = {entry(k) for k in results.keys()}
+            new = {entry(k) for k in prev_results.keys()}
+            print('%-36s %s' % (
+                '%s (%d added, %d removed)' % (by,
+                        sum(1 for k in new if k not in old),
+                        sum(1 for k in old if k not in new))
+                    if by else '',
+                StructsDiff._header))

-    def print_totals():
-        if not args.get('diff'):
-            print_entry('TOTAL', total)
+    def print_entries(by):
+        if by == 'total':
+            entry = lambda k: 'TOTAL'
+        elif by == 'file':
+            entry = lambda k: k[0]
        else:
-            ratio = (0.0 if not prev_total and not total
-                else 1.0 if not prev_total
-                else (total-prev_total)/prev_total)
-            print_diff_entry('TOTAL',
-                prev_total, total,
-                total-prev_total,
-                ratio)
+            entry = lambda k: k[1]
+
+        entries = co.defaultdict(lambda: StructsResult())
+        for k, result in results.items():
+            entries[entry(k)] += result
+
+        if not args.get('diff'):
+            for name, result in sorted(entries.items(),
+                    key=lambda p: (p[1].key(**args), p)):
+                print('%-36s %s' % (name, result))
+        else:
+            prev_entries = co.defaultdict(lambda: StructsResult())
+            for k, result in prev_results.items():
+                prev_entries[entry(k)] += result
+
+            diff_entries = {name: entries.get(name) - prev_entries.get(name)
+                for name in (entries.keys() | prev_entries.keys())}
+
+            for name, diff in sorted(diff_entries.items(),
+                    key=lambda p: (p[1].key(**args), p)):
+                if diff or args.get('all'):
+                    print('%-36s %s' % (name, diff))

    if args.get('quiet'):
        pass
    elif args.get('summary'):
-        print_header()
-        print_totals()
+        print_header('')
+        print_entries('total')
    elif args.get('files'):
-        print_entries(by='file')
-        print_totals()
+        print_header('file')
+        print_entries('file')
+        print_entries('total')
    else:
-        print_entries(by='name')
-        print_totals()
+        print_header('struct')
+        print_entries('struct')
+        print_entries('total')
+

 if __name__ == "__main__":
    import argparse
@@ -312,7 +327,7 @@ if __name__ == "__main__":
    parser.add_argument('-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all functions, not just the ones that changed.")
+        help="Show all structs, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument('-s', '--size-sort', action='store_true',