gecko/memory/replace/dmd/dmd.py
Nicholas Nethercote cce4aa7ac7 Bug 1074008 - Add a --fix-stacks option to dmd.py. r=glandium.
--HG--
extra : rebase_source : 6ecfdf21ed09671bf9e1e65072dafc1402052a65
2014-09-28 18:36:49 -07:00

452 lines
16 KiB
Python
Executable File

#! /usr/bin/python
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
'''This script analyzes a JSON file emitted by DMD.'''
from __future__ import print_function, division

import argparse
import collections
import functools
import json
import os
import platform
import re
import shutil
import sys
import tempfile
# The DMD output version this script handles. main() compares this against the
# 'version' property of the loaded JSON and raises if the two differ.
outputVersion = 1
# Substrings that identify allocation functions. When --ignore-alloc-fns is
# given, main() drops the leading frames of every stack trace whose frame
# description contains one of these strings.
allocatorFns = [
    # replace_* names
    'replace_malloc', 'replace_calloc', 'replace_realloc',
    'replace_memalign', 'replace_posix_memalign',

    # moz_x* names
    'moz_xmalloc', 'moz_xcalloc', 'moz_xrealloc',

    # C++ allocation operators
    'operator new(', 'operator new[](',

    # g_* names
    'g_malloc', 'g_slice_alloc',

    # *CanGC names
    'callocCanGC', 'reallocCanGC',

    # vpx_* names
    'vpx_malloc', 'vpx_calloc', 'vpx_realloc', 'vpx_memalign',

    # js_* names
    'js_malloc', 'js_calloc', 'js_realloc',

    # pod_* names
    'pod_malloc', 'pod_calloc', 'pod_realloc',
]
class Record(object):
    '''Running totals for a group of heap blocks that share a record key.

    Instances are created empty and filled in by main(), which also attaches
    |allocatedAt| (and, when present, |reportedAts|) as extra attributes.
    The cmpBy* static methods are old-style comparison functions: they return
    a negative number, zero or a positive number.
    '''

    def __init__(self):
        # Number of blocks aggregated into this record.
        self.numBlocks = 0
        # Sums of the requested, slop and usable (requested + slop) sizes.
        self.reqSize = 0
        self.slopSize = 0
        self.usableSize = 0
        # True once any aggregated block was sampled.
        self.isSampled = False
        # Maps (usableSize, isSampled) to the number of blocks with that pair.
        self.usableSizes = collections.defaultdict(int)

    @staticmethod
    def _cmp(a, b):
        '''Return -1, 0 or 1 as |a| is less than, equal to or greater than |b|.

        Stands in for the builtin cmp(), which only exists in Python 2.
        '''
        return (a > b) - (a < b)

    @staticmethod
    def cmpByIsSampled(r1, r2):
        # Treat sampled as smaller than non-sampled.
        return Record._cmp(r2.isSampled, r1.isSampled)

    @staticmethod
    def cmpByUsableSize(r1, r2):
        # Sort by usable size, then req size, then by isSampled.
        return (Record._cmp(r1.usableSize, r2.usableSize) or
                Record.cmpByReqSize(r1, r2))

    @staticmethod
    def cmpByReqSize(r1, r2):
        # Sort by req size, then by isSampled.
        return (Record._cmp(r1.reqSize, r2.reqSize) or
                Record.cmpByIsSampled(r1, r2))

    @staticmethod
    def cmpBySlopSize(r1, r2):
        # Sort by slop size, then by isSampled.
        return (Record._cmp(r1.slopSize, r2.slopSize) or
                Record.cmpByIsSampled(r1, r2))
# Maps each value accepted by -s/--sort-by to the Record comparison function
# that main() uses to order records before printing them.
sortByChoices = {
    'usable': Record.cmpByUsableSize, # the default
    'req': Record.cmpByReqSize,
    'slop': Record.cmpBySlopSize,
}
def parseCommandLine():
    '''Parse sys.argv[1:] and return the resulting argparse namespace.

    The namespace has these attributes: output (an open file, or None when
    -o is absent), max_frames (an int in 1..24, or None when -f is absent),
    ignore_reports, sort_by (a key of sortByChoices), ignore_alloc_fns,
    show_all_block_sizes, no_fix_stacks and input_file (an open file).
    '''
    # 24 is the maximum number of frames that DMD will produce.
    def range_1_24(string):
        value = int(string)
        if value < 1 or value > 24:
            msg = '{:s} is not in the range 1..24'.format(string)
            raise argparse.ArgumentTypeError(msg)
        return value

    # NOTE(review): the text below says input is read from stdin when no file
    # is given, but input_file is declared as a required positional argument.
    description = '''
Analyze heap data produced by DMD.
If no files are specified, read from stdin.
Write to stdout unless -o/--output is specified.
Stack traces are fixed to show function names, filenames and line numbers
unless --no-fix-stacks is specified; stack fixing modifies the original file
and may take some time.
'''
    p = argparse.ArgumentParser(description=description)

    p.add_argument('-o', '--output', type=argparse.FileType('w'),
                   help='output file; stdout if unspecified')

    p.add_argument('-f', '--max-frames', type=range_1_24,
                   help='maximum number of frames to consider in each trace')

    p.add_argument('-r', '--ignore-reports', action='store_true',
                   help='ignore memory reports data; useful if you just ' +
                        'want basic heap profiling')

    # Name the default explicitly: taking the first key of the dict would
    # make it depend on dict iteration order, and dict views cannot be
    # indexed at all on Python 3.
    p.add_argument('-s', '--sort-by', choices=sortByChoices.keys(),
                   default='usable',
                   help='sort the records by a particular metric')

    p.add_argument('-a', '--ignore-alloc-fns', action='store_true',
                   help='ignore allocation functions at the start of traces')

    p.add_argument('-b', '--show-all-block-sizes', action='store_true',
                   help='show individual block sizes for each record')

    p.add_argument('--no-fix-stacks', action='store_true',
                   help='do not fix stacks')

    p.add_argument('input_file', type=argparse.FileType('r'))

    return p.parse_args(sys.argv[1:])
# Fix stacks if necessary: first write the output to a tempfile, then replace
# the original file with it.
def fixStackTraces(args):
    '''Rewrite args.input_file in place with its stack frames fixed.

    Every line of the input is passed through the platform's fixSymbols()
    function and written to a temporary file, which then replaces the
    original file. On return args.input_file is a fresh handle on the
    rewritten file. On platforms other than Linux and Darwin nothing is done
    and args.input_file is left untouched.
    '''
    # This append() call is needed to make the import statements work when this
    # script is installed as a symlink.
    sys.path.append(os.path.dirname(__file__))

    # XXX: should incorporate fix_stack_using_bpsyms.py here as well, like in
    # testing/mochitests/runtests.py
    sysname = platform.system()
    if sysname == 'Linux':
        import fix_linux_stack as fixModule
    elif sysname == 'Darwin':
        import fix_macosx_stack as fixModule
    else:
        # there is no fix script for Windows
        return

    fix = fixModule.fixSymbols
    inputName = args.input_file.name

    # Fix stacks, writing output to a temporary file, and then overwrite the
    # original file. The temporary file is opened in text mode because the
    # lines read from the input file are text.
    tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
    replaced = False
    try:
        with tmp:
            for line in args.input_file:
                tmp.write(fix(line))

        # Release the handle on the old contents before replacing the file.
        args.input_file.close()
        shutil.move(tmp.name, inputName)
        replaced = True
    finally:
        # delete=False means nobody else removes the temporary file if
        # fixing or moving failed part-way through.
        if not replaced and os.path.exists(tmp.name):
            os.remove(tmp.name)

    args.input_file = open(inputName)
def main():
    '''Analyze a DMD JSON file and print a report.

    Steps: parse the command line, fix stack traces unless --no-fix-stacks
    was given, load the JSON, optionally strip leading allocator frames and
    trim traces to --max-frames, aggregate blocks into records, then print
    the invocation, the records of each kind and a summary to -o/--output
    (stdout when unspecified).

    Raises Exception if the JSON 'version' property differs from
    outputVersion, or if a sampled block (one without 'req') has a 'slop'
    property.
    '''
    args = parseCommandLine()

    # Fix stack traces unless otherwise instructed.
    if not args.no_fix_stacks:
        fixStackTraces(args)

    j = json.load(args.input_file)

    if j['version'] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    # Extract the main parts of the JSON object.
    invocation = j['invocation']
    dmdEnvVar = invocation['dmdEnvVar']
    sampleBelowSize = invocation['sampleBelowSize']
    blockList = j['blockList']
    traceTable = j['traceTable']
    frameTable = j['frameTable']

    heapIsSampled = sampleBelowSize > 1     # is sampling present?

    # Remove allocation functions at the start of traces.
    if args.ignore_alloc_fns:
        # Build a regexp that matches every function in allocatorFns.
        escapedAllocatorFns = map(re.escape, allocatorFns)
        fn_re = re.compile('|'.join(escapedAllocatorFns))

        # Remove allocator fns from each stack trace. Iterate over a snapshot
        # because entries of traceTable are replaced inside the loop.
        for traceKey, frameKeys in list(traceTable.items()):
            numSkippedFrames = 0
            for frameKey in frameKeys:
                frameDesc = frameTable[frameKey]
                if fn_re.search(frameDesc):
                    numSkippedFrames += 1
                else:
                    break
            if numSkippedFrames > 0:
                traceTable[traceKey] = frameKeys[numSkippedFrames:]

    # Trim the number of frames. When -f/--max-frames is absent the value is
    # None and nothing is trimmed; comparing an int with None only happened
    # to work on Python 2 and raises TypeError on Python 3.
    if args.max_frames is not None:
        for traceKey, frameKeys in list(traceTable.items()):
            if len(frameKeys) > args.max_frames:
                traceTable[traceKey] = frameKeys[:args.max_frames]

    # Aggregate blocks into records. All sufficiently similar blocks go into a
    # single record.

    if args.ignore_reports:
        liveRecords = collections.defaultdict(Record)
    else:
        unreportedRecords = collections.defaultdict(Record)
        onceReportedRecords = collections.defaultdict(Record)
        twiceReportedRecords = collections.defaultdict(Record)

    heapUsableSize = 0
    heapBlocks = 0

    for block in blockList:
        # For each block we compute a |recordKey|, and all blocks with the same
        # |recordKey| are aggregated into a single record. The |recordKey| is
        # derived from the block's 'alloc' and 'reps' (if present) stack
        # traces.
        #
        # Each stack trace has a key in the JSON file. But we don't use that
        # key to construct |recordKey|; instead we use the frame keys.
        # This is because the stack trimming done for --max-frames can cause
        # stack traces with distinct trace keys to end up with the same frame
        # keys, and these should be considered equivalent. E.g. if we have
        # distinct traces T1:[A,B,C] and T2:[A,B,D] and we trim the final frame
        # of each they should be considered equivalent.
        allocatedAt = block['alloc']
        recordKey = str(traceTable[allocatedAt])
        if args.ignore_reports:
            records = liveRecords
        elif 'reps' in block:
            reportedAts = block['reps']
            for reportedAt in reportedAts:
                recordKey += str(traceTable[reportedAt])
            if len(reportedAts) == 1:
                records = onceReportedRecords
            else:
                records = twiceReportedRecords
        else:
            records = unreportedRecords

        record = records[recordKey]

        if 'req' in block:
            # not sampled
            reqSize = block['req']
            slopSize = block.get('slop', 0)
            isSampled = False
        else:
            # sampled
            reqSize = sampleBelowSize
            if 'slop' in block:
                raise Exception("'slop' property in sampled block'")
            slopSize = 0
            isSampled = True

        usableSize = reqSize + slopSize
        heapUsableSize += usableSize
        heapBlocks += 1

        record.numBlocks += 1
        record.reqSize += reqSize
        record.slopSize += slopSize
        record.usableSize += usableSize
        record.isSampled = record.isSampled or isSampled
        record.allocatedAt = allocatedAt
        if not args.ignore_reports and 'reps' in block:
            record.reportedAts = block['reps']
        record.usableSizes[(usableSize, isSampled)] += 1

    # Print records.

    separator = '#' + '-' * 65 + '\n'

    def number(n, isSampled):
        '''Format a number, with comma as a separator and a '~' prefix if it's
        sampled.'''
        return '{:}{:,d}'.format('~' if isSampled else '', n)

    def perc(m, n):
        '''Return m as a percentage of n, or 0 when n is 0.'''
        return 0 if n == 0 else (100 * m / n)

    def plural(n):
        '''Return the suffix that pluralizes a noun counted by n.'''
        return '' if n == 1 else 's'

    # Prints to stdout, or to file if -o/--output was specified.
    def out(*arguments, **kwargs):
        print(*arguments, file=args.output, **kwargs)

    def printStack(traceTable, frameTable, traceKey):
        # The frame number is always '#00' (see DMD.h for why), so we have to
        # replace that with the correct frame number.
        for n, frameKey in enumerate(traceTable[traceKey], start=1):
            out(' #{:02d}{:}'.format(n, frameTable[frameKey][3:]))

    def printRecords(recordKind, records, heapUsableSize):
        '''Print every record of one kind, largest first.

        Returns a (usable size, block count) pair totalled over all records
        of this kind, including any that were not printed.
        '''
        RecordKind = recordKind.capitalize()
        out(separator)
        numRecords = len(records)
        cmpRecords = sortByChoices[args.sort_by]
        # cmp_to_key adapts the old-style comparison function; the |cmp|
        # keyword of sorted() does not exist on Python 3.
        sortedRecords = sorted(records.values(),
                               key=functools.cmp_to_key(cmpRecords),
                               reverse=True)
        kindBlocks = 0
        kindUsableSize = 0
        maxRecord = 1000

        # First iteration: get totals, etc.
        for record in sortedRecords:
            kindBlocks += record.numBlocks
            kindUsableSize += record.usableSize

        # Second iteration: print.
        if numRecords == 0:
            out('# no {:} heap blocks\n'.format(recordKind))

        kindCumulativeUsableSize = 0
        for i, record in enumerate(sortedRecords, start=1):
            # Stop printing at the |maxRecord|th record.
            if i == maxRecord:
                out('# {:}: stopping after {:,d} heap block records\n'.
                    format(RecordKind, i))
                break

            kindCumulativeUsableSize += record.usableSize

            isSampled = record.isSampled
            out(RecordKind + ' {')
            out(' {:} block{:} in heap block record {:,d} of {:,d}'.
                format(number(record.numBlocks, isSampled),
                       plural(record.numBlocks), i, numRecords))
            out(' {:} bytes ({:} requested / {:} slop)'.
                format(number(record.usableSize, isSampled),
                       number(record.reqSize, isSampled),
                       number(record.slopSize, isSampled)))
            out(' {:4.2f}% of the heap ({:4.2f}% cumulative)'.
                format(perc(record.usableSize, heapUsableSize),
                       perc(kindCumulativeUsableSize, heapUsableSize)))
            if not args.ignore_reports:
                out(' {:4.2f}% of {:} ({:4.2f}% cumulative)'.
                    format(perc(record.usableSize, kindUsableSize),
                           recordKind,
                           perc(kindCumulativeUsableSize, kindUsableSize)))

            if args.show_all_block_sizes:
                usableSizes = sorted(record.usableSizes.items(), reverse=True)

                out(' Individual block sizes: ', end='')
                isFirst = True
                for (usableSize, isSampled), count in usableSizes:
                    if not isFirst:
                        out('; ', end='')
                    out('{:}'.format(number(usableSize, isSampled)), end='')
                    if count > 1:
                        out(' x {:,d}'.format(count), end='')
                    isFirst = False
                out()

            out(' Allocated at {')
            printStack(traceTable, frameTable, record.allocatedAt)
            out(' }')
            # |reportedAts| only exists on records whose blocks had 'reps'.
            if not args.ignore_reports and hasattr(record, 'reportedAts'):
                for n, reportedAt in enumerate(record.reportedAts):
                    again = 'again ' if n > 0 else ''
                    out(' Reported {:}at {{'.format(again))
                    printStack(traceTable, frameTable, reportedAt)
                    out(' }')
            out('}\n')

        return (kindUsableSize, kindBlocks)

    # Print header.
    out(separator)
    out('Invocation {')
    out(' $DMD = \'' + dmdEnvVar + '\'')
    out(' Sample-below size = ' + str(sampleBelowSize))
    out('}\n')

    # Print records.
    if args.ignore_reports:
        liveUsableSize, liveBlocks = \
            printRecords('live', liveRecords, heapUsableSize)
    else:
        twiceReportedUsableSize, twiceReportedBlocks = \
            printRecords('twice-reported', twiceReportedRecords, heapUsableSize)

        unreportedUsableSize, unreportedBlocks = \
            printRecords('unreported', unreportedRecords, heapUsableSize)

        onceReportedUsableSize, onceReportedBlocks = \
            printRecords('once-reported', onceReportedRecords, heapUsableSize)

    # Print summary.
    out(separator)
    out('Summary {')
    if args.ignore_reports:
        out(' Total: {:} bytes in {:} blocks'.
            format(number(liveUsableSize, heapIsSampled),
                   number(liveBlocks, heapIsSampled)))
    else:
        fmt = ' {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)'
        out(fmt.
            format('Total:',
                   number(heapUsableSize, heapIsSampled),
                   100,
                   number(heapBlocks, heapIsSampled),
                   100))
        out(fmt.
            format('Unreported:',
                   number(unreportedUsableSize, heapIsSampled),
                   perc(unreportedUsableSize, heapUsableSize),
                   number(unreportedBlocks, heapIsSampled),
                   perc(unreportedBlocks, heapBlocks)))
        out(fmt.
            format('Once-reported:',
                   number(onceReportedUsableSize, heapIsSampled),
                   perc(onceReportedUsableSize, heapUsableSize),
                   number(onceReportedBlocks, heapIsSampled),
                   perc(onceReportedBlocks, heapBlocks)))
        out(fmt.
            format('Twice-reported:',
                   number(twiceReportedUsableSize, heapIsSampled),
                   perc(twiceReportedUsableSize, heapUsableSize),
                   number(twiceReportedBlocks, heapIsSampled),
                   perc(twiceReportedBlocks, heapBlocks)))
    out('}\n')
# Run the analysis only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()