Bug 1058178, part 2 - Implement address clamping analysis for DMD scan logs. r=njn

This adds a new option --clamp-contents to dmd.py. For every value contained in
the memory contents in the log, if the value is a pointer into a live block
(either to its start or into its middle), it is replaced with a pointer to the
start of that block. All other values are replaced with 0. This conservative
analysis makes it easier to determine which blocks point to other blocks.
This commit is contained in:
Andrew McCreight 2015-06-30 11:54:15 -07:00
parent c8a00cd5a2
commit cc28747e25
5 changed files with 218 additions and 6 deletions

View File

@ -18,6 +18,7 @@ import re
import shutil
import sys
import tempfile
from bisect import bisect_right
# The DMD output version this script handles.
outputVersion = 4
@ -188,6 +189,12 @@ variable is used to find breakpad symbols for stack fixing.
p.add_argument('--no-fix-stacks', action='store_true',
help='do not fix stacks')
p.add_argument('--clamp-contents', action='store_true',
help='for a scan mode log, clamp addresses to the start of live blocks, or zero if not in one')
p.add_argument('--print-clamp-stats', action='store_true',
help='print information about the results of pointer clamping; mostly useful for debugging clamping')
p.add_argument('--filter-stacks-for-testing', action='store_true',
help='filter stack traces; only useful for testing purposes')
@ -259,6 +266,9 @@ def getDigestFromFile(args, inputFile):
if not args.no_fix_stacks:
fixStackTraces(inputFile, isZipped, opener)
if args.clamp_contents:
clampBlockList(args, inputFile, isZipped, opener)
with opener(inputFile, 'rb') as f:
j = json.load(f)
@ -700,6 +710,197 @@ def printDigest(args, digest):
out('}\n')
#############################
# Pretty printer for DMD JSON
#############################
def prettyPrintDmdJson(out, j):
    """Write the parsed DMD log `j` to `out` as JSON, one entry per line.

    `out` is any object with a write() method.  `j` is a dict holding the
    'version', 'invocation', 'blockList', 'traceTable' and 'frameTable'
    entries of a DMD log.  Every block, trace and frame is written on a line
    of its own.

    Keys and values of the trace and frame tables are serialized with
    json.dumps() so that frame descriptions containing quotes or backslashes
    are escaped and the output is always valid JSON.
    """
    def writeEntries(entries):
        # Write already-serialized entries, one per line, comma separated.
        first = True
        for entry in entries:
            out.write('' if first else ',')
            out.write('\n ')
            out.write(entry)
            first = False

    out.write('{\n')

    out.write(' "version": {0},\n'.format(j['version']))
    out.write(' "invocation": ')
    json.dump(j['invocation'], out, sort_keys=True)
    out.write(',\n')

    out.write(' "blockList": [')
    writeEntries(json.dumps(b, sort_keys=True) for b in j['blockList'])
    out.write('\n ],\n')

    # items() rather than iteritems(): it exists on both Python 2 and 3.
    out.write(' "traceTable": {')
    writeEntries('{0}: {1}'.format(json.dumps(k), json.dumps(l))
                 for k, l in j['traceTable'].items())
    out.write('\n },\n')

    out.write(' "frameTable": {')
    writeEntries('{0}: {1}'.format(json.dumps(k), json.dumps(v))
                 for k, v in j['frameTable'].items())
    out.write('\n }\n')

    out.write('}\n')
##################################################################
# Code for clamping addresses using conservative pointer analysis.
##################################################################
class AddrRange:
    """The half-open address range [start, end) covered by one block.

    `block` is the block's address as a hexadecimal string and `length` is
    the number of bytes it spans.  `start` is the address of the first byte
    of the block, while `end` is the address of the first byte after the
    final byte in the block.
    """

    def __init__(self, block, length):
        startAddr = int(block, 16)
        endAddr = startAddr + length

        # Sanity checks: the address must be positive and the length must
        # not be negative.
        assert startAddr > 0
        assert length >= 0

        self.block = block
        self.length = length
        self.start = startAddr
        self.end = endAddr
class ClampStats:
    """Counters recording what happened to each value seen while clamping."""

    def __init__(self):
        # Number of pointers already pointing to the start of a block.
        self.startBlockPtr = 0

        # Number of pointers pointing to the middle of a block. These
        # are clamped to the start of the block they point into.
        self.midBlockPtr = 0

        # Number of null pointers.
        self.nullPtr = 0

        # Number of non-null pointers that didn't point into the middle
        # of any blocks. These are clamped to null.
        self.nonNullNonBlockPtr = 0

    def clampedBlockAddr(self, sameAddress):
        """Count a pointer into a block.

        `sameAddress` is true when the pointer already pointed at the start
        of the block, and false when it pointed into the middle.
        """
        if not sameAddress:
            self.midBlockPtr = self.midBlockPtr + 1
            return
        self.startBlockPtr = self.startBlockPtr + 1

    def nullAddr(self):
        """Count a null pointer."""
        self.nullPtr = self.nullPtr + 1

    def clampedNonBlockAddr(self):
        """Count a non-null pointer that was clamped to null."""
        self.nonNullNonBlockPtr = self.nonNullNonBlockPtr + 1

    def log(self):
        """Write a summary of all counters to stderr."""
        report = (
            (' Number of pointers already pointing to start of blocks: ',
             self.startBlockPtr),
            (' Number of pointers clamped to start of blocks: ',
             self.midBlockPtr),
            (' Number of non-null pointers not pointing into blocks clamped to null: ',
             self.nonNullNonBlockPtr),
            (' Number of null pointers: ',
             self.nullPtr),
        )
        sys.stderr.write('Results:\n')
        for label, count in report:
            sys.stderr.write(label + str(count) + '\n')
# Search the block ranges array for a block that address points into.
# The search is carried out in an array of starting addresses for each block
# because it is faster.
def clampAddress(blockRanges, blockStarts, clampStats, address):
    """Clamp `address` to the start of the block containing it, if any.

    `blockRanges` is a list of range objects (with `block`, `start` and
    `end` attributes) sorted by start address, and `blockStarts` is the
    parallel list of their start addresses.  `address` must not be lower
    than the first block's start; this is asserted.

    Returns the containing block's address string when `address` falls in
    [start, end) of some block, and '0' otherwise.  The outcome is recorded
    in `clampStats`.
    """
    # bisect_right returns the index of the first block starting strictly
    # after address, so the only candidate block is the one just before it.
    i = bisect_right(blockStarts, address)

    # Any addresses completely out of the range should have been eliminated already.
    assert i > 0
    r = blockRanges[i - 1]
    assert r.start <= address

    if address >= r.end:
        # The address lies past the end of the candidate block.  Only consult
        # the following block when there is one: r may be the last block, in
        # which case blockRanges[i] does not exist.
        assert i == len(blockRanges) or address < blockRanges[i].start
        clampStats.clampedNonBlockAddr()
        return '0'

    clampStats.clampedBlockAddr(r.start == address)
    return r.block
def clampBlockList(args, inputFileName, isZipped, opener):
    """Clamp the block contents of a scan mode DMD log and rewrite the file.

    Every value in a block's 'contents' that points into a block of the log
    is replaced by that block's address; every other value is replaced by
    '0'.  Trailing '0' entries are then dropped from each contents list.

    args          -- parsed command line; only `print_clamp_stats` is read.
    inputFileName -- path of the log, which is overwritten with the result.
    isZipped      -- if true, the rewritten log is gzip-compressed.
    opener        -- callable used as `opener(inputFileName, 'rb')` to open
                     the log for reading.

    Raises Exception if the log has the wrong version, was taken with
    sampling enabled, or was not taken in scan mode.
    """
    # XXX This isn't very efficient because we end up reading and writing
    # the file multiple times.
    with opener(inputFileName, 'rb') as f:
        j = json.load(f)

    if j['version'] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    # Check that the invocation is reasonable for contents clamping.
    invocation = j['invocation']
    if invocation['sampleBelowSize'] > 1:
        raise Exception("Heap analysis is not going to work with sampled blocks.")
    if invocation['mode'] != 'scan':
        raise Exception("Log was taken in mode " + invocation['mode'] + " not scan")

    sys.stderr.write('Creating block range list.\n')
    blockList = j['blockList']
    blockRanges = [AddrRange(block['addr'], block['req']) for block in blockList]
    blockRanges.sort(key=lambda r: r.start)

    # Make sure there are no overlapping blocks.
    for prevRange, currRange in zip(blockRanges, blockRanges[1:]):
        assert prevRange.end <= currRange.start

    sys.stderr.write('Clamping block contents.\n')
    clampStats = ClampStats()
    # A log without any blocks has nothing to clamp; the bounds are unused
    # in that case, but indexing the empty range list would fail.
    firstAddr = blockRanges[0].start if blockRanges else 0
    lastAddr = blockRanges[-1].end if blockRanges else 0
    blockStarts = [r.start for r in blockRanges]

    for block in blockList:
        # Small blocks don't have any contents.
        if 'contents' not in block:
            continue

        cont = block['contents']
        for i, word in enumerate(cont):
            address = int(word, 16)

            if address == 0:
                clampStats.nullAddr()
                continue

            # If the address is before the first block or after the last
            # block then it can't be within a block.
            if address < firstAddr or address >= lastAddr:
                clampStats.clampedNonBlockAddr()
                cont[i] = '0'
                continue

            cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address)

        # Remove any trailing nulls.
        while cont and cont[-1] == '0':
            cont.pop()

    if args.print_clamp_stats:
        clampStats.log()

    sys.stderr.write('Saving file.\n')
    rawFile = tempfile.NamedTemporaryFile(delete=False)
    tmpFilename = rawFile.name
    try:
        if isZipped:
            outFile = gzip.GzipFile(filename='', fileobj=rawFile)
        else:
            outFile = rawFile
        try:
            prettyPrintDmdJson(outFile, j)
        finally:
            # Closing the GzipFile writes the gzip trailer, but it does not
            # close a file object that was handed to it.
            if outFile is not rawFile:
                outFile.close()
    finally:
        # Close the underlying temporary file explicitly so everything is
        # flushed to disk before the file is moved into place.
        rawFile.close()
    shutil.move(tmpFilename, inputFileName)
def main():
args = parseCommandLine()
digest = getDigestFromFile(args, args.input_file)

View File

@ -1,5 +1,5 @@
#-----------------------------------------------------------------
# dmd.py --filter-stacks-for-testing -o basic-scan-32-actual.txt basic-scan.json
# dmd.py --filter-stacks-for-testing -o basic-scan-32-actual.txt --clamp-contents basic-scan.json
Invocation {
$DMD = '--mode=scan'

View File

@ -1,5 +1,5 @@
#-----------------------------------------------------------------
# dmd.py --filter-stacks-for-testing -o basic-scan-64-actual.txt basic-scan.json
# dmd.py --filter-stacks-for-testing -o basic-scan-64-actual.txt --clamp-contents basic-scan.json
Invocation {
$DMD = '--mode=scan'

View File

@ -24,6 +24,9 @@ This is only for testing. Input files can be gzipped.
'''
p = argparse.ArgumentParser(description=description)
p.add_argument('--clamp-contents', action='store_true',
help='expect that the contents of the JSON input file have had their addresses clamped')
p.add_argument('input_file',
help='a file produced by DMD')
@ -70,9 +73,12 @@ def main():
b = blockList[0]
# The expected values are based on hard-coded values in SmokeDMD.cpp.
addr = int(b['addr'], 16)
expected = ['123', '0', str(format(addr - 1, 'x')), b['addr'],
str(format(addr + 1, 'x')), '0']
if args.clamp_contents:
expected = ['0', '0', '0', b['addr'], b['addr']]
else:
addr = int(b['addr'], 16)
expected = ['123', '0', str(format(addr - 1, 'x')), b['addr'],
str(format(addr + 1, 'x')), '0']
checkScanContents(b['contents'], expected)

View File

@ -167,7 +167,12 @@ function run_test() {
let is64Bit = Components.classes["@mozilla.org/xre/app-info;1"]
.getService(Components.interfaces.nsIXULRuntime).is64Bit;
let basicScanFileName = "basic-scan-" + (is64Bit ? "64" : "32");
test(basicScanFileName, [jsonFile.path]);
test(basicScanFileName, ["--clamp-contents", jsonFile.path]);
ok(scanTest(jsonFile.path, ["--clamp-contents"]), "Scan with address clamping");
// Run the generic test a second time to ensure that the first time produced
// valid JSON output. "--clamp-contents" is passed in so we don't have to have
// more variants of the files.
test(basicScanFileName, ["--clamp-contents", jsonFile.path]);
jsonFile.remove(true);