gecko/testing/runtimes/writeruntimes.py
Andrew Halberstadt f884d25860 Bug 1199241 - Average runtime data across platforms instead of keeping it distinct, r=jgriffin
Keeping distinct platform runtime files takes up a lot of space. It also adds complexity and
greater possibility for mistakes, if a file for some arbitrary platform (like pgo) doesn't
exist, there will be a failure. This patch removes all platform specific runtime files and
replaces them with a single per suite file. The times in this file are averaged across all
platforms the tests run on. With the added space savings, we take into account the 50th
percentile of tests, instead of the 90th.
2015-08-27 17:37:26 -04:00

123 lines
3.9 KiB
Python

from argparse import ArgumentParser
from collections import defaultdict
import json
import os
import sys
import requests
here = os.path.abspath(os.path.dirname(__file__))
ACTIVE_DATA_URL = "http://activedata.allizom.org/query"
PERCENTILE = 0.5 # ignore the bottom PERCENTILE*100% of numbers
def query_activedata(suite, platforms=None):
platforms = ', "build.platform":%s' % json.dumps(platforms) if platforms else ''
query = """
{
"from":"unittest",
"limit":200000,
"groupby":["result.test"],
"select":{"value":"result.duration","aggregate":"average"},
"where":{"and":[
{"eq":{"suite":"%s"%s}},
{"gt":{"run.timestamp":"{{today-week}}"}}
]}
}
""" % (suite, platforms)
response = requests.post(ACTIVE_DATA_URL,
data=query,
stream=True)
response.raise_for_status()
data = response.json()["data"]
return data
def write_runtimes(data, suite, indir=here, outdir=here):
data = dict(data)
outfilename = os.path.join(outdir, "%s.runtimes.json" % suite)
infilename = os.path.join(indir, "%s.runtimes.json" % suite)
if not os.path.exists(outdir):
os.makedirs(outdir)
# read in existing data, if any
indata = None
if os.path.exists(infilename):
with open(infilename, 'r') as f:
indata = json.loads(f.read()).get('runtimes')
# identify a threshold of durations, below which we ignore
runtimes = []
for result in data.itervalues():
duration = int(result * 1000) if result else 0
if duration:
runtimes.append(duration)
runtimes.sort()
threshold = runtimes[int(len(runtimes) * PERCENTILE)]
# split the durations into two groups; ommitted and specified
ommitted = []
specified = indata if indata else {}
current_tests = []
for test, duration in data.iteritems():
current_tests.append(test)
duration = int(duration * 1000) if duration else 0
if duration > 0 and duration < threshold:
ommitted.append(duration)
if test in specified:
del specified[test]
elif duration >= threshold and test != "automation.py":
original = specified.get(test, 0)
if not original or abs(original - duration) > (original/20):
# only write new data if it's > 20% different than original
specified[test] = duration
# delete any test references no longer needed
to_delete = []
for test in specified:
if test not in current_tests:
to_delete.append(test)
for test in to_delete:
del specified[test]
avg = int(sum(ommitted)/len(ommitted))
results = {'excluded_test_average': avg,
'runtimes': specified}
with open(outfilename, 'w') as f:
f.write(json.dumps(results, indent=2, sort_keys=True))
def cli(args=sys.argv[1:]):
parser = ArgumentParser()
parser.add_argument('-o', '--output-directory', dest='outdir',
default=here, help="Directory to save runtime data.")
parser.add_argument('-i', '--input-directory', dest='indir',
default=here, help="Directory from which to read current runtime data.")
parser.add_argument('-p', '--platforms', default=None,
help="Comma separated list of platforms from which to generate data.")
parser.add_argument('-s', '--suite', dest='suite', default=None,
help="Suite for which to generate data.")
args = parser.parse_args(args)
if not args.suite:
raise ValueError("Must specify suite with the -u argument")
if ',' in args.suite:
raise ValueError("Passing multiple suites is not supported")
if args.platforms:
args.platforms = args.platforms.split(',')
data = query_activedata(args.suite, args.platforms)
write_runtimes(data, args.suite, indir=args.indir, outdir=args.outdir)
if __name__ == "__main__":
sys.exit(cli())