# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""output formats for Talos"""
# NOTE: importing results here would create a circular dependency
# (results.py imports output.py), so we avoid importing it
import simplejson as json
from mozlog import get_proxy_logger
from talos import filter, utils
LOG = get_proxy_logger()
class Output(object):
"""abstract base class for Talos output"""
@classmethod
def check(cls, urls):
"""check to ensure that the urls are valid"""
def __init__(self, results, tsresult_class):
"""
- results : TalosResults instance
- tsresult_class : Results class
"""
self.results = results
self.tsresult_class = tsresult_class
def __call__(self):
suites = []
test_results = {
"application": {
"name": self.results.results[0].browser_name.lower(),
"version": self.results.results[0].browser_version,
},
"framework": {
"name": self.results.results[0].framework,
},
"suites": suites,
}
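        # test_results follows the structure that gets logged as
        # PERFHERDER_DATA in output() below: each entry in "suites" carries an
        # optional summary "value" plus a list of per-page "subtests".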
for test in self.results.results:
# serialize test results
tsresult = None
if not test.using_xperf:
subtests = []
suite = {
"name": test.name(),
"extraOptions": self.results.extra_options or [],
"subtests": subtests,
"shouldAlert": test.test_config.get("suite_should_alert", True),
}
suites.append(suite)
vals = []
replicates = {}
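                # replicates maps page name -> raw run values; they are
                # attached verbatim to each subtest below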
                # TODO: counters!!!! we don't have any, but they suffer the same issue
for result in test.results:
# XXX this will not work for manifests which list
# the same page name twice. It also ignores cycles
for page, val in result.raw_values():
if page == "NULL":
page = test.name()
if tsresult is None:
tsresult = r = self.tsresult_class()
r.results = [
{"index": 0, "page": test.name(), "runs": val}
]
else:
r = tsresult.results[0]
if r["page"] == test.name():
r["runs"].extend(val)
replicates.setdefault(page, []).extend(val)
tresults = [tsresult] if tsresult else test.results
# Merge results for the same page when using cycle > 1
merged_results = {}
for result in tresults:
results = []
for r in result.results:
page = r["page"]
if page in merged_results:
merged_results[page]["runs"].extend(r["runs"])
else:
merged_results[page] = r
results.append(r)
# override the list of page results for each run
result.results = results
for result in tresults:
filtered_results = result.values(
suite["name"], test.test_config["filters"]
)
vals.extend([[i["value"], j] for i, j in filtered_results])
subtest_index = 0
for val, page in filtered_results:
if page == "NULL":
# no real subtests
page = test.name()
subtest = {
"name": page,
"value": val["filtered"],
"replicates": replicates[page],
}
                        # if results are from a comparison test, i.e. perf-reftest, they will
                        # also contain replicates for 'base' and 'reference'; we keep those for
                        # reference, since the actual result was calculated as their difference
base_runs = result.results[subtest_index].get("base_runs", None)
ref_runs = result.results[subtest_index].get("ref_runs", None)
if base_runs and ref_runs:
subtest["base_replicates"] = base_runs
subtest["ref_replicates"] = ref_runs
subtests.append(subtest)
subtest_index += 1
if test.test_config.get("lower_is_better") is not None:
subtest["lowerIsBetter"] = test.test_config[
"lower_is_better"
]
if test.test_config.get("alert_threshold") is not None:
subtest["alertThreshold"] = test.test_config[
"alert_threshold"
]
if test.test_config.get("subtest_alerts") is not None:
subtest["shouldAlert"] = test.test_config["subtest_alerts"]
if test.test_config.get("alert_threshold") is not None:
subtest["alertThreshold"] = test.test_config[
"alert_threshold"
]
if test.test_config.get("unit"):
subtest["unit"] = test.test_config["unit"]
# if there is only one subtest, carry alerting setting from the suite
if len(subtests) == 1:
subtests[0]["shouldAlert"] = suite["shouldAlert"]
# if there is more than one subtest, calculate a summary result
elif len(subtests) > 1:
suite["value"] = self.construct_results(vals, testname=test.name())
if test.test_config.get("lower_is_better") is not None:
suite["lowerIsBetter"] = test.test_config["lower_is_better"]
if test.test_config.get("alert_threshold") is not None:
suite["alertThreshold"] = test.test_config["alert_threshold"]
# counters results_aux data
counter_subtests = []
for cd in test.all_counter_results:
for name, vals in cd.items():
                    # We want to add the xperf data as talos_counters;
                    # exclude counters whose values are lists/tuples (bad for
                    # graphserver)
if len(vals) > 0 and isinstance(vals[0], list):
continue
# mainthread IO is a list of filenames and accesses, we do
# not report this as a counter
if "mainthreadio" in name:
continue
                    # responsiveness has its own metric, not the mean
# TODO: consider doing this for all counters
if "responsiveness" == name:
subtest = {
"name": name,
"value": filter.responsiveness_Metric(vals),
}
counter_subtests.append(subtest)
continue
subtest = {
"name": name,
"value": 0.0,
}
counter_subtests.append(subtest)
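                    # the value is filled in below: first sample for xperf
                    # counters, mean of the samples otherwise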
if test.using_xperf:
if len(vals) > 0:
subtest["value"] = vals[0]
else:
# calculate mean value
if len(vals) > 0:
varray = [float(v) for v in vals]
subtest["value"] = filter.mean(varray)
if counter_subtests:
suites.append(
{
"name": test.name(),
"extraOptions": self.results.extra_options or [],
"subtests": counter_subtests,
"shouldAlert": test.test_config.get("suite_should_alert", True),
}
)
return test_results
def output(self, results, results_url):
"""output to the a file if results_url starts with file://
- results : json instance
- results_url : file:// URL
"""
# parse the results url
results_url_split = utils.urlsplit(results_url)
results_scheme, results_server, results_path, _, _ = results_url_split
if results_scheme in ("http", "https"):
self.post(results, results_server, results_path, results_scheme)
elif results_scheme == "file":
with open(results_path, "w") as f:
for result in results:
f.write("%s\n" % result)
else:
raise NotImplementedError(
"%s: %s - only http://, https://, and file:// supported"
% (self.__class__.__name__, results_url)
)
# This is the output that treeherder expects to find when parsing the
# log file
if "gecko-profile" in self.results.extra_options:
LOG.info("gecko-profile enabled")
for suite in results["suites"]:
suite["shouldAlert"] = False
for subtest in suite["subtests"]:
subtest["shouldAlert"] = False
LOG.info("PERFHERDER_DATA: %s" % json.dumps(results, ignore_nan=True))
if results_scheme in ("file"):
json.dump(
results,
open(results_path, "w"),
indent=2,
sort_keys=True,
ignore_nan=True,
)
def post(self, results, server, path, scheme):
raise NotImplementedError("Abstract base class")
@classmethod
def shortName(cls, name):
"""short name for counters"""
names = {"% Processor Time": "%cpu", "XRes": "xres"}
return names.get(name, name)
@classmethod
def isMemoryMetric(cls, resultName):
"""returns if the result is a memory metric"""
memory_metric = ["xres"] # measured in bytes
        return any(i in resultName for i in memory_metric)
    @classmethod
    def v8_Metric(cls, val_list):
        """v8 benchmark score: 100 * geometric mean of the subtest results"""
        results = [i for i, j in val_list]
        score = 100 * filter.geometric_mean(results)
        return score
    @classmethod
    def JS_Metric(cls, val_list):
        """kraken score: sum of the subtest times"""
        results = [i for i, j in val_list]
        return sum(results)
@classmethod
def benchmark_score(cls, val_list):
"""
benchmark_score: ares6/jetstream self reported as 'geomean'
"""
results = [i for i, j in val_list if j == "geomean"]
return filter.mean(results)
@classmethod
def stylebench_score(cls, val_list):
"""
"""
correctionFactor = 3
results = [i for i, j in val_list]
        # stylebench has 5 tests, each of which is made up of 5 subtests
#
# * Adding classes.
# * Removing classes.
# * Mutating attributes.
# * Adding leaf elements.
# * Removing leaf elements.
#
# which are made of two subtests each (sync/async) and repeated 5 times
# each, thus, the list here looks like:
#
# [Test name/Adding classes - 0/ Sync; <x>]
# [Test name/Adding classes - 0/ Async; <y>]
# [Test name/Adding classes - 0; <x> + <y>]
# [Test name/Removing classes - 0/ Sync; <x>]
# [Test name/Removing classes - 0/ Async; <y>]
# [Test name/Removing classes - 0; <x> + <y>]
# ...
# [Test name/Adding classes - 1 / Sync; <x>]
# [Test name/Adding classes - 1 / Async; <y>]
# [Test name/Adding classes - 1 ; <x> + <y>]
# ...
# [Test name/Removing leaf elements - 4; <x> + <y>]
# [Test name; <sum>] <- This is what we want.
#
# So, 5 (subtests) *
# 5 (repetitions) *
# 3 (entries per repetition (sync/async/sum)) =
        # 75 entries per test before the sum.
        #
        # We receive 76 entries per test, which adds up to 380. We want to use
        # the 5 per-test sum entries, not the rest.
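        # For example, with 5 tests the per-test sum entries sit at indices
        # 75, 151, 227, 303 and 379, which is exactly what the results[75::76]
        # slice below selects.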
if len(results) != 380:
raise Exception(
"StyleBench requires 380 entries, found: %s instead" % len(results)
)
results = results[75::76]
# pylint --py3k W1619
score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
return score
@classmethod
def damp_score(cls, val_list):
"""
damp_score: damp is only interested in the value of subtests and will
aggregate data from several suites.
Use a hardcoded value for the suite to avoid inconsistencies.
"""
return 100
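    @classmethod
    def speedometer_score(cls, val_list):
        """
        speedometer_score: reduce Speedometer's per-subtest entries to a
        single score.

        NOTE: this is a sketch added so that construct_results() below has a
        handler for "speedometer" tests; the layout it assumes (16 tests, each
        reporting 9 subtest values plus their sum, i.e. 160 entries with every
        10th entry being the per-test sum) mirrors the StyleBench handling
        above and may need adjusting to match the actual harness output.
        """
        correctionFactor = 3
        results = [i for i, j in val_list]
        if len(results) != 160:
            raise Exception(
                "Speedometer requires 160 entries, found: %s instead" % len(results)
            )
        # keep only the per-test sum entries
        results = results[9::10]
        score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
        return score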
def construct_results(self, vals, testname):
if "responsiveness" in testname:
return filter.responsiveness_Metric([val for (val, page) in vals])
elif testname.startswith("v8_7"):
return self.v8_Metric(vals)
elif testname.startswith("kraken"):
return self.JS_Metric(vals)
elif testname.startswith("ares6"):
return self.benchmark_score(vals)
elif testname.startswith("jetstream"):
return self.benchmark_score(vals)
elif testname.startswith("speedometer"):
return self.speedometer_score(vals)
elif testname.startswith("stylebench"):
return self.stylebench_score(vals)
elif testname.startswith("damp"):
return self.damp_score(vals)
elif len(vals) > 1:
return filter.geometric_mean([i for i, j in vals])
else:
return filter.mean([i for i, j in vals])
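# Typical usage (sketch; the real caller lives elsewhere in the Talos harness
# and the names below are illustrative only):
#
#     out = Output(talos_results, tsresult_class)
#     perfherder_data = out()  # build the suites/subtests dict
#     out.output(perfherder_data, "file:///path/to/results.json")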