import re

import lnt.testing
import lnt.util.stats

###
# Aggregation Function


class Aggregation(object):
    def __init__(self):
        self.is_initialized = False

    def __repr__(self):
        return repr(self.getvalue())

    def getvalue(self):
        # Subclasses must provide the aggregated value.
        raise NotImplementedError

    def append(self, values):
        if not self.is_initialized:
            self.is_initialized = True
            self._initialize(values)
        self._append(values)
class Sum(Aggregation):
    def __init__(self):
        Aggregation.__init__(self)
        self.sum = None

    def getvalue(self):
        return self.sum

    def _initialize(self, values):
        self.sum = [0.] * len(values)

    def _append(self, values):
        for i, value in enumerate(values):
            self.sum[i] += value


class Mean(Aggregation):
    def __init__(self):
        Aggregation.__init__(self)
        self.count = 0
        self.sum = None

    def getvalue(self):
        return [value/self.count for value in self.sum]

    def _initialize(self, values):
        self.sum = [0.] * len(values)

    def _append(self, values):
        for i, value in enumerate(values):
            self.sum[i] += value
        self.count += 1
class GeometricMean(Aggregation):
    def __init__(self):
        Aggregation.__init__(self)
        self.count = 0
        self.product = None

    def getvalue(self):
        # The geometric mean is the count-th root of the product.
        return [value ** (1.0/self.count) for value in self.product]

    def __repr__(self):
        return repr(self.getvalue())

    def _initialize(self, values):
        self.product = [1.] * len(values)

    def _append(self, values):
        for i, value in enumerate(values):
            self.product[i] *= value
        self.count += 1


class NormalizedMean(Mean):
    def _append(self, values):
        baseline = values[0]
        Mean._append(self, [v/baseline
                            for v in values])
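

# Illustrative sketch (not part of the report pipeline): how the aggregation
# classes above are meant to be driven.  Each append() call supplies one value
# per report-order column, and getvalue() returns the running aggregate for
# each column.  The function and values below are examples only.
def _example_aggregation_usage():
    total = Sum()
    total.append([1.0, 10.0])
    total.append([3.0, 30.0])
    assert total.getvalue() == [4.0, 40.0]

    # NormalizedMean divides every column by the first column of each sample
    # before averaging, so the first column of the result is always 1.0.
    normalized = NormalizedMean()
    normalized.append([2.0, 4.0])
    normalized.append([4.0, 4.0])
    assert normalized.getvalue() == [1.0, 1.5]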
###


class SummaryReport(object):
    def __init__(self, db, report_orders, report_machine_names,
                 report_machine_patterns):
        self.db = db
        self.testsuites = list(db.testsuite.values())
        self.report_orders = list((name, orders)
                                  for name, orders in report_orders)
        self.report_machine_names = set(report_machine_names)
        self.report_machine_patterns = list(report_machine_patterns)
        self.report_machine_rexes = [
            re.compile(pattern)
            for pattern in self.report_machine_patterns
        ]

        self.data_table = None
        self.requested_machine_ids = None
        self.requested_machines = None
        self.runs_at_index = None

        self.warnings = []
    def build(self, session):
        # Build a per-testsuite list of the machines that match the specified
        # names or patterns.
        def should_be_in_report(machine):
            if machine.name in self.report_machine_names:
                return True
            for rex in self.report_machine_rexes:
                if rex.match(machine.name):
                    return True

        self.requested_machines = dict(
            (ts, list(filter(should_be_in_report,
                             session.query(ts.Machine).all())))
            for ts in self.testsuites)
        self.requested_machine_ids = dict(
            (ts, [m.id for m in machines])
            for ts, machines in self.requested_machines.items()
        )

        # First, collect all the runs to summarize on, for each index in the
        # report orders.
        self.runs_at_index = []
        for _, orders in self.report_orders:
            # For each test suite...
            runs = []
            for ts in self.testsuites:
                # Find all the orders that match.
                result = session.query(ts.Order.id).\
                    filter(ts.Order.llvm_project_revision.in_(
                        orders)).all()
                ts_order_ids = [id for id, in result]

                # Find all the runs that match those orders.
                if not ts_order_ids:
                    ts_runs = []
                else:
                    ts_runs = session.query(ts.Run).\
                        filter(ts.Run.order_id.in_(ts_order_ids)).\
                        filter(ts.Run.machine_id.in_(
                            self.requested_machine_ids[ts])).all()

                if not ts_runs:
                    self.warnings.append(
                        'no runs for test suite %r in orders %r' % (
                            ts.name, orders))

                runs.append((ts_runs, ts_order_ids))
            self.runs_at_index.append(runs)
        # Load the tests for each testsuite.
        self.tests = dict((ts, dict((test.id, test)
                                    for test in session.query(ts.Test)))
                          for ts in self.testsuites)

        # Compute the base table for aggregation.
        #
        # The table is indexed by a test name and test features, which are
        # either extracted from the test name or from the test run (depending
        # on the suite).
        #
        # Each value in the table contains an array with one item for each
        # report_order entry, which holds all of the samples for that entry.
        #
        # The table keys are tuples of:
        #  (<test name>,
        #   <metric>,     # Value is either 'Compile Time' or 'Execution Time'.
        #   <arch>,
        #   <build mode>, # Value is either 'Debug' or 'Release'.
        #   <machine id>)
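        #
        # For example (illustrative key and values only), an entry might be:
        #   ('MultiSource/Benchmarks/sqlite3', 'Execution Time', 'x86',
        #    'Release', 'some-machine-name'):
        #       [[<samples for order 0>], [<samples for order 1>], ...]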
        self.data_table = {}
        self._build_data_table(session)

        # Compute indexed data table by applying the indexing functions.
        self._build_indexed_data_table()

        # Normalize across all machines.
        self._build_normalized_data_table()

        # Build final organized data tables.
        self._build_final_data_tables()
    def _build_data_table(self, session):
        def get_nts_datapoints_for_sample(ts, sample):
            # Get the basic sample info.
            run_id = sample[0]
            machine_id = run_machine_id_map[run_id]
            run_parameters = run_parameters_map[run_id]

            # Get the test.
            test = ts_tests[sample[1]]

            # The test name for a sample in the NTS suite is just the name of
            # the sample test.
            test_name = test.name

            # The arch and build mode are derived from the run flags.
            arch = run_parameters['cc_target'].split('-')[0]
            if '86' in arch:
                arch = 'x86'
            if run_parameters['OPTFLAGS'] == '-O0':
                build_mode = 'Debug'
            else:
                build_mode = 'Release'
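
            # For example (illustrative values): a 'cc_target' of
            # 'x86_64-apple-darwin10' yields arch 'x86', and an OPTFLAGS of
            # '-O0' yields 'Debug' while anything else yields 'Release'.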
            # Return a datapoint for each passing field.
            for field_name, field, status_field in ts_sample_metric_fields:
                # Ignore failing samples.
                if status_field:
                    status_field_index = ts.get_field_index(status_field)
                    if sample[2 + status_field_index] == lnt.testing.FAIL:
                        continue

                # Ignore missing samples.
                field_index = ts.get_field_index(field)
                value = sample[2 + field_index]
                if value is None:
                    continue

                # Otherwise, return a datapoint.
                if field_name == 'compile_time':
                    metric = 'Compile Time'
                else:
                    assert field_name == 'execution_time'
                    metric = 'Execution Time'
                yield ((test_name, metric, arch, build_mode, machine_id),
                       value)
        def get_compile_datapoints_for_sample(ts, sample):
            # Get the basic sample info.
            run_id = sample[0]
            machine_id = run_machine_id_map[run_id]
            run_parameters = run_parameters_map[run_id]

            # Get the test.
            test = ts_tests[sample[1]]

            # Extract the compile flags from the test name.
            base_name, flags = test.name.split('(')
            assert flags[-1] == ')'
            other_flags = []
            build_mode = None
            for flag in flags[:-1].split(','):
                # If this is an optimization flag, derive the build mode from
                # it.
                if flag.startswith('-O'):
                    if '-O0' in flag:
                        build_mode = 'Debug'
                    else:
                        build_mode = 'Release'
                    continue

                # If this is a 'config' flag, derive the build mode from it.
                if flag.startswith('config='):
                    if flag == "config='Debug'":
                        build_mode = 'Debug'
                    else:
                        assert flag == "config='Release'"
                        build_mode = 'Release'
                    continue

                # Otherwise, treat the flag as part of the test name.
                other_flags.append(flag)

            # Form the test name prefix from the remaining flags.
            test_name_prefix = '%s(%s)' % (base_name, ','.join(other_flags))
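
            # For example (hypothetical test name), 'compile/foo.cpp/init/'
            # '(-O0,-g)' yields build_mode 'Debug', other_flags ['-g'], and a
            # prefix of 'compile/foo.cpp/init/(-g)'.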
            # Extract the arch from the run info (and normalize).
            arch = run_parameters['cc_target'].split('-')[0]
            if arch.startswith('arm'):
                arch = 'ARM'
            elif '86' in arch:
                arch = 'x86'

            # The metric is fixed.
            metric = 'Compile Time'

            # Report the user and wall time.
            for field_name, field, status_field in ts_sample_metric_fields:
                if field_name not in ('user_time', 'wall_time'):
                    continue

                # Ignore failing samples.
                if status_field:
                    status_field_index = ts.get_field_index(status_field)
                    if sample[2 + status_field_index] == lnt.testing.FAIL:
                        continue

                # Ignore missing samples.
                field_index = ts.get_field_index(field)
                value = sample[2 + field_index]
                if value is None:
                    continue

                # Otherwise, return a datapoint.
                yield (('%s.%s' % (test_name_prefix, field_name), metric, arch,
                        build_mode, machine_id), value)

        def get_datapoints_for_sample(ts, sample):
            # The exact datapoints in each sample depend on the testsuite.
            if ts.name == 'nts':
                return get_nts_datapoints_for_sample(ts, sample)
            else:
                assert ts.name == 'compile'
                return get_compile_datapoints_for_sample(ts, sample)
        # For each column...
        for index, runs in enumerate(self.runs_at_index):
            # For each test suite and run list...
            for ts, (ts_runs, _) in zip(self.testsuites, runs):
                ts_tests = self.tests[ts]

                # Compute the metric fields.
                ts_sample_metric_fields = [
                    (f.name, f, f.status_field)
                    for f in ts.Sample.get_metric_fields()]

                # Compute a mapping from run id to run.
                run_id_map = dict((r.id, r)
                                  for r in ts_runs)

                # Compute a mapping from run id to the machine name, which is
                # what the data table keys use as the machine id.
                run_machine_id_map = dict((r.id, r.machine.name)
                                          for r in ts_runs)

                # Preload the run parameters.
                run_parameters_map = dict((r.id, r.parameters)
                                          for r in ts_runs)

                # Load all the samples for all runs we are interested in.
                columns = [ts.Sample.run_id, ts.Sample.test_id]
                columns.extend(f.column for f in ts.sample_fields)
                samples = session.query(*columns).filter(
                    ts.Sample.run_id.in_(list(run_id_map.keys())))
                for sample in samples:
                    for key, value in get_datapoints_for_sample(ts, sample):
                        items = self.data_table.get(key)
                        if items is None:
                            items = [[]
                                     for _ in self.report_orders]
                            self.data_table[key] = items
                        items[index].append(value)
    def _build_indexed_data_table(self):
        def is_in_execution_time_filter(name):
            for key in ("SPEC", "ClamAV", "lencod", "minisat", "SIBSim4",
                        "SPASS", "sqlite3", "viterbi", "Bullet"):
                if key in name:
                    return True

        def compute_index_name(key):
            test_name, metric, arch, build_mode, machine_id = key

            # If this is a nightly test...
            if test_name.startswith('SingleSource/') or \
                    test_name.startswith('MultiSource/') or \
                    test_name.startswith('External/'):

                # If this is a compile time test, aggregate all values into a
                # cumulative compile time.
                if metric == 'Compile Time':
                    return ('Lmark', metric, build_mode, arch, machine_id), Sum

                # Otherwise, this is an execution time. Index the cumulative
                # result of a limited set of benchmarks.
                assert metric == 'Execution Time'
                if is_in_execution_time_filter(test_name):
                    return ('Lmark', metric, build_mode, arch, machine_id), Sum

                # Otherwise, ignore the test.
                return

            # Otherwise, we have a compile time suite test.

            # Ignore user time results for now.
            if not test_name.endswith('.wall_time'):
                return

            # Index full builds across all job sizes.
            if test_name.startswith('build/'):
                project_name, subtest_name = re.match(
                    r'build/(.*)\(j=[0-9]+\)\.(.*)', str(test_name)).groups()
                return (('Full Build (%s)' % (project_name,),
                         metric, build_mode, arch, machine_id),
                        NormalizedMean)
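
            # For example (hypothetical name), 'build/some-project(j=8)'
            # '.wall_time' is indexed as ('Full Build (some-project)', metric,
            # build_mode, arch, machine_id) and aggregated with NormalizedMean.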
            # Index single file tests across all inputs.
            if test_name.startswith('compile/'):
                file_name, stage_name, subtest_name = re.match(
                    r'compile/(.*)/(.*)/\(\)\.(.*)', str(test_name)).groups()
                return (('Single File (%s)' % (stage_name,),
                         metric, build_mode, arch, machine_id),
                        Mean)

            # Index PCH generation tests by input.
            if test_name.startswith('pch-gen/'):
                file_name, subtest_name = re.match(
                    r'pch-gen/(.*)/\(\)\.(.*)', str(test_name)).groups()
                return (('PCH Generation (%s)' % (file_name,),
                         metric, build_mode, arch, machine_id),
                        Mean)

            # Otherwise, ignore the test.
            return

        def is_missing_samples(values):
            for samples in values:
                if not samples:
                    return True

        self.indexed_data_table = {}
        for key, values in self.data_table.items():
            # Ignore any test which is missing some data.
            if is_missing_samples(values):
                self.warnings.append("missing values for %r" % (key,))
                continue

            # Select the median values.
            medians = [lnt.util.stats.median(samples)
                       for samples in values]

            # Compute the index name, and ignore unused tests.
            result = compute_index_name(key)
            if result is None:
                continue
            index_name, index_class = result

            item = self.indexed_data_table.get(index_name)
            if item is None:
                self.indexed_data_table[index_name] = item = index_class()
            item.append(medians)
    def _build_normalized_data_table(self):
        self.normalized_data_table = {}
        for key, indexed_value in self.indexed_data_table.items():
            test_name, metric, build_mode, arch, machine_id = key
            if test_name.startswith('Single File'):
                aggr = Mean
            else:
                aggr = NormalizedMean
            normalized_key = (test_name, metric, build_mode, arch)
            item = self.normalized_data_table.get(normalized_key)
            if item is None:
                self.normalized_data_table[normalized_key] = \
                    item = aggr()
            item.append(indexed_value.getvalue())

    single_file_stage_order = [
        'init', 'driver', 'syntax', 'irgen_only', 'codegen', 'assembly']
    def _build_final_data_tables(self):
        self.grouped_table = {}
        self.single_file_table = {}
        for key, normalized_value in self.normalized_data_table.items():
            test_name, metric, build_mode, arch = key

            # If this isn't a single file test, add a plot for it grouped by
            # metric and build mode.
            group_key = (metric, build_mode)
            if not test_name.startswith('Single File'):
                items = self.grouped_table[group_key] = self.grouped_table.get(
                    group_key, [])
                items.append((test_name, arch,
                              normalized_value.getvalue()))
                continue

            # Add to the single file stack.
            stage_name, = re.match(r'Single File \((.*)\)',
                                   test_name).groups()
            try:
                stack_index = self.single_file_stage_order.index(stage_name)
            except ValueError:
                stack_index = None

            # If we don't have an index for this stage, ignore it.
            if stack_index is None:
                continue

            # Otherwise, add the last value to the single file stack.
            stack = self.single_file_table.get(group_key)
            if stack is None:
                self.single_file_table[group_key] = stack = \
                    [None] * len(self.single_file_stage_order)
            stack[stack_index] = normalized_value.getvalue()[-1]

            # If this is the last single file stage, also add a plot for it.
            if stage_name == self.single_file_stage_order[-1]:
                items = self.grouped_table[group_key] = self.grouped_table.get(
                    group_key, [])
                values = normalized_value.getvalue()
                baseline = values[0]
                items.append(('Single File Tests', arch,
                              [v/baseline for v in values]))
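
        # Resulting shapes (for reference): grouped_table maps
        # (metric, build_mode) to a list of (plot name, arch, values per
        # report order) entries, while single_file_table maps the same key to
        # one value per stage in single_file_stage_order, taken from the last
        # report order.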