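"""Field-change detection for LNT run submissions.

After a run is submitted, the tasks in this module compare its samples
against a window of neighboring runs, create or update FieldChange
objects for significant deltas, and group related changes into
Regressions.
"""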
import difflib
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.session import Session
from sqlalchemy.orm.exc import ObjectDeletedError
from typing import Tuple, List
import lnt.server.reporting.analysis
from lnt.testing.util.commands import timed
from lnt.util import logger
from lnt.server.db.regression import new_regression, RegressionState
from lnt.server.db.regression import rebuild_title
from sqlalchemy import or_
from lnt.server.db import rules_manager as rules
from lnt.server.db.testsuitedb import TestSuiteDB

# How many runs backwards to use in the previous run set.
# More runs are slower (more DB access), but may provide
# more accurate results.
FIELD_CHANGE_LOOKBACK = 10


def post_submit_tasks(session, ts, run_id):
"""Run the field change related post submission tasks.
"""
regenerate_fieldchanges_for_run(session, ts, run_id)


def delete_fieldchange(session, ts, change):
# type: (Session, TestSuiteDB, TestSuiteDB.FieldChange) -> List[int]
"""Delete this field change. Since it might be attahed to a regression
via regression indicators, fix those up too. If this orphans a regression
delete it as well."""
# Find the indicators.
indicators = session.query(ts.RegressionIndicator). \
filter(ts.RegressionIndicator.field_change_id == change.id). \
all()
# And all the related regressions.
regression_ids = [r.regression_id for r in indicators]
    # Remove the indicators that point to this change.
for ind in indicators:
session.delete(ind)
# Now we can remove the change, itself.
session.delete(change)
# We might have just created a regression with no changes.
# If so, delete it as well.
deleted_ids = []
for r in regression_ids:
remaining = session.query(ts.RegressionIndicator). \
filter(ts.RegressionIndicator.regression_id == r). \
all()
        if len(remaining) == 0:
            r = session.query(ts.Regression).get(r)
            logger.info("Deleting regression because it has no changes: " +
                        repr(r))
            session.delete(r)
            # Return ids, not ORM objects, per the declared List[int] type.
            deleted_ids.append(r.id)
session.commit()
return deleted_ids


@timed
def regenerate_fieldchanges_for_run(session, ts, run_id):
# type: (Session, TestSuiteDB, int) -> None
"""Regenerate the set of FieldChange objects for the given run.
"""
# Allow for potentially a few different runs, previous_runs, next_runs
# all with the same order_id which we will aggregate together to make
# our comparison result.
logger.info("Regenerate fieldchanges for %s run %s" % (ts, run_id))
run = ts.getRun(session, run_id)
runs = session.query(ts.Run). \
filter(ts.Run.order_id == run.order_id). \
filter(ts.Run.machine_id == run.machine_id). \
all()
previous_runs = ts.get_previous_runs_on_machine(session, run,
FIELD_CHANGE_LOOKBACK)
next_runs = ts.get_next_runs_on_machine(session, run,
FIELD_CHANGE_LOOKBACK)
# Find our start/end order.
if previous_runs != []:
start_order = previous_runs[0].order
else:
start_order = run.order
if next_runs != []:
end_order = next_runs[-1].order
else:
end_order = run.order
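    # e.g. with FIELD_CHANGE_LOOKBACK = 10 the comparison window runs from
    # the oldest of the ten runs before this one to the newest of the ten
    # runs after it, falling back to this run's own order at either end.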
# Load our run data for the creation of the new fieldchanges.
runs_to_load = [r.id for r in (runs + previous_runs)]
# When the same rev is submitted many times, the database accesses here
# can be huge, and it is almost always an error to have the same rev
# be used in so many runs.
run_size = len(runs_to_load)
if run_size > 50:
logger.warning("Generating field changes for {} runs."
"That will be very slow.".format(run_size))
runinfo = lnt.server.reporting.analysis.RunInfo(session, ts, runs_to_load)
# Only store fieldchanges for "metric" samples like execution time;
# not for fields with other data, e.g. hash of a binary
field_ids = [x.id for x in ts.Sample.get_metric_fields()]
# We need to make sure if a field change already exists we use it.
# Since querying on every possible test*field is expensive, get the
# interesting locations ahead of time, and keep them in memory to
# check if we should actually query.
changes_of_interest = set(session.query(ts.FieldChange.start_order_id,
ts.FieldChange.end_order_id,
ts.FieldChange.test_id,
ts.FieldChange.machine_id,
ts.FieldChange.field_id)
.filter(ts.FieldChange.start_order == start_order)
.filter(ts.FieldChange.end_order == end_order)
.filter(ts.FieldChange.test_id.in_(runinfo.test_ids))
.filter(ts.FieldChange.machine == run.machine)
.filter(ts.FieldChange.field_id.in_(field_ids))
.all())
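    # NOTE: the column order of these tuples must match the ``target``
    # tuple built in the loop below, or the membership test never hits.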
active_indicators = session.query(ts.FieldChange) \
.join(ts.RegressionIndicator) \
.join(ts.Regression) \
.filter(or_(ts.Regression.state == RegressionState.DETECTED,
ts.Regression.state == RegressionState.DETECTED_FIXED)) \
.options(joinedload(ts.FieldChange.start_order),
joinedload(ts.FieldChange.end_order),
joinedload(ts.FieldChange.test),
joinedload(ts.FieldChange.machine)) \
.all()
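    # The joinedload() options above eagerly fetch the order, test and
    # machine relations, so the matching loop in identify_related_changes
    # does not issue one extra query per candidate field change.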
for field in list(ts.Sample.get_metric_fields()):
for test_id in runinfo.test_ids:
f = None
result = runinfo.get_comparison_result(
runs, previous_runs, test_id, field,
ts.Sample.get_hash_of_binary_field())
# Try and find a matching FC and update, else create one.
            target = (start_order.id, run.order.id, test_id,
                      run.machine.id, field.id)
should_search = target in changes_of_interest
if should_search:
f = session.query(ts.FieldChange) \
.filter(ts.FieldChange.start_order == start_order) \
.filter(ts.FieldChange.end_order == end_order) \
.filter(ts.FieldChange.test_id == test_id) \
.filter(ts.FieldChange.machine == run.machine) \
.filter(ts.FieldChange.field_id == field.id) \
.one_or_none()
if not f:
logger.warning("Fell back to field lookup. Should not happen.")
if not result.is_result_performance_change() and f:
                # With more data, it's not a regression. Kill it!
logger.info("Removing field change: {}".format(f.id))
delete_fieldchange(session, ts, f)
continue
if result.is_result_performance_change() and not f:
test = session.query(ts.Test) \
.filter(ts.Test.id == test_id) \
.one()
f = ts.FieldChange(start_order=start_order,
end_order=run.order,
machine=run.machine,
test=test,
field_id=field.id)
session.add(f)
try:
found, new_reg = identify_related_changes(session, ts,
f, active_indicators)
except ObjectDeletedError:
                # This can happen from time to time, so retry once.
found, new_reg = identify_related_changes(session, ts,
f, active_indicators)
            if found:
                logger.info("Matched an existing regression for change on "
                            "machine {}".format(run.machine))
# Always update FCs with new values.
if f:
f.old_value = result.previous
f.new_value = result.current
f.run = run
session.commit()
rules.post_submission_hooks(session, ts, run_id)


def is_overlaping(fc1, fc2):
    # type: (TestSuiteDB.FieldChange, TestSuiteDB.FieldChange) -> bool
    """Returns true if the order ranges of these two changes intersect."""
try:
r1_min = fc1.start_order
r1_max = fc1.end_order
r2_min = fc2.start_order
r2_max = fc2.end_order
except AttributeError:
# If we are on first run, some of these could be None.
return False
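    # Closed ranges [r1_min, r1_max] and [r2_min, r2_max] intersect when
    # each starts before the other ends (Order objects compare by their
    # position in the run ordering).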
return (r1_min == r2_min and r1_max == r2_max) or \
(r1_min < r2_max and r2_min < r1_max)


def percent_similar(a, b):
    # type: (str, str) -> float
    """How similar are two strings, as a ratio in [0.0, 1.0]?

    Digits are treated as junk by the underlying SequenceMatcher, so
    runs of digits (e.g. revision numbers) do not anchor matches.

    :param a: first string
    :param b: second string
    """
s = difflib.SequenceMatcher(lambda x: x.isdigit(), a, b)
return s.ratio()


@timed
def identify_related_changes(session, ts, fc, active_indicators):
# type: (Session, TestSuiteDB, TestSuiteDB.FieldChange, List) -> Tuple[bool, List]
"""Can we find a home for this change in some existing regression? If a
match is found add a regression indicator adding this change to that
regression, otherwise create a new regression for this change.
Regression matching looks for regressions that happen in overlapping order
ranges. Then looks for changes that are similar.
"""
for change in active_indicators:
if is_overlaping(change, fc):
confidence = 0.0
confidence += percent_similar(change.machine.name,
fc.machine.name)
confidence += percent_similar(change.test.name,
fc.test.name)
if change.field_id == fc.field_id:
confidence += 1.0
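            # The score ranges over [0, 3]: up to 1.0 each for machine-name
            # and test-name similarity, plus 1.0 when the field matches.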
if confidence >= 2.0:
# Matching
MSG = "Found a match: {} with score {}."
regression = session.query(ts.Regression) \
.join(ts.RegressionIndicator) \
.filter(ts.RegressionIndicator.field_change_id == change.id) \
.one()
logger.info(MSG.format(str(regression),
confidence))
ri = ts.RegressionIndicator(regression, fc)
session.add(ri)
active_indicators.append(ri)
# Update the default title if needed.
rebuild_title(session, ts, regression)
logger.info("Updated title of Regression({}) to \"{}\"".format(regression.id, regression.title))
return True, regression
new_reg, new_indicators = new_regression(session, ts, [fc.id])
logger.info("Could not find a partner, creating new Regression for change: {}".format(new_reg.title))
active_indicators.extend(new_indicators)
return False, new_reg
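

# A minimal usage sketch, assuming a live SQLAlchemy ``session`` and a
# ``ts`` TestSuiteDB bound to it (both normally supplied by LNT's
# submission machinery; the names here are illustrative):
#
#     post_submit_tasks(session, ts, run_id)
#
# This regenerates the FieldChanges for ``run_id`` and then fires any
# registered post-submission rule hooks.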