appengine/findit/findit_api.py - Issue 2425853005: [Findit] Modify Findit API to return more information to Sheriff-O-Matic.

Unified Diff: appengine/findit/findit_api.py

Issue 2425853005: [Findit] Modify Findit API to return more information to Sheriff-O-Matic. (Closed)

Patch Set: fix nits Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/findit/findit_api.py

diff --git a/appengine/findit/findit_api.py b/appengine/findit/findit_api.py

index 50a68c17d13866a51886a7abe5fe0b9483914e2f..40aa505612dbd12ea93f1d4a946bf19648203cbf 100644

--- a/appengine/findit/findit_api.py

+++ b/appengine/findit/findit_api.py

@@ -23,11 +23,15 @@ from common import auth_util

from common import constants

from common import time_util

from common.waterfall import failure_type

+from model import analysis_approach_type

from model.flake.flake_analysis_request import FlakeAnalysisRequest

+from model.suspected_cl_confidence import SuspectedCLConfidence

from model.wf_analysis import WfAnalysis

+from model.wf_suspected_cl import WfSuspectedCL

from model.wf_swarming_task import WfSwarmingTask

from model.wf_try_job import WfTryJob

from waterfall import buildbot

+from waterfall import suspected_cl_util

from waterfall import waterfall_config

from waterfall.flake import flake_analysis_service

from waterfall.flake import triggering_sources

@@ -55,15 +59,25 @@ class _BuildFailureCollection(messages.Message):

builds = messages.MessageField(_BuildFailure, 1, repeated=True)

+class _AnalysisApproach(messages.Enum):

+ HEURISTIC = analysis_approach_type.HEURISTIC

+ TRY_JOB = analysis_approach_type.TRY_JOB

class _SuspectedCL(messages.Message):

repo_name = messages.StringField(1, required=True)

revision = messages.StringField(2, required=True)

commit_position = messages.IntegerField(3, variant=messages.Variant.INT32)

+ confidence = messages.IntegerField(4, variant=messages.Variant.INT32)

+ analysis_approach = messages.EnumField(_AnalysisApproach, 5)

-class _AnalysisApproach(messages.Enum):

- HEURISTIC = 1

- TRY_JOB = 2

+class _TryJobStatus(messages.Enum):

stgao 2016/10/19 00:43:08 More status here: 1. not supported 2. pending 3. e

chanli 2016/10/19 03:00:59 I was thinking the try job status is to let sherif

stgao 2016/10/21 01:51:22 OK, I agree 2-4 could be merged into RUNNING or FI

stgao 2016/10/22 00:24:14 Per discussion offline, we will go with the curren

+ # Try job is pending or running. Can expect result from try job.

+ RUNNING = 1

+ # There is no try job, try job completed or try job finished with error.

+ # Result from try job is ready or no need to continue waiting for it.

+ FINISHED = 2

class _BuildFailureAnalysisResult(messages.Message):

@@ -79,6 +93,8 @@ class _BuildFailureAnalysisResult(messages.Message):

7, variant=messages.Variant.INT32)

suspected_cls = messages.MessageField(_SuspectedCL, 8, repeated=True)

analysis_approach = messages.EnumField(_AnalysisApproach, 9)

+ try_job_status = messages.EnumField(_TryJobStatus, 10)

+ is_flaky_test = messages.BooleanField(11, variant=messages.Variant.BOOL)

class _BuildFailureAnalysisResultCollection(messages.Message):

@@ -129,16 +145,47 @@ def _TriggerNewAnalysesOnDemand(builds):

class FindItApi(remote.Service):

"""FindIt API v1."""

+ def _GetConfidenceAndApproachForCL(

+ self, repo_name, revision, confidences, build, first_failure):

+ cl = WfSuspectedCL.Get(repo_name, revision)

+ if not cl:

+ return None, None

+ master_name = buildbot.GetMasterNameFromUrl(build.master_url)

+ builder_name = build.builder_name

+ current_build = build.build_number

+ # If the CL is found by a try job, only the first failure will be recorded.

+ # So we might need to go to the first failure to get CL information.

+ build_info = (cl.GetBuildInfo(master_name, builder_name, current_build) or

+ cl.GetBuildInfo(master_name, builder_name, first_failure))

+ confidence = suspected_cl_util.GetSuspectedCLConfidenceScore(

+ confidences, build_info)

+ cl_approach = (

+ _AnalysisApproach.TRY_JOB if analysis_approach_type.TRY_JOB in

+ build_info['approaches'] else _AnalysisApproach.HEURISTIC)

+ return confidence, cl_approach

def _GenerateBuildFailureAnalysisResult(

- self, build, suspected_cls_in_result, step_name,

- first_failure, test_name=None,

- analysis_approach=_AnalysisApproach.HEURISTIC):

+ self, build, suspected_cls_in_result, step_name, first_failure, test_name,

+ analysis_approach, confidences, try_job_status, is_flaky_test):

suspected_cls = []

for suspected_cl in suspected_cls_in_result:

+ repo_name = suspected_cl['repo_name']

+ revision = suspected_cl['revision']

+ commit_position = suspected_cl['commit_position']

+ confidence, cl_approach = self._GetConfidenceAndApproachForCL(

+ repo_name, revision, confidences, build, first_failure)

+ cl_approach = cl_approach or analysis_approach

suspected_cls.append(_SuspectedCL(

- repo_name=suspected_cl['repo_name'],

- revision=suspected_cl['revision'],

- commit_position=suspected_cl['commit_position']))

+ repo_name=repo_name, revision=revision,

+ commit_position=commit_position, confidence=confidence,

+ analysis_approach=cl_approach))

return _BuildFailureAnalysisResult(

master_url=build.master_url,

@@ -149,13 +196,15 @@ class FindItApi(remote.Service):

test_name=test_name,

first_known_failed_build_number=first_failure,

suspected_cls=suspected_cls,

- analysis_approach=analysis_approach)

+ analysis_approach=analysis_approach,

+ try_job_status=try_job_status,

+ is_flaky_test=is_flaky_test)

- def _GetCulpritFromTryJob(

+ def _GetStatusAndCulpritFromTryJob(

self, try_job_map, build_failure_type, step_name, test_name=None):

"""Returns the culprit found by try-job for the given step or test."""

if not try_job_map:

- return None

+ return _TryJobStatus.FINISHED, None

if test_name is None:

try_job_key = try_job_map.get(step_name)

@@ -163,37 +212,59 @@ class FindItApi(remote.Service):

try_job_key = try_job_map.get(step_name, {}).get(test_name)

if not try_job_key:

- return None

+ return _TryJobStatus.FINISHED, None

try_job = WfTryJob.Get(*try_job_key.split('/'))

- if not try_job or not try_job.completed or try_job.failed:

- return None

+ if not try_job or try_job.failed:

+ return _TryJobStatus.FINISHED, None

+ if not try_job.completed:

+ return _TryJobStatus.RUNNING, None

if build_failure_type == failure_type.COMPILE:

if not try_job.compile_results: # pragma: no cover.

- return None

- return try_job.compile_results[-1].get('culprit', {}).get(step_name)

+ return _TryJobStatus.FINISHED, None

+ return (

+ _TryJobStatus.FINISHED,

+ try_job.compile_results[-1].get('culprit', {}).get(step_name))

if not try_job.test_results: # pragma: no cover.

- return None

+ return _TryJobStatus.FINISHED, None

if test_name is None:

step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)

if not step_info or step_info.get('tests'): # pragma: no cover.

# TODO(chanli): For some steps like checkperms/sizes/etc, the culprit

# finding try-job might have test-level results.

- return None

- return step_info

+ return _TryJobStatus.FINISHED, None

+ return _TryJobStatus.FINISHED, step_info

task = WfSwarmingTask.Get(*try_job_key.split('/'), step_name=step_name)

ref_name = (task.parameters.get('ref_name') if task and task.parameters

else None)

- return try_job.test_results[-1].get('culprit', {}).get(

- ref_name or step_name, {}).get('tests', {}).get(test_name)

+ return (

+ _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(

+ ref_name or step_name, {}).get('tests', {}).get(test_name))

+ def _CheckIsFlaky(self, try_job_map, step_name, test_name):

+ """Checks if the test is flaky."""

+ if not try_job_map or not test_name:

+ return False

+ try_job_key = try_job_map.get(step_name, {}).get(test_name)

+ if not try_job_key:

+ return False

+ swarming_task = WfSwarmingTask.Get(*try_job_key.split('/'),

stgao 2016/10/22 00:24:14 Do we have function for the encode and decode of k

chanli 2016/10/24 18:30:44 Done.

+ step_name=step_name)

+ if not swarming_task or not swarming_task.classified_tests:

+ return False

+ return test_name in swarming_task.classified_tests.get('flaky_tests', [])

def _PopulateResult(

self, results, build, try_job_map, build_failure_type,

- heuristic_result, step_name, test_name=None):

+ heuristic_result, step_name, confidences, test_name=None):

"""Appends an analysis result for the given step or test.

Try-job results are always given priority over heuristic results.

@@ -202,32 +273,42 @@ class FindItApi(remote.Service):

suspected_cls = heuristic_result['suspected_cls']

analysis_approach = _AnalysisApproach.HEURISTIC

- # Check analysis result from try-job.

- culprit = self._GetCulpritFromTryJob(

- try_job_map, build_failure_type, step_name, test_name=test_name)

- if culprit:

- suspected_cls = [culprit]

- analysis_approach = _AnalysisApproach.TRY_JOB

+ # Check if the test is flaky.

+ is_flaky_test = self._CheckIsFlaky(try_job_map, step_name, test_name)

- if not suspected_cls:

+ if is_flaky_test:

+ suspected_cls = []

+ try_job_status = _TryJobStatus.FINISHED # There will be no try job.

+ else:

+ # Check analysis result from try-job.

+ try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(

+ try_job_map, build_failure_type, step_name, test_name=test_name)

+ if culprit:

+ suspected_cls = [culprit]

+ analysis_approach = _AnalysisApproach.TRY_JOB

+ if not is_flaky_test and not suspected_cls:

return

results.append(self._GenerateBuildFailureAnalysisResult(

stgao 2016/10/19 00:43:08 Do we have an estimation of how many ndb read in o

chanli 2016/10/19 03:00:59 For cases with the most reads: 1. 1 ndb read for c

stgao 2016/10/21 01:51:22 Acknowledged.

build, suspected_cls, step_name, heuristic_result['first_failure'],

- test_name=test_name, analysis_approach=analysis_approach))

+ test_name, analysis_approach, confidences, try_job_status,

+ is_flaky_test))

- def _GenerateResultsForBuild(self, build, heuristic_analysis, results):

+ def _GenerateResultsForBuild(

+ self, build, heuristic_analysis, results, confidences):

for failure in heuristic_analysis.result['failures']:

if failure.get('tests'): # Test-level analysis.

for test in failure['tests']:

self._PopulateResult(

results, build, heuristic_analysis.failure_result_map,

heuristic_analysis.failure_type, test,

- failure['step_name'], test_name=test['test_name'])

+ failure['step_name'], confidences, test_name=test['test_name'])

else:

self._PopulateResult(

results, build, heuristic_analysis.failure_result_map,

- heuristic_analysis.failure_type, failure, failure['step_name'])

+ heuristic_analysis.failure_type, failure, failure['step_name'],

+ confidences)

@endpoints.method(

_BuildFailureCollection, _BuildFailureAnalysisResultCollection,

@@ -246,6 +327,7 @@ class FindItApi(remote.Service):

"""

results = []

supported_builds = []

+ confidences = SuspectedCLConfidence.Get()

for build in request.builds:

master_name = buildbot.GetMasterNameFromUrl(build.master_url)

@@ -274,7 +356,8 @@ class FindItApi(remote.Service):

# Bail out if the analysis failed or there is no result yet.

continue

- self._GenerateResultsForBuild(build, heuristic_analysis, results)

+ self._GenerateResultsForBuild(

+ build, heuristic_analysis, results, confidences)

logging.info('%d build failure(s), while %d are supported',

len(request.builds), len(supported_builds))

« no previous file with comments | « no previous file | appengine/findit/handlers/build_failure.py » ('j') | no next file with comments »