| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """This module provides Findit service APIs through Cloud Endpoints. | 5 """This module provides Findit service APIs through Cloud Endpoints. |
| 6 | 6 |
| 7 Current APIs include: | 7 Current APIs include: |
| 8 1. Analysis of compile/test failures in Chromium waterfalls. | 8 1. Analysis of compile/test failures in Chromium waterfalls. |
| 9 Analyzes failures and detects suspected CLs. | 9 Analyzes failures and detects suspected CLs. |
| 10 2. Analysis of flakes on Commit Queue. | 10 2. Analysis of flakes on Commit Queue. |
| 11 """ | 11 """ |
| 12 | 12 |
| 13 from collections import defaultdict |
| 13 import json | 14 import json |
| 14 import logging | 15 import logging |
| 15 import pickle | 16 import pickle |
| 16 | 17 |
| 17 import endpoints | 18 import endpoints |
| 18 from google.appengine.api import taskqueue | 19 from google.appengine.api import taskqueue |
| 19 from protorpc import messages | 20 from protorpc import messages |
| 20 from protorpc import remote | 21 from protorpc import remote |
| 21 | 22 |
| 22 from common import appengine_util | 23 from common import appengine_util |
| 23 from common import auth_util | 24 from common import auth_util |
| 24 from common import constants | 25 from common import constants |
| 25 from common import time_util | 26 from common import time_util |
| 26 from common.waterfall import failure_type | 27 from common.waterfall import failure_type |
| 27 from model import analysis_approach_type | 28 from model import analysis_approach_type |
| 29 from model import analysis_status |
| 28 from model.flake.flake_analysis_request import FlakeAnalysisRequest | 30 from model.flake.flake_analysis_request import FlakeAnalysisRequest |
| 29 from model.suspected_cl_confidence import SuspectedCLConfidence | 31 from model.suspected_cl_confidence import SuspectedCLConfidence |
| 30 from model.wf_analysis import WfAnalysis | 32 from model.wf_analysis import WfAnalysis |
| 31 from model.wf_suspected_cl import WfSuspectedCL | 33 from model.wf_suspected_cl import WfSuspectedCL |
| 32 from model.wf_swarming_task import WfSwarmingTask | 34 from model.wf_swarming_task import WfSwarmingTask |
| 33 from model.wf_try_job import WfTryJob | 35 from model.wf_try_job import WfTryJob |
| 34 from waterfall import build_util | 36 from waterfall import build_util |
| 35 from waterfall import buildbot | 37 from waterfall import buildbot |
| 36 from waterfall import suspected_cl_util | 38 from waterfall import suspected_cl_util |
| 37 from waterfall import waterfall_config | 39 from waterfall import waterfall_config |
| (...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 161 cl = WfSuspectedCL.Get(repo_name, revision) | 163 cl = WfSuspectedCL.Get(repo_name, revision) |
| 162 if not cl: | 164 if not cl: |
| 163 return None, None | 165 return None, None |
| 164 | 166 |
| 165 master_name = buildbot.GetMasterNameFromUrl(build.master_url) | 167 master_name = buildbot.GetMasterNameFromUrl(build.master_url) |
| 166 builder_name = build.builder_name | 168 builder_name = build.builder_name |
| 167 current_build = build.build_number | 169 current_build = build.build_number |
| 168 | 170 |
| 169 # If the CL is found by a try job, only the first failure will be recorded. | 171 # If the CL is found by a try job, only the first failure will be recorded. |
| 170 # So we might need to go to the first failure to get CL information. | 172 # So we might need to go to the first failure to get CL information. |
| 171 build_info = (cl.GetBuildInfo(master_name, builder_name, current_build) or | 173 build_info = cl.GetBuildInfo(master_name, builder_name, current_build) |
| 172 cl.GetBuildInfo(master_name, builder_name, first_failure)) | 174 first_build_info = cl.GetBuildInfo(master_name, builder_name, first_failure) |
| 173 | 175 return suspected_cl_util.GetSuspectedCLConfidenceScoreAndApproach( |
| 174 confidence = suspected_cl_util.GetSuspectedCLConfidenceScore( | 176 confidences, build_info, first_build_info) |
| 175 confidences, build_info) | |
| 176 | |
| 177 cl_approach = ( | |
| 178 _AnalysisApproach.TRY_JOB if analysis_approach_type.TRY_JOB in | |
| 179 build_info['approaches'] else _AnalysisApproach.HEURISTIC) | |
| 180 | |
| 181 return confidence, cl_approach | |
| 182 | 177 |
| 183 def _GenerateBuildFailureAnalysisResult( | 178 def _GenerateBuildFailureAnalysisResult( |
| 184 self, build, suspected_cls_in_result, step_name, first_failure, test_name, | 179 self, build, suspected_cls_in_result, step_name, first_failure, test_name, |
| 185 analysis_approach, confidences, try_job_status, is_flaky_test): | 180 analysis_approach, confidences, try_job_status, is_flaky_test): |
| 186 | 181 |
| 187 suspected_cls = [] | 182 suspected_cls = [] |
| 188 for suspected_cl in suspected_cls_in_result: | 183 for suspected_cl in suspected_cls_in_result: |
| 189 repo_name = suspected_cl['repo_name'] | 184 repo_name = suspected_cl['repo_name'] |
| 190 revision = suspected_cl['revision'] | 185 revision = suspected_cl['revision'] |
| 191 commit_position = suspected_cl['commit_position'] | 186 commit_position = suspected_cl['commit_position'] |
| 192 confidence, cl_approach = self._GetConfidenceAndApproachForCL( | 187 confidence, cl_approach = self._GetConfidenceAndApproachForCL( |
| 193 repo_name, revision, confidences, build, first_failure) | 188 repo_name, revision, confidences, build, first_failure) |
| 194 cl_approach = cl_approach or analysis_approach | 189 if cl_approach: |
| 190 cl_approach = ( |
| 191 _AnalysisApproach.HEURISTIC if |
| 192 cl_approach == analysis_approach_type.HEURISTIC else |
| 193 _AnalysisApproach.TRY_JOB) |
| 194 else: |
| 195 cl_approach = analysis_approach |
| 195 | 196 |
| 196 suspected_cls.append(_SuspectedCL( | 197 suspected_cls.append(_SuspectedCL( |
| 197 repo_name=repo_name, revision=revision, | 198 repo_name=repo_name, revision=revision, |
| 198 commit_position=commit_position, confidence=confidence, | 199 commit_position=commit_position, confidence=confidence, |
| 199 analysis_approach=cl_approach)) | 200 analysis_approach=cl_approach)) |
| 200 | 201 |
| 201 return _BuildFailureAnalysisResult( | 202 return _BuildFailureAnalysisResult( |
| 202 master_url=build.master_url, | 203 master_url=build.master_url, |
| 203 builder_name=build.builder_name, | 204 builder_name=build.builder_name, |
| 204 build_number=build.build_number, | 205 build_number=build.build_number, |
| 205 step_name=step_name, | 206 step_name=step_name, |
| 206 is_sub_test=test_name is not None, | 207 is_sub_test=test_name is not None, |
| 207 test_name=test_name, | 208 test_name=test_name, |
| 208 first_known_failed_build_number=first_failure, | 209 first_known_failed_build_number=first_failure, |
| 209 suspected_cls=suspected_cls, | 210 suspected_cls=suspected_cls, |
| 210 analysis_approach=analysis_approach, | 211 analysis_approach=analysis_approach, |
| 211 try_job_status=try_job_status, | 212 try_job_status=try_job_status, |
| 212 is_flaky_test=is_flaky_test) | 213 is_flaky_test=is_flaky_test) |
| 213 | 214 |
| 214 def _GetStatusAndCulpritFromTryJob( | 215 def _GetStatusAndCulpritFromTryJob( |
| 215 self, try_job_map, build_failure_type, step_name, test_name=None): | 216 self, try_job, swarming_task, build_failure_type, step_name, |
| 217 test_name=None): |
| 216 """Returns the culprit found by try-job for the given step or test.""" | 218 """Returns the culprit found by try-job for the given step or test.""" |
| 217 if not try_job_map: | |
| 218 return _TryJobStatus.FINISHED, None | |
| 219 | 219 |
| 220 if test_name is None: | 220 if swarming_task and swarming_task.status in ( |
| 221 try_job_key = try_job_map.get(step_name) | 221 analysis_status.PENDING, analysis_status.RUNNING): |
| 222 else: | 222 return _TryJobStatus.RUNNING, None |
| 223 try_job_key = try_job_map.get(step_name, {}).get(test_name) | |
| 224 | 223 |
| 225 if not try_job_key: | |
| 226 return _TryJobStatus.FINISHED, None | |
| 227 | |
| 228 try_job = WfTryJob.Get(*build_util.GetBuildInfoFromId(try_job_key)) | |
| 229 if not try_job or try_job.failed: | 224 if not try_job or try_job.failed: |
| 230 return _TryJobStatus.FINISHED, None | 225 return _TryJobStatus.FINISHED, None |
| 231 | 226 |
| 232 if not try_job.completed: | 227 if not try_job.completed: |
| 233 return _TryJobStatus.RUNNING, None | 228 return _TryJobStatus.RUNNING, None |
| 234 | 229 |
| 235 if build_failure_type == failure_type.COMPILE: | 230 if build_failure_type == failure_type.COMPILE: |
| 236 if not try_job.compile_results: # pragma: no cover. | 231 if not try_job.compile_results: # pragma: no cover. |
| 237 return _TryJobStatus.FINISHED, None | 232 return _TryJobStatus.FINISHED, None |
| 238 return ( | 233 return ( |
| 239 _TryJobStatus.FINISHED, | 234 _TryJobStatus.FINISHED, |
| 240 try_job.compile_results[-1].get('culprit', {}).get(step_name)) | 235 try_job.compile_results[-1].get('culprit', {}).get(step_name)) |
| 241 | 236 |
| 242 if not try_job.test_results: # pragma: no cover. | 237 if not try_job.test_results: # pragma: no cover. |
| 243 return _TryJobStatus.FINISHED, None | 238 return _TryJobStatus.FINISHED, None |
| 244 | 239 |
| 245 if test_name is None: | 240 if test_name is None: |
| 246 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name) | 241 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name) |
| 247 if not step_info or step_info.get('tests'): # pragma: no cover. | 242 if not step_info or step_info.get('tests'): # pragma: no cover. |
| 248 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit | 243 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit |
| 249 # finding try-job might have test-level results. | 244 # finding try-job might have test-level results. |
| 250 return _TryJobStatus.FINISHED, None | 245 return _TryJobStatus.FINISHED, None |
| 251 return _TryJobStatus.FINISHED, step_info | 246 return _TryJobStatus.FINISHED, step_info |
| 252 | 247 |
| 253 task = WfSwarmingTask.Get(*build_util.GetBuildInfoFromId(try_job_key), | 248 ref_name = (swarming_task.parameters.get('ref_name') if swarming_task and |
| 254 step_name=step_name) | 249 swarming_task.parameters else None) |
| 255 ref_name = (task.parameters.get('ref_name') if task and task.parameters | |
| 256 else None) | |
| 257 return ( | 250 return ( |
| 258 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get( | 251 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get( |
| 259 ref_name or step_name, {}).get('tests', {}).get(test_name)) | 252 ref_name or step_name, {}).get('tests', {}).get(test_name)) |
| 260 | 253 |
| 261 def _CheckIsFlaky(self, try_job_map, step_name, test_name): | 254 def _CheckIsFlaky(self, swarming_task, test_name): |
| 262 """Checks if the test is flaky.""" | 255 """Checks if the test is flaky.""" |
| 263 if not try_job_map or not test_name: | |
| 264 return False | |
| 265 | |
| 266 try_job_key = try_job_map.get(step_name, {}).get(test_name) | |
| 267 if not try_job_key: | |
| 268 return False | |
| 269 | |
| 270 swarming_task = WfSwarmingTask.Get( | |
| 271 *build_util.GetBuildInfoFromId(try_job_key), step_name=step_name) | |
| 272 if not swarming_task or not swarming_task.classified_tests: | 256 if not swarming_task or not swarming_task.classified_tests: |
| 273 return False | 257 return False |
| 274 | 258 |
| 275 return test_name in swarming_task.classified_tests.get('flaky_tests', []) | 259 return test_name in swarming_task.classified_tests.get('flaky_tests', []) |
| 276 | 260 |
| 277 def _PopulateResult( | 261 def _PopulateResult( |
| 278 self, results, build, try_job_map, build_failure_type, | 262 self, results, build, build_failure_type,heuristic_result, step_name, |
| 279 heuristic_result, step_name, confidences, test_name=None): | 263 confidences, swarming_task, try_job, test_name=None): |
| 280 """Appends an analysis result for the given step or test. | 264 """Appends an analysis result for the given step or test. |
| 281 | 265 |
| 282 Try-job results are always given priority over heuristic results. | 266 Try-job results are always given priority over heuristic results. |
| 283 """ | 267 """ |
| 284 # Default to heuristic analysis. | 268 # Default to heuristic analysis. |
| 285 suspected_cls = heuristic_result['suspected_cls'] | 269 suspected_cls = heuristic_result['suspected_cls'] |
| 286 analysis_approach = _AnalysisApproach.HEURISTIC | 270 analysis_approach = _AnalysisApproach.HEURISTIC |
| 287 | 271 |
| 288 # Check if the test is flaky. | 272 # Check if the test is flaky. |
| 289 is_flaky_test = self._CheckIsFlaky(try_job_map, step_name, test_name) | 273 is_flaky_test = self._CheckIsFlaky(swarming_task, test_name) |
| 290 | 274 |
| 291 if is_flaky_test: | 275 if is_flaky_test: |
| 292 suspected_cls = [] | 276 suspected_cls = [] |
| 293 try_job_status = _TryJobStatus.FINISHED # There will be no try job. | 277 try_job_status = _TryJobStatus.FINISHED # There will be no try job. |
| 294 else: | 278 else: |
| 295 # Check analysis result from try-job. | 279 # Check analysis result from try-job. |
| 296 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob( | 280 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob( |
| 297 try_job_map, build_failure_type, step_name, test_name=test_name) | 281 try_job, swarming_task, build_failure_type, step_name, |
| 282 test_name=test_name) |
| 298 if culprit: | 283 if culprit: |
| 299 suspected_cls = [culprit] | 284 suspected_cls = [culprit] |
| 300 analysis_approach = _AnalysisApproach.TRY_JOB | 285 analysis_approach = _AnalysisApproach.TRY_JOB |
| 301 | 286 if (not is_flaky_test and not suspected_cls and |
| 302 if not is_flaky_test and not suspected_cls: | 287 not try_job_status == _TryJobStatus.RUNNING): |
| 303 return | 288 return |
| 304 | 289 |
| 305 results.append(self._GenerateBuildFailureAnalysisResult( | 290 results.append(self._GenerateBuildFailureAnalysisResult( |
| 306 build, suspected_cls, step_name, heuristic_result['first_failure'], | 291 build, suspected_cls, step_name, heuristic_result['first_failure'], |
| 307 test_name, analysis_approach, confidences, try_job_status, | 292 test_name, analysis_approach, confidences, try_job_status, |
| 308 is_flaky_test)) | 293 is_flaky_test)) |
| 309 | 294 |
| 295 def _GetAllSwarmingTasks(self, failure_result_map): |
| 296 """Returns all swarming tasks related to one build. |
| 297 |
| 298 Args: |
| 299 failure_result_map: A dict mapping each step/test to the key of the |
| 300 build in which it first failed: |
| 301 { |
| 302 'step1': 'm/b/1', |
| 303 'step2': { |
| 304 'test1': 'm/b/1', |
| 305 'test2': 'm/b/2' |
| 306 } |
| 307 } |
| 308 |
| 309 Returns: |
| 310 A dict of swarming tasks like below: |
| 311 { |
| 312 'step1': { |
| 313 'm/b/1': WfSwarmingTask( |
| 314 key=Key('WfBuild', 'm/b/1', 'WfSwarmingTask', 'step1'),...) |
| 315 }, |
| 316 ... |
| 317 } |
| 318 """ |
| 319 if not failure_result_map: |
| 320 return {} |
| 321 |
| 322 swarming_tasks = defaultdict(dict) |
| 323 for step_name, step_map in failure_result_map.iteritems(): |
| 324 if isinstance(step_map, basestring): |
| 325 swarming_tasks[step_name][step_map] = ( |
| 326 WfSwarmingTask.Get( |
| 327 *build_util.GetBuildInfoFromId(step_map), step_name=step_name)) |
| 328 else: |
| 329 for task_key in step_map.values(): |
| 330 if not swarming_tasks[step_name].get(task_key): |
| 331 swarming_tasks[step_name][task_key] = ( |
| 332 WfSwarmingTask.Get(*build_util.GetBuildInfoFromId(task_key), |
| 333 step_name=step_name)) |
| 334 |
| 335 return swarming_tasks |
| 336 |
| 337 def _GetAllTryJobs(self, failure_result_map): |
| 338 """Returns all try jobs related to one build. |
| 339 |
| 340 Args: |
| 341 failure_result_map: A dict mapping each step/test to the key of the |
| 342 build in which it first failed: |
| 343 { |
| 344 'step1': 'm/b/1', |
| 345 'step2': { |
| 346 'test1': 'm/b/1', |
| 347 'test2': 'm/b/2' |
| 348 } |
| 349 } |
| 350 |
| 351 Returns: |
| 352 A dict of try jobs like below: |
| 353 { |
| 354 'm/b/1': WfTryJob( |
| 355 key=Key('WfBuild', 'm/b/1'),...) |
| 356 ... |
| 357 } |
| 358 """ |
| 359 if not failure_result_map: |
| 360 return {} |
| 361 |
| 362 try_jobs = {} |
| 363 for step_map in failure_result_map.values(): |
| 364 if isinstance(step_map, basestring): |
| 365 try_jobs[step_map] = WfTryJob.Get(*step_map.split('/')) |
| 366 else: |
| 367 for task_key in step_map.values(): |
| 368 if not try_jobs.get(task_key): |
| 369 try_jobs[task_key] = WfTryJob.Get(*task_key.split('/')) |
| 370 |
| 371 return try_jobs |
| 372 |
| 373 def _GetSwarmingTaskAndTryJobForFailure( |
| 374 self, step_name, test_name, failure_result_map, swarming_tasks, try_jobs): |
| 375 """Gets swarming task and try job for the specific step/test.""" |
| 376 if not failure_result_map: |
| 377 return None, None |
| 378 |
| 379 if test_name: |
| 380 try_job_key = failure_result_map.get(step_name, {}).get(test_name) |
| 381 else: |
| 382 try_job_key = failure_result_map.get(step_name) |
| 383 |
| 384 # Get the swarming task for the step/test. |
| 385 swarming_task = swarming_tasks.get(step_name, {}).get(try_job_key) |
| 386 |
| 387 # Get the try job for the step/test. |
| 388 try_job = try_jobs.get(try_job_key) |
| 389 |
| 390 return swarming_task, try_job |
| 391 |
| 310 def _GenerateResultsForBuild( | 392 def _GenerateResultsForBuild( |
| 311 self, build, heuristic_analysis, results, confidences): | 393 self, build, heuristic_analysis, results, confidences): |
| 394 |
| 395 swarming_tasks = self._GetAllSwarmingTasks( |
| 396 heuristic_analysis.failure_result_map) |
| 397 try_jobs = self._GetAllTryJobs(heuristic_analysis.failure_result_map) |
| 398 |
| 312 for failure in heuristic_analysis.result['failures']: | 399 for failure in heuristic_analysis.result['failures']: |
| 400 step_name = failure['step_name'] |
| 313 if failure.get('tests'): # Test-level analysis. | 401 if failure.get('tests'): # Test-level analysis. |
| 314 for test in failure['tests']: | 402 for test in failure['tests']: |
| 403 test_name = test['test_name'] |
| 404 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure( |
| 405 step_name, test_name, heuristic_analysis.failure_result_map, |
| 406 swarming_tasks, try_jobs) |
| 407 |
| 315 self._PopulateResult( | 408 self._PopulateResult( |
| 316 results, build, heuristic_analysis.failure_result_map, | 409 results, build, heuristic_analysis.failure_type, test, |
| 317 heuristic_analysis.failure_type, test, | 410 step_name, confidences, swarming_task, try_job, |
| 318 failure['step_name'], confidences, test_name=test['test_name']) | 411 test_name=test_name) |
| 319 else: | 412 else: |
| 413 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure( |
| 414 step_name, None, heuristic_analysis.failure_result_map, |
| 415 swarming_tasks, try_jobs) |
| 320 self._PopulateResult( | 416 self._PopulateResult( |
| 321 results, build, heuristic_analysis.failure_result_map, | 417 results, build, heuristic_analysis.failure_type, failure, |
| 322 heuristic_analysis.failure_type, failure, failure['step_name'], | 418 step_name, confidences, swarming_task, try_job) |
| 323 confidences) | |
| 324 | 419 |
| 325 @endpoints.method( | 420 @endpoints.method( |
| 326 _BuildFailureCollection, _BuildFailureAnalysisResultCollection, | 421 _BuildFailureCollection, _BuildFailureAnalysisResultCollection, |
| 327 path='buildfailure', name='buildfailure') | 422 path='buildfailure', name='buildfailure') |
| 328 def AnalyzeBuildFailures(self, request): | 423 def AnalyzeBuildFailures(self, request): |
| 329 """Returns analysis results for the given build failures in the request. | 424 """Returns analysis results for the given build failures in the request. |
| 330 | 425 |
| 331 Analysis of build failures will be triggered automatically on demand. | 426 Analysis of build failures will be triggered automatically on demand. |
| 332 | 427 |
| 333 Args: | 428 Args: |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 407 | 502 |
| 408 try: | 503 try: |
| 409 _AsyncProcessFlakeReport(flake_analysis_request, user_email, is_admin) | 504 _AsyncProcessFlakeReport(flake_analysis_request, user_email, is_admin) |
| 410 queued = True | 505 queued = True |
| 411 except Exception: | 506 except Exception: |
| 412 # Ignore the report when we fail to queue it for async processing. | 507 # Ignore the report when we fail to queue it for async processing. |
| 413 queued = False | 508 queued = False |
| 414 logging.exception('Failed to queue flake report for async processing') | 509 logging.exception('Failed to queue flake report for async processing') |
| 415 | 510 |
| 416 return _FlakeAnalysis(queued=queued) | 511 return _FlakeAnalysis(queued=queued) |
| OLD | NEW |