Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Task queue endpoints for creating and updating issues on issue tracker.""" | 5 """Task queue endpoints for creating and updating issues on issue tracker.""" |
| 6 | 6 |
| 7 import datetime | 7 import datetime |
| 8 import json | |
| 8 import logging | 9 import logging |
| 9 import webapp2 | 10 import webapp2 |
| 10 | 11 |
| 11 from google.appengine.api import app_identity | 12 from google.appengine.api import app_identity |
| 12 from google.appengine.api import taskqueue | 13 from google.appengine.api import taskqueue |
| 14 from google.appengine.api import urlfetch | |
| 13 from google.appengine.ext import ndb | 15 from google.appengine.ext import ndb |
| 14 | 16 |
| 17 from infra_libs import ts_mon | |
| 15 from issue_tracker import issue_tracker_api, issue | 18 from issue_tracker import issue_tracker_api, issue |
| 16 from model.flake import FlakeUpdateSingleton, FlakeUpdate, Flake | 19 from model.flake import ( |
| 17 from infra_libs import ts_mon | 20 Flake, FlakeOccurrence, FlakeUpdate, FlakeUpdateSingleton, FlakyRun) |
| 21 from status import build_result, util | |
| 18 | 22 |
| 19 | 23 |
| 20 MAX_UPDATED_ISSUES_PER_DAY = 50 | 24 MAX_UPDATED_ISSUES_PER_DAY = 50 |
| 21 MAX_TIME_DIFFERENCE_SECONDS = 12 * 60 * 60 | 25 MAX_TIME_DIFFERENCE_SECONDS = 12 * 60 * 60 |
| 22 MIN_REQUIRED_FLAKY_RUNS = 5 | 26 MIN_REQUIRED_FLAKY_RUNS = 5 |
| 23 DAYS_TILL_STALE = 3 | 27 DAYS_TILL_STALE = 3 |
| 24 USE_MONORAIL = False | 28 USE_MONORAIL = False |
| 25 DAYS_TO_REOPEN_ISSUE = 3 | 29 DAYS_TO_REOPEN_ISSUE = 3 |
| 26 FLAKY_RUNS_TEMPLATE = ( | 30 FLAKY_RUNS_TEMPLATE = ( |
| 27 'Detected %(new_flakes_count)d new flakes for test/step "%(name)s". To see ' | 31 'Detected %(new_flakes_count)d new flakes for test/step "%(name)s". To see ' |
| (...skipping 299 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 327 api.update(flake_issue, comment=BACK_TO_SHERIFF_MESSAGE) | 331 api.update(flake_issue, comment=BACK_TO_SHERIFF_MESSAGE) |
| 328 return | 332 return |
| 329 | 333 |
| 330 # Report to stale-flakes-reports@ if the issue has no updates for 7 days. | 334 # Report to stale-flakes-reports@ if the issue has no updates for 7 days. |
| 331 week_ago = now - datetime.timedelta(days=7) | 335 week_ago = now - datetime.timedelta(days=7) |
| 332 if (last_third_party_update < week_ago and | 336 if (last_third_party_update < week_ago and |
| 333 STALE_FLAKES_ML not in flake_issue.cc): | 337 STALE_FLAKES_ML not in flake_issue.cc): |
| 334 flake_issue.cc.append(STALE_FLAKES_ML) | 338 flake_issue.cc.append(STALE_FLAKES_ML) |
| 335 logging.info('Reporting issue %s to %s', flake_issue.id, STALE_FLAKES_ML) | 339 logging.info('Reporting issue %s to %s', flake_issue.id, STALE_FLAKES_ML) |
| 336 api.update(flake_issue, comment=VERY_STALE_FLAKES_MESSAGE) | 340 api.update(flake_issue, comment=VERY_STALE_FLAKES_MESSAGE) |
| 341 | |
| 342 | |
| 343 class CreateFlakyRun(webapp2.RequestHandler): | |
|
Sergiy Byelozyorov
2016/02/02 21:24:35
Most of the methods here are copied with minor mod
| |
| 344 # We execute the method below in an independent transaction since otherwise we | |
| 345 # would exceed the maximum number of entities allowed within a single | |
| 346 # transaction. | |
| 347 @staticmethod | |
| 348 # pylint: disable=E1120 | |
| 349 @ndb.transactional(xg=True, propagation=ndb.TransactionOptions.INDEPENDENT) | |
| 350 def add_failure_to_flake(name, flaky_run_key, failure_time): | |
| 351 flake = Flake.get_by_id(name) | |
| 352 if not flake: | |
| 353 flake = Flake(name=name, id=name, last_time_seen=datetime.datetime.min) | |
| 354 flake.put() | |
| 355 | |
| 356 flake.occurrences.append(flaky_run_key) | |
| 357 util.add_occurrence_time_to_flake(flake, failure_time) | |
| 358 flake.put() | |
| 359 | |
| 360 # see examples: | |
| 361 # compile http://build.chromium.org/p/tryserver.chromium.mac/json/builders/ | |
| 362 # mac_chromium_compile_dbg/builds/11167?as_text=1 | |
| 363 # gtest http://build.chromium.org/p/tryserver.chromium.win/json/builders/ | |
| 364 # win_chromium_x64_rel_swarming/builds/4357?as_text=1 | |
| 365 # TODO(jam): get specific problem with compile so we can use that as name | |
| 366 # TODO(jam): It's unfortunate to have to parse this html. Can we get it from | |
| 367 # another place instead of the tryserver's json? | |
| 368 @staticmethod | |
| 369 def get_flakes(step): | |
| 370 combined = ' '.join(step['text']) | |
| 371 | |
| 372 # If test results were invalid, report whole step as flaky. | |
| 373 if 'TEST RESULTS WERE INVALID' in combined: | |
| 374 return [combined] | |
| 375 | |
| 376 #gtest | |
| 377 gtest_search_str = 'failures:<br/>' | |
| 378 gtest_search_index = combined.find(gtest_search_str) | |
| 379 if gtest_search_index != -1: | |
| 380 failures = combined[gtest_search_index + len(gtest_search_str):] | |
| 381 failures = failures.split('<br/>') | |
| 382 results = [] | |
| 383 for failure in failures: | |
| 384 if not failure: | |
| 385 continue | |
| 386 if failure == 'ignored:': | |
| 387 break # layout test output | |
| 388 results.append(failure) | |
| 389 return results | |
| 390 | |
| 391 #gpu | |
| 392 gpu_search_str = '&tests=' | |
| 393 gpu_search_index = combined.find(gpu_search_str) | |
| 394 if gpu_search_index != -1: | |
| 395 failures = combined[gpu_search_index + len(gpu_search_str):] | |
| 396 end_index = failures.find('">') | |
| 397 failures = failures[:end_index ] | |
| 398 failures = failures.split(',') | |
| 399 results = [] | |
| 400 for failure in failures: | |
| 401 if not failure: | |
| 402 continue | |
| 403 results.append(failure) | |
| 404 return results | |
| 405 | |
| 406 return [combined] | |
| 407 | |
| 408 @ndb.transactional(xg=True) # pylint: disable=E1120 | |
| 409 def post(self): | |
| 410 if (not self.request.get('failure_run_key') or | |
| 411 not self.request.get('success_run_key')): | |
| 412 self.response.set_status(400, 'Invalid request parameters') | |
| 413 return | |
| 414 | |
| 415 failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get() | |
| 416 success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get() | |
| 417 | |
| 418 flaky_run = FlakyRun( | |
| 419 failure_run=failure_run.key, | |
| 420 failure_run_time_started=failure_run.time_started, | |
| 421 failure_run_time_finished=failure_run.time_finished, | |
| 422 success_run=success_run.key) | |
| 423 | |
| 424 success_time = success_run.time_finished | |
| 425 failure_time = failure_run.time_finished | |
| 426 patchset_builder_runs = failure_run.key.parent().get() | |
| 427 url = ('http://build.chromium.org/p/' + patchset_builder_runs.master + | |
| 428 '/json/builders/' + patchset_builder_runs.builder +'/builds/' + | |
| 429 str(failure_run.buildnumber)) | |
| 430 urlfetch.set_default_fetch_deadline(60) | |
| 431 logging.info('get_flaky_run_reason ' + url) | |
| 432 result = urlfetch.fetch(url).content | |
| 433 try: | |
| 434 json_result = json.loads(result) | |
| 435 except ValueError: | |
| 436 logging.exception('couldnt decode json for %s', url) | |
| 437 return | |
| 438 steps = json_result['steps'] | |
| 439 | |
| 440 failed_steps = [] | |
| 441 passed_steps = [] | |
| 442 for step in steps: | |
| 443 result = step['results'][0] | |
| 444 if build_result.isResultSuccess(result): | |
| 445 passed_steps.append(step) | |
| 446 continue | |
| 447 if not build_result.isResultFailure(result): | |
| 448 continue | |
| 449 step_name = step['name'] | |
| 450 step_text = ' '.join(step['text']) | |
| 451 # The following step failures are ignored: | |
| 452 # - steps: always red when any other step is red (not a failure) | |
| 453 # - [swarming] ...: summary step would also be red (do not double count) | |
| 454 # - presubmit: typically red due to missing OWNERs LGTM, not a flake | |
| 455 # - recipe failure reason: always red when build fails (not a failure) | |
| 456 # - Patch failure: if success run was before failure run, it is | |
| 457 # likely a legitimate failure. For example it often happens that | |
| 458 # developers use CQ dry run and then wait for a review. Once getting | |
| 459 # LGTM they check CQ checkbox, but the patch does not cleanly apply | |
| 460 # anymore. | |
| 461 # - bot_update PATCH FAILED: Corresponds to 'Patch failure' step. | |
| 462 # - test results: always red when another step is red (not a failure) | |
| 463 # - Uncaught Exception: summary step referring to an exception in another | |
| 464 # step (e.g. bot_update) | |
| 465 if (step_name == 'steps' or step_name.startswith('[swarming]') or | |
| 466 step_name == 'presubmit' or step_name == 'recipe failure reason' or | |
| 467 (step_name == 'Patch failure' and success_time < failure_time) or | |
| 468 (step_name == 'bot_update' and 'PATCH FAILED' in step_text) or | |
| 469 step_name == 'test results' or step_name == 'Uncaught Exception'): | |
| 470 continue | |
| 471 failed_steps.append(step) | |
| 472 | |
| 473 steps_to_ignore = [] | |
| 474 for step in failed_steps: | |
| 475 step_name = step['name'] | |
| 476 if ' (with patch)' in step_name: | |
| 477 # Android instrumentation tests add a prefix before the step name, which | |
| 478 # doesn't appear on the summary step (without suffixes). To make sure we | |
| 479 # correctly ignore duplicate failures, we remove the prefix. | |
| 480 step_name = step_name.replace('Instrumentation test ', '') | |
| 481 | |
| 482 step_name_with_no_modifier = step_name.replace(' (with patch)', '') | |
| 483 for other_step in failed_steps: | |
| 484 # A step which fails, is retried and fails again shows up a second time | |
| 485 # under its name without the ' (with patch)' suffix. Don't double count. | |
| 486 if other_step['name'] == step_name_with_no_modifier: | |
| 487 steps_to_ignore.append(other_step['name']) | |
| 488 | |
| 489 # If a step fails without the patch, then the tree is busted. Don't | |
| 490 # count as flake. | |
| 491 step_name_without_patch = ( | |
| 492 '%s (without patch)' % step_name_with_no_modifier) | |
| 493 for other_step in failed_steps: | |
| 494 if other_step['name'] == step_name_without_patch: | |
| 495 steps_to_ignore.append(step['name']) | |
| 496 steps_to_ignore.append(other_step['name']) | |
| 497 | |
| 498 flakes_to_update = [] | |
| 499 for step in failed_steps: | |
| 500 step_name = step['name'] | |
| 501 if step_name in steps_to_ignore: | |
| 502 continue | |
| 503 flakes = self.get_flakes(step) | |
| 504 if not flakes: | |
| 505 continue | |
| 506 for flake in flakes: | |
| 507 flake_occurrence = FlakeOccurrence(name=step_name, failure=flake) | |
| 508 flaky_run.flakes.append(flake_occurrence) | |
| 509 flakes_to_update.append(flake) | |
| 510 | |
| 511 flaky_run_key = flaky_run.put() | |
| 512 for flake in flakes_to_update: | |
| 513 self.add_failure_to_flake(flake, flaky_run_key, failure_time) | |
| OLD | NEW |