OLD | NEW |
---|---|
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Task queue endpoints for creating and updating issues on issue tracker.""" | 5 """Task queue endpoints for creating and updating issues on issue tracker.""" |
6 | 6 |
7 import datetime | 7 import datetime |
8 import json | |
8 import logging | 9 import logging |
9 import webapp2 | 10 import webapp2 |
10 | 11 |
11 from google.appengine.api import app_identity | 12 from google.appengine.api import app_identity |
12 from google.appengine.api import taskqueue | 13 from google.appengine.api import taskqueue |
14 from google.appengine.api import urlfetch | |
13 from google.appengine.ext import ndb | 15 from google.appengine.ext import ndb |
14 | 16 |
17 from infra_libs import ts_mon | |
15 from issue_tracker import issue_tracker_api, issue | 18 from issue_tracker import issue_tracker_api, issue |
16 from model.flake import FlakeUpdateSingleton, FlakeUpdate, Flake | 19 from model.flake import ( |
17 from infra_libs import ts_mon | 20 Flake, FlakeOccurrence, FlakeUpdate, FlakeUpdateSingleton, FlakyRun) |
21 from status import build_result, util | |
18 | 22 |
19 | 23 |
20 MAX_UPDATED_ISSUES_PER_DAY = 50 | 24 MAX_UPDATED_ISSUES_PER_DAY = 50 |
21 MAX_TIME_DIFFERENCE_SECONDS = 12 * 60 * 60 | 25 MAX_TIME_DIFFERENCE_SECONDS = 12 * 60 * 60 |
22 MIN_REQUIRED_FLAKY_RUNS = 5 | 26 MIN_REQUIRED_FLAKY_RUNS = 5 |
23 DAYS_TILL_STALE = 3 | 27 DAYS_TILL_STALE = 3 |
24 USE_MONORAIL = False | 28 USE_MONORAIL = False |
25 DAYS_TO_REOPEN_ISSUE = 3 | 29 DAYS_TO_REOPEN_ISSUE = 3 |
26 FLAKY_RUNS_TEMPLATE = ( | 30 FLAKY_RUNS_TEMPLATE = ( |
27 'Detected %(new_flakes_count)d new flakes for test/step "%(name)s". To see ' | 31 'Detected %(new_flakes_count)d new flakes for test/step "%(name)s". To see ' |
(...skipping 299 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
327 api.update(flake_issue, comment=BACK_TO_SHERIFF_MESSAGE) | 331 api.update(flake_issue, comment=BACK_TO_SHERIFF_MESSAGE) |
328 return | 332 return |
329 | 333 |
330 # Report to stale-flakes-reports@ if the issue has no updates for 7 days. | 334 # Report to stale-flakes-reports@ if the issue has no updates for 7 days. |
331 week_ago = now - datetime.timedelta(days=7) | 335 week_ago = now - datetime.timedelta(days=7) |
332 if (last_third_party_update < week_ago and | 336 if (last_third_party_update < week_ago and |
333 STALE_FLAKES_ML not in flake_issue.cc): | 337 STALE_FLAKES_ML not in flake_issue.cc): |
334 flake_issue.cc.append(STALE_FLAKES_ML) | 338 flake_issue.cc.append(STALE_FLAKES_ML) |
335 logging.info('Reporting issue %s to %s', flake_issue.id, STALE_FLAKES_ML) | 339 logging.info('Reporting issue %s to %s', flake_issue.id, STALE_FLAKES_ML) |
336 api.update(flake_issue, comment=VERY_STALE_FLAKES_MESSAGE) | 340 api.update(flake_issue, comment=VERY_STALE_FLAKES_MESSAGE) |
341 | |
342 | |
343 class CreateFlakyRun(webapp2.RequestHandler): | |
344 # We execute below method in an independent transaction since otherwise we | |
345 # would exceed the maximum number of entities allowed within a single | |
346 # transaction. | |
tandrii(chromium)
2016/02/02 21:54:18
i do not fully understand why, though I believe yo
Paweł Hajdan Jr.
2016/02/03 09:09:51
Yes, AE has limits on transactions, not all of whi
| |
@staticmethod
# pylint: disable=E1120
@ndb.transactional(xg=True, propagation=ndb.TransactionOptions.INDEPENDENT)
def add_failure_to_flake(name, flaky_run_key, failure_time):
  """Attaches a flaky run to the Flake entity with the given name.

  Runs in its own cross-group transaction (independent propagation), so the
  entities touched here are not part of the caller's transaction.

  Args:
    name: Name of the flake; used both as the entity id and its name field.
    flaky_run_key: Key of the FlakyRun to append to the flake's occurrences.
    failure_time: Time of the failure, forwarded to
        util.add_occurrence_time_to_flake.
  """
  existing_flake = Flake.get_by_id(name)
  if existing_flake:
    flake = existing_flake
  else:
    # First sighting of this flake: create the entity and store it right away.
    flake = Flake(name=name, id=name, last_time_seen=datetime.datetime.min)
    flake.put()

  flake.occurrences.append(flaky_run_key)
  util.add_occurrence_time_to_flake(flake, failure_time)
  flake.put()
359 | |
360 # see examples: | |
361 # compile http://build.chromium.org/p/tryserver.chromium.mac/json/builders/ | |
362 # mac_chromium_compile_dbg/builds/11167?as_text=1 | |
363 # gtest http://build.chromium.org/p/tryserver.chromium.win/json/builders/ | |
364 # win_chromium_x64_rel_swarming/builds/4357?as_text=1 | |
365 # TODO(jam): get specific problem with compile so we can use that as name | |
366 # TODO(jam): It's unfortunate to have to parse this html. Can we get it from | |
Paweł Hajdan Jr.
2016/02/03 09:09:51
I know this is pre-existing, but IMHO important in
Sergiy Byelozyorov
2016/02/03 09:15:56
I was actually planning to move to Buildbot JSON +
| |
367 # another place instead of the tryserver's json? | |
@staticmethod
def get_flakes(step):
  """Extracts the names of individual failures from a buildbot step.

  The step's 'text' entries are joined with spaces and scanned for known
  markers. When no marker is recognized, the joined text itself is reported
  as the single failure.

  Args:
    step: Dict describing a build step; only step['text'] (a list of strings)
        is read.

  Returns:
    A list of failure names. It may be empty if a marker is present but no
    names follow it.
  """
  combined = ' '.join(step['text'])

  # If test results were invalid, report whole step as flaky.
  if 'TEST RESULTS WERE INVALID' in combined:
    return [combined]

  # gtest: failures are listed after 'failures:<br/>', separated by '<br/>'.
  gtest_marker = 'failures:<br/>'
  if gtest_marker in combined:
    results = []
    for failure in combined.partition(gtest_marker)[2].split('<br/>'):
      if failure == 'ignored:':
        break  # layout test output
      if failure:
        results.append(failure)
    return results

  # gpu: failures are a comma-separated list after '&tests=', normally
  # terminated by '">'.
  gpu_marker = '&tests='
  if gpu_marker in combined:
    remainder = combined.partition(gpu_marker)[2]
    # partition() keeps the whole remainder when the terminator is missing.
    # Slicing with the -1 returned by find() used to drop the last character
    # of the last test name in that case.
    test_list = remainder.partition('">')[0]
    return [failure for failure in test_list.split(',') if failure]

  return [combined]
407 | |
@ndb.transactional(xg=True)  # pylint: disable=E1120
def post(self):
  """Creates a FlakyRun for a failed/successful run pair and updates flakes.

  Reads the urlsafe keys 'failure_run_key' and 'success_run_key' from the
  request, fetches the buildbot JSON of the failed build, works out which
  failed steps count as flaky, stores a FlakyRun listing them, and registers
  the run with every affected Flake via add_failure_to_flake.

  Responds with status 400 when a parameter is missing and 404 when either
  key does not resolve to a stored entity. Returns without storing anything
  when the build JSON cannot be decoded.
  """
  failure_run_urlsafe = self.request.get('failure_run_key')
  success_run_urlsafe = self.request.get('success_run_key')
  if not failure_run_urlsafe or not success_run_urlsafe:
    self.response.set_status(400, 'Invalid request parameters')
    return

  failure_run = ndb.Key(urlsafe=failure_run_urlsafe).get()
  success_run = ndb.Key(urlsafe=success_run_urlsafe).get()
  # Key.get() yields None for a key with no stored entity; fail explicitly
  # instead of crashing on the attribute accesses below.
  if failure_run is None or success_run is None:
    logging.error('Run entity not found (failure_run_key=%s, '
                  'success_run_key=%s)',
                  failure_run_urlsafe, success_run_urlsafe)
    self.response.set_status(404, 'Run not found')
    return

  flaky_run = FlakyRun(
      failure_run=failure_run.key,
      failure_run_time_started=failure_run.time_started,
      failure_run_time_finished=failure_run.time_finished,
      success_run=success_run.key)

  success_time = success_run.time_finished
  failure_time = failure_run.time_finished
  patchset_builder_runs = failure_run.key.parent().get()
  url = ('http://build.chromium.org/p/' + patchset_builder_runs.master +
         '/json/builders/' + patchset_builder_runs.builder + '/builds/' +
         str(failure_run.buildnumber))
  urlfetch.set_default_fetch_deadline(60)
  logging.info('get_flaky_run_reason %s', url)
  build_json = urlfetch.fetch(url).content
  try:
    json_result = json.loads(build_json)
  except ValueError:
    logging.exception('couldnt decode json for %s', url)
    return
  steps = json_result['steps']

  # Step names whose failure is never counted, regardless of context:
  # - steps: always red when any other step is red (not a failure)
  # - presubmit: typically red due to missing OWNERs LGTM, not a flake
  # - recipe failure reason: always red when build fails (not a failure)
  # - test results: always red when another step is red (not a failure)
  # - Uncaught Exception: summary step referring to an exception in another
  #   step (e.g. bot_update)
  always_ignored = frozenset([
      'steps', 'presubmit', 'recipe failure reason', 'test results',
      'Uncaught Exception'])

  failed_steps = []
  for step in steps:
    step_result = step['results'][0]
    if build_result.isResultSuccess(step_result):
      continue
    if not build_result.isResultFailure(step_result):
      continue
    step_name = step['name']
    step_text = ' '.join(step['text'])
    # Besides the names above, the following step failures are ignored:
    # - [swarming] ...: summary step would also be red (do not double count)
    # - Patch failure: if success run was before failure run, it is
    #   likely a legitimate failure. For example it often happens that
    #   developers use CQ dry run and then wait for a review. Once getting
    #   LGTM they check CQ checkbox, but the patch does not cleanly apply
    #   anymore.
    # - bot_update PATCH FAILED: Corresponds to 'Patch failure' step.
    if (step_name in always_ignored or
        step_name.startswith('[swarming]') or
        (step_name == 'Patch failure' and success_time < failure_time) or
        (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):
      continue
    failed_steps.append(step)

  # Only membership of names is needed below, so use sets rather than
  # rescanning failed_steps for every step.
  failed_step_names = set(step['name'] for step in failed_steps)
  steps_to_ignore = set()
  for step in failed_steps:
    step_name = step['name']
    if ' (with patch)' not in step_name:
      continue
    # Android instrumentation tests add a prefix before the step name, which
    # doesn't appear on the summary step (without suffixes). To make sure we
    # correctly ignore duplicate failures, we remove the prefix.
    step_name = step_name.replace('Instrumentation test ', '')
    step_name_with_no_modifier = step_name.replace(' (with patch)', '')

    # A step which fails, and then is retried and also fails, will have
    # its name without the ' (with patch)' again. Don't double count.
    if step_name_with_no_modifier in failed_step_names:
      steps_to_ignore.add(step_name_with_no_modifier)

    # If a step fails without the patch, then the tree is busted. Don't
    # count as flake.
    step_name_without_patch = (
        '%s (without patch)' % step_name_with_no_modifier)
    if step_name_without_patch in failed_step_names:
      steps_to_ignore.add(step['name'])
      steps_to_ignore.add(step_name_without_patch)

  flakes_to_update = []
  for step in failed_steps:
    step_name = step['name']
    if step_name in steps_to_ignore:
      continue
    for flake in self.get_flakes(step):
      flaky_run.flakes.append(
          FlakeOccurrence(name=step_name, failure=flake))
      flakes_to_update.append(flake)

  flaky_run_key = flaky_run.put()
  for flake in flakes_to_update:
    self.add_failure_to_flake(flake, flaky_run_key, failure_time)
OLD | NEW |