Index: gm/rebaseline_server/compare_to_expectations.py |
diff --git a/gm/rebaseline_server/compare_to_expectations.py b/gm/rebaseline_server/compare_to_expectations.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..303294c0ae9a7470329f49ac73b3d793f7ee659a |
--- /dev/null |
+++ b/gm/rebaseline_server/compare_to_expectations.py |
@@ -0,0 +1,415 @@ |
+#!/usr/bin/python |
+ |
+""" |
+Copyright 2013 Google Inc. |
+ |
+Use of this source code is governed by a BSD-style license that can be |
+found in the LICENSE file. |
+ |
+Repackage expected/actual GM results as needed by our HTML rebaseline viewer. |
+""" |
+ |
+# System-level imports |
+import argparse |
+import fnmatch |
+import logging |
+import os |
+import time |
+ |
+# Must fix up PYTHONPATH before importing from within Skia |
+import rs_fixpypath # pylint: disable=W0611 |
+ |
+# Imports from within Skia |
+from py.utils import url_utils |
+import column |
+import gm_json |
+import imagediffdb |
+import imagepair |
+import imagepairset |
+import results |
+ |
# Expectation-dict fields copied unchanged from incoming modification
# records into the new expectations written back to disk.
EXPECTATION_FIELDS_PASSED_THRU_VERBATIM = [
  results.KEY__EXPECTATIONS__BUGS,
  results.KEY__EXPECTATIONS__IGNOREFAILURE,
  results.KEY__EXPECTATIONS__REVIEWED,
]
# Columns the rebaseline viewer filters with freeform text rather than a
# fixed set of choices.
FREEFORM_COLUMN_IDS = [
  results.KEY__EXTRACOLUMNS__BUILDER,
  results.KEY__EXTRACOLUMNS__TEST,
]
# Display order of the extra columns within the generated summaries.
ORDERED_COLUMN_IDS = [
  results.KEY__EXTRACOLUMNS__RESULT_TYPE,
  results.KEY__EXTRACOLUMNS__BUILDER,
  results.KEY__EXTRACOLUMNS__TEST,
  results.KEY__EXTRACOLUMNS__CONFIG,
]

# Root of the Skia checkout: three directory levels above this file.
TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
DEFAULT_EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
DEFAULT_IGNORE_FAILURES_FILE = 'ignored-tests.txt'

# Human-readable labels for the two halves of each ImagePair
# (imageA = expected, imageB = actual).
IMAGEPAIR_SET_DESCRIPTIONS = ('expected image', 'actual image')
+ |
+ |
class ExpectationComparisons(results.BaseComparisons):
  """Loads actual and expected GM results into an ImagePairSet.

  Loads actual and expected results from all builders, except for those skipped
  by _ignore_builder().

  Once this object has been constructed, the results (in self._results[])
  are immutable. If you want to update the results based on updated JSON
  file contents, you will need to create a new ExpectationComparisons object."""

  def __init__(self, image_diff_db, actuals_root=results.DEFAULT_ACTUALS_DIR,
               expected_root=DEFAULT_EXPECTATIONS_DIR,
               ignore_failures_file=DEFAULT_IGNORE_FAILURES_FILE,
               diff_base_url=None, builder_regex_list=None):
    """
    Args:
      image_diff_db: instance of ImageDiffDB we use to cache the image diffs
      actuals_root: root directory containing all actual-results.json files
      expected_root: root directory containing all expected-results.json files
      ignore_failures_file: if a file with this name is found within
          expected_root, ignore failures for any tests listed in the file
      diff_base_url: base URL within which the client should look for diff
          images; if not specified, defaults to a "file:///" URL representation
          of image_diff_db's storage_root
      builder_regex_list: List of regular expressions specifying which builders
          we will process. If None, process all builders.
    """
    super(ExpectationComparisons, self).__init__()
    time_start = int(time.time())
    # Compare against None by identity, per PEP 8 (was "!= None").
    if builder_regex_list is not None:
      self.set_match_builders_pattern_list(builder_regex_list)
    self._image_diff_db = image_diff_db
    self._diff_base_url = (
        diff_base_url or
        url_utils.create_filepath_url(image_diff_db.storage_root))
    self._actuals_root = actuals_root
    self._expected_root = expected_root
    self._ignore_failures_on_these_tests = []
    if ignore_failures_file:
      self._ignore_failures_on_these_tests = (
          ExpectationComparisons._read_noncomment_lines(
              os.path.join(expected_root, ignore_failures_file)))
    self._load_actual_and_expected()
    self._timestamp = int(time.time())
    # Lazy %-args: the message is only formatted if this level is emitted.
    logging.info('Results complete; took %d seconds.',
                 self._timestamp - time_start)

  def edit_expectations(self, modifications):
    """Edit the expectations stored within this object and write them back
    to disk.

    Note that this will NOT update the results stored in self._results[] ;
    in order to see those updates, you must instantiate a new
    ExpectationComparisons object based on the (now updated) files on disk.

    Args:
      modifications: a list of dictionaries, one for each expectation to update:

        [
          {
            imagepair.KEY__IMAGEPAIRS__EXPECTATIONS: {
              results.KEY__EXPECTATIONS__BUGS: [123, 456],
              results.KEY__EXPECTATIONS__IGNOREFAILURE: false,
              results.KEY__EXPECTATIONS__REVIEWED: true,
            },
            imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS: {
              results.KEY__EXTRACOLUMNS__BUILDER: 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
              results.KEY__EXTRACOLUMNS__CONFIG: '8888',
              results.KEY__EXTRACOLUMNS__TEST: 'bigmatrix',
            },
            results.KEY__IMAGEPAIRS__IMAGE_B_URL: 'bitmap-64bitMD5/bigmatrix/10894408024079689926.png',
          },
          ...
        ]

    """
    expected_builder_dicts = self._read_builder_dicts_from_root(
        self._expected_root)
    for mod in modifications:
      image_name = results.IMAGE_FILENAME_FORMATTER % (
          mod[imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS]
          [results.KEY__EXTRACOLUMNS__TEST],
          mod[imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS]
          [results.KEY__EXTRACOLUMNS__CONFIG])
      _, hash_type, hash_digest = gm_json.SplitGmRelativeUrl(
          mod[imagepair.KEY__IMAGEPAIRS__IMAGE_B_URL])
      allowed_digests = [[hash_type, int(hash_digest)]]
      new_expectations = {
          gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,
      }
      for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM:
        value = mod[imagepair.KEY__IMAGEPAIRS__EXPECTATIONS].get(field)
        if value is not None:
          new_expectations[field] = value
      builder_dict = expected_builder_dicts[
          mod[imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS]
          [results.KEY__EXTRACOLUMNS__BUILDER]]
      builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS)
      # Handle both a missing key and an explicit None/empty value by
      # installing a fresh dict into the builder's JSON dictionary.
      if not builder_expectations:
        builder_expectations = {}
        builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations
      builder_expectations[image_name] = new_expectations
    ExpectationComparisons._write_dicts_to_root(
        expected_builder_dicts, self._expected_root)

  @staticmethod
  def _write_dicts_to_root(meta_dict, root, pattern='*.json'):
    """Write all per-builder dictionaries within meta_dict to files under
    the root path.

    Security note: this will only write to files that already exist within
    the root path (as found by os.walk() within root), so we don't need to
    worry about malformed content writing to disk outside of root.
    However, the data written to those files is not double-checked, so it
    could contain poisonous data.

    Args:
      meta_dict: a builder-keyed meta-dictionary containing all the JSON
          dictionaries we want to write out
      root: path to root of directory tree within which to write files
      pattern: which files to write within root (fnmatch-style pattern)

    Raises:
      IOError if root does not refer to an existing directory
      KeyError if the set of per-builder dictionaries written out was
          different than expected
    """
    if not os.path.isdir(root):
      raise IOError('no directory found at path %s' % root)
    actual_builders_written = []
    for dirpath, _, filenames in os.walk(root):
      for matching_filename in fnmatch.filter(filenames, pattern):
        builder = os.path.basename(dirpath)
        per_builder_dict = meta_dict.get(builder)
        if per_builder_dict is not None:
          fullpath = os.path.join(dirpath, matching_filename)
          gm_json.WriteToFile(per_builder_dict, fullpath)
          actual_builders_written.append(builder)

    # Check: did we write out the set of per-builder dictionaries we
    # expected to?
    expected_builders_written = sorted(meta_dict.keys())
    actual_builders_written.sort()
    if expected_builders_written != actual_builders_written:
      raise KeyError(
          'expected to write dicts for builders %s, but actually wrote them '
          'for builders %s' % (
              expected_builders_written, actual_builders_written))

  def _load_actual_and_expected(self):
    """Loads the results of all tests, across all builders (based on the
    files within self._actuals_root and self._expected_root),
    and stores them in self._results.
    """
    logging.info('Reading actual-results JSON files from %s...',
                 self._actuals_root)
    actual_builder_dicts = self._read_builder_dicts_from_root(
        self._actuals_root)
    logging.info('Reading expected-results JSON files from %s...',
                 self._expected_root)
    expected_builder_dicts = self._read_builder_dicts_from_root(
        self._expected_root)

    all_image_pairs = imagepairset.ImagePairSet(
        descriptions=IMAGEPAIR_SET_DESCRIPTIONS,
        diff_base_url=self._diff_base_url)
    failing_image_pairs = imagepairset.ImagePairSet(
        descriptions=IMAGEPAIR_SET_DESCRIPTIONS,
        diff_base_url=self._diff_base_url)

    # Override settings for columns that should be filtered using freeform text.
    for column_id in FREEFORM_COLUMN_IDS:
      factory = column.ColumnHeaderFactory(
          header_text=column_id, use_freeform_filter=True)
      all_image_pairs.set_column_header_factory(
          column_id=column_id, column_header_factory=factory)
      failing_image_pairs.set_column_header_factory(
          column_id=column_id, column_header_factory=factory)

    all_image_pairs.ensure_extra_column_values_in_summary(
        column_id=results.KEY__EXTRACOLUMNS__RESULT_TYPE, values=[
            results.KEY__RESULT_TYPE__FAILED,
            results.KEY__RESULT_TYPE__FAILUREIGNORED,
            results.KEY__RESULT_TYPE__NOCOMPARISON,
            results.KEY__RESULT_TYPE__SUCCEEDED,
        ])
    failing_image_pairs.ensure_extra_column_values_in_summary(
        column_id=results.KEY__EXTRACOLUMNS__RESULT_TYPE, values=[
            results.KEY__RESULT_TYPE__FAILED,
            results.KEY__RESULT_TYPE__FAILUREIGNORED,
            results.KEY__RESULT_TYPE__NOCOMPARISON,
        ])

    # Only consider builders we have both expected and actual results for.
    # Fixes http://skbug.com/2486 ('rebaseline_server shows actual results
    # (but not expectations) for Test-Ubuntu12-ShuttleA-NoGPU-x86_64-Debug
    # builder')
    actual_builder_set = set(actual_builder_dicts.keys())
    expected_builder_set = set(expected_builder_dicts.keys())
    builders = sorted(actual_builder_set.intersection(expected_builder_set))

    num_builders = len(builders)
    # enumerate(..., 1) replaces the manual builder_num counter.
    for builder_num, builder in enumerate(builders, 1):
      logging.info('Generating pixel diffs for builder #%d of %d, "%s"...',
                   builder_num, num_builders, builder)
      actual_results_for_this_builder = (
          actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
      for result_type in sorted(actual_results_for_this_builder.keys()):
        results_of_this_type = actual_results_for_this_builder[result_type]
        if not results_of_this_type:
          continue
        for image_name in sorted(results_of_this_type.keys()):
          (test, config) = results.IMAGE_FILENAME_RE.match(image_name).groups()
          actual_image_relative_url = (
              ExpectationComparisons._create_relative_url(
                  hashtype_and_digest=results_of_this_type[image_name],
                  test_name=test))

          # Default empty expectations; overwrite these if we find any real ones
          expectations_per_test = None
          expected_image_relative_url = None
          expectations_dict = None
          try:
            expectations_per_test = (
                expected_builder_dicts
                [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name])
            # TODO(epoger): assumes a single allowed digest per test, which is
            # fine; see https://code.google.com/p/skia/issues/detail?id=1787
            expected_image_hashtype_and_digest = (
                expectations_per_test
                [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0])
            expected_image_relative_url = (
                ExpectationComparisons._create_relative_url(
                    hashtype_and_digest=expected_image_hashtype_and_digest,
                    test_name=test))
            expectations_dict = {}
            for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM:
              expectations_dict[field] = expectations_per_test.get(field)
          except (KeyError, TypeError):
            # There are several cases in which we would expect to find
            # no expectations for a given test:
            #
            # 1. result_type == NOCOMPARISON
            #    There are no expectations for this test yet!
            #
            # 2. alternate rendering mode failures (e.g. serialized)
            #    In cases like
            #    https://code.google.com/p/skia/issues/detail?id=1684
            #    ('tileimagefilter GM test failing in serialized render mode'),
            #    the gm-actuals will list a failure for the alternate
            #    rendering mode even though we don't have explicit expectations
            #    for the test (the implicit expectation is that it must
            #    render the same in all rendering modes).
            #
            # Don't log type 1, because it is common.
            # Log other types, because they are rare and we should know about
            # them, but don't throw an exception, because we need to keep our
            # tools working in the meanwhile!
            if result_type != results.KEY__RESULT_TYPE__NOCOMPARISON:
              logging.warning('No expectations found for test: %s', {
                  results.KEY__EXTRACOLUMNS__BUILDER: builder,
                  results.KEY__EXTRACOLUMNS__RESULT_TYPE: result_type,
                  'image_name': image_name,
              })

          # If this test was recently rebaselined, it will remain in
          # the 'failed' set of actuals until all the bots have
          # cycled (although the expectations have indeed been set
          # from the most recent actuals). Treat these as successes
          # instead of failures.
          #
          # TODO(epoger): Do we need to do something similar in
          # other cases, such as when we have recently marked a test
          # as ignoreFailure but it still shows up in the 'failed'
          # category? Maybe we should not rely on the result_type
          # categories recorded within the gm_actuals AT ALL, and
          # instead evaluate the result_type ourselves based on what
          # we see in expectations vs actual checksum?
          if expected_image_relative_url == actual_image_relative_url:
            updated_result_type = results.KEY__RESULT_TYPE__SUCCEEDED
          elif ((result_type == results.KEY__RESULT_TYPE__FAILED) and
                (test in self._ignore_failures_on_these_tests)):
            updated_result_type = results.KEY__RESULT_TYPE__FAILUREIGNORED
          else:
            updated_result_type = result_type
          extra_columns_dict = {
              results.KEY__EXTRACOLUMNS__RESULT_TYPE: updated_result_type,
              results.KEY__EXTRACOLUMNS__BUILDER: builder,
              results.KEY__EXTRACOLUMNS__TEST: test,
              results.KEY__EXTRACOLUMNS__CONFIG: config,
          }
          try:
            image_pair = imagepair.ImagePair(
                image_diff_db=self._image_diff_db,
                imageA_base_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL,
                imageB_base_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL,
                imageA_relative_url=expected_image_relative_url,
                imageB_relative_url=actual_image_relative_url,
                expectations=expectations_dict,
                extra_columns=extra_columns_dict)
            all_image_pairs.add_image_pair(image_pair)
            if updated_result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
              failing_image_pairs.add_image_pair(image_pair)
          except Exception:
            # Deliberately broad: one bad image must not abort the whole
            # comparison run; log it (with traceback) and keep going.
            logging.exception('got exception while creating new ImagePair')

    # pylint: disable=W0201
    self._results = {
        results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
            column_ids_in_order=ORDERED_COLUMN_IDS),
        results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
            column_ids_in_order=ORDERED_COLUMN_IDS),
    }
+ |
+ |
def main():
  """Parse command-line options, compare actual vs. expected GM results,
  and write the requested result summary to a JSON file."""
  logging.basicConfig(level=logging.INFO,
                      format='%(asctime)s %(levelname)s %(message)s',
                      datefmt='%m/%d/%Y %H:%M:%S')
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--actuals', default=results.DEFAULT_ACTUALS_DIR,
      help='Directory containing all actual-result JSON files; defaults to '
           '\'%(default)s\' .')
  arg_parser.add_argument(
      '--expectations', default=DEFAULT_EXPECTATIONS_DIR,
      help='Directory containing all expected-result JSON files; defaults to '
           '\'%(default)s\' .')
  arg_parser.add_argument(
      '--ignore-failures-file', default=DEFAULT_IGNORE_FAILURES_FILE,
      help='If a file with this name is found within the EXPECTATIONS dir, '
           'ignore failures for any tests listed in the file; defaults to '
           '\'%(default)s\' .')
  arg_parser.add_argument(
      '--outfile', required=True,
      help='File to write result summary into, in JSON format.')
  arg_parser.add_argument(
      '--results', default=results.KEY__HEADER__RESULTS_FAILURES,
      help='Which result types to include. Defaults to \'%(default)s\'; '
           'must be one of ' +
           str([results.KEY__HEADER__RESULTS_FAILURES,
                results.KEY__HEADER__RESULTS_ALL]))
  arg_parser.add_argument(
      '--workdir', default=results.DEFAULT_GENERATED_IMAGES_ROOT,
      help='Directory within which to download images and generate diffs; '
           'defaults to \'%(default)s\' .')
  options = arg_parser.parse_args()

  # Build the comparison object, then extract and serialize the summary.
  diff_db = imagediffdb.ImageDiffDB(storage_root=options.workdir)
  comparisons = ExpectationComparisons(
      image_diff_db=diff_db,
      actuals_root=options.actuals,
      expected_root=options.expectations,
      ignore_failures_file=options.ignore_failures_file)
  summary = comparisons.get_packaged_results_of_type(
      results_type=options.results)
  gm_json.WriteToFile(summary, options.outfile)
+ |
+ |
# Run as a command-line tool when this module is invoked directly.
if __name__ == '__main__':
  main()