gm/rebaseline_server/results.py - Issue 44123004: rebaseline_server: UI improvements + set reviewed-by-human on commit

Side by Side Diff: gm/rebaseline_server/results.py

Issue 44123004: rebaseline_server: UI improvements + set reviewed-by-human on commit (Closed) Base URL: http://skia.googlecode.com/svn/trunk/

Patch Set: javascript_style_fix Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/python	1 #!/usr/bin/python

2	2

3 """	3 """

4 Copyright 2013 Google Inc.	4 Copyright 2013 Google Inc.

5	5

6 Use of this source code is governed by a BSD-style license that can be	6 Use of this source code is governed by a BSD-style license that can be

7 found in the LICENSE file.	7 found in the LICENSE file.

8	8

9 Repackage expected/actual GM results as needed by our HTML rebaseline viewer.	9 Repackage expected/actual GM results as needed by our HTML rebaseline viewer.

10 """	10 """

(...skipping 15 matching lines...) Expand all Loading...
26 # Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the end	26 # Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the end

27 # so any dirs that are already in the PYTHONPATH will be preferred.	27 # so any dirs that are already in the PYTHONPATH will be preferred.

28 GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))	28 GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

29 if GM_DIRECTORY not in sys.path:	29 if GM_DIRECTORY not in sys.path:

30 sys.path.append(GM_DIRECTORY)	30 sys.path.append(GM_DIRECTORY)

31 import gm_json	31 import gm_json

32	32

33 IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)	33 IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

34 IMAGE_FILENAME_FORMATTER = '%s_%s.png' # pass in (testname, config)	34 IMAGE_FILENAME_FORMATTER = '%s_%s.png' # pass in (testname, config)

35	35

	36 FIELDS_PASSED_THRU_VERBATIM = [

	37 gm_json.JSONKEY_EXPECTEDRESULTS_BUGS,

	38 gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE,

	39 gm_json.JSONKEY_EXPECTEDRESULTS_REVIEWED,

	40 ]

36 CATEGORIES_TO_SUMMARIZE = [	41 CATEGORIES_TO_SUMMARIZE = [

37 'builder', 'test', 'config', 'resultType',	42 'builder', 'test', 'config', 'resultType',

	43 gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE,

	44 gm_json.JSONKEY_EXPECTEDRESULTS_REVIEWED,

38 ]	45 ]

	46

39 RESULTS_ALL = 'all'	47 RESULTS_ALL = 'all'

40 RESULTS_FAILURES = 'failures'	48 RESULTS_FAILURES = 'failures'

41	49

42 class Results(object):	50 class Results(object):

43 """ Loads actual and expected results from all builders, supplying combined	51 """ Loads actual and expected results from all builders, supplying combined

44 reports as requested.	52 reports as requested.

45	53

46 Once this object has been constructed, the results (in self._results[])	54 Once this object has been constructed, the results (in self._results[])

47 are immutable. If you want to update the results based on updated JSON	55 are immutable. If you want to update the results based on updated JSON

48 file contents, you will need to create a new Results object."""	56 file contents, you will need to create a new Results object."""

(...skipping 26 matching lines...) Expand all Loading...
75 Args:	83 Args:

76 modifications: a list of dictionaries, one for each expectation to update:	84 modifications: a list of dictionaries, one for each expectation to update:

77	85

78 [	86 [

79 {	87 {

80 'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',	88 'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',

81 'test': 'bigmatrix',	89 'test': 'bigmatrix',

82 'config': '8888',	90 'config': '8888',

83 'expectedHashType': 'bitmap-64bitMD5',	91 'expectedHashType': 'bitmap-64bitMD5',

84 'expectedHashDigest': '10894408024079689926',	92 'expectedHashDigest': '10894408024079689926',

	93 'bugs': [123, 456],

	94 'ignore-failure': false,

	95 'reviewed-by-human': true,

85 },	96 },

86 ...	97 ...

87 ]	98 ]

88	99

89 TODO(epoger): For now, this does not allow the caller to set any fields

90 other than expectedHashType/expectedHashDigest, and assumes that

91 ignore-failure should be set to False. We need to add support

92 for other fields (notes, bugs, etc.) and ignore-failure=True.

93 """	100 """

94 expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)	101 expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)

95 for mod in modifications:	102 for mod in modifications:

96 image_name = IMAGE_FILENAME_FORMATTER % (mod['test'], mod['config'])	103 image_name = IMAGE_FILENAME_FORMATTER % (mod['test'], mod['config'])

97 # TODO(epoger): assumes a single allowed digest per test	104 # TODO(epoger): assumes a single allowed digest per test

98 allowed_digests = [[mod['expectedHashType'],	105 allowed_digests = [[mod['expectedHashType'],

99 int(mod['expectedHashDigest'])]]	106 int(mod['expectedHashDigest'])]]

100 new_expectations = {	107 new_expectations = {

101 gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,	108 gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,

102 gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE: False,

103 }	109 }

	110 for field in FIELDS_PASSED_THRU_VERBATIM:

	111 value = mod.get(field)

	112 if value is not None:

	113 new_expectations[field] = value

104 builder_dict = expected_builder_dicts[mod['builder']]	114 builder_dict = expected_builder_dicts[mod['builder']]

105 builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS)	115 builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS)

106 if not builder_expectations:	116 if not builder_expectations:

107 builder_expectations = {}	117 builder_expectations = {}

108 builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations	118 builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations

109 builder_expectations[image_name] = new_expectations	119 builder_expectations[image_name] = new_expectations

110 Results._write_dicts_to_root(expected_builder_dicts, self._expected_root)	120 Results._write_dicts_to_root(expected_builder_dicts, self._expected_root)

111	121

112 def get_results_of_type(self, type):	122 def get_results_of_type(self, type):

113 """Return results of some/all tests (depending on 'type' parameter).	123 """Return results of some/all tests (depending on 'type' parameter).

(...skipping 21 matching lines...) Expand all Loading...
135 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,	145 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,

136 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,	146 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,

137 ...	147 ...

138 },	148 },

139 ... # other categories from CATEGORIES_TO_SUMMARIZE	149 ... # other categories from CATEGORIES_TO_SUMMARIZE

140 }, # end of 'categories' dictionary	150 }, # end of 'categories' dictionary

141	151

142 'testData': # list of test results, with a dictionary for each	152 'testData': # list of test results, with a dictionary for each

143 [	153 [

144 {	154 {

	155 'resultType': 'failed',

145 'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',	156 'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',

146 'test': 'bigmatrix',	157 'test': 'bigmatrix',

147 'config': '8888',	158 'config': '8888',

148 'resultType': 'failed',

149 'expectedHashType': 'bitmap-64bitMD5',	159 'expectedHashType': 'bitmap-64bitMD5',

150 'expectedHashDigest': '10894408024079689926',	160 'expectedHashDigest': '10894408024079689926',

151 'actualHashType': 'bitmap-64bitMD5',	161 'actualHashType': 'bitmap-64bitMD5',

152 'actualHashDigest': '2409857384569',	162 'actualHashDigest': '2409857384569',

	163 'bugs': [123, 456],

	164 'ignore-failure': false,

	165 'reviewed-by-human': true,

153 },	166 },

154 ...	167 ...

155 ], # end of 'testData' list	168 ], # end of 'testData' list

156 }	169 }

157 """	170 """

158 return self._results[type]	171 return self._results[type]

159	172

160 @staticmethod	173 @staticmethod

161 def _read_dicts_from_root(root, pattern='*.json'):	174 def _read_dicts_from_root(root, pattern='*.json'):

162 """Read all JSON dictionaries within a directory tree.	175 """Read all JSON dictionaries within a directory tree.

(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
239 def _load_actual_and_expected(self):	252 def _load_actual_and_expected(self):

240 """Loads the results of all tests, across all builders (based on the	253 """Loads the results of all tests, across all builders (based on the

241 files within self._actuals_root and self._expected_root),	254 files within self._actuals_root and self._expected_root),

242 and stores them in self._results.	255 and stores them in self._results.

243 """	256 """

244 actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root)	257 actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root)

245 expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)	258 expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)

246	259

247 categories_all = {}	260 categories_all = {}

248 categories_failures = {}	261 categories_failures = {}

	262

249 Results._ensure_included_in_category_dict(categories_all,	263 Results._ensure_included_in_category_dict(categories_all,

250 'resultType', [	264 'resultType', [

251 gm_json.JSONKEY_ACTUALRESULTS_FAILED,	265 gm_json.JSONKEY_ACTUALRESULTS_FAILED,

252 gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,	266 gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,

253 gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,	267 gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,

254 gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,	268 gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,

255 ])	269 ])

256 Results._ensure_included_in_category_dict(categories_failures,	270 Results._ensure_included_in_category_dict(categories_failures,

257 'resultType', [	271 'resultType', [

258 gm_json.JSONKEY_ACTUALRESULTS_FAILED,	272 gm_json.JSONKEY_ACTUALRESULTS_FAILED,

259 gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,	273 gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,

260 gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,	274 gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,

261 ])	275 ])

262	276

263 data_all = []	277 data_all = []

264 data_failures = []	278 data_failures = []

265 for builder in sorted(actual_builder_dicts.keys()):	279 for builder in sorted(actual_builder_dicts.keys()):

266 actual_results_for_this_builder = (	280 actual_results_for_this_builder = (

267 actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])	281 actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])

268 for result_type in sorted(actual_results_for_this_builder.keys()):	282 for result_type in sorted(actual_results_for_this_builder.keys()):

269 results_of_this_type = actual_results_for_this_builder[result_type]	283 results_of_this_type = actual_results_for_this_builder[result_type]

270 if not results_of_this_type:	284 if not results_of_this_type:

271 continue	285 continue

272 for image_name in sorted(results_of_this_type.keys()):	286 for image_name in sorted(results_of_this_type.keys()):

273 actual_image = results_of_this_type[image_name]	287 actual_image = results_of_this_type[image_name]

	288

	289 # Default empty expectations; overwrite these if we find any real ones

	290 expectations_per_test = None

	291 expected_image = [None, None]

274 try:	292 try:

	293 expectations_per_test = (

	294 expected_builder_dicts

	295 [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name])

275 # TODO(epoger): assumes a single allowed digest per test	296 # TODO(epoger): assumes a single allowed digest per test

276 expected_image = (	297 expected_image = (

277 expected_builder_dicts	298 expectations_per_test

278 [builder][gm_json.JSONKEY_EXPECTEDRESULTS]	299 [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0])

279 [image_name][gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]

280 [0])

281 except (KeyError, TypeError):	300 except (KeyError, TypeError):

282 # There are several cases in which we would expect to find	301 # There are several cases in which we would expect to find

283 # no expectations for a given test:	302 # no expectations for a given test:

284 #	303 #

285 # 1. result_type == NOCOMPARISON	304 # 1. result_type == NOCOMPARISON

286 # There are no expectations for this test yet!	305 # There are no expectations for this test yet!

287 #	306 #

288 # 2. ignore-tests.txt	307 # 2. alternate rendering mode failures (e.g. serialized)

289 # If a test has been listed in ignore-tests.txt, then its status

290 # may show as FAILUREIGNORED even if it doesn't have any

291 # expectations yet.

292 #

293 # 3. alternate rendering mode failures (e.g. serialized)

294 # In cases like	308 # In cases like

295 # https://code.google.com/p/skia/issues/detail?id=1684	309 # https://code.google.com/p/skia/issues/detail?id=1684

296 # ('tileimagefilter GM test failing in serialized render mode'),	310 # ('tileimagefilter GM test failing in serialized render mode'),

297 # the gm-actuals will list a failure for the alternate	311 # the gm-actuals will list a failure for the alternate

298 # rendering mode even though we don't have explicit expectations	312 # rendering mode even though we don't have explicit expectations

299 # for the test (the implicit expectation is that it must	313 # for the test (the implicit expectation is that it must

300 # render the same in all rendering modes).	314 # render the same in all rendering modes).

301 #	315 #

302 # Don't log types 1 or 2, because they are common.	316 # Don't log type 1, because it is common.

303 # Log other types, because they are rare and we should know about	317 # Log other types, because they are rare and we should know about

304 # them, but don't throw an exception, because we need to keep our	318 # them, but don't throw an exception, because we need to keep our

305 # tools working in the meanwhile!	319 # tools working in the meanwhile!

306 if result_type not in [	320 if result_type != gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON:

307 gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,

308 gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED] :

309 logging.warning('No expectations found for test: %s' % {	321 logging.warning('No expectations found for test: %s' % {

310 'builder': builder,	322 'builder': builder,

311 'image_name': image_name,	323 'image_name': image_name,

312 'result_type': result_type,	324 'result_type': result_type,

313 })	325 })

314 expected_image = [None, None]

315	326

316 # If this test was recently rebaselined, it will remain in	327 # If this test was recently rebaselined, it will remain in

317 # the 'failed' set of actuals until all the bots have	328 # the 'failed' set of actuals until all the bots have

318 # cycled (although the expectations have indeed been set	329 # cycled (although the expectations have indeed been set

319 # from the most recent actuals). Treat these as successes	330 # from the most recent actuals). Treat these as successes

320 # instead of failures.	331 # instead of failures.

321 #	332 #

322 # TODO(epoger): Do we need to do something similar in	333 # TODO(epoger): Do we need to do something similar in

323 # other cases, such as when we have recently marked a test	334 # other cases, such as when we have recently marked a test

324 # as ignoreFailure but it still shows up in the 'failed'	335 # as ignoreFailure but it still shows up in the 'failed'

325 # category? Maybe we should not rely on the result_type	336 # category? Maybe we should not rely on the result_type

326 # categories recorded within the gm_actuals AT ALL, and	337 # categories recorded within the gm_actuals AT ALL, and

327 # instead evaluate the result_type ourselves based on what	338 # instead evaluate the result_type ourselves based on what

328 # we see in expectations vs actual checksum?	339 # we see in expectations vs actual checksum?

329 if expected_image == actual_image:	340 if expected_image == actual_image:

330 updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED	341 updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED

331 else:	342 else:

332 updated_result_type = result_type	343 updated_result_type = result_type

333	344

334 (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()	345 (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()

335 results_for_this_test = {	346 results_for_this_test = {

	347 'resultType': updated_result_type,

336 'builder': builder,	348 'builder': builder,

337 'test': test,	349 'test': test,

338 'config': config,	350 'config': config,

339 'resultType': updated_result_type,

340 'actualHashType': actual_image[0],	351 'actualHashType': actual_image[0],

341 'actualHashDigest': str(actual_image[1]),	352 'actualHashDigest': str(actual_image[1]),

342 'expectedHashType': expected_image[0],	353 'expectedHashType': expected_image[0],

343 'expectedHashDigest': str(expected_image[1]),	354 'expectedHashDigest': str(expected_image[1]),

	355

	356 # FIELDS_PASSED_THRU_VERBATIM that may be overwritten below...

	357 gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE: False,

344 }	358 }

	359 if expectations_per_test:

	360 for field in FIELDS_PASSED_THRU_VERBATIM:

	361 results_for_this_test[field] = expectations_per_test.get(field)

345 Results._add_to_category_dict(categories_all, results_for_this_test)	362 Results._add_to_category_dict(categories_all, results_for_this_test)

346 data_all.append(results_for_this_test)	363 data_all.append(results_for_this_test)

	364

	365 # TODO(epoger): In effect, we have a list of resultTypes that we

	366 # include in the different result lists (data_all and data_failures).

	367 # This same list should be used by the calls to

	368 # Results._ensure_included_in_category_dict() earlier on.

347 if updated_result_type != gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:	369 if updated_result_type != gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:

348 Results._add_to_category_dict(categories_failures,	370 Results._add_to_category_dict(categories_failures,

349 results_for_this_test)	371 results_for_this_test)

350 data_failures.append(results_for_this_test)	372 data_failures.append(results_for_this_test)

351	373

352 self._results = {	374 self._results = {

353 RESULTS_ALL:	375 RESULTS_ALL:

354 {'categories': categories_all, 'testData': data_all},	376 {'categories': categories_all, 'testData': data_all},

355 RESULTS_FAILURES:	377 RESULTS_FAILURES:

356 {'categories': categories_failures, 'testData': data_failures},	378 {'categories': categories_failures, 'testData': data_failures},

357 }	379 }

358	380

359 @staticmethod	381 @staticmethod

360 def _add_to_category_dict(category_dict, test_results):	382 def _add_to_category_dict(category_dict, test_results):

361 """Add test_results to the category dictionary we are building.	383 """Add test_results to the category dictionary we are building.

362 (See documentation of self.get_results_of_type() for the format of this	384 (See documentation of self.get_results_of_type() for the format of this

363 dictionary.)	385 dictionary.)

364	386

365 Args:	387 Args:

366 category_dict: category dict-of-dicts to add to; modify this in-place	388 category_dict: category dict-of-dicts to add to; modify this in-place

367 test_results: test data with which to update category_list, in a dict:	389 test_results: test data with which to update category_list, in a dict:

368 {	390 {

369 'category_name': 'category_value',	391 'category_name': 'category_value',

370 'category_name': 'category_value',	392 'category_name': 'category_value',

371 ...	393 ...

372 }	394 }

373 """	395 """

374 for category in CATEGORIES_TO_SUMMARIZE:	396 for category in CATEGORIES_TO_SUMMARIZE:

375 category_value = test_results.get(category)	397 category_value = test_results.get(category)

376 if not category_value:

377 continue # test_results did not include this category, keep going

378 if not category_dict.get(category):	398 if not category_dict.get(category):

379 category_dict[category] = {}	399 category_dict[category] = {}

380 if not category_dict[category].get(category_value):	400 if not category_dict[category].get(category_value):

381 category_dict[category][category_value] = 0	401 category_dict[category][category_value] = 0

382 category_dict[category][category_value] += 1	402 category_dict[category][category_value] += 1

383	403

384 @staticmethod	404 @staticmethod

385 def _ensure_included_in_category_dict(category_dict,	405 def _ensure_included_in_category_dict(category_dict,

386 category_name, category_values):	406 category_name, category_values):

387 """Ensure that the category name/value pairs are included in category_dict,	407 """Ensure that the category name/value pairs are included in category_dict,

388 even if there aren't any results with that name/value pair.	408 even if there aren't any results with that name/value pair.

389 (See documentation of self.get_results_of_type() for the format of this	409 (See documentation of self.get_results_of_type() for the format of this

390 dictionary.)	410 dictionary.)

391	411

392 Args:	412 Args:

393 category_dict: category dict-of-dicts to modify	413 category_dict: category dict-of-dicts to modify

394 category_name: category name, as a string	414 category_name: category name, as a string

395 category_values: list of values we want to make sure are represented	415 category_values: list of values we want to make sure are represented

396 for this category	416 for this category

397 """	417 """

398 if not category_dict.get(category_name):	418 if not category_dict.get(category_name):

399 category_dict[category_name] = {}	419 category_dict[category_name] = {}

400 for category_value in category_values:	420 for category_value in category_values:

401 if not category_dict[category_name].get(category_value):	421 if not category_dict[category_name].get(category_value):

402 category_dict[category_name][category_value] = 0	422 category_dict[category_name][category_value] = 0

OLD	NEW

« no previous file with comments | « no previous file | gm/rebaseline_server/static/loader.js » ('j') | no next file with comments »