| OLD | NEW |
| (Empty) |
| 1 # Copyright (C) 2011, Google Inc. All rights reserved. | |
| 2 # | |
| 3 # Redistribution and use in source and binary forms, with or without | |
| 4 # modification, are permitted provided that the following conditions are | |
| 5 # met: | |
| 6 # | |
| 7 # * Redistributions of source code must retain the above copyright | |
| 8 # notice, this list of conditions and the following disclaimer. | |
| 9 # * Redistributions in binary form must reproduce the above | |
| 10 # copyright notice, this list of conditions and the following disclaimer | |
| 11 # in the documentation and/or other materials provided with the | |
| 12 # distribution. | |
| 13 # * Neither the name of Google Inc. nor the names of its | |
| 14 # contributors may be used to endorse or promote products derived from | |
| 15 # this software without specific prior written permission. | |
| 16 # | |
| 17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 28 | |
| 29 import copy | |
| 30 import logging | |
| 31 | |
| 32 from webkitpy.common.memoized import memoized | |
| 33 from functools import reduce | |
| 34 | |
| 35 _log = logging.getLogger(__name__) | |
| 36 | |
| 37 | |
| 38 # FIXME: Should this function be somewhere more general? | |
| 39 def _invert_dictionary(dictionary): | |
| 40 inverted_dictionary = {} | |
| 41 for key, value in dictionary.items(): | |
| 42 if inverted_dictionary.get(value): | |
| 43 inverted_dictionary[value].append(key) | |
| 44 else: | |
| 45 inverted_dictionary[value] = [key] | |
| 46 return inverted_dictionary | |
| 47 | |
| 48 | |
class BaselineOptimizer(object):
    """Removes redundant copies of a baseline along the ports' fallback paths.

    For a given baseline name, the optimizer hashes the baseline found in every
    directory of every configured port's baseline search path (plus the root
    directory), drops copies that are shadowed by an identical copy later in
    the fallback order, and promotes a baseline to the root when all the
    directories immediately preceding the root agree on it.
    """

    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'

    def __init__(self, host, port, port_names):
        """Set up the optimizer.

        Args:
            host: object providing `filesystem` and `port_factory`.
            port: the default port; used for virtual-suite lookups and to
                resolve the checkout base and layout tests directories.
            port_names: iterable of port names whose fallback paths are
                considered; each is resolved through `host.port_factory.get`.
        """
        self._filesystem = host.filesystem
        self._default_port = port
        self._ports = {}
        for port_name in port_names:
            self._ports[port_name] = host.port_factory.get(port_name)

        self._webkit_base = port.webkit_base()
        self._layout_tests_dir = port.layout_tests_dir()

        # Only used by unittests.
        self.new_results_by_directory = []

    def _baseline_root(self, baseline_name):
        """Return the root directory (relative to the checkout base) for `baseline_name`."""
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
        return self.ROOT_LAYOUT_TESTS_DIRECTORY

    def _baseline_search_path(self, port, baseline_name):
        """Return `port`'s baseline search path, using the virtual one for virtual baselines."""
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            return port.virtual_baseline_search_path(baseline_name)
        return port.baseline_search_path()

    def _virtual_suite(self, baseline_name):
        """Look up the virtual suite for `baseline_name` on the default port."""
        return self._default_port.lookup_virtual_suite(baseline_name)

    def _virtual_base(self, baseline_name):
        """Look up the non-virtual baseline name for `baseline_name` on the default port."""
        return self._default_port.lookup_virtual_test_base(baseline_name)

    def _relative_baseline_search_paths(self, port, baseline_name):
        """Return `port`'s fallback directories relative to the checkout base, root last."""
        baseline_search_path = self._baseline_search_path(port, baseline_name)
        baseline_root = self._baseline_root(baseline_name)
        relative_paths = [self._filesystem.relpath(path, self._webkit_base) for path in baseline_search_path]
        return relative_paths + [baseline_root]

    def _join_directory(self, directory, baseline_name):
        """Return the absolute path of `baseline_name` inside the relative `directory`."""
        # This code is complicated because both the directory name and the baseline_name have the virtual
        # test suite in the name and the virtual baseline name is not a strict superset of the non-virtual name.
        # For example, virtual/gpu/fast/canvas/foo-expected.png corresponds to fast/canvas/foo-expected.png and
        # the baseline directories are like platform/mac/virtual/gpu/fast/canvas. So, to get the path
        # to the baseline in the platform directory, we need to append just foo-expected.png to the directory.
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
        else:
            baseline_name_without_virtual = baseline_name
        return self._filesystem.join(self._webkit_base, directory, baseline_name_without_virtual)

    def read_results_by_directory(self, baseline_name):
        """Return {relative directory: sha1} for every fallback directory holding the baseline.

        Returns an empty dict when no ports are configured.
        """
        # Accumulate into an explicit set: reducing an empty sequence without
        # an initial value raises TypeError when there are no ports.
        directories = set()
        for port in self._ports.values():
            directories.update(self._relative_baseline_search_paths(port, baseline_name))

        results_by_directory = {}
        for directory in directories:
            path = self._join_directory(directory, baseline_name)
            if self._filesystem.exists(path):
                results_by_directory[directory] = self._filesystem.sha1(path)
        return results_by_directory

    def _results_by_port_name(self, results_by_directory, baseline_name):
        """Return {port name: result} using the first directory in each port's fallback path.

        Ports for which no directory in the fallback path has a result are omitted.
        """
        results_by_port_name = {}
        for port_name, port in self._ports.items():
            for directory in self._relative_baseline_search_paths(port, baseline_name):
                if directory in results_by_directory:
                    results_by_port_name[port_name] = results_by_directory[directory]
                    break
        return results_by_port_name

    @memoized
    def _directories_immediately_preceding_root(self, baseline_name):
        """Return the set of relative directories that come last in each port's search path."""
        directories = set()
        for port in self._ports.values():
            directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], self._webkit_base)
            directories.add(directory)
        return directories

    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
        """Promote a shared baseline to the root, or drop an unused root baseline.

        Mutates `new_results_by_directory` in place and returns None.
        """
        # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
        # to a specific platform. As such, it's the only one where the baseline in fallback directories
        # immediately before it can be promoted up, i.e. if win and mac
        # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
        # All other baselines can only be removed if they're redundant with a baseline earlier
        # in the fallback order. They can never be promoted up.
        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)

        # With nothing preceding the root there is nothing to promote; without this
        # guard the code below would store None as the root baseline.
        if not directories_immediately_preceding_root:
            return

        preceding_results = [new_results_by_directory.get(directory)
                             for directory in directories_immediately_preceding_root]

        # If any of these directories don't have a baseline, there's no optimization we can do.
        if not all(preceding_results):
            return

        baseline_root = self._baseline_root(baseline_name)

        # The root baseline is unused if all the directories immediately preceding the root
        # have a baseline, but have different baselines, so the baselines can't be promoted up.
        if len(set(preceding_results)) > 1:
            if baseline_root in new_results_by_directory:
                del new_results_by_directory[baseline_root]
            return

        new_results_by_directory[baseline_root] = preceding_results[0]
        for directory in directories_immediately_preceding_root:
            del new_results_by_directory[directory]

    def _find_optimal_result_placement(self, baseline_name):
        """Return (current results by directory, optimized results by directory)."""
        results_by_directory = self.read_results_by_directory(baseline_name)
        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
        port_names_by_result = _invert_dictionary(results_by_port_name)

        new_results_by_directory = self._remove_redundant_results(
            results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
        self._optimize_result_for_root(new_results_by_directory, baseline_name)

        return results_by_directory, new_results_by_directory

    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
        """Return a copy of `results_by_directory` without results shadowed later in a fallback path.

        `port_names_by_result` is not read by this method; it is kept in the
        signature for existing callers. `results_by_directory` is not modified.
        """
        new_results_by_directory = copy.copy(results_by_directory)
        for port_name, port in self._ports.items():
            current_result = results_by_port_name.get(port_name)

            # This happens if we're missing baselines for a port.
            if not current_result:
                continue

            fallback_path = self._relative_baseline_search_paths(port, baseline_name)
            current_index, current_directory = self._find_in_fallbackpath(
                fallback_path, current_result, new_results_by_directory)
            for index in range(current_index + 1, len(fallback_path)):
                new_directory = fallback_path[index]
                if new_directory not in new_results_by_directory:
                    # No result for this baseline in this directory.
                    continue
                elif new_results_by_directory[new_directory] == current_result:
                    # Result for new_directory are redundant with the result earlier in the fallback order.
                    if current_directory in new_results_by_directory:
                        del new_results_by_directory[current_directory]
                else:
                    # The new_directory contains a different result, so stop trying to push results up.
                    break

        return new_results_by_directory

    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
        """Return (index, directory) of the first fallback directory holding `current_result`.

        Raises:
            AssertionError: if no directory in `fallback_path` holds `current_result`.
        """
        for index, directory in enumerate(fallback_path):
            if directory in results_by_directory and (results_by_directory[directory] == current_result):
                return index, directory
        # Raised explicitly (rather than `assert False`) so the check survives `python -O`;
        # otherwise this would return None and the caller's tuple unpacking would fail obscurely.
        raise AssertionError(
            "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory))

    def _platform(self, filename):
        """Return the platform directory name `filename` lives under, or '(generic)'.

        `filename` may be relative to the checkout base or absolute.
        """
        sep = self._filesystem.sep
        relative_prefix = self.ROOT_LAYOUT_TESTS_DIRECTORY + sep + 'platform' + sep
        absolute_prefix = self._filesystem.join(self._webkit_base, relative_prefix)
        for prefix in (relative_prefix, absolute_prefix):
            if filename.startswith(prefix):
                # Slice off only the leading prefix; str.replace would also remove
                # any later occurrence of the same text inside the path.
                return filename[len(prefix):].split(sep)[0]
        return '(generic)'

    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        """Make the filesystem match `new_results_by_directory`.

        Baseline contents are read before anything is deleted, then files that
        are no longer wanted are removed and newly placed files are written.
        """
        data_for_result = {}
        for directory, result in results_by_directory.items():
            if result not in data_for_result:
                source = self._join_directory(directory, baseline_name)
                data_for_result[result] = self._filesystem.read_binary_file(source)

        fs_files = []
        for directory, result in results_by_directory.items():
            if new_results_by_directory.get(directory) != result:
                file_name = self._join_directory(directory, baseline_name)
                if self._filesystem.exists(file_name):
                    fs_files.append(file_name)

        if fs_files:
            _log.debug(" Deleting (file system):")
            for platform_dir in sorted(self._platform(filename) for filename in fs_files):
                _log.debug(" " + platform_dir)
            for filename in fs_files:
                self._filesystem.remove(filename)
        else:
            _log.debug(" (Nothing to delete)")

        file_names = []
        for directory, result in new_results_by_directory.items():
            if results_by_directory.get(directory) != result:
                destination = self._join_directory(directory, baseline_name)
                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
                self._filesystem.write_binary_file(destination, data_for_result[result])
                file_names.append(destination)

        if file_names:
            _log.debug(" Adding:")
            for platform_dir in sorted(self._platform(filename) for filename in file_names):
                _log.debug(" " + platform_dir)
        else:
            _log.debug(" (Nothing to add)")

    def write_by_directory(self, results_by_directory, writer, indent):
        """Call `writer` with one '<indent><platform>: <first 6 chars of result>' line per directory."""
        for path in sorted(results_by_directory):
            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))

    def _optimize_subtree(self, baseline_name):
        """Optimize one fallback tree for `baseline_name`.

        Returns:
            True if the tree was already optimal, had no baselines, or was
            rewritten; False if the sanity check on the new placement failed
            (in which case nothing is moved).
        """
        basename = self._filesystem.basename(baseline_name)
        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)

        if new_results_by_directory == results_by_directory:
            if new_results_by_directory:
                _log.debug(" %s: (already optimal)", basename)
                self.write_by_directory(results_by_directory, _log.debug, " ")
            else:
                _log.debug(" %s: (no baselines found)", basename)
            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
            self.new_results_by_directory.append(results_by_directory)
            return True

        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(
                new_results_by_directory, baseline_name):
            # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
            # instead of committing incorrect baselines.
            _log.error(" %s: optimization failed", basename)
            self.write_by_directory(results_by_directory, _log.warning, " ")
            return False

        _log.debug(" %s:", basename)
        _log.debug(" Before: ")
        self.write_by_directory(results_by_directory, _log.debug, " ")
        _log.debug(" After: ")
        self.write_by_directory(new_results_by_directory, _log.debug, " ")

        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
        return True

    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
        """Delete the virtual root baseline if every port would fall back to an identical one."""
        virtual_root_expected_baseline_path = self._filesystem.join(self._layout_tests_dir, baseline_name)
        if not self._filesystem.exists(virtual_root_expected_baseline_path):
            return
        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)

        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
        # See if all the immediate predecessors of the virtual root have the same expected result.
        for port in self._ports.values():
            directories = self._relative_baseline_search_paths(port, non_virtual_baseline_name)
            for directory in directories:
                if directory not in results_by_directory:
                    continue
                # Only the first directory with a result matters for this port.
                if results_by_directory[directory] != root_sha1:
                    return
                break

        _log.debug("Deleting redundant virtual root expected result.")
        _log.debug(" Deleting (file system): " + virtual_root_expected_baseline_path)
        self._filesystem.remove(virtual_root_expected_baseline_path)

    def optimize(self, baseline_name):
        """Optimize `baseline_name` and, for virtual baselines, its non-virtual counterpart.

        Returns:
            True only if every subtree optimization succeeded; False if any
            of them failed its sanity check.
        """
        # The virtual fallback path is the same as the non-virtual one tacked on to the bottom of the non-virtual path.
        # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
        # a visual representation of this.
        #
        # So, we can optimize the virtual path, then the virtual root and then the regular path.

        _log.debug("Optimizing regular fallback path.")
        result = self._optimize_subtree(baseline_name)
        non_virtual_baseline_name = self._virtual_base(baseline_name)
        if not non_virtual_baseline_name:
            return result

        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)

        _log.debug("Optimizing non-virtual fallback path.")
        # Always run the second pass, but report failure if either pass failed;
        # OR-ing the results would hide a failed sanity check behind a later success.
        non_virtual_result = self._optimize_subtree(non_virtual_baseline_name)
        return result and non_virtual_result
| OLD | NEW |