OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 """ | 3 """ |
4 Copyright 2013 Google Inc. | 4 Copyright 2013 Google Inc. |
5 | 5 |
6 Use of this source code is governed by a BSD-style license that can be | 6 Use of this source code is governed by a BSD-style license that can be |
7 found in the LICENSE file. | 7 found in the LICENSE file. |
8 | 8 |
9 Calculate differences between image pairs, and store them in a database. | 9 Calculate differences between image pairs, and store them in a database. |
10 """ | 10 """ |
11 | 11 |
12 # System-level imports | 12 # System-level imports |
13 import contextlib | 13 import contextlib |
14 import json | 14 import json |
15 import logging | 15 import logging |
16 import os | 16 import os |
17 import re | 17 import re |
18 import shutil | 18 import shutil |
19 import tempfile | 19 import tempfile |
20 import urllib | 20 import urllib |
21 | 21 |
22 # Must fix up PYTHONPATH before importing from within Skia | 22 # Must fix up PYTHONPATH before importing from within Skia |
23 import fix_pythonpath # pylint: disable=W0611 | 23 import fix_pythonpath # pylint: disable=W0611 |
24 | 24 |
25 # Imports from within Skia | 25 # Imports from within Skia |
26 import find_run_binary | 26 import find_run_binary |
| 27 from py.utils import gs_utils |
| 28 |
27 | 29 |
28 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') | 30 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') |
29 | 31 |
30 DEFAULT_IMAGE_SUFFIX = '.png' | 32 DEFAULT_IMAGE_SUFFIX = '.png' |
31 DEFAULT_IMAGES_SUBDIR = 'images' | 33 DEFAULT_IMAGES_SUBDIR = 'images' |
32 | 34 |
33 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') | 35 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') |
34 | 36 |
35 RGBDIFFS_SUBDIR = 'diffs' | 37 RGBDIFFS_SUBDIR = 'diffs' |
36 WHITEDIFFS_SUBDIR = 'whitediffs' | 38 WHITEDIFFS_SUBDIR = 'whitediffs' |
37 | 39 |
38 # Keys used within DiffRecord dictionary representations. | 40 # Keys used within DiffRecord dictionary representations. |
39 # NOTE: Keep these in sync with static/constants.js | 41 # NOTE: Keep these in sync with static/constants.js |
40 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' | 42 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' |
41 KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels' | 43 KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels' |
42 KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' | 44 KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' |
43 KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference' | 45 KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference' |
44 | 46 |
45 | 47 |
| 48 class GSObject(object): |
| 49 """Reference to a file, dir, or entire bucket in Google Storage.""" |
| 50 # EPOGER: move this class into gs_utils.py ? |
| 51 def __init__(self, gs_url=None, bucket=None, path=None): |
| 52 """ |
| 53 Create a reference to a GS object, using EITHER gs_url OR bucket/path. |
| 54 |
| 55 Args: |
| 56 gs_url: URL pointing to this file, e.g. "gs://bucketname/dir/file". |
| 57 If this is set, the other params must NOT be set. |
| 58 bucket: name of the bucket, e.g. "bucketname". |
| 59 If this is set, the "gs_url" param must NOT be set. |
| 60 path: Posix-style path within the bucket, e.g. "dir/file". |
| 61 If this is set, the "gs_url" param must NOT be set. |
| 62 """ |
| 63 if gs_url != None: |
| 64 if bucket != None or path != None: |
| 65 raise Exception( |
| 66 'gs_url is set, so neither bucket nor path should be set') |
| 67 (self.bucket, self.path) = gs_utils.GSUtils.split_gs_url(gs_url) |
| 68 else: |
| 69 if not bucket: |
| 70 raise Exception('gs_url is not set, so bucket must be set') |
| 71 self.bucket = bucket |
| 72 self.path = path or '' |
| 73 |
| 74 def as_gs_url(self): |
| 75 """Returns a gs:// URL pointing at this object in Google Storage.""" |
| 76 url = 'gs://' + self.bucket |
| 77 if self.path: |
| 78 url += '/' + self.path |
| 79 return url |
| 80 |
| 81 def as_http_url(self): |
| 82 """Returns an http:// URL pointing at this object in Google Storage.""" |
| 83 url = 'http://%s.commondatastorage.googleapis.com' % self.bucket |
| 84 if self.path: |
| 85 url += '/' + self.path |
| 86 return url |
| 87 |
| 88 |
46 class DiffRecord(object): | 89 class DiffRecord(object): |
47 """ Record of differences between two images. """ | 90 """ Record of differences between two images. """ |
48 | 91 |
49 def __init__(self, storage_root, | 92 def __init__(self, gs, storage_root, |
50 expected_image_url, expected_image_locator, | 93 expected_image_url, expected_image_locator, |
51 actual_image_url, actual_image_locator, | 94 actual_image_url, actual_image_locator, |
52 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, | 95 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, |
53 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, | 96 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, |
54 image_suffix=DEFAULT_IMAGE_SUFFIX): | 97 image_suffix=DEFAULT_IMAGE_SUFFIX): |
55 """Download this pair of images (unless we already have them on local disk), | 98 """Download this pair of images (unless we already have them on local disk), |
56 and prepare a DiffRecord for them. | 99 and prepare a DiffRecord for them. |
57 | 100 |
58 TODO(epoger): Make this asynchronously download images, rather than blocking | 101 TODO(epoger): Make this asynchronously download images, rather than blocking |
59 until the images have been downloaded and processed. | 102 until the images have been downloaded and processed. |
60 | 103 |
61 Args: | 104 Args: |
| 105 gs: instance of GSUtils object we can use to download images |
62 storage_root: root directory on local disk within which we store all | 106 storage_root: root directory on local disk within which we store all |
63 images | 107 images |
64 expected_image_url: file or HTTP url from which we will download the | 108 expected_image_url: file, GS, or HTTP url from which we will download the |
65 expected image | 109 expected image |
66 expected_image_locator: a unique ID string under which we will store the | 110 expected_image_locator: a unique ID string under which we will store the |
67 expected image within storage_root (probably including a checksum to | 111 expected image within storage_root (probably including a checksum to |
68 guarantee uniqueness) | 112 guarantee uniqueness) |
69 actual_image_url: file or HTTP url from which we will download the | 113 actual_image_url: file, GS, or HTTP url from which we will download the |
70 actual image | 114 actual image |
71 actual_image_locator: a unique ID string under which we will store the | 115 actual_image_locator: a unique ID string under which we will store the |
72 actual image within storage_root (probably including a checksum to | 116 actual image within storage_root (probably including a checksum to |
73 guarantee uniqueness) | 117 guarantee uniqueness) |
74 expected_images_subdir: the subdirectory expected images are stored in. | 118 expected_images_subdir: the subdirectory expected images are stored in. |
75 actual_images_subdir: the subdirectory actual images are stored in. | 119 actual_images_subdir: the subdirectory actual images are stored in. |
76 image_suffix: the suffix of images. | 120 image_suffix: the suffix of images. |
77 """ | 121 """ |
78 expected_image_locator = _sanitize_locator(expected_image_locator) | 122 expected_image_locator = _sanitize_locator(expected_image_locator) |
79 actual_image_locator = _sanitize_locator(actual_image_locator) | 123 actual_image_locator = _sanitize_locator(actual_image_locator) |
80 | 124 |
81 # Download the expected/actual images, if we don't have them already. | 125 # Download the expected/actual images, if we don't have them already. |
82 # TODO(rmistry): Add a parameter that just tries to use already-present | 126 # TODO(rmistry): Add a parameter that just tries to use already-present |
83 # image files rather than downloading them. | 127 # image files rather than downloading them. |
84 expected_image_file = os.path.join( | 128 expected_image_file = os.path.join( |
85 storage_root, expected_images_subdir, | 129 storage_root, expected_images_subdir, |
86 str(expected_image_locator) + image_suffix) | 130 str(expected_image_locator) + image_suffix) |
87 actual_image_file = os.path.join( | 131 actual_image_file = os.path.join( |
88 storage_root, actual_images_subdir, | 132 storage_root, actual_images_subdir, |
89 str(actual_image_locator) + image_suffix) | 133 str(actual_image_locator) + image_suffix) |
90 try: | 134 try: |
91 _download_file(expected_image_file, expected_image_url) | 135 _download_file(gs, expected_image_file, expected_image_url) |
92 except Exception: | 136 except Exception: |
93 logging.exception('unable to download expected_image_url %s to file %s' % | 137 logging.exception('unable to download expected_image_url %s to file %s' % |
94 (expected_image_url, expected_image_file)) | 138 (expected_image_url, expected_image_file)) |
95 raise | 139 raise |
96 try: | 140 try: |
97 _download_file(actual_image_file, actual_image_url) | 141 _download_file(gs, actual_image_file, actual_image_url) |
98 except Exception: | 142 except Exception: |
99 logging.exception('unable to download actual_image_url %s to file %s' % | 143 logging.exception('unable to download actual_image_url %s to file %s' % |
100 (actual_image_url, actual_image_file)) | 144 (actual_image_url, actual_image_file)) |
101 raise | 145 raise |
102 | 146 |
103 # Get all diff images and values from skpdiff binary. | 147 # Get all diff images and values from skpdiff binary. |
104 skpdiff_output_dir = tempfile.mkdtemp() | 148 skpdiff_output_dir = tempfile.mkdtemp() |
105 try: | 149 try: |
106 skpdiff_summary_file = os.path.join(skpdiff_output_dir, | 150 skpdiff_summary_file = os.path.join(skpdiff_output_dir, |
107 'skpdiff-output.json') | 151 'skpdiff-output.json') |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
204 self.get_percent_pixels_differing(), | 248 self.get_percent_pixels_differing(), |
205 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, | 249 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, |
206 KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference, | 250 KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference, |
207 } | 251 } |
208 | 252 |
209 | 253 |
210 class ImageDiffDB(object): | 254 class ImageDiffDB(object): |
211 """ Calculates differences between image pairs, maintaining a database of | 255 """ Calculates differences between image pairs, maintaining a database of |
212 them for download.""" | 256 them for download.""" |
213 | 257 |
214 def __init__(self, storage_root): | 258 def __init__(self, storage_root, gs=None): |
215 """ | 259 """ |
216 Args: | 260 Args: |
217 storage_root: string; root path within which the DB will store all of its stuff | 261 storage_root: string; root path within which the DB will store all of its stuff |
| 262 gs: instance of GSUtils object we can use to download images |
218 """ | 263 """ |
219 self._storage_root = storage_root | 264 self._storage_root = storage_root |
| 265 self._gs = gs |
220 | 266 |
221 # Dictionary of DiffRecords, keyed by (expected_image_locator, | 267 # Dictionary of DiffRecords, keyed by (expected_image_locator, |
222 # actual_image_locator) tuples. | 268 # actual_image_locator) tuples. |
223 self._diff_dict = {} | 269 self._diff_dict = {} |
224 | 270 |
225 @property | 271 @property |
226 def storage_root(self): | 272 def storage_root(self): |
227 return self._storage_root | 273 return self._storage_root |
228 | 274 |
229 def add_image_pair(self, | 275 def add_image_pair(self, |
230 expected_image_url, expected_image_locator, | 276 expected_image_url, expected_image_locator, |
231 actual_image_url, actual_image_locator): | 277 actual_image_url, actual_image_locator): |
232 """Download this pair of images (unless we already have them on local disk), | 278 """Download this pair of images (unless we already have them on local disk), |
233 and prepare a DiffRecord for them. | 279 and prepare a DiffRecord for them. |
234 | 280 |
235 TODO(epoger): Make this asynchronously download images, rather than blocking | 281 TODO(epoger): Make this asynchronously download images, rather than blocking |
236 until the images have been downloaded and processed. | 282 until the images have been downloaded and processed. |
237 When we do that, we should probably add a new method that will block | 283 When we do that, we should probably add a new method that will block |
238 until all of the images have been downloaded and processed. Otherwise, | 284 until all of the images have been downloaded and processed. Otherwise, |
239 we won't know when it's safe to start calling get_diff_record(). | 285 we won't know when it's safe to start calling get_diff_record(). |
240 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a | 286 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a |
241 thread-pool/worker queue at a higher level that just uses ImageDiffDB? | 287 thread-pool/worker queue at a higher level that just uses ImageDiffDB? |
242 | 288 |
243 Args: | 289 Args: |
244 expected_image_url: file or HTTP url from which we will download the | 290 expected_image_url: file, GS, or HTTP url from which we will download the |
245 expected image | 291 expected image |
246 expected_image_locator: a unique ID string under which we will store the | 292 expected_image_locator: a unique ID string under which we will store the |
247 expected image within storage_root (probably including a checksum to | 293 expected image within storage_root (probably including a checksum to |
248 guarantee uniqueness) | 294 guarantee uniqueness) |
249 actual_image_url: file or HTTP url from which we will download the | 295 actual_image_url: file, GS, or HTTP url from which we will download the |
250 actual image | 296 actual image |
251 actual_image_locator: a unique ID string under which we will store the | 297 actual_image_locator: a unique ID string under which we will store the |
252 actual image within storage_root (probably including a checksum to | 298 actual image within storage_root (probably including a checksum to |
253 guarantee uniqueness) | 299 guarantee uniqueness) |
254 """ | 300 """ |
255 expected_image_locator = _sanitize_locator(expected_image_locator) | 301 expected_image_locator = _sanitize_locator(expected_image_locator) |
256 actual_image_locator = _sanitize_locator(actual_image_locator) | 302 actual_image_locator = _sanitize_locator(actual_image_locator) |
257 key = (expected_image_locator, actual_image_locator) | 303 key = (expected_image_locator, actual_image_locator) |
258 if not key in self._diff_dict: | 304 if not key in self._diff_dict: |
259 try: | 305 try: |
260 new_diff_record = DiffRecord( | 306 new_diff_record = DiffRecord( |
261 self._storage_root, | 307 self._gs, self._storage_root, |
262 expected_image_url=expected_image_url, | 308 expected_image_url=expected_image_url, |
263 expected_image_locator=expected_image_locator, | 309 expected_image_locator=expected_image_locator, |
264 actual_image_url=actual_image_url, | 310 actual_image_url=actual_image_url, |
265 actual_image_locator=actual_image_locator) | 311 actual_image_locator=actual_image_locator) |
266 except Exception: | 312 except Exception: |
267 # If we can't create a real DiffRecord for this (expected, actual) pair, | 313 # If we can't create a real DiffRecord for this (expected, actual) pair, |
268 # store None and the UI will show whatever information we DO have. | 314 # store None and the UI will show whatever information we DO have. |
269 # Fixes http://skbug.com/2368 . | 315 # Fixes http://skbug.com/2368 . |
270 logging.exception( | 316 logging.exception( |
271 'got exception while creating a DiffRecord for ' | 317 'got exception while creating a DiffRecord for ' |
272 'expected_image_url=%s , actual_image_url=%s; returning None' % ( | 318 'expected_image_url=%s , actual_image_url=%s; returning None' % ( |
273 expected_image_url, actual_image_url)) | 319 expected_image_url, actual_image_url)) |
274 new_diff_record = None | 320 new_diff_record = None |
275 self._diff_dict[key] = new_diff_record | 321 self._diff_dict[key] = new_diff_record |
276 | 322 |
277 def get_diff_record(self, expected_image_locator, actual_image_locator): | 323 def get_diff_record(self, expected_image_locator, actual_image_locator): |
278 """Returns the DiffRecord for this image pair. | 324 """Returns the DiffRecord for this image pair. |
279 | 325 |
280 Raises a KeyError if we don't have a DiffRecord for this image pair. | 326 Raises a KeyError if we don't have a DiffRecord for this image pair. |
281 """ | 327 """ |
282 key = (_sanitize_locator(expected_image_locator), | 328 key = (_sanitize_locator(expected_image_locator), |
283 _sanitize_locator(actual_image_locator)) | 329 _sanitize_locator(actual_image_locator)) |
284 return self._diff_dict[key] | 330 return self._diff_dict[key] |
285 | 331 |
286 | 332 |
287 # Utility functions | 333 # Utility functions |
288 | 334 |
289 def _download_file(local_filepath, url): | 335 def _download_file(gs, local_filepath, url): |
290 """Download a file from url to local_filepath, unless it is already there. | 336 """Download a file from url to local_filepath, unless it is already there. |
291 | 337 |
292 Args: | 338 Args: |
| 339 gs: instance of GSUtils object, in case the url points at Google Storage |
293 local_filepath: path on local disk where the image should be stored | 340 local_filepath: path on local disk where the image should be stored |
294 url: URL from which we can download the image if we don't have it yet | 341 url: HTTP or GS URL from which we can download the image if we don't have |
| 342 it yet |
295 """ | 343 """ |
296 if not os.path.exists(local_filepath): | 344 if not os.path.exists(local_filepath): |
297 _mkdir_unless_exists(os.path.dirname(local_filepath)) | 345 _mkdir_unless_exists(os.path.dirname(local_filepath)) |
298 with contextlib.closing(urllib.urlopen(url)) as url_handle: | 346 if gs_utils.GSUtils.is_gs_url(url): |
299 with open(local_filepath, 'wb') as file_handle: | 347 (bucket, path) = gs_utils.GSUtils.split_gs_url(url) |
300 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) | 348 gs.download_file(source_bucket=bucket, source_path=path, |
| 349 dest_path=local_filepath) |
| 350 else: |
| 351 with contextlib.closing(urllib.urlopen(url)) as url_handle: |
| 352 with open(local_filepath, 'wb') as file_handle: |
| 353 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) |
301 | 354 |
302 | 355 |
303 def _mkdir_unless_exists(path): | 356 def _mkdir_unless_exists(path): |
304 """Unless path refers to an already-existing directory, create it. | 357 """Unless path refers to an already-existing directory, create it. |
305 | 358 |
306 Args: | 359 Args: |
307 path: path on local disk | 360 path: path on local disk |
308 """ | 361 """ |
309 if not os.path.isdir(path): | 362 if not os.path.isdir(path): |
310 os.makedirs(path) | 363 os.makedirs(path) |
(...skipping 18 matching lines...) Expand all Loading... |
329 | 382 |
330 Args: | 383 Args: |
331 expected_image_locator: locator string pointing at expected image | 384 expected_image_locator: locator string pointing at expected image |
332 actual_image_locator: locator string pointing at actual image | 385 actual_image_locator: locator string pointing at actual image |
333 | 386 |
334 Returns: already-sanitized locator where the diffs between expected and | 387 Returns: already-sanitized locator where the diffs between expected and |
335 actual images can be found | 388 actual images can be found |
336 """ | 389 """ |
337 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), | 390 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), |
338 _sanitize_locator(actual_image_locator)) | 391 _sanitize_locator(actual_image_locator)) |
OLD | NEW |