Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 """ | 3 """ |
| 4 Copyright 2013 Google Inc. | 4 Copyright 2013 Google Inc. |
| 5 | 5 |
| 6 Use of this source code is governed by a BSD-style license that can be | 6 Use of this source code is governed by a BSD-style license that can be |
| 7 found in the LICENSE file. | 7 found in the LICENSE file. |
| 8 | 8 |
| 9 Calculate differences between image pairs, and store them in a database. | 9 Calculate differences between image pairs, and store them in a database. |
| 10 """ | 10 """ |
| 11 | 11 |
| 12 # System-level imports | 12 # System-level imports |
| 13 import contextlib | 13 import contextlib |
| 14 import errno | |
| 14 import json | 15 import json |
| 15 import logging | 16 import logging |
| 16 import os | 17 import os |
| 18 import Queue | |
| 17 import re | 19 import re |
| 18 import shutil | 20 import shutil |
| 19 import tempfile | 21 import tempfile |
| 22 import threading | |
| 23 import time | |
| 20 import urllib | 24 import urllib |
| 21 | 25 |
| 22 # Must fix up PYTHONPATH before importing from within Skia | 26 # Must fix up PYTHONPATH before importing from within Skia |
| 23 import fix_pythonpath # pylint: disable=W0611 | 27 import fix_pythonpath # pylint: disable=W0611 |
| 24 | 28 |
| 25 # Imports from within Skia | 29 # Imports from within Skia |
| 26 import find_run_binary | 30 import find_run_binary |
| 31 from py.utils import gs_utils | |
| 32 | |
| 27 | 33 |
| 28 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') | 34 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') |
| 29 | 35 |
| 30 DEFAULT_IMAGE_SUFFIX = '.png' | 36 DEFAULT_IMAGE_SUFFIX = '.png' |
| 31 DEFAULT_IMAGES_SUBDIR = 'images' | 37 DEFAULT_IMAGES_SUBDIR = 'images' |
| 38 # TODO(epoger): Using a conservative default number of threads, to avoid | |
| 39 # the "too many open files" bug we saw in http://skbug.com/2423 | |
|
rmistry
2014/08/04 20:52:49
Is this a TODO or a comment?
epoger
2014/08/05 03:34:08
Done.
| |
| 40 DEFAULT_NUM_WORKER_THREADS = 1 | |
| 32 | 41 |
| 33 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') | 42 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') |
| 34 | 43 |
| 35 RGBDIFFS_SUBDIR = 'diffs' | 44 RGBDIFFS_SUBDIR = 'diffs' |
| 36 WHITEDIFFS_SUBDIR = 'whitediffs' | 45 WHITEDIFFS_SUBDIR = 'whitediffs' |
| 37 | 46 |
| 38 # Keys used within DiffRecord dictionary representations. | 47 # Keys used within DiffRecord dictionary representations. |
| 39 # NOTE: Keep these in sync with static/constants.js | 48 # NOTE: Keep these in sync with static/constants.js |
| 40 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' | 49 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' |
| 41 KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels' | 50 KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels' |
| 42 KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' | 51 KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' |
| 43 KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference' | 52 KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference' |
| 44 | 53 |
| 54 # Special values within ImageDiffDB._diff_dict | |
| 55 _DIFFRECORD_FAILED = 'failed' | |
| 56 _DIFFRECORD_PENDING = 'pending' | |
| 57 | |
| 58 # TODO(epoger): Temporary variable to keep track of how many times we download | |
| 59 # the same file in multiple threads. | |
|
rmistry
2014/08/04 20:52:49
TODO or comment?
epoger
2014/08/05 03:34:08
Done.
| |
| 60 global_file_collisions = 0 | |
| 61 | |
| 45 | 62 |
| 46 class DiffRecord(object): | 63 class DiffRecord(object): |
| 47 """ Record of differences between two images. """ | 64 """ Record of differences between two images. """ |
| 48 | 65 |
| 49 def __init__(self, storage_root, | 66 def __init__(self, gs, storage_root, |
| 50 expected_image_url, expected_image_locator, | 67 expected_image_url, expected_image_locator, |
| 51 actual_image_url, actual_image_locator, | 68 actual_image_url, actual_image_locator, |
| 52 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, | 69 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, |
| 53 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, | 70 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, |
| 54 image_suffix=DEFAULT_IMAGE_SUFFIX): | 71 image_suffix=DEFAULT_IMAGE_SUFFIX): |
| 55 """Download this pair of images (unless we already have them on local disk), | 72 """Download this pair of images (unless we already have them on local disk), |
| 56 and prepare a DiffRecord for them. | 73 and prepare a DiffRecord for them. |
| 57 | 74 |
| 58 TODO(epoger): Make this asynchronously download images, rather than blocking | |
| 59 until the images have been downloaded and processed. | |
| 60 | |
| 61 Args: | 75 Args: |
| 76 gs: instance of GSUtils object we can use to download images | |
| 62 storage_root: root directory on local disk within which we store all | 77 storage_root: root directory on local disk within which we store all |
| 63 images | 78 images |
| 64 expected_image_url: file or HTTP url from which we will download the | 79 expected_image_url: file, GS, or HTTP url from which we will download the |
| 65 expected image | 80 expected image |
| 66 expected_image_locator: a unique ID string under which we will store the | 81 expected_image_locator: a unique ID string under which we will store the |
| 67 expected image within storage_root (probably including a checksum to | 82 expected image within storage_root (probably including a checksum to |
| 68 guarantee uniqueness) | 83 guarantee uniqueness) |
| 69 actual_image_url: file or HTTP url from which we will download the | 84 actual_image_url: file, GS, or HTTP url from which we will download the |
| 70 actual image | 85 actual image |
| 71 actual_image_locator: a unique ID string under which we will store the | 86 actual_image_locator: a unique ID string under which we will store the |
| 72 actual image within storage_root (probably including a checksum to | 87 actual image within storage_root (probably including a checksum to |
| 73 guarantee uniqueness) | 88 guarantee uniqueness) |
| 74 expected_images_subdir: the subdirectory expected images are stored in. | 89 expected_images_subdir: the subdirectory expected images are stored in. |
| 75 actual_images_subdir: the subdirectory actual images are stored in. | 90 actual_images_subdir: the subdirectory actual images are stored in. |
| 76 image_suffix: the suffix of images. | 91 image_suffix: the suffix of images. |
| 77 """ | 92 """ |
| 78 expected_image_locator = _sanitize_locator(expected_image_locator) | 93 expected_image_locator = _sanitize_locator(expected_image_locator) |
| 79 actual_image_locator = _sanitize_locator(actual_image_locator) | 94 actual_image_locator = _sanitize_locator(actual_image_locator) |
| 80 | 95 |
| 81 # Download the expected/actual images, if we don't have them already. | 96 # Download the expected/actual images, if we don't have them already. |
| 82 # TODO(rmistry): Add a parameter that just tries to use already-present | |
| 83 # image files rather than downloading them. | |
| 84 expected_image_file = os.path.join( | 97 expected_image_file = os.path.join( |
| 85 storage_root, expected_images_subdir, | 98 storage_root, expected_images_subdir, |
| 86 str(expected_image_locator) + image_suffix) | 99 str(expected_image_locator) + image_suffix) |
| 87 actual_image_file = os.path.join( | 100 actual_image_file = os.path.join( |
| 88 storage_root, actual_images_subdir, | 101 storage_root, actual_images_subdir, |
| 89 str(actual_image_locator) + image_suffix) | 102 str(actual_image_locator) + image_suffix) |
| 90 try: | 103 try: |
| 91 _download_file(expected_image_file, expected_image_url) | 104 _download_file(gs, expected_image_file, expected_image_url) |
| 92 except Exception: | 105 except Exception: |
| 93 logging.exception('unable to download expected_image_url %s to file %s' % | 106 logging.exception('unable to download expected_image_url %s to file %s' % |
| 94 (expected_image_url, expected_image_file)) | 107 (expected_image_url, expected_image_file)) |
| 95 raise | 108 raise |
| 96 try: | 109 try: |
| 97 _download_file(actual_image_file, actual_image_url) | 110 _download_file(gs, actual_image_file, actual_image_url) |
| 98 except Exception: | 111 except Exception: |
| 99 logging.exception('unable to download actual_image_url %s to file %s' % | 112 logging.exception('unable to download actual_image_url %s to file %s' % |
| 100 (actual_image_url, actual_image_file)) | 113 (actual_image_url, actual_image_file)) |
| 101 raise | 114 raise |
| 102 | 115 |
| 103 # Get all diff images and values from skpdiff binary. | 116 # Get all diff images and values from skpdiff binary. |
| 104 skpdiff_output_dir = tempfile.mkdtemp() | 117 skpdiff_output_dir = tempfile.mkdtemp() |
| 105 try: | 118 try: |
| 106 skpdiff_summary_file = os.path.join(skpdiff_output_dir, | 119 skpdiff_summary_file = os.path.join(skpdiff_output_dir, |
| 107 'skpdiff-output.json') | 120 'skpdiff-output.json') |
| 108 skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff') | 121 skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff') |
| 109 skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff') | 122 skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff') |
| 110 expected_img = os.path.join(storage_root, expected_images_subdir, | 123 expected_img = os.path.join(storage_root, expected_images_subdir, |
| 111 str(expected_image_locator) + image_suffix) | 124 str(expected_image_locator) + image_suffix) |
| 112 actual_img = os.path.join(storage_root, actual_images_subdir, | 125 actual_img = os.path.join(storage_root, actual_images_subdir, |
| 113 str(actual_image_locator) + image_suffix) | 126 str(actual_image_locator) + image_suffix) |
| 114 | 127 |
| 115 # TODO: Call skpdiff ONCE for all image pairs, instead of calling it | 128 # TODO(epoger): Consider calling skpdiff ONCE for all image pairs, |
| 116 # repeatedly. This will allow us to parallelize a lot more work. | 129 # instead of calling it separately for each image pair. |
| 130 # Pro: we'll incur less overhead from making repeated system calls, | |
| 131 # spinning up the skpdiff binary, etc. | |
| 132 # Con: we would have to wait until all image pairs were loaded before | |
| 133 # generating any of the diffs? | |
| 117 find_run_binary.run_command( | 134 find_run_binary.run_command( |
| 118 [SKPDIFF_BINARY, '-p', expected_img, actual_img, | 135 [SKPDIFF_BINARY, '-p', expected_img, actual_img, |
| 119 '--jsonp', 'false', | 136 '--jsonp', 'false', |
| 120 '--output', skpdiff_summary_file, | 137 '--output', skpdiff_summary_file, |
| 121 '--differs', 'perceptual', 'different_pixels', | 138 '--differs', 'perceptual', 'different_pixels', |
| 122 '--rgbDiffDir', skpdiff_rgbdiff_dir, | 139 '--rgbDiffDir', skpdiff_rgbdiff_dir, |
| 123 '--whiteDiffDir', skpdiff_whitediff_dir, | 140 '--whiteDiffDir', skpdiff_whitediff_dir, |
| 124 ]) | 141 ]) |
| 125 | 142 |
| 126 # Get information out of the skpdiff_summary_file. | 143 # Get information out of the skpdiff_summary_file. |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 204 self.get_percent_pixels_differing(), | 221 self.get_percent_pixels_differing(), |
| 205 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, | 222 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, |
| 206 KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference, | 223 KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference, |
| 207 } | 224 } |
| 208 | 225 |
| 209 | 226 |
| 210 class ImageDiffDB(object): | 227 class ImageDiffDB(object): |
| 211 """ Calculates differences between image pairs, maintaining a database of | 228 """ Calculates differences between image pairs, maintaining a database of |
| 212 them for download.""" | 229 them for download.""" |
| 213 | 230 |
| 214 def __init__(self, storage_root): | 231 def __init__(self, storage_root, gs=None, |
| 232 num_worker_threads=DEFAULT_NUM_WORKER_THREADS): | |
| 215 """ | 233 """ |
| 216 Args: | 234 Args: |
| 217 storage_root: string; root path within which the DB will store all of its stuff | 235 storage_root: string; root path within which the DB will store all of its stuff |
| 236 gs: instance of GSUtils object we can use to download images | |
| 237 num_worker_threads: how many threads will download images and | |
| 238 generate diffs simultaneously | |
| 218 """ | 239 """ |
| 219 self._storage_root = storage_root | 240 self._storage_root = storage_root |
| 241 self._gs = gs | |
| 220 | 242 |
| 221 # Dictionary of DiffRecords, keyed by (expected_image_locator, | 243 # Dictionary of DiffRecords, keyed by (expected_image_locator, |
| 222 # actual_image_locator) tuples. | 244 # actual_image_locator) tuples. |
| 245 # Values can also be _DIFFRECORD_PENDING, _DIFFRECORD_FAILED. | |
| 246 # | |
| 247 # Any thread that modifies _diff_dict must first acquire | |
| 248 # _diff_dict_writelock! | |
| 249 # | |
| 250 # TODO(epoger): Disk is limitless, but RAM is not... so, we should probably | |
| 251 # remove items from self._diff_dict if they haven't been accessed for a | |
| 252 # long time. We can always regenerate them by diffing the images we | |
| 253 # previously downloaded to local disk. | |
| 254 # I guess we should figure out how expensive it is to download vs diff the | |
| 255 # image pairs... if diffing them is expensive too, we can write these | |
| 256 # _diff_dict objects out to disk if there's too many to hold in RAM. | |
| 257 # Or we could use virtual memory to handle that automatically. | |
| 223 self._diff_dict = {} | 258 self._diff_dict = {} |
| 259 self._diff_dict_writelock = threading.RLock() | |
| 260 | |
| 261 # Set up the queue for asynchronously loading DiffRecords, and start the | |
| 262 # worker threads reading from it. | |
| 263 self._tasks_queue = Queue.Queue(maxsize=2*num_worker_threads) | |
| 264 self._workers = [] | |
| 265 for i in range(num_worker_threads): | |
| 266 worker = threading.Thread(target=self.worker, args=(i,)) | |
| 267 worker.daemon = True | |
| 268 worker.start() | |
| 269 self._workers.append(worker) | |
| 270 | |
| 271 def worker(self, worker_num): | |
| 272 """Launch a worker thread that pulls tasks off self._tasks_queue. | |
| 273 | |
| 274 Args: | |
| 275 worker_num: (integer) which worker this is | |
| 276 """ | |
| 277 while True: | |
| 278 params = self._tasks_queue.get() | |
| 279 key, expected_image_url, actual_image_url = params | |
| 280 try: | |
| 281 diff_record = DiffRecord( | |
| 282 self._gs, self._storage_root, | |
| 283 expected_image_url=expected_image_url, | |
| 284 expected_image_locator=key[0], | |
| 285 actual_image_url=actual_image_url, | |
| 286 actual_image_locator=key[1]) | |
| 287 except Exception: | |
| 288 logging.exception( | |
| 289 'exception while creating DiffRecord for key %s' % str(key)) | |
| 290 diff_record = _DIFFRECORD_FAILED | |
| 291 self._diff_dict_writelock.acquire() | |
| 292 try: | |
| 293 self._diff_dict[key] = diff_record | |
| 294 finally: | |
| 295 self._diff_dict_writelock.release() | |
| 224 | 296 |
| 225 @property | 297 @property |
| 226 def storage_root(self): | 298 def storage_root(self): |
| 227 return self._storage_root | 299 return self._storage_root |
| 228 | 300 |
| 229 def add_image_pair(self, | 301 def add_image_pair(self, |
| 230 expected_image_url, expected_image_locator, | 302 expected_image_url, expected_image_locator, |
| 231 actual_image_url, actual_image_locator): | 303 actual_image_url, actual_image_locator): |
| 232 """Download this pair of images (unless we already have them on local disk), | 304 """Asynchronously prepare a DiffRecord for a pair of images. |
| 233 and prepare a DiffRecord for them. | |
| 234 | 305 |
| 235 TODO(epoger): Make this asynchronously download images, rather than blocking | 306 This method will return quickly; calls to get_diff_record() will block |
| 236 until the images have been downloaded and processed. | 307 until the DiffRecord is available (or we have given up on creating it). |
| 237 When we do that, we should probably add a new method that will block | 308 |
| 238 until all of the images have been downloaded and processed. Otherwise, | 309 If we already have a DiffRecord for this particular image pair, no work |
| 239 we won't know when it's safe to start calling get_diff_record(). | 310 will be done. |
| 240 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a | |
| 241 thread-pool/worker queue at a higher level that just uses ImageDiffDB? | |
| 242 | 311 |
| 243 Args: | 312 Args: |
| 244 expected_image_url: file or HTTP url from which we will download the | 313 expected_image_url: file, GS, or HTTP url from which we will download the |
| 245 expected image | 314 expected image |
| 246 expected_image_locator: a unique ID string under which we will store the | 315 expected_image_locator: a unique ID string under which we will store the |
| 247 expected image within storage_root (probably including a checksum to | 316 expected image within storage_root (probably including a checksum to |
| 248 guarantee uniqueness) | 317 guarantee uniqueness) |
| 249 actual_image_url: file or HTTP url from which we will download the | 318 actual_image_url: file, GS, or HTTP url from which we will download the |
| 250 actual image | 319 actual image |
| 251 actual_image_locator: a unique ID string under which we will store the | 320 actual_image_locator: a unique ID string under which we will store the |
| 252 actual image within storage_root (probably including a checksum to | 321 actual image within storage_root (probably including a checksum to |
| 253 guarantee uniqueness) | 322 guarantee uniqueness) |
| 254 """ | 323 """ |
| 255 expected_image_locator = _sanitize_locator(expected_image_locator) | 324 expected_image_locator = _sanitize_locator(expected_image_locator) |
| 256 actual_image_locator = _sanitize_locator(actual_image_locator) | 325 actual_image_locator = _sanitize_locator(actual_image_locator) |
| 257 key = (expected_image_locator, actual_image_locator) | 326 key = (expected_image_locator, actual_image_locator) |
| 258 if not key in self._diff_dict: | 327 must_add_to_queue = False |
| 259 try: | 328 |
| 260 new_diff_record = DiffRecord( | 329 self._diff_dict_writelock.acquire() |
| 261 self._storage_root, | 330 try: |
| 262 expected_image_url=expected_image_url, | 331 if not key in self._diff_dict: |
| 263 expected_image_locator=expected_image_locator, | 332 # If we have already requested a diff between these two images, |
| 264 actual_image_url=actual_image_url, | 333 # we don't need to request it again. |
| 265 actual_image_locator=actual_image_locator) | 334 must_add_to_queue = True |
| 266 except Exception: | 335 self._diff_dict[key] = _DIFFRECORD_PENDING |
| 267 # If we can't create a real DiffRecord for this (expected, actual) pair, | 336 finally: |
| 268 # store None and the UI will show whatever information we DO have. | 337 self._diff_dict_writelock.release() |
| 269 # Fixes http://skbug.com/2368 . | 338 |
| 270 logging.exception( | 339 if must_add_to_queue: |
| 271 'got exception while creating a DiffRecord for ' | 340 self._tasks_queue.put((key, expected_image_url, actual_image_url)) |
| 272 'expected_image_url=%s , actual_image_url=%s; returning None' % ( | |
| 273 expected_image_url, actual_image_url)) | |
| 274 new_diff_record = None | |
| 275 self._diff_dict[key] = new_diff_record | |
| 276 | 341 |
| 277 def get_diff_record(self, expected_image_locator, actual_image_locator): | 342 def get_diff_record(self, expected_image_locator, actual_image_locator): |
| 278 """Returns the DiffRecord for this image pair. | 343 """Returns the DiffRecord for this image pair. |
| 279 | 344 |
| 280 Raises a KeyError if we don't have a DiffRecord for this image pair. | 345 This call will block until the diff record is available, or we were unable |
| 346 to generate it. | |
| 347 | |
| 348 Args: | |
| 349 expected_image_locator: a unique ID string under which we will store the | |
| 350 expected image within storage_root (probably including a checksum to | |
| 351 guarantee uniqueness) | |
| 352 actual_image_locator: a unique ID string under which we will store the | |
| 353 actual image within storage_root (probably including a checksum to | |
| 354 guarantee uniqueness) | |
| 355 | |
| 356 Returns the DiffRecord for this image pair, or None if we were unable to | |
| 357 generate one. | |
| 281 """ | 358 """ |
| 282 key = (_sanitize_locator(expected_image_locator), | 359 key = (_sanitize_locator(expected_image_locator), |
| 283 _sanitize_locator(actual_image_locator)) | 360 _sanitize_locator(actual_image_locator)) |
| 284 return self._diff_dict[key] | 361 diff_record = self._diff_dict[key] |
| 362 | |
| 363 # If we have no results yet, block until we do. | |
| 364 while diff_record == _DIFFRECORD_PENDING: | |
| 365 time.sleep(1) | |
| 366 diff_record = self._diff_dict[key] | |
| 367 | |
| 368 # Once we have the result... | |
| 369 if diff_record == _DIFFRECORD_FAILED: | |
| 370 logging.error( | |
| 371 'failed to create a DiffRecord for expected_image_locator=%s , ' | |
| 372 'actual_image_locator=%s' % ( | |
| 373 expected_image_locator, actual_image_locator)) | |
| 374 return None | |
| 375 else: | |
| 376 return diff_record | |
| 285 | 377 |
| 286 | 378 |
| 287 # Utility functions | 379 # Utility functions |
| 288 | 380 |
| 289 def _download_file(local_filepath, url): | 381 def _download_file(gs, local_filepath, url): |
| 290 """Download a file from url to local_filepath, unless it is already there. | 382 """Download a file from url to local_filepath, unless it is already there. |
| 291 | 383 |
| 292 Args: | 384 Args: |
| 385 gs: instance of GSUtils object, in case the url points at Google Storage | |
| 293 local_filepath: path on local disk where the image should be stored | 386 local_filepath: path on local disk where the image should be stored |
| 294 url: URL from which we can download the image if we don't have it yet | 387 url: HTTP or GS URL from which we can download the image if we don't have |
| 388 it yet | |
| 295 """ | 389 """ |
| 390 global global_file_collisions | |
| 296 if not os.path.exists(local_filepath): | 391 if not os.path.exists(local_filepath): |
| 297 _mkdir_unless_exists(os.path.dirname(local_filepath)) | 392 _mkdir_unless_exists(os.path.dirname(local_filepath)) |
| 298 with contextlib.closing(urllib.urlopen(url)) as url_handle: | 393 |
| 299 with open(local_filepath, 'wb') as file_handle: | 394 # First download the file contents into a unique filename, and |
| 300 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) | 395 # then rename that file. That way, if multiple threads are downloading |
| 396 # the same filename at the same time, they won't interfere with each | |
| 397 # other (they will both download the file, and one will "win" in the end) | |
| 398 temp_filename = '%s-%d' % (local_filepath, | |
| 399 threading.current_thread().ident) | |
| 400 if gs_utils.GSUtils.is_gs_url(url): | |
| 401 (bucket, path) = gs_utils.GSUtils.split_gs_url(url) | |
| 402 gs.download_file(source_bucket=bucket, source_path=path, | |
| 403 dest_path=temp_filename) | |
| 404 else: | |
| 405 with contextlib.closing(urllib.urlopen(url)) as url_handle: | |
| 406 with open(temp_filename, 'wb') as file_handle: | |
| 407 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) | |
| 408 | |
| 409 # Rename the file to its real filename. | |
| 410 # Keep count of how many colliding downloads we encounter; | |
| 411 # if it's a large number, we may want to change our download strategy | |
| 412 # to minimize repeated downloads. | |
| 413 if os.path.exists(local_filepath): | |
| 414 global_file_collisions += 1 | |
| 415 else: | |
| 416 os.rename(temp_filename, local_filepath) | |
| 301 | 417 |
| 302 | 418 |
| 303 def _mkdir_unless_exists(path): | 419 def _mkdir_unless_exists(path): |
| 304 """Unless path refers to an already-existing directory, create it. | 420 """Unless path refers to an already-existing directory, create it. |
| 305 | 421 |
| 306 Args: | 422 Args: |
| 307 path: path on local disk | 423 path: path on local disk |
| 308 """ | 424 """ |
| 309 if not os.path.isdir(path): | 425 try: |
| 310 os.makedirs(path) | 426 os.makedirs(path) |
| 427 except OSError as e: | |
| 428 if e.errno == errno.EEXIST: | |
| 429 pass | |
| 311 | 430 |
| 312 | 431 |
| 313 def _sanitize_locator(locator): | 432 def _sanitize_locator(locator): |
| 314 """Returns a sanitized version of a locator (one in which we know none of the | 433 """Returns a sanitized version of a locator (one in which we know none of the |
| 315 characters will have special meaning in filenames). | 434 characters will have special meaning in filenames). |
| 316 | 435 |
| 317 Args: | 436 Args: |
| 318 locator: string, or something that can be represented as a string | 437 locator: string, or something that can be represented as a string |
| 319 """ | 438 """ |
| 320 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) | 439 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) |
| 321 | 440 |
| 322 | 441 |
| 323 def _get_difference_locator(expected_image_locator, actual_image_locator): | 442 def _get_difference_locator(expected_image_locator, actual_image_locator): |
| 324 """Returns the locator string used to look up the diffs between expected_image | 443 """Returns the locator string used to look up the diffs between expected_image |
| 325 and actual_image. | 444 and actual_image. |
| 326 | 445 |
| 327 We must keep this function in sync with getImageDiffRelativeUrl() in | 446 We must keep this function in sync with getImageDiffRelativeUrl() in |
| 328 static/loader.js | 447 static/loader.js |
| 329 | 448 |
| 330 Args: | 449 Args: |
| 331 expected_image_locator: locator string pointing at expected image | 450 expected_image_locator: locator string pointing at expected image |
| 332 actual_image_locator: locator string pointing at actual image | 451 actual_image_locator: locator string pointing at actual image |
| 333 | 452 |
| 334 Returns: already-sanitized locator where the diffs between expected and | 453 Returns: already-sanitized locator where the diffs between expected and |
| 335 actual images can be found | 454 actual images can be found |
| 336 """ | 455 """ |
| 337 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), | 456 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), |
| 338 _sanitize_locator(actual_image_locator)) | 457 _sanitize_locator(actual_image_locator)) |
| OLD | NEW |