OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 """ | 3 """ |
4 Copyright 2013 Google Inc. | 4 Copyright 2013 Google Inc. |
5 | 5 |
6 Use of this source code is governed by a BSD-style license that can be | 6 Use of this source code is governed by a BSD-style license that can be |
7 found in the LICENSE file. | 7 found in the LICENSE file. |
8 | 8 |
9 Calculate differences between image pairs, and store them in a database. | 9 Calculate differences between image pairs, and store them in a database. |
10 """ | 10 """ |
11 | 11 |
12 import contextlib | 12 import contextlib |
13 import csv | 13 import csv |
14 import errno | |
15 import logging | 14 import logging |
16 import Queue | |
17 import os | 15 import os |
18 import re | 16 import re |
19 import shutil | 17 import shutil |
20 import sys | 18 import sys |
21 import tempfile | 19 import tempfile |
22 import time | |
23 import threading | |
24 import urllib | 20 import urllib |
25 try: | 21 try: |
26 from PIL import Image, ImageChops | 22 from PIL import Image, ImageChops |
27 except ImportError: | 23 except ImportError: |
28 raise ImportError('Requires PIL to be installed; see ' | 24 raise ImportError('Requires PIL to be installed; see ' |
29 + 'http://www.pythonware.com/products/pil/') | 25 + 'http://www.pythonware.com/products/pil/') |
30 | 26 |
31 # Set the PYTHONPATH to include the tools directory. | 27 # Set the PYTHONPATH to include the tools directory. |
32 sys.path.append( | 28 sys.path.append( |
33 os.path.join( | 29 os.path.join( |
34 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, | 30 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, |
35 'tools')) | 31 'tools')) |
36 import find_run_binary | 32 import find_run_binary |
37 | 33 |
38 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') | 34 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') |
39 | 35 |
40 DEFAULT_IMAGE_SUFFIX = '.png' | 36 DEFAULT_IMAGE_SUFFIX = '.png' |
41 DEFAULT_IMAGES_SUBDIR = 'images' | 37 DEFAULT_IMAGES_SUBDIR = 'images' |
42 DEFAULT_NUM_WORKERS = 8 | |
43 | 38 |
44 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') | 39 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') |
45 | 40 |
46 DIFFS_SUBDIR = 'diffs' | 41 DIFFS_SUBDIR = 'diffs' |
47 WHITEDIFFS_SUBDIR = 'whitediffs' | 42 WHITEDIFFS_SUBDIR = 'whitediffs' |
48 | 43 |
49 VALUES_PER_BAND = 256 | 44 VALUES_PER_BAND = 256 |
50 | 45 |
51 # Keys used within DiffRecord dictionary representations. | 46 # Keys used within DiffRecord dictionary representations. |
52 # NOTE: Keep these in sync with static/constants.js | 47 # NOTE: Keep these in sync with static/constants.js |
53 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' | 48 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' |
54 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels' | 49 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels' |
55 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' | 50 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' |
56 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference' | 51 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference' |
57 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure' | 52 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure' |
58 | 53 |
59 # Special values within ImageDiffDB._diff_dict | |
60 DIFFRECORD_FAILED = 'failed' | |
61 DIFFRECORD_PENDING = 'pending' | |
62 | |
63 # TODO(epoger): Temporary(?) counter to keep track of how many times we download | |
64 # the same file in multiple threads. | |
65 global_file_collisions = 0 | |
66 | |
67 | 54 |
68 class DiffRecord(object): | 55 class DiffRecord(object): |
69 """ Record of differences between two images. """ | 56 """ Record of differences between two images. """ |
70 | 57 |
71 def __init__(self, storage_root, | 58 def __init__(self, storage_root, |
72 expected_image_url, expected_image_locator, | 59 expected_image_url, expected_image_locator, |
73 actual_image_url, actual_image_locator, | 60 actual_image_url, actual_image_locator, |
74 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, | 61 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, |
75 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, | 62 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, |
76 image_suffix=DEFAULT_IMAGE_SUFFIX): | 63 image_suffix=DEFAULT_IMAGE_SUFFIX): |
77 """Download this pair of images (unless we already have them on local disk), | 64 """Download this pair of images (unless we already have them on local disk), |
78 and prepare a DiffRecord for them. | 65 and prepare a DiffRecord for them. |
79 | 66 |
| 67 TODO(epoger): Make this asynchronously download images, rather than blocking |
| 68 until the images have been downloaded and processed. |
| 69 |
80 Args: | 70 Args: |
81 storage_root: root directory on local disk within which we store all | 71 storage_root: root directory on local disk within which we store all |
82 images | 72 images |
83 expected_image_url: file or HTTP url from which we will download the | 73 expected_image_url: file or HTTP url from which we will download the |
84 expected image | 74 expected image |
85 expected_image_locator: a unique ID string under which we will store the | 75 expected_image_locator: a unique ID string under which we will store the |
86 expected image within storage_root (probably including a checksum to | 76 expected image within storage_root (probably including a checksum to |
87 guarantee uniqueness) | 77 guarantee uniqueness) |
88 actual_image_url: file or HTTP url from which we will download the | 78 actual_image_url: file or HTTP url from which we will download the |
89 actual image | 79 actual image |
(...skipping 132 matching lines...)
222 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(), | 212 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(), |
223 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, | 213 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, |
224 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference, | 214 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference, |
225 } | 215 } |
226 | 216 |
227 | 217 |
228 class ImageDiffDB(object): | 218 class ImageDiffDB(object): |
229 """ Calculates differences between image pairs, maintaining a database of | 219 """ Calculates differences between image pairs, maintaining a database of |
230 them for download.""" | 220 them for download.""" |
231 | 221 |
232 def __init__(self, storage_root, num_workers=DEFAULT_NUM_WORKERS): | 222 def __init__(self, storage_root): |
233 """ | 223 """ |
234 Args: | 224 Args: |
235 storage_root: string; root path within which the DB will store all of its stuff | 225 storage_root: string; root path within which the DB will store all of its stuff |
236 num_workers: integer; number of worker threads to spawn | |
237 """ | 226 """ |
238 self._storage_root = storage_root | 227 self._storage_root = storage_root |
239 | 228 |
240 # Dictionary of DiffRecords, keyed by (expected_image_locator, | 229 # Dictionary of DiffRecords, keyed by (expected_image_locator, |
241 # actual_image_locator) tuples. | 230 # actual_image_locator) tuples. |
242 # Values can also be DIFFRECORD_PENDING, DIFFRECORD_FAILED. | |
243 self._diff_dict = {} | 231 self._diff_dict = {} |
244 | 232 |
245 # Set up the queue for asynchronously loading DiffRecords, and start the | |
246 # worker threads reading from it. | |
247 self._tasks_queue = Queue.Queue(maxsize=2*num_workers) | |
248 self._workers = [] | |
249 for i in range(num_workers): | |
250 worker = threading.Thread(target=self.worker, args=(i,)) | |
251 worker.daemon = True | |
252 worker.start() | |
253 self._workers.append(worker) | |
254 | |
255 def worker(self, worker_num): | |
256 """Run a worker thread's main loop, pulling tasks off self._tasks_queue. | |
257 | |
258 Args: | |
259 worker_num: (integer) which worker this is | |
260 """ | |
261 while True: | |
262 params = self._tasks_queue.get() | |
263 key, expected_image_url, actual_image_url = params | |
264 try: | |
265 diff_record = DiffRecord( | |
266 self._storage_root, | |
267 expected_image_url=expected_image_url, | |
268 expected_image_locator=key[0], | |
269 actual_image_url=actual_image_url, | |
270 actual_image_locator=key[1]) | |
271 except Exception: | |
272 logging.exception( | |
273 'exception while creating DiffRecord for key %s' % str(key)) | |
274 diff_record = DIFFRECORD_FAILED | |
275 self._diff_dict[key] = diff_record | |
276 | |
277 def add_image_pair(self, | 233 def add_image_pair(self, |
278 expected_image_url, expected_image_locator, | 234 expected_image_url, expected_image_locator, |
279 actual_image_url, actual_image_locator): | 235 actual_image_url, actual_image_locator): |
280 """Download this pair of images (unless we already have them on local disk), | 236 """Download this pair of images (unless we already have them on local disk), |
281 and prepare a DiffRecord for them. | 237 and prepare a DiffRecord for them. |
282 | 238 |
283 This method will block until the images are downloaded and DiffRecord is | 239 TODO(epoger): Make this asynchronously download images, rather than blocking |
284 available by calling get_diff_record(). | 240 until the images have been downloaded and processed. |
| 241 When we do that, we should probably add a new method that will block |
| 242 until all of the images have been downloaded and processed. Otherwise, |
| 243 we won't know when it's safe to start calling get_diff_record(). |
| 244 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a |
| 245 thread-pool/worker queue at a higher level that just uses ImageDiffDB? |
285 | 246 |
286 Args: | 247 Args: |
287 expected_image_url: file or HTTP url from which we will download the | 248 expected_image_url: file or HTTP url from which we will download the |
288 expected image | 249 expected image |
289 expected_image_locator: a unique ID string under which we will store the | 250 expected_image_locator: a unique ID string under which we will store the |
290 expected image within storage_root (probably including a checksum to | 251 expected image within storage_root (probably including a checksum to |
291 guarantee uniqueness) | 252 guarantee uniqueness) |
292 actual_image_url: file or HTTP url from which we will download the | 253 actual_image_url: file or HTTP url from which we will download the |
293 actual image | 254 actual image |
294 actual_image_locator: a unique ID string under which we will store the | 255 actual_image_locator: a unique ID string under which we will store the |
295 actual image within storage_root (probably including a checksum to | 256 actual image within storage_root (probably including a checksum to |
296 guarantee uniqueness) | 257 guarantee uniqueness) |
297 | |
298 Raises: | |
299 Exception if we are unable to create a DiffRecord for this image pair. | |
300 """ | 258 """ |
301 key = _generate_key(expected_image_locator, actual_image_locator) | 259 expected_image_locator = _sanitize_locator(expected_image_locator) |
| 260 actual_image_locator = _sanitize_locator(actual_image_locator) |
| 261 key = (expected_image_locator, actual_image_locator) |
302 if not key in self._diff_dict: | 262 if not key in self._diff_dict: |
303 try: | 263 try: |
304 new_diff_record = DiffRecord( | 264 new_diff_record = DiffRecord( |
305 self._storage_root, | 265 self._storage_root, |
306 expected_image_url=expected_image_url, | 266 expected_image_url=expected_image_url, |
307 expected_image_locator=expected_image_locator, | 267 expected_image_locator=expected_image_locator, |
308 actual_image_url=actual_image_url, | 268 actual_image_url=actual_image_url, |
309 actual_image_locator=actual_image_locator) | 269 actual_image_locator=actual_image_locator) |
310 except Exception: | 270 except Exception: |
311 # If we can't create a real DiffRecord for this (expected, actual) pair, | 271 # If we can't create a real DiffRecord for this (expected, actual) pair, |
312 # store None and the UI will show whatever information we DO have. | 272 # store None and the UI will show whatever information we DO have. |
313 # Fixes http://skbug.com/2368 . | 273 # Fixes http://skbug.com/2368 . |
314 logging.exception( | 274 logging.exception( |
315 'got exception while creating a DiffRecord for ' | 275 'got exception while creating a DiffRecord for ' |
316 'expected_image_url=%s , actual_image_url=%s; returning None' % ( | 276 'expected_image_url=%s , actual_image_url=%s; returning None' % ( |
317 expected_image_url, actual_image_url)) | 277 expected_image_url, actual_image_url)) |
318 new_diff_record = None | 278 new_diff_record = None |
319 self._diff_dict[key] = new_diff_record | 279 self._diff_dict[key] = new_diff_record |
320 | 280 |
321 def add_image_pair_async(self, | |
322 expected_image_url, expected_image_locator, | |
323 actual_image_url, actual_image_locator): | |
324 """Download this pair of images (unless we already have them on local disk), | |
325 and prepare a DiffRecord for them. | |
326 | |
327 This method will return quickly; calls to get_diff_record() will block | |
328 until the DiffRecord is available (or we have given up on creating it). | |
329 | |
330 Args: | |
331 expected_image_url: file or HTTP url from which we will download the | |
332 expected image | |
333 expected_image_locator: a unique ID string under which we will store the | |
334 expected image within storage_root (probably including a checksum to | |
335 guarantee uniqueness) | |
336 actual_image_url: file or HTTP url from which we will download the | |
337 actual image | |
338 actual_image_locator: a unique ID string under which we will store the | |
339 actual image within storage_root (probably including a checksum to | |
340 guarantee uniqueness) | |
341 """ | |
342 key = _generate_key(expected_image_locator, actual_image_locator) | |
343 if not key in self._diff_dict: | |
344 # If we have already requested a diff between these two images, | |
345 # we don't need to request it again. | |
346 # | |
347 # Threading note: If multiple threads called into this method with the | |
348 # same key at the same time, there will be multiple tasks on the queue | |
349 # with the same key. But that's OK; they will both complete successfully, | |
350 # and just waste a little time in the process. Nothing will break. | |
351 self._diff_dict[key] = DIFFRECORD_PENDING | |
352 self._tasks_queue.put((key, expected_image_url, actual_image_url)) | |
353 | |
354 def get_diff_record(self, expected_image_locator, actual_image_locator): | 281 def get_diff_record(self, expected_image_locator, actual_image_locator): |
355 """Returns the DiffRecord for this image pair. | 282 """Returns the DiffRecord for this image pair. |
356 | 283 |
357 Args: | 284 Raises a KeyError if we don't have a DiffRecord for this image pair. |
358 expected_image_locator: a unique ID string under which we will store the | |
359 expected image within storage_root (probably including a checksum to | |
360 guarantee uniqueness) | |
361 actual_image_locator: a unique ID string under which we will store the | |
362 actual image within storage_root (probably including a checksum to | |
363 guarantee uniqueness) | |
364 | |
365 Returns the DiffRecord for this image pair, or None if we were unable to | |
366 generate one. | |
367 """ | 285 """ |
368 key = _generate_key(expected_image_locator, actual_image_locator) | 286 key = (_sanitize_locator(expected_image_locator), |
369 diff_record = self._diff_dict[key] | 287 _sanitize_locator(actual_image_locator)) |
370 | 288 return self._diff_dict[key] |
371 # If we have no results yet, block until we do. | |
372 while diff_record == DIFFRECORD_PENDING: | |
373 time.sleep(1) | |
374 diff_record = self._diff_dict[key] | |
375 | |
376 # Once we have the result... | |
377 if diff_record == DIFFRECORD_FAILED: | |
378 logging.error( | |
379 'failed to create a DiffRecord for expected_image_locator=%s , ' | |
380 'actual_image_locator=%s' % ( | |
381 expected_image_locator, actual_image_locator)) | |
382 return None | |
383 else: | |
384 return diff_record | |
385 | 289 |
386 | 290 |
387 # Utility functions | 291 # Utility functions |
388 | 292 |
389 def _calculate_weighted_diff_metric(histogram, num_pixels): | 293 def _calculate_weighted_diff_metric(histogram, num_pixels): |
390 """Given the histogram of a diff image (per-channel diff at each | 294 """Given the histogram of a diff image (per-channel diff at each |
391 pixel between two images), calculate the weighted diff metric (a | 295 pixel between two images), calculate the weighted diff metric (a |
392 stab at how different the two images really are). | 296 stab at how different the two images really are). |
393 | 297 |
394 TODO(epoger): Delete this function, now that we have perceptual diff? | 298 TODO(epoger): Delete this function, now that we have perceptual diff? |
(...skipping 68 matching lines...)
463 def _download_and_open_image(local_filepath, url): | 367 def _download_and_open_image(local_filepath, url): |
464 """Open the image at local_filepath; if there is no file at that path, | 368 """Open the image at local_filepath; if there is no file at that path, |
465 download it from url to that path and then open it. | 369 download it from url to that path and then open it. |
466 | 370 |
467 Args: | 371 Args: |
468 local_filepath: path on local disk where the image should be stored | 372 local_filepath: path on local disk where the image should be stored |
469 url: URL from which we can download the image if we don't have it yet | 373 url: URL from which we can download the image if we don't have it yet |
470 | 374 |
471 Returns: a PIL image object | 375 Returns: a PIL image object |
472 """ | 376 """ |
473 global global_file_collisions | |
474 if not os.path.exists(local_filepath): | 377 if not os.path.exists(local_filepath): |
475 _mkdir_unless_exists(os.path.dirname(local_filepath)) | 378 _mkdir_unless_exists(os.path.dirname(local_filepath)) |
476 with contextlib.closing(urllib.urlopen(url)) as url_handle: | 379 with contextlib.closing(urllib.urlopen(url)) as url_handle: |
477 | 380 with open(local_filepath, 'wb') as file_handle: |
478 # First download the file contents into a unique filename, and | |
479 # then rename that file. That way, if multiple threads are downloading | |
480 # the same filename at the same time, they won't interfere with each | |
481 # other (they will both download the file, and one will "win" in the end) | |
482 temp_filename = '%s-%d' % (local_filepath, | |
483 threading.current_thread().ident) | |
484 with open(temp_filename, 'wb') as file_handle: | |
485 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) | 381 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) |
486 | |
487 # Keep count of how many colliding downloads we encounter; | |
488 # if it's a large number, we may want to change our download strategy | |
489 # to minimize repeated downloads. | |
490 if os.path.exists(local_filepath): | |
491 global_file_collisions += 1 | |
492 else: | |
493 os.rename(temp_filename, local_filepath) | |
494 | |
495 return _open_image(local_filepath) | 382 return _open_image(local_filepath) |
496 | 383 |
497 | 384 |
498 def _open_image(filepath): | 385 def _open_image(filepath): |
499 """Wrapper for Image.open(filepath) that yields more useful error messages. | 386 """Wrapper for Image.open(filepath) that yields more useful error messages. |
500 | 387 |
501 Args: | 388 Args: |
502 filepath: path on local disk to load image from | 389 filepath: path on local disk to load image from |
503 | 390 |
504 Returns: a PIL image object | 391 Returns: a PIL image object |
(...skipping 20 matching lines...)
525 _mkdir_unless_exists(os.path.dirname(filepath)) | 412 _mkdir_unless_exists(os.path.dirname(filepath)) |
526 image.save(filepath, format) | 413 image.save(filepath, format) |
527 | 414 |
528 | 415 |
529 def _mkdir_unless_exists(path): | 416 def _mkdir_unless_exists(path): |
530 """Unless path refers to an already-existing directory, create it. | 417 """Unless path refers to an already-existing directory, create it. |
531 | 418 |
532 Args: | 419 Args: |
533 path: path on local disk | 420 path: path on local disk |
534 """ | 421 """ |
535 try: | 422 if not os.path.isdir(path): |
536 os.makedirs(path) | 423 os.makedirs(path) |
537 except OSError as e: | |
538 if e.errno == errno.EEXIST: | |
539 pass | |
540 | 424 |
541 | 425 |
542 def _sanitize_locator(locator): | 426 def _sanitize_locator(locator): |
543 """Returns a sanitized version of a locator (one in which we know none of the | 427 """Returns a sanitized version of a locator (one in which we know none of the |
544 characters will have special meaning in filenames). | 428 characters will have special meaning in filenames). |
545 | 429 |
546 Args: | 430 Args: |
547 locator: string, or something that can be represented as a string | 431 locator: string, or something that can be represented as a string |
548 """ | 432 """ |
549 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) | 433 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) |
550 | 434 |
551 | 435 |
552 def _generate_key(expected_image_locator, actual_image_locator): | |
553 """Returns a key suitable for looking up this image pair. | |
554 | |
555 Args: | |
556 expected_image_locator: a unique ID string under which we will store the | |
557 expected image within storage_root (probably including a checksum to | |
558 guarantee uniqueness) | |
559 actual_image_locator: a unique ID string under which we will store the | |
560 actual image within storage_root (probably including a checksum to | |
561 guarantee uniqueness) | |
562 """ | |
563 return (_sanitize_locator(expected_image_locator), | |
564 _sanitize_locator(actual_image_locator)) | |
565 | |
566 | |
567 def _get_difference_locator(expected_image_locator, actual_image_locator): | 436 def _get_difference_locator(expected_image_locator, actual_image_locator): |
568 """Returns the locator string used to look up the diffs between expected_image | 437 """Returns the locator string used to look up the diffs between expected_image |
569 and actual_image. | 438 and actual_image. |
570 | 439 |
571 We must keep this function in sync with getImageDiffRelativeUrl() in | 440 We must keep this function in sync with getImageDiffRelativeUrl() in |
572 static/loader.js | 441 static/loader.js |
573 | 442 |
574 Args: | 443 Args: |
575 expected_image_locator: locator string pointing at expected image | 444 expected_image_locator: locator string pointing at expected image |
576 actual_image_locator: locator string pointing at actual image | 445 actual_image_locator: locator string pointing at actual image |
577 | 446 |
578 Returns: already-sanitized locator where the diffs between expected and | 447 Returns: already-sanitized locator where the diffs between expected and |
579 actual images can be found | 448 actual images can be found |
580 """ | 449 """ |
581 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), | 450 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), |
582 _sanitize_locator(actual_image_locator)) | 451 _sanitize_locator(actual_image_locator)) |
OLD | NEW |