OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 """ | 3 """ |
4 Copyright 2013 Google Inc. | 4 Copyright 2013 Google Inc. |
5 | 5 |
6 Use of this source code is governed by a BSD-style license that can be | 6 Use of this source code is governed by a BSD-style license that can be |
7 found in the LICENSE file. | 7 found in the LICENSE file. |
8 | 8 |
9 Calulate differences between image pairs, and store them in a database. | 9 Calulate differences between image pairs, and store them in a database. |
10 """ | 10 """ |
11 | 11 |
12 import contextlib | 12 import contextlib |
13 import csv | 13 import csv |
14 import errno | |
14 import logging | 15 import logging |
16 import Queue | |
15 import os | 17 import os |
16 import re | 18 import re |
17 import shutil | 19 import shutil |
18 import sys | 20 import sys |
19 import tempfile | 21 import tempfile |
22 import time | |
23 import threading | |
20 import urllib | 24 import urllib |
21 try: | 25 try: |
22 from PIL import Image, ImageChops | 26 from PIL import Image, ImageChops |
23 except ImportError: | 27 except ImportError: |
24 raise ImportError('Requires PIL to be installed; see ' | 28 raise ImportError('Requires PIL to be installed; see ' |
25 + 'http://www.pythonware.com/products/pil/') | 29 + 'http://www.pythonware.com/products/pil/') |
26 | 30 |
27 # Set the PYTHONPATH to include the tools directory. | 31 # Set the PYTHONPATH to include the tools directory. |
28 sys.path.append( | 32 sys.path.append( |
29 os.path.join( | 33 os.path.join( |
30 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, | 34 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, |
31 'tools')) | 35 'tools')) |
32 import find_run_binary | 36 import find_run_binary |
33 | 37 |
34 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') | 38 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') |
35 | 39 |
36 DEFAULT_IMAGE_SUFFIX = '.png' | 40 DEFAULT_IMAGE_SUFFIX = '.png' |
37 DEFAULT_IMAGES_SUBDIR = 'images' | 41 DEFAULT_IMAGES_SUBDIR = 'images' |
42 DEFAULT_NUM_WORKERS = 8 | |
38 | 43 |
39 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') | 44 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') |
40 | 45 |
41 DIFFS_SUBDIR = 'diffs' | 46 DIFFS_SUBDIR = 'diffs' |
42 WHITEDIFFS_SUBDIR = 'whitediffs' | 47 WHITEDIFFS_SUBDIR = 'whitediffs' |
43 | 48 |
44 VALUES_PER_BAND = 256 | 49 VALUES_PER_BAND = 256 |
45 | 50 |
46 # Keys used within DiffRecord dictionary representations. | 51 # Keys used within DiffRecord dictionary representations. |
47 # NOTE: Keep these in sync with static/constants.js | 52 # NOTE: Keep these in sync with static/constants.js |
48 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' | 53 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' |
49 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels' | 54 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels' |
50 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' | 55 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' |
51 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference' | 56 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference' |
52 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure' | 57 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure' |
53 | 58 |
59 # Special values within ImageDiffDB._diff_dict | |
60 DIFFRECORD_FAILED = 'failed' | |
61 DIFFRECORD_PENDING = 'pending' | |
62 | |
63 # TODO(epoger): Temporary(?) list to keep track of how many times we download | |
64 # the same file in multiple threads. | |
65 global_file_collisions = 0 | |
66 | |
54 | 67 |
55 class DiffRecord(object): | 68 class DiffRecord(object): |
56 """ Record of differences between two images. """ | 69 """ Record of differences between two images. """ |
57 | 70 |
58 def __init__(self, storage_root, | 71 def __init__(self, storage_root, |
59 expected_image_url, expected_image_locator, | 72 expected_image_url, expected_image_locator, |
60 actual_image_url, actual_image_locator, | 73 actual_image_url, actual_image_locator, |
61 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, | 74 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, |
62 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, | 75 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, |
63 image_suffix=DEFAULT_IMAGE_SUFFIX): | 76 image_suffix=DEFAULT_IMAGE_SUFFIX): |
64 """Download this pair of images (unless we already have them on local disk), | 77 """Download this pair of images (unless we already have them on local disk), |
65 and prepare a DiffRecord for them. | 78 and prepare a DiffRecord for them. |
66 | 79 |
67 TODO(epoger): Make this asynchronously download images, rather than blocking | |
68 until the images have been downloaded and processed. | |
69 | |
70 Args: | 80 Args: |
71 storage_root: root directory on local disk within which we store all | 81 storage_root: root directory on local disk within which we store all |
72 images | 82 images |
73 expected_image_url: file or HTTP url from which we will download the | 83 expected_image_url: file or HTTP url from which we will download the |
74 expected image | 84 expected image |
75 expected_image_locator: a unique ID string under which we will store the | 85 expected_image_locator: a unique ID string under which we will store the |
76 expected image within storage_root (probably including a checksum to | 86 expected image within storage_root (probably including a checksum to |
77 guarantee uniqueness) | 87 guarantee uniqueness) |
78 actual_image_url: file or HTTP url from which we will download the | 88 actual_image_url: file or HTTP url from which we will download the |
79 actual image | 89 actual image |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
212 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(), | 222 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(), |
213 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, | 223 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, |
214 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference, | 224 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference, |
215 } | 225 } |
216 | 226 |
217 | 227 |
218 class ImageDiffDB(object): | 228 class ImageDiffDB(object): |
219 """ Calculates differences between image pairs, maintaining a database of | 229 """ Calculates differences between image pairs, maintaining a database of |
220 them for download.""" | 230 them for download.""" |
221 | 231 |
222 def __init__(self, storage_root): | 232 def __init__(self, storage_root, num_workers=DEFAULT_NUM_WORKERS): |
223 """ | 233 """ |
224 Args: | 234 Args: |
225 storage_root: string; root path within the DB will store all of its stuff | 235 storage_root: string; root path within the DB will store all of its stuff |
236 num_workers: integer; number of worker threads to spawn | |
226 """ | 237 """ |
227 self._storage_root = storage_root | 238 self._storage_root = storage_root |
228 | 239 |
229 # Dictionary of DiffRecords, keyed by (expected_image_locator, | 240 # Dictionary of DiffRecords, keyed by (expected_image_locator, |
230 # actual_image_locator) tuples. | 241 # actual_image_locator) tuples. |
242 # Values can also be DIFFRECORD_PENDING, DIFFRECORD_FAILED. | |
231 self._diff_dict = {} | 243 self._diff_dict = {} |
232 | 244 |
245 # Set up the queue for asynchronously loading DiffRecords, and start the | |
246 # worker threads reading from it. | |
247 self._tasks_queue = Queue.Queue(maxsize=2*num_workers) | |
248 self._workers = [] | |
249 for i in range(num_workers): | |
250 worker = threading.Thread(target=self.worker, args=(i,)) | |
251 worker.daemon = True | |
252 worker.start() | |
253 self._workers.append(worker) | |
254 | |
255 def worker(self, worker_num): | |
256 """Launch a worker thread that pulls tasks off self._tasks_queue. | |
257 | |
258 Args: | |
259 worker_num: (integer) which worker this is | |
260 """ | |
261 while True: | |
262 params = self._tasks_queue.get() | |
263 key, expected_image_url, actual_image_url = params | |
264 try: | |
265 diff_record = DiffRecord( | |
266 self._storage_root, | |
267 expected_image_url=expected_image_url, | |
268 expected_image_locator=key[0], | |
269 actual_image_url=actual_image_url, | |
270 actual_image_locator=key[1]) | |
271 except Exception: | |
272 logging.exception( | |
273 'exception while creating DiffRecord for key %s' % str(key)) | |
274 diff_record = DIFFRECORD_FAILED | |
275 self._diff_dict[key] = diff_record | |
276 | |
233 def add_image_pair(self, | 277 def add_image_pair(self, |
234 expected_image_url, expected_image_locator, | 278 expected_image_url, expected_image_locator, |
235 actual_image_url, actual_image_locator): | 279 actual_image_url, actual_image_locator): |
236 """Download this pair of images (unless we already have them on local disk), | 280 """Download this pair of images (unless we already have them on local disk), |
237 and prepare a DiffRecord for them. | 281 and prepare a DiffRecord for them. |
238 | 282 |
239 TODO(epoger): Make this asynchronously download images, rather than blocking | 283 This method will block until the images are downloaded and DiffRecord is |
240 until the images have been downloaded and processed. | 284 available by calling get_diff_record(). |
241 When we do that, we should probably add a new method that will block | |
242 until all of the images have been downloaded and processed. Otherwise, | |
243 we won't know when it's safe to start calling get_diff_record(). | |
244 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a | |
245 thread-pool/worker queue at a higher level that just uses ImageDiffDB? | |
246 | 285 |
247 Args: | 286 Args: |
248 expected_image_url: file or HTTP url from which we will download the | 287 expected_image_url: file or HTTP url from which we will download the |
249 expected image | 288 expected image |
250 expected_image_locator: a unique ID string under which we will store the | 289 expected_image_locator: a unique ID string under which we will store the |
251 expected image within storage_root (probably including a checksum to | 290 expected image within storage_root (probably including a checksum to |
252 guarantee uniqueness) | 291 guarantee uniqueness) |
253 actual_image_url: file or HTTP url from which we will download the | 292 actual_image_url: file or HTTP url from which we will download the |
254 actual image | 293 actual image |
255 actual_image_locator: a unique ID string under which we will store the | 294 actual_image_locator: a unique ID string under which we will store the |
256 actual image within storage_root (probably including a checksum to | 295 actual image within storage_root (probably including a checksum to |
257 guarantee uniqueness) | 296 guarantee uniqueness) |
297 | |
298 Raises: | |
299 Exception if we are unable to create a DiffRecord for this image pair. | |
258 """ | 300 """ |
259 expected_image_locator = _sanitize_locator(expected_image_locator) | 301 key = _generate_key(expected_image_locator, actual_image_locator) |
260 actual_image_locator = _sanitize_locator(actual_image_locator) | |
261 key = (expected_image_locator, actual_image_locator) | |
262 if not key in self._diff_dict: | 302 if not key in self._diff_dict: |
263 try: | 303 try: |
264 new_diff_record = DiffRecord( | 304 new_diff_record = DiffRecord( |
265 self._storage_root, | 305 self._storage_root, |
266 expected_image_url=expected_image_url, | 306 expected_image_url=expected_image_url, |
267 expected_image_locator=expected_image_locator, | 307 expected_image_locator=expected_image_locator, |
268 actual_image_url=actual_image_url, | 308 actual_image_url=actual_image_url, |
269 actual_image_locator=actual_image_locator) | 309 actual_image_locator=actual_image_locator) |
270 except Exception: | 310 except Exception: |
271 # If we can't create a real DiffRecord for this (expected, actual) pair, | 311 # If we can't create a real DiffRecord for this (expected, actual) pair, |
272 # store None and the UI will show whatever information we DO have. | 312 # store None and the UI will show whatever information we DO have. |
273 # Fixes http://skbug.com/2368 . | 313 # Fixes http://skbug.com/2368 . |
274 logging.exception( | 314 logging.exception( |
275 'got exception while creating a DiffRecord for ' | 315 'got exception while creating a DiffRecord for ' |
276 'expected_image_url=%s , actual_image_url=%s; returning None' % ( | 316 'expected_image_url=%s , actual_image_url=%s; returning None' % ( |
277 expected_image_url, actual_image_url)) | 317 expected_image_url, actual_image_url)) |
278 new_diff_record = None | 318 new_diff_record = None |
279 self._diff_dict[key] = new_diff_record | 319 self._diff_dict[key] = new_diff_record |
280 | 320 |
321 def add_image_pair_async(self, | |
322 expected_image_url, expected_image_locator, | |
323 actual_image_url, actual_image_locator): | |
324 """Download this pair of images (unless we already have them on local disk), | |
325 and prepare a DiffRecord for them. | |
326 | |
327 This method will return quickly; calls to get_diff_record() will block | |
328 until the DiffRecord is available (or we have given up on creating it). | |
329 | |
330 Args: | |
331 expected_image_url: file or HTTP url from which we will download the | |
332 expected image | |
333 expected_image_locator: a unique ID string under which we will store the | |
334 expected image within storage_root (probably including a checksum to | |
335 guarantee uniqueness) | |
336 actual_image_url: file or HTTP url from which we will download the | |
337 actual image | |
338 actual_image_locator: a unique ID string under which we will store the | |
339 actual image within storage_root (probably including a checksum to | |
340 guarantee uniqueness) | |
341 """ | |
342 key = _generate_key(expected_image_locator, actual_image_locator) | |
343 if not key in self._diff_dict: | |
344 # If we have already requested a diff between these two images, | |
345 # we don't need to request it again. | |
346 # | |
347 # Threading note: If multiple threads called into this method with the | |
348 # same key at the same time, there will be multiple tasks on the queue | |
349 # with the same key. But that's OK; they will both complete successfully, | |
350 # and just waste a little time in the process. Nothing will break. | |
rmistry
2014/04/14 17:59:06
Can we use semaphores/locks here? Based on your te
epoger
2014/04/14 18:10:51
In my tests, I didn't see any global_file_collisio
| |
351 self._diff_dict[key] = DIFFRECORD_PENDING | |
352 self._tasks_queue.put((key, expected_image_url, actual_image_url)) | |
353 | |
281 def get_diff_record(self, expected_image_locator, actual_image_locator): | 354 def get_diff_record(self, expected_image_locator, actual_image_locator): |
282 """Returns the DiffRecord for this image pair. | 355 """Returns the DiffRecord for this image pair. |
283 | 356 |
284 Raises a KeyError if we don't have a DiffRecord for this image pair. | 357 Args: |
358 expected_image_locator: a unique ID string under which we will store the | |
359 expected image within storage_root (probably including a checksum to | |
360 guarantee uniqueness) | |
361 actual_image_locator: a unique ID string under which we will store the | |
362 actual image within storage_root (probably including a checksum to | |
363 guarantee uniqueness) | |
364 | |
365 Returns the DiffRecord for this image pair, or None if we were unable to | |
366 generate one. | |
285 """ | 367 """ |
286 key = (_sanitize_locator(expected_image_locator), | 368 key = _generate_key(expected_image_locator, actual_image_locator) |
287 _sanitize_locator(actual_image_locator)) | 369 diff_record = self._diff_dict[key] |
288 return self._diff_dict[key] | 370 |
371 # If we have no results yet, block until we do. | |
372 while diff_record == DIFFRECORD_PENDING: | |
373 time.sleep(1) | |
374 diff_record = self._diff_dict[key] | |
375 | |
376 # Once we have the result... | |
377 if diff_record == DIFFRECORD_FAILED: | |
378 logging.error( | |
379 'failed to create a DiffRecord for expected_image_locator=%s , ' | |
380 'actual_image_locator=%s' % ( | |
381 expected_image_locator, actual_image_locator)) | |
382 return None | |
383 else: | |
384 return diff_record | |
289 | 385 |
290 | 386 |
291 # Utility functions | 387 # Utility functions |
292 | 388 |
293 def _calculate_weighted_diff_metric(histogram, num_pixels): | 389 def _calculate_weighted_diff_metric(histogram, num_pixels): |
294 """Given the histogram of a diff image (per-channel diff at each | 390 """Given the histogram of a diff image (per-channel diff at each |
295 pixel between two images), calculate the weighted diff metric (a | 391 pixel between two images), calculate the weighted diff metric (a |
296 stab at how different the two images really are). | 392 stab at how different the two images really are). |
297 | 393 |
298 TODO(epoger): Delete this function, now that we have perceptual diff? | 394 TODO(epoger): Delete this function, now that we have perceptual diff? |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
367 def _download_and_open_image(local_filepath, url): | 463 def _download_and_open_image(local_filepath, url): |
368 """Open the image at local_filepath; if there is no file at that path, | 464 """Open the image at local_filepath; if there is no file at that path, |
369 download it from url to that path and then open it. | 465 download it from url to that path and then open it. |
370 | 466 |
371 Args: | 467 Args: |
372 local_filepath: path on local disk where the image should be stored | 468 local_filepath: path on local disk where the image should be stored |
373 url: URL from which we can download the image if we don't have it yet | 469 url: URL from which we can download the image if we don't have it yet |
374 | 470 |
375 Returns: a PIL image object | 471 Returns: a PIL image object |
376 """ | 472 """ |
473 global global_file_collisions | |
377 if not os.path.exists(local_filepath): | 474 if not os.path.exists(local_filepath): |
378 _mkdir_unless_exists(os.path.dirname(local_filepath)) | 475 _mkdir_unless_exists(os.path.dirname(local_filepath)) |
379 with contextlib.closing(urllib.urlopen(url)) as url_handle: | 476 with contextlib.closing(urllib.urlopen(url)) as url_handle: |
380 with open(local_filepath, 'wb') as file_handle: | 477 |
478 # First download the file contents into a unique filename, and | |
479 # then rename that file. That way, if multiple threads are downloading | |
480 # the same filename at the same time, they won't interfere with each | |
481 # other (they will both download the file, and one will "win" in the end) | |
482 temp_filename = '%s-%d' % (local_filepath, | |
483 threading.current_thread().ident) | |
484 with open(temp_filename, 'wb') as file_handle: | |
381 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) | 485 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) |
486 | |
487 # Keep count of how many colliding downloads we encounter; | |
488 # if it's a large number, we may want to change our download strategy | |
489 # to minimize repeated downloads. | |
490 if os.path.exists(local_filepath): | |
491 global_file_collisions += 1 | |
492 else: | |
493 os.rename(temp_filename, local_filepath) | |
494 | |
382 return _open_image(local_filepath) | 495 return _open_image(local_filepath) |
383 | 496 |
384 | 497 |
385 def _open_image(filepath): | 498 def _open_image(filepath): |
386 """Wrapper for Image.open(filepath) that yields more useful error messages. | 499 """Wrapper for Image.open(filepath) that yields more useful error messages. |
387 | 500 |
388 Args: | 501 Args: |
389 filepath: path on local disk to load image from | 502 filepath: path on local disk to load image from |
390 | 503 |
391 Returns: a PIL image object | 504 Returns: a PIL image object |
(...skipping 20 matching lines...) Expand all Loading... | |
412 _mkdir_unless_exists(os.path.dirname(filepath)) | 525 _mkdir_unless_exists(os.path.dirname(filepath)) |
413 image.save(filepath, format) | 526 image.save(filepath, format) |
414 | 527 |
415 | 528 |
416 def _mkdir_unless_exists(path): | 529 def _mkdir_unless_exists(path): |
417 """Unless path refers to an already-existing directory, create it. | 530 """Unless path refers to an already-existing directory, create it. |
418 | 531 |
419 Args: | 532 Args: |
420 path: path on local disk | 533 path: path on local disk |
421 """ | 534 """ |
422 if not os.path.isdir(path): | 535 try: |
423 os.makedirs(path) | 536 os.makedirs(path) |
537 except OSError as e: | |
538 if e.errno == errno.EEXIST: | |
539 pass | |
424 | 540 |
425 | 541 |
426 def _sanitize_locator(locator): | 542 def _sanitize_locator(locator): |
427 """Returns a sanitized version of a locator (one in which we know none of the | 543 """Returns a sanitized version of a locator (one in which we know none of the |
428 characters will have special meaning in filenames). | 544 characters will have special meaning in filenames). |
429 | 545 |
430 Args: | 546 Args: |
431 locator: string, or something that can be represented as a string | 547 locator: string, or something that can be represented as a string |
432 """ | 548 """ |
433 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) | 549 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) |
434 | 550 |
435 | 551 |
552 def _generate_key(expected_image_locator, actual_image_locator): | |
553 """Returns a key suitable for looking up this image pair. | |
554 | |
555 Args: | |
556 expected_image_locator: a unique ID string under which we will store the | |
557 expected image within storage_root (probably including a checksum to | |
558 guarantee uniqueness) | |
559 actual_image_locator: a unique ID string under which we will store the | |
560 actual image within storage_root (probably including a checksum to | |
561 guarantee uniqueness) | |
562 """ | |
563 return (_sanitize_locator(expected_image_locator), | |
564 _sanitize_locator(actual_image_locator)) | |
565 | |
566 | |
436 def _get_difference_locator(expected_image_locator, actual_image_locator): | 567 def _get_difference_locator(expected_image_locator, actual_image_locator): |
437 """Returns the locator string used to look up the diffs between expected_image | 568 """Returns the locator string used to look up the diffs between expected_image |
438 and actual_image. | 569 and actual_image. |
439 | 570 |
440 We must keep this function in sync with getImageDiffRelativeUrl() in | 571 We must keep this function in sync with getImageDiffRelativeUrl() in |
441 static/loader.js | 572 static/loader.js |
442 | 573 |
443 Args: | 574 Args: |
444 expected_image_locator: locator string pointing at expected image | 575 expected_image_locator: locator string pointing at expected image |
445 actual_image_locator: locator string pointing at actual image | 576 actual_image_locator: locator string pointing at actual image |
446 | 577 |
447 Returns: already-sanitized locator where the diffs between expected and | 578 Returns: already-sanitized locator where the diffs between expected and |
448 actual images can be found | 579 actual images can be found |
449 """ | 580 """ |
450 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), | 581 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), |
451 _sanitize_locator(actual_image_locator)) | 582 _sanitize_locator(actual_image_locator)) |
OLD | NEW |