| Index: gm/rebaseline_server/imagediffdb.py
|
| diff --git a/gm/rebaseline_server/imagediffdb.py b/gm/rebaseline_server/imagediffdb.py
|
| index 10fcc98f3b13c35dd57a7aa9550dd813edcbaf04..3b1eb3ebc032a804b444c453dd28f9ed4cc06307 100644
|
| --- a/gm/rebaseline_server/imagediffdb.py
|
| +++ b/gm/rebaseline_server/imagediffdb.py
|
| @@ -11,16 +11,12 @@
|
|
|
| import contextlib
|
| import csv
|
| -import errno
|
| import logging
|
| -import Queue
|
| import os
|
| import re
|
| import shutil
|
| import sys
|
| import tempfile
|
| -import time
|
| -import threading
|
| import urllib
|
| try:
|
| from PIL import Image, ImageChops
|
| @@ -39,7 +35,6 @@
|
|
|
| DEFAULT_IMAGE_SUFFIX = '.png'
|
| DEFAULT_IMAGES_SUBDIR = 'images'
|
| -DEFAULT_NUM_WORKERS = 8
|
|
|
| DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')
|
|
|
| @@ -55,14 +50,6 @@
|
| KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
|
| KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'
|
| KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'
|
| -
|
| -# Special values within ImageDiffDB._diff_dict
|
| -DIFFRECORD_FAILED = 'failed'
|
| -DIFFRECORD_PENDING = 'pending'
|
| -
|
| -# TODO(epoger): Temporary(?) list to keep track of how many times we download
|
| -# the same file in multiple threads.
|
| -global_file_collisions = 0
|
|
|
|
|
| class DiffRecord(object):
|
| @@ -76,6 +63,9 @@
|
| image_suffix=DEFAULT_IMAGE_SUFFIX):
|
| """Download this pair of images (unless we already have them on local disk),
|
| and prepare a DiffRecord for them.
|
| +
|
| + TODO(epoger): Make this asynchronously download images, rather than blocking
|
| + until the images have been downloaded and processed.
|
|
|
| Args:
|
| storage_root: root directory on local disk within which we store all
|
| @@ -229,50 +219,16 @@
|
| """ Calculates differences between image pairs, maintaining a database of
|
| them for download."""
|
|
|
| - def __init__(self, storage_root, num_workers=DEFAULT_NUM_WORKERS):
|
| + def __init__(self, storage_root):
|
| """
|
| Args:
|
| storage_root: string; root path within the DB will store all of its stuff
|
| - num_workers: integer; number of worker threads to spawn
|
| """
|
| self._storage_root = storage_root
|
|
|
| # Dictionary of DiffRecords, keyed by (expected_image_locator,
|
| # actual_image_locator) tuples.
|
| - # Values can also be DIFFRECORD_PENDING, DIFFRECORD_FAILED.
|
| self._diff_dict = {}
|
| -
|
| - # Set up the queue for asynchronously loading DiffRecords, and start the
|
| - # worker threads reading from it.
|
| - self._tasks_queue = Queue.Queue(maxsize=2*num_workers)
|
| - self._workers = []
|
| - for i in range(num_workers):
|
| - worker = threading.Thread(target=self.worker, args=(i,))
|
| - worker.daemon = True
|
| - worker.start()
|
| - self._workers.append(worker)
|
| -
|
| - def worker(self, worker_num):
|
| - """Launch a worker thread that pulls tasks off self._tasks_queue.
|
| -
|
| - Args:
|
| - worker_num: (integer) which worker this is
|
| - """
|
| - while True:
|
| - params = self._tasks_queue.get()
|
| - key, expected_image_url, actual_image_url = params
|
| - try:
|
| - diff_record = DiffRecord(
|
| - self._storage_root,
|
| - expected_image_url=expected_image_url,
|
| - expected_image_locator=key[0],
|
| - actual_image_url=actual_image_url,
|
| - actual_image_locator=key[1])
|
| - except Exception:
|
| - logging.exception(
|
| - 'exception while creating DiffRecord for key %s' % str(key))
|
| - diff_record = DIFFRECORD_FAILED
|
| - self._diff_dict[key] = diff_record
|
|
|
| def add_image_pair(self,
|
| expected_image_url, expected_image_locator,
|
| @@ -280,8 +236,13 @@
|
| """Download this pair of images (unless we already have them on local disk),
|
| and prepare a DiffRecord for them.
|
|
|
| - This method will block until the images are downloaded and DiffRecord is
|
| - available by calling get_diff_record().
|
| + TODO(epoger): Make this asynchronously download images, rather than blocking
|
| + until the images have been downloaded and processed.
|
| + When we do that, we should probably add a new method that will block
|
| + until all of the images have been downloaded and processed. Otherwise,
|
| + we won't know when it's safe to start calling get_diff_record().
|
| + jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
|
| + thread-pool/worker queue at a higher level that just uses ImageDiffDB?
|
|
|
| Args:
|
| expected_image_url: file or HTTP url from which we will download the
|
| @@ -294,11 +255,10 @@
|
| actual_image_locator: a unique ID string under which we will store the
|
| actual image within storage_root (probably including a checksum to
|
| guarantee uniqueness)
|
| -
|
| - Raises:
|
| - Exception if we are unable to create a DiffRecord for this image pair.
|
| - """
|
| - key = _generate_key(expected_image_locator, actual_image_locator)
|
| + """
|
| + expected_image_locator = _sanitize_locator(expected_image_locator)
|
| + actual_image_locator = _sanitize_locator(actual_image_locator)
|
| + key = (expected_image_locator, actual_image_locator)
|
| if not key in self._diff_dict:
|
| try:
|
| new_diff_record = DiffRecord(
|
| @@ -318,70 +278,14 @@
|
| new_diff_record = None
|
| self._diff_dict[key] = new_diff_record
|
|
|
| - def add_image_pair_async(self,
|
| - expected_image_url, expected_image_locator,
|
| - actual_image_url, actual_image_locator):
|
| - """Download this pair of images (unless we already have them on local disk),
|
| - and prepare a DiffRecord for them.
|
| -
|
| - This method will return quickly; calls to get_diff_record() will block
|
| - until the DiffRecord is available (or we have given up on creating it).
|
| -
|
| - Args:
|
| - expected_image_url: file or HTTP url from which we will download the
|
| - expected image
|
| - expected_image_locator: a unique ID string under which we will store the
|
| - expected image within storage_root (probably including a checksum to
|
| - guarantee uniqueness)
|
| - actual_image_url: file or HTTP url from which we will download the
|
| - actual image
|
| - actual_image_locator: a unique ID string under which we will store the
|
| - actual image within storage_root (probably including a checksum to
|
| - guarantee uniqueness)
|
| - """
|
| - key = _generate_key(expected_image_locator, actual_image_locator)
|
| - if not key in self._diff_dict:
|
| - # If we have already requested a diff between these two images,
|
| - # we don't need to request it again.
|
| - #
|
| - # Threading note: If multiple threads called into this method with the
|
| - # same key at the same time, there will be multiple tasks on the queue
|
| - # with the same key. But that's OK; they will both complete successfully,
|
| - # and just waste a little time in the process. Nothing will break.
|
| - self._diff_dict[key] = DIFFRECORD_PENDING
|
| - self._tasks_queue.put((key, expected_image_url, actual_image_url))
|
| -
|
| def get_diff_record(self, expected_image_locator, actual_image_locator):
|
| """Returns the DiffRecord for this image pair.
|
|
|
| - Args:
|
| - expected_image_locator: a unique ID string under which we will store the
|
| - expected image within storage_root (probably including a checksum to
|
| - guarantee uniqueness)
|
| - actual_image_locator: a unique ID string under which we will store the
|
| - actual image within storage_root (probably including a checksum to
|
| - guarantee uniqueness)
|
| -
|
| - Returns the DiffRecord for this image pair, or None if we were unable to
|
| - generate one.
|
| - """
|
| - key = _generate_key(expected_image_locator, actual_image_locator)
|
| - diff_record = self._diff_dict[key]
|
| -
|
| - # If we have no results yet, block until we do.
|
| - while diff_record == DIFFRECORD_PENDING:
|
| - time.sleep(1)
|
| - diff_record = self._diff_dict[key]
|
| -
|
| - # Once we have the result...
|
| - if diff_record == DIFFRECORD_FAILED:
|
| - logging.error(
|
| - 'failed to create a DiffRecord for expected_image_locator=%s , '
|
| - 'actual_image_locator=%s' % (
|
| - expected_image_locator, actual_image_locator))
|
| - return None
|
| - else:
|
| - return diff_record
|
| + Raises a KeyError if we don't have a DiffRecord for this image pair.
|
| + """
|
| + key = (_sanitize_locator(expected_image_locator),
|
| + _sanitize_locator(actual_image_locator))
|
| + return self._diff_dict[key]
|
|
|
|
|
| # Utility functions
|
| @@ -470,28 +374,11 @@
|
|
|
| Returns: a PIL image object
|
| """
|
| - global global_file_collisions
|
| if not os.path.exists(local_filepath):
|
| _mkdir_unless_exists(os.path.dirname(local_filepath))
|
| with contextlib.closing(urllib.urlopen(url)) as url_handle:
|
| -
|
| - # First download the file contents into a unique filename, and
|
| - # then rename that file. That way, if multiple threads are downloading
|
| - # the same filename at the same time, they won't interfere with each
|
| - # other (they will both download the file, and one will "win" in the end)
|
| - temp_filename = '%s-%d' % (local_filepath,
|
| - threading.current_thread().ident)
|
| - with open(temp_filename, 'wb') as file_handle:
|
| + with open(local_filepath, 'wb') as file_handle:
|
| shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
|
| -
|
| - # Keep count of how many colliding downloads we encounter;
|
| - # if it's a large number, we may want to change our download strategy
|
| - # to minimize repeated downloads.
|
| - if os.path.exists(local_filepath):
|
| - global_file_collisions += 1
|
| - else:
|
| - os.rename(temp_filename, local_filepath)
|
| -
|
| return _open_image(local_filepath)
|
|
|
|
|
| @@ -532,11 +419,8 @@
|
| Args:
|
| path: path on local disk
|
| """
|
| - try:
|
| + if not os.path.isdir(path):
|
| os.makedirs(path)
|
| - except OSError as e:
|
| - if e.errno == errno.EEXIST:
|
| - pass
|
|
|
|
|
| def _sanitize_locator(locator):
|
| @@ -549,21 +433,6 @@
|
| return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
|
|
|
|
|
| -def _generate_key(expected_image_locator, actual_image_locator):
|
| - """Returns a key suitable for looking up this image pair.
|
| -
|
| - Args:
|
| - expected_image_locator: a unique ID string under which we will store the
|
| - expected image within storage_root (probably including a checksum to
|
| - guarantee uniqueness)
|
| - actual_image_locator: a unique ID string under which we will store the
|
| - actual image within storage_root (probably including a checksum to
|
| - guarantee uniqueness)
|
| - """
|
| - return (_sanitize_locator(expected_image_locator),
|
| - _sanitize_locator(actual_image_locator))
|
| -
|
| -
|
| def _get_difference_locator(expected_image_locator, actual_image_locator):
|
| """Returns the locator string used to look up the diffs between expected_image
|
| and actual_image.
|
|
|