Index: gm/rebaseline_server/imagediffdb.py |
=================================================================== |
--- gm/rebaseline_server/imagediffdb.py (revision 0) |
+++ gm/rebaseline_server/imagediffdb.py (revision 0) |
@@ -0,0 +1,271 @@ |
+#!/usr/bin/python |
+ |
+""" |
+Copyright 2013 Google Inc. |
+ |
+Use of this source code is governed by a BSD-style license that can be |
+found in the LICENSE file. |
+ |
+Calculate differences between image pairs, and store them in a database. |
+""" |
+ |
+import contextlib |
+import logging |
+import os |
+import shutil |
+import urllib |
+try: |
+ from PIL import Image, ImageChops |
+except ImportError: |
+ raise ImportError('Requires PIL to be installed; see ' |
+ + 'http://www.pythonware.com/products/pil/') |
+ |
# Filename suffix appended to every image path this module builds.
IMAGE_SUFFIX = '.png'
# Format name handed to PIL when saving images; it must match one of the PIL
# image formats, listed at http://effbot.org/imagingbook/formats.htm
IMAGE_FORMAT = 'PNG'

# Subdirectories of the storage root, one per kind of stored image.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'
+ |
+ |
class DiffRecord(object):
    """Record of differences between two images.

    Constructing a DiffRecord fetches both images, writes a diff image and a
    "whitediff" image under storage_root, and remembers the resulting
    difference metrics so they can be read back through the getters.
    """

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        # Download the expected/actual images, if we don't have them already.
        expected_image = _download_and_open_image(
            os.path.join(storage_root, IMAGES_SUBDIR,
                         str(expected_image_locator) + IMAGE_SUFFIX),
            expected_image_url)
        actual_image = _download_and_open_image(
            os.path.join(storage_root, IMAGES_SUBDIR,
                         str(actual_image_locator) + IMAGE_SUFFIX),
            actual_image_url)

        # Store the diff image (absolute diff at each pixel).
        diff_image = _generate_image_diff(actual_image, expected_image)
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_image)
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        diff_image_filepath = os.path.join(
            storage_root, DIFFS_SUBDIR,
            str(diff_image_locator) + IMAGE_SUFFIX)
        _mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR))
        diff_image.save(diff_image_filepath, IMAGE_FORMAT)

        # Store the whitediff image (any differing pixels show as white).
        #
        # Converting the multi-band diff straight to mode '1' goes through a
        # luminance transform plus dithering, so a pixel that differs in only
        # one channel could come out black and be missed by the pixel count.
        # Instead, threshold every band to 0/255 and merge the bands with a
        # per-pixel maximum: a pixel is white iff ANY band differs.  The
        # merged mask holds only 0 and 255, so the final bilevel conversion
        # cannot change any pixel.
        thresholded_bands = diff_image.point(
            lambda p: 255 if p else 0).split()
        whitediff_mask = thresholded_bands[0]
        for band in thresholded_bands[1:]:
            whitediff_mask = ImageChops.lighter(whitediff_mask, band)
        whitediff_image = whitediff_mask.convert('1')
        whitediff_image_filepath = os.path.join(
            storage_root, WHITEDIFFS_SUBDIR,
            str(diff_image_locator) + IMAGE_SUFFIX)
        _mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))
        whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT)

        # Calculate difference metrics.  White pixels of the bilevel image
        # are counted in the last (index 255) histogram bucket.
        (self._width, self._height) = diff_image.size
        self._num_pixels_differing = whitediff_image.histogram()[255]

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0
        and 100 (inclusive)."""
        return self._weighted_diff_measure
+ |
+ |
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        self._storage_root = storage_root

        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples.
        self._diff_dict = {}

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If creating the DiffRecord fails with an ordinary exception, the
        failure is logged and no record is stored for this pair.
        Interpreter-level signals such as KeyboardInterrupt and SystemExit
        are NOT swallowed, so a long-running batch can still be interrupted.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create
        a thread-pool/worker queue at a higher level that just uses
        ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        key = (expected_image_locator, actual_image_locator)
        if key not in self._diff_dict:
            try:
                new_diff_record = DiffRecord(
                    self._storage_root,
                    expected_image_url=expected_image_url,
                    expected_image_locator=expected_image_locator,
                    actual_image_url=actual_image_url,
                    actual_image_locator=actual_image_locator)
            except Exception:
                # Best effort: one bad image pair must not abort the caller.
                logging.exception(
                    'got exception while creating new DiffRecord')
                return
            self._diff_dict[key] = new_diff_record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        key = (expected_image_locator, actual_image_locator)
        return self._diff_dict[key]
+ |
+ |
+# Utility functions |
+ |
def _calculate_weighted_diff_metric(image):
    """Given a diff image (per-channel diff at each pixel between two images),
    calculate the weighted diff metric (a stab at how different the two images
    really are).

    Each channel value is squared, so a few large differences weigh more than
    many tiny ones; the sum is normalized by the largest possible total for an
    image of this size and band count.

    Args:
      image: PIL image; a per-channel diff between two images

    Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).
    """
    # TODO(epoger): This is just a wild guess at an appropriate metric.
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    (width, height) = image.size
    histogram = image.histogram()
    # The histogram is the concatenation of one 256-bucket histogram per band.
    assert len(histogram) % 256 == 0
    # Derive the band count from the histogram rather than assuming RGB, so
    # the result stays within 0..100 whatever the number of bands.
    num_bands = len(histogram) // 256
    maxdiff = num_bands * (width * height) * 255**2
    # index % 256 recovers the channel value that a bucket stands for.
    totaldiff = sum(count * (index % 256)**2
                    for index, count in enumerate(histogram))
    return float(100 * totaldiff) / maxdiff
+ |
def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle some
    errors automatically, or at least yield more useful error messages.

    Both inputs are converted to RGB before they are diffed.  A ValueError
    raised along the way is logged (naming both images) and then re-raised.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB.  I'm not sure why that is.  For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object
    """
    try:
        rgb_first = image1.convert('RGB')
        rgb_second = image2.convert('RGB')
        return ImageChops.difference(rgb_first, rgb_second)
    except ValueError:
        message = 'Error diffing image1 [%s] and image2 [%s].' % (
            repr(image1), repr(image2))
        logging.error(message)
        raise
+ |
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    The download is first written to a temporary sibling file and only
    renamed to local_filepath once it has completed.  That way a failed or
    interrupted download never leaves a truncated file at local_filepath,
    which later calls would otherwise mistake for an already-downloaded image.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))
        partial_filepath = local_filepath + '.partial'
        try:
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                with open(partial_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
            os.rename(partial_filepath, local_filepath)
        finally:
            # After a successful rename the partial file is gone; if it is
            # still here the download failed, so discard the leftovers.
            if os.path.exists(partial_filepath):
                os.remove(partial_filepath)
    return _open_image(local_filepath)
+ |
def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error
    messages.

    An IOError from Image.open is logged together with the offending path
    and then re-raised to the caller.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object
    """
    try:
        opened_image = Image.open(filepath)
    except IOError:
        logging.error('IOError loading image file %s' % filepath)
        raise
    return opened_image
+ |
def _mkdir_unless_exists(path):
    """Create a directory at path (including any missing parent directories),
    unless path already refers to an existing directory.

    Args:
      path: path on local disk
    """
    if os.path.isdir(path):
        return
    os.makedirs(path)
+ |
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Builds the locator string under which the diffs between an expected
    image and an actual image are looked up.

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: locator where the diffs between expected and actual images can be
        found, of the form "<expected>-vs-<actual>"
    """
    locator_pair = (expected_image_locator, actual_image_locator)
    return "%s-vs-%s" % locator_pair