Chromium Code Reviews| Index: gm/rebaseline_server/imagediffdb.py |
| =================================================================== |
| --- gm/rebaseline_server/imagediffdb.py (revision 0) |
| +++ gm/rebaseline_server/imagediffdb.py (revision 0) |
| @@ -0,0 +1,264 @@ |
| +#!/usr/bin/python |
| + |
| +""" |
| +Copyright 2013 Google Inc. |
| + |
| +Use of this source code is governed by a BSD-style license that can be |
| +found in the LICENSE file. |
| + |
| +Calulate differences between image pairs, and store them in a database. |
| +Requires PIL to be installed; see http://www.pythonware.com/products/pil/ |
|
rmistry
2013/11/06 19:12:52
Optional: You could also output this line by wrapp
epoger
2013/11/07 21:11:53
Done.
|
| +""" |
| + |
| +# System-level imports |
|
rmistry
2013/11/06 19:12:52
Nit: Not required since you do not have any other
epoger
2013/11/07 21:11:53
Done.
|
| +import contextlib |
| +import logging |
| +import os |
| +import urllib |
| +from cStringIO import StringIO |
|
jcgregorio
2013/11/06 18:47:28
unused import
epoger
2013/11/07 21:11:53
Done.
|
| +from PIL import Image, ImageChops |
| + |
| + |
| +IMAGE_SUFFIX = '.png' |
| +IMAGE_FORMAT = 'PNG' # must match one of the PIL image formats, listed at |
| + # http://effbot.org/imagingbook/formats.htm |
| + |
| +IMAGES_SUBDIR = 'images' |
| +DIFFS_SUBDIR = 'diffs' |
| +WHITEDIFFS_SUBDIR = 'whitediffs' |
| + |
|
jcgregorio
2013/11/06 18:47:28
2 lines
epoger
2013/11/07 21:11:53
Done.
|
| +class DiffRecord(object): |
| + """ Record of differences between two images. """ |
| + |
| + def __init__(self, storage_root, |
| + expected_image_url, expected_image_locator, |
| + actual_image_url, actual_image_locator): |
| + """Download this pair of images (unless we already have them on local disk), |
| + and prepare a DiffRecord for them. |
| + |
| + TODO(epoger): Make this asynchronously download images, rather than blocking |
| + until the images have been downloaded and processed. |
| + |
| + Args: |
| + storage_root: root directory on local disk within which we store all |
| + images |
| + expected_image_url: file or HTTP url from which we will download the |
| + expected image |
| + expected_image_locator: a unique ID string under which we will store the |
| + expected image within storage_root (probably including a checksum to |
| + guarantee uniqueness) |
| + actual_image_url: file or HTTP url from which we will download the |
| + actual image |
| + actual_image_locator: a unique ID string under which we will store the |
| + actual image within storage_root (probably including a checksum to |
| + guarantee uniqueness) |
| + """ |
| + # Download the expected/actual images, if we don't have them already. |
| + mkdir_unless_exists(os.path.join(storage_root, IMAGES_SUBDIR)) |
|
jcgregorio
2013/11/06 18:47:28
Repeated code 58-69. Can you create a function loa
epoger
2013/11/07 21:11:53
Done.
|
| + expected_image_filepath = os.path.join( |
| + storage_root, IMAGES_SUBDIR, str(expected_image_locator) + IMAGE_SUFFIX) |
| + actual_image_filepath = os.path.join( |
| + storage_root, IMAGES_SUBDIR, str(actual_image_locator) + IMAGE_SUFFIX) |
| + download_file_unless_exists( |
| + source_url=expected_image_url, dest_filepath=expected_image_filepath) |
| + download_file_unless_exists( |
| + source_url=actual_image_url, dest_filepath=actual_image_filepath) |
| + |
| + # Read in expected/actual images. |
| + expected_image = Image.open(expected_image_filepath) |
| + actual_image = Image.open(actual_image_filepath) |
| + |
| + # Store the diff image (absolute diff at each pixel). |
| + diff_image = generate_image_diff(actual_image, expected_image) |
| + self._weighted_diff_measure = calculate_weighted_diff_metric(diff_image) |
| + diff_image_locator = get_difference_locator( |
| + expected_image_locator=expected_image_locator, |
| + actual_image_locator=actual_image_locator) |
| + diff_image_filepath = os.path.join( |
| + storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX) |
| + mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR)) |
| + diff_image.save(diff_image_filepath, IMAGE_FORMAT) |
| + |
| + # Store the whitediff image (any differing pixels show as white). |
| + # |
| + # TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems |
| + # like we should be able to use im.point(function, mode) to perform both |
| + # the point() and convert('1') operations simultaneously, but I couldn't |
| + # get it to work. |
| + whitediff_image = (diff_image.point(lambda p: (0, 256)[p!=0]) |
| + .convert('1')) |
| + whitediff_image_filepath = os.path.join( |
| + storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX) |
| + mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR)) |
| + whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT) |
| + |
| + # Calculate difference metrics. |
| + (self._width, self._height) = diff_image.size |
| + self._num_pixels_differing = whitediff_image.histogram()[255] |
| + |
| + def get_num_pixels_differing(self): |
| + """Returns the absolute number of pixels that differ.""" |
| + return self._num_pixels_differing |
| + |
| + def get_percent_pixels_differing(self): |
| + """Returns the percentage of pixels that differ, as a float between |
| + 0 and 100 (inclusive).""" |
| + return ((float(self._num_pixels_differing) * 100) / |
| + (self._width * self._height)) |
| + |
| + def get_weighted_diff_measure(self): |
| + """Returns a weighted measure of image diffs, as a float between 0 and 100 |
| + (inclusive).""" |
| + return self._weighted_diff_measure |
| + |
| + |
| +class ImageDiffDB(object): |
| + """ Calculates differences between image pairs, maintaining a database of |
| + them for download.""" |
| + |
| + def __init__(self, storage_root): |
| + """ |
| + Args: |
| + storage_root: string; root path within the DB will store all of its stuff |
| + """ |
| + self._storage_root = storage_root |
| + |
| + # Dictionary of DiffRecords, keyed by (expected_image_locator, |
| + # actual_image_locator) tuples. |
| + self._diff_dict = {} |
| + |
| + def add_image_pair(self, |
| + expected_image_url, expected_image_locator, |
| + actual_image_url, actual_image_locator): |
| + """Download this pair of images (unless we already have them on local disk), |
| + and prepare a DiffRecord for them. |
| + |
| + TODO(epoger): Make this asynchronously download images, rather than blocking |
|
jcgregorio
2013/11/06 18:47:28
I don't know if the async belongs in at this level
epoger
2013/11/07 21:11:53
Added to TODO, thanks.
|
| + until the images have been downloaded and processed. |
| + When we do that, we should probably add a new method that will block |
| + until all of the images have been downloaded and processed. Otherwise, |
| + we won't know when it's safe to start calling get_diff_record(). |
| + |
| + Args: |
| + expected_image_url: file or HTTP url from which we will download the |
| + expected image |
| + expected_image_locator: a unique ID string under which we will store the |
| + expected image within storage_root (probably including a checksum to |
| + guarantee uniqueness) |
| + actual_image_url: file or HTTP url from which we will download the |
| + actual image |
| + actual_image_locator: a unique ID string under which we will store the |
| + actual image within storage_root (probably including a checksum to |
| + guarantee uniqueness) |
| + """ |
| + key = (expected_image_locator, actual_image_locator) |
| + if not self._diff_dict.get(key): |
|
jcgregorio
2013/11/06 18:47:28
if not key in self._diff_dict:
epoger
2013/11/07 21:11:53
Done.
|
| + self._diff_dict[key] = DiffRecord( |
| + self._storage_root, |
| + expected_image_url=expected_image_url, |
| + expected_image_locator=expected_image_locator, |
| + actual_image_url=actual_image_url, |
| + actual_image_locator=actual_image_locator) |
| + |
| + def get_diff_record(self, expected_image_locator, actual_image_locator): |
| + """Returns the DiffRecord for this image pair. |
| + |
| + Raises a KeyError if we don't have a DiffRecord for this image pair. |
| + """ |
| + key = (expected_image_locator, actual_image_locator) |
| + return self._diff_dict[key] |
| + |
| + |
| +# Utility functions |
| + |
| +def calculate_weighted_diff_metric(image): |
| + """Given a diff image (per-channel diff at each pixel between two images), |
| + calculate the weighted diff metric (a stab at how different the two images |
| + really are). |
| + |
| + Args: |
| + image: PIL image; a per-channel diff between two images |
| + |
| + Returns: a weighted diff metric, as a float between 0 and 100 (inclusive). |
| + """ |
| + # TODO(epoger): This is just a wild guess at an appropriate metric. |
| + # In the long term, we will probably use some metric generated by |
| + # skpdiff anyway. |
| + (width, height) = image.size |
| + maxdiff = 3 * (width * height) * 255**2 |
| + h = image.histogram() |
| + assert(len(h) % 256 == 0) |
| + totaldiff = sum(map(lambda index,value: value * (index%256)**2, |
| + range(len(h)), h)) |
| + return float(100 * totaldiff) / maxdiff |
| + |
| +def generate_image_diff(image1, image2): |
|
jcgregorio
2013/11/06 18:47:28
Document args and returns, here and below.
epoger
2013/11/07 21:11:53
Done.
|
| + """Wrapper for ImageChops.difference(image1, image2) that will handle some |
| + errors automatically, or at least yield more useful error messages. |
| + |
| + TODO(epoger): Currently, some of the images generated by the bots are RGBA |
| + and others are RGB. I'm not sure why that is. For now, to avoid confusion |
| + within the UI, convert all to RGB when diffing. |
| + """ |
| + try: |
| + return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB')) |
| + except ValueError: |
| + logging.error('Error diffing image1 [%s] and image2 [%s].' % ( |
| + repr(image1), repr(image2))) |
| + raise |
| + |
| +def mkdir_unless_exists(path): |
|
rmistry
2013/11/06 19:12:52
Make it private? this and some of the other top le
epoger
2013/11/07 21:11:53
Done.
|
| + """Unless path refers to an already-existing directory, create it.""" |
| + if not os.path.isdir(path): |
| + os.makedirs(path) |
| + |
| +def download_file_unless_exists(source_url, dest_filepath): |
| + """Downloads the file from source_url, storing it at dest_filepath, |
| + UNLESS there is already a file at dest_filepath (in which case we do |
| + nothing.""" |
| + if not os.path.exists(dest_filepath): |
| + with contextlib.closing(urllib.urlopen(source_url)) as url_handle: |
| + with open(dest_filepath, 'wb') as file_handle: |
| + file_handle.write(url_handle.read()) |
|
jcgregorio
2013/11/06 18:47:28
Consider shutil.copyfileobj for this:
http://docs
epoger
2013/11/07 21:11:53
Thanks, good advice! See http://stackoverflow.com
|
| + |
| +def get_difference_locator(expected_image_locator, actual_image_locator): |
| + """Returns the locator string used to look up the diffs between expected_image |
| + and actual_image.""" |
| + return "%s-vs-%s" % (expected_image_locator, actual_image_locator) |
| + |
| + |
| +# Test harness |
| +def main(): |
| + logging.basicConfig(level=logging.INFO) |
|
jcgregorio
2013/11/06 18:47:28
Break tests out into a separate imagediffdb_test.p
epoger
2013/11/07 21:11:53
Done.
|
| + |
| + # params for each self-test: |
| + # 0. expected image locator |
| + # 1. expected image URL |
| + # 2. actual image locator |
| + # 3. actual image URL |
| + # 4. expected percent_pixels_differing (as a string, to 4 decimal places) |
| + # 5. expected weighted_diff_measure (as a string, to 4 decimal places) |
| + selftests = [ |
| + ['16206093933823793653', 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/arcofzorro/16206093933823793653.png', |
| + '13786535001616823825', 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/arcofzorro/13786535001616823825.png', |
| + '0.0653', '0.0113'], |
| + ] |
| + |
| + # Add all image pairs to the database |
| + db = ImageDiffDB('/tmp/ImageDiffDB') |
| + for selftest in selftests: |
| + retval = db.add_image_pair( |
| + expected_image_locator=selftest[0], expected_image_url=selftest[1], |
| + actual_image_locator=selftest[2], actual_image_url=selftest[3]) |
| + |
| + # Fetch each image pair from the database |
| + for selftest in selftests: |
| + record = db.get_diff_record(expected_image_locator=selftest[0], |
| + actual_image_locator=selftest[2]) |
| + assert (('%.4f' % record.get_percent_pixels_differing()) == selftest[4]) |
| + assert (('%.4f' % record.get_weighted_diff_measure()) == selftest[5]) |
| + |
| + logging.info("Self-test completed successfully!") |
| + |
|
jcgregorio
2013/11/06 18:47:28
2 lines
epoger
2013/11/07 21:11:53
Done (in the new test file)
|
| +if __name__ == '__main__': |
| + main() |
| Property changes on: gm/rebaseline_server/imagediffdb.py |
| ___________________________________________________________________ |
| Added: svn:executable |
| + * |