OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 |
| 3 """ |
| 4 Copyright 2013 Google Inc. |
| 5 |
| 6 Use of this source code is governed by a BSD-style license that can be |
| 7 found in the LICENSE file. |
| 8 |
| 9 Calculate differences between image pairs, and store them in a database. |
| 10 """ |
| 11 |
| 12 import contextlib |
| 13 import logging |
| 14 import os |
| 15 import shutil |
| 16 import urllib |
| 17 try: |
| 18 from PIL import Image, ImageChops |
| 19 except ImportError: |
| 20 raise ImportError('Requires PIL to be installed; see ' |
| 21 + 'http://www.pythonware.com/products/pil/') |
| 22 |
# File suffix appended to every image path this module builds.
IMAGE_SUFFIX = '.png'
IMAGE_FORMAT = 'PNG'  # must match one of the PIL image formats, listed at
                      # http://effbot.org/imagingbook/formats.htm

# Names of the subdirectories (joined onto storage_root) that hold the
# downloaded images, the per-pixel diff images, and the whitediff images.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'
| 30 |
| 31 |
class DiffRecord(object):
  """Record of differences between two images.

  Constructing a DiffRecord downloads both images (unless they are already on
  local disk), writes a diff image and a whitediff image under storage_root,
  and caches the resulting difference metrics for the getters below.
  """

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    # Download the expected/actual images, if we don't have them already.
    expected_image = _download_and_open_image(
        os.path.join(storage_root, IMAGES_SUBDIR,
                     str(expected_image_locator) + IMAGE_SUFFIX),
        expected_image_url)
    actual_image = _download_and_open_image(
        os.path.join(storage_root, IMAGES_SUBDIR,
                     str(actual_image_locator) + IMAGE_SUFFIX),
        actual_image_url)

    # Store the diff image (absolute diff at each pixel).
    diff_image = _generate_image_diff(actual_image, expected_image)
    self._weighted_diff_measure = _calculate_weighted_diff_metric(diff_image)
    diff_image_locator = _get_difference_locator(
        expected_image_locator=expected_image_locator,
        actual_image_locator=actual_image_locator)
    diff_image_filepath = os.path.join(
        storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)
    _mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR))
    diff_image.save(diff_image_filepath, IMAGE_FORMAT)

    # Store the whitediff image (any differing pixels show as white).
    #
    # Collapse the per-channel diffs into a single band holding the largest
    # channel diff at each pixel, so that a pixel differing in ANY channel is
    # nonzero.  Converting the multi-band diff straight to mode '1' would go
    # through a luminance conversion plus dithering, which does not reliably
    # turn a pixel that differs in only one channel white.
    diff_bands = diff_image.split()
    max_channel_diff = diff_bands[0]
    for band in diff_bands[1:]:
      max_channel_diff = ImageChops.lighter(max_channel_diff, band)

    # TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems
    # like we should be able to use im.point(function, mode) to perform both
    # the point() and convert('1') operations simultaneously, but I couldn't
    # get it to work.
    #
    # The band now contains only 0 and 255 (the largest 8-bit value), so the
    # conversion to mode '1' is exact.
    whitediff_image = (max_channel_diff.point(lambda p: 255 if p else 0)
                       .convert('1'))
    whitediff_image_filepath = os.path.join(
        storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)
    _mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))
    whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT)

    # Calculate difference metrics.
    (self._width, self._height) = diff_image.size
    # White pixels of the whitediff land in the last histogram bin.
    self._num_pixels_differing = whitediff_image.histogram()[255]

  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive).

    A zero-area image has no pixels that could differ, so 0.0 is returned
    rather than dividing by zero.
    """
    total_pixels = self._width * self._height
    if not total_pixels:
      return 0.0
    return (float(self._num_pixels_differing) * 100) / total_pixels

  def get_weighted_diff_measure(self):
    """Returns a weighted measure of image diffs, as a float between 0 and 100
    (inclusive)."""
    return self._weighted_diff_measure
| 110 |
| 111 |
class ImageDiffDB(object):
  """Calculates differences between image pairs, maintaining a database of
  them for download."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within which the DB will store all of
          its stuff
    """
    self._storage_root = storage_root

    # Dictionary of DiffRecords, keyed by (expected_image_locator,
    # actual_image_locator) tuples.
    self._diff_dict = {}

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    If a DiffRecord already exists for this pair of locators, this is a no-op.
    If creating the DiffRecord fails, the exception is logged and no record is
    stored, so a later get_diff_record() for this pair raises KeyError.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed.  Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    key = (expected_image_locator, actual_image_locator)
    if key in self._diff_dict:
      return
    try:
      new_diff_record = DiffRecord(
          self._storage_root,
          expected_image_url=expected_image_url,
          expected_image_locator=expected_image_locator,
          actual_image_url=actual_image_url,
          actual_image_locator=actual_image_locator)
    except Exception:
      # Best-effort: one bad image pair must not abort the whole run.  Catch
      # Exception rather than everything, so that KeyboardInterrupt and
      # SystemExit still propagate to the caller.
      logging.exception('got exception while creating new DiffRecord')
      return
    self._diff_dict[key] = new_diff_record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    key = (expected_image_locator, actual_image_locator)
    return self._diff_dict[key]
| 174 |
| 175 |
| 176 # Utility functions |
| 177 |
| 178 def _calculate_weighted_diff_metric(image): |
| 179 """Given a diff image (per-channel diff at each pixel between two images), |
| 180 calculate the weighted diff metric (a stab at how different the two images |
| 181 really are). |
| 182 |
| 183 Args: |
| 184 image: PIL image; a per-channel diff between two images |
| 185 |
| 186 Returns: a weighted diff metric, as a float between 0 and 100 (inclusive). |
| 187 """ |
| 188 # TODO(epoger): This is just a wild guess at an appropriate metric. |
| 189 # In the long term, we will probably use some metric generated by |
| 190 # skpdiff anyway. |
| 191 (width, height) = image.size |
| 192 maxdiff = 3 * (width * height) * 255**2 |
| 193 h = image.histogram() |
| 194 assert(len(h) % 256 == 0) |
| 195 totaldiff = sum(map(lambda index,value: value * (index%256)**2, |
| 196 range(len(h)), h)) |
| 197 return float(100 * totaldiff) / maxdiff |
| 198 |
def _generate_image_diff(image1, image2):
  """Compute the per-pixel difference between two PIL images.

  Thin wrapper around ImageChops.difference() that logs both images before
  re-raising if the diff fails with a ValueError.

  TODO(epoger): Currently, some of the images generated by the bots are RGBA
  and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
  within the UI, convert all to RGB when diffing.

  Args:
    image1: a PIL image object
    image2: a PIL image object

  Returns: per-pixel diffs between image1 and image2, as a PIL image object
  """
  try:
    # Normalize both inputs to RGB so the two images always share a mode.
    rgb_image1 = image1.convert('RGB')
    rgb_image2 = image2.convert('RGB')
    return ImageChops.difference(rgb_image1, rgb_image2)
  except ValueError:
    logging.error('Error diffing image1 [%s] and image2 [%s].',
                  repr(image1), repr(image2))
    raise
| 219 |
def _download_and_open_image(local_filepath, url):
  """Open the image at local_filepath; if there is no file at that path,
  download it from url to that path and then open it.

  The download is written to a temporary sibling file and only renamed to
  local_filepath once it has completed, so an interrupted or failed download
  never leaves a truncated file behind that later calls would mistake for an
  already-downloaded image.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet

  Returns: a PIL image object

  Raises:
    IOError: if the server answers with an HTTP error status, or if the
        image file cannot be loaded.
  """
  if not os.path.exists(local_filepath):
    _mkdir_unless_exists(os.path.dirname(local_filepath))
    partial_filepath = local_filepath + '.partial'
    try:
      with contextlib.closing(urllib.urlopen(url)) as url_handle:
        # getcode() is None for non-HTTP (e.g. file) URLs.  urlopen() hands
        # back the error page for HTTP failures instead of raising, and we
        # must not cache that page as if it were the image.
        status = url_handle.getcode()
        if status is not None and status >= 400:
          raise IOError('HTTP status %d while downloading %s' % (status, url))
        with open(partial_filepath, 'wb') as file_handle:
          shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
      os.rename(partial_filepath, local_filepath)
    finally:
      # After a successful rename there is nothing left to clean up; after a
      # failure this discards whatever was partially written.
      if os.path.exists(partial_filepath):
        os.remove(partial_filepath)
  return _open_image(local_filepath)
| 236 |
def _open_image(filepath):
  """Load an image from disk via Image.open(), logging the offending path
  before re-raising if the load fails with an IOError.

  Args:
    filepath: path on local disk to load image from

  Returns: a PIL image object
  """
  try:
    image = Image.open(filepath)
  except IOError:
    logging.error('IOError loading image file %s', filepath)
    raise
  return image
| 250 |
| 251 def _mkdir_unless_exists(path): |
| 252 """Unless path refers to an already-existing directory, create it. |
| 253 |
| 254 Args: |
| 255 path: path on local disk |
| 256 """ |
| 257 if not os.path.isdir(path): |
| 258 os.makedirs(path) |
| 259 |
| 260 def _get_difference_locator(expected_image_locator, actual_image_locator): |
| 261 """Returns the locator string used to look up the diffs between expected_image |
| 262 and actual_image. |
| 263 |
| 264 Args: |
| 265 expected_image_locator: locator string pointing at expected image |
| 266 actual_image_locator: locator string pointing at actual image |
| 267 |
| 268 Returns: locator where the diffs between expected and actual images can be |
| 269 found |
| 270 """ |
| 271 return "%s-vs-%s" % (expected_image_locator, actual_image_locator) |
OLD | NEW |