Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 | |
| 3 """ | |
| 4 Copyright 2013 Google Inc. | |
| 5 | |
| 6 Use of this source code is governed by a BSD-style license that can be | |
| 7 found in the LICENSE file. | |
| 8 | |
| 9 Calculate differences between image pairs, and store them in a database. | |
| 10 """ | |
| 11 | |
| 12 import contextlib | |
| 13 import logging | |
| 14 import os | |
| 15 import shutil | |
| 16 import urllib | |
| 17 try: | |
| 18 from PIL import Image, ImageChops | |
| 19 except ImportError: | |
| 20 raise ImportError('Requires PIL to be installed; see ' | |
| 21 + 'http://www.pythonware.com/products/pil/') | |
| 22 | |
# Filename suffix appended to image locators when building on-disk paths.
IMAGE_SUFFIX = '.png'
# Format name passed to PIL's Image.save() when writing diff images.
IMAGE_FORMAT = 'PNG'  # must match one of the PIL image formats, listed at
                      # http://effbot.org/imagingbook/formats.htm

# Subdirectories of storage_root holding, respectively: the downloaded
# expected/actual images, the per-pixel diff images, and the whitediff images.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'
| 30 | |
| 31 | |
class DiffRecord(object):
  """Record of differences between two images.

  Constructing a DiffRecord fetches both images (unless they are already on
  local disk), writes a diff image and a "whitediff" image under storage_root,
  and caches the difference metrics exposed by the get_*() methods.
  """

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    # Download the expected/actual images, if we don't have them already.
    images_dir = os.path.join(storage_root, IMAGES_SUBDIR)
    expected_image = _download_and_open_image(
        os.path.join(images_dir, str(expected_image_locator) + IMAGE_SUFFIX),
        expected_image_url)
    actual_image = _download_and_open_image(
        os.path.join(images_dir, str(actual_image_locator) + IMAGE_SUFFIX),
        actual_image_url)

    # Store the diff image (absolute diff at each pixel).
    diff_image = _generate_image_diff(actual_image, expected_image)
    self._weighted_diff_measure = _calculate_weighted_diff_metric(diff_image)
    diff_image_locator = _get_difference_locator(
        expected_image_locator=expected_image_locator,
        actual_image_locator=actual_image_locator)
    diff_filename = str(diff_image_locator) + IMAGE_SUFFIX
    diffs_dir = os.path.join(storage_root, DIFFS_SUBDIR)
    _mkdir_unless_exists(diffs_dir)
    diff_image.save(os.path.join(diffs_dir, diff_filename), IMAGE_FORMAT)

    # Store the whitediff image (any differing pixels show as white).
    #
    # Collapse the per-channel diffs into one band holding the largest
    # channel difference at each pixel, so a pixel is nonzero iff ANY channel
    # differs.  Thresholding that band to exactly 0 or 255 before
    # convert('1') matters: converting a multi-band image straight to
    # bilevel goes through a luminance transform plus dithering, which turns
    # many genuinely-differing pixels black and undercounts them.
    bands = diff_image.split()
    max_band_diff = bands[0]
    for band in bands[1:]:
      max_band_diff = ImageChops.lighter(max_band_diff, band)
    whitediff_image = (max_band_diff.point(lambda p: 255 if p else 0)
                       .convert('1'))
    whitediffs_dir = os.path.join(storage_root, WHITEDIFFS_SUBDIR)
    _mkdir_unless_exists(whitediffs_dir)
    whitediff_image.save(
        os.path.join(whitediffs_dir, diff_filename), IMAGE_FORMAT)

    # Calculate difference metrics.
    (self._width, self._height) = diff_image.size
    # White pixels land in the last histogram bucket of the bilevel image.
    self._num_pixels_differing = whitediff_image.histogram()[255]

  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive)."""
    return ((float(self._num_pixels_differing) * 100) /
            (self._width * self._height))

  def get_weighted_diff_measure(self):
    """Returns a weighted measure of image diffs, as a float between 0 and 100
    (inclusive)."""
    return self._weighted_diff_measure
| 110 | |
| 111 | |
class ImageDiffDB(object):
  """Calculates differences between image pairs, maintaining a database of
  them for download."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within which the DB will store all of
          its stuff
    """
    self._storage_root = storage_root

    # Dictionary of DiffRecords, keyed by (expected_image_locator,
    # actual_image_locator) tuples.
    self._diff_dict = {}

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    If a DiffRecord already exists for this pair of locators, this is a no-op.
    If creating the DiffRecord fails, the exception is logged and no record is
    stored, so a later get_diff_record() for this pair raises KeyError.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed.  Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    key = (expected_image_locator, actual_image_locator)
    if key in self._diff_dict:
      return
    try:
      new_diff_record = DiffRecord(
          self._storage_root,
          expected_image_url=expected_image_url,
          expected_image_locator=expected_image_locator,
          actual_image_url=actual_image_url,
          actual_image_locator=actual_image_locator)
    except Exception:
      # Deliberately best-effort: one bad image pair must not abort the whole
      # run.  Catch Exception rather than everything, so that
      # KeyboardInterrupt and SystemExit still propagate.
      #
      # NOTE(review): epoger (2013/11/07) commented on this line in the code
      # review; the comment is truncated in this capture: "As I was testing
      # these changes, I started getting ..."
      logging.exception('got exception while creating new DiffRecord')
      return
    self._diff_dict[key] = new_diff_record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    key = (expected_image_locator, actual_image_locator)
    return self._diff_dict[key]
| 174 | |
| 175 | |
| 176 # Utility functions | |
| 177 | |
| 178 def _calculate_weighted_diff_metric(image): | |
| 179 """Given a diff image (per-channel diff at each pixel between two images), | |
| 180 calculate the weighted diff metric (a stab at how different the two images | |
| 181 really are). | |
| 182 | |
| 183 Args: | |
| 184 image: PIL image; a per-channel diff between two images | |
| 185 | |
| 186 Returns: a weighted diff metric, as a float between 0 and 100 (inclusive). | |
| 187 """ | |
| 188 # TODO(epoger): This is just a wild guess at an appropriate metric. | |
| 189 # In the long term, we will probably use some metric generated by | |
| 190 # skpdiff anyway. | |
| 191 (width, height) = image.size | |
| 192 maxdiff = 3 * (width * height) * 255**2 | |
| 193 h = image.histogram() | |
| 194 assert(len(h) % 256 == 0) | |
| 195 totaldiff = sum(map(lambda index,value: value * (index%256)**2, | |
| 196 range(len(h)), h)) | |
| 197 return float(100 * totaldiff) / maxdiff | |
| 198 | |
def _generate_image_diff(image1, image2):
  """Diff two images via ImageChops.difference(), logging both images if the
  diff fails so the error is easier to track down.

  TODO(epoger): Currently, some of the images generated by the bots are RGBA
  and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
  within the UI, convert all to RGB when diffing.

  Args:
    image1: a PIL image object
    image2: a PIL image object

  Returns: per-pixel diffs between image1 and image2, as a PIL image object

  Raises: ValueError (re-raised after logging) if the conversion or the diff
      raises one.
  """
  try:
    # Normalize both inputs to RGB so the diff always has the same bands.
    rgb_image1 = image1.convert('RGB')
    rgb_image2 = image2.convert('RGB')
    return ImageChops.difference(rgb_image1, rgb_image2)
  except ValueError:
    message = 'Error diffing image1 [%s] and image2 [%s].' % (
        repr(image1), repr(image2))
    logging.error(message)
    raise
| 219 | |
def _download_and_open_image(local_filepath, url):
  """Open the image at local_filepath; if there is no file at that path,
  download it from url to that path and then open it.

  The download is written to a temporary sibling file and only renamed to
  local_filepath once it has completed, so an interrupted download can never
  leave a truncated file behind that later calls would mistake for a cached
  image.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet

  Returns: a PIL image object
  """
  if not os.path.exists(local_filepath):
    parent_dir = os.path.dirname(local_filepath)
    if parent_dir:
      _mkdir_unless_exists(parent_dir)
    partial_filepath = local_filepath + '.partial'
    try:
      with contextlib.closing(urllib.urlopen(url)) as url_handle:
        with open(partial_filepath, 'wb') as file_handle:
          shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
      os.rename(partial_filepath, local_filepath)
    finally:
      # After a successful rename this is a no-op; after a failure it removes
      # whatever partial data was written.
      if os.path.exists(partial_filepath):
        os.remove(partial_filepath)
  return _open_image(local_filepath)
| 236 | |
def _open_image(filepath):
  """Open an image with Image.open(), logging the offending path on failure.

  Args:
    filepath: path on local disk to load image from

  Returns: a PIL image object

  Raises: IOError (re-raised after logging) if Image.open() raises one.
  """
  try:
    opened_image = Image.open(filepath)
  except IOError:
    logging.error('IOError loading image file %s' % filepath)
    raise
  return opened_image
| 250 | |
| 251 def _mkdir_unless_exists(path): | |
| 252 """Unless path refers to an already-existing directory, create it. | |
| 253 | |
| 254 Args: | |
| 255 path: path on local disk | |
| 256 """ | |
| 257 if not os.path.isdir(path): | |
| 258 os.makedirs(path) | |
| 259 | |
| 260 def _get_difference_locator(expected_image_locator, actual_image_locator): | |
| 261 """Returns the locator string used to look up the diffs between expected_image | |
| 262 and actual_image. | |
| 263 | |
| 264 Args: | |
| 265 expected_image_locator: locator string pointing at expected image | |
| 266 actual_image_locator: locator string pointing at actual image | |
| 267 | |
| 268 Returns: locator where the diffs between expected and actual images can be | |
| 269 found | |
| 270 """ | |
| 271 return "%s-vs-%s" % (expected_image_locator, actual_image_locator) | |
| OLD | NEW |