Chromium Code Reviews
|
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 | |
| 3 """ | |
| 4 Copyright 2013 Google Inc. | |
| 5 | |
| 6 Use of this source code is governed by a BSD-style license that can be | |
| 7 found in the LICENSE file. | |
| 8 | |
| 9 Calculate differences between image pairs, and store them in a database. | |
| 10 Requires PIL to be installed; see http://www.pythonware.com/products/pil/ | |
|
rmistry
2013/11/06 19:12:52
Optional: You could also output this line by wrapp
epoger
2013/11/07 21:11:53
Done.
| |
| 11 """ | |
| 12 | |
| 13 # System-level imports | |
|
rmistry
2013/11/06 19:12:52
Nit: Not required since you do not have any other
epoger
2013/11/07 21:11:53
Done.
| |
| 14 import contextlib | |
| 15 import logging | |
| 16 import os | |
| 17 import urllib | |
| 18 from cStringIO import StringIO | |
|
jcgregorio
2013/11/06 18:47:28
unused import
epoger
2013/11/07 21:11:53
Done.
| |
| 19 from PIL import Image, ImageChops | |
| 20 | |
| 21 | |
| 22 IMAGE_SUFFIX = '.png' | |
| 23 IMAGE_FORMAT = 'PNG' # must match one of the PIL image formats, listed at | |
| 24 # http://effbot.org/imagingbook/formats.htm | |
| 25 | |
| 26 IMAGES_SUBDIR = 'images' | |
| 27 DIFFS_SUBDIR = 'diffs' | |
| 28 WHITEDIFFS_SUBDIR = 'whitediffs' | |
| 29 | |
|
jcgregorio
2013/11/06 18:47:28
2 lines
epoger
2013/11/07 21:11:53
Done.
| |
| 30 class DiffRecord(object): | |
| 31 """ Record of differences between two images. """ | |
| 32 | |
| 33 def __init__(self, storage_root, | |
| 34 expected_image_url, expected_image_locator, | |
| 35 actual_image_url, actual_image_locator): | |
| 36 """Download this pair of images (unless we already have them on local disk), | |
| 37 and prepare a DiffRecord for them. | |
| 38 | |
| 39 TODO(epoger): Make this asynchronously download images, rather than blocking | |
| 40 until the images have been downloaded and processed. | |
| 41 | |
| 42 Args: | |
| 43 storage_root: root directory on local disk within which we store all | |
| 44 images | |
| 45 expected_image_url: file or HTTP url from which we will download the | |
| 46 expected image | |
| 47 expected_image_locator: a unique ID string under which we will store the | |
| 48 expected image within storage_root (probably including a checksum to | |
| 49 guarantee uniqueness) | |
| 50 actual_image_url: file or HTTP url from which we will download the | |
| 51 actual image | |
| 52 actual_image_locator: a unique ID string under which we will store the | |
| 53 actual image within storage_root (probably including a checksum to | |
| 54 guarantee uniqueness) | |
| 55 """ | |
| 56 # Download the expected/actual images, if we don't have them already. | |
| 57 mkdir_unless_exists(os.path.join(storage_root, IMAGES_SUBDIR)) | |
|
jcgregorio
2013/11/06 18:47:28
Repeated code 58-69. Can you create a function loa
epoger
2013/11/07 21:11:53
Done.
| |
| 58 expected_image_filepath = os.path.join( | |
| 59 storage_root, IMAGES_SUBDIR, str(expected_image_locator) + IMAGE_SUFFIX) | |
| 60 actual_image_filepath = os.path.join( | |
| 61 storage_root, IMAGES_SUBDIR, str(actual_image_locator) + IMAGE_SUFFIX) | |
| 62 download_file_unless_exists( | |
| 63 source_url=expected_image_url, dest_filepath=expected_image_filepath) | |
| 64 download_file_unless_exists( | |
| 65 source_url=actual_image_url, dest_filepath=actual_image_filepath) | |
| 66 | |
| 67 # Read in expected/actual images. | |
| 68 expected_image = Image.open(expected_image_filepath) | |
| 69 actual_image = Image.open(actual_image_filepath) | |
| 70 | |
| 71 # Store the diff image (absolute diff at each pixel). | |
| 72 diff_image = generate_image_diff(actual_image, expected_image) | |
| 73 self._weighted_diff_measure = calculate_weighted_diff_metric(diff_image) | |
| 74 diff_image_locator = get_difference_locator( | |
| 75 expected_image_locator=expected_image_locator, | |
| 76 actual_image_locator=actual_image_locator) | |
| 77 diff_image_filepath = os.path.join( | |
| 78 storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX) | |
| 79 mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR)) | |
| 80 diff_image.save(diff_image_filepath, IMAGE_FORMAT) | |
| 81 | |
| 82 # Store the whitediff image (any differing pixels show as white). | |
| 83 # | |
| 84 # TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems | |
| 85 # like we should be able to use im.point(function, mode) to perform both | |
| 86 # the point() and convert('1') operations simultaneously, but I couldn't | |
| 87 # get it to work. | |
| 88 whitediff_image = (diff_image.point(lambda p: (0, 256)[p!=0]) | |
| 89 .convert('1')) | |
| 90 whitediff_image_filepath = os.path.join( | |
| 91 storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX) | |
| 92 mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR)) | |
| 93 whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT) | |
| 94 | |
| 95 # Calculate difference metrics. | |
| 96 (self._width, self._height) = diff_image.size | |
| 97 self._num_pixels_differing = whitediff_image.histogram()[255] | |
| 98 | |
| 99 def get_num_pixels_differing(self): | |
| 100 """Returns the absolute number of pixels that differ.""" | |
| 101 return self._num_pixels_differing | |
| 102 | |
| 103 def get_percent_pixels_differing(self): | |
| 104 """Returns the percentage of pixels that differ, as a float between | |
| 105 0 and 100 (inclusive).""" | |
| 106 return ((float(self._num_pixels_differing) * 100) / | |
| 107 (self._width * self._height)) | |
| 108 | |
| 109 def get_weighted_diff_measure(self): | |
| 110 """Returns a weighted measure of image diffs, as a float between 0 and 100 | |
| 111 (inclusive).""" | |
| 112 return self._weighted_diff_measure | |
| 113 | |
| 114 | |
| 115 class ImageDiffDB(object): | |
| 116 """ Calculates differences between image pairs, maintaining a database of | |
| 117 them for download.""" | |
| 118 | |
| 119 def __init__(self, storage_root): | |
| 120 """ | |
| 121 Args: | |
| 122 storage_root: string; root path within which the DB will store all of its stuff | |
| 123 """ | |
| 124 self._storage_root = storage_root | |
| 125 | |
| 126 # Dictionary of DiffRecords, keyed by (expected_image_locator, | |
| 127 # actual_image_locator) tuples. | |
| 128 self._diff_dict = {} | |
| 129 | |
| 130 def add_image_pair(self, | |
| 131 expected_image_url, expected_image_locator, | |
| 132 actual_image_url, actual_image_locator): | |
| 133 """Download this pair of images (unless we already have them on local disk), | |
| 134 and prepare a DiffRecord for them. | |
| 135 | |
| 136 TODO(epoger): Make this asynchronously download images, rather than blocking | |
|
jcgregorio
2013/11/06 18:47:28
I don't know if the async belongs in at this level
epoger
2013/11/07 21:11:53
Added to TODO, thanks.
| |
| 137 until the images have been downloaded and processed. | |
| 138 When we do that, we should probably add a new method that will block | |
| 139 until all of the images have been downloaded and processed. Otherwise, | |
| 140 we won't know when it's safe to start calling get_diff_record(). | |
| 141 | |
| 142 Args: | |
| 143 expected_image_url: file or HTTP url from which we will download the | |
| 144 expected image | |
| 145 expected_image_locator: a unique ID string under which we will store the | |
| 146 expected image within storage_root (probably including a checksum to | |
| 147 guarantee uniqueness) | |
| 148 actual_image_url: file or HTTP url from which we will download the | |
| 149 actual image | |
| 150 actual_image_locator: a unique ID string under which we will store the | |
| 151 actual image within storage_root (probably including a checksum to | |
| 152 guarantee uniqueness) | |
| 153 """ | |
| 154 key = (expected_image_locator, actual_image_locator) | |
| 155 if not self._diff_dict.get(key): | |
|
jcgregorio
2013/11/06 18:47:28
if not key in self._diff_dict:
epoger
2013/11/07 21:11:53
Done.
| |
| 156 self._diff_dict[key] = DiffRecord( | |
| 157 self._storage_root, | |
| 158 expected_image_url=expected_image_url, | |
| 159 expected_image_locator=expected_image_locator, | |
| 160 actual_image_url=actual_image_url, | |
| 161 actual_image_locator=actual_image_locator) | |
| 162 | |
| 163 def get_diff_record(self, expected_image_locator, actual_image_locator): | |
| 164 """Returns the DiffRecord for this image pair. | |
| 165 | |
| 166 Raises a KeyError if we don't have a DiffRecord for this image pair. | |
| 167 """ | |
| 168 key = (expected_image_locator, actual_image_locator) | |
| 169 return self._diff_dict[key] | |
| 170 | |
| 171 | |
| 172 # Utility functions | |
| 173 | |
def calculate_weighted_diff_metric(image):
  """Given a diff image (per-channel diff at each pixel between two images),
  calculate the weighted diff metric (a stab at how different the two images
  really are).

  Every histogram bucket contributes (count * channel_value**2), so large
  per-channel differences weigh more heavily than small ones.  The total is
  normalized against the largest total possible for an image of this size,
  i.e. every channel of every pixel differing by 255.

  Args:
    image: PIL image; a per-channel diff between two images

  Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).
    An image that contains no pixels at all yields 0.0.
  """
  # TODO(epoger): This is just a wild guess at an appropriate metric.
  # In the long term, we will probably use some metric generated by
  # skpdiff anyway.
  (width, height) = image.size
  histogram = image.histogram()
  # The histogram is expected to hold 256 buckets per channel, one channel
  # after another; that is why (index % 256) recovers the channel value below.
  assert len(histogram) % 256 == 0
  # Derive the channel count from the histogram rather than assuming RGB, so
  # the result stays within 0..100 for other channel counts.
  num_channels = len(histogram) // 256
  maxdiff = num_channels * (width * height) * 255**2
  if maxdiff == 0:
    # Zero-width or zero-height image: nothing can differ, and dividing by
    # maxdiff would raise ZeroDivisionError.
    return 0.0
  totaldiff = sum(count * (index % 256)**2
                  for index, count in enumerate(histogram))
  return float(100 * totaldiff) / maxdiff
| 194 | |
| 195 def generate_image_diff(image1, image2): | |
|
jcgregorio
2013/11/06 18:47:28
Document args and returns, here and below.
epoger
2013/11/07 21:11:53
Done.
| |
| 196 """Wrapper for ImageChops.difference(image1, image2) that will handle some | |
| 197 errors automatically, or at least yield more useful error messages. | |
| 198 | |
| 199 TODO(epoger): Currently, some of the images generated by the bots are RGBA | |
| 200 and others are RGB. I'm not sure why that is. For now, to avoid confusion | |
| 201 within the UI, convert all to RGB when diffing. | |
| 202 """ | |
| 203 try: | |
| 204 return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB')) | |
| 205 except ValueError: | |
| 206 logging.error('Error diffing image1 [%s] and image2 [%s].' % ( | |
| 207 repr(image1), repr(image2))) | |
| 208 raise | |
| 209 | |
| 210 def mkdir_unless_exists(path): | |
|
rmistry
2013/11/06 19:12:52
Make it private? this and some of the other top le
epoger
2013/11/07 21:11:53
Done.
| |
| 211 """Unless path refers to an already-existing directory, create it.""" | |
| 212 if not os.path.isdir(path): | |
| 213 os.makedirs(path) | |
| 214 | |
def download_file_unless_exists(source_url, dest_filepath):
  """Downloads the file from source_url, storing it at dest_filepath,
  UNLESS there is already a file at dest_filepath (in which case we do
  nothing).

  The data is first streamed into a temporary sibling file and only renamed
  to dest_filepath once the download has completed, so an interrupted
  download never leaves a truncated file that later calls would mistake for
  an already-downloaded one.

  Args:
    source_url: URL to download from
    dest_filepath: path on local disk at which to store the downloaded file
  """
  # Imported locally so this function does not depend on the module's
  # top-level import list.
  import shutil

  if os.path.exists(dest_filepath):
    return
  partial_filepath = dest_filepath + '.partial'
  try:
    with contextlib.closing(urllib.urlopen(source_url)) as url_handle:
      with open(partial_filepath, 'wb') as file_handle:
        # Copy in chunks rather than reading the whole payload into memory.
        shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
    os.rename(partial_filepath, dest_filepath)
  finally:
    # After a successful rename the partial file no longer exists; this only
    # cleans up after a failed or interrupted download.
    if os.path.exists(partial_filepath):
      os.remove(partial_filepath)
|
jcgregorio
2013/11/06 18:47:28
Consider shutil.copyfileobj for this:
http://docs
epoger
2013/11/07 21:11:53
Thanks, good advice! See http://stackoverflow.com
| |
| 223 | |
def get_difference_locator(expected_image_locator, actual_image_locator):
  """Builds the locator string under which the diffs between an expected image
  and an actual image are looked up.

  Args:
    expected_image_locator: locator of the expected image
    actual_image_locator: locator of the actual image

  Returns: the two locators combined as "<expected>-vs-<actual>".
  """
  locator_pair = (expected_image_locator, actual_image_locator)
  return "%s-vs-%s" % locator_pair
| 228 | |
| 229 | |
| 230 # Test harness | |
| 231 def main(): | |
| 232 logging.basicConfig(level=logging.INFO) | |
|
jcgregorio
2013/11/06 18:47:28
Break tests out into a separate imagediffdb_test.p
epoger
2013/11/07 21:11:53
Done.
| |
| 233 | |
| 234 # params for each self-test: | |
| 235 # 0. expected image locator | |
| 236 # 1. expected image URL | |
| 237 # 2. actual image locator | |
| 238 # 3. actual image URL | |
| 239 # 4. expected percent_pixels_differing (as a string, to 4 decimal places) | |
| 240 # 5. expected weighted_diff_measure (as a string, to 4 decimal places) | |
| 241 selftests = [ | |
| 242 ['16206093933823793653', 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/arcofzorro/16206093933823793653.png', | |
| 243 '13786535001616823825', 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/arcofzorro/13786535001616823825.png', | |
| 244 '0.0653', '0.0113'], | |
| 245 ] | |
| 246 | |
| 247 # Add all image pairs to the database | |
| 248 db = ImageDiffDB('/tmp/ImageDiffDB') | |
| 249 for selftest in selftests: | |
| 250 retval = db.add_image_pair( | |
| 251 expected_image_locator=selftest[0], expected_image_url=selftest[1], | |
| 252 actual_image_locator=selftest[2], actual_image_url=selftest[3]) | |
| 253 | |
| 254 # Fetch each image pair from the database | |
| 255 for selftest in selftests: | |
| 256 record = db.get_diff_record(expected_image_locator=selftest[0], | |
| 257 actual_image_locator=selftest[2]) | |
| 258 assert (('%.4f' % record.get_percent_pixels_differing()) == selftest[4]) | |
| 259 assert (('%.4f' % record.get_weighted_diff_measure()) == selftest[5]) | |
| 260 | |
| 261 logging.info("Self-test completed successfully!") | |
| 262 | |
|
jcgregorio
2013/11/06 18:47:28
2 lines
epoger
2013/11/07 21:11:53
Done (in the new test file)
| |
| 263 if __name__ == '__main__': | |
| 264 main() | |
| OLD | NEW |