OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 | |
3 """ | |
4 Copyright 2013 Google Inc. | |
5 | |
6 Use of this source code is governed by a BSD-style license that can be | |
7 found in the LICENSE file. | |
8 | |
9 Calculate differences between image pairs, and store them in a database. | |
10 """ | |
11 | |
12 import contextlib | |
13 import logging | |
14 import os | |
15 import shutil | |
16 import urllib | |
17 try: | |
18 from PIL import Image, ImageChops | |
19 except ImportError: | |
20 raise ImportError('Requires PIL to be installed; see ' | |
21 + 'http://www.pythonware.com/products/pil/') | |
22 | |
23 IMAGE_SUFFIX = '.png' | |
24 IMAGE_FORMAT = 'PNG' # must match one of the PIL image formats, listed at | |
25 # http://effbot.org/imagingbook/formats.htm | |
26 | |
27 IMAGES_SUBDIR = 'images' | |
28 DIFFS_SUBDIR = 'diffs' | |
29 WHITEDIFFS_SUBDIR = 'whitediffs' | |
30 | |
31 | |
class DiffRecord(object):
  """Record of differences between two images.

  Constructing a DiffRecord fetches both images (unless they are already
  stored under storage_root), writes a per-channel diff image and a
  black/white "whitediff" image to disk, and keeps summary metrics in memory.
  """

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    # Download the expected/actual images, if we don't have them already.
    images_dir = os.path.join(storage_root, IMAGES_SUBDIR)
    expected_image = _download_and_open_image(
        os.path.join(images_dir, str(expected_image_locator) + IMAGE_SUFFIX),
        expected_image_url)
    actual_image = _download_and_open_image(
        os.path.join(images_dir, str(actual_image_locator) + IMAGE_SUFFIX),
        actual_image_url)

    # Store the diff image (absolute diff at each pixel).
    diff_image = _generate_image_diff(actual_image, expected_image)
    self._weighted_diff_measure = _calculate_weighted_diff_metric(diff_image)
    diff_image_locator = _get_difference_locator(
        expected_image_locator=expected_image_locator,
        actual_image_locator=actual_image_locator)
    diff_image_filename = str(diff_image_locator) + IMAGE_SUFFIX
    diffs_dir = os.path.join(storage_root, DIFFS_SUBDIR)
    _mkdir_unless_exists(diffs_dir)
    diff_image.save(os.path.join(diffs_dir, diff_image_filename), IMAGE_FORMAT)

    # Store the whitediff image (any differing pixels show as white).
    #
    # Collapse the bands into a single band holding the largest per-channel
    # difference, so a pixel that differs in only one channel still counts.
    # Converting the multi-band image straight to mode '1' goes through a
    # luminance conversion plus (by default) dithering, which can turn such
    # pixels black and undercount the differing pixels.
    bands = diff_image.split()
    max_band = bands[0]
    for band in bands[1:]:
      max_band = ImageChops.lighter(max_band, band)
    # 255 is the largest value an 8-bit band can hold; dithering is disabled
    # so every nonzero pixel maps to white exactly.
    whitediff_image = (max_band.point(lambda p: 255 if p else 0)
                       .convert('1', dither=Image.NONE))
    whitediffs_dir = os.path.join(storage_root, WHITEDIFFS_SUBDIR)
    _mkdir_unless_exists(whitediffs_dir)
    whitediff_image.save(
        os.path.join(whitediffs_dir, diff_image_filename), IMAGE_FORMAT)

    # Calculate difference metrics.
    (self._width, self._height) = diff_image.size
    # White pixels of the whitediff land in the last histogram bin.
    self._num_pixels_differing = whitediff_image.histogram()[255]

  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive)."""
    return ((float(self._num_pixels_differing) * 100) /
            (self._width * self._height))

  def get_weighted_diff_measure(self):
    """Returns a weighted measure of image diffs, as a float between 0 and 100
    (inclusive)."""
    return self._weighted_diff_measure
110 | |
111 | |
class ImageDiffDB(object):
  """Calculates differences between image pairs, maintaining a database of
  them for download."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within which the DB will store all of
          its stuff
    """
    self._storage_root = storage_root

    # Dictionary of DiffRecords, keyed by (expected_image_locator,
    # actual_image_locator) tuples.
    self._diff_dict = {}

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    If a DiffRecord already exists for this locator pair, this is a no-op.
    If creating the DiffRecord fails, the exception is logged and no record
    is stored, so a later get_diff_record() for the pair raises KeyError.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed.  Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    key = (expected_image_locator, actual_image_locator)
    if key in self._diff_dict:
      return
    try:
      new_diff_record = DiffRecord(
          self._storage_root,
          expected_image_url=expected_image_url,
          expected_image_locator=expected_image_locator,
          actual_image_url=actual_image_url,
          actual_image_locator=actual_image_locator)
    except Exception:
      # Best-effort: one bad image pair must not abort the whole run.  Catch
      # Exception rather than using a bare except, so that KeyboardInterrupt
      # and SystemExit still propagate.
      #
      # NOTE(review): an inline review comment from epoger (2013/11/07
      # 21:11:54) is attached to this handler; its text is truncated in this
      # view ("As I was testing these changes, I started getting"), so the
      # exact failure that motivated the handler is not recorded here.
      logging.exception('got exception while creating new DiffRecord')
      return
    self._diff_dict[key] = new_diff_record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    key = (expected_image_locator, actual_image_locator)
    return self._diff_dict[key]
174 | |
175 | |
176 # Utility functions | |
177 | |
178 def _calculate_weighted_diff_metric(image): | |
179 """Given a diff image (per-channel diff at each pixel between two images), | |
180 calculate the weighted diff metric (a stab at how different the two images | |
181 really are). | |
182 | |
183 Args: | |
184 image: PIL image; a per-channel diff between two images | |
185 | |
186 Returns: a weighted diff metric, as a float between 0 and 100 (inclusive). | |
187 """ | |
188 # TODO(epoger): This is just a wild guess at an appropriate metric. | |
189 # In the long term, we will probably use some metric generated by | |
190 # skpdiff anyway. | |
191 (width, height) = image.size | |
192 maxdiff = 3 * (width * height) * 255**2 | |
193 h = image.histogram() | |
194 assert(len(h) % 256 == 0) | |
195 totaldiff = sum(map(lambda index,value: value * (index%256)**2, | |
196 range(len(h)), h)) | |
197 return float(100 * totaldiff) / maxdiff | |
198 | |
def _generate_image_diff(image1, image2):
  """Wrapper for ImageChops.difference(image1, image2) that will handle some
  errors automatically, or at least yield more useful error messages.

  Both inputs are converted to RGB before diffing.  If a ValueError is
  raised, the two images are logged and the exception is re-raised.

  TODO(epoger): Currently, some of the images generated by the bots are RGBA
  and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
  within the UI, convert all to RGB when diffing.

  Args:
    image1: a PIL image object
    image2: a PIL image object

  Returns: per-pixel diffs between image1 and image2, as a PIL image object
  """
  try:
    rgb_image1 = image1.convert('RGB')
    rgb_image2 = image2.convert('RGB')
    return ImageChops.difference(rgb_image1, rgb_image2)
  except ValueError:
    message = 'Error diffing image1 [%s] and image2 [%s].' % (
        repr(image1), repr(image2))
    logging.error(message)
    raise
219 | |
def _download_and_open_image(local_filepath, url):
  """Open the image at local_filepath; if there is no file at that path,
  download it from url to that path and then open it.

  If the download fails partway through, any partially written file is
  removed before the exception propagates.  Otherwise the truncated file
  would be treated as already downloaded on every later call.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet

  Returns: a PIL image object
  """
  if not os.path.exists(local_filepath):
    _mkdir_unless_exists(os.path.dirname(local_filepath))
    download_complete = False
    try:
      with contextlib.closing(urllib.urlopen(url)) as url_handle:
        with open(local_filepath, 'wb') as file_handle:
          shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
      download_complete = True
    finally:
      # A flag plus finally (rather than except) also cleans up when the
      # download is interrupted by KeyboardInterrupt or SystemExit.
      if not download_complete and os.path.exists(local_filepath):
        os.remove(local_filepath)
  return _open_image(local_filepath)
236 | |
def _open_image(filepath):
  """Wrapper for Image.open(filepath) that yields more useful error messages.

  An IOError from Image.open() is logged together with the offending path
  and then re-raised.

  Args:
    filepath: path on local disk to load image from

  Returns: a PIL image object
  """
  try:
    opened_image = Image.open(filepath)
  except IOError:
    logging.error('IOError loading image file %s' % filepath)
    raise
  return opened_image
250 | |
251 def _mkdir_unless_exists(path): | |
252 """Unless path refers to an already-existing directory, create it. | |
253 | |
254 Args: | |
255 path: path on local disk | |
256 """ | |
257 if not os.path.isdir(path): | |
258 os.makedirs(path) | |
259 | |
260 def _get_difference_locator(expected_image_locator, actual_image_locator): | |
261 """Returns the locator string used to look up the diffs between expected_image | |
262 and actual_image. | |
263 | |
264 Args: | |
265 expected_image_locator: locator string pointing at expected image | |
266 actual_image_locator: locator string pointing at actual image | |
267 | |
268 Returns: locator where the diffs between expected and actual images can be | |
269 found | |
270 """ | |
271 return "%s-vs-%s" % (expected_image_locator, actual_image_locator) | |
OLD | NEW |