|
OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 | |
3 """ | |
4 Copyright 2013 Google Inc. | |
5 | |
6 Use of this source code is governed by a BSD-style license that can be | |
7 found in the LICENSE file. | |
8 | |
9 Calculate differences between image pairs, and store them in a database. | |
10 Requires PIL to be installed; see http://www.pythonware.com/products/pil/ | |
rmistry
2013/11/06 19:12:52
Optional: You could also output this line by wrapp
epoger
2013/11/07 21:11:53
Done.
| |
11 """ | |
12 | |
13 # System-level imports | |
rmistry
2013/11/06 19:12:52
Nit: Not required since you do not have any other
epoger
2013/11/07 21:11:53
Done.
| |
14 import contextlib | |
15 import logging | |
16 import os | |
17 import urllib | |
18 from cStringIO import StringIO | |
jcgregorio
2013/11/06 18:47:28
unused import
epoger
2013/11/07 21:11:53
Done.
| |
19 from PIL import Image, ImageChops | |
20 | |
21 | |
22 IMAGE_SUFFIX = '.png' | |
23 IMAGE_FORMAT = 'PNG' # must match one of the PIL image formats, listed at | |
24 # http://effbot.org/imagingbook/formats.htm | |
25 | |
26 IMAGES_SUBDIR = 'images' | |
27 DIFFS_SUBDIR = 'diffs' | |
28 WHITEDIFFS_SUBDIR = 'whitediffs' | |
29 | |
jcgregorio
2013/11/06 18:47:28
2 lines
epoger
2013/11/07 21:11:53
Done.
| |
30 class DiffRecord(object): | |
31 """ Record of differences between two images. """ | |
32 | |
33 def __init__(self, storage_root, | |
34 expected_image_url, expected_image_locator, | |
35 actual_image_url, actual_image_locator): | |
36 """Download this pair of images (unless we already have them on local disk), | |
37 and prepare a DiffRecord for them. | |
38 | |
39 TODO(epoger): Make this asynchronously download images, rather than blocking | |
40 until the images have been downloaded and processed. | |
41 | |
42 Args: | |
43 storage_root: root directory on local disk within which we store all | |
44 images | |
45 expected_image_url: file or HTTP url from which we will download the | |
46 expected image | |
47 expected_image_locator: a unique ID string under which we will store the | |
48 expected image within storage_root (probably including a checksum to | |
49 guarantee uniqueness) | |
50 actual_image_url: file or HTTP url from which we will download the | |
51 actual image | |
52 actual_image_locator: a unique ID string under which we will store the | |
53 actual image within storage_root (probably including a checksum to | |
54 guarantee uniqueness) | |
55 """ | |
56 # Download the expected/actual images, if we don't have them already. | |
57 mkdir_unless_exists(os.path.join(storage_root, IMAGES_SUBDIR)) | |
jcgregorio
2013/11/06 18:47:28
Repeated code 58-69. Can you create a function loa
epoger
2013/11/07 21:11:53
Done.
| |
58 expected_image_filepath = os.path.join( | |
59 storage_root, IMAGES_SUBDIR, str(expected_image_locator) + IMAGE_SUFFIX) | |
60 actual_image_filepath = os.path.join( | |
61 storage_root, IMAGES_SUBDIR, str(actual_image_locator) + IMAGE_SUFFIX) | |
62 download_file_unless_exists( | |
63 source_url=expected_image_url, dest_filepath=expected_image_filepath) | |
64 download_file_unless_exists( | |
65 source_url=actual_image_url, dest_filepath=actual_image_filepath) | |
66 | |
67 # Read in expected/actual images. | |
68 expected_image = Image.open(expected_image_filepath) | |
69 actual_image = Image.open(actual_image_filepath) | |
70 | |
71 # Store the diff image (absolute diff at each pixel). | |
72 diff_image = generate_image_diff(actual_image, expected_image) | |
73 self._weighted_diff_measure = calculate_weighted_diff_metric(diff_image) | |
74 diff_image_locator = get_difference_locator( | |
75 expected_image_locator=expected_image_locator, | |
76 actual_image_locator=actual_image_locator) | |
77 diff_image_filepath = os.path.join( | |
78 storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX) | |
79 mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR)) | |
80 diff_image.save(diff_image_filepath, IMAGE_FORMAT) | |
81 | |
82 # Store the whitediff image (any differing pixels show as white). | |
83 # | |
84 # TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems | |
85 # like we should be able to use im.point(function, mode) to perform both | |
86 # the point() and convert('1') operations simultaneously, but I couldn't | |
87 # get it to work. | |
88 whitediff_image = (diff_image.point(lambda p: (0, 256)[p!=0]) | |
89 .convert('1')) | |
90 whitediff_image_filepath = os.path.join( | |
91 storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX) | |
92 mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR)) | |
93 whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT) | |
94 | |
95 # Calculate difference metrics. | |
96 (self._width, self._height) = diff_image.size | |
97 self._num_pixels_differing = whitediff_image.histogram()[255] | |
98 | |
99 def get_num_pixels_differing(self): | |
100 """Returns the absolute number of pixels that differ.""" | |
101 return self._num_pixels_differing | |
102 | |
103 def get_percent_pixels_differing(self): | |
104 """Returns the percentage of pixels that differ, as a float between | |
105 0 and 100 (inclusive).""" | |
106 return ((float(self._num_pixels_differing) * 100) / | |
107 (self._width * self._height)) | |
108 | |
109 def get_weighted_diff_measure(self): | |
110 """Returns a weighted measure of image diffs, as a float between 0 and 100 | |
111 (inclusive).""" | |
112 return self._weighted_diff_measure | |
113 | |
114 | |
115 class ImageDiffDB(object): | |
116 """ Calculates differences between image pairs, maintaining a database of | |
117 them for download.""" | |
118 | |
119 def __init__(self, storage_root): | |
120 """ | |
121 Args: | |
122 storage_root: string; root path within which the DB will store all of its stuff | |
123 """ | |
124 self._storage_root = storage_root | |
125 | |
126 # Dictionary of DiffRecords, keyed by (expected_image_locator, | |
127 # actual_image_locator) tuples. | |
128 self._diff_dict = {} | |
129 | |
130 def add_image_pair(self, | |
131 expected_image_url, expected_image_locator, | |
132 actual_image_url, actual_image_locator): | |
133 """Download this pair of images (unless we already have them on local disk), | |
134 and prepare a DiffRecord for them. | |
135 | |
136 TODO(epoger): Make this asynchronously download images, rather than blocking | |
jcgregorio
2013/11/06 18:47:28
I don't know if the async belongs in at this level
epoger
2013/11/07 21:11:53
Added to TODO, thanks.
| |
137 until the images have been downloaded and processed. | |
138 When we do that, we should probably add a new method that will block | |
139 until all of the images have been downloaded and processed. Otherwise, | |
140 we won't know when it's safe to start calling get_diff_record(). | |
141 | |
142 Args: | |
143 expected_image_url: file or HTTP url from which we will download the | |
144 expected image | |
145 expected_image_locator: a unique ID string under which we will store the | |
146 expected image within storage_root (probably including a checksum to | |
147 guarantee uniqueness) | |
148 actual_image_url: file or HTTP url from which we will download the | |
149 actual image | |
150 actual_image_locator: a unique ID string under which we will store the | |
151 actual image within storage_root (probably including a checksum to | |
152 guarantee uniqueness) | |
153 """ | |
154 key = (expected_image_locator, actual_image_locator) | |
155 if not self._diff_dict.get(key): | |
jcgregorio
2013/11/06 18:47:28
if not key in self._diff_dict:
epoger
2013/11/07 21:11:53
Done.
| |
156 self._diff_dict[key] = DiffRecord( | |
157 self._storage_root, | |
158 expected_image_url=expected_image_url, | |
159 expected_image_locator=expected_image_locator, | |
160 actual_image_url=actual_image_url, | |
161 actual_image_locator=actual_image_locator) | |
162 | |
163 def get_diff_record(self, expected_image_locator, actual_image_locator): | |
164 """Returns the DiffRecord for this image pair. | |
165 | |
166 Raises a KeyError if we don't have a DiffRecord for this image pair. | |
167 """ | |
168 key = (expected_image_locator, actual_image_locator) | |
169 return self._diff_dict[key] | |
170 | |
171 | |
172 # Utility functions | |
173 | |
174 def calculate_weighted_diff_metric(image): | |
175 """Given a diff image (per-channel diff at each pixel between two images), | |
176 calculate the weighted diff metric (a stab at how different the two images | |
177 really are). | |
178 | |
179 Args: | |
180 image: PIL image; a per-channel diff between two images | |
181 | |
182 Returns: a weighted diff metric, as a float between 0 and 100 (inclusive). | |
183 """ | |
184 # TODO(epoger): This is just a wild guess at an appropriate metric. | |
185 # In the long term, we will probably use some metric generated by | |
186 # skpdiff anyway. | |
187 (width, height) = image.size | |
188 maxdiff = 3 * (width * height) * 255**2 | |
189 h = image.histogram() | |
190 assert(len(h) % 256 == 0) | |
191 totaldiff = sum(map(lambda index,value: value * (index%256)**2, | |
192 range(len(h)), h)) | |
193 return float(100 * totaldiff) / maxdiff | |
194 | |
195 def generate_image_diff(image1, image2): | |
jcgregorio
2013/11/06 18:47:28
Document args and returns, here and below.
epoger
2013/11/07 21:11:53
Done.
| |
196 """Wrapper for ImageChops.difference(image1, image2) that will handle some | |
197 errors automatically, or at least yield more useful error messages. | |
198 | |
199 TODO(epoger): Currently, some of the images generated by the bots are RGBA | |
200 and others are RGB. I'm not sure why that is. For now, to avoid confusion | |
201 within the UI, convert all to RGB when diffing. | |
202 """ | |
203 try: | |
204 return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB')) | |
205 except ValueError: | |
206 logging.error('Error diffing image1 [%s] and image2 [%s].' % ( | |
207 repr(image1), repr(image2))) | |
208 raise | |
209 | |
210 def mkdir_unless_exists(path): | |
rmistry
2013/11/06 19:12:52
Make it private? this and some of the other top le
epoger
2013/11/07 21:11:53
Done.
| |
211 """Unless path refers to an already-existing directory, create it.""" | |
212 if not os.path.isdir(path): | |
213 os.makedirs(path) | |
214 | |
215 def download_file_unless_exists(source_url, dest_filepath): | |
216 """Downloads the file from source_url, storing it at dest_filepath, | |
217 UNLESS there is already a file at dest_filepath (in which case we do | |
218 nothing).""" | |
219 if not os.path.exists(dest_filepath): | |
220 with contextlib.closing(urllib.urlopen(source_url)) as url_handle: | |
221 with open(dest_filepath, 'wb') as file_handle: | |
222 file_handle.write(url_handle.read()) | |
jcgregorio
2013/11/06 18:47:28
Consider shutil.copyfileobj for this:
http://docs
epoger
2013/11/07 21:11:53
Thanks, good advice! See http://stackoverflow.com
| |
223 | |
224 def get_difference_locator(expected_image_locator, actual_image_locator): | |
225 """Returns the locator string used to look up the diffs between expected_image | |
226 and actual_image.""" | |
227 return "%s-vs-%s" % (expected_image_locator, actual_image_locator) | |
228 | |
229 | |
230 # Test harness | |
231 def main(): | |
232 logging.basicConfig(level=logging.INFO) | |
jcgregorio
2013/11/06 18:47:28
Break tests out into a separate imagediffdb_test.p
epoger
2013/11/07 21:11:53
Done.
| |
233 | |
234 # params for each self-test: | |
235 # 0. expected image locator | |
236 # 1. expected image URL | |
237 # 2. actual image locator | |
238 # 3. actual image URL | |
239 # 4. expected percent_pixels_differing (as a string, to 4 decimal places) | |
240 # 5. expected weighted_diff_measure (as a string, to 4 decimal places) | |
241 selftests = [ | |
242 ['16206093933823793653', 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/arcofzorro/16206093933823793653.png', | |
243 '13786535001616823825', 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/arcofzorro/13786535001616823825.png', | |
244 '0.0653', '0.0113'], | |
245 ] | |
246 | |
247 # Add all image pairs to the database | |
248 db = ImageDiffDB('/tmp/ImageDiffDB') | |
249 for selftest in selftests: | |
250 retval = db.add_image_pair( | |
251 expected_image_locator=selftest[0], expected_image_url=selftest[1], | |
252 actual_image_locator=selftest[2], actual_image_url=selftest[3]) | |
253 | |
254 # Fetch each image pair from the database | |
255 for selftest in selftests: | |
256 record = db.get_diff_record(expected_image_locator=selftest[0], | |
257 actual_image_locator=selftest[2]) | |
258 assert (('%.4f' % record.get_percent_pixels_differing()) == selftest[4]) | |
259 assert (('%.4f' % record.get_weighted_diff_measure()) == selftest[5]) | |
260 | |
261 logging.info("Self-test completed successfully!") | |
262 | |
jcgregorio
2013/11/06 18:47:28
2 lines
epoger
2013/11/07 21:11:53
Done (in the new test file)
| |
263 if __name__ == '__main__': | |
264 main() | |
OLD | NEW |