gm/rebaseline_server/imagediffdb.py - Issue 59283006: rebaseline_server: add pixel diffs, and sorting by diff metrics

Side by Side Diff: gm/rebaseline_server/imagediffdb.py

Issue 59283006: rebaseline_server: add pixel diffs, and sorting by diff metrics (Closed) Base URL: http://skia.googlecode.com/svn/trunk/

Patch Set: rename_selftest Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2

	3 """

	4 Copyright 2013 Google Inc.

	5

	6 Use of this source code is governed by a BSD-style license that can be

	7 found in the LICENSE file.

	8

	9 Calculate differences between image pairs, and store them in a database.

	10 """

	11

	12 import contextlib

	13 import logging

	14 import os

	15 import shutil

	16 import urllib

	17 try:

	18 from PIL import Image, ImageChops

	19 except ImportError:

	20 raise ImportError('Requires PIL to be installed; see '

	21 + 'http://www.pythonware.com/products/pil/')

	22

# Filename suffix appended to every locator when building the on-disk path of
# a downloaded image, a diff image, or a whitediff image.
IMAGE_SUFFIX = '.png'
# Format name handed to PIL's Image.save() for the diff and whitediff images.
IMAGE_FORMAT = 'PNG'  # must match one of the PIL image formats, listed at
                      # http://effbot.org/imagingbook/formats.htm

# Subdirectories of storage_root holding, respectively: the downloaded
# expected/actual images, the per-channel diff images, and the whitediff
# images.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

	30

	31

	32 class DiffRecord(object):

	33 """ Record of differences between two images. """

	34

	35 def __init__(self, storage_root,

	36 expected_image_url, expected_image_locator,

	37 actual_image_url, actual_image_locator):

	38 """Download this pair of images (unless we already have them on local disk),

	39 and prepare a DiffRecord for them.

	40

	41 TODO(epoger): Make this asynchronously download images, rather than blocking

	42 until the images have been downloaded and processed.

	43

	44 Args:

	45 storage_root: root directory on local disk within which we store all

	46 images

	47 expected_image_url: file or HTTP url from which we will download the

	48 expected image

	49 expected_image_locator: a unique ID string under which we will store the

	50 expected image within storage_root (probably including a checksum to

	51 guarantee uniqueness)

	52 actual_image_url: file or HTTP url from which we will download the

	53 actual image

	54 actual_image_locator: a unique ID string under which we will store the

	55 actual image within storage_root (probably including a checksum to

	56 guarantee uniqueness)

	57 """

	58 # Download the expected/actual images, if we don't have them already.

	59 expected_image = _download_and_open_image(

	60 os.path.join(storage_root, IMAGES_SUBDIR,

	61 str(expected_image_locator) + IMAGE_SUFFIX),

	62 expected_image_url)

	63 actual_image = _download_and_open_image(

	64 os.path.join(storage_root, IMAGES_SUBDIR,

	65 str(actual_image_locator) + IMAGE_SUFFIX),

	66 actual_image_url)

	67

	68 # Store the diff image (absolute diff at each pixel).

	69 diff_image = _generate_image_diff(actual_image, expected_image)

	70 self._weighted_diff_measure = _calculate_weighted_diff_metric(diff_image)

	71 diff_image_locator = _get_difference_locator(

	72 expected_image_locator=expected_image_locator,

	73 actual_image_locator=actual_image_locator)

	74 diff_image_filepath = os.path.join(

	75 storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)

	76 _mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR))

	77 diff_image.save(diff_image_filepath, IMAGE_FORMAT)

	78

	79 # Store the whitediff image (any differing pixels show as white).

	80 #

	81 # TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems

	82 # like we should be able to use im.point(function, mode) to perform both

	83 # the point() and convert('1') operations simultaneously, but I couldn't

	84 # get it to work.

	85 whitediff_image = (diff_image.point(lambda p: (0, 256)[p!=0])

	86 .convert('1'))

	87 whitediff_image_filepath = os.path.join(

	88 storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)

	89 _mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))

	90 whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT)

	91

	92 # Calculate difference metrics.

	93 (self._width, self._height) = diff_image.size

	94 self._num_pixels_differing = whitediff_image.histogram()[255]

	95

	96 def get_num_pixels_differing(self):

	97 """Returns the absolute number of pixels that differ."""

	98 return self._num_pixels_differing

	99

	100 def get_percent_pixels_differing(self):

	101 """Returns the percentage of pixels that differ, as a float between

	102 0 and 100 (inclusive)."""

	103 return ((float(self._num_pixels_differing) * 100) /

	104 (self._width * self._height))

	105

	106 def get_weighted_diff_measure(self):

	107 """Returns a weighted measure of image diffs, as a float between 0 and 100

	108 (inclusive)."""

	109 return self._weighted_diff_measure

	110

	111

	112 class ImageDiffDB(object):

	113 """ Calculates differences between image pairs, maintaining a database of

	114 them for download."""

	115

	116 def __init__(self, storage_root):

	117 """

	118 Args:

	119 storage_root: string; root path within the DB will store all of its stuff

	120 """

	121 self._storage_root = storage_root

	122

	123 # Dictionary of DiffRecords, keyed by (expected_image_locator,

	124 # actual_image_locator) tuples.

	125 self._diff_dict = {}

	126

	127 def add_image_pair(self,

	128 expected_image_url, expected_image_locator,

	129 actual_image_url, actual_image_locator):

	130 """Download this pair of images (unless we already have them on local disk),

	131 and prepare a DiffRecord for them.

	132

	133 TODO(epoger): Make this asynchronously download images, rather than blocking

	134 until the images have been downloaded and processed.

	135 When we do that, we should probably add a new method that will block

	136 until all of the images have been downloaded and processed. Otherwise,

	137 we won't know when it's safe to start calling get_diff_record().

	138 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a

	139 thread-pool/worker queue at a higher level that just uses ImageDiffDB?

	140

	141 Args:

	142 expected_image_url: file or HTTP url from which we will download the

	143 expected image

	144 expected_image_locator: a unique ID string under which we will store the

	145 expected image within storage_root (probably including a checksum to

	146 guarantee uniqueness)

	147 actual_image_url: file or HTTP url from which we will download the

	148 actual image

	149 actual_image_locator: a unique ID string under which we will store the

	150 actual image within storage_root (probably including a checksum to

	151 guarantee uniqueness)

	152 """

	153 key = (expected_image_locator, actual_image_locator)

	154 if not key in self._diff_dict:

	155 try:

	156 new_diff_record = DiffRecord(

	157 self._storage_root,

	158 expected_image_url=expected_image_url,

	159 expected_image_locator=expected_image_locator,

	160 actual_image_url=actual_image_url,

	161 actual_image_locator=actual_image_locator)

	162 except:

	163 logging.exception('got exception while creating new DiffRecord')

	164 return

	165 self._diff_dict[key] = new_diff_record

	166

	167 def get_diff_record(self, expected_image_locator, actual_image_locator):

	168 """Returns the DiffRecord for this image pair.

	169

	170 Raises a KeyError if we don't have a DiffRecord for this image pair.

	171 """

	172 key = (expected_image_locator, actual_image_locator)

	173 return self._diff_dict[key]

	174

	175

	176 # Utility functions

	177

	178 def _calculate_weighted_diff_metric(image):

	179 """Given a diff image (per-channel diff at each pixel between two images),

	180 calculate the weighted diff metric (a stab at how different the two images

	181 really are).

	182

	183 Args:

	184 image: PIL image; a per-channel diff between two images

	185

	186 Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

	187 """

	188 # TODO(epoger): This is just a wild guess at an appropriate metric.

	189 # In the long term, we will probably use some metric generated by

	190 # skpdiff anyway.

	191 (width, height) = image.size

	192 maxdiff = 3 * (width * height) * 255**2

	193 h = image.histogram()

	194 assert(len(h) % 256 == 0)

	195 totaldiff = sum(map(lambda index,value: value * (index%256)**2,

	196 range(len(h)), h))

	197 return float(100 * totaldiff) / maxdiff

	198

	199 def _generate_image_diff(image1, image2):

	200 """Wrapper for ImageChops.difference(image1, image2) that will handle some

	201 errors automatically, or at least yield more useful error messages.

	202

	203 TODO(epoger): Currently, some of the images generated by the bots are RGBA

	204 and others are RGB. I'm not sure why that is. For now, to avoid confusion

	205 within the UI, convert all to RGB when diffing.

	206

	207 Args:

	208 image1: a PIL image object

	209 image2: a PIL image object

	210

	211 Returns: per-pixel diffs between image1 and image2, as a PIL image object

	212 """

	213 try:

	214 return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))

	215 except ValueError:

	216 logging.error('Error diffing image1 [%s] and image2 [%s].' % (

	217 repr(image1), repr(image2)))

	218 raise

	219

	220 def _download_and_open_image(local_filepath, url):

	221 """Open the image at local_filepath; if there is no file at that path,

	222 download it from url to that path and then open it.

	223

	224 Args:

	225 local_filepath: path on local disk where the image should be stored

	226 url: URL from which we can download the image if we don't have it yet

	227

	228 Returns: a PIL image object

	229 """

	230 if not os.path.exists(local_filepath):

	231 _mkdir_unless_exists(os.path.dirname(local_filepath))

	232 with contextlib.closing(urllib.urlopen(url)) as url_handle:

	233 with open(local_filepath, 'wb') as file_handle:

	234 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)

	235 return _open_image(local_filepath)

	236

	237 def _open_image(filepath):

	238 """Wrapper for Image.open(filepath) that yields more useful error messages.

	239

	240 Args:

	241 filepath: path on local disk to load image from

	242

	243 Returns: a PIL image object

	244 """

	245 try:

	246 return Image.open(filepath)

	247 except IOError:

	248 logging.error('IOError loading image file %s' % filepath)

	249 raise

	250

	251 def _mkdir_unless_exists(path):

	252 """Unless path refers to an already-existing directory, create it.

	253

	254 Args:

	255 path: path on local disk

	256 """

	257 if not os.path.isdir(path):

	258 os.makedirs(path)

	259

	260 def _get_difference_locator(expected_image_locator, actual_image_locator):

	261 """Returns the locator string used to look up the diffs between expected_image

	262 and actual_image.

	263

	264 Args:

	265 expected_image_locator: locator string pointing at expected image

	266 actual_image_locator: locator string pointing at actual image

	267

	268 Returns: locator where the diffs between expected and actual images can be

	269 found

	270 """

	271 return "%s-vs-%s" % (expected_image_locator, actual_image_locator)

OLD	NEW

« no previous file with comments | « no previous file | gm/rebaseline_server/imagediffdb_test.py » ('j') | no next file with comments »