gm/rebaseline_server/imagediffdb.py - Issue 59283006: rebaseline_server: add pixel diffs, and sorting by diff metrics

Unified Diff: gm/rebaseline_server/imagediffdb.py

Issue 59283006: rebaseline_server: add pixel diffs, and sorting by diff metrics (Closed) Base URL: http://skia.googlecode.com/svn/trunk/

Patch Set: lots_of_fixes Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: gm/rebaseline_server/imagediffdb.py

===================================================================

--- gm/rebaseline_server/imagediffdb.py (revision 0)

+++ gm/rebaseline_server/imagediffdb.py (revision 0)

@@ -0,0 +1,271 @@

+#!/usr/bin/python

+"""

+Use of this source code is governed by a BSD-style license that can be

+found in the LICENSE file.

+Calulate differences between image pairs, and store them in a database.

+"""

+import contextlib

+import logging

+import os

+import shutil

+import urllib

+try:

+ from PIL import Image, ImageChops

+except ImportError:

+ raise ImportError('Requires PIL to be installed; see '

+ + 'http://www.pythonware.com/products/pil/')

+IMAGE_SUFFIX = '.png'

+IMAGE_FORMAT = 'PNG' # must match one of the PIL image formats, listed at

+ # http://effbot.org/imagingbook/formats.htm

+IMAGES_SUBDIR = 'images'

+DIFFS_SUBDIR = 'diffs'

+WHITEDIFFS_SUBDIR = 'whitediffs'

+class DiffRecord(object):

+ """ Record of differences between two images. """

+ def __init__(self, storage_root,

+ expected_image_url, expected_image_locator,

+ actual_image_url, actual_image_locator):

+ """Download this pair of images (unless we already have them on local disk),

+ and prepare a DiffRecord for them.

+ TODO(epoger): Make this asynchronously download images, rather than blocking

+ until the images have been downloaded and processed.

+ Args:

+ storage_root: root directory on local disk within which we store all

+ images

+ expected_image_url: file or HTTP url from which we will download the

+ expected image

+ expected_image_locator: a unique ID string under which we will store the

+ expected image within storage_root (probably including a checksum to

+ guarantee uniqueness)

+ actual_image_url: file or HTTP url from which we will download the

+ actual image

+ actual_image_locator: a unique ID string under which we will store the

+ actual image within storage_root (probably including a checksum to

+ guarantee uniqueness)

+ """

+ # Download the expected/actual images, if we don't have them already.

+ expected_image = _download_and_open_image(

+ os.path.join(storage_root, IMAGES_SUBDIR,

+ str(expected_image_locator) + IMAGE_SUFFIX),

+ expected_image_url)

+ actual_image = _download_and_open_image(

+ os.path.join(storage_root, IMAGES_SUBDIR,

+ str(actual_image_locator) + IMAGE_SUFFIX),

+ actual_image_url)

+ # Store the diff image (absolute diff at each pixel).

+ diff_image = _generate_image_diff(actual_image, expected_image)

+ self._weighted_diff_measure = _calculate_weighted_diff_metric(diff_image)

+ diff_image_locator = _get_difference_locator(

+ expected_image_locator=expected_image_locator,

+ actual_image_locator=actual_image_locator)

+ diff_image_filepath = os.path.join(

+ storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)

+ _mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR))

+ diff_image.save(diff_image_filepath, IMAGE_FORMAT)

+ # Store the whitediff image (any differing pixels show as white).

+ #

+ # TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems

+ # like we should be able to use im.point(function, mode) to perform both

+ # the point() and convert('1') operations simultaneously, but I couldn't

+ # get it to work.

+ whitediff_image = (diff_image.point(lambda p: (0, 256)[p!=0])

+ .convert('1'))

+ whitediff_image_filepath = os.path.join(

+ storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)

+ _mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))

+ whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT)

+ # Calculate difference metrics.

+ (self._width, self._height) = diff_image.size

+ self._num_pixels_differing = whitediff_image.histogram()[255]

+ def get_num_pixels_differing(self):

+ """Returns the absolute number of pixels that differ."""

+ return self._num_pixels_differing

+ def get_percent_pixels_differing(self):

+ """Returns the percentage of pixels that differ, as a float between

+ 0 and 100 (inclusive)."""

+ return ((float(self._num_pixels_differing) * 100) /

+ (self._width * self._height))

+ def get_weighted_diff_measure(self):

+ """Returns a weighted measure of image diffs, as a float between 0 and 100

+ (inclusive)."""

+ return self._weighted_diff_measure

+class ImageDiffDB(object):

+ """ Calculates differences between image pairs, maintaining a database of

+ them for download."""

+ def __init__(self, storage_root):

+ """

+ Args:

+ storage_root: string; root path within the DB will store all of its stuff

+ """

+ self._storage_root = storage_root

+ # Dictionary of DiffRecords, keyed by (expected_image_locator,

+ # actual_image_locator) tuples.

+ self._diff_dict = {}

+ def add_image_pair(self,

+ expected_image_url, expected_image_locator,

+ actual_image_url, actual_image_locator):

+ """Download this pair of images (unless we already have them on local disk),

+ and prepare a DiffRecord for them.

+ TODO(epoger): Make this asynchronously download images, rather than blocking

+ until the images have been downloaded and processed.

+ When we do that, we should probably add a new method that will block

+ until all of the images have been downloaded and processed. Otherwise,

+ we won't know when it's safe to start calling get_diff_record().

+ jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a

+ thread-pool/worker queue at a higher level that just uses ImageDiffDB?

+ Args:

+ expected_image_url: file or HTTP url from which we will download the

+ expected image

+ expected_image_locator: a unique ID string under which we will store the

+ expected image within storage_root (probably including a checksum to

+ guarantee uniqueness)

+ actual_image_url: file or HTTP url from which we will download the

+ actual image

+ actual_image_locator: a unique ID string under which we will store the

+ actual image within storage_root (probably including a checksum to

+ guarantee uniqueness)

+ """

+ key = (expected_image_locator, actual_image_locator)

+ if not key in self._diff_dict:

+ try:

+ new_diff_record = DiffRecord(

+ self._storage_root,

+ expected_image_url=expected_image_url,

+ expected_image_locator=expected_image_locator,

+ actual_image_url=actual_image_url,

+ actual_image_locator=actual_image_locator)

+ except:

+ logging.exception('got exception while creating new DiffRecord')

epoger 2013/11/07 21:11:54 As I was testing these changes, I started getting

+ return

+ self._diff_dict[key] = new_diff_record

+ def get_diff_record(self, expected_image_locator, actual_image_locator):

+ """Returns the DiffRecord for this image pair.

+ Raises a KeyError if we don't have a DiffRecord for this image pair.

+ """

+ key = (expected_image_locator, actual_image_locator)

+ return self._diff_dict[key]

+# Utility functions

+def _calculate_weighted_diff_metric(image):

+ """Given a diff image (per-channel diff at each pixel between two images),

+ calculate the weighted diff metric (a stab at how different the two images

+ really are).

+ Args:

+ image: PIL image; a per-channel diff between two images

+ Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

+ """

+ # TODO(epoger): This is just a wild guess at an appropriate metric.

+ # In the long term, we will probably use some metric generated by

+ # skpdiff anyway.

+ (width, height) = image.size

+ maxdiff = 3 * (width * height) * 255**2

+ h = image.histogram()

+ assert(len(h) % 256 == 0)

+ totaldiff = sum(map(lambda index,value: value * (index%256)**2,

+ range(len(h)), h))

+ return float(100 * totaldiff) / maxdiff

+def _generate_image_diff(image1, image2):

+ """Wrapper for ImageChops.difference(image1, image2) that will handle some

+ errors automatically, or at least yield more useful error messages.

+ TODO(epoger): Currently, some of the images generated by the bots are RGBA

+ and others are RGB. I'm not sure why that is. For now, to avoid confusion

+ within the UI, convert all to RGB when diffing.

+ Args:

+ image1: a PIL image object

+ image2: a PIL image object

+ Returns: per-pixel diffs between image1 and image2, as a PIL image object

+ """

+ try:

+ return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))

+ except ValueError:

+ logging.error('Error diffing image1 [%s] and image2 [%s].' % (

+ repr(image1), repr(image2)))

+ raise

+def _download_and_open_image(local_filepath, url):

+ """Open the image at local_filepath; if there is no file at that path,

+ download it from url to that path and then open it.

+ Args:

+ local_filepath: path on local disk where the image should be stored

+ url: URL from which we can download the image if we don't have it yet

+ Returns: a PIL image object

+ """

+ if not os.path.exists(local_filepath):

+ _mkdir_unless_exists(os.path.dirname(local_filepath))

+ with contextlib.closing(urllib.urlopen(url)) as url_handle:

+ with open(local_filepath, 'wb') as file_handle:

+ shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)

+ return _open_image(local_filepath)

+def _open_image(filepath):

+ """Wrapper for Image.open(filepath) that yields more useful error messages.

+ Args:

+ filepath: path on local disk to load image from

+ Returns: a PIL image object

+ """

+ try:

+ return Image.open(filepath)

+ except IOError:

+ logging.error('IOError loading image file %s' % filepath)

+ raise

+def _mkdir_unless_exists(path):

+ """Unless path refers to an already-existing directory, create it.

+ Args:

+ path: path on local disk

+ """

+ if not os.path.isdir(path):

+ os.makedirs(path)

+def _get_difference_locator(expected_image_locator, actual_image_locator):

+ """Returns the locator string used to look up the diffs between expected_image

+ and actual_image.

+ Args:

+ expected_image_locator: locator string pointing at expected image

+ actual_image_locator: locator string pointing at actual image

+ Returns: locator where the diffs between expected and actual images can be

+ found

+ """

+ return "%s-vs-%s" % (expected_image_locator, actual_image_locator)

« no previous file with comments | « no previous file | gm/rebaseline_server/imagediffdb-test.py » ('j') | gm/rebaseline_server/imagediffdb-test.py » ('J')