Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: gm/rebaseline_server/imagediffdb.py

Issue 239623002: Revert of rebaseline_server: multithreaded loading/diffing of images (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « gm/rebaseline_server/compare_to_expectations.py ('k') | gm/rebaseline_server/imagepair.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 """ 3 """
4 Copyright 2013 Google Inc. 4 Copyright 2013 Google Inc.
5 5
6 Use of this source code is governed by a BSD-style license that can be 6 Use of this source code is governed by a BSD-style license that can be
7 found in the LICENSE file. 7 found in the LICENSE file.
8 8
9 Calulate differences between image pairs, and store them in a database. 9 Calulate differences between image pairs, and store them in a database.
10 """ 10 """
11 11
12 import contextlib 12 import contextlib
13 import csv 13 import csv
14 import errno
15 import logging 14 import logging
16 import Queue
17 import os 15 import os
18 import re 16 import re
19 import shutil 17 import shutil
20 import sys 18 import sys
21 import tempfile 19 import tempfile
22 import time
23 import threading
24 import urllib 20 import urllib
25 try: 21 try:
26 from PIL import Image, ImageChops 22 from PIL import Image, ImageChops
27 except ImportError: 23 except ImportError:
28 raise ImportError('Requires PIL to be installed; see ' 24 raise ImportError('Requires PIL to be installed; see '
29 + 'http://www.pythonware.com/products/pil/') 25 + 'http://www.pythonware.com/products/pil/')
30 26
31 # Set the PYTHONPATH to include the tools directory. 27 # Set the PYTHONPATH to include the tools directory.
32 sys.path.append( 28 sys.path.append(
33 os.path.join( 29 os.path.join(
34 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, 30 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
35 'tools')) 31 'tools'))
36 import find_run_binary 32 import find_run_binary
37 33
38 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') 34 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
39 35
40 DEFAULT_IMAGE_SUFFIX = '.png' 36 DEFAULT_IMAGE_SUFFIX = '.png'
41 DEFAULT_IMAGES_SUBDIR = 'images' 37 DEFAULT_IMAGES_SUBDIR = 'images'
42 DEFAULT_NUM_WORKERS = 8
43 38
44 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]') 39 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')
45 40
46 DIFFS_SUBDIR = 'diffs' 41 DIFFS_SUBDIR = 'diffs'
47 WHITEDIFFS_SUBDIR = 'whitediffs' 42 WHITEDIFFS_SUBDIR = 'whitediffs'
48 43
49 VALUES_PER_BAND = 256 44 VALUES_PER_BAND = 256
50 45
51 # Keys used within DiffRecord dictionary representations. 46 # Keys used within DiffRecord dictionary representations.
52 # NOTE: Keep these in sync with static/constants.js 47 # NOTE: Keep these in sync with static/constants.js
53 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel' 48 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
54 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels' 49 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels'
55 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' 50 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
56 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference' 51 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'
57 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure' 52 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'
58 53
59 # Special values within ImageDiffDB._diff_dict
60 DIFFRECORD_FAILED = 'failed'
61 DIFFRECORD_PENDING = 'pending'
62
63 # TODO(epoger): Temporary(?) list to keep track of how many times we download
64 # the same file in multiple threads.
65 global_file_collisions = 0
66
67 54
68 class DiffRecord(object): 55 class DiffRecord(object):
69 """ Record of differences between two images. """ 56 """ Record of differences between two images. """
70 57
71 def __init__(self, storage_root, 58 def __init__(self, storage_root,
72 expected_image_url, expected_image_locator, 59 expected_image_url, expected_image_locator,
73 actual_image_url, actual_image_locator, 60 actual_image_url, actual_image_locator,
74 expected_images_subdir=DEFAULT_IMAGES_SUBDIR, 61 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
75 actual_images_subdir=DEFAULT_IMAGES_SUBDIR, 62 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
76 image_suffix=DEFAULT_IMAGE_SUFFIX): 63 image_suffix=DEFAULT_IMAGE_SUFFIX):
77 """Download this pair of images (unless we already have them on local disk), 64 """Download this pair of images (unless we already have them on local disk),
78 and prepare a DiffRecord for them. 65 and prepare a DiffRecord for them.
79 66
67 TODO(epoger): Make this asynchronously download images, rather than blocking
68 until the images have been downloaded and processed.
69
80 Args: 70 Args:
81 storage_root: root directory on local disk within which we store all 71 storage_root: root directory on local disk within which we store all
82 images 72 images
83 expected_image_url: file or HTTP url from which we will download the 73 expected_image_url: file or HTTP url from which we will download the
84 expected image 74 expected image
85 expected_image_locator: a unique ID string under which we will store the 75 expected_image_locator: a unique ID string under which we will store the
86 expected image within storage_root (probably including a checksum to 76 expected image within storage_root (probably including a checksum to
87 guarantee uniqueness) 77 guarantee uniqueness)
88 actual_image_url: file or HTTP url from which we will download the 78 actual_image_url: file or HTTP url from which we will download the
89 actual image 79 actual image
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(), 212 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(),
223 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel, 213 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
224 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference, 214 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference,
225 } 215 }
226 216
227 217
228 class ImageDiffDB(object): 218 class ImageDiffDB(object):
229 """ Calculates differences between image pairs, maintaining a database of 219 """ Calculates differences between image pairs, maintaining a database of
230 them for download.""" 220 them for download."""
231 221
232 def __init__(self, storage_root, num_workers=DEFAULT_NUM_WORKERS): 222 def __init__(self, storage_root):
233 """ 223 """
234 Args: 224 Args:
235 storage_root: string; root path within the DB will store all of its stuff 225 storage_root: string; root path within the DB will store all of its stuff
236 num_workers: integer; number of worker threads to spawn
237 """ 226 """
238 self._storage_root = storage_root 227 self._storage_root = storage_root
239 228
240 # Dictionary of DiffRecords, keyed by (expected_image_locator, 229 # Dictionary of DiffRecords, keyed by (expected_image_locator,
241 # actual_image_locator) tuples. 230 # actual_image_locator) tuples.
242 # Values can also be DIFFRECORD_PENDING, DIFFRECORD_FAILED.
243 self._diff_dict = {} 231 self._diff_dict = {}
244 232
245 # Set up the queue for asynchronously loading DiffRecords, and start the
246 # worker threads reading from it.
247 self._tasks_queue = Queue.Queue(maxsize=2*num_workers)
248 self._workers = []
249 for i in range(num_workers):
250 worker = threading.Thread(target=self.worker, args=(i,))
251 worker.daemon = True
252 worker.start()
253 self._workers.append(worker)
254
255 def worker(self, worker_num):
256 """Launch a worker thread that pulls tasks off self._tasks_queue.
257
258 Args:
259 worker_num: (integer) which worker this is
260 """
261 while True:
262 params = self._tasks_queue.get()
263 key, expected_image_url, actual_image_url = params
264 try:
265 diff_record = DiffRecord(
266 self._storage_root,
267 expected_image_url=expected_image_url,
268 expected_image_locator=key[0],
269 actual_image_url=actual_image_url,
270 actual_image_locator=key[1])
271 except Exception:
272 logging.exception(
273 'exception while creating DiffRecord for key %s' % str(key))
274 diff_record = DIFFRECORD_FAILED
275 self._diff_dict[key] = diff_record
276
277 def add_image_pair(self, 233 def add_image_pair(self,
278 expected_image_url, expected_image_locator, 234 expected_image_url, expected_image_locator,
279 actual_image_url, actual_image_locator): 235 actual_image_url, actual_image_locator):
280 """Download this pair of images (unless we already have them on local disk), 236 """Download this pair of images (unless we already have them on local disk),
281 and prepare a DiffRecord for them. 237 and prepare a DiffRecord for them.
282 238
283 This method will block until the images are downloaded and DiffRecord is 239 TODO(epoger): Make this asynchronously download images, rather than blocking
284 available by calling get_diff_record(). 240 until the images have been downloaded and processed.
241 When we do that, we should probably add a new method that will block
242 until all of the images have been downloaded and processed. Otherwise,
243 we won't know when it's safe to start calling get_diff_record().
244 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
245 thread-pool/worker queue at a higher level that just uses ImageDiffDB?
285 246
286 Args: 247 Args:
287 expected_image_url: file or HTTP url from which we will download the 248 expected_image_url: file or HTTP url from which we will download the
288 expected image 249 expected image
289 expected_image_locator: a unique ID string under which we will store the 250 expected_image_locator: a unique ID string under which we will store the
290 expected image within storage_root (probably including a checksum to 251 expected image within storage_root (probably including a checksum to
291 guarantee uniqueness) 252 guarantee uniqueness)
292 actual_image_url: file or HTTP url from which we will download the 253 actual_image_url: file or HTTP url from which we will download the
293 actual image 254 actual image
294 actual_image_locator: a unique ID string under which we will store the 255 actual_image_locator: a unique ID string under which we will store the
295 actual image within storage_root (probably including a checksum to 256 actual image within storage_root (probably including a checksum to
296 guarantee uniqueness) 257 guarantee uniqueness)
297
298 Raises:
299 Exception if we are unable to create a DiffRecord for this image pair.
300 """ 258 """
301 key = _generate_key(expected_image_locator, actual_image_locator) 259 expected_image_locator = _sanitize_locator(expected_image_locator)
260 actual_image_locator = _sanitize_locator(actual_image_locator)
261 key = (expected_image_locator, actual_image_locator)
302 if not key in self._diff_dict: 262 if not key in self._diff_dict:
303 try: 263 try:
304 new_diff_record = DiffRecord( 264 new_diff_record = DiffRecord(
305 self._storage_root, 265 self._storage_root,
306 expected_image_url=expected_image_url, 266 expected_image_url=expected_image_url,
307 expected_image_locator=expected_image_locator, 267 expected_image_locator=expected_image_locator,
308 actual_image_url=actual_image_url, 268 actual_image_url=actual_image_url,
309 actual_image_locator=actual_image_locator) 269 actual_image_locator=actual_image_locator)
310 except Exception: 270 except Exception:
311 # If we can't create a real DiffRecord for this (expected, actual) pair, 271 # If we can't create a real DiffRecord for this (expected, actual) pair,
312 # store None and the UI will show whatever information we DO have. 272 # store None and the UI will show whatever information we DO have.
313 # Fixes http://skbug.com/2368 . 273 # Fixes http://skbug.com/2368 .
314 logging.exception( 274 logging.exception(
315 'got exception while creating a DiffRecord for ' 275 'got exception while creating a DiffRecord for '
316 'expected_image_url=%s , actual_image_url=%s; returning None' % ( 276 'expected_image_url=%s , actual_image_url=%s; returning None' % (
317 expected_image_url, actual_image_url)) 277 expected_image_url, actual_image_url))
318 new_diff_record = None 278 new_diff_record = None
319 self._diff_dict[key] = new_diff_record 279 self._diff_dict[key] = new_diff_record
320 280
321 def add_image_pair_async(self,
322 expected_image_url, expected_image_locator,
323 actual_image_url, actual_image_locator):
324 """Download this pair of images (unless we already have them on local disk),
325 and prepare a DiffRecord for them.
326
327 This method will return quickly; calls to get_diff_record() will block
328 until the DiffRecord is available (or we have given up on creating it).
329
330 Args:
331 expected_image_url: file or HTTP url from which we will download the
332 expected image
333 expected_image_locator: a unique ID string under which we will store the
334 expected image within storage_root (probably including a checksum to
335 guarantee uniqueness)
336 actual_image_url: file or HTTP url from which we will download the
337 actual image
338 actual_image_locator: a unique ID string under which we will store the
339 actual image within storage_root (probably including a checksum to
340 guarantee uniqueness)
341 """
342 key = _generate_key(expected_image_locator, actual_image_locator)
343 if not key in self._diff_dict:
344 # If we have already requested a diff between these two images,
345 # we don't need to request it again.
346 #
347 # Threading note: If multiple threads called into this method with the
348 # same key at the same time, there will be multiple tasks on the queue
349 # with the same key. But that's OK; they will both complete successfully,
350 # and just waste a little time in the process. Nothing will break.
351 self._diff_dict[key] = DIFFRECORD_PENDING
352 self._tasks_queue.put((key, expected_image_url, actual_image_url))
353
354 def get_diff_record(self, expected_image_locator, actual_image_locator): 281 def get_diff_record(self, expected_image_locator, actual_image_locator):
355 """Returns the DiffRecord for this image pair. 282 """Returns the DiffRecord for this image pair.
356 283
357 Args: 284 Raises a KeyError if we don't have a DiffRecord for this image pair.
358 expected_image_locator: a unique ID string under which we will store the
359 expected image within storage_root (probably including a checksum to
360 guarantee uniqueness)
361 actual_image_locator: a unique ID string under which we will store the
362 actual image within storage_root (probably including a checksum to
363 guarantee uniqueness)
364
365 Returns the DiffRecord for this image pair, or None if we were unable to
366 generate one.
367 """ 285 """
368 key = _generate_key(expected_image_locator, actual_image_locator) 286 key = (_sanitize_locator(expected_image_locator),
369 diff_record = self._diff_dict[key] 287 _sanitize_locator(actual_image_locator))
370 288 return self._diff_dict[key]
371 # If we have no results yet, block until we do.
372 while diff_record == DIFFRECORD_PENDING:
373 time.sleep(1)
374 diff_record = self._diff_dict[key]
375
376 # Once we have the result...
377 if diff_record == DIFFRECORD_FAILED:
378 logging.error(
379 'failed to create a DiffRecord for expected_image_locator=%s , '
380 'actual_image_locator=%s' % (
381 expected_image_locator, actual_image_locator))
382 return None
383 else:
384 return diff_record
385 289
386 290
387 # Utility functions 291 # Utility functions
388 292
389 def _calculate_weighted_diff_metric(histogram, num_pixels): 293 def _calculate_weighted_diff_metric(histogram, num_pixels):
390 """Given the histogram of a diff image (per-channel diff at each 294 """Given the histogram of a diff image (per-channel diff at each
391 pixel between two images), calculate the weighted diff metric (a 295 pixel between two images), calculate the weighted diff metric (a
392 stab at how different the two images really are). 296 stab at how different the two images really are).
393 297
394 TODO(epoger): Delete this function, now that we have perceptual diff? 298 TODO(epoger): Delete this function, now that we have perceptual diff?
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
463 def _download_and_open_image(local_filepath, url): 367 def _download_and_open_image(local_filepath, url):
464 """Open the image at local_filepath; if there is no file at that path, 368 """Open the image at local_filepath; if there is no file at that path,
465 download it from url to that path and then open it. 369 download it from url to that path and then open it.
466 370
467 Args: 371 Args:
468 local_filepath: path on local disk where the image should be stored 372 local_filepath: path on local disk where the image should be stored
469 url: URL from which we can download the image if we don't have it yet 373 url: URL from which we can download the image if we don't have it yet
470 374
471 Returns: a PIL image object 375 Returns: a PIL image object
472 """ 376 """
473 global global_file_collisions
474 if not os.path.exists(local_filepath): 377 if not os.path.exists(local_filepath):
475 _mkdir_unless_exists(os.path.dirname(local_filepath)) 378 _mkdir_unless_exists(os.path.dirname(local_filepath))
476 with contextlib.closing(urllib.urlopen(url)) as url_handle: 379 with contextlib.closing(urllib.urlopen(url)) as url_handle:
477 380 with open(local_filepath, 'wb') as file_handle:
478 # First download the file contents into a unique filename, and
479 # then rename that file. That way, if multiple threads are downloading
480 # the same filename at the same time, they won't interfere with each
481 # other (they will both download the file, and one will "win" in the end)
482 temp_filename = '%s-%d' % (local_filepath,
483 threading.current_thread().ident)
484 with open(temp_filename, 'wb') as file_handle:
485 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) 381 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
486
487 # Keep count of how many colliding downloads we encounter;
488 # if it's a large number, we may want to change our download strategy
489 # to minimize repeated downloads.
490 if os.path.exists(local_filepath):
491 global_file_collisions += 1
492 else:
493 os.rename(temp_filename, local_filepath)
494
495 return _open_image(local_filepath) 382 return _open_image(local_filepath)
496 383
497 384
498 def _open_image(filepath): 385 def _open_image(filepath):
499 """Wrapper for Image.open(filepath) that yields more useful error messages. 386 """Wrapper for Image.open(filepath) that yields more useful error messages.
500 387
501 Args: 388 Args:
502 filepath: path on local disk to load image from 389 filepath: path on local disk to load image from
503 390
504 Returns: a PIL image object 391 Returns: a PIL image object
(...skipping 20 matching lines...) Expand all
525 _mkdir_unless_exists(os.path.dirname(filepath)) 412 _mkdir_unless_exists(os.path.dirname(filepath))
526 image.save(filepath, format) 413 image.save(filepath, format)
527 414
528 415
529 def _mkdir_unless_exists(path): 416 def _mkdir_unless_exists(path):
530 """Unless path refers to an already-existing directory, create it. 417 """Unless path refers to an already-existing directory, create it.
531 418
532 Args: 419 Args:
533 path: path on local disk 420 path: path on local disk
534 """ 421 """
535 try: 422 if not os.path.isdir(path):
536 os.makedirs(path) 423 os.makedirs(path)
537 except OSError as e:
538 if e.errno == errno.EEXIST:
539 pass
540 424
541 425
542 def _sanitize_locator(locator): 426 def _sanitize_locator(locator):
543 """Returns a sanitized version of a locator (one in which we know none of the 427 """Returns a sanitized version of a locator (one in which we know none of the
544 characters will have special meaning in filenames). 428 characters will have special meaning in filenames).
545 429
546 Args: 430 Args:
547 locator: string, or something that can be represented as a string 431 locator: string, or something that can be represented as a string
548 """ 432 """
549 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator)) 433 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
550 434
551 435
552 def _generate_key(expected_image_locator, actual_image_locator):
553 """Returns a key suitable for looking up this image pair.
554
555 Args:
556 expected_image_locator: a unique ID string under which we will store the
557 expected image within storage_root (probably including a checksum to
558 guarantee uniqueness)
559 actual_image_locator: a unique ID string under which we will store the
560 actual image within storage_root (probably including a checksum to
561 guarantee uniqueness)
562 """
563 return (_sanitize_locator(expected_image_locator),
564 _sanitize_locator(actual_image_locator))
565
566
567 def _get_difference_locator(expected_image_locator, actual_image_locator): 436 def _get_difference_locator(expected_image_locator, actual_image_locator):
568 """Returns the locator string used to look up the diffs between expected_image 437 """Returns the locator string used to look up the diffs between expected_image
569 and actual_image. 438 and actual_image.
570 439
571 We must keep this function in sync with getImageDiffRelativeUrl() in 440 We must keep this function in sync with getImageDiffRelativeUrl() in
572 static/loader.js 441 static/loader.js
573 442
574 Args: 443 Args:
575 expected_image_locator: locator string pointing at expected image 444 expected_image_locator: locator string pointing at expected image
576 actual_image_locator: locator string pointing at actual image 445 actual_image_locator: locator string pointing at actual image
577 446
578 Returns: already-sanitized locator where the diffs between expected and 447 Returns: already-sanitized locator where the diffs between expected and
579 actual images can be found 448 actual images can be found
580 """ 449 """
581 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator), 450 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
582 _sanitize_locator(actual_image_locator)) 451 _sanitize_locator(actual_image_locator))
OLDNEW
« no previous file with comments | « gm/rebaseline_server/compare_to_expectations.py ('k') | gm/rebaseline_server/imagepair.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698