gm/rebaseline_server/imagediffdb.py - Issue 239623002: Revert of rebaseline_server: multithreaded loading/diffing of images

Side by Side Diff: gm/rebaseline_server/imagediffdb.py

Issue 239623002: Revert of rebaseline_server: multithreaded loading/diffing of images (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #!/usr/bin/python	1 #!/usr/bin/python

2	2

3 """	3 """

4 Copyright 2013 Google Inc.	4 Copyright 2013 Google Inc.

5	5

6 Use of this source code is governed by a BSD-style license that can be	6 Use of this source code is governed by a BSD-style license that can be

7 found in the LICENSE file.	7 found in the LICENSE file.

8	8

9 Calulate differences between image pairs, and store them in a database.	9 Calulate differences between image pairs, and store them in a database.

10 """	10 """

11	11

12 import contextlib	12 import contextlib

13 import csv	13 import csv

14 import errno

15 import logging	14 import logging

16 import Queue

17 import os	15 import os

18 import re	16 import re

19 import shutil	17 import shutil

20 import sys	18 import sys

21 import tempfile	19 import tempfile

22 import time

23 import threading

24 import urllib	20 import urllib

25 try:	21 try:

26 from PIL import Image, ImageChops	22 from PIL import Image, ImageChops

27 except ImportError:	23 except ImportError:

28 raise ImportError('Requires PIL to be installed; see '	24 raise ImportError('Requires PIL to be installed; see '

29 + 'http://www.pythonware.com/products/pil/')	25 + 'http://www.pythonware.com/products/pil/')

30	26

31 # Set the PYTHONPATH to include the tools directory.	27 # Set the PYTHONPATH to include the tools directory.

32 sys.path.append(	28 sys.path.append(

33 os.path.join(	29 os.path.join(

34 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,	30 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,

35 'tools'))	31 'tools'))

36 import find_run_binary	32 import find_run_binary

37	33

38 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')	34 SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')

39	35

40 DEFAULT_IMAGE_SUFFIX = '.png'	36 DEFAULT_IMAGE_SUFFIX = '.png'

41 DEFAULT_IMAGES_SUBDIR = 'images'	37 DEFAULT_IMAGES_SUBDIR = 'images'

42 DEFAULT_NUM_WORKERS = 8

43	38

44 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')	39 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')

45	40

46 DIFFS_SUBDIR = 'diffs'	41 DIFFS_SUBDIR = 'diffs'

47 WHITEDIFFS_SUBDIR = 'whitediffs'	42 WHITEDIFFS_SUBDIR = 'whitediffs'

48	43

49 VALUES_PER_BAND = 256	44 VALUES_PER_BAND = 256

50	45

51 # Keys used within DiffRecord dictionary representations.	46 # Keys used within DiffRecord dictionary representations.

52 # NOTE: Keep these in sync with static/constants.js	47 # NOTE: Keep these in sync with static/constants.js

53 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'	48 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'

54 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels'	49 KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels'

55 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'	50 KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'

56 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'	51 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'

57 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'	52 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'

58	53

59 # Special values within ImageDiffDB._diff_dict

60 DIFFRECORD_FAILED = 'failed'

61 DIFFRECORD_PENDING = 'pending'

62

63 # TODO(epoger): Temporary(?) list to keep track of how many times we download

64 # the same file in multiple threads.

65 global_file_collisions = 0

66

67	54

68 class DiffRecord(object):	55 class DiffRecord(object):

69 """ Record of differences between two images. """	56 """ Record of differences between two images. """

70	57

71 def __init__(self, storage_root,	58 def __init__(self, storage_root,

72 expected_image_url, expected_image_locator,	59 expected_image_url, expected_image_locator,

73 actual_image_url, actual_image_locator,	60 actual_image_url, actual_image_locator,

74 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,	61 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,

75 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,	62 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,

76 image_suffix=DEFAULT_IMAGE_SUFFIX):	63 image_suffix=DEFAULT_IMAGE_SUFFIX):

77 """Download this pair of images (unless we already have them on local disk),	64 """Download this pair of images (unless we already have them on local disk),

78 and prepare a DiffRecord for them.	65 and prepare a DiffRecord for them.

79	66

	67 TODO(epoger): Make this asynchronously download images, rather than blocking

	68 until the images have been downloaded and processed.

	69

80 Args:	70 Args:

81 storage_root: root directory on local disk within which we store all	71 storage_root: root directory on local disk within which we store all

82 images	72 images

83 expected_image_url: file or HTTP url from which we will download the	73 expected_image_url: file or HTTP url from which we will download the

84 expected image	74 expected image

85 expected_image_locator: a unique ID string under which we will store the	75 expected_image_locator: a unique ID string under which we will store the

86 expected image within storage_root (probably including a checksum to	76 expected image within storage_root (probably including a checksum to

87 guarantee uniqueness)	77 guarantee uniqueness)

88 actual_image_url: file or HTTP url from which we will download the	78 actual_image_url: file or HTTP url from which we will download the

89 actual image	79 actual image

(...skipping 132 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
222 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(),	212 KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(),

223 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,	213 KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,

224 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference,	214 KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference,

225 }	215 }

226	216

227	217

228 class ImageDiffDB(object):	218 class ImageDiffDB(object):

229 """ Calculates differences between image pairs, maintaining a database of	219 """ Calculates differences between image pairs, maintaining a database of

230 them for download."""	220 them for download."""

231	221

232 def __init__(self, storage_root, num_workers=DEFAULT_NUM_WORKERS):	222 def __init__(self, storage_root):

233 """	223 """

234 Args:	224 Args:

235 storage_root: string; root path within the DB will store all of its stuff	225 storage_root: string; root path within the DB will store all of its stuff

236 num_workers: integer; number of worker threads to spawn

237 """	226 """

238 self._storage_root = storage_root	227 self._storage_root = storage_root

239	228

240 # Dictionary of DiffRecords, keyed by (expected_image_locator,	229 # Dictionary of DiffRecords, keyed by (expected_image_locator,

241 # actual_image_locator) tuples.	230 # actual_image_locator) tuples.

242 # Values can also be DIFFRECORD_PENDING, DIFFRECORD_FAILED.

243 self._diff_dict = {}	231 self._diff_dict = {}

244	232

245 # Set up the queue for asynchronously loading DiffRecords, and start the

246 # worker threads reading from it.

247 self._tasks_queue = Queue.Queue(maxsize=2*num_workers)

248 self._workers = []

249 for i in range(num_workers):

250 worker = threading.Thread(target=self.worker, args=(i,))

251 worker.daemon = True

252 worker.start()

253 self._workers.append(worker)

254

255 def worker(self, worker_num):

256 """Launch a worker thread that pulls tasks off self._tasks_queue.

257

258 Args:

259 worker_num: (integer) which worker this is

260 """

261 while True:

262 params = self._tasks_queue.get()

263 key, expected_image_url, actual_image_url = params

264 try:

265 diff_record = DiffRecord(

266 self._storage_root,

267 expected_image_url=expected_image_url,

268 expected_image_locator=key[0],

269 actual_image_url=actual_image_url,

270 actual_image_locator=key[1])

271 except Exception:

272 logging.exception(

273 'exception while creating DiffRecord for key %s' % str(key))

274 diff_record = DIFFRECORD_FAILED

275 self._diff_dict[key] = diff_record

276

277 def add_image_pair(self,	233 def add_image_pair(self,

278 expected_image_url, expected_image_locator,	234 expected_image_url, expected_image_locator,

279 actual_image_url, actual_image_locator):	235 actual_image_url, actual_image_locator):

280 """Download this pair of images (unless we already have them on local disk),	236 """Download this pair of images (unless we already have them on local disk),

281 and prepare a DiffRecord for them.	237 and prepare a DiffRecord for them.

282	238

283 This method will block until the images are downloaded and DiffRecord is	239 TODO(epoger): Make this asynchronously download images, rather than blocking

284 available by calling get_diff_record().	240 until the images have been downloaded and processed.

	241 When we do that, we should probably add a new method that will block

	242 until all of the images have been downloaded and processed. Otherwise,

	243 we won't know when it's safe to start calling get_diff_record().

	244 jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a

	245 thread-pool/worker queue at a higher level that just uses ImageDiffDB?

285	246

286 Args:	247 Args:

287 expected_image_url: file or HTTP url from which we will download the	248 expected_image_url: file or HTTP url from which we will download the

288 expected image	249 expected image

289 expected_image_locator: a unique ID string under which we will store the	250 expected_image_locator: a unique ID string under which we will store the

290 expected image within storage_root (probably including a checksum to	251 expected image within storage_root (probably including a checksum to

291 guarantee uniqueness)	252 guarantee uniqueness)

292 actual_image_url: file or HTTP url from which we will download the	253 actual_image_url: file or HTTP url from which we will download the

293 actual image	254 actual image

294 actual_image_locator: a unique ID string under which we will store the	255 actual_image_locator: a unique ID string under which we will store the

295 actual image within storage_root (probably including a checksum to	256 actual image within storage_root (probably including a checksum to

296 guarantee uniqueness)	257 guarantee uniqueness)

297

298 Raises:

299 Exception if we are unable to create a DiffRecord for this image pair.

300 """	258 """

301 key = _generate_key(expected_image_locator, actual_image_locator)	259 expected_image_locator = _sanitize_locator(expected_image_locator)

	260 actual_image_locator = _sanitize_locator(actual_image_locator)

	261 key = (expected_image_locator, actual_image_locator)

302 if not key in self._diff_dict:	262 if not key in self._diff_dict:

303 try:	263 try:

304 new_diff_record = DiffRecord(	264 new_diff_record = DiffRecord(

305 self._storage_root,	265 self._storage_root,

306 expected_image_url=expected_image_url,	266 expected_image_url=expected_image_url,

307 expected_image_locator=expected_image_locator,	267 expected_image_locator=expected_image_locator,

308 actual_image_url=actual_image_url,	268 actual_image_url=actual_image_url,

309 actual_image_locator=actual_image_locator)	269 actual_image_locator=actual_image_locator)

310 except Exception:	270 except Exception:

311 # If we can't create a real DiffRecord for this (expected, actual) pair,	271 # If we can't create a real DiffRecord for this (expected, actual) pair,

312 # store None and the UI will show whatever information we DO have.	272 # store None and the UI will show whatever information we DO have.

313 # Fixes http://skbug.com/2368 .	273 # Fixes http://skbug.com/2368 .

314 logging.exception(	274 logging.exception(

315 'got exception while creating a DiffRecord for '	275 'got exception while creating a DiffRecord for '

316 'expected_image_url=%s , actual_image_url=%s; returning None' % (	276 'expected_image_url=%s , actual_image_url=%s; returning None' % (

317 expected_image_url, actual_image_url))	277 expected_image_url, actual_image_url))

318 new_diff_record = None	278 new_diff_record = None

319 self._diff_dict[key] = new_diff_record	279 self._diff_dict[key] = new_diff_record

320	280

321 def add_image_pair_async(self,

322 expected_image_url, expected_image_locator,

323 actual_image_url, actual_image_locator):

324 """Download this pair of images (unless we already have them on local disk),

325 and prepare a DiffRecord for them.

326

327 This method will return quickly; calls to get_diff_record() will block

328 until the DiffRecord is available (or we have given up on creating it).

329

330 Args:

331 expected_image_url: file or HTTP url from which we will download the

332 expected image

333 expected_image_locator: a unique ID string under which we will store the

334 expected image within storage_root (probably including a checksum to

335 guarantee uniqueness)

336 actual_image_url: file or HTTP url from which we will download the

337 actual image

338 actual_image_locator: a unique ID string under which we will store the

339 actual image within storage_root (probably including a checksum to

340 guarantee uniqueness)

341 """

342 key = _generate_key(expected_image_locator, actual_image_locator)

343 if not key in self._diff_dict:

344 # If we have already requested a diff between these two images,

345 # we don't need to request it again.

346 #

347 # Threading note: If multiple threads called into this method with the

348 # same key at the same time, there will be multiple tasks on the queue

349 # with the same key. But that's OK; they will both complete successfully,

350 # and just waste a little time in the process. Nothing will break.

351 self._diff_dict[key] = DIFFRECORD_PENDING

352 self._tasks_queue.put((key, expected_image_url, actual_image_url))

353

354 def get_diff_record(self, expected_image_locator, actual_image_locator):	281 def get_diff_record(self, expected_image_locator, actual_image_locator):

355 """Returns the DiffRecord for this image pair.	282 """Returns the DiffRecord for this image pair.

356	283

357 Args:	284 Raises a KeyError if we don't have a DiffRecord for this image pair.

358 expected_image_locator: a unique ID string under which we will store the

359 expected image within storage_root (probably including a checksum to

360 guarantee uniqueness)

361 actual_image_locator: a unique ID string under which we will store the

362 actual image within storage_root (probably including a checksum to

363 guarantee uniqueness)

364

365 Returns the DiffRecord for this image pair, or None if we were unable to

366 generate one.

367 """	285 """

368 key = _generate_key(expected_image_locator, actual_image_locator)	286 key = (_sanitize_locator(expected_image_locator),

369 diff_record = self._diff_dict[key]	287 _sanitize_locator(actual_image_locator))

370	288 return self._diff_dict[key]

371 # If we have no results yet, block until we do.

372 while diff_record == DIFFRECORD_PENDING:

373 time.sleep(1)

374 diff_record = self._diff_dict[key]

375

376 # Once we have the result...

377 if diff_record == DIFFRECORD_FAILED:

378 logging.error(

379 'failed to create a DiffRecord for expected_image_locator=%s , '

380 'actual_image_locator=%s' % (

381 expected_image_locator, actual_image_locator))

382 return None

383 else:

384 return diff_record

385	289

386	290

387 # Utility functions	291 # Utility functions

388	292

389 def _calculate_weighted_diff_metric(histogram, num_pixels):	293 def _calculate_weighted_diff_metric(histogram, num_pixels):

390 """Given the histogram of a diff image (per-channel diff at each	294 """Given the histogram of a diff image (per-channel diff at each

391 pixel between two images), calculate the weighted diff metric (a	295 pixel between two images), calculate the weighted diff metric (a

392 stab at how different the two images really are).	296 stab at how different the two images really are).

393	297

394 TODO(epoger): Delete this function, now that we have perceptual diff?	298 TODO(epoger): Delete this function, now that we have perceptual diff?

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
463 def _download_and_open_image(local_filepath, url):	367 def _download_and_open_image(local_filepath, url):

464 """Open the image at local_filepath; if there is no file at that path,	368 """Open the image at local_filepath; if there is no file at that path,

465 download it from url to that path and then open it.	369 download it from url to that path and then open it.

466	370

467 Args:	371 Args:

468 local_filepath: path on local disk where the image should be stored	372 local_filepath: path on local disk where the image should be stored

469 url: URL from which we can download the image if we don't have it yet	373 url: URL from which we can download the image if we don't have it yet

470	374

471 Returns: a PIL image object	375 Returns: a PIL image object

472 """	376 """

473 global global_file_collisions

474 if not os.path.exists(local_filepath):	377 if not os.path.exists(local_filepath):

475 _mkdir_unless_exists(os.path.dirname(local_filepath))	378 _mkdir_unless_exists(os.path.dirname(local_filepath))

476 with contextlib.closing(urllib.urlopen(url)) as url_handle:	379 with contextlib.closing(urllib.urlopen(url)) as url_handle:

477	380 with open(local_filepath, 'wb') as file_handle:

478 # First download the file contents into a unique filename, and

479 # then rename that file. That way, if multiple threads are downloading

480 # the same filename at the same time, they won't interfere with each

481 # other (they will both download the file, and one will "win" in the end)

482 temp_filename = '%s-%d' % (local_filepath,

483 threading.current_thread().ident)

484 with open(temp_filename, 'wb') as file_handle:

485 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)	381 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)

486

487 # Keep count of how many colliding downloads we encounter;

488 # if it's a large number, we may want to change our download strategy

489 # to minimize repeated downloads.

490 if os.path.exists(local_filepath):

491 global_file_collisions += 1

492 else:

493 os.rename(temp_filename, local_filepath)

494

495 return _open_image(local_filepath)	382 return _open_image(local_filepath)

496	383

497	384

498 def _open_image(filepath):	385 def _open_image(filepath):

499 """Wrapper for Image.open(filepath) that yields more useful error messages.	386 """Wrapper for Image.open(filepath) that yields more useful error messages.

500	387

501 Args:	388 Args:

502 filepath: path on local disk to load image from	389 filepath: path on local disk to load image from

503	390

504 Returns: a PIL image object	391 Returns: a PIL image object

(...skipping 20 matching lines...) Expand all Loading...
525 _mkdir_unless_exists(os.path.dirname(filepath))	412 _mkdir_unless_exists(os.path.dirname(filepath))

526 image.save(filepath, format)	413 image.save(filepath, format)

527	414

528	415

529 def _mkdir_unless_exists(path):	416 def _mkdir_unless_exists(path):

530 """Unless path refers to an already-existing directory, create it.	417 """Unless path refers to an already-existing directory, create it.

531	418

532 Args:	419 Args:

533 path: path on local disk	420 path: path on local disk

534 """	421 """

535 try:	422 if not os.path.isdir(path):

536 os.makedirs(path)	423 os.makedirs(path)

537 except OSError as e:

538 if e.errno == errno.EEXIST:

539 pass

540	424

541	425

542 def _sanitize_locator(locator):	426 def _sanitize_locator(locator):

543 """Returns a sanitized version of a locator (one in which we know none of the	427 """Returns a sanitized version of a locator (one in which we know none of the

544 characters will have special meaning in filenames).	428 characters will have special meaning in filenames).

545	429

546 Args:	430 Args:

547 locator: string, or something that can be represented as a string	431 locator: string, or something that can be represented as a string

548 """	432 """

549 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))	433 return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))

550	434

551	435

552 def _generate_key(expected_image_locator, actual_image_locator):

553 """Returns a key suitable for looking up this image pair.

554

555 Args:

556 expected_image_locator: a unique ID string under which we will store the

557 expected image within storage_root (probably including a checksum to

558 guarantee uniqueness)

559 actual_image_locator: a unique ID string under which we will store the

560 actual image within storage_root (probably including a checksum to

561 guarantee uniqueness)

562 """

563 return (_sanitize_locator(expected_image_locator),

564 _sanitize_locator(actual_image_locator))

565

566

567 def _get_difference_locator(expected_image_locator, actual_image_locator):	436 def _get_difference_locator(expected_image_locator, actual_image_locator):

568 """Returns the locator string used to look up the diffs between expected_image	437 """Returns the locator string used to look up the diffs between expected_image

569 and actual_image.	438 and actual_image.

570	439

571 We must keep this function in sync with getImageDiffRelativeUrl() in	440 We must keep this function in sync with getImageDiffRelativeUrl() in

572 static/loader.js	441 static/loader.js

573	442

574 Args:	443 Args:

575 expected_image_locator: locator string pointing at expected image	444 expected_image_locator: locator string pointing at expected image

576 actual_image_locator: locator string pointing at actual image	445 actual_image_locator: locator string pointing at actual image

577	446

578 Returns: already-sanitized locator where the diffs between expected and	447 Returns: already-sanitized locator where the diffs between expected and

579 actual images can be found	448 actual images can be found

580 """	449 """

581 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),	450 return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),

582 _sanitize_locator(actual_image_locator))	451 _sanitize_locator(actual_image_locator))

OLD	NEW

« no previous file with comments | « gm/rebaseline_server/compare_to_expectations.py ('k') | gm/rebaseline_server/imagepair.py » ('j') | no next file with comments »