Index: gm/rebaseline_server/imagediffdb.py |
diff --git a/gm/rebaseline_server/imagediffdb.py b/gm/rebaseline_server/imagediffdb.py |
index 89f9fef319a4045909553c7c840bbc19fb1004f0..d655e8d7ca1e179a2aa1a1100dc8cf847507ad1d 100644 |
--- a/gm/rebaseline_server/imagediffdb.py |
+++ b/gm/rebaseline_server/imagediffdb.py |
@@ -24,6 +24,8 @@ import fix_pythonpath # pylint: disable=W0611 |
# Imports from within Skia |
import find_run_binary |
+from py.utils import gs_utils |
+ |
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff') |
@@ -43,10 +45,51 @@ KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels' |
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference' |
+class GSObject(object): |
+ """Reference to a file, dir, or entire bucket in Google Storage.""" |
+ # EPOGER: move this class into gs_utils.py ? |
+ def __init__(self, gs_url=None, bucket=None, path=None): |
+ """ |
+ Create a reference to a GS object, using EITHER gs_url OR bucket/path. |
+ |
+ Args: |
+ gs_url: URL pointing to this file, e.g. "gs://bucketname/dir/file". |
+ If this is set, the other params must NOT be set. |
+ bucket: name of the bucket, e.g. "bucketname". |
+ If this is set, the "gs_url" param must NOT be set. |
+ path: Posix-style path within the bucket, e.g. "dir/file". |
+ If this is set, the "gs_url" param must NOT be set. |
+ """ |
+ if gs_url != None: |
+ if bucket != None or path != None: |
+ raise Exception( |
+ 'gs_url is set, so neither bucket nor path should be set') |
+ (self.bucket, self.path) = gs_utils.GSUtils.split_gs_url(gs_url) |
+ else: |
+ if not bucket: |
+ raise Exception('gs_url is not set, so bucket must be set') |
+ self.bucket = bucket |
+ self.path = path or '' |
+ |
+ def as_gs_url(self): |
+ """Returns a gs:// URL pointing at this object in Google Storage.""" |
+ url = 'gs://' + self.bucket |
+ if self.path: |
+ url += '/' + self.path |
+ return url |
+ |
+ def as_http_url(self): |
+ """Returns an http:// URL pointing at this object in Google Storage.""" |
+ url = 'http://%s.commondatastorage.googleapis.com' % self.bucket |
+ if self.path: |
+ url += '/' + self.path |
+ return url |
+ |
+ |
class DiffRecord(object): |
""" Record of differences between two images. """ |
- def __init__(self, storage_root, |
+ def __init__(self, gs, storage_root, |
expected_image_url, expected_image_locator, |
actual_image_url, actual_image_locator, |
expected_images_subdir=DEFAULT_IMAGES_SUBDIR, |
@@ -59,14 +102,15 @@ class DiffRecord(object): |
until the images have been downloaded and processed. |
Args: |
+ gs: instance of GSUtils object we can use to download images |
storage_root: root directory on local disk within which we store all |
images |
- expected_image_url: file or HTTP url from which we will download the |
+ expected_image_url: file, GS, or HTTP url from which we will download the |
expected image |
expected_image_locator: a unique ID string under which we will store the |
expected image within storage_root (probably including a checksum to |
guarantee uniqueness) |
- actual_image_url: file or HTTP url from which we will download the |
+ actual_image_url: file, GS, or HTTP url from which we will download the |
actual image |
actual_image_locator: a unique ID string under which we will store the |
actual image within storage_root (probably including a checksum to |
@@ -88,13 +132,13 @@ class DiffRecord(object): |
storage_root, actual_images_subdir, |
str(actual_image_locator) + image_suffix) |
try: |
- _download_file(expected_image_file, expected_image_url) |
+ _download_file(gs, expected_image_file, expected_image_url) |
except Exception: |
logging.exception('unable to download expected_image_url %s to file %s' % |
(expected_image_url, expected_image_file)) |
raise |
try: |
- _download_file(actual_image_file, actual_image_url) |
+ _download_file(gs, actual_image_file, actual_image_url) |
except Exception: |
logging.exception('unable to download actual_image_url %s to file %s' % |
(actual_image_url, actual_image_file)) |
@@ -211,12 +255,14 @@ class ImageDiffDB(object): |
""" Calculates differences between image pairs, maintaining a database of |
them for download.""" |
- def __init__(self, storage_root): |
+ def __init__(self, storage_root, gs=None): |
""" |
Args: |
storage_root: string; root path within the DB will store all of its stuff |
+ gs: instance of GSUtils object we can use to download images |
""" |
self._storage_root = storage_root |
+ self._gs = gs |
# Dictionary of DiffRecords, keyed by (expected_image_locator, |
# actual_image_locator) tuples. |
@@ -241,12 +287,12 @@ class ImageDiffDB(object): |
thread-pool/worker queue at a higher level that just uses ImageDiffDB? |
Args: |
- expected_image_url: file or HTTP url from which we will download the |
+ expected_image_url: file, GS, or HTTP url from which we will download the |
expected image |
expected_image_locator: a unique ID string under which we will store the |
expected image within storage_root (probably including a checksum to |
guarantee uniqueness) |
- actual_image_url: file or HTTP url from which we will download the |
+ actual_image_url: file, GS, or HTTP url from which we will download the |
actual image |
actual_image_locator: a unique ID string under which we will store the |
actual image within storage_root (probably including a checksum to |
@@ -258,7 +304,7 @@ class ImageDiffDB(object): |
if not key in self._diff_dict: |
try: |
new_diff_record = DiffRecord( |
- self._storage_root, |
+ self._gs, self._storage_root, |
expected_image_url=expected_image_url, |
expected_image_locator=expected_image_locator, |
actual_image_url=actual_image_url, |
@@ -286,18 +332,25 @@ class ImageDiffDB(object): |
# Utility functions |
-def _download_file(local_filepath, url): |
+def _download_file(gs, local_filepath, url): |
"""Download a file from url to local_filepath, unless it is already there. |
Args: |
+ gs: instance of GSUtils object, in case the url points at Google Storage |
local_filepath: path on local disk where the image should be stored |
- url: URL from which we can download the image if we don't have it yet |
+ url: HTTP or GS URL from which we can download the image if we don't have |
+ it yet |
""" |
if not os.path.exists(local_filepath): |
_mkdir_unless_exists(os.path.dirname(local_filepath)) |
- with contextlib.closing(urllib.urlopen(url)) as url_handle: |
- with open(local_filepath, 'wb') as file_handle: |
- shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) |
+ if gs_utils.GSUtils.is_gs_url(url): |
+ (bucket, path) = gs_utils.GSUtils.split_gs_url(url) |
+ gs.download_file(source_bucket=bucket, source_path=path, |
+ dest_path=local_filepath) |
+ else: |
+ with contextlib.closing(urllib.urlopen(url)) as url_handle: |
+ with open(local_filepath, 'wb') as file_handle: |
+ shutil.copyfileobj(fsrc=url_handle, fdst=file_handle) |
def _mkdir_unless_exists(path): |