Index: py/utils/anyfile_utils.py |
diff --git a/py/utils/anyfile_utils.py b/py/utils/anyfile_utils.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..23c6c51ca2042893d04d904b5bc95ff4605d327e |
--- /dev/null |
+++ b/py/utils/anyfile_utils.py |
@@ -0,0 +1,151 @@ |
+#!/usr/bin/python |
+ |
+""" |
+Copyright 2014 Google Inc. |
+ |
+Use of this source code is governed by a BSD-style license that can be |
+found in the LICENSE file. |
+ |
+Utilities for uniformly handling files whether they are local or remote. |
+ |
+EPOGER: Add unittests |
+""" |
+ |
+# System-level imports |
+import contextlib |
+import os |
+import shutil |
+import urllib |
+import urlparse |
+ |
+# Local imports |
+import gs_utils |
+ |
+ |
+class AnyFileUtils(object): |
+ """Utilities for uniformly handling files whether they are local or remote.""" |
+ |
+ class BaseFileObject(object): |
+ """Base object used for all file types.""" |
+ pass |
+ |
+ class GsFile(BaseFileObject): |
+ def __init__(self, gs_url): |
+ self._gs_url = gs_url |
+ self._bucket = None |
+ self._path = None |
+ self._url = None |
+ |
+ @property |
+ def bucket(self): |
+ if self._bucket == None: |
+ (self._bucket, self._path) = gs_utils.GSUtils.split_gs_url(self._gs_url) |
+ return self._bucket |
+ |
+ @property |
+ def path(self): |
+ if self._path == None: |
+ (self._bucket, self._path) = gs_utils.GSUtils.split_gs_url(self._gs_url) |
+ return self._path |
+ |
+ @property |
+ def url(self): |
+ if self._url == None: |
+ self._url = 'http://%s.commondatastorage.googleapis.com' % self.bucket |
+ if self.path: |
+ self._url += '/' + self.path |
+ return self._url |
+ |
+ class HttpFile(BaseFileObject): |
+ def __init__(self, url): |
+ self._url = url |
+ |
+ @property |
+ def url(self): |
+ return self._url |
+ |
+ class LocalFile(BaseFileObject): |
+ def __init__(self, url=None, path=None): |
+ """Must be constructed with EITHER url OR path, but not both.""" |
+ assert (url == None and path != None) or (url != None and path == None) |
+ self._url = url |
+ if path == None: |
+ self._abspath = None |
+ else: |
+ self._abspath = os.path.abspath(path) |
+ |
+ @property |
+ def abspath(self): |
+ if self._abspath == None: |
+ self._abspath = os.path.abspath( |
+ urllib.url2pathname(urlparse.urlparse(self._url).path)) |
+ return self._abspath |
+ |
+ @property |
+ def url(self): |
+ if self._url == None: |
+ self._url = urlparse.urljoin( |
+ 'file:', urllib.pathname2url(self._abspath)) |
+ return self._url |
+ |
+ def __init__(self, boto_file_path=None): |
+ """Constructor. |
+ |
+ Params: |
+ boto_file_path: full path (local-OS-style) on local disk where .boto |
+ credentials file can be found. If None, then the AnyFileUtils object |
+ created will be able to access only public files in Google Storage. |
+ |
+ Raises an exception if no file is found at boto_file_path, or if the file |
+ found there is malformed. |
+ """ |
+ self._gs = gs_utils.GSUtils(boto_file_path=boto_file_path) |
+ |
+ def copy_file(self, source, dest): |
+ """Copy a single file from one place to another. |
+ |
+ Args: |
+ source: The file to make a copy of. |
+ May be a URL, filepath, or BaseFileObject subclass instance. |
+ dest: Where to write the copy to. |
+ May be a URL, filepath, or BaseFileObject subclass instance. |
+ dest: URL or filepath telling us where to write the copy to |
+ """ |
+ source_object = self.create_file_object(source) |
+ dest_object = self.create_file_object(dest) |
+ |
+ if not isinstance(dest_object, self.LocalFile): |
+ raise Exception('unsupported dest_object type %s' % type(dest_object)) |
+ |
+ if isinstance(source_object, self.HttpFile): |
+ with contextlib.closing(urllib.urlopen(source_object.url)) as fsrc: |
+ with open(dest_object.abspath, 'wb') as fdst: |
+ shutil.copyfileobj(fsrc=fsrc, fdst=fdst) |
+ elif isinstance(source_object, self.GsFile): |
+ (bucket, path) = gs_utils.GSUtils.split_gs_url(url) |
+ self._gs.download_file( |
+ source_bucket=source_object.bucket, source_path=source_object.path, |
+ dest_path=dest_object.abspath) |
+ else: |
+ raise Exception('unsupported source_object type %s' % type(source_object)) |
+ |
+ @staticmethod |
+ def create_file_object(url_or_filepath): |
+ """Returns a subclass instance of BaseFileObject referring to a file or url. |
+ |
+ Args: |
+ url_or_filepath: URL referring to the file object, or a path to file on |
+ local disk |
+ """ |
+ # EPOGER: first, check if it's already a BaseFileObject |
+ lowercase_url = url_or_filepath.lower() |
+ if lowercase_url.startswith('http:'): |
+ return AnyFileUtils.HttpFile(url=url_or_filepath) |
+ if lowercase_url.startswith('https:'): |
+ return AnyFileUtils.HttpFile(url=url_or_filepath) |
+ if lowercase_url.startswith('gs:'): |
+ return AnyFileUtils.GsFile(gs_url=url_or_filepath) |
+ if lowercase_url.startswith('file:'): |
+ return AnyFileUtils.LocalFile(url=url_or_filepath) |
+ else: |
+ return AnyFileUtils.LocalFile(path=url_or_filepath) |