| Index: py/utils/anyfile_utils.py
|
| diff --git a/py/utils/anyfile_utils.py b/py/utils/anyfile_utils.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..23c6c51ca2042893d04d904b5bc95ff4605d327e
|
| --- /dev/null
|
| +++ b/py/utils/anyfile_utils.py
|
| @@ -0,0 +1,151 @@
|
| +#!/usr/bin/python
|
| +
|
| +"""
|
| +Copyright 2014 Google Inc.
|
| +
|
| +Use of this source code is governed by a BSD-style license that can be
|
| +found in the LICENSE file.
|
| +
|
| +Utilities for uniformly handling files whether they are local or remote.
|
| +
|
| +EPOGER: Add unittests
|
| +"""
|
| +
|
| +# System-level imports
|
| +import contextlib
|
| +import os
|
| +import shutil
|
| +import urllib
|
| +import urlparse
|
| +
|
| +# Local imports
|
| +import gs_utils
|
| +
|
| +
|
| +class AnyFileUtils(object):
|
| + """Utilities for uniformly handling files whether they are local or remote."""
|
| +
|
| + class BaseFileObject(object):
|
| + """Common base of GsFile, HttpFile and LocalFile; adds no behavior."""
|
| + pass
|
| +
|
| +  class GsFile(BaseFileObject):
|
| +    """A file in Google Storage, referred to by a gs: URL."""
|
| +    def __init__(self, gs_url):
|
| +      self._gs_url = gs_url
|
| +      # Bucket, path and HTTP URL are all computed lazily, on first access.
|
| +      self._bucket = self._path = self._url = None
|
| +
|
| +    @property
|
| +    def bucket(self):
|
| +      if self._bucket is None:
|
| +        (self._bucket, self._path) = gs_utils.GSUtils.split_gs_url(self._gs_url)
|
| +      return self._bucket
|
| +
|
| +    @property
|
| +    def path(self):
|
| +      if self._path is None:
|
| +        (self._bucket, self._path) = gs_utils.GSUtils.split_gs_url(self._gs_url)
|
| +      return self._path
|
| +
|
| +    @property
|
| +    def url(self):
|
| +      if self._url is None:
|
| +        self._url = 'http://%s.commondatastorage.googleapis.com' % self.bucket
|
| +        if self.path:
|
| +          self._url += '/' + self.path
|
| +      return self._url
|
| +
|
| + class HttpFile(BaseFileObject):  # A file referred to by an http:/https: URL.
|
| + def __init__(self, url):
|
| + self._url = url
|
| +
|
| + @property
|
| + def url(self):
|
| + return self._url  # The URL exactly as it was passed to the constructor.
|
| +
|
| +  class LocalFile(BaseFileObject):
|
| +    """A file on local disk, referred to by a file: URL or by a filepath."""
|
| +    def __init__(self, url=None, path=None):
|
| +      """Must be constructed with EITHER url OR path, but not both."""
|
| +      assert (url is None) != (path is None)
|
| +      self._url = url
|
| +      # Whichever of url/abspath was not supplied is derived on first access.
|
| +      self._abspath = None if path is None else os.path.abspath(path)
|
| +
|
| +    @property
|
| +    def abspath(self):
|
| +      if self._abspath is None:
|
| +        # Convert the path component of the file: URL to a local-OS path.
|
| +        self._abspath = os.path.abspath(
|
| +            urllib.url2pathname(urlparse.urlparse(self._url).path))
|
| +      return self._abspath
|
| +
|
| +    @property
|
| +    def url(self):
|
| +      if self._url is None:
|
| +        self._url = urlparse.urljoin(
|
| +            'file:', urllib.pathname2url(self._abspath))
|
| +      return self._url
|
| +
|
| + def __init__(self, boto_file_path=None):
|
| + """Constructor; creates the GSUtils helper stored as self._gs.
|
| +
|
| + Args:
|
| + boto_file_path: full path (local-OS-style) on local disk where .boto
|
| + credentials file can be found. If None, then the AnyFileUtils object
|
| + created will be able to access only public files in Google Storage.
|
| +
|
| + Raises an exception if no file is found at boto_file_path, or if the file
|
| + found there is malformed.
|
| + """
|
| + self._gs = gs_utils.GSUtils(boto_file_path=boto_file_path)
|
| +
|
| +  def copy_file(self, source, dest):
|
| +    """Copy a single remote (http, https or gs) file to local disk.
|
| +
|
| +    Args:
|
| +      source: The file to copy; anything create_file_object() accepts.
|
| +      dest: Where to write the copy; anything create_file_object() accepts.
|
| +
|
| +    Raises:
|
| +      Exception: if source is not an HTTP or GS file, or dest is not local.
|
| +    """
|
| +    source_object = self.create_file_object(source)
|
| +    dest_object = self.create_file_object(dest)
|
| +
|
| +    if not isinstance(dest_object, self.LocalFile):
|
| +      raise Exception('unsupported dest_object type %s' % type(dest_object))
|
| +
|
| +    if isinstance(source_object, self.HttpFile):
|
| +      with contextlib.closing(urllib.urlopen(source_object.url)) as fsrc:
|
| +        with open(dest_object.abspath, 'wb') as fdst:
|
| +          shutil.copyfileobj(fsrc=fsrc, fdst=fdst)
|
| +    elif isinstance(source_object, self.GsFile):
|
| +      # GsFile derives bucket and path from the gs: URL on first access.
|
| +      self._gs.download_file(
|
| +          source_bucket=source_object.bucket, source_path=source_object.path,
|
| +          dest_path=dest_object.abspath)
|
| +    else:
|
| +      raise Exception('unsupported source_object type %s' % type(source_object))
|
| +
|
| +  @staticmethod
|
| +  def create_file_object(url_or_filepath):
|
| +    """Returns a subclass instance of BaseFileObject referring to a file or url.
|
| +
|
| +    Args:
|
| +      url_or_filepath: URL referring to the file object, a path to a file on
|
| +          local disk, or a BaseFileObject instance (returned unchanged)
|
| +    """
|
| +    # Callers such as copy_file() may pass an already-constructed file object.
|
| +    if isinstance(url_or_filepath, AnyFileUtils.BaseFileObject):
|
| +      return url_or_filepath
|
| +    lowercase_url = url_or_filepath.lower()
|
| +    if lowercase_url.startswith(('http:', 'https:')):
|
| +      return AnyFileUtils.HttpFile(url=url_or_filepath)
|
| +    if lowercase_url.startswith('gs:'):
|
| +      return AnyFileUtils.GsFile(gs_url=url_or_filepath)
|
| +    if lowercase_url.startswith('file:'):
|
| +      return AnyFileUtils.LocalFile(url=url_or_filepath)
|
| +    else:
|
| +      return AnyFileUtils.LocalFile(path=url_or_filepath)
|
|
|