| OLD | NEW | 
|    1 #!/usr/bin/python |    1 #!/usr/bin/python | 
|    2  |    2  | 
|    3 # pylint: disable=C0301 |    3 # pylint: disable=C0301 | 
|    4 """ |    4 """ | 
|    5 Copyright 2014 Google Inc. |    5 Copyright 2014 Google Inc. | 
|    6  |    6  | 
|    7 Use of this source code is governed by a BSD-style license that can be |    7 Use of this source code is governed by a BSD-style license that can be | 
|    8 found in the LICENSE file. |    8 found in the LICENSE file. | 
|    9  |    9  | 
|   10 Utilities for accessing Google Cloud Storage, using the boto library. |   10 Utilities for accessing Google Cloud Storage, using the boto library. | 
| (...skipping 22 matching lines...) Expand all  Loading... | 
|   33   if import_dirpath not in sys.path: |   33   if import_dirpath not in sys.path: | 
|   34     # We need to insert at the beginning of the path, to make sure that our |   34     # We need to insert at the beginning of the path, to make sure that our | 
|   35     # imported versions are favored over others that might be in the path. |   35     # imported versions are favored over others that might be in the path. | 
|   36     sys.path.insert(0, import_dirpath) |   36     sys.path.insert(0, import_dirpath) | 
|   37 from boto.gs.connection import GSConnection |   37 from boto.gs.connection import GSConnection | 
|   38 from boto.gs.key import Key |   38 from boto.gs.key import Key | 
|   39 from boto.s3.bucketlistresultset import BucketListResultSet |   39 from boto.s3.bucketlistresultset import BucketListResultSet | 
|   40 from boto.s3.prefix import Prefix |   40 from boto.s3.prefix import Prefix | 
|   41  |   41  | 
|   42  |   42  | 
|   43 def delete_file(bucket, path): |   43 class GSUtils(object): | 
|   44   """Delete a single file within a GS bucket. |   44   """Utilities for accessing Google Cloud Storage, using the boto library.""" | 
|   45  |   45  | 
|   46   TODO(epoger): what if bucket or path does not exist?  Should probably raise |   46   def __init__(self, boto_file_path=os.path.join('~','.boto')): | 
|   47   an exception.  Implement, and add a test to exercise this. |   47     """Constructor. | 
|   48  |   48  | 
|   49   Params: |   49     Params: | 
|   50     bucket: GS bucket to delete a file from |   50       boto_file_path: full path (local-OS-style) on local disk where .boto | 
|   51     path: full path (Posix-style) of the file within the bucket to delete |   51           credentials file can be found.  An exception is thrown if this file | 
|   52   """ |   52           is missing. | 
|   53   conn = _create_connection() |   53           TODO(epoger): Change missing-file behavior: allow the caller to | 
|   54   b = conn.get_bucket(bucket_name=bucket) |   54           operate on public files in Google Storage. | 
|   55   item = Key(b) |   55     """ | 
|   56   item.key = path |   56     boto_file_path = os.path.expanduser(boto_file_path) | 
|   57   item.delete() |   57     print 'Reading boto file from %s' % boto_file_path | 
 |   58     boto_dict = _config_file_as_dict(filepath=boto_file_path) | 
 |   59     self._gs_access_key_id = boto_dict['gs_access_key_id'] | 
 |   60     self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | 
|   58  |   61  | 
 |   62   def delete_file(self, bucket, path): | 
 |   63     """Delete a single file within a GS bucket. | 
|   59  |   64  | 
|   60 def upload_file(source_path, dest_bucket, dest_path): |   65     TODO(epoger): what if bucket or path does not exist?  Should probably raise | 
|   61   """Upload contents of a local file to Google Storage. |   66     an exception.  Implement, and add a test to exercise this. | 
|   62  |   67  | 
|   63   TODO(epoger): Add the extra parameters provided by upload_file() within |   68     Params: | 
|   64   https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , |   69       bucket: GS bucket to delete a file from | 
|   65   so we can replace that function with this one. |   70       path: full path (Posix-style) of the file within the bucket to delete | 
 |   71     """ | 
 |   72     conn = self._create_connection() | 
 |   73     b = conn.get_bucket(bucket_name=bucket) | 
 |   74     item = Key(b) | 
 |   75     item.key = path | 
 |   76     item.delete() | 
|   66  |   77  | 
|   67   params: |   78   def upload_file(self, source_path, dest_bucket, dest_path): | 
|   68     source_path: full path (local-OS-style) on local disk to read from |   79     """Upload contents of a local file to Google Storage. | 
|   69     dest_bucket: GCS bucket to copy the file to |  | 
|   70     dest_path: full path (Posix-style) within that bucket |  | 
|   71   """ |  | 
|   72   conn = _create_connection() |  | 
|   73   b = conn.get_bucket(bucket_name=dest_bucket) |  | 
|   74   item = Key(b) |  | 
|   75   item.key = dest_path |  | 
|   76   item.set_contents_from_filename(filename=source_path) |  | 
|   77  |   80  | 
 |   81     TODO(epoger): Add the extra parameters provided by upload_file() within | 
 |   82     https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , | 
 |   83     so we can replace that function with this one. | 
|   78  |   84  | 
|   79 def download_file(source_bucket, source_path, dest_path, |   85     params: | 
|   80                   create_subdirs_if_needed=False): |   86       source_path: full path (local-OS-style) on local disk to read from | 
|   81   """ Downloads a single file from Google Cloud Storage to local disk. |   87       dest_bucket: GCS bucket to copy the file to | 
 |   88       dest_path: full path (Posix-style) within that bucket | 
 |   89     """ | 
 |   90     conn = self._create_connection() | 
 |   91     b = conn.get_bucket(bucket_name=dest_bucket) | 
 |   92     item = Key(b) | 
 |   93     item.key = dest_path | 
 |   94     item.set_contents_from_filename(filename=source_path) | 
|   82  |   95  | 
|   83   Args: |   96   def download_file(self, source_bucket, source_path, dest_path, | 
|   84     source_bucket: GCS bucket to download the file from |   97                     create_subdirs_if_needed=False): | 
|   85     source_path: full path (Posix-style) within that bucket |   98     """Downloads a single file from Google Cloud Storage to local disk. | 
|   86     dest_path: full path (local-OS-style) on local disk to copy the file to |  | 
|   87     create_subdirs_if_needed: boolean; whether to create subdirectories as |  | 
|   88         needed to create dest_path |  | 
|   89   """ |  | 
|   90   conn = _create_connection() |  | 
|   91   b = conn.get_bucket(bucket_name=source_bucket) |  | 
|   92   item = Key(b) |  | 
|   93   item.key = source_path |  | 
|   94   if create_subdirs_if_needed: |  | 
|   95     _makedirs_if_needed(os.path.dirname(dest_path)) |  | 
|   96   with open(dest_path, 'w') as f: |  | 
|   97     item.get_contents_to_file(fp=f) |  | 
|   98  |   99  | 
 |  100     Args: | 
 |  101       source_bucket: GCS bucket to download the file from | 
 |  102       source_path: full path (Posix-style) within that bucket | 
 |  103       dest_path: full path (local-OS-style) on local disk to copy the file to | 
 |  104       create_subdirs_if_needed: boolean; whether to create subdirectories as | 
 |  105           needed to create dest_path | 
 |  106     """ | 
 |  107     conn = self._create_connection() | 
 |  108     b = conn.get_bucket(bucket_name=source_bucket) | 
 |  109     item = Key(b) | 
 |  110     item.key = source_path | 
 |  111     if create_subdirs_if_needed: | 
 |  112       _makedirs_if_needed(os.path.dirname(dest_path)) | 
 |  113     with open(dest_path, 'w') as f: | 
 |  114       item.get_contents_to_file(fp=f) | 
|   99  |  115  | 
|  100 def list_bucket_contents(bucket, subdir=None): |  116   def list_bucket_contents(self, bucket, subdir=None): | 
|  101   """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple. |  117     """Returns files in the Google Storage bucket as a (dirs, files) tuple. | 
|  102  |  118  | 
|  103   Args: |  119     Args: | 
|  104     bucket: name of the Google Storage bucket |  120       bucket: name of the Google Storage bucket | 
|  105     subdir: directory within the bucket to list, or None for root directory |  121       subdir: directory within the bucket to list, or None for root directory | 
|  106   """ |  122     """ | 
|  107   # The GS command relies on the prefix (if any) ending with a slash. |  123     # The GS command relies on the prefix (if any) ending with a slash. | 
|  108   prefix = subdir or '' |  124     prefix = subdir or '' | 
|  109   if prefix and not prefix.endswith('/'): |  125     if prefix and not prefix.endswith('/'): | 
|  110     prefix += '/' |  126       prefix += '/' | 
|  111   prefix_length = len(prefix) if prefix else 0 |  127     prefix_length = len(prefix) if prefix else 0 | 
|  112  |  128  | 
|  113   conn = _create_connection() |  129     conn = self._create_connection() | 
|  114   b = conn.get_bucket(bucket_name=bucket) |  130     b = conn.get_bucket(bucket_name=bucket) | 
|  115   lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |  131     lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') | 
|  116   dirs = [] |  132     dirs = [] | 
|  117   files = [] |  133     files = [] | 
|  118   for item in lister: |  134     for item in lister: | 
|  119     t = type(item) |  135       t = type(item) | 
|  120     if t is Key: |  136       if t is Key: | 
|  121       files.append(item.key[prefix_length:]) |  137         files.append(item.key[prefix_length:]) | 
|  122     elif t is Prefix: |  138       elif t is Prefix: | 
|  123       dirs.append(item.name[prefix_length:-1]) |  139         dirs.append(item.name[prefix_length:-1]) | 
|  124   return (dirs, files) |  140     return (dirs, files) | 
 |  141  | 
 |  142   def _create_connection(self): | 
 |  143     """Returns a GSConnection object we can use to access Google Storage.""" | 
 |  144     return GSConnection( | 
 |  145         gs_access_key_id=self._gs_access_key_id, | 
 |  146         gs_secret_access_key=self._gs_secret_access_key) | 
|  125  |  147  | 
|  126  |  148  | 
|  127 def _config_file_as_dict(filepath): |  149 def _config_file_as_dict(filepath): | 
|  128   """Reads a boto-style config file into a dict. |  150   """Reads a boto-style config file into a dict. | 
|  129  |  151  | 
|  130   Parses all lines from the file of this form: key = value |  152   Parses all lines from the file of this form: key = value | 
|  131   TODO(epoger): Create unittest. |  153   TODO(epoger): Create unittest. | 
|  132  |  154  | 
|  133   Params: |  155   Params: | 
|  134     filepath: path to config file on local disk |  156     filepath: path to config file on local disk | 
|  135  |  157  | 
|  136   Returns: contents of the config file, as a dictionary |  158   Returns: contents of the config file, as a dictionary | 
|  137  |  159  | 
|  138   Raises exception if file not found. |  160   Raises exception if file not found. | 
|  139   """ |  161   """ | 
|  140   dic = {} |  162   dic = {} | 
|  141   line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') |  163   line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') | 
|  142   with open(filepath) as f: |  164   with open(filepath) as f: | 
|  143     for line in f: |  165     for line in f: | 
|  144       match = line_regex.match(line) |  166       match = line_regex.match(line) | 
|  145       if match: |  167       if match: | 
|  146         (key, value) = match.groups() |  168         (key, value) = match.groups() | 
|  147         dic[key] = value |  169         dic[key] = value | 
|  148   return dic |  170   return dic | 
|  149  |  171  | 
|  150  |  172  | 
|  151 def _create_connection(boto_file_path=os.path.join('~','.boto')): |  | 
|  152   """Returns a GSConnection object we can use to access Google Storage. |  | 
|  153  |  | 
|  154   Params: |  | 
|  155     boto_file_path: full path (local-OS-style) on local disk where .boto |  | 
|  156         credentials file can be found |  | 
|  157  |  | 
|  158   TODO(epoger): Change this module to be object-based, where __init__() reads |  | 
|  159   the boto file into boto_dict once instead of repeatedly for each operation. |  | 
|  160  |  | 
|  161   TODO(epoger): if the file does not exist, rather than raising an exception, |  | 
|  162   create a GSConnection that can operate on public files. |  | 
|  163   """ |  | 
|  164   boto_file_path = os.path.expanduser(boto_file_path) |  | 
|  165   print 'Reading boto file from %s' % boto_file_path |  | 
|  166   boto_dict = _config_file_as_dict(filepath=boto_file_path) |  | 
|  167   return GSConnection( |  | 
|  168       gs_access_key_id=boto_dict['gs_access_key_id'], |  | 
|  169       gs_secret_access_key=boto_dict['gs_secret_access_key']) |  | 
|  170  |  | 
|  171  |  | 
|  172 def _makedirs_if_needed(path): |  173 def _makedirs_if_needed(path): | 
|  173   """ Creates a directory (and any parent directories needed), if it does not |  174   """Creates a directory (and any parent directories needed), if it does not | 
|  174   exist yet. |  175   exist yet. | 
|  175  |  176  | 
|  176   Args: |  177   Args: | 
|  177     path: full path of directory to create |  178     path: full path of directory to create | 
|  178   """ |  179   """ | 
|  179   try: |  180   try: | 
|  180     os.makedirs(path) |  181     os.makedirs(path) | 
|  181   except OSError as e: |  182   except OSError as e: | 
|  182     if e.errno != errno.EEXIST: |  183     if e.errno != errno.EEXIST: | 
|  183       raise |  184       raise | 
|  184  |  185  | 
|  185  |  186  | 
|  186 def _run_self_test(): |  187 def _run_self_test(): | 
|  187   bucket = 'chromium-skia-gm' |  188   bucket = 'chromium-skia-gm' | 
|  188   remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |  189   remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 
|  189   subdir = 'subdir' |  190   subdir = 'subdir' | 
|  190   filenames_to_upload = ['file1', 'file2'] |  191   filenames_to_upload = ['file1', 'file2'] | 
 |  192   gs = GSUtils() | 
|  191  |  193  | 
|  192   # Upload test files to Google Storage. |  194   # Upload test files to Google Storage. | 
|  193   local_src_dir = tempfile.mkdtemp() |  195   local_src_dir = tempfile.mkdtemp() | 
|  194   os.mkdir(os.path.join(local_src_dir, subdir)) |  196   os.mkdir(os.path.join(local_src_dir, subdir)) | 
|  195   try: |  197   try: | 
|  196     for filename in filenames_to_upload: |  198     for filename in filenames_to_upload: | 
|  197       with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |  199       with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 
|  198         f.write('contents of %s\n' % filename) |  200         f.write('contents of %s\n' % filename) | 
|  199       upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |  201       gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), | 
|  200                   dest_bucket=bucket, |  202                      dest_bucket=bucket, | 
|  201                   dest_path=posixpath.join(remote_dir, subdir, filename)) |  203                      dest_path=posixpath.join(remote_dir, subdir, filename)) | 
|  202   finally: |  204   finally: | 
|  203     shutil.rmtree(local_src_dir) |  205     shutil.rmtree(local_src_dir) | 
|  204  |  206  | 
|  205   # Get a list of the files we uploaded to Google Storage. |  207   # Get a list of the files we uploaded to Google Storage. | 
|  206   (dirs, files) = list_bucket_contents( |  208   (dirs, files) = gs.list_bucket_contents( | 
|  207       bucket=bucket, subdir=remote_dir) |  209       bucket=bucket, subdir=remote_dir) | 
|  208   assert dirs == [subdir] |  210   assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 
|  209   assert files == [] |  211   assert files == [], '%s == []' % files | 
|  210   (dirs, files) = list_bucket_contents( |  212   (dirs, files) = gs.list_bucket_contents( | 
|  211       bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |  213       bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 
|  212   assert dirs == [] |  214   assert dirs == [], '%s == []' % dirs | 
|  213   assert files == filenames_to_upload |  215   assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) | 
|  214  |  216  | 
|  215   # Download the files we uploaded to Google Storage, and validate contents. |  217   # Download the files we uploaded to Google Storage, and validate contents. | 
|  216   local_dest_dir = tempfile.mkdtemp() |  218   local_dest_dir = tempfile.mkdtemp() | 
|  217   try: |  219   try: | 
|  218     for filename in filenames_to_upload: |  220     for filename in filenames_to_upload: | 
|  219       download_file(source_bucket=bucket, |  221       gs.download_file(source_bucket=bucket, | 
|  220                     source_path=posixpath.join(remote_dir, subdir, filename), |  222                        source_path=posixpath.join(remote_dir, subdir, filename), | 
|  221                     dest_path=os.path.join(local_dest_dir, subdir, filename), |  223                        dest_path=os.path.join(local_dest_dir, subdir, filename), | 
|  222                     create_subdirs_if_needed=True) |  224                        create_subdirs_if_needed=True) | 
|  223       with open(os.path.join(local_dest_dir, subdir, filename)) as f: |  225       with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 
|  224         file_contents = f.read() |  226         file_contents = f.read() | 
|  225       assert file_contents == 'contents of %s\n' % filename |  227       assert file_contents == 'contents of %s\n' % filename, ( | 
 |  228           '%s == "contents of %s\n"' % (file_contents, filename)) | 
|  226   finally: |  229   finally: | 
|  227     shutil.rmtree(local_dest_dir) |  230     shutil.rmtree(local_dest_dir) | 
|  228  |  231  | 
|  229   # Delete all the files we uploaded to Google Storage. |  232   # Delete all the files we uploaded to Google Storage. | 
|  230   for filename in filenames_to_upload: |  233   for filename in filenames_to_upload: | 
|  231     delete_file(bucket=bucket, |  234     gs.delete_file(bucket=bucket, | 
|  232                 path=posixpath.join(remote_dir, subdir, filename)) |  235                    path=posixpath.join(remote_dir, subdir, filename)) | 
|  233  |  236  | 
|  234   # Confirm that we deleted all the files we uploaded to Google Storage. |  237   # Confirm that we deleted all the files we uploaded to Google Storage. | 
|  235   (dirs, files) = list_bucket_contents( |  238   (dirs, files) = gs.list_bucket_contents( | 
|  236       bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |  239       bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 
|  237   assert dirs == [] |  240   assert dirs == [], '%s == []' % dirs | 
|  238   assert files == [] |  241   assert files == [], '%s == []' % files | 
|  239  |  242  | 
|  240  |  243  | 
|  241 # TODO(epoger): How should we exercise this self-test? |  244 # TODO(epoger): How should we exercise this self-test? | 
|  242 # I avoided using the standard unittest framework, because these Google Storage |  245 # I avoided using the standard unittest framework, because these Google Storage | 
|  243 # operations are expensive and require .boto permissions. |  246 # operations are expensive and require .boto permissions. | 
|  244 # |  247 # | 
|  245 # How can we automatically test this code without wasting too many resources |  248 # How can we automatically test this code without wasting too many resources | 
|  246 # or needing .boto permissions? |  249 # or needing .boto permissions? | 
|  247 if __name__ == '__main__': |  250 if __name__ == '__main__': | 
|  248   _run_self_test() |  251   _run_self_test() | 
| OLD | NEW |