OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
190 | 190 |
191 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, | 191 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, |
192 predefined_acl=None, fine_grained_acl_list=None): | 192 predefined_acl=None, fine_grained_acl_list=None): |
193 """Recursively upload contents of a local directory to Google Storage. | 193 """Recursively upload contents of a local directory to Google Storage. |
194 | 194 |
195 params: | 195 params: |
196 source_dir: full path (local-OS-style) on local disk of directory to copy | 196 source_dir: full path (local-OS-style) on local disk of directory to copy |
197 contents of | 197 contents of |
198 dest_bucket: GCS bucket to copy the files into | 198 dest_bucket: GCS bucket to copy the files into |
199 dest_dir: full path (Posix-style) within that bucket; write the files into | 199 dest_dir: full path (Posix-style) within that bucket; write the files into |
200 this directory | 200 this directory. If None, write into the root directory of the bucket. |
201 predefined_acl: which predefined ACL to apply to the files on Google | 201 predefined_acl: which predefined ACL to apply to the files on Google |
202 Storage; must be one of the PredefinedACL values defined above. | 202 Storage; must be one of the PredefinedACL values defined above. |
203 If None, inherits dest_bucket's default object ACL. | 203 If None, inherits dest_bucket's default object ACL. |
204 TODO(epoger): add unittests for this param, although it seems to work | 204 TODO(epoger): add unittests for this param, although it seems to work |
205 in my manual testing | 205 in my manual testing |
206 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 206 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
207 to apply to every file uploaded (on top of the predefined_acl), | 207 to apply to every file uploaded (on top of the predefined_acl), |
208 or None if predefined_acl is sufficient | 208 or None if predefined_acl is sufficient |
209 | 209 |
210 The copy operates as a "merge with overwrite": any files in source_dir will | 210 The copy operates as a "merge with overwrite": any files in source_dir will |
211 be "overlaid" on top of the existing content in dest_dir. Existing files | 211 be "overlaid" on top of the existing content in dest_dir. Existing files |
212 with the same names will be overwritten. | 212 with the same names will be overwritten. |
213 | 213 |
214 TODO(epoger): Upload multiple files simultaneously to reduce latency. | 214 TODO(epoger): Upload multiple files simultaneously to reduce latency. |
215 | 215 |
216 TODO(epoger): Add a "noclobber" mode that will not upload any files that would | 216 TODO(epoger): Add a "noclobber" mode that will not upload any files that would |
217 overwrite existing files in Google Storage. | 217 overwrite existing files in Google Storage. |
218 | 218 |
219 TODO(epoger): Consider adding a do_compress parameter that would compress | 219 TODO(epoger): Consider adding a do_compress parameter that would compress |
220 the file using gzip before upload, and add a "Content-Encoding:gzip" header | 220 the file using gzip before upload, and add a "Content-Encoding:gzip" header |
221 so that HTTP downloads of the file would be unzipped automatically. | 221 so that HTTP downloads of the file would be unzipped automatically. |
222 See https://developers.google.com/storage/docs/gsutil/addlhelp/ | 222 See https://developers.google.com/storage/docs/gsutil/addlhelp/ |
223 WorkingWithObjectMetadata#content-encoding | 223 WorkingWithObjectMetadata#content-encoding |
224 """ | 224 """ |
225 b = self._connect_to_bucket(bucket_name=dest_bucket) | 225 b = self._connect_to_bucket(bucket_name=dest_bucket) |
226 for filename in sorted(os.listdir(source_dir)): | 226 for filename in sorted(os.listdir(source_dir)): |
227 local_path = os.path.join(source_dir, filename) | 227 local_path = os.path.join(source_dir, filename) |
| 228 if dest_dir: |
| 229 remote_path = posixpath.join(dest_dir, filename) |
| 230 else: |
| 231 remote_path = filename |
| 232 |
228 if os.path.isdir(local_path): | 233 if os.path.isdir(local_path): |
229 self.upload_dir_contents( # recurse | 234 self.upload_dir_contents( # recurse |
230 source_dir=local_path, dest_bucket=dest_bucket, | 235 source_dir=local_path, dest_bucket=dest_bucket, |
231 dest_dir=posixpath.join(dest_dir, filename), | 236 dest_dir=remote_path, |
232 predefined_acl=predefined_acl, | 237 predefined_acl=predefined_acl, |
233 fine_grained_acl_list=fine_grained_acl_list) | 238 fine_grained_acl_list=fine_grained_acl_list) |
234 else: | 239 else: |
235 item = Key(b) | 240 item = Key(b) |
236 dest_path = posixpath.join(dest_dir, filename) | 241 item.key = remote_path |
237 item.key = dest_path | |
238 try: | 242 try: |
239 item.set_contents_from_filename( | 243 item.set_contents_from_filename( |
240 filename=local_path, policy=predefined_acl) | 244 filename=local_path, policy=predefined_acl) |
241 except BotoServerError, e: | 245 except BotoServerError, e: |
242 e.body = (repr(e.body) + | 246 e.body = (repr(e.body) + |
243 ' while uploading local_path=%s to bucket=%s, path=%s' % ( | 247 ' while uploading local_path=%s to bucket=%s, path=%s' % ( |
244 local_path, dest_bucket, dest_path)) | 248 local_path, dest_bucket, remote_path)) |
245 raise | 249 raise |
246 # TODO(epoger): This may be inefficient, because it calls | 250 # TODO(epoger): This may be inefficient, because it calls |
247 # _connect_to_bucket() for every file. Depending on how expensive that | 251 # _connect_to_bucket() for every file. Depending on how expensive that |
248 # call is, we may want to optimize this. | 252 # call is, we may want to optimize this. |
249 for (id_type, id_value, permission) in fine_grained_acl_list or []: | 253 for (id_type, id_value, permission) in fine_grained_acl_list or []: |
250 self.set_acl( | 254 self.set_acl( |
251 bucket=dest_bucket, path=dest_path, | 255 bucket=dest_bucket, path=remote_path, |
252 id_type=id_type, id_value=id_value, permission=permission) | 256 id_type=id_type, id_value=id_value, permission=permission) |
253 | 257 |
254 def download_file(self, source_bucket, source_path, dest_path, | 258 def download_file(self, source_bucket, source_path, dest_path, |
255 create_subdirs_if_needed=False): | 259 create_subdirs_if_needed=False): |
256 """Downloads a single file from Google Cloud Storage to local disk. | 260 """Downloads a single file from Google Cloud Storage to local disk. |
257 | 261 |
258 Args: | 262 Args: |
259 source_bucket: GCS bucket to download the file from | 263 source_bucket: GCS bucket to download the file from |
260 source_path: full path (Posix-style) within that bucket | 264 source_path: full path (Posix-style) within that bucket |
261 dest_path: full path (local-OS-style) on local disk to copy the file to | 265 dest_path: full path (local-OS-style) on local disk to copy the file to |
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
489 exist yet. | 493 exist yet. |
490 | 494 |
491 Args: | 495 Args: |
492 path: full path of directory to create | 496 path: full path of directory to create |
493 """ | 497 """ |
494 try: | 498 try: |
495 os.makedirs(path) | 499 os.makedirs(path) |
496 except OSError as e: | 500 except OSError as e: |
497 if e.errno != errno.EEXIST: | 501 if e.errno != errno.EEXIST: |
498 raise | 502 raise |
OLD | NEW |