OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
11 for the XML API). | 11 for the XML API). |
12 | 12 |
13 API/library references: | 13 API/library references: |
14 - https://developers.google.com/storage/docs/reference-guide | 14 - https://developers.google.com/storage/docs/reference-guide |
15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-u
sing-boto.html | 15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-u
sing-boto.html |
16 """ | 16 """ |
17 # pylint: enable=C0301 | 17 # pylint: enable=C0301 |
18 | 18 |
19 # System-level imports | 19 # System-level imports |
20 import errno | 20 import errno |
| 21 import hashlib |
21 import os | 22 import os |
22 import posixpath | 23 import posixpath |
23 import re | 24 import re |
24 import sys | 25 import sys |
25 | 26 |
26 # Imports from third-party code | 27 # Imports from third-party code |
27 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 28 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
28 os.path.dirname(__file__), os.pardir, os.pardir)) | 29 os.path.dirname(__file__), os.pardir, os.pardir)) |
29 for import_subdir in ['boto']: | 30 for import_subdir in ['boto']: |
30 import_dirpath = os.path.join( | 31 import_dirpath = os.path.join( |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
141 b = self._connect_to_bucket(bucket_name=bucket) | 142 b = self._connect_to_bucket(bucket_name=bucket) |
142 item = Key(b) | 143 item = Key(b) |
143 item.key = path | 144 item.key = path |
144 try: | 145 try: |
145 item.delete() | 146 item.delete() |
146 except BotoServerError, e: | 147 except BotoServerError, e: |
147 e.body = (repr(e.body) + | 148 e.body = (repr(e.body) + |
148 ' while deleting bucket=%s, path=%s' % (bucket, path)) | 149 ' while deleting bucket=%s, path=%s' % (bucket, path)) |
149 raise | 150 raise |
150 | 151 |
def get_last_modified_time(self, bucket, path):
  """Gets the timestamp of when this file was last modified.

  Params:
    bucket: GS bucket in which to look for the file
    path: full path (Posix-style) of the file within the bucket to check

  Returns the last modified time, as a freeform string. If the file was not
  found, returns None.

  Raises:
    BotoServerError: if boto raises it during the lookup; the bucket and path
        are appended to the exception's body before it is re-raised.
  """
  b = self._connect_to_bucket(bucket_name=bucket)
  try:
    key = b.get_key(key_name=path)
    if not key:
      # get_key() gave us nothing for this path: report "not found".
      return None
    return key.last_modified
  except BotoServerError as e:
    # Annotate the error with what we were looking up, then let the original
    # exception (and its traceback) propagate to the caller.
    e.body = (repr(e.body) +
              ' while getting attributes of bucket=%s, path=%s' % (
                  bucket, path))
    raise
| 173 |
151 def upload_file(self, source_path, dest_bucket, dest_path, | 174 def upload_file(self, source_path, dest_bucket, dest_path, |
152 predefined_acl=None, fine_grained_acl_list=None): | 175 only_if_modified=False, predefined_acl=None, |
| 176 fine_grained_acl_list=None): |
153 """Upload contents of a local file to Google Storage. | 177 """Upload contents of a local file to Google Storage. |
154 | 178 |
155 TODO(epoger): Add the only_if_modified param provided by upload_file() in | |
156 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts
/utils/old_gs_utils.py , | |
157 so we can replace that function with this one. | |
158 | |
159 params: | 179 params: |
160 source_path: full path (local-OS-style) on local disk to read from | 180 source_path: full path (local-OS-style) on local disk to read from |
161 dest_bucket: GCS bucket to copy the file to | 181 dest_bucket: GCS bucket to copy the file to |
162 dest_path: full path (Posix-style) within that bucket | 182 dest_path: full path (Posix-style) within that bucket |
| 183 only_if_modified: if True, only upload the file if it would actually |
| 184 change the content on Google Storage (uploads the file if dest_path |
| 185 does not exist, or if it exists but has different contents than |
| 186 source_path). Note that this may take longer than just uploading the |
| 187 file without checking first, due to extra round-trips! |
163 predefined_acl: which predefined ACL to apply to the file on Google | 188 predefined_acl: which predefined ACL to apply to the file on Google |
164 Storage; must be one of the PredefinedACL values defined above. | 189 Storage; must be one of the PredefinedACL values defined above. |
165 If None, inherits dest_bucket's default object ACL. | 190 If None, inherits dest_bucket's default object ACL. |
166 TODO(epoger): add unittests for this param, although it seems to work | 191 TODO(epoger): add unittests for this param, although it seems to work |
167 in my manual testing | 192 in my manual testing |
168 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
169 to apply to the uploaded file (on top of the predefined_acl), | 194 to apply to the uploaded file (on top of the predefined_acl), |
170 or None if predefined_acl is sufficient | 195 or None if predefined_acl is sufficient |
171 """ | 196 """ |
172 b = self._connect_to_bucket(bucket_name=dest_bucket) | 197 b = self._connect_to_bucket(bucket_name=dest_bucket) |
| 198 |
| 199 if only_if_modified: |
| 200 old_key = b.get_key(key_name=dest_path) |
| 201 if old_key: |
| 202 local_md5 = '"%s"' % _get_local_md5(path=source_path) |
| 203 if local_md5 == old_key.etag: |
| 204 print 'Skipping upload of unmodified file %s : %s' % ( |
| 205 source_path, local_md5) |
| 206 return |
| 207 |
173 item = Key(b) | 208 item = Key(b) |
174 item.key = dest_path | 209 item.key = dest_path |
175 try: | 210 try: |
176 item.set_contents_from_filename(filename=source_path, | 211 item.set_contents_from_filename(filename=source_path, |
177 policy=predefined_acl) | 212 policy=predefined_acl) |
178 except BotoServerError, e: | 213 except BotoServerError, e: |
179 e.body = (repr(e.body) + | 214 e.body = (repr(e.body) + |
180 ' while uploading source_path=%s to bucket=%s, path=%s' % ( | 215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( |
181 source_path, dest_bucket, item.key)) | 216 source_path, dest_bucket, item.key)) |
182 raise | 217 raise |
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
493 exist yet. | 528 exist yet. |
494 | 529 |
495 Args: | 530 Args: |
496 path: full path of directory to create | 531 path: full path of directory to create |
497 """ | 532 """ |
498 try: | 533 try: |
499 os.makedirs(path) | 534 os.makedirs(path) |
500 except OSError as e: | 535 except OSError as e: |
501 if e.errno != errno.EEXIST: | 536 if e.errno != errno.EEXIST: |
502 raise | 537 raise |
| 538 |
| 539 |
def _get_local_md5(path, chunk_size=64 * 1024):
  """Returns the MD5 hash of a file on local disk.

  The file is read in fixed-size chunks so that it never has to be held in
  memory all at once.

  Args:
    path: path of the local file to hash
    chunk_size: number of bytes to read per iteration; must be positive

  Returns the hex digest of the file's contents, as a string.

  Raises:
    ValueError: if chunk_size is not a positive number.
  """
  if chunk_size <= 0:
    # read(0) always returns an empty string, which would end the loop
    # immediately and yield the digest of an empty file.
    raise ValueError('chunk_size must be positive, got %r' % (chunk_size,))
  hasher = hashlib.md5()
  with open(path, 'rb') as f:
    # iter() with a sentinel keeps calling f.read() until it returns b'' (EOF).
    for data in iter(lambda: f.read(chunk_size), b''):
      hasher.update(data)
  return hasher.hexdigest()
OLD | NEW |