Chromium Code Reviews

Side by Side Diff: py/utils/gs_utils.py

Issue 418503005: GSUtils: allow uploads to happen ALWAYS, IF_NEW, or IF_MODIFIED (Closed)
Base URL: https://skia.googlesource.com/common.git@master
Patch Set: refactor upload_dir_contents() to call upload_file() | Created 6 years, 5 months ago
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 83 matching lines...)
94 PUBLIC_READ_WRITE = 'public-read-write' 94 PUBLIC_READ_WRITE = 'public-read-write'
95 95
96 class IdType: 96 class IdType:
97 """Types of identifiers we can use to set "fine-grained" ACLs.""" 97 """Types of identifiers we can use to set "fine-grained" ACLs."""
98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN 98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL 99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
100 GROUP_BY_ID = acl.GROUP_BY_ID 100 GROUP_BY_ID = acl.GROUP_BY_ID
101 USER_BY_EMAIL = acl.USER_BY_EMAIL 101 USER_BY_EMAIL = acl.USER_BY_EMAIL
102 USER_BY_ID = acl.USER_BY_ID 102 USER_BY_ID = acl.USER_BY_ID
103 103
104 class UploadIf:
105 """Cases in which we will upload a file.
106
107 Beware of performance tradeoffs. E.g., if the file is small, the extra
108 round trip to check for file existence and/or checksum may take longer than
109 just uploading the file."""
110 ALWAYS = 1 # always upload the file
111 IF_NEW = 2 # if there is an existing file with the same name,
112 # leave it alone
113 IF_MODIFIED = 3 # if there is an existing file with the same name and
114 # contents, leave it alone
104 115
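For illustration, a minimal usage sketch of choosing among these modes, given the round-trip tradeoff noted in the docstring above. The .boto path, bucket name, and object paths are hypothetical, and the import path is assumed from this file's location:

import os

from gs_utils import GSUtils  # assumed import path for this module

gs = GSUtils(boto_file_path=os.path.expanduser('~/.boto'))  # hypothetical credentials file
src = 'render.png'
# For tiny files, the existence/checksum round trip can cost more than the
# upload itself, so send those unconditionally; otherwise skip unmodified files.
mode = (GSUtils.UploadIf.ALWAYS if os.path.getsize(src) < 4096
        else GSUtils.UploadIf.IF_MODIFIED)
gs.upload_file(source_path=src,
               dest_bucket='example-bucket',      # hypothetical bucket
               dest_path='results/render.png',
               upload_if=mode)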
105 def __init__(self, boto_file_path=None): 116 def __init__(self, boto_file_path=None):
106 """Constructor. 117 """Constructor.
107 118
108 Params: 119 Params:
109 boto_file_path: full path (local-OS-style) on local disk where .boto 120 boto_file_path: full path (local-OS-style) on local disk where .boto
110 credentials file can be found. If None, then the GSUtils object 121 credentials file can be found. If None, then the GSUtils object
111 created will be able to access only public files in Google Storage. 122 created will be able to access only public files in Google Storage.
112 123
113 Raises an exception if no file is found at boto_file_path, or if the file 124 Raises an exception if no file is found at boto_file_path, or if the file
(...skipping 18 matching lines...)
132 def delete_file(self, bucket, path): 143 def delete_file(self, bucket, path):
133 """Delete a single file within a GS bucket. 144 """Delete a single file within a GS bucket.
134 145
135 TODO(epoger): what if bucket or path does not exist? Should probably raise 146 TODO(epoger): what if bucket or path does not exist? Should probably raise
136 an exception. Implement, and add a test to exercise this. 147 an exception. Implement, and add a test to exercise this.
137 148
138 Params: 149 Params:
139 bucket: GS bucket to delete a file from 150 bucket: GS bucket to delete a file from
140 path: full path (Posix-style) of the file within the bucket to delete 151 path: full path (Posix-style) of the file within the bucket to delete
141 """ 152 """
142 b = self._connect_to_bucket(bucket_name=bucket) 153 b = self._connect_to_bucket(bucket=bucket)
143 key = Key(b) 154 key = Key(b)
144 key.name = path 155 key.name = path
145 try: 156 try:
146 key.delete() 157 key.delete()
147 except BotoServerError, e: 158 except BotoServerError, e:
148 e.body = (repr(e.body) + 159 e.body = (repr(e.body) +
149 ' while deleting bucket=%s, path=%s' % (bucket, path)) 160 ' while deleting bucket=%s, path=%s' % (bucket, path))
150 raise 161 raise
151 162
152 def get_last_modified_time(self, bucket, path): 163 def get_last_modified_time(self, bucket, path):
153 """Gets the timestamp of when this file was last modified. 164 """Gets the timestamp of when this file was last modified.
154 165
155 Params: 166 Params:
156 bucket: GS bucket in which to look for the file 167 bucket: GS bucket in which to look for the file
157 path: full path (Posix-style) of the file within the bucket to check 168 path: full path (Posix-style) of the file within the bucket to check
158 169
159 Returns the last modified time, as a freeform string. If the file was not 170 Returns the last modified time, as a freeform string. If the file was not
160 found, returns None. 171 found, returns None.
161 """ 172 """
162 b = self._connect_to_bucket(bucket_name=bucket) 173 b = self._connect_to_bucket(bucket=bucket)
163 try: 174 try:
164 key = b.get_key(key_name=path) 175 key = b.get_key(key_name=path)
165 if not key: 176 if not key:
166 return None 177 return None
167 return key.last_modified 178 return key.last_modified
168 except BotoServerError, e: 179 except BotoServerError, e:
169 e.body = (repr(e.body) + 180 e.body = (repr(e.body) +
170 ' while getting attributes of bucket=%s, path=%s' % ( 181 ' while getting attributes of bucket=%s, path=%s' % (
171 bucket, path)) 182 bucket, path))
172 raise 183 raise
173 184
174 def upload_file(self, source_path, dest_bucket, dest_path, 185 def upload_file(self, source_path, dest_bucket, dest_path,
175 only_if_modified=False, predefined_acl=None, 186 upload_if=UploadIf.ALWAYS,
187 predefined_acl=None,
176 fine_grained_acl_list=None): 188 fine_grained_acl_list=None):
177 """Upload contents of a local file to Google Storage. 189 """Upload contents of a local file to Google Storage.
178 190
179 params: 191 params:
180 source_path: full path (local-OS-style) on local disk to read from 192 source_path: full path (local-OS-style) on local disk to read from
181 dest_bucket: GCS bucket to copy the file to 193 dest_bucket: GS bucket to copy the file to
182 dest_path: full path (Posix-style) within that bucket 194 dest_path: full path (Posix-style) within that bucket
183 only_if_modified: if True, only upload the file if it would actually 195 upload_if: one of the UploadIf values, describing in which cases we should
184 change the content on Google Storage (uploads the file if dest_path 196 upload the file
185 does not exist, or if it exists but has different contents than
186 source_path). Note that this may take longer than just uploading the
187 file without checking first, due to extra round-trips!
188 predefined_acl: which predefined ACL to apply to the file on Google 197 predefined_acl: which predefined ACL to apply to the file on Google
189 Storage; must be one of the PredefinedACL values defined above. 198 Storage; must be one of the PredefinedACL values defined above.
190 If None, inherits dest_bucket's default object ACL. 199 If None, inherits dest_bucket's default object ACL.
191 TODO(epoger): add unittests for this param, although it seems to work
192 in my manual testing
193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples 200 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
194 to apply to the uploaded file (on top of the predefined_acl), 201 to apply to the uploaded file (on top of the predefined_acl),
195 or None if predefined_acl is sufficient 202 or None if predefined_acl is sufficient
203
204 TODO(epoger): Consider adding a do_compress parameter that would compress
205 the file using gzip before upload, and add a "Content-Encoding:gzip" header
206 so that HTTP downloads of the file would be unzipped automatically.
207 See https://developers.google.com/storage/docs/gsutil/addlhelp/
208 WorkingWithObjectMetadata#content-encoding
196 """ 209 """
197 b = self._connect_to_bucket(bucket_name=dest_bucket) 210 b = self._connect_to_bucket(bucket=dest_bucket)
198 211
199 if only_if_modified: 212 if upload_if == self.UploadIf.IF_NEW:
213 old_key = b.get_key(key_name=dest_path)
214 if old_key:
215 print 'Skipping upload of existing file gs://%s/%s' % (
216 dest_bucket, dest_path)
217 return
218 elif upload_if == self.UploadIf.IF_MODIFIED:
200 old_key = b.get_key(key_name=dest_path) 219 old_key = b.get_key(key_name=dest_path)
201 if old_key: 220 if old_key:
202 local_md5 = '"%s"' % _get_local_md5(path=source_path) 221 local_md5 = '"%s"' % _get_local_md5(path=source_path)
203 if local_md5 == old_key.etag: 222 if local_md5 == old_key.etag:
204 print 'Skipping upload of unmodified file %s : %s' % ( 223 print 'Skipping upload of unmodified file gs://%s/%s : %s' % (
205 source_path, local_md5) 224 dest_bucket, dest_path, local_md5)
206 return 225 return
207 226
208 key = Key(b) 227 key = Key(b)
209 key.name = dest_path 228 key.name = dest_path
210 try: 229 try:
211 key.set_contents_from_filename(filename=source_path, 230 key.set_contents_from_filename(filename=source_path,
212 policy=predefined_acl) 231 policy=predefined_acl)
213 except BotoServerError, e: 232 except BotoServerError, e:
214 e.body = (repr(e.body) + 233 e.body = (repr(e.body) +
215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( 234 ' while uploading source_path=%s to bucket=%s, path=%s' % (
216 source_path, dest_bucket, key.name)) 235 source_path, dest_bucket, key.name))
217 raise 236 raise
218 # TODO(epoger): This may be inefficient, because it calls
219 # _connect_to_bucket() again. Depending on how expensive that
220 # call is, we may want to optimize this.
221 for (id_type, id_value, permission) in fine_grained_acl_list or []: 237 for (id_type, id_value, permission) in fine_grained_acl_list or []:
222 self.set_acl( 238 self.set_acl(
223 bucket=dest_bucket, path=key.name, 239 bucket=b, path=key.name,
224 id_type=id_type, id_value=id_value, permission=permission) 240 id_type=id_type, id_value=id_value, permission=permission)
225 241
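For context, a hedged sketch of combining the ACL parameters described in the docstring above. It assumes the nested PredefinedACL class exposes a PUBLIC_READ value (only PUBLIC_READ_WRITE is visible in this diff), uses the IdType and Permission values defined earlier in the file, and the bucket, paths, and domain are made up:

gs.upload_file(
    source_path='/tmp/report.html',                    # illustrative local path
    dest_bucket='example-bucket',                      # hypothetical bucket
    dest_path='reports/report.html',
    upload_if=GSUtils.UploadIf.IF_NEW,
    predefined_acl=GSUtils.PredefinedACL.PUBLIC_READ,  # assumed value name
    fine_grained_acl_list=[
        (GSUtils.IdType.GROUP_BY_DOMAIN, 'example.com',
         GSUtils.Permission.READ),
    ])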
226 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, 242 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, **kwargs):
227 predefined_acl=None, fine_grained_acl_list=None):
228 """Recursively upload contents of a local directory to Google Storage. 243 """Recursively upload contents of a local directory to Google Storage.
229 244
230 params: 245 params:
231 source_dir: full path (local-OS-style) on local disk of directory to copy 246 source_dir: full path (local-OS-style) on local disk of directory to copy
232 contents of 247 contents of
233 dest_bucket: GCS bucket to copy the files into 248 dest_bucket: GS bucket to copy the files into
234 dest_dir: full path (Posix-style) within that bucket; write the files into 249 dest_dir: full path (Posix-style) within that bucket; write the files into
235 this directory. If None, write into the root directory of the bucket. 250 this directory. If None, write into the root directory of the bucket.
236 predefined_acl: which predefined ACL to apply to the files on Google 251 kwargs: any additional keyword arguments "inherited" from upload_file()
borenet 2014/07/24 14:25:18 I'd say you don't need to document kwargs.
epoger 2014/07/24 14:49:49 From live discussion: this is the best way we coul
237 Storage; must be one of the PredefinedACL values defined above.
238 If None, inherits dest_bucket's default object ACL.
239 TODO(epoger): add unittests for this param, although it seems to work
240 in my manual testing
241 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
242 to apply to every file uploaded (on top of the predefined_acl),
243 or None if predefined_acl is sufficient
244 252
245 The copy operates as a "merge with overwrite": any files in source_dir will 253 The copy operates as a merge: any files in source_dir will be "overlaid" on
246 be "overlaid" on top of the existing content in dest_dir. Existing files 254 top of the existing content in dest_dir. Existing files with the same names
247 with the same names will be overwritten. 255 may or may not be overwritten, depending on the value of the upload_if kwarg
256 inherited from upload_file().
248 257
249 TODO(epoger): Upload multiple files simultaneously to reduce latency. 258 TODO(epoger): Upload multiple files simultaneously to reduce latency.
250
251 TODO(epoger): Add a "noclobber" mode that will not upload any files would
252 overwrite existing files in Google Storage.
253
254 TODO(epoger): Consider adding a do_compress parameter that would compress
255 the file using gzip before upload, and add a "Content-Encoding:gzip" header
256 so that HTTP downloads of the file would be unzipped automatically.
257 See https://developers.google.com/storage/docs/gsutil/addlhelp/
258 WorkingWithObjectMetadata#content-encoding
259 """ 259 """
260 b = self._connect_to_bucket(bucket_name=dest_bucket) 260 b = self._connect_to_bucket(bucket=dest_bucket)
261 for filename in sorted(os.listdir(source_dir)): 261 for filename in sorted(os.listdir(source_dir)):
262 local_path = os.path.join(source_dir, filename) 262 local_path = os.path.join(source_dir, filename)
263 if dest_dir: 263 if dest_dir:
264 remote_path = posixpath.join(dest_dir, filename) 264 remote_path = posixpath.join(dest_dir, filename)
265 else: 265 else:
266 remote_path = filename 266 remote_path = filename
267 267
268 if os.path.isdir(local_path): 268 if os.path.isdir(local_path):
269 self.upload_dir_contents( # recurse 269 self.upload_dir_contents( # recurse
270 source_dir=local_path, dest_bucket=dest_bucket, 270 source_dir=local_path, dest_bucket=b, dest_dir=remote_path,
271 dest_dir=remote_path, 271 **kwargs)
272 predefined_acl=predefined_acl,
273 fine_grained_acl_list=fine_grained_acl_list)
274 else: 272 else:
275 key = Key(b) 273 self.upload_file(
276 key.name = remote_path 274 source_path=local_path, dest_bucket=b, dest_path=remote_path,
277 try: 275 **kwargs)
278 key.set_contents_from_filename(
279 filename=local_path, policy=predefined_acl)
280 except BotoServerError, e:
281 e.body = (repr(e.body) +
282 ' while uploading local_path=%s to bucket=%s, path=%s' % (
283 local_path, dest_bucket, remote_path))
284 raise
285 # TODO(epoger): This may be inefficient, because it calls
286 # _connect_to_bucket() for every file. Depending on how expensive that
287 # call is, we may want to optimize this.
288 for (id_type, id_value, permission) in fine_grained_acl_list or []:
289 self.set_acl(
290 bucket=dest_bucket, path=remote_path,
291 id_type=id_type, id_value=id_value, permission=permission)
292 276
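A small sketch (same hypothetical names as above) of the kwargs pass-through: any keyword argument accepted by upload_file(), such as upload_if or predefined_acl, applies to every file in the tree:

gs.upload_dir_contents(
    source_dir='/tmp/output',                      # illustrative local directory
    dest_bucket='example-bucket',                  # hypothetical bucket
    dest_dir='builds/1234',
    upload_if=GSUtils.UploadIf.IF_MODIFIED)        # forwarded to upload_file()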
293 def download_file(self, source_bucket, source_path, dest_path, 277 def download_file(self, source_bucket, source_path, dest_path,
294 create_subdirs_if_needed=False): 278 create_subdirs_if_needed=False):
295 """Downloads a single file from Google Cloud Storage to local disk. 279 """Downloads a single file from Google Cloud Storage to local disk.
296 280
297 Args: 281 Args:
298 source_bucket: GCS bucket to download the file from 282 source_bucket: GS bucket to download the file from
299 source_path: full path (Posix-style) within that bucket 283 source_path: full path (Posix-style) within that bucket
300 dest_path: full path (local-OS-style) on local disk to copy the file to 284 dest_path: full path (local-OS-style) on local disk to copy the file to
301 create_subdirs_if_needed: boolean; whether to create subdirectories as 285 create_subdirs_if_needed: boolean; whether to create subdirectories as
302 needed to create dest_path 286 needed to create dest_path
303 """ 287 """
304 b = self._connect_to_bucket(bucket_name=source_bucket) 288 b = self._connect_to_bucket(bucket=source_bucket)
305 key = Key(b) 289 key = Key(b)
306 key.name = source_path 290 key.name = source_path
307 if create_subdirs_if_needed: 291 if create_subdirs_if_needed:
308 _makedirs_if_needed(os.path.dirname(dest_path)) 292 _makedirs_if_needed(os.path.dirname(dest_path))
309 with open(dest_path, 'w') as f: 293 with open(dest_path, 'w') as f:
310 try: 294 try:
311 key.get_contents_to_file(fp=f) 295 key.get_contents_to_file(fp=f)
312 except BotoServerError, e: 296 except BotoServerError, e:
313 e.body = (repr(e.body) + 297 e.body = (repr(e.body) +
314 ' while downloading bucket=%s, path=%s to local_path=%s' % ( 298 ' while downloading bucket=%s, path=%s to local_path=%s' % (
315 source_bucket, source_path, dest_path)) 299 source_bucket, source_path, dest_path))
316 raise 300 raise
317 301
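And a hedged sketch of the download counterpart, again with made-up bucket and path names:

gs.download_file(
    source_bucket='example-bucket',                # hypothetical bucket
    source_path='builds/1234/render.png',
    dest_path='/tmp/downloads/render.png',
    create_subdirs_if_needed=True)                 # creates /tmp/downloads if absent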
318 def download_dir_contents(self, source_bucket, source_dir, dest_dir): 302 def download_dir_contents(self, source_bucket, source_dir, dest_dir):
319 """Recursively download contents of a Google Storage directory to local disk 303 """Recursively download contents of a Google Storage directory to local disk
320 304
321 params: 305 params:
322 source_bucket: GCS bucket to copy the files from 306 source_bucket: GS bucket to copy the files from
323 source_dir: full path (Posix-style) within that bucket; read the files 307 source_dir: full path (Posix-style) within that bucket; read the files
324 from this directory 308 from this directory
325 dest_dir: full path (local-OS-style) on local disk of directory to copy 309 dest_dir: full path (local-OS-style) on local disk of directory to copy
326 the files into 310 the files into
327 311
328 The copy operates as a "merge with overwrite": any files in source_dir will 312 The copy operates as a "merge with overwrite": any files in source_dir will
329 be "overlaid" on top of the existing content in dest_dir. Existing files 313 be "overlaid" on top of the existing content in dest_dir. Existing files
330 with the same names will be overwritten. 314 with the same names will be overwritten.
331 315
332 TODO(epoger): Download multiple files simultaneously to reduce latency. 316 TODO(epoger): Download multiple files simultaneously to reduce latency.
333 """ 317 """
334 _makedirs_if_needed(dest_dir) 318 _makedirs_if_needed(dest_dir)
335 b = self._connect_to_bucket(bucket_name=source_bucket) 319 b = self._connect_to_bucket(bucket=source_bucket)
336 (dirs, files) = self.list_bucket_contents( 320 (dirs, files) = self.list_bucket_contents(
337 bucket=source_bucket, subdir=source_dir) 321 bucket=source_bucket, subdir=source_dir)
338 322
339 for filename in files: 323 for filename in files:
340 key = Key(b) 324 key = Key(b)
341 key.name = posixpath.join(source_dir, filename) 325 key.name = posixpath.join(source_dir, filename)
342 dest_path = os.path.join(dest_dir, filename) 326 dest_path = os.path.join(dest_dir, filename)
343 with open(dest_path, 'w') as f: 327 with open(dest_path, 'w') as f:
344 try: 328 try:
345 key.get_contents_to_file(fp=f) 329 key.get_contents_to_file(fp=f)
(...skipping 25 matching lines...)
371 path: full path (Posix-style) to the file within that bucket 355 path: full path (Posix-style) to the file within that bucket
372 id_type: must be one of the IdType values defined above 356 id_type: must be one of the IdType values defined above
373 id_value: get permissions for users whose id_type field contains this 357 id_value: get permissions for users whose id_type field contains this
374 value 358 value
375 359
376 Returns: the Permission value which has been set for users matching 360 Returns: the Permission value which has been set for users matching
377 this id_type/id_value, on this file; or Permission.EMPTY if no such 361 this id_type/id_value, on this file; or Permission.EMPTY if no such
378 permissions have been set. 362 permissions have been set.
379 """ 363 """
380 field = self._field_by_id_type[id_type] 364 field = self._field_by_id_type[id_type]
381 b = self._connect_to_bucket(bucket_name=bucket) 365 b = self._connect_to_bucket(bucket=bucket)
382 acls = b.get_acl(key_name=path) 366 acls = b.get_acl(key_name=path)
383 matching_entries = [entry for entry in acls.entries.entry_list 367 matching_entries = [entry for entry in acls.entries.entry_list
384 if (entry.scope.type == id_type) and 368 if (entry.scope.type == id_type) and
385 (getattr(entry.scope, field) == id_value)] 369 (getattr(entry.scope, field) == id_value)]
386 if matching_entries: 370 if matching_entries:
387 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) 371 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
388 return matching_entries[0].permission 372 return matching_entries[0].permission
389 else: 373 else:
390 return self.Permission.EMPTY 374 return self.Permission.EMPTY
391 375
(...skipping 28 matching lines...)
420 bucket = 'gs://bucket-name' 404 bucket = 'gs://bucket-name'
421 path = 'path/to/file' 405 path = 'path/to/file'
422 id_type = IdType.USER_BY_EMAIL 406 id_type = IdType.USER_BY_EMAIL
423 id_value = 'epoger@google.com' 407 id_value = 'epoger@google.com'
424 set_acl(bucket, path, id_type, id_value, Permission.READ) 408 set_acl(bucket, path, id_type, id_value, Permission.READ)
425 assert Permission.READ == get_acl(bucket, path, id_type, id_value) 409 assert Permission.READ == get_acl(bucket, path, id_type, id_value)
426 set_acl(bucket, path, id_type, id_value, Permission.WRITE) 410 set_acl(bucket, path, id_type, id_value, Permission.WRITE)
427 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value) 411 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value)
428 """ 412 """
429 field = self._field_by_id_type[id_type] 413 field = self._field_by_id_type[id_type]
430 b = self._connect_to_bucket(bucket_name=bucket) 414 b = self._connect_to_bucket(bucket=bucket)
431 acls = b.get_acl(key_name=path) 415 acls = b.get_acl(key_name=path)
432 416
433 # Remove any existing entries that refer to the same id_type/id_value, 417 # Remove any existing entries that refer to the same id_type/id_value,
434 # because the API will fail if we try to set more than one. 418 # because the API will fail if we try to set more than one.
435 matching_entries = [entry for entry in acls.entries.entry_list 419 matching_entries = [entry for entry in acls.entries.entry_list
436 if (entry.scope.type == id_type) and 420 if (entry.scope.type == id_type) and
437 (getattr(entry.scope, field) == id_value)] 421 (getattr(entry.scope, field) == id_value)]
438 if matching_entries: 422 if matching_entries:
439 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) 423 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
440 acls.entries.entry_list.remove(matching_entries[0]) 424 acls.entries.entry_list.remove(matching_entries[0])
(...skipping 17 matching lines...)
458 Args: 442 Args:
459 bucket: name of the Google Storage bucket 443 bucket: name of the Google Storage bucket
460 subdir: directory within the bucket to list, or None for root directory 444 subdir: directory within the bucket to list, or None for root directory
461 """ 445 """
462 # The GS command relies on the prefix (if any) ending with a slash. 446 # The GS command relies on the prefix (if any) ending with a slash.
463 prefix = subdir or '' 447 prefix = subdir or ''
464 if prefix and not prefix.endswith('/'): 448 if prefix and not prefix.endswith('/'):
465 prefix += '/' 449 prefix += '/'
466 prefix_length = len(prefix) if prefix else 0 450 prefix_length = len(prefix) if prefix else 0
467 451
468 b = self._connect_to_bucket(bucket_name=bucket) 452 b = self._connect_to_bucket(bucket=bucket)
469 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') 453 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
470 dirs = [] 454 dirs = []
471 files = [] 455 files = []
472 for item in items: 456 for item in items:
473 t = type(item) 457 t = type(item)
474 if t is Key: 458 if t is Key:
475 files.append(item.name[prefix_length:]) 459 files.append(item.name[prefix_length:])
476 elif t is Prefix: 460 elif t is Prefix:
477 dirs.append(item.name[prefix_length:-1]) 461 dirs.append(item.name[prefix_length:-1])
478 return (dirs, files) 462 return (dirs, files)
479 463
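A usage sketch for list_bucket_contents(), assuming the same hypothetical bucket; names come back relative to the listed subdir, per the prefix stripping above:

dirs, files = gs.list_bucket_contents(bucket='example-bucket', subdir='builds')
print dirs   # subdirectories immediately under 'builds', e.g. ['1234', '1235']
print files  # files directly in 'builds', not in its subdirectories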
480 def _connect_to_bucket(self, bucket_name): 464 def _connect_to_bucket(self, bucket):
481 """Returns a Bucket object we can use to access a particular bucket in GS. 465 """Returns a Bucket object we can use to access a particular bucket in GS.
482 466
483 Params: 467 Params:
484 bucket_name: name of the bucket (e.g., 'chromium-skia-gm') 468 bucket: name of the bucket (e.g., 'chromium-skia-gm'), or a Bucket
469 object (in which case this param is just returned as-is)
485 """ 470 """
471 if type(bucket) is Bucket:
472 return bucket
486 try: 473 try:
487 return self._create_connection().get_bucket(bucket_name=bucket_name) 474 return self._create_connection().get_bucket(bucket_name=bucket)
488 except BotoServerError, e: 475 except BotoServerError, e:
489 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name 476 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket
490 raise 477 raise
491 478
492 def _create_connection(self): 479 def _create_connection(self):
493 """Returns a GSConnection object we can use to access Google Storage.""" 480 """Returns a GSConnection object we can use to access Google Storage."""
494 if self._gs_access_key_id: 481 if self._gs_access_key_id:
495 return GSConnection( 482 return GSConnection(
496 gs_access_key_id=self._gs_access_key_id, 483 gs_access_key_id=self._gs_access_key_id,
497 gs_secret_access_key=self._gs_secret_access_key) 484 gs_secret_access_key=self._gs_secret_access_key)
498 else: 485 else:
499 return AnonymousGSConnection() 486 return AnonymousGSConnection()
(...skipping 39 matching lines...)
539 526
540 def _get_local_md5(path): 527 def _get_local_md5(path):
541 """Returns the MD5 hash of a file on local disk.""" 528 """Returns the MD5 hash of a file on local disk."""
542 hasher = hashlib.md5() 529 hasher = hashlib.md5()
543 with open(path, 'rb') as f: 530 with open(path, 'rb') as f:
544 while True: 531 while True:
545 data = f.read(64*1024) 532 data = f.read(64*1024)
546 if not data: 533 if not data:
547 return hasher.hexdigest() 534 return hasher.hexdigest()
548 hasher.update(data) 535 hasher.update(data)