Chromium Code Reviews

Side by Side Diff: gae/cloudstorage/cloudstorage_api.py

Issue 1150463002: [chrome-devtools-frontend] Migrate to cloudstorage client (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/chrome-devtools-frontend
Patch Set: Created 5 years, 7 months ago
1 # Copyright 2012 Google Inc. All Rights Reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
14
15 """File Interface for Google Cloud Storage."""
16
17
18
19 from __future__ import with_statement
20
21
22
23 __all__ = ['delete',
24 'listbucket',
25 'open',
26 'stat',
27 'compose',
28 ]
29
30 import logging
31 import StringIO
32 import urllib
33 import os
34 import itertools
35 import types
36 import xml.etree.cElementTree as ET
37 from . import api_utils
38 from . import common
39 from . import errors
40 from . import storage_api
41
42
43
44 def open(filename,
45 mode='r',
46 content_type=None,
47 options=None,
48 offset=0,
49 read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
50 retry_params=None,
51 _account_id=None):
52 """Opens a Google Cloud Storage file and returns it as a File-like object.
53
54 Args:
55 filename: A Google Cloud Storage filename of form '/bucket/filename'.
56 mode: 'r' for reading mode. 'w' for writing mode.
57 In reading mode, the file must exist. In writing mode, a file will
58 be created or overwritten.
59 content_type: The MIME type of the file. str. Only valid in writing mode.
60 options: A str->basestring dict to specify additional headers to pass to
61 GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
62 Supported options are x-goog-acl, x-goog-meta-, cache-control,
63 content-disposition, and content-encoding.
64 Only valid in writing mode.
65 See https://developers.google.com/storage/docs/reference-headers
66 for details.
67 offset: Number of bytes to skip at the start of the file. If None, 0 is
68 used.
69 read_buffer_size: The buffer size for read. Read keeps a buffer
70 and prefetches another one. To minimize blocking for large files,
71 always read by buffer size. To minimize the number of RPC requests for
72 small files, set a large buffer size. Max is 30MB.
73 retry_params: An instance of api_utils.RetryParams for subsequent calls
74 to GCS from this file handle. If None, the default one is used.
75 _account_id: Internal-use only.
76
77 Returns:
78 A reading or writing buffer that supports File-like interface. Buffer
79 must be closed after operations are done.
80
81 Raises:
82 errors.AuthorizationError: if authorization failed.
83 errors.NotFoundError: if an object that's expected to exist doesn't.
84 ValueError: invalid open mode or if content_type or options are specified
85 in reading mode.
86 """
87 common.validate_file_path(filename)
88 api = storage_api._get_storage_api(retry_params=retry_params,
89 account_id=_account_id)
90 filename = api_utils._quote_filename(filename)
91
92 if mode == 'w':
93 common.validate_options(options)
94 return storage_api.StreamingBuffer(api, filename, content_type, options)
95 elif mode == 'r':
96 if content_type or options:
97 raise ValueError('Options and content_type can only be specified '
98 'in writing mode.')
99 return storage_api.ReadBuffer(api,
100 filename,
101 offset=offset,
102 buffer_size=read_buffer_size)
103 else:
104 raise ValueError('Invalid mode %s.' % mode)
105
106
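A minimal usage sketch for open(), assuming the package imports as cloudstorage and a hypothetical bucket /my-bucket (not part of this patch):

    import cloudstorage

    # Write a small text object, then read it back. Handles must be
    # closed when operations are done, so 'with' blocks are used.
    with cloudstorage.open('/my-bucket/greeting.txt', 'w',
                           content_type='text/plain') as gcs_file:
      gcs_file.write('hello world\n')

    with cloudstorage.open('/my-bucket/greeting.txt', 'r') as gcs_file:
      print gcs_file.read()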
107 def delete(filename, retry_params=None, _account_id=None):
108 """Delete a Google Cloud Storage file.
109
110 Args:
111 filename: A Google Cloud Storage filename of form '/bucket/filename'.
112 retry_params: An api_utils.RetryParams for this call to GCS. If None,
113 the default one is used.
114 _account_id: Internal-use only.
115
116 Raises:
117 errors.NotFoundError: if the file doesn't exist prior to deletion.
118 """
119 api = storage_api._get_storage_api(retry_params=retry_params,
120 account_id=_account_id)
121 common.validate_file_path(filename)
122 filename = api_utils._quote_filename(filename)
123 status, resp_headers, content = api.delete_object(filename)
124 errors.check_status(status, [204], filename, resp_headers=resp_headers,
125 body=content)
126
127
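A hedged sketch of how delete() surfaces a missing object (bucket and object names hypothetical): delete() raises rather than returning a status, so callers that tolerate missing objects must catch NotFoundError.

    import logging
    import cloudstorage
    from cloudstorage import errors

    try:
      cloudstorage.delete('/my-bucket/greeting.txt')
    except errors.NotFoundError:
      logging.info('object was already gone')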
128 def stat(filename, retry_params=None, _account_id=None):
129 """Get the GCSFileStat of a Google Cloud Storage file.
130
131 Args:
132 filename: A Google Cloud Storage filename of form '/bucket/filename'.
133 retry_params: An api_utils.RetryParams for this call to GCS. If None,
134 the default one is used.
135 _account_id: Internal-use only.
136
137 Returns:
138 A GCSFileStat object containing info about this file.
139
140 Raises:
141 errors.AuthorizationError: if authorization failed.
142 errors.NotFoundError: if an object that's expected to exist doesn't.
143 """
144 common.validate_file_path(filename)
145 api = storage_api._get_storage_api(retry_params=retry_params,
146 account_id=_account_id)
147 status, headers, content = api.head_object(
148 api_utils._quote_filename(filename))
149 errors.check_status(status, [200], filename, resp_headers=headers,
150 body=content)
151 file_stat = common.GCSFileStat(
152 filename=filename,
153 st_size=common.get_stored_content_length(headers),
154 st_ctime=common.http_time_to_posix(headers.get('last-modified')),
155 etag=headers.get('etag'),
156 content_type=headers.get('content-type'),
157 metadata=common.get_metadata(headers))
158
159 return file_stat
160
161
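A short sketch of stat() with the same hypothetical object; stat() issues a single HEAD request and opens no file handle:

    import cloudstorage

    info = cloudstorage.stat('/my-bucket/greeting.txt')
    print info.filename, info.st_size, info.content_type, info.etag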
162 def _copy2(src, dst, metadata=None, retry_params=None):
163 """Copy the file content from src to dst.
164
165 Internal use only!
166
167 Args:
168 src: /bucket/filename
169 dst: /bucket/filename
170 metadata: a dict of metadata for this copy. If None, old metadata is copied.
171 For example, {'x-goog-meta-foo': 'bar'}.
172 retry_params: An api_utils.RetryParams for this call to GCS. If None,
173 the default one is used.
174
175 Raises:
176 errors.AuthorizationError: if authorization failed.
177 errors.NotFoundError: if an object that's expected to exist doesn't.
178 """
179 common.validate_file_path(src)
180 common.validate_file_path(dst)
181
182 if metadata is None:
183 metadata = {}
184 copy_meta = 'COPY'
185 else:
186 copy_meta = 'REPLACE'
187 metadata.update({'x-goog-copy-source': src,
188 'x-goog-metadata-directive': copy_meta})
189
190 api = storage_api._get_storage_api(retry_params=retry_params)
191 status, resp_headers, content = api.put_object(
192 api_utils._quote_filename(dst), headers=metadata)
193 errors.check_status(status, [200], src, metadata, resp_headers, body=content)
194
195
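Although _copy2 is internal-use only, a sketch of its metadata semantics (paths and metadata key hypothetical) may help review:

    # With metadata=None the copy keeps the source's metadata
    # (x-goog-metadata-directive: COPY); passing a dict replaces it
    # wholesale (REPLACE).
    _copy2('/my-bucket/src.txt', '/my-bucket/dst.txt')
    _copy2('/my-bucket/src.txt', '/my-bucket/dst.txt',
           metadata={'x-goog-meta-owner': 'devtools'})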
196 def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,
197 delimiter=None, retry_params=None, _account_id=None):
198 """Returns a GCSFileStat iterator over a bucket.
199
200 Optional arguments can limit the result to a subset of files under bucket.
201
202 This function has two modes:
203 1. List bucket mode: Lists all files in the bucket without any concept of
204 hierarchy. GCS doesn't have real directory hierarchies.
205 2. Directory emulation mode: If you specify the 'delimiter' argument,
206 it is used as a path separator to emulate a hierarchy of directories.
207 In this mode, the "path_prefix" argument should end in the delimiter
208 specified (thus designating a logical directory). The logical directory's
209 contents, both files and subdirectories, are listed. The names of
210 subdirectories returned will end with the delimiter. So listbucket
211 can be called with the subdirectory name to list the subdirectory's
212 contents.
213
214 Args:
215 path_prefix: A Google Cloud Storage path of format "/bucket" or
216 "/bucket/prefix". Only objects whose full path starts with the
217 path_prefix will be returned.
218 marker: Another path prefix. Only objects whose full path sorts
219 lexicographically after marker will be returned (exclusive).
220 prefix: Deprecated. Use path_prefix.
221 max_keys: The limit on the number of objects to return. int.
222 For best performance, specify max_keys only if you know how many objects
223 you want. Otherwise, this method requests large batches and handles
224 pagination for you.
225 delimiter: Use to turn on directory mode. str of one or multiple chars
226 that your bucket uses as its directory separator.
227 retry_params: An api_utils.RetryParams for this call to GCS. If None,
228 the default one is used.
229 _account_id: Internal-use only.
230
231 Examples:
232 For files "/bucket/a",
233 "/bucket/bar/1",
234 "/bucket/foo",
235 "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1":
236
237 Regular mode:
238 listbucket("/bucket/f", marker="/bucket/foo/1")
239 will match "/bucket/foo/2/1", "/bucket/foo/3/1".
240
241 Directory mode:
242 listbucket("/bucket/", delimiter="/")
243 will match "/bucket/a", "/bucket/bar/", "/bucket/foo", "/bucket/foo/".
244 listbucket("/bucket/foo/", delimiter="/")
245 will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/".
246
247 Returns:
248 Regular mode:
249 A GCSFileStat iterator over matched files ordered by filename.
250 The iterator returns GCSFileStat objects. filename, etag, st_size,
251 st_ctime, and is_dir are set.
252
253 Directory emulation mode:
254 A GCSFileStat iterator over matched files and directories ordered by
255 name. The iterator returns GCSFileStat objects. For directories,
256 only the filename and is_dir fields are set.
257
258 The last name yielded can be used as next call's marker.
259 """
260 if prefix:
261 common.validate_bucket_path(path_prefix)
262 bucket = path_prefix
263 else:
264 bucket, prefix = common._process_path_prefix(path_prefix)
265
266 if marker and marker.startswith(bucket):
267 marker = marker[len(bucket) + 1:]
268
269 api = storage_api._get_storage_api(retry_params=retry_params,
270 account_id=_account_id)
271 options = {}
272 if marker:
273 options['marker'] = marker
274 if max_keys:
275 options['max-keys'] = max_keys
276 if prefix:
277 options['prefix'] = prefix
278 if delimiter:
279 options['delimiter'] = delimiter
280
281 return _Bucket(api, bucket, options)
282
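A sketch of directory-emulation mode, assuming a hypothetical /my-bucket laid out like the docstring example:

    import cloudstorage

    # Top-level listing; entries whose is_dir is set end with the
    # delimiter and can be fed back in as the next path_prefix.
    for entry in cloudstorage.listbucket('/my-bucket/', delimiter='/'):
      if entry.is_dir:
        for child in cloudstorage.listbucket(entry.filename, delimiter='/'):
          print child.filename
      else:
        print entry.filename, entry.st_size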
283 # pylint: disable=too-many-locals, too-many-branches, too-many-statements
284 def compose(list_of_files, destination_file, files_metadata=None,
285 content_type=None, retry_params=None, _account_id=None):
286 """Runs the GCS Compose on the given files.
287
288 Merges between 2 and 32 files into one file. Composite files may even
289 be built from other existing composites, provided that the total
290 component count does not exceed 1024. See here for details:
291 https://cloud.google.com/storage/docs/composite-objects
292
293 Args:
294 list_of_files: List of file name strings with no leading slashes or bucket.
295 destination_file: Path to the output file. Must have the bucket in the path.
296 files_metadata: Optional, file metadata, order must match list_of_files,
297 see link for available options:
298 https://cloud.google.com/storage/docs/composite-objects#_Xml
299 content_type: Optional, used to specify content-header of the output file.
300 retry_params: Optional, an api_utils.RetryParams for this call to GCS.
301 If None, the default one is used.
302 _account_id: Internal-use only.
303
304 Raises:
305 ValueError: If the number of files is outside the range of 2-32.
306 """
307 api = storage_api._get_storage_api(retry_params=retry_params,
308 account_id=_account_id)
309
310 # Needed until cloudstorage_stub.py is updated to accept compose requests.
311 # TODO(rbruyere@gmail.com): When patched, remove the True branch of this if.
312
313 if os.getenv('SERVER_SOFTWARE', '').startswith('Dev'):
314 def _temp_func(file_list, destination_file, content_type):
315 """Dev server stub; remove when the dev server accepts compose requests."""
316 bucket = '/' + destination_file.split('/')[1] + '/'
317 with open(destination_file, 'w', content_type=content_type) as gcs_merge:
318 for source_file in file_list:
319 with open(bucket + source_file['Name'], 'r') as gcs_source:
320 gcs_merge.write(gcs_source.read())
321
322 compose_object = _temp_func
323 else:
324 compose_object = api.compose_object
325 file_list, _ = _validate_compose_list(destination_file,
326 list_of_files,
327 files_metadata, 32)
328 compose_object(file_list, destination_file, content_type)
329
330
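A sketch of compose(), with hypothetical bucket-relative part names; note that only destination_file carries the bucket:

    import cloudstorage

    cloudstorage.compose(['part-0', 'part-1', 'part-2'],
                         '/my-bucket/merged',
                         content_type='text/plain')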
331 def _file_exists(destination):
332 """Checks if a file exists.
333
334 Attempts to open the file; if that succeeds, the file exists.
335
336 Args:
337 destination: Full path to the file (e.g. /bucket/object) with leading slash.
338
339 Returns:
340 True if the file is accessible otherwise False.
341 """
342 try:
343 with open(destination, "r"):
344 return True
345 except errors.NotFoundError:
346 return False
347
348
349 def _validate_compose_list(destination_file, file_list,
350 files_metadata=None, number_of_files=32):
351 """Validates file_list and merges it with files_metadata.
352
353 Args:
354 destination_file: Path to the file (e.g. /destination_bucket/destination_file).
355 file_list: List of files to compose, see compose for details.
356 files_metadata: Meta details for each file in the file_list.
357 number_of_files: Maximum number of files allowed in the list.
358
359 Returns:
360 A tuple (list_of_files, bucket):
361 list_of_files: Ready to use dict version of the list.
362 bucket: bucket name extracted from the file paths.
363 """
364 common.validate_file_path(destination_file)
365 bucket = destination_file[0:(destination_file.index('/', 1) + 1)]
366 try:
367 if isinstance(file_list, types.StringTypes):
368 raise TypeError
369 list_len = len(file_list)
370 except TypeError:
371 raise TypeError('file_list must be a list')
372
373 if list_len > number_of_files:
374 raise ValueError(
375 'Compose attempted to create composite with too many '
376 '(%i) components; limit is (%i).' % (list_len, number_of_files))
377 if list_len <= 1:
378 raise ValueError('Compose operation requires at'
379 ' least two components; %i provided.' % list_len)
380
381 if files_metadata is None:
382 files_metadata = []
383 elif len(files_metadata) > list_len:
384 raise ValueError('files_metadata contains more entries (%i)'
385 ' than file_list (%i)'
386 % (len(files_metadata), list_len))
387 list_of_files = []
388 for source_file, meta_data in itertools.izip_longest(file_list,
389 files_metadata):
390 if not isinstance(source_file, str):
391 raise TypeError('Each item of file_list must be a string')
392 if source_file.startswith('/'):
393 logging.warn('Detected a "/" at the start of the file name; '
394 'unless the file name itself contains a "/", '
395 'this may cause files to be misread')
396 if source_file.startswith(bucket):
397 logging.warn('Detected the bucket name at the start of the file name; '
398 'the bucket must not be specified in file_list entries. '
399 'This may cause files to be misread')
400 common.validate_file_path(bucket + source_file)
401
402 list_entry = {}
403
404 if meta_data is not None:
405 list_entry.update(meta_data)
406 list_entry["Name"] = source_file
407 list_of_files.append(list_entry)
408
409 return list_of_files, bucket
410
411
412 class _Bucket(object):
413 """A wrapper for a GCS bucket as the return value of listbucket."""
414
415 def __init__(self, api, path, options):
416 """Initialize.
417
418 Args:
419 api: storage_api instance.
420 path: bucket path of form '/bucket'.
421 options: a dict of listbucket options. Please see listbucket doc.
422 """
423 self._init(api, path, options)
424
425 def _init(self, api, path, options):
426 self._api = api
427 self._path = path
428 self._options = options.copy()
429 self._get_bucket_fut = self._api.get_bucket_async(
430 self._path + '?' + urllib.urlencode(self._options))
431 self._last_yield = None
432 self._new_max_keys = self._options.get('max-keys')
433
434 def __getstate__(self):
435 options = self._options
436 if self._last_yield:
437 options['marker'] = self._last_yield.filename[len(self._path) + 1:]
438 if self._new_max_keys is not None:
439 options['max-keys'] = self._new_max_keys
440 return {'api': self._api,
441 'path': self._path,
442 'options': options}
443
444 def __setstate__(self, state):
445 self._init(state['api'], state['path'], state['options'])
446
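The __getstate__/__setstate__ pair above makes a partially consumed listing picklable: the last yielded name becomes the next request's marker, e.g. for checkpointing in a task queue. A hedged sketch, assuming a hypothetical bucket and that the api handle itself pickles:

    import pickle
    import cloudstorage

    bucket = cloudstorage.listbucket('/my-bucket/')
    it = iter(bucket)
    first = it.next()            # consuming updates bucket._last_yield
    blob = pickle.dumps(bucket)  # __getstate__ turns it into a marker

    resumed = pickle.loads(blob)
    for entry in resumed:        # continues after 'first'
      print entry.filename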
447 def __iter__(self):
448 """Iterate over the bucket.
449
450 Yields:
451 GCSFileStat: a GCSFileStat for an object in the bucket.
452 They are ordered by GCSFileStat.filename.
453 """
454 total = 0
455 max_keys = self._options.get('max-keys')
456
457 while self._get_bucket_fut:
458 status, resp_headers, content = self._get_bucket_fut.get_result()
459 errors.check_status(status, [200], self._path, resp_headers=resp_headers,
460 body=content, extras=self._options)
461
462 if self._should_get_another_batch(content):
463 self._get_bucket_fut = self._api.get_bucket_async(
464 self._path + '?' + urllib.urlencode(self._options))
465 else:
466 self._get_bucket_fut = None
467
468 root = ET.fromstring(content)
469 dirs = self._next_dir_gen(root)
470 files = self._next_file_gen(root)
471 next_file = files.next()
472 next_dir = dirs.next()
473
474 while ((max_keys is None or total < max_keys) and
475 not (next_file is None and next_dir is None)):
476 total += 1
477 if next_file is None:
478 self._last_yield = next_dir
479 next_dir = dirs.next()
480 elif next_dir is None:
481 self._last_yield = next_file
482 next_file = files.next()
483 elif next_dir < next_file:
484 self._last_yield = next_dir
485 next_dir = dirs.next()
486 elif next_file < next_dir:
487 self._last_yield = next_file
488 next_file = files.next()
489 else:
490 logging.error(
491 'Should never reach here. next file is %r. next dir is %r.',
492 next_file, next_dir)
493 if self._new_max_keys:
494 self._new_max_keys -= 1
495 yield self._last_yield
496
497 def _next_file_gen(self, root):
498 """Generator for next file element in the document.
499
500 Args:
501 root: root element of the XML tree.
502
503 Yields:
504 GCSFileStat for the next file.
505 """
506 for e in root.getiterator(common._T_CONTENTS):
507 st_ctime, size, etag, key = None, None, None, None
508 for child in e.getiterator('*'):
509 if child.tag == common._T_LAST_MODIFIED:
510 st_ctime = common.dt_str_to_posix(child.text)
511 elif child.tag == common._T_ETAG:
512 etag = child.text
513 elif child.tag == common._T_SIZE:
514 size = child.text
515 elif child.tag == common._T_KEY:
516 key = child.text
517 yield common.GCSFileStat(self._path + '/' + key,
518 size, etag, st_ctime)
519 e.clear()
520 yield None
521
522 def _next_dir_gen(self, root):
523 """Generator for next directory element in the document.
524
525 Args:
526 root: root element in the XML tree.
527
528 Yields:
529 GCSFileStat for the next directory.
530 """
531 for e in root.getiterator(common._T_COMMON_PREFIXES):
532 yield common.GCSFileStat(
533 self._path + '/' + e.find(common._T_PREFIX).text,
534 st_size=None, etag=None, st_ctime=None, is_dir=True)
535 e.clear()
536 yield None
537
538 def _should_get_another_batch(self, content):
539 """Whether to issue another GET bucket call.
540
541 Args:
542 content: response XML.
543
544 Returns:
545 True if another request should be issued; self._options is also
546 updated for the next request. False otherwise.
547 """
548 if ('max-keys' in self._options and
549 self._options['max-keys'] <= common._MAX_GET_BUCKET_RESULT):
550 return False
551
552 elements = self._find_elements(
553 content, set([common._T_IS_TRUNCATED,
554 common._T_NEXT_MARKER]))
555 if elements.get(common._T_IS_TRUNCATED, 'false').lower() != 'true':
556 return False
557
558 next_marker = elements.get(common._T_NEXT_MARKER)
559 if next_marker is None:
560 self._options.pop('marker', None)
561 return False
562 self._options['marker'] = next_marker
563 return True
564
565 def _find_elements(self, result, elements):
566 """Find interesting elements from XML.
567
568 This function looks only for the specified elements,
569 without parsing the entire XML. It is most efficient when the
570 specified elements are located near the beginning of the document.
571
572 Args:
573 result: response XML.
574 elements: a set of interesting element tags.
575
576 Returns:
577 A dict from element tag to element value.
578 """
579 element_mapping = {}
580 result = StringIO.StringIO(result)
581 for _, e in ET.iterparse(result, events=('end',)):
582 if not elements:
583 break
584 if e.tag in elements:
585 element_mapping[e.tag] = e.text
586 elements.remove(e.tag)
587 return element_mapping
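For context, the early-exit pattern _find_elements relies on can be demonstrated standalone (tag names simplified; the real tags in common are namespace-qualified):

    import StringIO
    import xml.etree.cElementTree as ET

    xml = ('<r><IsTruncated>true</IsTruncated>'
           '<NextMarker>foo/2</NextMarker>'
           '<Contents>not processed</Contents></r>')
    wanted = set(['IsTruncated', 'NextMarker'])
    found = {}
    for _, e in ET.iterparse(StringIO.StringIO(xml), events=('end',)):
      if not wanted:
        break  # every interesting tag seen; stop parsing early
      if e.tag in wanted:
        found[e.tag] = e.text
        wanted.remove(e.tag)
    print found  # contains 'IsTruncated': 'true', 'NextMarker': 'foo/2'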