Chromium Code Reviews

Side by Side Diff: gae/cloudstorage/cloudstorage_api.py

Issue 1150463002: [chrome-devtools-frontend] Migrate to cloudstorage client (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/chrome-devtools-frontend
Patch Set: Created 5 years, 7 months ago
1 # Copyright 2012 Google Inc. All Rights Reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
14
15 """File Interface for Google Cloud Storage."""
16
17
18
19 from __future__ import with_statement
20
21
22
23 __all__ = ['delete',
24 'listbucket',
25 'open',
26 'stat',
27 'compose',
28 ]
29
30 import logging
31 import StringIO
32 import urllib
33 import os
34 import itertools
35 import types
36 import xml.etree.cElementTree as ET
37 from . import api_utils
38 from . import common
39 from . import errors
40 from . import storage_api
41
42
43
44 def open(filename,
45 mode='r',
46 content_type=None,
47 options=None,
48 offset=0,
49 read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
50 retry_params=None,
51 _account_id=None):
52 """Opens a Google Cloud Storage file and returns it as a File-like object.
53
54 Args:
55 filename: A Google Cloud Storage filename of form '/bucket/filename'.
56 mode: 'r' for reading mode. 'w' for writing mode.
57 In reading mode, the file must exist. In writing mode, a file will
58 be created or overwritten.
59 content_type: The MIME type of the file. str. Only valid in writing mode.
60 options: A str->basestring dict to specify additional headers to pass to
61 GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
62 Supported options are x-goog-acl, x-goog-meta-, cache-control,
63 content-disposition, and content-encoding.
64 Only valid in writing mode.
65 See https://developers.google.com/storage/docs/reference-headers
66 for details.
67 offset: Number of bytes to skip at the start of the file. If None, 0 is
68 used.
69 read_buffer_size: The buffer size for read. Read keeps a buffer
70 and prefetches another one. To minimize blocking for large files,
71 always read by buffer size. To minimize the number of RPC requests for
72 small files, set a large buffer size. Max is 30MB.
73 retry_params: An instance of api_utils.RetryParams for subsequent calls
74 to GCS from this file handle. If None, the default one is used.
75 _account_id: Internal-use only.
76
77 Returns:
78 A reading or writing buffer that supports File-like interface. Buffer
79 must be closed after operations are done.
80
81 Raises:
82 errors.AuthorizationError: if authorization failed.
83 errors.NotFoundError: if an object that's expected to exist doesn't.
84 ValueError: invalid open mode or if content_type or options are specified
85 in reading mode.
86 """
87 common.validate_file_path(filename)
88 api = storage_api._get_storage_api(retry_params=retry_params,
89 account_id=_account_id)
90 filename = api_utils._quote_filename(filename)
91
92 if mode == 'w':
93 common.validate_options(options)
94 return storage_api.StreamingBuffer(api, filename, content_type, options)
95 elif mode == 'r':
96 if content_type or options:
97 raise ValueError('Options and content_type can only be specified '
98 'in writing mode.')
99 return storage_api.ReadBuffer(api,
100 filename,
101 offset=offset,
102 buffer_size=read_buffer_size)
103 else:
104 raise ValueError('Invalid mode %s.' % mode)
105
106
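A minimal usage sketch for open(), assuming the package imports as cloudstorage and a hypothetical bucket /my-bucket (not part of this patch):

    import cloudstorage

    # Write a small text object, then read it back. Handles must be
    # closed when operations are done, so 'with' blocks are used.
    with cloudstorage.open('/my-bucket/greeting.txt', 'w',
                           content_type='text/plain') as gcs_file:
      gcs_file.write('hello world\n')

    with cloudstorage.open('/my-bucket/greeting.txt', 'r') as gcs_file:
      print gcs_file.read()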
107 def delete(filename, retry_params=None, _account_id=None):
108 """Delete a Google Cloud Storage file.
109
110 Args:
111 filename: A Google Cloud Storage filename of form '/bucket/filename'.
112 retry_params: An api_utils.RetryParams for this call to GCS. If None,
113 the default one is used.
114 _account_id: Internal-use only.
115
116 Raises:
117 errors.NotFoundError: if the file doesn't exist prior to deletion.
118 """
119 api = storage_api._get_storage_api(retry_params=retry_params,
120 account_id=_account_id)
121 common.validate_file_path(filename)
122 filename = api_utils._quote_filename(filename)
123 status, resp_headers, content = api.delete_object(filename)
124 errors.check_status(status, [204], filename, resp_headers=resp_headers,
125 body=content)
126
127
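A hedged sketch of how delete() surfaces a missing object (bucket and object names hypothetical): delete() raises rather than returning a status, so callers that tolerate missing objects must catch NotFoundError.

    import logging
    import cloudstorage
    from cloudstorage import errors

    try:
      cloudstorage.delete('/my-bucket/greeting.txt')
    except errors.NotFoundError:
      logging.info('object was already gone')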
128 def stat(filename, retry_params=None, _account_id=None):
129 """Get the GCSFileStat of a Google Cloud Storage file.
130
131 Args:
132 filename: A Google Cloud Storage filename of form '/bucket/filename'.
133 retry_params: An api_utils.RetryParams for this call to GCS. If None,
134 the default one is used.
135 _account_id: Internal-use only.
136
137 Returns:
138 A GCSFileStat object containing info about this file.
139
140 Raises:
141 errors.AuthorizationError: if authorization failed.
142 errors.NotFoundError: if an object that's expected to exist doesn't.
143 """
144 common.validate_file_path(filename)
145 api = storage_api._get_storage_api(retry_params=retry_params,
146 account_id=_account_id)
147 status, headers, content = api.head_object(
148 api_utils._quote_filename(filename))
149 errors.check_status(status, [200], filename, resp_headers=headers,
150 body=content)
151 file_stat = common.GCSFileStat(
152 filename=filename,
153 st_size=common.get_stored_content_length(headers),
154 st_ctime=common.http_time_to_posix(headers.get('last-modified')),
155 etag=headers.get('etag'),
156 content_type=headers.get('content-type'),
157 metadata=common.get_metadata(headers))
158
159 return file_stat
160
161
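A short sketch of stat() with the same hypothetical object; stat() issues a single HEAD request and opens no file handle:

    import cloudstorage

    info = cloudstorage.stat('/my-bucket/greeting.txt')
    print info.filename, info.st_size, info.content_type, info.etag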
162 def _copy2(src, dst, metadata=None, retry_params=None):
163 """Copy the file content from src to dst.
164
165 Internal use only!
166
167 Args:
168 src: /bucket/filename
169 dst: /bucket/filename
170 metadata: a dict of metadata for this copy. If None, old metadata is copied.
171 For example, {'x-goog-meta-foo': 'bar'}.
172 retry_params: An api_utils.RetryParams for this call to GCS. If None,
173 the default one is used.
174
175 Raises:
176 errors.AuthorizationError: if authorization failed.
177 errors.NotFoundError: if an object that's expected to exist doesn't.
178 """
179 common.validate_file_path(src)
180 common.validate_file_path(dst)
181
182 if metadata is None:
183 metadata = {}
184 copy_meta = 'COPY'
185 else:
186 copy_meta = 'REPLACE'
187 metadata.update({'x-goog-copy-source': src,
188 'x-goog-metadata-directive': copy_meta})
189
190 api = storage_api._get_storage_api(retry_params=retry_params)
191 status, resp_headers, content = api.put_object(
192 api_utils._quote_filename(dst), headers=metadata)
193 errors.check_status(status, [200], src, metadata, resp_headers, body=content)
194
195
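Although _copy2 is internal-use only, a sketch of its metadata semantics (paths and metadata key hypothetical) may help review:

    # With metadata=None the copy keeps the source's metadata
    # (x-goog-metadata-directive: COPY); passing a dict replaces it
    # wholesale (REPLACE).
    _copy2('/my-bucket/src.txt', '/my-bucket/dst.txt')
    _copy2('/my-bucket/src.txt', '/my-bucket/dst.txt',
           metadata={'x-goog-meta-owner': 'devtools'})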
196 def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,
197 delimiter=None, retry_params=None, _account_id=None):
198 """Returns a GCSFileStat iterator over a bucket.
199
200 Optional arguments can limit the result to a subset of files under bucket.
201
202 This function has two modes:
203 1. List bucket mode: Lists all files in the bucket without any concept of
204 hierarchy. GCS doesn't have real directory hierarchies.
205 2. Directory emulation mode: If you specify the 'delimiter' argument,
206 it is used as a path separator to emulate a hierarchy of directories.
207 In this mode, the "path_prefix" argument should end in the delimiter
208 specified (thus designating a logical directory). The logical directory's
209 contents, both files and subdirectories, are listed. The names of
210 subdirectories returned will end with the delimiter. So listbucket
211 can be called with the subdirectory name to list the subdirectory's
212 contents.
213
214 Args:
215 path_prefix: A Google Cloud Storage path of format "/bucket" or
216 "/bucket/prefix". Only objects whose full path starts with the
217 path_prefix will be returned.
218 marker: Another path prefix. Only objects whose full path sorts
219 lexicographically after marker will be returned (exclusive).
220 prefix: Deprecated. Use path_prefix.
221 max_keys: The limit on the number of objects to return. int.
222 For best performance, specify max_keys only if you know how many objects
223 you want. Otherwise, this method requests large batches and handles
224 pagination for you.
225 delimiter: Use to turn on directory mode. str of one or multiple chars
226 that your bucket uses as its directory separator.
227 retry_params: An api_utils.RetryParams for this call to GCS. If None,
228 the default one is used.
229 _account_id: Internal-use only.
230
231 Examples:
232 For files "/bucket/a",
233 "/bucket/bar/1",
234 "/bucket/foo",
235 "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1":
236
237 Regular mode:
238 listbucket("/bucket/f", marker="/bucket/foo/1")
239 will match "/bucket/foo/2/1", "/bucket/foo/3/1".
240
241 Directory mode:
242 listbucket("/bucket/", delimiter="/")
243 will match "/bucket/a", "/bucket/bar/", "/bucket/foo", "/bucket/foo/".
244 listbucket("/bucket/foo/", delimiter="/")
245 will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/".
246
247 Returns:
248 Regular mode:
249 A GCSFileStat iterator over matched files ordered by filename.
250 The iterator returns GCSFileStat objects. filename, etag, st_size,
251 st_ctime, and is_dir are set.
252
253 Directory emulation mode:
254 A GCSFileStat iterator over matched files and directories ordered by
255 name. The iterator returns GCSFileStat objects. For directories,
256 only the filename and is_dir fields are set.
257
258 The last name yielded can be used as next call's marker.
259 """
260 if prefix:
261 common.validate_bucket_path(path_prefix)
262 bucket = path_prefix
263 else:
264 bucket, prefix = common._process_path_prefix(path_prefix)
265
266 if marker and marker.startswith(bucket):
267 marker = marker[len(bucket) + 1:]
268
269 api = storage_api._get_storage_api(retry_params=retry_params,
270 account_id=_account_id)
271 options = {}
272 if marker:
273 options['marker'] = marker
274 if max_keys:
275 options['max-keys'] = max_keys
276 if prefix:
277 options['prefix'] = prefix
278 if delimiter:
279 options['delimiter'] = delimiter
280
281 return _Bucket(api, bucket, options)
282
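A sketch of directory-emulation mode, assuming a hypothetical /my-bucket laid out like the docstring example:

    import cloudstorage

    # Top-level listing; entries whose is_dir is set end with the
    # delimiter and can be fed back in as the next path_prefix.
    for entry in cloudstorage.listbucket('/my-bucket/', delimiter='/'):
      if entry.is_dir:
        for child in cloudstorage.listbucket(entry.filename, delimiter='/'):
          print child.filename
      else:
        print entry.filename, entry.st_size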
283 # pylint: disable=too-many-locals, too-many-branches, too-many-statements
284 def compose(list_of_files, destination_file, files_metadata=None,
285 content_type=None, retry_params=None, _account_id=None):
286 """Runs the GCS Compose on the given files.
287
288 Merges between 2 and 32 files into one file. Composite files may even
289 be built from other existing composites, provided that the total
290 component count does not exceed 1024. See here for details:
291 https://cloud.google.com/storage/docs/composite-objects
292
293 Args:
294 list_of_files: List of file name strings with no leading slashes or bucket.
295 destination_file: Path to the output file. Must have the bucket in the path.
296 files_metadata: Optional, file metadata, order must match list_of_files,
297 see link for available options:
298 https://cloud.google.com/storage/docs/composite-objects#_Xml
299 content_type: Optional, used to specify content-header of the output file.
300 retry_params: Optional, an api_utils.RetryParams for this call to GCS.
301 If None, the default one is used.
302 _account_id: Internal-use only.
303
304 Raises:
305 ValueError: If the number of files is outside the range of 2-32.
306 """
307 api = storage_api._get_storage_api(retry_params=retry_params,
308 account_id=_account_id)
309
310 # Needed until cloudstorage_stub.py is updated to accept compose requests.
311 # TODO(rbruyere@gmail.com): When patched, remove the True branch of this if.
312
313 if os.getenv('SERVER_SOFTWARE', '').startswith('Dev'):
314 def _temp_func(file_list, destination_file, content_type):
315 """Dev server stub; remove when the dev server accepts compose requests."""
316 bucket = '/' + destination_file.split('/')[1] + '/'
317 with open(destination_file, 'w', content_type=content_type) as gcs_merge:
318 for source_file in file_list:
319 with open(bucket + source_file['Name'], 'r') as gcs_source:
320 gcs_merge.write(gcs_source.read())
321
322 compose_object = _temp_func
323 else:
324 compose_object = api.compose_object
325 file_list, _ = _validate_compose_list(destination_file,
326 list_of_files,
327 files_metadata, 32)
328 compose_object(file_list, destination_file, content_type)
329
330
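A sketch of compose(), with hypothetical bucket-relative part names; note that only destination_file carries the bucket:

    import cloudstorage

    cloudstorage.compose(['part-0', 'part-1', 'part-2'],
                         '/my-bucket/merged',
                         content_type='text/plain')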
331 def _file_exists(destination):
332 """Checks if a file exists.
333
334 Attempts to open the file; if that succeeds, the file exists.
335
336 Args:
337 destination: Full path to the file (e.g. /bucket/object) with leading slash.
338
339 Returns:
340 True if the file is accessible otherwise False.
341 """
342 try:
343 with open(destination, "r"):
344 return True
345 except errors.NotFoundError:
346 return False
347
348
349 def _validate_compose_list(destination_file, file_list,
350 files_metadata=None, number_of_files=32):
351 """Validates file_list and merges it with files_metadata.
352
353 Args:
354 destination_file: Path to the file (e.g. /destination_bucket/destination_file).
355 file_list: List of files to compose, see compose for details.
356 files_metadata: Meta details for each file in the file_list.
357 number_of_files: Maximum number of files allowed in the list.
358
359 Returns:
360 A tuple (list_of_files, bucket):
361 list_of_files: Ready to use dict version of the list.
362 bucket: bucket name extracted from the file paths.
363 """
364 common.validate_file_path(destination_file)
365 bucket = destination_file[0:(destination_file.index('/', 1) + 1)]
366 try:
367 if isinstance(file_list, types.StringTypes):
368 raise TypeError
369 list_len = len(file_list)
370 except TypeError:
371 raise TypeError('file_list must be a list')
372
373 if list_len > number_of_files:
374 raise ValueError(
375 'Compose attempted to create composite with too many '
376 '(%i) components; limit is (%i).' % (list_len, number_of_files))
377 if list_len <= 1:
378 raise ValueError('Compose operation requires at'
379 ' least two components; %i provided.' % list_len)
380
381 if files_metadata is None:
382 files_metadata = []
383 elif len(files_metadata) > list_len:
384 raise ValueError('files_metadata contains more entries (%i)'
385 ' than file_list (%i)'
386 % (len(files_metadata), list_len))
387 list_of_files = []
388 for source_file, meta_data in itertools.izip_longest(file_list,
389 files_metadata):
390 if not isinstance(source_file, str):
391 raise TypeError('Each item of file_list must be a string')
392 if source_file.startswith('/'):
393 logging.warn('Detected a "/" at the start of the file name; '
394 'unless the file name itself contains a "/", '
395 'this may cause files to be misread')
396 if source_file.startswith(bucket):
397 logging.warn('Detected the bucket name at the start of the file name; '
398 'the bucket must not be specified in file_list entries. '
399 'This may cause files to be misread')
400 common.validate_file_path(bucket + source_file)
401
402 list_entry = {}
403
404 if meta_data is not None:
405 list_entry.update(meta_data)
406 list_entry["Name"] = source_file
407 list_of_files.append(list_entry)
408
409 return list_of_files, bucket
410
411
412 class _Bucket(object):
413 """A wrapper for a GCS bucket as the return value of listbucket."""
414
415 def __init__(self, api, path, options):
416 """Initialize.
417
418 Args:
419 api: storage_api instance.
420 path: bucket path of form '/bucket'.
421 options: a dict of listbucket options. Please see listbucket doc.
422 """
423 self._init(api, path, options)
424
425 def _init(self, api, path, options):
426 self._api = api
427 self._path = path
428 self._options = options.copy()
429 self._get_bucket_fut = self._api.get_bucket_async(
430 self._path + '?' + urllib.urlencode(self._options))
431 self._last_yield = None
432 self._new_max_keys = self._options.get('max-keys')
433
434 def __getstate__(self):
435 options = self._options
436 if self._last_yield:
437 options['marker'] = self._last_yield.filename[len(self._path) + 1:]
438 if self._new_max_keys is not None:
439 options['max-keys'] = self._new_max_keys
440 return {'api': self._api,
441 'path': self._path,
442 'options': options}
443
444 def __setstate__(self, state):
445 self._init(state['api'], state['path'], state['options'])
446
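The __getstate__/__setstate__ pair above makes a partially consumed listing picklable: the last yielded name becomes the next request's marker, e.g. for checkpointing in a task queue. A hedged sketch, assuming a hypothetical bucket and that the api handle itself pickles:

    import pickle
    import cloudstorage

    bucket = cloudstorage.listbucket('/my-bucket/')
    it = iter(bucket)
    first = it.next()            # consuming updates bucket._last_yield
    blob = pickle.dumps(bucket)  # __getstate__ turns it into a marker

    resumed = pickle.loads(blob)
    for entry in resumed:        # continues after 'first'
      print entry.filename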
447 def __iter__(self):
448 """Iterate over the bucket.
449
450 Yields:
451 GCSFileStat: a GCSFileStat for an object in the bucket.
452 They are ordered by GCSFileStat.filename.
453 """
454 total = 0
455 max_keys = self._options.get('max-keys')
456
457 while self._get_bucket_fut:
458 status, resp_headers, content = self._get_bucket_fut.get_result()
459 errors.check_status(status, [200], self._path, resp_headers=resp_headers,
460 body=content, extras=self._options)
461
462 if self._should_get_another_batch(content):
463 self._get_bucket_fut = self._api.get_bucket_async(
464 self._path + '?' + urllib.urlencode(self._options))
465 else:
466 self._get_bucket_fut = None
467
468 root = ET.fromstring(content)
469 dirs = self._next_dir_gen(root)
470 files = self._next_file_gen(root)
471 next_file = files.next()
472 next_dir = dirs.next()
473
474 while ((max_keys is None or total < max_keys) and
475 not (next_file is None and next_dir is None)):
476 total += 1
477 if next_file is None:
478 self._last_yield = next_dir
479 next_dir = dirs.next()
480 elif next_dir is None:
481 self._last_yield = next_file
482 next_file = files.next()
483 elif next_dir < next_file:
484 self._last_yield = next_dir
485 next_dir = dirs.next()
486 elif next_file < next_dir:
487 self._last_yield = next_file
488 next_file = files.next()
489 else:
490 logging.error(
491 'Should never reach here. next file is %r. next dir is %r.',
492 next_file, next_dir)
493 if self._new_max_keys:
494 self._new_max_keys -= 1
495 yield self._last_yield
496
497 def _next_file_gen(self, root):
498 """Generator for next file element in the document.
499
500 Args:
501 root: root element of the XML tree.
502
503 Yields:
504 GCSFileStat for the next file.
505 """
506 for e in root.getiterator(common._T_CONTENTS):
507 st_ctime, size, etag, key = None, None, None, None
508 for child in e.getiterator('*'):
509 if child.tag == common._T_LAST_MODIFIED:
510 st_ctime = common.dt_str_to_posix(child.text)
511 elif child.tag == common._T_ETAG:
512 etag = child.text
513 elif child.tag == common._T_SIZE:
514 size = child.text
515 elif child.tag == common._T_KEY:
516 key = child.text
517 yield common.GCSFileStat(self._path + '/' + key,
518 size, etag, st_ctime)
519 e.clear()
520 yield None
521
522 def _next_dir_gen(self, root):
523 """Generator for next directory element in the document.
524
525 Args:
526 root: root element in the XML tree.
527
528 Yields:
529 GCSFileStat for the next directory.
530 """
531 for e in root.getiterator(common._T_COMMON_PREFIXES):
532 yield common.GCSFileStat(
533 self._path + '/' + e.find(common._T_PREFIX).text,
534 st_size=None, etag=None, st_ctime=None, is_dir=True)
535 e.clear()
536 yield None
537
538 def _should_get_another_batch(self, content):
539 """Whether to issue another GET bucket call.
540
541 Args:
542 content: response XML.
543
544 Returns:
545 True if another request should be issued; self._options is also
546 updated for the next request. False otherwise.
547 """
548 if ('max-keys' in self._options and
549 self._options['max-keys'] <= common._MAX_GET_BUCKET_RESULT):
550 return False
551
552 elements = self._find_elements(
553 content, set([common._T_IS_TRUNCATED,
554 common._T_NEXT_MARKER]))
555 if elements.get(common._T_IS_TRUNCATED, 'false').lower() != 'true':
556 return False
557
558 next_marker = elements.get(common._T_NEXT_MARKER)
559 if next_marker is None:
560 self._options.pop('marker', None)
561 return False
562 self._options['marker'] = next_marker
563 return True
564
565 def _find_elements(self, result, elements):
566 """Find interesting elements from XML.
567
568 This function looks only for the specified elements,
569 without parsing the entire XML. It is most efficient when the
570 specified elements are located near the beginning of the document.
571
572 Args:
573 result: response XML.
574 elements: a set of interesting element tags.
575
576 Returns:
577 A dict from element tag to element value.
578 """
579 element_mapping = {}
580 result = StringIO.StringIO(result)
581 for _, e in ET.iterparse(result, events=('end',)):
582 if not elements:
583 break
584 if e.tag in elements:
585 element_mapping[e.tag] = e.text
586 elements.remove(e.tag)
587 return element_mapping
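For context, the early-exit pattern _find_elements relies on can be demonstrated standalone (tag names simplified; the real tags in common are namespace-qualified):

    import StringIO
    import xml.etree.cElementTree as ET

    xml = ('<r><IsTruncated>true</IsTruncated>'
           '<NextMarker>foo/2</NextMarker>'
           '<Contents>not processed</Contents></r>')
    wanted = set(['IsTruncated', 'NextMarker'])
    found = {}
    for _, e in ET.iterparse(StringIO.StringIO(xml), events=('end',)):
      if not wanted:
        break  # every interesting tag seen; stop parsing early
      if e.tag in wanted:
        found[e.tag] = e.text
        wanted.remove(e.tag)
    print found  # contains 'IsTruncated': 'true', 'NextMarker': 'foo/2'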