Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1150)

Side by Side Diff: third_party/google_appengine_cloudstorage/cloudstorage/common.py

Issue 139303023: add GCS support to docs server (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Updated third party library, rebased and fixed a path issue caused by rebasing Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2012 Google Inc. All Rights Reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
14
15 """Helpers shared by cloudstorage_stub and cloudstorage_api."""
16
17
18
19
20
# Public API of this module, grouped by kind and sorted within each group.
__all__ = [
    # Constants.
    'CS_XML_NS',
    'LOCAL_GCS_ENDPOINT',
    # Classes (CSFileStat is an alias of GCSFileStat).
    'CSFileStat',
    'GCSFileStat',
    # Functions.
    'dt_str_to_posix',
    'get_access_token',
    'get_metadata',
    'http_time_to_posix',
    'local_api_url',
    'local_run',
    'memory_usage',
    'posix_time_to_http',
    'posix_to_dt_str',
    'set_access_token',
    'validate_bucket_name',
    'validate_bucket_path',
    'validate_file_path',
    'validate_options',
]
40
41
42 import calendar
43 import datetime
44 from email import utils as email_utils
45 import logging
46 import os
47 import re
48
49 try:
50 from google.appengine.api import runtime
51 except ImportError:
52 from google.appengine.api import runtime
53
54
# Character class and length limits shared by every bucket pattern below.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# The exact-match patterns end with \Z rather than $: '$' also matches just
# before a trailing newline, which would let 'bucket\n' pass validation.
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'\Z')
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'\Z')
# '/bucket' followed by anything.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# '/bucket/' followed by anything.
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Lower-case header-name prefixes that get_metadata treats as user metadata.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# Option-name prefixes accepted by validate_options.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace that _add_ns prepends to tag names.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# Path component appended to the host by local_api_url.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared token managed through set_access_token / get_access_token.
_access_token = ''


# NOTE(review): presumably the page size for GET bucket listings; it is not
# used anywhere in this module -- confirm against callers.
_MAX_GET_BUCKET_RESULT = 1000
71
72
def set_access_token(access_token):
  """Store the access token shared by the whole library.

  While a token is set, the library always tries to talk to the real
  Google Cloud Storage with it, even on dev appserver. Tokens can expire;
  renewing them is the caller's responsibility.

  With no token set, the library requests and refreshes one automatically
  on appserver, and talks to a Google Cloud Storage stub on dev appserver.

  Args:
    access_token: the token str, e.g. the text following 'Bearer' in the
      output of 'gsutil -d ls'.
  """
  # Rebind the module-level variable rather than creating a local one.
  global _access_token
  _access_token = access_token
90
91
def get_access_token():
  """Return the module-wide access token.

  Returns:
    The value last passed to set_access_token, or the initial empty str.
  """
  return _access_token
95
96
class GCSFileStat(object):
  """Container for GCS file stat.

  Instances order by filename (see __cmp__) and hash by etag when one is
  set, falling back to filename otherwise.
  """

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
        One pair of surrounding double quotes is stripped.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real file.
    """
    self.filename = filename
    self.is_dir = is_dir
    # File-only attributes stay None for directories.
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      try:
        to_integer = long  # Python 2: keep the original long conversion.
      except NameError:
        to_integer = int  # Python 3 has no separate long type.
      self.st_size = to_integer(st_size)
      self.st_ctime = float(st_ctime)
      # Guard against an empty or missing etag before indexing into it.
      if etag and etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    """Return a readable summary; directories show only their filename."""
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Compare by filename.

    Args:
      other: another instance of this class.

    Returns:
      1, -1 or 0 when self.filename is greater than, less than or equal to
      other.filename.

    Raises:
      ValueError: if other is not an instance of this class.
    """
    if not isinstance(other, self.__class__):
      # Interpolate the type names; passing them as extra exception
      # arguments would leave the '%s' placeholders in the message.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    """Hash by etag when set, otherwise by filename."""
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)


# Alias that keeps the older class name importable.
CSFileStat = GCSFileStat
173
174
def get_metadata(headers):
  """Get user defined options from HTTP response headers.

  Args:
    headers: a mapping of header name to header value.

  Returns:
    A dict holding only the headers whose lower-cased name starts with one
    of the prefixes in _GCS_METADATA. Keys keep their original case.
  """
  # str.startswith accepts a tuple, so one call checks every prefix.
  prefixes = tuple(_GCS_METADATA)
  # items() instead of iteritems(): the latter exists only on Python 2 dicts.
  return dict((name, value) for name, value in headers.items()
              if name.lower().startswith(prefixes))
179
180
def validate_bucket_name(name):
  """Check that a bare Google Storage bucket name is well formed.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is empty or does not match the bucket pattern.
    TypeError: if name is not a string (raised by _validate_path).
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name):
    return
  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                   '0-9, underscore, dash or dot but got %s' % name)
194
195
def validate_bucket_path(path):
  """Check that a path names a bucket, i.e. has the form '/bucket'.

  Args:
    path: a Google Storage bucket path.

  Raises:
    ValueError: if path is empty or is not of the form '/bucket'.
    TypeError: if path is not a string (raised by _validate_path).
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path):
    return
  raise ValueError('Bucket should have format /bucket '
                   'but got %s' % path)
209
210
def validate_file_path(path):
  """Check that a path names a file, i.e. has the form '/bucket/filename'.

  Args:
    path: a Google Storage file path.

  Raises:
    ValueError: if path is empty or is not of the form '/bucket/filename'.
    TypeError: if path is not a string (raised by _validate_path).
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path):
    return
  raise ValueError('Path should have format /bucket/filename '
                   'but got %s' % path)
224
225
def _process_path_prefix(path_prefix):
  """Validate a Google Cloud Storage path prefix and split it in two.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format
      '/bucket/prefix' or '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path_prefix is empty or malformed.
    TypeError: if path_prefix is not a string (raised by _validate_path).

  Returns:
    a tuple of /bucket and prefix. prefix is None when nothing follows the
    bucket.
  """
  _validate_path(path_prefix)
  if _GCS_PATH_PREFIX_REGEX.match(path_prefix) is None:
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  # Search from index 1 to skip the leading slash of '/bucket'.
  slash_index = path_prefix.find('/', 1)
  if slash_index == -1:
    # No second slash: the whole string is the bucket.
    return path_prefix, None
  remainder = path_prefix[slash_index + 1:]
  # An empty remainder ('/bucket/') is reported as None.
  return path_prefix[:slash_index], remainder or None
250
251
def _validate_path(path):
  """Run the checks common to every Google Storage path.

  Args:
    path: a Google Storage path. It should have form '/bucket/filename'
      or '/bucket'.

  Raises:
    ValueError: if path is falsy (checked first, so it also covers None).
    TypeError: if path is not of type basestring.
  """
  if not path:
    raise ValueError('Path is empty')
  if isinstance(path, basestring):
    return
  raise TypeError('Path should be a string but is %s (%s).' %
                  (path.__class__, path))
268
269
def validate_options(options):
  """Check every Google Cloud Storage option name and value.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud
      Storage. A falsy value (None, empty dict) is accepted as is.

  Raises:
    ValueError: if an option name does not start with a supported prefix.
    TypeError: if an option name is not a str, or an option value is not a
      basestring.
  """
  if not options:
    return

  supported_prefixes = tuple(_GCS_OPTIONS)
  for name, value in options.iteritems():
    if not isinstance(name, str):
      raise TypeError('option %r should be a str.' % name)
    # Matching is case-insensitive on the option name.
    if not name.lower().startswith(supported_prefixes):
      raise ValueError('option %s is not supported.' % name)
    if not isinstance(value, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (value, name))
292
293
def http_time_to_posix(http_time):
  """Turn an HTTP date header value into posix time.

  The HTTP time format is described at
  http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1

  Args:
    http_time: time in RFC 2616 format. e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT".

  Returns:
    Seconds from unix epoch, or None when http_time is None.
  """
  if http_time is None:
    return None
  parsed = email_utils.parsedate_tz(http_time)
  return email_utils.mktime_tz(parsed)
309
310
def posix_time_to_http(posix_time):
  """Convert posix time to HTTP header time format.

  Args:
    posix_time: unix time. 0 (the epoch itself) is a valid value.

  Returns:
    A datetime str in RFC 2616 format, e.g. "Mon, 20 Nov 1995 19:12:08 GMT",
    or None when posix_time is None.
  """
  # Compare against None explicitly: a truthiness test would drop the
  # valid timestamp 0, unlike http_time_to_posix which checks "is not None".
  if posix_time is not None:
    return email_utils.formatdate(posix_time, usegmt=True)
322
323
# strptime/strftime layout of the whole-second part of a datetime str.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """format str to posix.

  datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian). A str without fractional seconds,
  e.g. 2013-04-12T00:22:27Z, is accepted as well.

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML response.

  Args:
    dt_str: A datetime str.

  Returns:
    Whole secs from unix epoch; the fractional part of dt_str is discarded.
    By posix definition, epoch is midnight 1970/1/1 UTC.

  Raises:
    ValueError: if the whole-second part does not match _DT_FORMAT.
  """
  # Keep everything before the first '.'; partition, unlike unpacking the
  # result of split('.'), also copes with a str that has no '.' at all.
  parsable = dt_str.partition('.')[0]
  if parsable.endswith('Z'):
    # No fractional part was present, so the UTC marker is still attached.
    parsable = parsable[:-1]
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())
349
350
def posix_to_dt_str(posix):
  """Format posix time as a datetime str; reverse of dt_str_to_posix.

  This is used by GCS stub to generate GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str in UTC. The fractional part is always written as
    '.000Z', so sub-second precision is not preserved.
  """
  utc_dt = datetime.datetime.utcfromtimestamp(posix)
  return utc_dt.strftime(_DT_FORMAT) + '.000Z'
365
366
def local_run():
  """Tell whether we should hit GCS dev appserver stub.

  Returns:
    True when the SERVER_SOFTWARE environment variable is unset, or when it
    starts with 'Development' or 'testutil' and does not contain
    'remote_api'. False otherwise.
  """
  software = os.environ.get('SERVER_SOFTWARE')
  if software is None:
    return True
  # 'remote_api' anywhere in the value rules out the local stub.
  return ('remote_api' not in software and
          software.startswith(('Development', 'testutil')))
377
378
def local_api_url():
  """Build the URL for GCS emulation on dev appserver.

  Returns:
    'http://' followed by the HTTP_HOST environment variable and
    LOCAL_GCS_ENDPOINT. An unset HTTP_HOST is formatted as 'None'.
  """
  host = os.environ.get('HTTP_HOST')
  return 'http://%s%s' % (host, LOCAL_GCS_ENDPOINT)
382
383
def memory_usage(method):
  """Decorator that logs memory usage before and after a method.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper that logs runtime.memory_usage().current() at INFO level,
    calls method with the same arguments, logs again and returns method's
    result. The second log is skipped if method raises.
  """
  # Imported locally so this function does not depend on the module's
  # import block.
  import functools

  # wraps keeps method's __name__ and __doc__ on the returned wrapper.
  @functools.wraps(method)
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
394
395
def _add_ns(tagname):
  """Qualify an XML tag name with the CS_XML_NS namespace.

  Args:
    tagname: the bare tag name, e.g. 'Contents'.

  Returns:
    The tag name in '{namespace}tag' form.
  """
  return '{%s}%s' % (CS_XML_NS, tagname)


# Namespace-qualified tag names, built once at import time.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698