Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Side by Side Diff: third_party/google_appengine_cloudstorage/cloudstorage/common.py

Issue 139303023: add GCS support to docs server (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: bumped versions Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2012 Google Inc. All Rights Reserved.
2
3 """Helpers shared by cloudstorage_stub and cloudstorage_api."""
4
5
6
7
8
# Public names of this module; this is what ``from <module> import *`` exports.
__all__ = [
    'CS_XML_NS',
    'CSFileStat',
    'dt_str_to_posix',
    'local_api_url',
    'LOCAL_GCS_ENDPOINT',
    'local_run',
    'get_access_token',
    'get_metadata',
    'GCSFileStat',
    'http_time_to_posix',
    'memory_usage',
    'posix_time_to_http',
    'posix_to_dt_str',
    'set_access_token',
    'validate_options',
    'validate_bucket_name',
    'validate_bucket_path',
    'validate_file_path',
]
28
29
30 import calendar
31 import datetime
32 from email import utils as email_utils
33 import logging
34 import os
35 import re
36
37 try:
38 from google.appengine.api import runtime
39 except ImportError:
40 from google.appengine.api import runtime
41
42
# A bucket name: 3 to 63 characters drawn from a-z, 0-9, dot, dash and
# underscore. The other patterns below are built on top of this fragment.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# A bare bucket name and nothing else.
_GCS_BUCKET_REGEX = re.compile('%s$' % _GCS_BUCKET_REGEX_BASE)
# '/bucket' and nothing else.
_GCS_BUCKET_PATH_REGEX = re.compile('/%s$' % _GCS_BUCKET_REGEX_BASE)
# '/bucket' followed by any text.
_GCS_PATH_PREFIX_REGEX = re.compile('/%s.*' % _GCS_BUCKET_REGEX_BASE)
# '/bucket/' followed by any text.
_GCS_FULLPATH_REGEX = re.compile('/%s/.*' % _GCS_BUCKET_REGEX_BASE)

# Lowercase header-name prefixes; get_metadata() keeps the headers whose
# lowercased name starts with one of these.
_GCS_METADATA = [
    'x-goog-meta-',
    'content-disposition',
    'cache-control',
    'content-encoding',
]
# Option-name prefixes accepted by validate_options(): the metadata
# prefixes plus the ACL header.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']

# XML namespace that _add_ns() puts in front of tag names.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# URL path that local_api_url() appends to the dev appserver host.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared access token; written by set_access_token() and read by
# get_access_token(). Starts out as the empty string.
_access_token = ''


# NOTE(review): not referenced in the visible part of this file; presumably
# a cap on the number of entries in one GET bucket response -- confirm.
_MAX_GET_BUCKET_RESULT = 1000
59
60
def set_access_token(access_token):
    """Store the shared access token used to talk to Google Cloud Storage.

    While a token is set, the library always tries to reach the real Google
    Cloud Storage with it, even when running on dev appserver. The token may
    expire; renewing it is the caller's responsibility.

    While no token is set, the library requests and refreshes a token
    automatically on appserver, and talks to a Google Cloud Storage stub on
    dev appserver.

    Args:
        access_token: the token str. One way to obtain it is to run
            'gsutil -d ls' and copy the string that follows 'Bearer'.
    """
    # Rebind the module-level variable rather than creating a local.
    global _access_token
    _access_token = access_token
78
79
def get_access_token():
    """Return the token last stored by set_access_token().

    The module-level default is the empty string.
    """
    return _access_token
83
84
class GCSFileStat(object):
    """Container for GCS file stat.

    Instances order by filename (see __cmp__) and hash by etag when one is
    set, otherwise by filename.

    NOTE(review): because __cmp__ uses filename while __hash__ prefers etag,
    two objects that compare equal may hash differently -- confirm this is
    intended before using instances as dict keys or set members.
    """

    def __init__(self,
                 filename,
                 st_size,
                 etag,
                 st_ctime,
                 content_type=None,
                 metadata=None,
                 is_dir=False):
        """Initialize.

        For files, the non optional arguments are always set.
        For directories, only filename and is_dir are set; st_size, etag and
        st_ctime are ignored and stored as None.

        Args:
            filename: a Google Cloud Storage filename of form
                '/bucket/filename'.
            st_size: file size in bytes. long compatible.
            etag: hex digest of the md5 hash of the file's content. str.
                A surrounding pair of double quotes is stripped.
            st_ctime: posix file creation time. float compatible.
            content_type: content type. str.
            metadata: a str->str dict of user specified options when
                creating the file. Possible keys are x-goog-meta-,
                content-disposition, content-encoding, and cache-control.
            is_dir: True if this represents a directory. False if this is a
                real file.
        """
        self.filename = filename
        self.is_dir = is_dir
        self.st_size = None
        self.st_ctime = None
        self.etag = None
        self.content_type = content_type
        self.metadata = metadata

        if not is_dir:
            self.st_size = long(st_size)
            self.st_ctime = float(st_ctime)
            # Slices instead of etag[0] / etag[-1] so that an empty etag is
            # stored as-is rather than raising IndexError.
            if etag[:1] == '"' and etag[-1:] == '"':
                etag = etag[1:-1]
            self.etag = etag

    def __repr__(self):
        if self.is_dir:
            return '(directory: %s)' % self.filename

        return (
            '(filename: %(filename)s, st_size: %(st_size)s, '
            'st_ctime: %(st_ctime)s, etag: %(etag)s, '
            'content_type: %(content_type)s, '
            'metadata: %(metadata)s)' %
            dict(filename=self.filename,
                 st_size=self.st_size,
                 st_ctime=self.st_ctime,
                 etag=self.etag,
                 content_type=self.content_type,
                 metadata=self.metadata))

    def __cmp__(self, other):
        """Order two stats by filename.

        Args:
            other: another instance of this class.

        Returns:
            1, -1 or 0 when self.filename is greater than, less than or
            equal to other.filename.

        Raises:
            ValueError: if other is not an instance of this class.
        """
        if not isinstance(other, self.__class__):
            # Interpolate the type names into the message; passing them as
            # extra exception arguments would leave the '%s' placeholders
            # unformatted.
            raise ValueError('Argument to cmp must have the same type. '
                             'Expect %s, got %s' %
                             (self.__class__.__name__,
                              other.__class__.__name__))
        if self.filename > other.filename:
            return 1
        elif self.filename < other.filename:
            return -1
        return 0

    def __hash__(self):
        # Directories (and files with an empty etag) fall back to filename.
        if self.etag:
            return hash(self.etag)
        return hash(self.filename)


# Alias of GCSFileStat; both names are listed in __all__.
CSFileStat = GCSFileStat
161
162
def get_metadata(headers):
    """Pick the user defined options out of HTTP response headers.

    Args:
        headers: a dict of header name to header value.

    Returns:
        A new dict holding the entries of headers whose lowercased name
        starts with one of the prefixes in _GCS_METADATA. Names keep their
        original case.
    """
    prefixes = tuple(_GCS_METADATA)
    metadata = {}
    for name, value in headers.iteritems():
        if name.lower().startswith(prefixes):
            metadata[name] = value
    return metadata
167
168
def validate_bucket_name(name):
    """Check that name is a well formed Google Storage bucket name.

    Args:
        name: a Google Storage bucket name with no prefix or suffix.

    Raises:
        ValueError: if name is empty or does not match the bucket pattern.
        TypeError: if name is not a string (raised by _validate_path).
    """
    _validate_path(name)
    if _GCS_BUCKET_REGEX.match(name):
        return
    raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                     '0-9, underscore, dash or dot but got %s' % name)
182
183
def validate_bucket_path(path):
    """Check that path is a well formed Google Cloud Storage bucket path.

    Args:
        path: a Google Storage bucket path. It should have form '/bucket'.

    Raises:
        ValueError: if path is empty or not of the form '/bucket'.
        TypeError: if path is not a string (raised by _validate_path).
    """
    _validate_path(path)
    if _GCS_BUCKET_PATH_REGEX.match(path):
        return
    raise ValueError('Bucket should have format /bucket '
                     'but got %s' % path)
197
198
def validate_file_path(path):
    """Check that path is a well formed Google Cloud Storage file path.

    Args:
        path: a Google Storage file path. It should have form
            '/bucket/filename'.

    Raises:
        ValueError: if path is empty or not of the form '/bucket/filename'.
        TypeError: if path is not a string (raised by _validate_path).
    """
    _validate_path(path)
    if _GCS_FULLPATH_REGEX.match(path):
        return
    raise ValueError('Path should have format /bucket/filename '
                     'but got %s' % path)
212
213
def _process_path_prefix(path_prefix):
    """Validate a Google Cloud Storage path prefix and split it.

    Args:
        path_prefix: a Google Cloud Storage path prefix of format
            '/bucket/prefix' or '/bucket/' or '/bucket'.

    Raises:
        ValueError: if path is invalid.

    Returns:
        a tuple of /bucket and prefix. prefix is None when nothing follows
        the bucket (both '/bucket' and '/bucket/').
    """
    _validate_path(path_prefix)
    if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
        raise ValueError('Path prefix should have format /bucket, /bucket/, '
                         'or /bucket/prefix but got %s.' % path_prefix)
    # The pattern guarantees a leading '/', so look for the separator that
    # ends the bucket name in the text after that first character.
    name, slash, remainder = path_prefix[1:].partition('/')
    if not slash:
        return path_prefix, None
    return path_prefix[:len(name) + 1], remainder or None
238
239
def _validate_path(path):
    """Run the checks shared by all Google Storage path validators.

    Args:
        path: a Google Storage path. It should have form '/bucket/filename'
            or '/bucket'.

    Raises:
        ValueError: if path is falsy (checked first, so this also covers
            None and other empty values).
        TypeError: if path is not of type basestring.
    """
    if not path:
        raise ValueError('Path is empty')
    if isinstance(path, basestring):
        return
    raise TypeError('Path should be a string but is %s (%s).' %
                    (path.__class__, path))
256
257
def validate_options(options):
    """Validate Google Cloud Storage options.

    A falsy options value (None or an empty dict) is accepted as is.

    Args:
        options: a str->basestring dict of options to pass to Google Cloud
            Storage.

    Raises:
        ValueError: if option is not supported, i.e. its lowercased name
            does not start with any prefix in _GCS_OPTIONS.
        TypeError: if option is not of type str or value of an option
            is not of type basestring.
    """
    if not options:
        return

    for k, v in options.iteritems():
        if not isinstance(k, str):
            # Wrap in a tuple so a tuple-valued key is shown rather than
            # being unpacked as format arguments.
            raise TypeError('option %r should be a str.' % (k,))
        if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
            raise ValueError('option %s is not supported.' % k)
        if not isinstance(v, basestring):
            # Both values must go through '%' as one tuple; '% v, k' would
            # format with v alone and fail before building the message.
            raise TypeError('value %r for option %s should be of type '
                            'basestring.' % (v, k))
280
281
def http_time_to_posix(http_time):
    """Turn an HTTP date header value into posix time.

    See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
    for http time format.

    Args:
        http_time: time in RFC 2616 format. e.g.
            "Mon, 20 Nov 1995 19:12:08 GMT". May be None.

    Returns:
        Seconds from unix epoch, or None when http_time is None.
    """
    if http_time is None:
        return None
    parsed = email_utils.parsedate_tz(http_time)
    return email_utils.mktime_tz(parsed)
297
298
def posix_time_to_http(posix_time):
    """Turn posix time into the HTTP header time format.

    Args:
        posix_time: unix time.

    Returns:
        A datetime str in RFC 2616 format, or None when posix_time is falsy
        (None, and also 0).
    """
    if not posix_time:
        return None
    return email_utils.formatdate(posix_time, usegmt=True)
310
311
# strptime/strftime format for the whole-second part of a GCS datetime str.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
    """Convert a datetime str to posix time.

    datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
    e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
    between date and time when they are on the same line.
    Z indicates UTC (zero meridian).

    A str without the fractional part, e.g. 2013-04-12T00:22:27Z, is
    accepted as well.

    A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

    This is used to parse LastModified node from GCS's GET bucket XML
    response.

    Args:
        dt_str: A datetime str.

    Returns:
        Whole seconds from unix epoch; the fractional part of dt_str is
        discarded. By posix definition, epoch is midnight 1970/1/1 UTC.

    Raises:
        ValueError: if the date and time part does not match _DT_FORMAT.
    """
    # Keep what precedes the first '.'; splitting at most once means a
    # missing fraction does not break the parse.
    parsable = dt_str.split('.', 1)[0]
    # Without a fraction the UTC marker is still attached; drop it.
    if parsable.endswith('Z'):
        parsable = parsable[:-1]
    dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
    return calendar.timegm(dt.utctimetuple())
337
338
def posix_to_dt_str(posix):
    """Reverse of dt_str_to_posix.

    This is used by GCS stub to generate GET bucket XML response.

    Args:
        posix: A float of secs from unix epoch.

    Returns:
        A datetime str in UTC. The fractional part is always written as
        '.000'; sub-second precision of posix is not carried over.
    """
    utc_dt = datetime.datetime.utcfromtimestamp(posix)
    return utc_dt.strftime(_DT_FORMAT) + '.000Z'
353
354
def local_run():
    """Tell whether we should hit the GCS dev appserver stub.

    The decision is based on the SERVER_SOFTWARE environment variable.

    Returns:
        True when the variable is unset, or when it starts with
        'Development' or 'testutil' and does not contain 'remote_api'.
        False otherwise.
    """
    server_software = os.environ.get('SERVER_SOFTWARE')
    if server_software is None:
        return True
    # remote_api wins even over a 'Development' prefix.
    if 'remote_api' in server_software:
        return False
    return server_software.startswith(('Development', 'testutil'))
365
366
def local_api_url():
    """Build the URL for GCS emulation on dev appserver.

    Returns:
        'http://' followed by the HTTP_HOST environment variable and
        LOCAL_GCS_ENDPOINT. If HTTP_HOST is unset, the host part is the
        text 'None'.
    """
    host = os.environ.get('HTTP_HOST')
    return 'http://%s%s' % (host, LOCAL_GCS_ENDPOINT)
370
371
def memory_usage(method):
    """Decorator that logs memory usage before and after a method.

    The readings come from runtime.memory_usage().current() and are logged
    at info level.

    Args:
        method: the callable to wrap.

    Returns:
        A wrapper that forwards all arguments to method and returns its
        result. The wrapper keeps method's __name__ and __doc__.
    """
    # Imported here so the block does not depend on a module-level import.
    import functools

    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        logging.info('Memory before method %s is %s.',
                     method.__name__, runtime.memory_usage().current())
        result = method(*args, **kwargs)
        logging.info('Memory after method %s is %s',
                     method.__name__, runtime.memory_usage().current())
        return result
    return wrapper
382
383
def _add_ns(tagname):
    """Return tagname prefixed with CS_XML_NS in '{namespace}tag' form."""
    return '{%s}%s' % (CS_XML_NS, tagname)
387
388
# Namespace-qualified XML tag names, listed alphabetically.
# NOTE(review): presumably the tags of the GET bucket XML response -- they
# are not used in the visible part of this file; confirm against callers.
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_CONTENTS = _add_ns('Contents')
_T_ETAG = _add_ns('ETag')
_T_IS_TRUNCATED = _add_ns('IsTruncated')
_T_KEY = _add_ns('Key')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_PREFIX = _add_ns('Prefix')
_T_SIZE = _add_ns('Size')
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698