| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2012 Google Inc. All Rights Reserved. |
| 2 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at |
| 6 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # |
| 9 # Unless required by applicable law or agreed to in writing, |
| 10 # software distributed under the License is distributed on an |
| 11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
| 12 # either express or implied. See the License for the specific |
| 13 # language governing permissions and limitations under the License. |
| 14 |
| 15 """Helpers shared by cloudstorage_stub and cloudstorage_api.""" |
| 16 |
| 17 |
| 18 |
| 19 |
| 20 |
# Public API of this helpers module; names not listed here are private.
__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
          ]
| 40 |
| 41 |
import calendar
import datetime
from email import utils as email_utils
import functools
import logging
import os
import re

try:
  from google.appengine.api import runtime
except ImportError:
  from google.appengine.api import runtime
| 53 |
| 54 |
# Base pattern for bucket names: 3-63 characters drawn from lowercase
# letters, digits, dot, dash and underscore.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# Matches a bare bucket name, e.g. 'my-bucket'.
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
# Matches a bucket path, e.g. '/my-bucket'.
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
# Matches a bucket optionally followed by anything, e.g. '/bucket/prefix'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# Matches a full object path, e.g. '/bucket/filename'.
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Header names (or prefixes) treated as user-specified object metadata.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# Options accepted when creating a file: metadata headers plus canned ACL.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace used in GCS (S3-compatible) list-bucket responses.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# URL path of the GCS stub on dev appserver.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared access token set via set_access_token(); '' means unset.
_access_token = ''


# Maximum number of results a single GET bucket request may return.
_MAX_GET_BUCKET_RESULT = 1000
| 71 |
| 72 |
def set_access_token(access_token):
  """Store an access token shared by all Google Cloud Storage calls.

  Once set, the library always talks to the real Google Cloud Storage
  with this token, even on dev appserver. The token is never renewed
  automatically; renewing an expired token is the caller's responsibility.

  When no token is set, the library requests and refreshes tokens itself
  on production appserver, and falls back to the local Google Cloud
  Storage stub on dev appserver.

  Args:
    access_token: token str. One can be obtained by running 'gsutil -d ls'
      and copying the value after 'Bearer'.
  """
  global _access_token
  _access_token = access_token
| 90 |
| 91 |
def get_access_token():
  """Return the access token previously stored by set_access_token()."""
  return _access_token
| 95 |
| 96 |
class GCSFileStat(object):
  """Container for GCS file stat.

  Note: this is Python 2 era code — sizes are stored as `long` and
  ordering is defined via `__cmp__`.
  """

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real
        file.
    """
    self.filename = filename
    self.is_dir = is_dir
    # Size, creation time and etag only apply to real files; directories
    # keep them as None.
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      # GCS returns the etag wrapped in double quotes; strip them.
      if etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order stats by filename.

    Raises:
      ValueError: if other is not a GCSFileStat.
    """
    if not isinstance(other, self.__class__):
      # Bug fix: the message used to be passed to ValueError as separate
      # logging-style arguments, so it was never %-formatted.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' %
                       (self.__class__.__name__, other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    # Prefer the content hash when available; directories (no etag)
    # fall back to the filename.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)
| 170 |
| 171 |
| 172 CSFileStat = GCSFileStat |
| 173 |
| 174 |
def get_metadata(headers):
  """Extract user defined options from HTTP response headers.

  Args:
    headers: a mapping of HTTP response headers (must support iteritems).

  Returns:
    A dict of only the headers whose lowercased names start with one of
    the _GCS_METADATA prefixes.
  """
  return {name: value for name, value in headers.iteritems()
          if any(name.lower().startswith(prefix)
                 for prefix in _GCS_METADATA)}
| 179 |
| 180 |
def validate_bucket_name(name):
  """Validate a Google Storage bucket name.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is invalid.
    TypeError: if name is not a string.
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name):
    return
  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                   '0-9, underscore, dash or dot but got %s' % name)
| 194 |
| 195 |
def validate_bucket_path(path):
  """Validate a Google Cloud Storage bucket path.

  Args:
    path: a Google Storage bucket path. It should have form '/bucket'.

  Raises:
    ValueError: if path is invalid.
    TypeError: if path is not a string.
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path):
    return
  raise ValueError('Bucket should have format /bucket '
                   'but got %s' % path)
| 209 |
| 210 |
def validate_file_path(path):
  """Validate a Google Cloud Storage file path.

  Args:
    path: a Google Storage file path. It should have form
      '/bucket/filename'.

  Raises:
    ValueError: if path is invalid.
    TypeError: if path is not a string.
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path):
    return
  raise ValueError('Path should have format /bucket/filename '
                   'but got %s' % path)
| 224 |
| 225 |
def _process_path_prefix(path_prefix):
  """Validate and split a Google Cloud Storage path prefix.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format
      '/bucket/prefix', '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path is invalid.

  Returns:
    A (bucket, prefix) tuple. bucket has the form '/bucket'; prefix is
    the remainder after the bucket, or None when absent or empty.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  # Split off the bucket name; partition on the first '/' after the
  # leading slash.
  bucket_name, separator, rest = path_prefix[1:].partition('/')
  if not separator:
    return path_prefix, None
  return '/' + bucket_name, rest or None
| 250 |
| 251 |
| 252 def _validate_path(path): |
| 253 """Basic validation of Google Storage paths. |
| 254 |
| 255 Args: |
| 256 path: a Google Storage path. It should have form '/bucket/filename' |
| 257 or '/bucket'. |
| 258 |
| 259 Raises: |
| 260 ValueError: if path is invalid. |
| 261 TypeError: if path is not of type basestring. |
| 262 """ |
| 263 if not path: |
| 264 raise ValueError('Path is empty') |
| 265 if not isinstance(path, basestring): |
| 266 raise TypeError('Path should be a string but is %s (%s).' % |
| 267 (path.__class__, path)) |
| 268 |
| 269 |
def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud
      Storage. May be None or empty, in which case nothing is checked.

  Raises:
    ValueError: if option is not supported.
    TypeError: if option is not of type str or value of an option
      is not of type basestring.
  """
  if not options:
    return

  for option, value in options.iteritems():
    if not isinstance(option, str):
      raise TypeError('option %r should be a str.' % option)
    if not any(option.lower().startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % option)
    if not isinstance(value, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (value, option))
| 292 |
| 293 |
def http_time_to_posix(http_time):
  """Convert an RFC 2616 HTTP date string to posix time.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for the format, e.g. "Mon, 20 Nov 1995 19:12:08 GMT".

  Args:
    http_time: time str in RFC 2616 format, or None.

  Returns:
    Seconds from unix epoch, or None when http_time is None.
  """
  if http_time is None:
    return None
  parsed = email_utils.parsedate_tz(http_time)
  return email_utils.mktime_tz(parsed)
| 309 |
| 310 |
def posix_time_to_http(posix_time):
  """Convert posix time to an RFC 2616 HTTP header date string.

  Args:
    posix_time: unix time, or a falsy value.

  Returns:
    A date str in RFC 2616 format, or None when posix_time is falsy.
  """
  if not posix_time:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)
| 322 |
| 323 |
# Date-time layout (without fractional seconds) shared by the two
# converters below.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """Convert a datetime str to posix time.

  The datetime str is of ISO 8601 format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian).

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML
  response.

  Args:
    dt_str: A datetime str. The fractional-seconds part may be absent,
      e.g. 2013-04-12T00:22:27Z is also accepted.

  Returns:
    Secs from unix epoch. By posix definition, epoch is midnight
    1970/1/1 UTC.
  """
  # Robustness fix: the previous `dt_str.split('.')` two-value unpack
  # crashed on timestamps without fractional seconds; partition accepts
  # both forms. Fractional seconds are discarded either way.
  parsable = dt_str.partition('.')[0]
  if parsable.endswith('Z'):
    parsable = parsable[:-1]
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())


def posix_to_dt_str(posix):
  """Reverse of dt_str_to_posix.

  This is used by the GCS stub to generate GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str in the format accepted by dt_str_to_posix, with the
    fractional part fixed at .000.
  """
  dt = datetime.datetime.utcfromtimestamp(posix)
  return dt.strftime(_DT_FORMAT) + '.000Z'
| 365 |
| 366 |
def local_run():
  """Determine whether we should hit the GCS dev appserver stub.

  Returns:
    True when running locally (no SERVER_SOFTWARE, dev appserver, or
    testutil); False for production or a remote_api shell.
  """
  software = os.environ.get('SERVER_SOFTWARE')
  if software is None:
    # No SERVER_SOFTWARE at all, e.g. plain unit tests.
    return True
  if 'remote_api' in software:
    # remote_api shell talks to the real service.
    return False
  return software.startswith(('Development', 'testutil'))
| 377 |
| 378 |
def local_api_url():
  """Return the URL of the GCS emulation endpoint on dev appserver."""
  host = os.environ.get('HTTP_HOST')
  return 'http://%s%s' % (host, LOCAL_GCS_ENDPOINT)
| 382 |
| 383 |
def memory_usage(method):
  """Decorator that logs memory usage before and after a method call.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper that logs current memory usage, invokes method, logs
    again, and returns the method's result.
  """
  # functools.wraps preserves __name__/__doc__, so the log lines (and any
  # decorator stacked on top) see the real method name instead of
  # 'wrapper'.
  @functools.wraps(method)
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
| 394 |
| 395 |
def _add_ns(tagname):
  """Qualify an XML tag name with the GCS XML namespace."""
  return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS, 'tag': tagname}
| 399 |
| 400 |
# Namespace-qualified XML tag names used when parsing GET bucket
# (list) responses.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')
| OLD | NEW |