| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2012 Google Inc. All Rights Reserved. |
| 2 |
| 3 """Helpers shared by cloudstorage_stub and cloudstorage_api.""" |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
# Public names of this module, listed one per line in alphabetical order.
__all__ = [
    'CSFileStat',
    'CS_XML_NS',
    'GCSFileStat',
    'LOCAL_GCS_ENDPOINT',
    'dt_str_to_posix',
    'get_access_token',
    'get_metadata',
    'http_time_to_posix',
    'local_api_url',
    'local_run',
    'memory_usage',
    'posix_time_to_http',
    'posix_to_dt_str',
    'set_access_token',
    'validate_bucket_name',
    'validate_bucket_path',
    'validate_file_path',
    'validate_options',
]
| 28 |
| 29 |
import calendar
import datetime
from email import utils as email_utils
import functools
import logging
import os
import re
| 36 |
| 37 try: |
| 38 from google.appengine.api import runtime |
| 39 except ImportError: |
| 40 from google.appengine.api import runtime |
| 41 |
| 42 |
# Characters and length allowed in a bucket name; shared by the patterns below.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# The fully anchored patterns end with \Z instead of $: with re.match, $ also
# matches just before a trailing newline, which would let 'bucket\n' validate.
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'\Z')
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'\Z')
# '/bucket' followed by anything (used for path prefixes).
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# '/bucket/' followed by anything (used for full file paths).
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Lower-case header name prefixes that get_metadata treats as user metadata.
_GCS_METADATA = [
    'x-goog-meta-',
    'content-disposition',
    'cache-control',
    'content-encoding',
]
# Option name prefixes accepted by validate_options.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace applied to tag names by _add_ns.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# Path component appended to the host by local_api_url.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared access token; written by set_access_token, read by get_access_token.
_access_token = ''


# NOTE(review): not used in this part of the file; presumably the maximum
# number of entries in one GET bucket response - confirm against callers.
_MAX_GET_BUCKET_RESULT = 1000
| 59 |
| 60 |
def set_access_token(access_token):
    """Store the shared access token used to authenticate with GCS.

    While a token is set, the library always tries to talk to the real
    Google Cloud Storage with it, even when running on dev appserver.
    The token may expire; renewing it is the caller's responsibility.

    When no token is set, the library requests and refreshes a token
    automatically on appserver, and talks to a Google Cloud Storage stub
    on dev appserver.

    Args:
      access_token: the token string. One way to obtain it is to run
        'gsutil -d ls' and copy the str after 'Bearer'.
    """
    # Rebind the module-level variable rather than creating a local one.
    global _access_token
    _access_token = access_token
| 78 |
| 79 |
def get_access_token():
    """Return the module-level access token last stored by set_access_token."""
    return _access_token
| 83 |
| 84 |
class GCSFileStat(object):
    """Container for GCS file stat.

    Instances order by filename (see __cmp__).

    NOTE(review): __hash__ prefers etag while __cmp__ compares filename, so
    two stats that compare equal can still hash differently.
    """

    def __init__(self,
                 filename,
                 st_size,
                 etag,
                 st_ctime,
                 content_type=None,
                 metadata=None,
                 is_dir=False):
        """Initialize.

        For files, the non optional arguments are always set.
        For directories, only filename and is_dir is set; st_size, st_ctime
        and etag are stored as None regardless of the values passed in.

        Args:
          filename: a Google Cloud Storage filename of form '/bucket/filename'.
          st_size: file size in bytes. long compatible.
          etag: hex digest of the md5 hash of the file's content. str.
            One pair of surrounding double quotes is stripped.
          st_ctime: posix file creation time. float compatible.
          content_type: content type. str.
          metadata: a str->str dict of user specified options when creating
            the file. Possible keys are x-goog-meta-, content-disposition,
            content-encoding, and cache-control.
          is_dir: True if this represents a directory. False if this is a
            real file.
        """
        self.filename = filename
        self.is_dir = is_dir
        self.st_size = None
        self.st_ctime = None
        self.etag = None
        self.content_type = content_type
        self.metadata = metadata

        if not is_dir:
            self.st_size = long(st_size)
            self.st_ctime = float(st_ctime)
            # Check truthiness first so an empty (or missing) etag is stored
            # as-is instead of failing on etag[0].
            if etag and etag[0] == '"' and etag[-1] == '"':
                etag = etag[1:-1]
            self.etag = etag

    def __repr__(self):
        """Return a readable summary; directories show only the filename."""
        if self.is_dir:
            return '(directory: %s)' % self.filename

        return (
            '(filename: %(filename)s, st_size: %(st_size)s, '
            'st_ctime: %(st_ctime)s, etag: %(etag)s, '
            'content_type: %(content_type)s, '
            'metadata: %(metadata)s)' %
            dict(filename=self.filename,
                 st_size=self.st_size,
                 st_ctime=self.st_ctime,
                 etag=self.etag,
                 content_type=self.content_type,
                 metadata=self.metadata))

    def __cmp__(self, other):
        """Order two stats by filename.

        Args:
          other: another instance of this class.

        Returns:
          1, -1 or 0 when self.filename is greater than, less than or equal
          to other.filename.

        Raises:
          ValueError: if other is not an instance of this class.
        """
        if not isinstance(other, self.__class__):
            # Format the message here: extra positional arguments to
            # ValueError are stored on the exception, not interpolated.
            raise ValueError('Argument to cmp must have the same type. '
                             'Expect %s, got %s' %
                             (self.__class__.__name__,
                              other.__class__.__name__))
        if self.filename > other.filename:
            return 1
        elif self.filename < other.filename:
            return -1
        return 0

    def __hash__(self):
        """Hash on etag when it is set and non-empty, otherwise on filename."""
        if self.etag:
            return hash(self.etag)
        return hash(self.filename)


# Alias under the older name, also listed in __all__.
CSFileStat = GCSFileStat
| 161 |
| 162 |
def get_metadata(headers):
    """Pick the user defined options out of HTTP response headers.

    A header is kept when its lower-cased name starts with one of the
    prefixes in _GCS_METADATA. Keys keep their original case.

    Args:
      headers: a dict-like object providing iteritems() over header
        name/value pairs.

    Returns:
      A new dict with the matching headers.
    """
    selected = {}
    for name, value in headers.iteritems():
        lowered = name.lower()
        for prefix in _GCS_METADATA:
            if lowered.startswith(prefix):
                selected[name] = value
                break
    return selected
| 167 |
| 168 |
def validate_bucket_name(name):
    """Check that name is a well-formed Google Storage bucket name.

    Args:
      name: a Google Storage bucket name with no prefix or suffix.

    Raises:
      ValueError: if name is empty or does not match the bucket pattern.
      TypeError: if name is not a string (raised by _validate_path).
    """
    _validate_path(name)
    if _GCS_BUCKET_REGEX.match(name):
        return
    raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                     '0-9, underscore, dash or dot but got %s' % name)
| 182 |
| 183 |
def validate_bucket_path(path):
    """Check that path is a well-formed Google Cloud Storage bucket path.

    Args:
      path: a Google Storage bucket path. It should have form '/bucket'.

    Raises:
      ValueError: if path is empty or does not have the '/bucket' form.
      TypeError: if path is not a string (raised by _validate_path).
    """
    _validate_path(path)
    if _GCS_BUCKET_PATH_REGEX.match(path):
        return
    raise ValueError('Bucket should have format /bucket '
                     'but got %s' % path)
| 197 |
| 198 |
def validate_file_path(path):
    """Check that path is a well-formed Google Cloud Storage file path.

    Args:
      path: a Google Storage file path. It should have form
        '/bucket/filename'.

    Raises:
      ValueError: if path is empty or does not have the '/bucket/filename'
        form.
      TypeError: if path is not a string (raised by _validate_path).
    """
    _validate_path(path)
    if _GCS_FULLPATH_REGEX.match(path):
        return
    raise ValueError('Path should have format /bucket/filename '
                     'but got %s' % path)
| 212 |
| 213 |
def _process_path_prefix(path_prefix):
    """Validate a Google Cloud Storage path prefix and split it in two.

    Args:
      path_prefix: a Google Cloud Storage path prefix of format
        '/bucket/prefix' or '/bucket/' or '/bucket'.

    Raises:
      ValueError: if path is invalid.

    Returns:
      a tuple of /bucket and prefix. prefix is None when nothing follows
      the bucket (both '/bucket' and '/bucket/').
    """
    _validate_path(path_prefix)
    if _GCS_PATH_PREFIX_REGEX.match(path_prefix) is None:
        raise ValueError('Path prefix should have format /bucket, /bucket/, '
                         'or /bucket/prefix but got %s.' % path_prefix)

    # Search from index 1 to skip the leading '/' of '/bucket'.
    slash = path_prefix.find('/', 1)
    if slash == -1:
        return path_prefix, None

    remainder = path_prefix[slash + 1:]
    return path_prefix[:slash], remainder or None
| 238 |
| 239 |
def _validate_path(path):
    """Run the basic checks shared by all Google Storage path validators.

    The emptiness check runs first, so any falsy value (including non-string
    ones such as None) is reported as an empty path.

    Args:
      path: a Google Storage path. It should have form '/bucket/filename'
        or '/bucket'.

    Raises:
      ValueError: if path is empty (falsy).
      TypeError: if path is not of type basestring.
    """
    if not path:
        raise ValueError('Path is empty')
    if isinstance(path, basestring):
        return
    raise TypeError('Path should be a string but is %s (%s).' %
                    (path.__class__, path))
| 256 |
| 257 |
def validate_options(options):
    """Validate Google Cloud Storage options.

    An option name is accepted when its lower-cased form starts with one of
    the prefixes in _GCS_OPTIONS. A falsy options value (None or empty) is
    accepted without further checks.

    Args:
      options: a str->basestring dict of options to pass to Google Cloud
        Storage.

    Raises:
      ValueError: if option is not supported.
      TypeError: if option is not of type str or value of an option
        is not of type basestring.
    """
    if not options:
        return

    for k, v in options.iteritems():
        if not isinstance(k, str):
            raise TypeError('option %r should be a str.' % k)
        if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
            raise ValueError('option %s is not supported.' % k)
        if not isinstance(v, basestring):
            # Both values must go into one tuple: '%' binds tighter than the
            # comma, so "fmt % v, k" would try to format with v alone.
            raise TypeError(
                'value %r for option %s should be of type basestring.' %
                (v, k))
| 280 |
| 281 |
def http_time_to_posix(http_time):
    """Turn an HTTP time string into posix time.

    See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
    for http time format.

    Args:
      http_time: time in RFC 2616 format. e.g.
        "Mon, 20 Nov 1995 19:12:08 GMT". May be None.

    Returns:
      Seconds from unix epoch as computed by email.utils.mktime_tz, or None
      when http_time is None.
    """
    if http_time is None:
        return None
    parsed = email_utils.parsedate_tz(http_time)
    return email_utils.mktime_tz(parsed)
| 297 |
| 298 |
def posix_time_to_http(posix_time):
    """Convert posix time to HTTP header time format.

    Args:
      posix_time: unix time in seconds. May be None.

    Returns:
      A datetime str in RFC 2616 format, e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT", or None when posix_time is None.
    """
    # Test against None instead of truthiness so that 0 (the epoch itself)
    # is formatted, matching the None check in http_time_to_posix.
    if posix_time is None:
        return None
    return email_utils.formatdate(posix_time, usegmt=True)
| 310 |
| 311 |
# strptime/strftime layout of the whole-second part of a datetime str.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
    """format str to posix.

    datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
    e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
    between date and time when they are on the same line.
    Z indicates UTC (zero meridian).

    The fractional part is optional and is discarded: 2013-04-12T00:22:27Z
    is accepted and yields the same result as the example above.

    A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

    This is used to parse LastModified node from GCS's GET bucket XML
    response.

    Args:
      dt_str: A datetime str.

    Returns:
      Whole secs from unix epoch as computed by calendar.timegm. By posix
      definition, epoch is midnight 1970/1/1 UTC.

    Raises:
      ValueError: if the whole-second part does not match _DT_FORMAT.
    """
    # Keep everything before the first '.'; unpacking split('.') into two
    # names would fail for a timestamp without fractional seconds.
    parsable = dt_str.split('.', 1)[0]
    # Without a fractional part the trailing 'Z' is still attached.
    if parsable.endswith('Z'):
        parsable = parsable[:-1]
    dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
    return calendar.timegm(dt.utctimetuple())
| 337 |
| 338 |
def posix_to_dt_str(posix):
    """Reverse of dt_str_to_posix.

    This is used by GCS stub to generate GET bucket XML response.
    The fractional part of the result is always '.000'.

    Args:
      posix: A float of secs from unix epoch.

    Returns:
      A datetime str in UTC, e.g. 2013-04-12T00:22:27.000Z.
    """
    utc_dt = datetime.datetime.utcfromtimestamp(posix)
    return utc_dt.strftime(_DT_FORMAT) + '.000Z'
| 353 |
| 354 |
def local_run():
    """Tell whether we should hit GCS dev appserver stub.

    The decision is based on the SERVER_SOFTWARE environment variable:
    unset means local; a value containing 'remote_api' means not local;
    otherwise it is local only when the value starts with 'Development'
    or 'testutil'.

    Returns:
      True if the stub should be used, False otherwise.
    """
    software = os.environ.get('SERVER_SOFTWARE')
    if software is None:
        return True
    if 'remote_api' in software:
        return False
    return software.startswith(('Development', 'testutil'))
| 365 |
| 366 |
def local_api_url():
    """Build the URL for GCS emulation on dev appserver.

    The host is read from the HTTP_HOST environment variable and followed
    by LOCAL_GCS_ENDPOINT.
    """
    host = os.environ.get('HTTP_HOST')
    return 'http://%s%s' % (host, LOCAL_GCS_ENDPOINT)
| 370 |
| 371 |
def memory_usage(method):
    """Decorator that logs memory usage before and after a method.

    The "after" entry is only logged when the method returns normally.

    Args:
      method: the callable to wrap.

    Returns:
      A wrapper that forwards all arguments to method and returns its
      result.
    """
    # functools.wraps keeps the wrapped callable's __name__ and __doc__ on
    # the wrapper, so introspection and logs still show the real name.
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        logging.info('Memory before method %s is %s.',
                     method.__name__, runtime.memory_usage().current())
        result = method(*args, **kwargs)
        logging.info('Memory after method %s is %s',
                     method.__name__, runtime.memory_usage().current())
        return result
    return wrapper
| 382 |
| 383 |
def _add_ns(tagname):
    """Return tagname prefixed with the CS_XML_NS namespace in braces."""
    return '{%(ns)s}%(tag)s' % dict(ns=CS_XML_NS, tag=tagname)


# Namespace-qualified tag names.
# NOTE(review): presumably the elements of the GET bucket XML response -
# confirm against the code that parses or generates it.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')
| OLD | NEW |