| OLD | NEW |
| (Empty) |
| 1 # Copyright 2010 Google Inc. All Rights Reserved. | |
| 2 # | |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 # you may not use this file except in compliance with the License. | |
| 5 # You may obtain a copy of the License at | |
| 6 # | |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 # | |
| 9 # Unless required by applicable law or agreed to in writing, software | |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 # See the License for the specific language governing permissions and | |
| 13 # limitations under the License. | |
| 14 | |
| 15 """An OAuth2 client library. | |
| 16 | |
| 17 This library provides a client implementation of the OAuth2 protocol (see | |
| 18 https://developers.google.com/storage/docs/authentication.html#oauth). | |
| 19 | |
| 20 **** Experimental API **** | |
| 21 | |
| 22 This module is experimental and is subject to modification or removal without | |
| 23 notice. | |
| 24 """ | |
| 25 | |
| 26 # This implementation is a wrapper around the oauth2client implementation | |
| 27 # that implements caching of access tokens independent of refresh | |
| 28 # tokens (in the python API client oauth2client, there is a single class that | |
| 29 # encapsulates both refresh and access tokens). | |
| 30 | |
| 31 | |
| 32 import cgi | |
| 33 import socks | |
| 34 import datetime | |
| 35 import errno | |
| 36 from hashlib import sha1 | |
| 37 import httplib2 | |
| 38 import logging | |
| 39 import multiprocessing | |
| 40 import os | |
| 41 import tempfile | |
| 42 import urllib | |
| 43 import urlparse | |
| 44 | |
| 45 from boto import cacerts | |
| 46 from boto import config | |
| 47 from gslib.util import CreateLock | |
| 48 from gslib.util import Retry | |
| 49 from oauth2client.client import AccessTokenRefreshError | |
| 50 from oauth2client.client import HAS_CRYPTO | |
| 51 from oauth2client.client import OAuth2Credentials | |
| 52 | |
| 53 if HAS_CRYPTO: | |
| 54 from oauth2client.client import SignedJwtAssertionCredentials | |
| 55 | |
| 56 try: | |
| 57 import json | |
| 58 except ImportError: | |
| 59 try: | |
| 60 # Try to import from django, should work on App Engine | |
| 61 from django.utils import simplejson as json | |
| 62 except ImportError: | |
| 63 # Try for simplejson | |
| 64 import simplejson as json | |
| 65 | |
| 66 global token_exchange_lock | |
| 67 def InitializeMultiprocessingVariables(): | |
| 68 """ | |
| 69 Perform necessary initialization - see | |
| 70 gslib.command.InitializeMultiprocessingVariables for an explanation of why | |
| 71 this is necessary. | |
| 72 """ | |
| 73 global token_exchange_lock | |
| 74 # Lock used for checking/exchanging refresh token so that a parallelized | |
| 75 # operation doesn't attempt concurrent refreshes. | |
| 76 token_exchange_lock = CreateLock() | |
| 77 | |
| 78 | |
| 79 LOG = logging.getLogger('oauth2_client') | |
| 80 | |
| 81 GSUTIL_DEFAULT_SCOPE = 'https://www.googleapis.com/auth/devstorage.full_control' | |
| 82 | |
| 83 | |
| 84 class TokenCache(object): | |
| 85 """Interface for OAuth2 token caches.""" | |
| 86 | |
| 87 def PutToken(self, key, value): | |
| 88 raise NotImplementedError | |
| 89 | |
| 90 def GetToken(self, key): | |
| 91 raise NotImplementedError | |
| 92 | |
| 93 | |
| 94 class NoopTokenCache(TokenCache): | |
| 95 """A stub implementation of TokenCache that does nothing.""" | |
| 96 | |
| 97 def PutToken(self, key, value): | |
| 98 pass | |
| 99 | |
| 100 def GetToken(self, key): | |
| 101 return None | |
| 102 | |
| 103 | |
| 104 class InMemoryTokenCache(TokenCache): | |
| 105 """An in-memory token cache. | |
| 106 | |
| 107 The cache is implemented by a python dict, and inherits the thread-safety | |
| 108 properties of dict. | |
| 109 """ | |
| 110 | |
| 111 def __init__(self): | |
| 112 super(InMemoryTokenCache, self).__init__() | |
| 113 self.cache = dict() | |
| 114 | |
| 115 def PutToken(self, key, value): | |
| 116 LOG.debug('InMemoryTokenCache.PutToken: key=%s', key) | |
| 117 self.cache[key] = value | |
| 118 | |
| 119 def GetToken(self, key): | |
| 120 value = self.cache.get(key, None) | |
| 121 LOG.debug('InMemoryTokenCache.GetToken: key=%s%s present', | |
| 122 key, ' not' if value is None else '') | |
| 123 return value | |
| 124 | |
| 125 | |
| 126 class FileSystemTokenCache(TokenCache): | |
| 127 """An implementation of a token cache that persists tokens on disk. | |
| 128 | |
| 129 Each token object in the cache is stored in serialized form in a separate | |
| 130 file. The cache file's name can be configured via a path pattern that is | |
| 131 parameterized by the key under which a value is cached and optionally the | |
| 132 current processes uid as obtained by os.getuid(). | |
| 133 | |
| 134 Since file names are generally publicly visible in the system, it is important | |
| 135 that the cache key does not leak information about the token's value. If | |
| 136 client code computes cache keys from token values, a cryptographically strong | |
| 137 one-way function must be used. | |
| 138 """ | |
| 139 | |
| 140 def __init__(self, path_pattern=None): | |
| 141 """Creates a FileSystemTokenCache. | |
| 142 | |
| 143 Args: | |
| 144 path_pattern: Optional string argument to specify the path pattern for | |
| 145 cache files. The argument should be a path with format placeholders | |
| 146 '%(key)s' and optionally '%(uid)s'. If the argument is omitted, the | |
| 147 default pattern | |
| 148 <tmpdir>/oauth2client-tokencache.%(uid)s.%(key)s | |
| 149 is used, where <tmpdir> is replaced with the system temp dir as | |
| 150 obtained from tempfile.gettempdir(). | |
| 151 """ | |
| 152 super(FileSystemTokenCache, self).__init__() | |
| 153 self.path_pattern = path_pattern | |
| 154 if not path_pattern: | |
| 155 self.path_pattern = os.path.join( | |
| 156 tempfile.gettempdir(), 'oauth2_client-tokencache.%(uid)s.%(key)s') | |
| 157 | |
| 158 def CacheFileName(self, key): | |
| 159 uid = '_' | |
| 160 try: | |
| 161 # os.getuid() doesn't seem to work in Windows | |
| 162 uid = str(os.getuid()) | |
| 163 except: | |
| 164 pass | |
| 165 return self.path_pattern % {'key': key, 'uid': uid} | |
| 166 | |
| 167 def PutToken(self, key, value): | |
| 168 """Serializes the value to the key's filename. | |
| 169 | |
| 170 To ensure that written tokens aren't leaked to a different users, we | |
| 171 a) unlink an existing cache file, if any (to ensure we don't fall victim | |
| 172 to symlink attacks and the like), | |
| 173 b) create a new file with O_CREAT | O_EXCL (to ensure nobody is trying to | |
| 174 race us) | |
| 175 If either of these steps fail, we simply give up (but log a warning). Not | |
| 176 caching access tokens is not catastrophic, and failure to create a file | |
| 177 can happen for either of the following reasons: | |
| 178 - someone is attacking us as above, in which case we want to default to | |
| 179 safe operation (not write the token); | |
| 180 - another legitimate process is racing us; in this case one of the two | |
| 181 will win and write the access token, which is fine; | |
| 182 - we don't have permission to remove the old file or write to the | |
| 183 specified directory, in which case we can't recover | |
| 184 | |
| 185 Args: | |
| 186 key: the hash key to store. | |
| 187 value: the access_token value to serialize. | |
| 188 """ | |
| 189 | |
| 190 cache_file = self.CacheFileName(key) | |
| 191 LOG.debug('FileSystemTokenCache.PutToken: key=%s, cache_file=%s', | |
| 192 key, cache_file) | |
| 193 try: | |
| 194 os.unlink(cache_file) | |
| 195 except: | |
| 196 # Ignore failure to unlink the file; if the file exists and can't be | |
| 197 # unlinked, the subsequent open with O_CREAT | O_EXCL will fail. | |
| 198 pass | |
| 199 | |
| 200 flags = os.O_RDWR | os.O_CREAT | os.O_EXCL | |
| 201 | |
| 202 # Accommodate Windows; stolen from python2.6/tempfile.py. | |
| 203 if hasattr(os, 'O_NOINHERIT'): | |
| 204 flags |= os.O_NOINHERIT | |
| 205 if hasattr(os, 'O_BINARY'): | |
| 206 flags |= os.O_BINARY | |
| 207 | |
| 208 try: | |
| 209 fd = os.open(cache_file, flags, 0600) | |
| 210 except (OSError, IOError) as e: | |
| 211 LOG.warning('FileSystemTokenCache.PutToken: ' | |
| 212 'Failed to create cache file %s: %s', cache_file, e) | |
| 213 return | |
| 214 f = os.fdopen(fd, 'w+b') | |
| 215 f.write(value.Serialize()) | |
| 216 f.close() | |
| 217 | |
| 218 def GetToken(self, key): | |
| 219 """Returns a deserialized access token from the key's filename.""" | |
| 220 value = None | |
| 221 cache_file = self.CacheFileName(key) | |
| 222 | |
| 223 try: | |
| 224 f = open(cache_file) | |
| 225 value = AccessToken.UnSerialize(f.read()) | |
| 226 f.close() | |
| 227 except (IOError, OSError) as e: | |
| 228 if e.errno != errno.ENOENT: | |
| 229 LOG.warning('FileSystemTokenCache.GetToken: ' | |
| 230 'Failed to read cache file %s: %s', cache_file, e) | |
| 231 except Exception as e: | |
| 232 LOG.warning('FileSystemTokenCache.GetToken: ' | |
| 233 'Failed to read cache file %s (possibly corrupted): %s', | |
| 234 cache_file, e) | |
| 235 | |
| 236 LOG.debug('FileSystemTokenCache.GetToken: key=%s%s present (cache_file=%s)', | |
| 237 key, ' not' if value is None else '', cache_file) | |
| 238 return value | |
| 239 | |
| 240 | |
| 241 class OAuth2Client(object): | |
| 242 """Common logic for OAuth2 clients.""" | |
| 243 | |
| 244 def __init__(self, cache_key_base, access_token_cache=None, | |
| 245 datetime_strategy=datetime.datetime, auth_uri=None, | |
| 246 token_uri=None, disable_ssl_certificate_validation=False, | |
| 247 proxy_host=None, proxy_port=None, ca_certs_file=None): | |
| 248 # datetime_strategy is used to invoke utcnow() on; it is injected into the | |
| 249 # constructor for unit testing purposes. | |
| 250 self.auth_uri = auth_uri | |
| 251 self.token_uri = token_uri | |
| 252 self.cache_key_base = cache_key_base | |
| 253 self.datetime_strategy = datetime_strategy | |
| 254 self.access_token_cache = access_token_cache or InMemoryTokenCache() | |
| 255 self.disable_ssl_certificate_validation = disable_ssl_certificate_validation | |
| 256 self.ca_certs_file = ca_certs_file | |
| 257 if proxy_host and proxy_port: | |
| 258 self._proxy_info = httplib2.ProxyInfo(socks.PROXY_TYPE_HTTP, | |
| 259 proxy_host, | |
| 260 proxy_port, | |
| 261 proxy_rdns=True) | |
| 262 else: | |
| 263 self._proxy_info = None | |
| 264 | |
| 265 def CreateHttpRequest(self): | |
| 266 return httplib2.Http( | |
| 267 ca_certs=self.ca_certs_file, | |
| 268 disable_ssl_certificate_validation | |
| 269 = self.disable_ssl_certificate_validation, | |
| 270 proxy_info=self._proxy_info) | |
| 271 | |
| 272 def GetAccessToken(self): | |
| 273 """Obtains an access token for this client. | |
| 274 | |
| 275 This client's access token cache is first checked for an existing, | |
| 276 not-yet-expired access token. If none is found, the client obtains a fresh | |
| 277 access token from the OAuth2 provider's token endpoint. | |
| 278 | |
| 279 Returns: | |
| 280 The cached or freshly obtained AccessToken. | |
| 281 Raises: | |
| 282 AccessTokenRefreshError if an error occurs. | |
| 283 """ | |
| 284 # Ensure only one thread at a time attempts to get (and possibly refresh) | |
| 285 # the access token. This doesn't prevent concurrent refresh attempts across | |
| 286 # multiple gsutil instances, but at least protects against multiple threads | |
| 287 # simultaneously attempting to refresh when gsutil -m is used. | |
| 288 token_exchange_lock.acquire() | |
| 289 try: | |
| 290 cache_key = self.CacheKey() | |
| 291 LOG.debug('GetAccessToken: checking cache for key %s', cache_key) | |
| 292 access_token = self.access_token_cache.GetToken(cache_key) | |
| 293 LOG.debug('GetAccessToken: token from cache: %s', access_token) | |
| 294 if access_token is None or access_token.ShouldRefresh(): | |
| 295 LOG.debug('GetAccessToken: fetching fresh access token...') | |
| 296 access_token = self.FetchAccessToken() | |
| 297 LOG.debug('GetAccessToken: fresh access token: %s', access_token) | |
| 298 self.access_token_cache.PutToken(cache_key, access_token) | |
| 299 return access_token | |
| 300 finally: | |
| 301 token_exchange_lock.release() | |
| 302 | |
| 303 def CacheKey(self): | |
| 304 """Computes a cache key. | |
| 305 | |
| 306 The cache key is computed as the SHA1 hash of the refresh token for user | |
| 307 accounts, or the hash of the gs_service_client_id for service accounts, | |
| 308 which satisfies the FileSystemTokenCache requirement that cache keys do not | |
| 309 leak information about token values. | |
| 310 | |
| 311 Returns: | |
| 312 A hash key. | |
| 313 """ | |
| 314 h = sha1() | |
| 315 h.update(self.cache_key_base) | |
| 316 return h.hexdigest() | |
| 317 | |
| 318 def GetAuthorizationHeader(self): | |
| 319 """Gets the access token HTTP authorization header value. | |
| 320 | |
| 321 Returns: | |
| 322 The value of an Authorization HTTP header that authenticates | |
| 323 requests with an OAuth2 access token. | |
| 324 """ | |
| 325 return 'Bearer %s' % self.GetAccessToken().token | |
| 326 | |
| 327 | |
| 328 class OAuth2ServiceAccountClient(OAuth2Client): | |
| 329 | |
| 330 def __init__(self, client_id, private_key, password, | |
| 331 access_token_cache=None, auth_uri=None, token_uri=None, | |
| 332 datetime_strategy=datetime.datetime, | |
| 333 disable_ssl_certificate_validation=False, | |
| 334 proxy_host=None, proxy_port=None, ca_certs_file=None): | |
| 335 """Creates an OAuth2ServiceAccountClient. | |
| 336 | |
| 337 Args: | |
| 338 client_id: The OAuth2 client ID of this client. | |
| 339 private_key: The private key associated with this service account. | |
| 340 password: The private key password used for the crypto signer. | |
| 341 access_token_cache: An optional instance of a TokenCache. If omitted or | |
| 342 None, an InMemoryTokenCache is used. | |
| 343 auth_uri: The URI for OAuth2 authorization. | |
| 344 token_uri: The URI used to refresh access tokens. | |
| 345 datetime_strategy: datetime module strategy to use. | |
| 346 disable_ssl_certificate_validation: True if certifications should not be | |
| 347 validated. | |
| 348 proxy_host: An optional string specifying the host name of an HTTP proxy | |
| 349 to be used. | |
| 350 proxy_port: An optional int specifying the port number of an HTTP proxy | |
| 351 to be used. | |
| 352 ca_certs_file: The cacerts.txt file to use. | |
| 353 """ | |
| 354 super(OAuth2ServiceAccountClient, self).__init__( | |
| 355 cache_key_base=client_id, auth_uri=auth_uri, token_uri=token_uri, | |
| 356 access_token_cache=access_token_cache, | |
| 357 datetime_strategy=datetime_strategy, | |
| 358 disable_ssl_certificate_validation=disable_ssl_certificate_validation, | |
| 359 proxy_host=proxy_host, proxy_port=proxy_port, | |
| 360 ca_certs_file=ca_certs_file) | |
| 361 self.client_id = client_id | |
| 362 self.private_key = private_key | |
| 363 self.password = password | |
| 364 | |
| 365 def FetchAccessToken(self): | |
| 366 credentials = SignedJwtAssertionCredentials(self.client_id, | |
| 367 self.private_key, scope=GSUTIL_DEFAULT_SCOPE, | |
| 368 private_key_password=self.password) | |
| 369 http = self.CreateHttpRequest() | |
| 370 credentials.refresh(http) | |
| 371 return AccessToken(credentials.access_token, | |
| 372 credentials.token_expiry, datetime_strategy=self.datetime_strategy) | |
| 373 | |
| 374 | |
| 375 class GsAccessTokenRefreshError(Exception): | |
| 376 """Rate limiting error when exchanging refresh token for access token.""" | |
| 377 def __init__(self, e): | |
| 378 super(Exception, self).__init__(e) | |
| 379 | |
| 380 | |
| 381 class GsInvalidRefreshTokenError(Exception): | |
| 382 def __init__(self, e): | |
| 383 super(Exception, self).__init__(e) | |
| 384 | |
| 385 | |
| 386 class OAuth2UserAccountClient(OAuth2Client): | |
| 387 """An OAuth2 client.""" | |
| 388 | |
| 389 def __init__(self, token_uri, client_id, client_secret, refresh_token, | |
| 390 auth_uri=None, access_token_cache=None, | |
| 391 datetime_strategy=datetime.datetime, | |
| 392 disable_ssl_certificate_validation=False, | |
| 393 proxy_host=None, proxy_port=None, ca_certs_file=None): | |
| 394 """Creates an OAuth2UserAccountClient. | |
| 395 | |
| 396 Args: | |
| 397 token_uri: The URI used to refresh access tokens. | |
| 398 client_id: The OAuth2 client ID of this client. | |
| 399 client_secret: The OAuth2 client secret of this client. | |
| 400 refresh_token: The token used to refresh the access token. | |
| 401 auth_uri: The URI for OAuth2 authorization. | |
| 402 access_token_cache: An optional instance of a TokenCache. If omitted or | |
| 403 None, an InMemoryTokenCache is used. | |
| 404 datetime_strategy: datetime module strategy to use. | |
| 405 disable_ssl_certificate_validation: True if certifications should not be | |
| 406 validated. | |
| 407 proxy_host: An optional string specifying the host name of an HTTP proxy | |
| 408 to be used. | |
| 409 proxy_port: An optional int specifying the port number of an HTTP proxy | |
| 410 to be used. | |
| 411 ca_certs_file: The cacerts.txt file to use. | |
| 412 """ | |
| 413 super(OAuth2UserAccountClient, self).__init__( | |
| 414 cache_key_base=refresh_token, auth_uri=auth_uri, token_uri=token_uri, | |
| 415 access_token_cache=access_token_cache, | |
| 416 datetime_strategy=datetime_strategy, | |
| 417 disable_ssl_certificate_validation=disable_ssl_certificate_validation, | |
| 418 proxy_host=proxy_host, proxy_port=proxy_port, | |
| 419 ca_certs_file=ca_certs_file) | |
| 420 self.token_uri = token_uri | |
| 421 self.client_id = client_id | |
| 422 self.client_secret = client_secret | |
| 423 self.refresh_token = refresh_token | |
| 424 | |
| 425 @Retry(GsAccessTokenRefreshError, | |
| 426 tries=config.get('OAuth2', 'oauth2_refresh_retries', 6), | |
| 427 timeout_secs=1) | |
| 428 def FetchAccessToken(self): | |
| 429 """Fetches an access token from the provider's token endpoint. | |
| 430 | |
| 431 Fetches an access token from this client's OAuth2 provider's token endpoint. | |
| 432 | |
| 433 Returns: | |
| 434 The fetched AccessToken. | |
| 435 """ | |
| 436 try: | |
| 437 http = self.CreateHttpRequest() | |
| 438 credentials = OAuth2Credentials(None, self.client_id, self.client_secret, | |
| 439 self.refresh_token, None, self.token_uri, None) | |
| 440 credentials.refresh(http) | |
| 441 return AccessToken(credentials.access_token, | |
| 442 credentials.token_expiry, datetime_strategy=self.datetime_strategy) | |
| 443 except AccessTokenRefreshError, e: | |
| 444 if 'Invalid response 403' in e.message: | |
| 445 # This is the most we can do at the moment to accurately detect rate | |
| 446 # limiting errors since they come back as 403s with no further | |
| 447 # information. | |
| 448 raise GsAccessTokenRefreshError(e) | |
| 449 elif 'invalid_grant' in e.message: | |
| 450 LOG.info(""" | |
| 451 Attempted to retrieve an access token from an invalid refresh token. Two common | |
| 452 cases in which you will see this error are: | |
| 453 1. Your refresh token was revoked. | |
| 454 2. Your refresh token was typed incorrectly. | |
| 455 """) | |
| 456 raise GsInvalidRefreshTokenError(e) | |
| 457 else: | |
| 458 raise | |
| 459 | |
| 460 | |
| 461 class AccessToken(object): | |
| 462 """Encapsulates an OAuth2 access token.""" | |
| 463 | |
| 464 def __init__(self, token, expiry, datetime_strategy=datetime.datetime): | |
| 465 self.token = token | |
| 466 self.expiry = expiry | |
| 467 self.datetime_strategy = datetime_strategy | |
| 468 | |
| 469 @staticmethod | |
| 470 def UnSerialize(query): | |
| 471 """Creates an AccessToken object from its serialized form.""" | |
| 472 | |
| 473 def GetValue(d, key): | |
| 474 return (d.get(key, [None]))[0] | |
| 475 kv = cgi.parse_qs(query) | |
| 476 if not kv['token']: | |
| 477 return None | |
| 478 expiry = None | |
| 479 expiry_tuple = GetValue(kv, 'expiry') | |
| 480 if expiry_tuple: | |
| 481 try: | |
| 482 expiry = datetime.datetime( | |
| 483 *[int(n) for n in expiry_tuple.split(',')]) | |
| 484 except: | |
| 485 return None | |
| 486 return AccessToken(GetValue(kv, 'token'), expiry) | |
| 487 | |
| 488 def Serialize(self): | |
| 489 """Serializes this object as URI-encoded key-value pairs.""" | |
| 490 # There's got to be a better way to serialize a datetime. Unfortunately, | |
| 491 # there is no reliable way to convert into a unix epoch. | |
| 492 kv = {'token': self.token} | |
| 493 if self.expiry: | |
| 494 t = self.expiry | |
| 495 tupl = (t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond) | |
| 496 kv['expiry'] = ','.join([str(i) for i in tupl]) | |
| 497 return urllib.urlencode(kv) | |
| 498 | |
| 499 def ShouldRefresh(self, time_delta=300): | |
| 500 """Whether the access token needs to be refreshed. | |
| 501 | |
| 502 Args: | |
| 503 time_delta: refresh access token when it expires within time_delta secs. | |
| 504 | |
| 505 Returns: | |
| 506 True if the token is expired or about to expire, False if the | |
| 507 token should be expected to work. Note that the token may still | |
| 508 be rejected, e.g. if it has been revoked server-side. | |
| 509 """ | |
| 510 if self.expiry is None: | |
| 511 return False | |
| 512 return (self.datetime_strategy.utcnow() | |
| 513 + datetime.timedelta(seconds=time_delta) > self.expiry) | |
| 514 | |
| 515 def __eq__(self, other): | |
| 516 return self.token == other.token and self.expiry == other.expiry | |
| 517 | |
| 518 def __ne__(self, other): | |
| 519 return not self.__eq__(other) | |
| 520 | |
| 521 def __str__(self): | |
| 522 return 'AccessToken(token=%s, expiry=%sZ)' % (self.token, self.expiry) | |
| 523 | |
| OLD | NEW |