| OLD | NEW |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import json | 5 import json |
| 6 import logging | 6 import logging |
| 7 from cStringIO import StringIO | 7 from cStringIO import StringIO |
| 8 import posixpath | 8 import posixpath |
| 9 import sys | 9 import sys |
| 10 import traceback | 10 import traceback |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 116 return GithubFileSystem( | 116 return GithubFileSystem( |
| 117 path if path is not None else 'test_data/github_file_system', | 117 path if path is not None else 'test_data/github_file_system', |
| 118 'test_owner', | 118 'test_owner', |
| 119 repo, | 119 repo, |
| 120 object_store_creator or ObjectStoreCreator.ForTest(), | 120 object_store_creator or ObjectStoreCreator.ForTest(), |
| 121 fake_fetcher) | 121 fake_fetcher) |
| 122 | 122 |
| 123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): | 123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): |
| 124 self._repo_key = '%s/%s' % (owner, repo) | 124 self._repo_key = '%s/%s' % (owner, repo) |
| 125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) | 125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) |
| 126 | 126 self._username, self._password = _LoadCredentials(object_store_creator) |
| 127 self._blobstore = blobstore.AppEngineBlobstore() | 127 self._blobstore = blobstore.AppEngineBlobstore() |
| 128 # Lookup the chrome github api credentials. | |
| 129 self._username, self._password = _LoadCredentials(object_store_creator) | |
| 130 self._fetcher = Fetcher(self._repo_url) | 128 self._fetcher = Fetcher(self._repo_url) |
| 131 | 129 # Stores whether the github is up-to-date. This will either be True or |
| 132 # start_empty=False here to maintain the most recent stat across cron runs. | 130 # empty, the emptiness most likely due to this being a cron run. |
| 133 # Refresh() will always re-stat and use that to decide whether to download | 131 self._up_to_date_cache = object_store_creator.Create( |
| 134 # the zipball. | 132 GithubFileSystem, category='up-to-date') |
| 133 # Caches the zip file's stat. Overrides start_empty=False and use |
| 134 # |self._up_to_date_cache| to determine whether we need to refresh. |
| 135 self._stat_cache = object_store_creator.Create( | 135 self._stat_cache = object_store_creator.Create( |
| 136 GithubFileSystem, category='stat-cache', start_empty=False) | 136 GithubFileSystem, category='stat-cache', start_empty=False) |
| 137 | 137 |
| 138 # A Future to the github zip file. Normally this Future will resolve itself | 138 # Created lazily in |_EnsureRepoZip|. |
| 139 # by querying blobstore for the blob; however, Refresh() may decide to | 139 self._repo_zip = None |
| 140 # override this with a new blob if it's out of date. | |
| 141 def resolve_from_blobstore(): | |
| 142 blob = self._blobstore.Get(self._repo_url, _GITHUB_REPOS_NAMESPACE) | |
| 143 return _GithubZipFile.Create(self._repo_key, blob) if blob else None | |
| 144 self._repo_zip = Future(delegate=Gettable(resolve_from_blobstore)) | |
| 145 | 140 |
| 146 def _GetCachedVersion(self): | 141 def _EnsureRepoZip(self): |
| 147 '''Returns the currently cached version of the repository. The version is a | 142 '''Initializes |self._repo_zip| if it hasn't already been (i.e. if |
| 148 'sha' hash value. | 143 _EnsureRepoZip has never been called before). In that case |self._repo_zip| |
| 144 will be set to a Future of _GithubZipFile and the fetch process started, |
| 145 whether that be from a blobstore or if necessary all the way from GitHub. |
| 149 ''' | 146 ''' |
| 150 return self._stat_cache.Get(self._repo_key).Get() | 147 if self._repo_zip is not None: |
| 148 return |
| 151 | 149 |
| 152 def _SetCachedVersion(self, version): | 150 repo_key, repo_url, username, password = ( |
| 153 '''Sets the currently cached version of the repository. The version is a | 151 self._repo_key, self._repo_url, self._username, self._password) |
| 154 'sha' hash value. | |
| 155 ''' | |
| 156 self._stat_cache.Set(self._repo_key, version) | |
| 157 | 152 |
| 158 def _FetchLiveVersion(self): | 153 def fetch_from_blobstore(): |
| 154 '''Returns a Future which resolves to the _GithubZipFile for this repo |
| 155 fetched from blobstore. |
| 156 ''' |
| 157 blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE) |
| 158 if blob is None: |
| 159 return FileSystemError.RaiseInFuture( |
| 160 'No blob for %s found in datastore' % repo_key) |
| 161 |
| 162 repo_zip = _GithubZipFile.Create(repo_key, blob) |
| 163 if repo_zip is None: |
| 164 return FileSystemError.RaiseInFuture( |
| 165 'Blob for %s was corrupted in blobstore!?' % repo_key) |
| 166 |
| 167 return Future(value=repo_zip) |
| 168 |
| 169 def fetch_from_github(version): |
| 170 '''Returns a Future which resolves to the _GithubZipFile for this repo |
| 171 fetched new from GitHub, then writes it to blobstore and |version| to the |
| 172 stat caches. |
| 173 ''' |
| 174 github_future = self._fetcher.FetchAsync( |
| 175 'zipball', username=username, password=password) |
| 176 def resolve(): |
| 177 try: |
| 178 blob = github_future.Get().content |
| 179 except urlfetch.DownloadError: |
| 180 raise FileSystemError('Failed to download repo %s file from %s' % |
| 181 (repo_key, repo_url)) |
| 182 |
| 183 repo_zip = _GithubZipFile.Create(repo_key, blob) |
| 184 if repo_zip is None: |
| 185 raise FileSystemError('Blob for %s was fetched corrupted from %s' % |
| 186 (repo_key, repo_url)) |
| 187 |
| 188 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) |
| 189 self._up_to_date_cache.Set(repo_key, True) |
| 190 self._stat_cache.Set(repo_key, version) |
| 191 return repo_zip |
| 192 return Future(delegate=Gettable(resolve)) |
| 193 |
| 194 # To decide whether we need to re-stat, and from there whether to re-fetch, |
| 195 # make use of ObjectStore's start-empty configuration. If |
| 196 # |object_store_creator| is configured to start empty then our creator |
| 197 # wants to refresh (e.g. running a cron), so fetch the live stat from |
| 198 # GitHub. If the stat hasn't changed since last time then no reason to |
| 199 # re-fetch from GitHub, just take from blobstore. |
| 200 |
| 201 if self._up_to_date_cache.Get(repo_key).Get() is None: |
| 202 # This is either a cron or an instance where a cron has never been run. |
| 203 cached_version = self._stat_cache.Get(repo_key).Get() |
| 204 live_version = self._FetchLiveVersion(username, password) |
| 205 if cached_version != live_version: |
| 206 # Note: branch intentionally triggered if |cached_version| is None. |
| 207 logging.info('%s has changed, fetching from GitHub.' % repo_url) |
| 208 self._repo_zip = fetch_from_github(live_version) |
| 209 else: |
| 210 # Already up to date. Fetch from blobstore. No need to set up-to-date |
| 211 # to True here since it'll already be set for instances, and it'll |
| 212 # never be set for crons. |
| 213 logging.info('%s is up to date.' % repo_url) |
| 214 self._repo_zip = fetch_from_blobstore() |
| 215 else: |
| 216 # Instance where cron has been run. It should be in blobstore. |
| 217 self._repo_zip = fetch_from_blobstore() |
| 218 |
| 219 assert self._repo_zip is not None |
| 220 |
| 221 def _FetchLiveVersion(self, username, password): |
| 159 '''Fetches the current repository version from github.com and returns it. | 222 '''Fetches the current repository version from github.com and returns it. |
| 160 The version is a 'sha' hash value. | 223 The version is a 'sha' hash value. |
| 161 ''' | 224 ''' |
| 162 # TODO(kalman): Do this asynchronously (use FetchAsync). | 225 # TODO(kalman): Do this asynchronously (use FetchAsync). |
| 163 result = self._fetcher.Fetch( | 226 result = self._fetcher.Fetch( |
| 164 'commits/HEAD', username=self._username, password=self._password) | 227 'commits/HEAD', username=username, password=password) |
| 165 | 228 |
| 166 try: | 229 try: |
| 167 return json.loads(result.content)['commit']['tree']['sha'] | 230 return json.loads(result.content)['sha'] |
| 168 except (KeyError, ValueError): | 231 except (KeyError, ValueError): |
| 169 raise FileSystemError('Error parsing JSON from repo %s: %s' % | 232 raise FileSystemError('Error parsing JSON from repo %s: %s' % |
| 170 (self._repo_url, traceback.format_exc())) | 233 (self._repo_url, traceback.format_exc())) |
| 171 | 234 |
| 172 def Refresh(self): | 235 def Refresh(self): |
| 173 '''Compares the cached and live stat versions to see if the cached | 236 return self.ReadSingle('') |
| 174 repository is out of date. If it is, an async fetch is started and a | |
| 175 Future is returned. When this Future is evaluated, the fetch will be | |
| 176 completed and the results cached. | |
| 177 | |
| 178 If no update is needed, None will be returned. | |
| 179 ''' | |
| 180 version = self._FetchLiveVersion() | |
| 181 if version == self._GetCachedVersion(): | |
| 182 logging.info('%s is up to date.' % self._repo_url) | |
| 183 # By default this Future will load the blob from datastore. | |
| 184 return self._repo_zip | |
| 185 | |
| 186 logging.info('%s has changed. Re-fetching.' % self._repo_url) | |
| 187 fetch = self._fetcher.FetchAsync( | |
| 188 'zipball', username=self._username, password=self._password) | |
| 189 | |
| 190 def resolve(): | |
| 191 '''Completes |fetch| and stores the results in blobstore. | |
| 192 ''' | |
| 193 repo_zip_url = self._repo_url + '/zipball' | |
| 194 try: | |
| 195 blob = fetch.Get().content | |
| 196 except urlfetch.DownloadError: | |
| 197 raise FileSystemError( | |
| 198 '%s: Failed to download zip file from repository %s' % repo_zip_url) | |
| 199 | |
| 200 repo_zip = _GithubZipFile.Create(self._repo_key, blob) | |
| 201 if repo_zip is None: | |
| 202 raise FileSystemError('%s: failed to create zip' % repo_zip_url) | |
| 203 | |
| 204 # Success. Update blobstore and the version. | |
| 205 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) | |
| 206 self._SetCachedVersion(version) | |
| 207 return repo_zip | |
| 208 | |
| 209 self._repo_zip = Future(delegate=Gettable(resolve)) | |
| 210 return self._repo_zip | |
| 211 | 237 |
| 212 def Read(self, paths, binary=False): | 238 def Read(self, paths, binary=False): |
| 213 '''Returns a directory mapping |paths| to the contents of the file at each | 239 '''Returns a directory mapping |paths| to the contents of the file at each |
| 214 path. If path ends with a '/', it is treated as a directory and is mapped to | 240 path. If path ends with a '/', it is treated as a directory and is mapped to |
| 215 a list of filenames in that directory. | 241 a list of filenames in that directory. |
| 216 | 242 |
| 217 |binary| is ignored. | 243 |binary| is ignored. |
| 218 ''' | 244 ''' |
| 245 self._EnsureRepoZip() |
| 219 def resolve(): | 246 def resolve(): |
| 220 repo_zip = self._repo_zip.Get() | 247 repo_zip = self._repo_zip.Get() |
| 221 if repo_zip is None: | |
| 222 raise FileNotFoundError('"%s" has not been Refreshed' % self._repo_key) | |
| 223 reads = {} | 248 reads = {} |
| 224 for path in paths: | 249 for path in paths: |
| 225 if path not in repo_zip.Paths(): | 250 if path not in repo_zip.Paths(): |
| 226 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) | 251 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) |
| 227 if path == '' or path.endswith('/'): | 252 if path == '' or path.endswith('/'): |
| 228 reads[path] = repo_zip.List(path) | 253 reads[path] = repo_zip.List(path) |
| 229 else: | 254 else: |
| 230 reads[path] = repo_zip.Read(path) | 255 reads[path] = repo_zip.Read(path) |
| 231 return reads | 256 return reads |
| 232 | |
| 233 # Delay reading until after self._repo_zip has finished fetching. | |
| 234 return Future(delegate=Gettable(resolve)) | 257 return Future(delegate=Gettable(resolve)) |
| 235 | 258 |
| 236 def Stat(self, path): | 259 def Stat(self, path): |
| 237 '''Stats |path| returning its version as as StatInfo object. If |path| ends | 260 '''Stats |path| returning its version as as StatInfo object. If |path| ends |
| 238 with a '/', it is assumed to be a directory and the StatInfo object returned | 261 with a '/', it is assumed to be a directory and the StatInfo object returned |
| 239 includes child_versions for all paths in the directory. | 262 includes child_versions for all paths in the directory. |
| 240 | 263 |
| 241 File paths do not include the name of the zip file, which is arbitrary and | 264 File paths do not include the name of the zip file, which is arbitrary and |
| 242 useless to consumers. | 265 useless to consumers. |
| 243 | 266 |
| 244 Because the repository will only be downloaded once per server version, all | 267 Because the repository will only be downloaded once per server version, all |
| 245 stat versions are always 0. | 268 stat versions are always 0. |
| 246 ''' | 269 ''' |
| 270 self._EnsureRepoZip() |
| 247 repo_zip = self._repo_zip.Get() | 271 repo_zip = self._repo_zip.Get() |
| 248 if repo_zip is None: | |
| 249 raise FileNotFoundError('"%s" has never been Refreshed' % self._repo_key) | |
| 250 | 272 |
| 251 if path not in repo_zip.Paths(): | 273 if path not in repo_zip.Paths(): |
| 252 raise FileNotFoundError('"%s" does not contain file "%s"' % | 274 raise FileNotFoundError('"%s" does not contain file "%s"' % |
| 253 (self._repo_key, path)) | 275 (self._repo_key, path)) |
| 254 | 276 |
| 255 version = self._GetCachedVersion() | 277 version = self._stat_cache.Get(self._repo_key).Get() |
| 256 assert version, ('There was a zipball in datastore; there should be a ' | 278 assert version is not None, ('There was a zipball in datastore; there ' |
| 257 'version cached for it') | 279 'should be a version cached for it') |
| 258 | 280 |
| 259 stat_info = StatInfo(version) | 281 stat_info = StatInfo(version) |
| 260 if path == '' or path.endswith('/'): | 282 if path == '' or path.endswith('/'): |
| 261 stat_info.child_versions = dict((p, StatInfo(version)) | 283 stat_info.child_versions = dict((p, StatInfo(version)) |
| 262 for p in repo_zip.List(path)) | 284 for p in repo_zip.List(path)) |
| 263 return stat_info | 285 return stat_info |
| 264 | 286 |
| 265 def GetIdentity(self): | 287 def GetIdentity(self): |
| 266 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) | 288 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) |
| 267 | 289 |
| 268 def __repr__(self): | 290 def __repr__(self): |
| 269 return '%s(key=%s, url=%s)' % (type(self).__name__, | 291 return '%s(key=%s, url=%s)' % (type(self).__name__, |
| 270 self._repo_key, | 292 self._repo_key, |
| 271 self._repo_url) | 293 self._repo_url) |
| OLD | NEW |