OLD | NEW |
| (Empty) |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 | |
6 from base64 import b64decode | |
7 from itertools import izip | |
8 import json | |
9 import logging | |
10 import posixpath | |
11 import time | |
12 import traceback | |
13 | |
14 from appengine_url_fetcher import AppEngineUrlFetcher | |
15 from appengine_wrappers import IsDownloadError, app_identity | |
16 from docs_server_utils import StringIdentity | |
17 from environment import IsDevServer | |
18 from file_system import (FileNotFoundError, | |
19 FileSystem, | |
20 FileSystemError, | |
21 FileSystemThrottledError, | |
22 StatInfo) | |
23 from future import All, Future | |
24 from path_util import AssertIsValid, IsDirectory, ToDirectory | |
25 from third_party.json_schema_compiler.memoize import memoize | |
26 from url_constants import (GITILES_BASE, | |
27 GITILES_SRC_ROOT, | |
28 GITILES_BRANCHES_PATH, | |
29 GITILES_OAUTH2_SCOPE) | |
30 | |
31 | |
32 _JSON_FORMAT = '?format=JSON' | |
33 _TEXT_FORMAT = '?format=TEXT' | |
34 _AUTH_PATH_PREFIX = '/a' | |
35 | |
36 | |
37 def _ParseGitilesJson(json_data): | |
38 '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON | |
39 data coming from Gitiles views. | |
40 ''' | |
41 return json.loads(json_data[json_data.find('{'):]) | |
42 | |
43 | |
def _CreateStatInfo(json_data):
  '''Builds a StatInfo from the Gitiles JSON in |json_data|: the version is
  the top-level 'id', and the child versions map each entry's 'name' to that
  entry's 'id'.
  '''
  parsed = _ParseGitilesJson(json_data)
  # Look up 'id' before 'entries' so a malformed payload fails on the same key
  # it always has.
  version = parsed['id']
  child_versions = {entry['name']: entry['id'] for entry in parsed['entries']}
  return StatInfo(version, child_versions)
51 | |
52 | |
class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.
  '''
  # Access tokens that have already been written to the log. Stored on the
  # class so that all instances share it.
  _logged_tokens = set()

  @classmethod
  def Create(cls, branch='master', commit=None):
    '''Returns a GitilesFileSystem that fetches through an AppEngineUrlFetcher.

    The base URL points at |commit| when one is given; otherwise at master when
    |branch| equals 'master'; otherwise at |branch| below
    GITILES_BRANCHES_PATH.
    '''
    token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)

    # Log the access token (once per token) so that it can be sneakily re-used
    # in development.
    # NOTE(review): this writes a credential to the log; confirm that is
    # acceptable in every environment this runs in.
    if token not in cls._logged_tokens:
      logging.info('Got token %s for scope %s' % (token, GITILES_OAUTH2_SCOPE))
      cls._logged_tokens.add(token)

    # Only include forced-auth (/a/) in the Gitiles URL if we have a token and
    # this is not the development server.
    path_prefix = ('' if token is None or IsDevServer()
                      else _AUTH_PATH_PREFIX)
    if commit:
      base_url = '%s%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
    elif branch == 'master':
      # Compare by value: an identity check ('is') only succeeds when the
      # caller's string happens to be the same interned object as the literal.
      base_url = '%s%s/%s/master' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
    else:
      base_url = '%s%s/%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
          GITILES_BRANCHES_PATH, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the fetcher used for all requests, the URL every path is
    resolved against, and the branch/commit this file system represents.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync: validates |url|, prefixes it
    with the base URL and attaches an access token for GITILES_OAUTH2_SCOPE.
    Returns whatever the fetcher's FetchAsync returns.
    '''
    AssertIsValid(url)
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
                                    access_token=access_token)

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    The returned future resolves to the response content on a 200. A 404 or a
    download error becomes FileNotFoundError, or None when |skip_not_found| is
    set. A 429 becomes FileSystemThrottledError, and any other status or
    failure becomes FileSystemError. |path| is only used in messages.
    '''
    def handle(e):
      # Error path: the fetch itself failed.
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      # Success path: the fetch completed, so map the HTTP status to a result.
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code == 429:
        logging.warning('Access throttled when fetching %s for Get from %s' %
                        (path, self._base_url))
        raise FileSystemThrottledError(
            'Access throttled when fetching %s for Get from %s' %
            (path, self._base_url))
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content

    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Fetches every path in |paths| and returns a future resolving to a dict
    keyed by path. Directory paths map to a list of entry names, with
    sub-directories suffixed by '/'. File paths map to the base64-decoded
    content. Paths whose content resolved to None (possible only when
    |skip_not_found| is set) are left out of the dict.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    # {
    #   "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #   "entries": [
    #     {
    #       "mode": 33188,
    #       "type": "blob",
    #       "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #       "name": ".gitignore"
    #     },
    #     ...
    #   ]
    # }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # A list of tuples of the form (path, Future).
    fetches = [(path, self._FetchAsync(fixup_url_format(path)))
               for path in paths]

    def parse_contents(results):
      value = {}
      # Pair results with the paths recorded in |fetches| rather than walking
      # |paths| a second time, so a one-shot iterable still lines up.
      for (path, _), content in izip(fetches, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Returns a Future constructed with an empty tuple as its value; no
    fetching is done here.
    '''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    Returns a future resolving to the value stored under |key| in the commit
    info JSON.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53", # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.

    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
                                            access_token=access_token)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    # The parameter is deliberately not called 'json', which would shadow the
    # json module inside the lambda.
    return content_future.Then(
        lambda commit_json: _ParseGitilesJson(commit_json)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def GetPreviousCommitID(self):
    '''Returns a future that resolves to the previous commit ID for this branch,
    taken as the first entry of the commit's 'parents' list.
    '''
    return self._GetCommitInfo('parents').Then(lambda parents: parents[0])

  def StatAsync(self, path):
    '''Returns a future resolving to the StatInfo for |path|.

    The directory containing |path| is fetched as JSON. For a directory path
    the StatInfo of that listing is returned as-is. For a file path a StatInfo
    holding only the file's own ID is returned, and FileNotFoundError is raised
    if the file is not in the listing. FileSystemError is raised if the listing
    has no version.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])

    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns '<class name>@<StringIdentity of the source URL>', where the URL
    ends in 'master', the pinned commit, or the branch path, in that order of
    precedence.
    '''
    if self._branch == 'master':
      # A master FS always carries the same identity even if pinned to a commit.
      str_id = 'master'
    elif self._commit is not None:
      str_id = self._commit
    else:
      str_id = '%s/%s' % (GITILES_BRANCHES_PATH, self._branch)
    return '@'.join((self.__class__.__name__, StringIdentity(
        '%s/%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT, str_id))))

  def GetVersion(self):
    '''Returns the commit this file system was constructed with (None when no
    commit was given).
    '''
    return self._commit
OLD | NEW |