OLD | NEW |
---|---|
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 from base64 import b64decode | 6 from base64 import b64decode |
7 from itertools import izip | 7 from itertools import izip |
8 import logging | 8 import logging |
9 import json | 9 import json |
10 import logging | |
ahernandez
2014/09/15 23:58:02
Double import.
Ken Rockot(use gerrit already)
2014/09/16 00:24:31
Done.
| |
10 import posixpath | 11 import posixpath |
11 import time | 12 import time |
12 import traceback | 13 import traceback |
13 | 14 |
14 from appengine_url_fetcher import AppEngineUrlFetcher | 15 from appengine_url_fetcher import AppEngineUrlFetcher |
15 from appengine_wrappers import IsDownloadError, app_identity | 16 from appengine_wrappers import IsDownloadError, app_identity |
16 from docs_server_utils import StringIdentity | 17 from docs_server_utils import StringIdentity |
17 from file_system import (FileNotFoundError, | 18 from file_system import (FileNotFoundError, |
18 FileSystem, | 19 FileSystem, |
19 FileSystemError, | 20 FileSystemError, |
21 FileSystemThrottledError, | |
20 StatInfo) | 22 StatInfo) |
21 from future import All, Future | 23 from future import All, Future |
22 from path_util import AssertIsValid, IsDirectory, ToDirectory | 24 from path_util import AssertIsValid, IsDirectory, ToDirectory |
23 from third_party.json_schema_compiler.memoize import memoize | 25 from third_party.json_schema_compiler.memoize import memoize |
24 from url_constants import (GITILES_BASE, | 26 from url_constants import (GITILES_BASE, |
25 GITILES_BRANCH_BASE, | 27 GITILES_SRC_ROOT, |
28 GITILES_BRANCHES_PATH, | |
26 GITILES_OAUTH2_SCOPE) | 29 GITILES_OAUTH2_SCOPE) |
27 | 30 |
31 | |
28 _JSON_FORMAT = '?format=JSON' | 32 _JSON_FORMAT = '?format=JSON' |
29 _TEXT_FORMAT = '?format=TEXT' | 33 _TEXT_FORMAT = '?format=TEXT' |
34 _AUTH_PATH_PREFIX = '/a' | |
30 | 35 |
31 | 36 |
32 def _ParseGitilesJson(json_data): | 37 def _ParseGitilesJson(json_data): |
33 '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON | 38 '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON |
34 data coming from Gitiles views. | 39 data coming from Gitiles views. |
35 ''' | 40 ''' |
36 return json.loads(json_data[json_data.find('{'):]) | 41 return json.loads(json_data[json_data.find('{'):]) |
37 | 42 |
38 | 43 |
39 def _CreateStatInfo(json_data): | 44 def _CreateStatInfo(json_data): |
40 '''Returns a StatInfo object comprised of the tree ID for |json_data|, | 45 '''Returns a StatInfo object comprised of the tree ID for |json_data|, |
41 as well as the tree IDs for the entries in |json_data|. | 46 as well as the tree IDs for the entries in |json_data|. |
42 ''' | 47 ''' |
43 tree = _ParseGitilesJson(json_data) | 48 tree = _ParseGitilesJson(json_data) |
44 return StatInfo(tree['id'], | 49 return StatInfo(tree['id'], |
45 dict((e['name'], e['id']) for e in tree['entries'])) | 50 dict((e['name'], e['id']) for e in tree['entries'])) |
46 | 51 |
47 | 52 |
48 class GitilesFileSystem(FileSystem): | 53 class GitilesFileSystem(FileSystem): |
49 '''Class to fetch filesystem data from the Chromium project's gitiles | 54 '''Class to fetch filesystem data from the Chromium project's gitiles |
50 service. | 55 service. |
51 ''' | 56 ''' |
52 @staticmethod | 57 @staticmethod |
53 def Create(branch='master', commit=None): | 58 def Create(branch='master', commit=None): |
59 token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE) | |
60 path_prefix = '' if token is None else _AUTH_PATH_PREFIX | |
54 if commit: | 61 if commit: |
55 base_url = '%s/%s' % (GITILES_BASE, commit) | 62 base_url = '%s%s/%s/%s' % ( |
63 GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit) | |
56 elif branch is 'master': | 64 elif branch is 'master': |
57 base_url = '%s/master' % GITILES_BASE | 65 base_url = '%s%s/%s/master' % ( |
66 GITILES_BASE, path_prefix, GITILES_SRC_ROOT) | |
58 else: | 67 else: |
59 base_url = '%s/%s' % (GITILES_BRANCH_BASE, branch) | 68 base_url = '%s%s/%s/%s/%s' % ( |
69 GITILES_BASE, path_prefix, GITILES_SRC_ROOT, | |
70 GITILES_BRANCHES_PATH, branch) | |
60 return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit) | 71 return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit) |
61 | 72 |
62 def __init__(self, fetcher, base_url, branch, commit): | 73 def __init__(self, fetcher, base_url, branch, commit): |
63 self._fetcher = fetcher | 74 self._fetcher = fetcher |
64 self._base_url = base_url | 75 self._base_url = base_url |
65 self._branch = branch | 76 self._branch = branch |
66 self._commit = commit | 77 self._commit = commit |
67 | 78 |
68 def _FetchAsync(self, url): | 79 def _FetchAsync(self, url): |
69 '''Convenience wrapper for fetcher.FetchAsync, so callers don't | 80 '''Convenience wrapper for fetcher.FetchAsync, so callers don't |
70 need to use posixpath.join. | 81 need to use posixpath.join. |
71 ''' | 82 ''' |
72 AssertIsValid(url) | 83 AssertIsValid(url) |
73 access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE) | 84 access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE) |
74 return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url), | 85 return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url), |
75 access_token=access_token) | 86 access_token=access_token) |
76 | 87 |
77 def _ResolveFetchContent(self, path, fetch_future, retry, | 88 def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False): |
78 skip_not_found=False): | |
79 '''Returns a future to cleanly resolve |fetch_future|. | 89 '''Returns a future to cleanly resolve |fetch_future|. |
80 ''' | 90 ''' |
81 def handle(e): | 91 def handle(e): |
82 if skip_not_found and IsDownloadError(e): | 92 if skip_not_found and IsDownloadError(e): |
83 return None | 93 return None |
84 exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError | 94 exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError |
85 raise exc_type('%s fetching %s for Get from %s: %s' % | 95 raise exc_type('%s fetching %s for Get from %s: %s' % |
86 (type(e).__name__, path, self._base_url, traceback.format_exc())) | 96 (type(e).__name__, path, self._base_url, traceback.format_exc())) |
87 | 97 |
88 def get_content(result): | 98 def get_content(result): |
89 if result.status_code == 404: | 99 if result.status_code == 404: |
90 if skip_not_found: | 100 if skip_not_found: |
91 return None | 101 return None |
92 raise FileNotFoundError('Got 404 when fetching %s for Get from %s' % | 102 raise FileNotFoundError('Got 404 when fetching %s for Get from %s' % |
93 (path, self._base_url)) | 103 (path, self._base_url)) |
94 if result.status_code == 429: | 104 if result.status_code == 429: |
95 logging.warning('Access throttled when fetching %s for Get from %s' % | 105 logging.warning('Access throttled when fetching %s for Get from %s' % |
96 (path, self._base_url)) | 106 (path, self._base_url)) |
97 time.sleep(30) | 107 raise FileSystemThrottledError( |
98 return retry().Then(get_content, handle) | 108 'Access throttled when fetching %s for Get from %s' % |
109 (path, self._base_url)) | |
99 if result.status_code != 200: | 110 if result.status_code != 200: |
100 raise FileSystemError( | 111 raise FileSystemError( |
101 'Got %s when fetching %s for Get from %s, content %s' % | 112 'Got %s when fetching %s for Get from %s, content %s' % |
102 (result.status_code, path, self._base_url, result.content)) | 113 (result.status_code, path, self._base_url, result.content)) |
103 return result.content | 114 return result.content |
104 | 115 |
105 return fetch_future.Then(get_content, handle) | 116 return fetch_future.Then(get_content, handle) |
106 | 117 |
107 def Read(self, paths, skip_not_found=False): | 118 def Read(self, paths, skip_not_found=False): |
108 # Directory content is formatted in JSON in Gitiles as follows: | 119 # Directory content is formatted in JSON in Gitiles as follows: |
(...skipping 15 matching lines...) | |
124 return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries] | 135 return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries] |
125 | 136 |
126 def fixup_url_format(path): | 137 def fixup_url_format(path): |
127 # By default, Gitiles URLs display resources in HTML. To get resources | 138 # By default, Gitiles URLs display resources in HTML. To get resources |
128 # suitable for our consumption, a '?format=' string must be appended to | 139 # suitable for our consumption, a '?format=' string must be appended to |
129 # the URL. The format may be one of 'JSON' or 'TEXT' for directory or | 140 # the URL. The format may be one of 'JSON' or 'TEXT' for directory or |
130 # text resources, respectively. | 141 # text resources, respectively. |
131 return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT) | 142 return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT) |
132 | 143 |
133 # A list of tuples of the form (path, Future). | 144 # A list of tuples of the form (path, Future). |
134 fetches = [] | 145 fetches = [(path, self._FetchAsync(fixup_url_format(path))) |
135 for path in paths: | 146 for path in paths] |
136 def make_fetch_future(): | |
137 return self._FetchAsync(fixup_url_format(path)) | |
138 fetches.append((path, make_fetch_future(), make_fetch_future)) | |
139 | 147 |
140 def parse_contents(results): | 148 def parse_contents(results): |
141 value = {} | 149 value = {} |
142 for path, content in izip(paths, results): | 150 for path, content in izip(paths, results): |
143 if content is None: | 151 if content is None: |
144 continue | 152 continue |
145 # Gitiles encodes text content in base64 (see | 153 # Gitiles encodes text content in base64 (see |
146 # http://tools.ietf.org/html/rfc4648 for info about base64). | 154 # http://tools.ietf.org/html/rfc4648 for info about base64). |
147 value[path] = (list_dir if IsDirectory(path) else b64decode)(content) | 155 value[path] = (list_dir if IsDirectory(path) else b64decode)(content) |
148 return value | 156 return value |
149 | 157 |
150 return All(self._ResolveFetchContent(path, future, factory, skip_not_found) | 158 return All(self._ResolveFetchContent(path, future, skip_not_found) |
151 for path, future, factory in fetches).Then(parse_contents) | 159 for path, future in fetches).Then(parse_contents) |
152 | 160 |
153 def Refresh(self): | 161 def Refresh(self): |
154 return Future(value=()) | 162 return Future(value=()) |
155 | 163 |
156 @memoize | 164 @memoize |
157 def _GetCommitInfo(self, key): | 165 def _GetCommitInfo(self, key): |
158 '''Gets the commit information specified by |key|. | 166 '''Gets the commit information specified by |key|. |
159 | 167 |
160 The JSON view for commit info looks like: | 168 The JSON view for commit info looks like: |
161 { | 169 { |
(...skipping 15 matching lines...) | |
177 "message": "...", | 185 "message": "...", |
178 "tree_diff": [...] | 186 "tree_diff": [...] |
179 } | 187 } |
180 ''' | 188 ''' |
181 # Commit information for a branch is obtained by appending '?format=JSON' | 189 # Commit information for a branch is obtained by appending '?format=JSON' |
182 # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is | 190 # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is |
183 # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves | 191 # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves |
184 # the root directory JSON content, whereas the former serves the branch | 192 # the root directory JSON content, whereas the former serves the branch |
185 # commit info JSON content. | 193 # commit info JSON content. |
186 | 194 |
187 def make_fetch_future(): | 195 access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE) |
188 access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE) | 196 fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT, |
189 return self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT, | 197 access_token=access_token) |
190 access_token = access_token) | 198 content_future = self._ResolveFetchContent(self._base_url, fetch_future) |
191 | |
192 fetch_future = make_fetch_future() | |
193 content_future = self._ResolveFetchContent(self._base_url, fetch_future, | |
194 make_fetch_future) | |
195 return content_future.Then(lambda json: _ParseGitilesJson(json)[key]) | 199 return content_future.Then(lambda json: _ParseGitilesJson(json)[key]) |
196 | 200 |
197 def GetCommitID(self): | 201 def GetCommitID(self): |
198 '''Returns a future that resolves to the commit ID for this branch. | 202 '''Returns a future that resolves to the commit ID for this branch. |
199 ''' | 203 ''' |
200 return self._GetCommitInfo('commit') | 204 return self._GetCommitInfo('commit') |
201 | 205 |
202 def GetPreviousCommitID(self): | 206 def GetPreviousCommitID(self): |
203 '''Returns a future that resolves to the previous commit ID for this branch. | 207 '''Returns a future that resolves to the previous commit ID for this branch. |
204 ''' | 208 ''' |
205 return self._GetCommitInfo('parents').Then(lambda parents: parents[0]) | 209 return self._GetCommitInfo('parents').Then(lambda parents: parents[0]) |
206 | 210 |
207 def StatAsync(self, path): | 211 def StatAsync(self, path): |
208 dir_, filename = posixpath.split(path) | 212 dir_, filename = posixpath.split(path) |
209 def stat(content): | 213 def stat(content): |
210 stat_info = _CreateStatInfo(content) | 214 stat_info = _CreateStatInfo(content) |
211 if stat_info.version is None: | 215 if stat_info.version is None: |
212 raise FileSystemError('Failed to find version of dir %s' % dir_) | 216 raise FileSystemError('Failed to find version of dir %s' % dir_) |
213 if IsDirectory(path): | 217 if IsDirectory(path): |
214 return stat_info | 218 return stat_info |
215 if filename not in stat_info.child_versions: | 219 if filename not in stat_info.child_versions: |
216 raise FileNotFoundError( | 220 raise FileNotFoundError( |
217 '%s from %s was not in child versions for Stat' % (filename, path)) | 221 '%s from %s was not in child versions for Stat' % (filename, path)) |
218 return StatInfo(stat_info.child_versions[filename]) | 222 return StatInfo(stat_info.child_versions[filename]) |
219 | 223 |
220 def make_fetch_future(): | 224 fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT) |
221 return self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT) | 225 return self._ResolveFetchContent(path, fetch_future).Then(stat) |
222 | |
223 fetch_future = make_fetch_future() | |
224 return self._ResolveFetchContent(path, fetch_future, | |
225 make_fetch_future).Then(stat) | |
226 | 226 |
227 def GetIdentity(self): | 227 def GetIdentity(self): |
228 # NOTE: Do not use commit information to create the string identity. | 228 # NOTE: Do not use commit information to create the string identity. |
229 # Doing so will mess up caching. | 229 # Doing so will mess up caching. |
230 if self._commit is None and self._branch != 'master': | 230 if self._commit is None and self._branch != 'master': |
231 str_id = GITILES_BRANCH_BASE | 231 str_id = '%s/%s/%s/%s' % ( |
232 GITILES_BASE, GITILES_SRC_ROOT, GITILES_BRANCHES_PATH, self._branch) | |
232 else: | 233 else: |
233 str_id = GITILES_BASE | 234 str_id = '%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT) |
234 return '@'.join((self.__class__.__name__, StringIdentity(str_id))) | 235 return '@'.join((self.__class__.__name__, StringIdentity(str_id))) |
OLD | NEW |