OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | 5 import logging |
6 import os | 6 import os |
7 from StringIO import StringIO | 7 from StringIO import StringIO |
8 import sys | |
9 | 8 |
10 from appengine_wrappers import webapp | 9 from appengine_wrappers import webapp |
11 from appengine_wrappers import memcache | 10 from appengine_wrappers import memcache |
12 from appengine_wrappers import urlfetch | 11 from appengine_wrappers import urlfetch |
13 | |
14 from api_data_source import APIDataSource | |
15 from api_list_data_source import APIListDataSource | |
16 from appengine_blobstore import AppEngineBlobstore | |
17 from in_memory_object_store import InMemoryObjectStore | |
18 from appengine_url_fetcher import AppEngineUrlFetcher | |
19 from branch_utility import BranchUtility | 12 from branch_utility import BranchUtility |
20 from example_zipper import ExampleZipper | |
21 from compiled_file_system import CompiledFileSystem | |
22 import compiled_file_system as compiled_fs | |
23 from github_file_system import GithubFileSystem | |
24 from intro_data_source import IntroDataSource | |
25 from known_issues_data_source import KnownIssuesDataSource | |
26 from local_file_system import LocalFileSystem | |
27 from memcache_file_system import MemcacheFileSystem | |
28 from reference_resolver import ReferenceResolver | |
29 from samples_data_source import SamplesDataSource | |
30 from server_instance import ServerInstance | 13 from server_instance import ServerInstance |
31 from sidenav_data_source import SidenavDataSource | 14 import svn_constants |
32 from subversion_file_system import SubversionFileSystem | 15 import time |
33 from template_data_source import TemplateDataSource | |
34 from third_party.json_schema_compiler.model import UnixName | |
35 import url_constants | |
36 | |
37 # Increment this version to force the server to reload all pages in the first | |
38 # cron job that is run. | |
39 _VERSION = 1 | |
40 | 16 |
41 # The default channel to serve docs for if no channel is specified. | 17 # The default channel to serve docs for if no channel is specified. |
42 _DEFAULT_CHANNEL = 'stable' | 18 _DEFAULT_CHANNEL = 'stable' |
43 | 19 |
44 BRANCH_UTILITY_MEMCACHE = InMemoryObjectStore('branch_utility') | |
45 BRANCH_UTILITY = BranchUtility(url_constants.OMAHA_PROXY_URL, | |
46 AppEngineUrlFetcher(None), | |
47 BRANCH_UTILITY_MEMCACHE) | |
48 | |
49 GITHUB_MEMCACHE = InMemoryObjectStore('github') | |
50 GITHUB_FILE_SYSTEM = GithubFileSystem( | |
51 AppEngineUrlFetcher(url_constants.GITHUB_URL), | |
52 GITHUB_MEMCACHE, | |
53 AppEngineBlobstore()) | |
54 GITHUB_COMPILED_FILE_SYSTEM = CompiledFileSystem.Factory(GITHUB_FILE_SYSTEM, | |
55 GITHUB_MEMCACHE) | |
56 | |
57 EXTENSIONS_PATH = 'chrome/common/extensions' | |
58 DOCS_PATH = 'docs' | |
59 API_PATH = 'api' | |
60 TEMPLATE_PATH = DOCS_PATH + '/templates' | |
61 INTRO_PATH = TEMPLATE_PATH + '/intros' | |
62 ARTICLE_PATH = TEMPLATE_PATH + '/articles' | |
63 PUBLIC_TEMPLATE_PATH = TEMPLATE_PATH + '/public' | |
64 PRIVATE_TEMPLATE_PATH = TEMPLATE_PATH + '/private' | |
65 EXAMPLES_PATH = DOCS_PATH + '/examples' | |
66 JSON_PATH = TEMPLATE_PATH + '/json' | |
67 | |
68 # Global cache of instances because Handler is recreated for every request. | |
69 SERVER_INSTANCES = {} | |
70 | |
71 def _GetURLFromBranch(branch): | |
72 if branch == 'trunk': | |
73 return url_constants.SVN_TRUNK_URL + '/src' | |
74 return url_constants.SVN_BRANCH_URL + '/' + branch + '/src' | |
75 | |
76 def _SplitFilenameUnix(base_dir, files): | |
77 return [UnixName(os.path.splitext(f.split('/')[-1])[0]) for f in files] | |
78 | |
79 def _CreateMemcacheFileSystem(branch, branch_memcache): | |
80 svn_url = _GetURLFromBranch(branch) + '/' + EXTENSIONS_PATH | |
81 stat_fetcher = AppEngineUrlFetcher( | |
82 svn_url.replace(url_constants.SVN_URL, url_constants.VIEWVC_URL)) | |
83 fetcher = AppEngineUrlFetcher(svn_url) | |
84 return MemcacheFileSystem(SubversionFileSystem(fetcher, stat_fetcher), | |
85 branch_memcache) | |
86 | |
87 _default_branch = BRANCH_UTILITY.GetBranchNumberForChannelName(_DEFAULT_CHANNEL) | |
88 APPS_MEMCACHE = InMemoryObjectStore(_default_branch) | |
89 APPS_FILE_SYSTEM = _CreateMemcacheFileSystem(_default_branch, APPS_MEMCACHE) | |
90 APPS_COMPILED_FILE_SYSTEM = CompiledFileSystem.Factory( | |
91 APPS_FILE_SYSTEM, | |
92 APPS_MEMCACHE).Create(_SplitFilenameUnix, compiled_fs.APPS_FS) | |
93 | |
94 EXTENSIONS_MEMCACHE = InMemoryObjectStore(_default_branch) | |
95 EXTENSIONS_FILE_SYSTEM = _CreateMemcacheFileSystem(_default_branch, | |
96 EXTENSIONS_MEMCACHE) | |
97 EXTENSIONS_COMPILED_FILE_SYSTEM = CompiledFileSystem.Factory( | |
98 EXTENSIONS_FILE_SYSTEM, | |
99 EXTENSIONS_MEMCACHE).Create(_SplitFilenameUnix, compiled_fs.EXTENSIONS_FS) | |
100 | |
101 KNOWN_ISSUES_DATA_SOURCE = KnownIssuesDataSource( | |
102 InMemoryObjectStore('KnownIssues'), | |
103 AppEngineUrlFetcher(None)) | |
104 | |
105 def _MakeInstanceKey(branch, number): | |
106 return '%s/%s' % (branch, number) | |
107 | |
108 def _GetInstanceForBranch(channel_name, local_path): | |
109 branch = BRANCH_UTILITY.GetBranchNumberForChannelName(channel_name) | |
110 | |
111 # The key for the server is a tuple of |channel_name| with |branch|, since | |
112 # sometimes stable and beta point to the same branch. | |
113 instance_key = _MakeInstanceKey(channel_name, branch) | |
114 instance = SERVER_INSTANCES.get(instance_key, None) | |
115 if instance is not None: | |
116 return instance | |
117 | |
118 branch_memcache = InMemoryObjectStore(branch) | |
119 file_system = _CreateMemcacheFileSystem(branch, branch_memcache) | |
120 cache_factory = CompiledFileSystem.Factory(file_system, branch_memcache) | |
121 api_list_data_source_factory = APIListDataSource.Factory(cache_factory, | |
122 file_system, | |
123 API_PATH, | |
124 PUBLIC_TEMPLATE_PATH) | |
125 api_data_source_factory = APIDataSource.Factory( | |
126 cache_factory, | |
127 API_PATH) | |
128 | |
129 # Give the ReferenceResolver a memcache, to speed up the lookup of | |
130 # duplicate $refs. | |
131 ref_resolver_factory = ReferenceResolver.Factory( | |
132 api_data_source_factory, | |
133 api_list_data_source_factory, | |
134 branch_memcache) | |
135 api_data_source_factory.SetReferenceResolverFactory(ref_resolver_factory) | |
136 samples_data_source_factory = SamplesDataSource.Factory( | |
137 channel_name, | |
138 file_system, | |
139 GITHUB_FILE_SYSTEM, | |
140 cache_factory, | |
141 GITHUB_COMPILED_FILE_SYSTEM, | |
142 ref_resolver_factory, | |
143 EXAMPLES_PATH) | |
144 api_data_source_factory.SetSamplesDataSourceFactory( | |
145 samples_data_source_factory) | |
146 intro_data_source_factory = IntroDataSource.Factory( | |
147 cache_factory, | |
148 ref_resolver_factory, | |
149 [INTRO_PATH, ARTICLE_PATH]) | |
150 sidenav_data_source_factory = SidenavDataSource.Factory(cache_factory, | |
151 JSON_PATH) | |
152 template_data_source_factory = TemplateDataSource.Factory( | |
153 channel_name, | |
154 api_data_source_factory, | |
155 api_list_data_source_factory, | |
156 intro_data_source_factory, | |
157 samples_data_source_factory, | |
158 KNOWN_ISSUES_DATA_SOURCE, | |
159 sidenav_data_source_factory, | |
160 cache_factory, | |
161 ref_resolver_factory, | |
162 PUBLIC_TEMPLATE_PATH, | |
163 PRIVATE_TEMPLATE_PATH) | |
164 example_zipper = ExampleZipper(file_system, | |
165 cache_factory, | |
166 DOCS_PATH) | |
167 | |
168 instance = ServerInstance(template_data_source_factory, | |
169 example_zipper, | |
170 cache_factory) | |
171 SERVER_INSTANCES[instance_key] = instance | |
172 return instance | |
173 | |
174 def _CleanBranches(): | |
175 keys = [_MakeInstanceKey(branch, number) | |
176 for branch, number in BRANCH_UTILITY.GetAllBranchNumbers()] | |
177 for key in SERVER_INSTANCES.keys(): | |
178 if key not in keys: | |
179 SERVER_INSTANCES.pop(key) | |
180 | |
181 class _MockResponse(object): | |
182 def __init__(self): | |
183 self.status = 200 | |
184 self.out = StringIO() | |
185 self.headers = {} | |
186 | |
187 def set_status(self, status): | |
188 self.status = status | |
189 | |
190 def clear(self, *args): | |
191 pass | |
192 | |
193 class _MockRequest(object): | |
194 def __init__(self, path): | |
195 self.headers = {} | |
196 self.path = path | |
197 self.url = 'http://localhost' + path | |
198 | |
199 class Handler(webapp.RequestHandler): | 20 class Handler(webapp.RequestHandler): |
200 def __init__(self, request, response, local_path=EXTENSIONS_PATH): | 21 def __init__(self, request, response): |
201 self._local_path = local_path | |
202 super(Handler, self).__init__(request, response) | 22 super(Handler, self).__init__(request, response) |
203 | 23 |
204 def _HandleGet(self, path): | 24 def _HandleGet(self, path): |
205 channel_name, real_path = BRANCH_UTILITY.SplitChannelNameFromPath(path) | 25 channel_name, real_path = BranchUtility.SplitChannelNameFromPath(path) |
206 | 26 |
207 if channel_name == _DEFAULT_CHANNEL: | 27 if channel_name == _DEFAULT_CHANNEL: |
208 self.redirect('/%s' % real_path) | 28 self.redirect('/%s' % real_path) |
209 return | 29 return |
210 | 30 |
211 # TODO: Detect that these are directories and serve index.html out of them. | 31 if channel_name is None: |
| 32 channel_name = _DEFAULT_CHANNEL |
| 33 |
| 34 # TODO(kalman): Check if |path| is a directory and serve path/index.html |
| 35 # rather than special-casing apps/extensions. |
212 if real_path.strip('/') == 'apps': | 36 if real_path.strip('/') == 'apps': |
213 real_path = 'apps/index.html' | 37 real_path = 'apps/index.html' |
214 if real_path.strip('/') == 'extensions': | 38 if real_path.strip('/') == 'extensions': |
215 real_path = 'extensions/index.html' | 39 real_path = 'extensions/index.html' |
216 | 40 |
217 if (not real_path.startswith('extensions/') and | 41 server_instance = ServerInstance.GetOrCreate(channel_name) |
218 not real_path.startswith('apps/') and | |
219 not real_path.startswith('static/')): | |
220 if self._RedirectBadPaths(real_path, channel_name): | |
221 return | |
222 | 42 |
223 _CleanBranches() | 43 canonical_path = server_instance.path_canonicalizer.Canonicalize(real_path) |
| 44 if real_path != canonical_path: |
| 45 self.redirect(canonical_path) |
| 46 return |
224 | 47 |
225 # Yes, do this after it's passed to RedirectBadPaths. That needs to know | 48 server_instance.Get(real_path, # Reuse the instance looked up above. |
226 # whether or not a branch was specified. | 49 self.request, |
227 if channel_name is None: | 50 self.response) |
228 channel_name = _DEFAULT_CHANNEL | |
229 _GetInstanceForBranch(channel_name, self._local_path).Get(real_path, | |
230 self.request, | |
231 self.response) | |
232 | |
233 def _Render(self, files, channel): | |
234 original_response = self.response | |
235 for f in files: | |
236 if f.endswith('404.html'): | |
237 continue | |
238 path = channel + f.split(PUBLIC_TEMPLATE_PATH)[-1] | |
239 self.request = _MockRequest(path) | |
240 self.response = _MockResponse() | |
241 try: | |
242 self._HandleGet(path) | |
243 except Exception as e: | |
244 logging.error('Error rendering %s: %s' % (path, str(e))) | |
245 self.response = original_response | |
246 | |
247 class _ValueHolder(object): | |
248 """Class to allow a value to be changed within a lambda. | |
249 """ | |
250 def __init__(self, starting_value): | |
251 self._value = starting_value | |
252 | |
253 def Set(self, value): | |
254 self._value = value | |
255 | |
256 def Get(self): | |
257 return self._value | |
258 | 51 |
259 def _HandleCron(self, path): | 52 def _HandleCron(self, path): |
260 # Cache population strategy: | 53 # Cron strategy: |
261 # | 54 # |
262 # We could list all files in PUBLIC_TEMPLATE_PATH then render them. However, | 55 # Find all public template files and static files, and render them. Most of |
263 # this would be inefficient in the common case where files haven't changed | 56 # the time these won't have changed since the last cron run, so it's a |
264 # since the last cron. | 57 # little wasteful, but hopefully rendering is really fast (if it isn't we |
265 # | 58 # have a problem). |
266 # Instead, let the CompiledFileSystem give us clues when to re-render: we | 59 class MockResponse(object): |
267 # use the CFS to check whether the templates, examples, or API folders have | 60 def __init__(self): |
268 # been changed. If there has been a change, the compilation function will | 61 self.status = 200 |
269 # be called. The same is then done separately with the apps samples page, | 62 self.out = StringIO() |
270 # since it pulls its data from Github. | 63 self.headers = {} |
| 64 def set_status(self, status): |
| 65 self.status = status |
| 66 def clear(self, *args): |
| 67 pass |
| 68 |
| 69 class MockRequest(object): |
| 70 def __init__(self, path): |
| 71 self.headers = {} |
| 72 self.path = path |
| 73 self.url = '//localhost/%s' % path |
| 74 |
271 channel = path.split('/')[-1] | 75 channel = path.split('/')[-1] |
272 branch = BRANCH_UTILITY.GetBranchNumberForChannelName(channel) | 76 logging.info('cron/%s: starting' % channel) |
273 logging.info('Running cron job for %s.' % branch) | |
274 branch_memcache = InMemoryObjectStore(branch) | |
275 file_system = _CreateMemcacheFileSystem(branch, branch_memcache) | |
276 factory = CompiledFileSystem.Factory(file_system, branch_memcache) | |
277 | 77 |
278 needs_render = self._ValueHolder(False) | 78 server_instance = ServerInstance.GetOrCreate(channel) |
279 invalidation_cache = factory.Create(lambda _, __: needs_render.Set(True), | |
280 compiled_fs.CRON_INVALIDATION, | |
281 version=_VERSION) | |
282 for path in [TEMPLATE_PATH, EXAMPLES_PATH, API_PATH]: | |
283 invalidation_cache.GetFromFile(path + '/') | |
284 | 79 |
285 if needs_render.Get(): | 80 def run_cron_for_dir(d): # Renders every file listed under |d|; returns the last error or None. |
286 file_listing_cache = factory.Create(lambda _, x: x, | 81 error = None |
287 compiled_fs.CRON_FILE_LISTING) | 82 start_time = time.time() |
288 self._Render(file_listing_cache.GetFromFileListing(PUBLIC_TEMPLATE_PATH), | 83 files = [f for f in server_instance.content_cache.GetFromFileListing(d) |
289 channel) | 84 if not f.endswith('/')] |
| 85 for f in files: |
| 86 try: |
| 87 server_instance.Get(f, MockRequest(f), MockResponse()) |
| 88 except Exception as e: |
| 89 error = e # Keep rendering the remaining files, but remember the failure for the caller. |
| 90 logging.error('cron/%s: error rendering %s/%s: %s', channel, d, f, e) |
| 91 logging.info('cron/%s: rendering %s files in %s took %s seconds' % ( |
| 92 channel, len(files), d, time.time() - start_time)) |
| 93 return error |
| 94 |
| 95 # Don't use "or" since we want to evaluate everything no matter what. |
| 96 was_error = any((run_cron_for_dir(svn_constants.PUBLIC_TEMPLATE_PATH), |
| 97 run_cron_for_dir(svn_constants.STATIC_PATH))) |
| 98 |
| 99 if was_error: |
| 100 self.response.status = 500 |
| 101 self.response.out.write('Failure') |
290 else: | 102 else: |
291 # If |needs_render| was True, this page was already rendered, and we don't | 103 self.response.status = 200 |
292 # need to render again. | 104 self.response.out.write('Success') |
293 github_invalidation_cache = GITHUB_COMPILED_FILE_SYSTEM.Create( | |
294 lambda _, __: needs_render.Set(True), | |
295 compiled_fs.CRON_GITHUB_INVALIDATION) | |
296 if needs_render.Get(): | |
297 self._Render([PUBLIC_TEMPLATE_PATH + '/apps/samples.html'], channel) | |
298 | 105 |
299 # It's good to keep the extensions samples page fresh, because if it | 106 logging.info('cron/%s: finished' % channel) |
300 # gets dropped from the cache ALL the extensions pages time out. | |
301 self._Render([PUBLIC_TEMPLATE_PATH + '/extensions/samples.html'], channel) | |
302 | |
303 self.response.out.write('Success') | |
304 | 107 |
305 def _RedirectSpecialCases(self, path): | 108 def _RedirectSpecialCases(self, path): |
306 google_dev_url = 'http://developer.google.com/chrome' | 109 google_dev_url = 'http://developer.google.com/chrome' |
307 if path == '/' or path == '/index.html': | 110 if path == '/' or path == '/index.html': |
308 self.redirect(google_dev_url) | 111 self.redirect(google_dev_url) |
309 return True | 112 return True |
310 | 113 |
311 if path == '/apps.html': | 114 if path == '/apps.html': |
312 self.redirect('/apps/about_apps.html') | 115 self.redirect('/apps/about_apps.html') |
313 return True | 116 return True |
314 | 117 |
315 return False | 118 return False |
316 | 119 |
317 def _RedirectBadPaths(self, path, channel_name): | |
318 if '/' in path or path == '404.html': | |
319 return False | |
320 apps_templates = APPS_COMPILED_FILE_SYSTEM.GetFromFileListing( | |
321 PUBLIC_TEMPLATE_PATH + '/apps') | |
322 extensions_templates = EXTENSIONS_COMPILED_FILE_SYSTEM.GetFromFileListing( | |
323 PUBLIC_TEMPLATE_PATH + '/extensions') | |
324 unix_path = UnixName(os.path.splitext(path)[0]) | |
325 if channel_name is None: | |
326 apps_path = '/apps/%s' % path | |
327 extensions_path = '/extensions/%s' % path | |
328 else: | |
329 apps_path = '/%s/apps/%s' % (channel_name, path) | |
330 extensions_path = '/%s/extensions/%s' % (channel_name, path) | |
331 if unix_path in extensions_templates: | |
332 self.redirect(extensions_path) | |
333 elif unix_path in apps_templates: | |
334 self.redirect(apps_path) | |
335 else: | |
336 self.redirect(extensions_path) | |
337 return True | |
338 | |
339 def _RedirectFromCodeDotGoogleDotCom(self, path): | 120 def _RedirectFromCodeDotGoogleDotCom(self, path): |
340 if (not self.request.url.startswith(('http://code.google.com', | 121 if (not self.request.url.startswith(('http://code.google.com', |
341 'https://code.google.com'))): | 122 'https://code.google.com'))): |
342 return False | 123 return False |
343 | 124 |
344 newUrl = 'http://developer.chrome.com/' | 125 new_url = 'http://developer.chrome.com/' |
345 | 126 |
346 # switch to https if necessary | 127 # switch to https if necessary |
347 if (self.request.url.startswith('https')): | 128 if (self.request.url.startswith('https')): |
348 newUrl = newUrl.replace('http', 'https', 1) | 129 new_url = new_url.replace('http', 'https', 1) |
349 | 130 |
350 path = path.split('/') | 131 path = path.split('/') |
351 if len(path) > 0 and path[0] == 'chrome': | 132 if len(path) > 0 and path[0] == 'chrome': |
352 path.pop(0) | 133 path.pop(0) |
353 for channel in BranchUtility.GetAllBranchNames(): | 134 for channel in BranchUtility.GetAllBranchNames(): |
354 if channel in path: | 135 if channel in path: |
355 position = path.index(channel) | 136 position = path.index(channel) |
356 path.pop(position) | 137 path.pop(position) |
357 path.insert(0, channel) | 138 path.insert(0, channel) |
358 newUrl += '/'.join(path) | 139 new_url += '/'.join(path) |
359 self.redirect(newUrl) | 140 self.redirect(new_url) |
360 return True | 141 return True |
361 | 142 |
362 def get(self): | 143 def get(self): |
363 path = self.request.path | 144 path = self.request.path |
364 if self._RedirectSpecialCases(path): | 145 if self._RedirectSpecialCases(path): |
365 return | 146 return |
366 | 147 |
367 if path.startswith('/cron'): | 148 if path.startswith('/cron'): |
368 self._HandleCron(path) | 149 self._HandleCron(path) |
369 return | 150 return |
370 | 151 |
371 # Redirect paths like "directory" to "directory/". This is so relative | 152 # Redirect paths like "directory" to "directory/". This is so relative |
372 # file paths will know to treat this as a directory. | 153 # file paths will know to treat this as a directory. |
373 if os.path.splitext(path)[1] == '' and path[-1] != '/': | 154 if os.path.splitext(path)[1] == '' and path[-1] != '/': |
374 self.redirect(path + '/') | 155 self.redirect(path + '/') |
375 return | 156 return |
376 | 157 |
377 path = path.strip('/') | 158 path = path.strip('/') |
378 if not self._RedirectFromCodeDotGoogleDotCom(path): | 159 if self._RedirectFromCodeDotGoogleDotCom(path): |
379 self._HandleGet(path) | 160 return |
| 161 |
| 162 self._HandleGet(path) |
OLD | NEW |