OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache. | |
6 | |
7 /* | |
8 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | |
9 * | |
10 * Redistribution and use in source and binary forms, with or without | |
11 * modification, are permitted provided that the following conditions | |
12 * are met: | |
13 * 1. Redistributions of source code must retain the above copyright | |
14 * notice, this list of conditions and the following disclaimer. | |
15 * 2. Redistributions in binary form must reproduce the above copyright | |
16 * notice, this list of conditions and the following disclaimer in the | |
17 * documentation and/or other materials provided with the distribution. | |
18 * | |
19 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
27 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 */ | |
31 | |
32 #include "webkit/browser/appcache/manifest_parser.h" | |
33 | |
34 #include "base/command_line.h" | |
35 #include "base/i18n/icu_string_conversions.h" | |
36 #include "base/logging.h" | |
37 #include "base/strings/utf_string_conversions.h" | |
38 #include "url/gurl.h" | |
39 | |
40 namespace appcache { | |
41 | |
namespace {

// Section of the manifest the parser is currently inside. The section
// changes whenever a header line such as "CACHE:" or "NETWORK:" is seen.
// These types are private to this file, so they live in the anonymous
// namespace rather than being visible to the rest of namespace appcache.
enum Mode {
  EXPLICIT,          // "CACHE:" section; also the mode before any header.
  INTERCEPT,         // "CHROMIUM-INTERCEPT:" section.
  FALLBACK,          // "FALLBACK:" section.
  ONLINE_WHITELIST,  // "NETWORK:" section.
  UNKNOWN_MODE,      // Unrecognized "<something>:" header; lines are skipped.
};

// Action named by the second token of a CHROMIUM-INTERCEPT entry.
enum InterceptVerb {
  RETURN,        // The token was "return".
  EXECUTE,       // The token was "execute" and executable handlers are enabled.
  UNKNOWN_VERB,  // Anything else; the entry is skipped.
};

// Helper function used to identify 'isPattern' annotations.
// |line_p| points just past a URL token and |line_end| is the end of the
// line. Returns true only if the remainder of the line, after skipping
// leading spaces and tabs, is exactly "isPattern".
// Note: trailing whitespace has already been trimmed from the line.
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  // Skip whitespace separating the resource url from the annotation.
  for (; line_p < line_end; ++line_p) {
    if (*line_p != '\t' && *line_p != ' ')
      break;
  }

  // Nothing follows the url, so there is no annotation at all.
  if (line_p == line_end)
    return false;

  const std::wstring annotation(line_p, line_end - line_p);
  return annotation == L"isPattern";
}

}  // namespace
72 | |
73 Manifest::Manifest() | |
74 : online_whitelist_all(false), | |
75 did_ignore_intercept_namespaces(false) { | |
76 } | |
77 | |
78 Manifest::~Manifest() {} | |
79 | |
80 bool ParseManifest(const GURL& manifest_url, const char* data, int length, | |
81 ParseMode parse_mode, Manifest& manifest) { | |
82 // This is an implementation of the parsing algorithm specified in | |
83 // the HTML5 offline web application docs: | |
84 // http://www.w3.org/TR/html5/offline.html | |
85 // Do not modify it without consulting those docs. | |
86 // Though you might be tempted to convert these wstrings to UTF-8 or | |
87 // base::string16, this implementation seems simpler given the constraints. | |
88 | |
89 const wchar_t kSignature[] = L"CACHE MANIFEST"; | |
90 const size_t kSignatureLength = arraysize(kSignature) - 1; | |
91 const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST"; | |
92 const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1; | |
93 | |
94 DCHECK(manifest.explicit_urls.empty()); | |
95 DCHECK(manifest.fallback_namespaces.empty()); | |
96 DCHECK(manifest.online_whitelist_namespaces.empty()); | |
97 DCHECK(!manifest.online_whitelist_all); | |
98 DCHECK(!manifest.did_ignore_intercept_namespaces); | |
99 | |
100 Mode mode = EXPLICIT; | |
101 | |
102 std::wstring data_string; | |
103 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string); | |
104 // until UTF8ToWide uses 0xFFFD Unicode replacement character. | |
105 base::CodepageToWide(std::string(data, length), base::kCodepageUTF8, | |
106 base::OnStringConversionError::SUBSTITUTE, &data_string); | |
107 const wchar_t* p = data_string.c_str(); | |
108 const wchar_t* end = p + data_string.length(); | |
109 | |
110 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" | |
111 // Example: "CACHE MANIFEST #comment" is a valid signature. | |
112 // Example: "CACHE MANIFEST;V2" is not. | |
113 | |
114 // When the input data starts with a UTF-8 Byte-Order-Mark | |
115 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a | |
116 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists. | |
117 int bom_offset = 0; | |
118 if (!data_string.empty() && data_string[0] == 0xFEFF) { | |
119 bom_offset = 1; | |
120 ++p; | |
121 } | |
122 | |
123 if (p >= end) | |
124 return false; | |
125 | |
126 // Check for a supported signature and skip p past it. | |
127 if (0 == data_string.compare(bom_offset, kSignatureLength, | |
128 kSignature)) { | |
129 p += kSignatureLength; | |
130 } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength, | |
131 kChromiumSignature)) { | |
132 p += kChromiumSignatureLength; | |
133 } else { | |
134 return false; | |
135 } | |
136 | |
137 // Character after "CACHE MANIFEST" must be whitespace. | |
138 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') | |
139 return false; | |
140 | |
141 // Skip to the end of the line. | |
142 while (p < end && *p != '\r' && *p != '\n') | |
143 ++p; | |
144 | |
145 while (1) { | |
146 // Skip whitespace | |
147 while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t')) | |
148 ++p; | |
149 | |
150 if (p == end) | |
151 break; | |
152 | |
153 const wchar_t* line_start = p; | |
154 | |
155 // Find the end of the line | |
156 while (p < end && *p != '\r' && *p != '\n') | |
157 ++p; | |
158 | |
159 // Check if we have a comment | |
160 if (*line_start == '#') | |
161 continue; | |
162 | |
163 // Get rid of trailing whitespace | |
164 const wchar_t* tmp = p - 1; | |
165 while (tmp > line_start && (*tmp == ' ' || *tmp == '\t')) | |
166 --tmp; | |
167 | |
168 std::wstring line(line_start, tmp - line_start + 1); | |
169 | |
170 if (line == L"CACHE:") { | |
171 mode = EXPLICIT; | |
172 } else if (line == L"FALLBACK:") { | |
173 mode = FALLBACK; | |
174 } else if (line == L"NETWORK:") { | |
175 mode = ONLINE_WHITELIST; | |
176 } else if (line == L"CHROMIUM-INTERCEPT:") { | |
177 mode = INTERCEPT; | |
178 } else if (*(line.end() - 1) == ':') { | |
179 mode = UNKNOWN_MODE; | |
180 } else if (mode == UNKNOWN_MODE) { | |
181 continue; | |
182 } else if (line == L"*" && mode == ONLINE_WHITELIST) { | |
183 manifest.online_whitelist_all = true; | |
184 continue; | |
185 } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) { | |
186 const wchar_t *line_p = line.c_str(); | |
187 const wchar_t *line_end = line_p + line.length(); | |
188 | |
189 // Look for whitespace separating the URL from subsequent ignored tokens. | |
190 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
191 ++line_p; | |
192 | |
193 base::string16 url16; | |
194 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &url16); | |
195 GURL url = manifest_url.Resolve(url16); | |
196 if (!url.is_valid()) | |
197 continue; | |
198 if (url.has_ref()) { | |
199 GURL::Replacements replacements; | |
200 replacements.ClearRef(); | |
201 url = url.ReplaceComponents(replacements); | |
202 } | |
203 | |
204 // Scheme component must be the same as the manifest URL's. | |
205 if (url.scheme() != manifest_url.scheme()) { | |
206 continue; | |
207 } | |
208 | |
209 // See http://code.google.com/p/chromium/issues/detail?id=69594 | |
210 // We willfully violate the HTML5 spec at this point in order | |
211 // to support the appcaching of cross-origin HTTPS resources. | |
212 // Per the spec, EXPLICIT cross-origin HTTS resources should be | |
213 // ignored here. We've opted for a milder constraint and allow | |
214 // caching unless the resource has a "no-store" header. That | |
215 // condition is enforced in AppCacheUpdateJob. | |
216 | |
217 if (mode == EXPLICIT) { | |
218 manifest.explicit_urls.insert(url.spec()); | |
219 } else { | |
220 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end); | |
221 manifest.online_whitelist_namespaces.push_back( | |
222 Namespace(APPCACHE_NETWORK_NAMESPACE, url, GURL(), is_pattern)); | |
223 } | |
224 } else if (mode == INTERCEPT) { | |
225 if (parse_mode != PARSE_MANIFEST_ALLOWING_INTERCEPTS) { | |
226 manifest.did_ignore_intercept_namespaces = true; | |
227 continue; | |
228 } | |
229 | |
230 // Lines of the form, | |
231 // <urlnamespace> <intercept_type> <targeturl> | |
232 const wchar_t* line_p = line.c_str(); | |
233 const wchar_t* line_end = line_p + line.length(); | |
234 | |
235 // Look for first whitespace separating the url namespace from | |
236 // the intercept type. | |
237 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
238 ++line_p; | |
239 | |
240 if (line_p == line_end) | |
241 continue; // There was no whitespace separating the URLs. | |
242 | |
243 base::string16 namespace_url16; | |
244 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16); | |
245 GURL namespace_url = manifest_url.Resolve(namespace_url16); | |
246 if (!namespace_url.is_valid()) | |
247 continue; | |
248 if (namespace_url.has_ref()) { | |
249 GURL::Replacements replacements; | |
250 replacements.ClearRef(); | |
251 namespace_url = namespace_url.ReplaceComponents(replacements); | |
252 } | |
253 | |
254 // The namespace URL must have the same scheme, host and port | |
255 // as the manifest's URL. | |
256 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) | |
257 continue; | |
258 | |
259 // Skip whitespace separating namespace from the type. | |
260 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) | |
261 ++line_p; | |
262 | |
263 // Look for whitespace separating the type from the target url. | |
264 const wchar_t* type_start = line_p; | |
265 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
266 ++line_p; | |
267 | |
268 // Look for a type value we understand, otherwise skip the line. | |
269 InterceptVerb verb = UNKNOWN_VERB; | |
270 std::wstring type(type_start, line_p - type_start); | |
271 if (type == L"return") { | |
272 verb = RETURN; | |
273 } else if (type == L"execute" && | |
274 CommandLine::ForCurrentProcess()->HasSwitch( | |
275 kEnableExecutableHandlers)) { | |
276 verb = EXECUTE; | |
277 } | |
278 if (verb == UNKNOWN_VERB) | |
279 continue; | |
280 | |
281 // Skip whitespace separating type from the target_url. | |
282 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) | |
283 ++line_p; | |
284 | |
285 // Look for whitespace separating the URL from subsequent ignored tokens. | |
286 const wchar_t* target_url_start = line_p; | |
287 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
288 ++line_p; | |
289 | |
290 base::string16 target_url16; | |
291 base::WideToUTF16(target_url_start, line_p - target_url_start, | |
292 &target_url16); | |
293 GURL target_url = manifest_url.Resolve(target_url16); | |
294 if (!target_url.is_valid()) | |
295 continue; | |
296 | |
297 if (target_url.has_ref()) { | |
298 GURL::Replacements replacements; | |
299 replacements.ClearRef(); | |
300 target_url = target_url.ReplaceComponents(replacements); | |
301 } | |
302 if (manifest_url.GetOrigin() != target_url.GetOrigin()) | |
303 continue; | |
304 | |
305 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end); | |
306 manifest.intercept_namespaces.push_back( | |
307 Namespace(APPCACHE_INTERCEPT_NAMESPACE, namespace_url, | |
308 target_url, is_pattern, verb == EXECUTE)); | |
309 } else if (mode == FALLBACK) { | |
310 const wchar_t* line_p = line.c_str(); | |
311 const wchar_t* line_end = line_p + line.length(); | |
312 | |
313 // Look for whitespace separating the two URLs | |
314 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
315 ++line_p; | |
316 | |
317 if (line_p == line_end) { | |
318 // There was no whitespace separating the URLs. | |
319 continue; | |
320 } | |
321 | |
322 base::string16 namespace_url16; | |
323 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16); | |
324 GURL namespace_url = manifest_url.Resolve(namespace_url16); | |
325 if (!namespace_url.is_valid()) | |
326 continue; | |
327 if (namespace_url.has_ref()) { | |
328 GURL::Replacements replacements; | |
329 replacements.ClearRef(); | |
330 namespace_url = namespace_url.ReplaceComponents(replacements); | |
331 } | |
332 | |
333 // Fallback namespace URL must have the same scheme, host and port | |
334 // as the manifest's URL. | |
335 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) { | |
336 continue; | |
337 } | |
338 | |
339 // Skip whitespace separating fallback namespace from URL. | |
340 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) | |
341 ++line_p; | |
342 | |
343 // Look for whitespace separating the URL from subsequent ignored tokens. | |
344 const wchar_t* fallback_start = line_p; | |
345 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
346 ++line_p; | |
347 | |
348 base::string16 fallback_url16; | |
349 base::WideToUTF16(fallback_start, line_p - fallback_start, | |
350 &fallback_url16); | |
351 GURL fallback_url = manifest_url.Resolve(fallback_url16); | |
352 if (!fallback_url.is_valid()) | |
353 continue; | |
354 if (fallback_url.has_ref()) { | |
355 GURL::Replacements replacements; | |
356 replacements.ClearRef(); | |
357 fallback_url = fallback_url.ReplaceComponents(replacements); | |
358 } | |
359 | |
360 // Fallback entry URL must have the same scheme, host and port | |
361 // as the manifest's URL. | |
362 if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) { | |
363 continue; | |
364 } | |
365 | |
366 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end); | |
367 | |
368 // Store regardless of duplicate namespace URL. Only first match | |
369 // will ever be used. | |
370 manifest.fallback_namespaces.push_back( | |
371 Namespace(APPCACHE_FALLBACK_NAMESPACE, namespace_url, | |
372 fallback_url, is_pattern)); | |
373 } else { | |
374 NOTREACHED(); | |
375 } | |
376 } | |
377 | |
378 return true; | |
379 } | |
380 | |
381 } // namespace appcache | |
OLD | NEW |