OLD | NEW |
| (Empty) |
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
6 | |
/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
31 | |
#include "content/browser/appcache/manifest_parser.h"

#include <string>

#include "base/command_line.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/strings/utf_string_conversions.h"
#include "url/gurl.h"
39 | |
40 namespace content { | |
41 | |
namespace {

// Reports whether the text that follows a URL token on a manifest line is
// exactly the 'isPattern' annotation.
//
// |line_p| points just past the URL token and |line_end| is one past the last
// character of the line. Leading spaces and tabs between the URL and the
// annotation are skipped; everything that remains must match "isPattern"
// exactly (case-sensitive, no extra characters) for the result to be true.
// An empty or whitespace-only remainder yields false.
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  static const wchar_t kAnnotation[] = L"isPattern";
  const size_t kAnnotationLength =
      sizeof(kAnnotation) / sizeof(kAnnotation[0]) - 1;

  // Skip whitespace separating the resource url from the annotation.
  // Note: the caller passes a line whose trailing whitespace is already
  // trimmed, so anything left after the annotation makes the match fail.
  while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
    ++line_p;

  // Compare in place; a length mismatch (including an empty remainder) can
  // never be the annotation, so no temporary string is needed.
  if (static_cast<size_t>(line_end - line_p) != kAnnotationLength)
    return false;
  return std::char_traits<wchar_t>::compare(line_p, kAnnotation,
                                            kAnnotationLength) == 0;
}

}  // namespace
58 | |
namespace {

// The manifest section the parser is currently inside. ParseManifest starts
// in EXPLICIT and switches when it sees a section header line:
//   "CACHE:"               -> EXPLICIT
//   "CHROMIUM-INTERCEPT:"  -> INTERCEPT
//   "FALLBACK:"            -> FALLBACK
//   "NETWORK:"             -> ONLINE_WHITELIST
//   any other line ending in ':' -> UNKNOWN_MODE (entries are skipped)
enum Mode {
  EXPLICIT,
  INTERCEPT,
  FALLBACK,
  ONLINE_WHITELIST,
  UNKNOWN_MODE,
};

// The action named by the second token of a CHROMIUM-INTERCEPT entry:
// "return" maps to RETURN and "execute" maps to EXECUTE. Anything else stays
// UNKNOWN_VERB, which causes the entry to be skipped.
enum InterceptVerb {
  RETURN,
  EXECUTE,
  UNKNOWN_VERB,
};

}  // namespace
72 | |
73 Manifest::Manifest() | |
74 : online_whitelist_all(false), | |
75 did_ignore_intercept_namespaces(false) { | |
76 } | |
77 | |
78 Manifest::~Manifest() {} | |
79 | |
80 bool ParseManifest(const GURL& manifest_url, const char* data, int length, | |
81 ParseMode parse_mode, Manifest& manifest) { | |
82 // This is an implementation of the parsing algorithm specified in | |
83 // the HTML5 offline web application docs: | |
84 // http://www.w3.org/TR/html5/offline.html | |
85 // Do not modify it without consulting those docs. | |
86 // Though you might be tempted to convert these wstrings to UTF-8 or | |
87 // base::string16, this implementation seems simpler given the constraints. | |
88 | |
89 const wchar_t kSignature[] = L"CACHE MANIFEST"; | |
90 const size_t kSignatureLength = arraysize(kSignature) - 1; | |
91 const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST"; | |
92 const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1; | |
93 | |
94 DCHECK(manifest.explicit_urls.empty()); | |
95 DCHECK(manifest.fallback_namespaces.empty()); | |
96 DCHECK(manifest.online_whitelist_namespaces.empty()); | |
97 DCHECK(!manifest.online_whitelist_all); | |
98 DCHECK(!manifest.did_ignore_intercept_namespaces); | |
99 | |
100 Mode mode = EXPLICIT; | |
101 | |
102 std::wstring data_string; | |
103 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string); | |
104 // until UTF8ToWide uses 0xFFFD Unicode replacement character. | |
105 base::CodepageToWide(std::string(data, length), base::kCodepageUTF8, | |
106 base::OnStringConversionError::SUBSTITUTE, &data_string); | |
107 const wchar_t* p = data_string.c_str(); | |
108 const wchar_t* end = p + data_string.length(); | |
109 | |
110 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" | |
111 // Example: "CACHE MANIFEST #comment" is a valid signature. | |
112 // Example: "CACHE MANIFEST;V2" is not. | |
113 | |
114 // When the input data starts with a UTF-8 Byte-Order-Mark | |
115 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a | |
116 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists. | |
117 int bom_offset = 0; | |
118 if (!data_string.empty() && data_string[0] == 0xFEFF) { | |
119 bom_offset = 1; | |
120 ++p; | |
121 } | |
122 | |
123 if (p >= end) | |
124 return false; | |
125 | |
126 // Check for a supported signature and skip p past it. | |
127 if (0 == data_string.compare(bom_offset, kSignatureLength, | |
128 kSignature)) { | |
129 p += kSignatureLength; | |
130 } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength, | |
131 kChromiumSignature)) { | |
132 p += kChromiumSignatureLength; | |
133 } else { | |
134 return false; | |
135 } | |
136 | |
137 // Character after "CACHE MANIFEST" must be whitespace. | |
138 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') | |
139 return false; | |
140 | |
141 // Skip to the end of the line. | |
142 while (p < end && *p != '\r' && *p != '\n') | |
143 ++p; | |
144 | |
145 while (1) { | |
146 // Skip whitespace | |
147 while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t')) | |
148 ++p; | |
149 | |
150 if (p == end) | |
151 break; | |
152 | |
153 const wchar_t* line_start = p; | |
154 | |
155 // Find the end of the line | |
156 while (p < end && *p != '\r' && *p != '\n') | |
157 ++p; | |
158 | |
159 // Check if we have a comment | |
160 if (*line_start == '#') | |
161 continue; | |
162 | |
163 // Get rid of trailing whitespace | |
164 const wchar_t* tmp = p - 1; | |
165 while (tmp > line_start && (*tmp == ' ' || *tmp == '\t')) | |
166 --tmp; | |
167 | |
168 std::wstring line(line_start, tmp - line_start + 1); | |
169 | |
170 if (line == L"CACHE:") { | |
171 mode = EXPLICIT; | |
172 } else if (line == L"FALLBACK:") { | |
173 mode = FALLBACK; | |
174 } else if (line == L"NETWORK:") { | |
175 mode = ONLINE_WHITELIST; | |
176 } else if (line == L"CHROMIUM-INTERCEPT:") { | |
177 mode = INTERCEPT; | |
178 } else if (*(line.end() - 1) == ':') { | |
179 mode = UNKNOWN_MODE; | |
180 } else if (mode == UNKNOWN_MODE) { | |
181 continue; | |
182 } else if (line == L"*" && mode == ONLINE_WHITELIST) { | |
183 manifest.online_whitelist_all = true; | |
184 continue; | |
185 } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) { | |
186 const wchar_t *line_p = line.c_str(); | |
187 const wchar_t *line_end = line_p + line.length(); | |
188 | |
189 // Look for whitespace separating the URL from subsequent ignored tokens. | |
190 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
191 ++line_p; | |
192 | |
193 base::string16 url16; | |
194 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &url16); | |
195 GURL url = manifest_url.Resolve(url16); | |
196 if (!url.is_valid()) | |
197 continue; | |
198 if (url.has_ref()) { | |
199 GURL::Replacements replacements; | |
200 replacements.ClearRef(); | |
201 url = url.ReplaceComponents(replacements); | |
202 } | |
203 | |
204 // Scheme component must be the same as the manifest URL's. | |
205 if (url.scheme() != manifest_url.scheme()) { | |
206 continue; | |
207 } | |
208 | |
209 // See http://code.google.com/p/chromium/issues/detail?id=69594 | |
210 // We willfully violate the HTML5 spec at this point in order | |
211 // to support the appcaching of cross-origin HTTPS resources. | |
212 // Per the spec, EXPLICIT cross-origin HTTS resources should be | |
213 // ignored here. We've opted for a milder constraint and allow | |
214 // caching unless the resource has a "no-store" header. That | |
215 // condition is enforced in AppCacheUpdateJob. | |
216 | |
217 if (mode == EXPLICIT) { | |
218 manifest.explicit_urls.insert(url.spec()); | |
219 } else { | |
220 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end); | |
221 manifest.online_whitelist_namespaces.push_back( | |
222 AppCacheNamespace(APPCACHE_NETWORK_NAMESPACE, url, GURL(), | |
223 is_pattern)); | |
224 } | |
225 } else if (mode == INTERCEPT) { | |
226 if (parse_mode != PARSE_MANIFEST_ALLOWING_INTERCEPTS) { | |
227 manifest.did_ignore_intercept_namespaces = true; | |
228 continue; | |
229 } | |
230 | |
231 // Lines of the form, | |
232 // <urlnamespace> <intercept_type> <targeturl> | |
233 const wchar_t* line_p = line.c_str(); | |
234 const wchar_t* line_end = line_p + line.length(); | |
235 | |
236 // Look for first whitespace separating the url namespace from | |
237 // the intercept type. | |
238 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
239 ++line_p; | |
240 | |
241 if (line_p == line_end) | |
242 continue; // There was no whitespace separating the URLs. | |
243 | |
244 base::string16 namespace_url16; | |
245 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16); | |
246 GURL namespace_url = manifest_url.Resolve(namespace_url16); | |
247 if (!namespace_url.is_valid()) | |
248 continue; | |
249 if (namespace_url.has_ref()) { | |
250 GURL::Replacements replacements; | |
251 replacements.ClearRef(); | |
252 namespace_url = namespace_url.ReplaceComponents(replacements); | |
253 } | |
254 | |
255 // The namespace URL must have the same scheme, host and port | |
256 // as the manifest's URL. | |
257 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) | |
258 continue; | |
259 | |
260 // Skip whitespace separating namespace from the type. | |
261 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) | |
262 ++line_p; | |
263 | |
264 // Look for whitespace separating the type from the target url. | |
265 const wchar_t* type_start = line_p; | |
266 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
267 ++line_p; | |
268 | |
269 // Look for a type value we understand, otherwise skip the line. | |
270 InterceptVerb verb = UNKNOWN_VERB; | |
271 std::wstring type(type_start, line_p - type_start); | |
272 if (type == L"return") { | |
273 verb = RETURN; | |
274 } else if (type == L"execute" && | |
275 base::CommandLine::ForCurrentProcess()->HasSwitch( | |
276 kEnableExecutableHandlers)) { | |
277 verb = EXECUTE; | |
278 } | |
279 if (verb == UNKNOWN_VERB) | |
280 continue; | |
281 | |
282 // Skip whitespace separating type from the target_url. | |
283 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) | |
284 ++line_p; | |
285 | |
286 // Look for whitespace separating the URL from subsequent ignored tokens. | |
287 const wchar_t* target_url_start = line_p; | |
288 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
289 ++line_p; | |
290 | |
291 base::string16 target_url16; | |
292 base::WideToUTF16(target_url_start, line_p - target_url_start, | |
293 &target_url16); | |
294 GURL target_url = manifest_url.Resolve(target_url16); | |
295 if (!target_url.is_valid()) | |
296 continue; | |
297 | |
298 if (target_url.has_ref()) { | |
299 GURL::Replacements replacements; | |
300 replacements.ClearRef(); | |
301 target_url = target_url.ReplaceComponents(replacements); | |
302 } | |
303 if (manifest_url.GetOrigin() != target_url.GetOrigin()) | |
304 continue; | |
305 | |
306 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end); | |
307 manifest.intercept_namespaces.push_back( | |
308 AppCacheNamespace(APPCACHE_INTERCEPT_NAMESPACE, namespace_url, | |
309 target_url, is_pattern, verb == EXECUTE)); | |
310 } else if (mode == FALLBACK) { | |
311 const wchar_t* line_p = line.c_str(); | |
312 const wchar_t* line_end = line_p + line.length(); | |
313 | |
314 // Look for whitespace separating the two URLs | |
315 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
316 ++line_p; | |
317 | |
318 if (line_p == line_end) { | |
319 // There was no whitespace separating the URLs. | |
320 continue; | |
321 } | |
322 | |
323 base::string16 namespace_url16; | |
324 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16); | |
325 GURL namespace_url = manifest_url.Resolve(namespace_url16); | |
326 if (!namespace_url.is_valid()) | |
327 continue; | |
328 if (namespace_url.has_ref()) { | |
329 GURL::Replacements replacements; | |
330 replacements.ClearRef(); | |
331 namespace_url = namespace_url.ReplaceComponents(replacements); | |
332 } | |
333 | |
334 // Fallback namespace URL must have the same scheme, host and port | |
335 // as the manifest's URL. | |
336 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) { | |
337 continue; | |
338 } | |
339 | |
340 // Skip whitespace separating fallback namespace from URL. | |
341 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) | |
342 ++line_p; | |
343 | |
344 // Look for whitespace separating the URL from subsequent ignored tokens. | |
345 const wchar_t* fallback_start = line_p; | |
346 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | |
347 ++line_p; | |
348 | |
349 base::string16 fallback_url16; | |
350 base::WideToUTF16(fallback_start, line_p - fallback_start, | |
351 &fallback_url16); | |
352 GURL fallback_url = manifest_url.Resolve(fallback_url16); | |
353 if (!fallback_url.is_valid()) | |
354 continue; | |
355 if (fallback_url.has_ref()) { | |
356 GURL::Replacements replacements; | |
357 replacements.ClearRef(); | |
358 fallback_url = fallback_url.ReplaceComponents(replacements); | |
359 } | |
360 | |
361 // Fallback entry URL must have the same scheme, host and port | |
362 // as the manifest's URL. | |
363 if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) { | |
364 continue; | |
365 } | |
366 | |
367 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end); | |
368 | |
369 // Store regardless of duplicate namespace URL. Only first match | |
370 // will ever be used. | |
371 manifest.fallback_namespaces.push_back( | |
372 AppCacheNamespace(APPCACHE_FALLBACK_NAMESPACE, namespace_url, | |
373 fallback_url, is_pattern)); | |
374 } else { | |
375 NOTREACHED(); | |
376 } | |
377 } | |
378 | |
379 return true; | |
380 } | |
381 | |
382 } // namespace content | |
OLD | NEW |