Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(253)

Side by Side Diff: webkit/browser/appcache/manifest_parser.cc

Issue 344493002: Move all remaining appcache-related code to content namespace (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
6
7 /*
8 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
27 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "webkit/browser/appcache/manifest_parser.h"
33
34 #include "base/command_line.h"
35 #include "base/i18n/icu_string_conversions.h"
36 #include "base/logging.h"
37 #include "base/strings/utf_string_conversions.h"
38 #include "url/gurl.h"
39
40 namespace appcache {
41
42 namespace {
43
// Helper function used to identify 'isPattern' annotations.
//
// Examines the text in [line_p, line_end): leading spaces and tabs are
// skipped, and the function returns true only when everything that remains is
// exactly the token "isPattern" (case-sensitive, nothing following it).
// Returns false for an empty, all-whitespace or inverted range.
//
// The caller is expected to have trimmed trailing whitespace already; trailing
// whitespace after the token would make the comparison fail.
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  static const wchar_t kIsPattern[] = L"isPattern";
  // Number of characters in the token, excluding the terminating NUL.
  const size_t kIsPatternLength =
      sizeof(kIsPattern) / sizeof(kIsPattern[0]) - 1;

  // Skip whitespace separating the resource url from the annotation.
  while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
    ++line_p;

  // '>=' rather than '==' so that an inverted range can never produce a
  // negative length below.
  if (line_p >= line_end)
    return false;

  // Compare in place instead of copying the tail into a temporary string.
  const size_t remaining = static_cast<size_t>(line_end - line_p);
  return remaining == kIsPatternLength &&
         std::char_traits<wchar_t>::compare(line_p, kIsPattern,
                                            kIsPatternLength) == 0;
}
56
57 }
58
// Section of the manifest that ParseManifest() is currently reading. The
// value is updated each time a section header line is encountered.
59 enum Mode {
60 EXPLICIT,  // "CACHE:" section; also the mode before any header is seen.
61 INTERCEPT,  // "CHROMIUM-INTERCEPT:" section.
62 FALLBACK,  // "FALLBACK:" section.
63 ONLINE_WHITELIST,  // "NETWORK:" section.
64 UNKNOWN_MODE,  // Any other line ending in ':'; entries under it are skipped.
65 };
66
// Intercept type token found on a line of the CHROMIUM-INTERCEPT section, as
// classified by ParseManifest().
67 enum InterceptVerb {
68 RETURN,  // The token "return".
69 EXECUTE,  // The token "execute", accepted only when the
// kEnableExecutableHandlers command-line switch is present.
70 UNKNOWN_VERB,  // Anything else; the line is skipped.
71 };
72
// Constructs a manifest with both boolean flags cleared, which is the state
// ParseManifest() DCHECKs for on entry.
73 Manifest::Manifest()
74 : online_whitelist_all(false),
75 did_ignore_intercept_namespaces(false) {
76 }
77
// Destructor with an empty body; no explicit cleanup is performed here.
78 Manifest::~Manifest() {}
79
// Parses the appcache manifest text in |data| and records its entries in
// |manifest|.
//
// Parameters:
//   manifest_url - URL of the manifest itself. Every URL token is resolved
//                  against it, and it supplies the scheme/origin that entries
//                  are checked against.
//   data, length - the manifest bytes; converted using the UTF-8 codepage
//                  with OnStringConversionError::SUBSTITUTE before parsing.
//   parse_mode   - lines in the CHROMIUM-INTERCEPT section are recorded only
//                  when this equals PARSE_MANIFEST_ALLOWING_INTERCEPTS;
//                  otherwise such lines are dropped and
//                  did_ignore_intercept_namespaces is set to true.
//   manifest     - output. Most of its fields are DCHECKed to be empty/false
//                  on entry.
//
// Returns false when nothing follows the optional BOM, when the data does not
// start with a supported signature, or when the signature is followed by a
// character other than space, tab, CR or LF. Returns true otherwise;
// individual lines that fail validation are skipped rather than reported.
80 bool ParseManifest(const GURL& manifest_url, const char* data, int length,
81 ParseMode parse_mode, Manifest& manifest) {
82 // This is an implementation of the parsing algorithm specified in
83 // the HTML5 offline web application docs:
84 // http://www.w3.org/TR/html5/offline.html
85 // Do not modify it without consulting those docs.
86 // Though you might be tempted to convert these wstrings to UTF-8 or
87 // base::string16, this implementation seems simpler given the constraints.
88
89 const wchar_t kSignature[] = L"CACHE MANIFEST";
90 const size_t kSignatureLength = arraysize(kSignature) - 1;
91 const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST";
92 const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1;
93
94 DCHECK(manifest.explicit_urls.empty());
95 DCHECK(manifest.fallback_namespaces.empty());
96 DCHECK(manifest.online_whitelist_namespaces.empty());
97 DCHECK(!manifest.online_whitelist_all);
98 DCHECK(!manifest.did_ignore_intercept_namespaces);
99
// Entries that appear before any section header are treated as EXPLICIT.
100 Mode mode = EXPLICIT;
101
102 std::wstring data_string;
103 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
104 // until UTF8ToWide uses 0xFFFD Unicode replacement character.
105 base::CodepageToWide(std::string(data, length), base::kCodepageUTF8,
106 base::OnStringConversionError::SUBSTITUTE, &data_string);
// |p| is the parse cursor; |end| is one past the last converted character.
107 const wchar_t* p = data_string.c_str();
108 const wchar_t* end = p + data_string.length();
109
110 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?"
111 // Example: "CACHE MANIFEST #comment" is a valid signature.
112 // Example: "CACHE MANIFEST;V2" is not.
113
114 // When the input data starts with a UTF-8 Byte-Order-Mark
115 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a
116 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists.
117 int bom_offset = 0;
118 if (!data_string.empty() && data_string[0] == 0xFEFF) {
119 bom_offset = 1;
120 ++p;
121 }
122
// Nothing left after the optional BOM: not a manifest.
123 if (p >= end)
124 return false;
125
126 // Check for a supported signature and skip p past it.
127 if (0 == data_string.compare(bom_offset, kSignatureLength,
128 kSignature)) {
129 p += kSignatureLength;
130 } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength,
131 kChromiumSignature)) {
132 p += kChromiumSignatureLength;
133 } else {
134 return false;
135 }
136
137 // Character after "CACHE MANIFEST" must be whitespace.
// (Reaching the end of the data right after the signature is also accepted.)
138 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
139 return false;
140
141 // Skip to the end of the line.
142 while (p < end && *p != '\r' && *p != '\n')
143 ++p;
144
// Main loop: one iteration per non-blank line of the manifest body.
145 while (1) {
146 // Skip whitespace
// This also consumes line terminators and blank lines.
147 while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
148 ++p;
149
150 if (p == end)
151 break;
152
153 const wchar_t* line_start = p;
154
155 // Find the end of the line
156 while (p < end && *p != '\r' && *p != '\n')
157 ++p;
158
159 // Check if we have a comment
// |p| is already at the end of this line, so 'continue' moves to the next.
160 if (*line_start == '#')
161 continue;
162
163 // Get rid of trailing whitespace
// |line_start| points at a non-whitespace character (leading whitespace was
// skipped above), so the trimmed line always has at least one character.
164 const wchar_t* tmp = p - 1;
165 while (tmp > line_start && (*tmp == ' ' || *tmp == '\t'))
166 --tmp;
167
168 std::wstring line(line_start, tmp - line_start + 1);
169
// Section headers switch |mode|; every other line is an entry interpreted
// according to the current |mode|.
170 if (line == L"CACHE:") {
171 mode = EXPLICIT;
172 } else if (line == L"FALLBACK:") {
173 mode = FALLBACK;
174 } else if (line == L"NETWORK:") {
175 mode = ONLINE_WHITELIST;
176 } else if (line == L"CHROMIUM-INTERCEPT:") {
177 mode = INTERCEPT;
178 } else if (*(line.end() - 1) == ':') {
// Any other line ending in ':' is treated as an unrecognized section header.
179 mode = UNKNOWN_MODE;
180 } else if (mode == UNKNOWN_MODE) {
// Entries inside an unrecognized section are ignored.
181 continue;
182 } else if (line == L"*" && mode == ONLINE_WHITELIST) {
183 manifest.online_whitelist_all = true;
184 continue;
185 } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) {
186 const wchar_t *line_p = line.c_str();
187 const wchar_t *line_end = line_p + line.length();
188
189 // Look for whitespace separating the URL from subsequent ignored tokens.
190 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
191 ++line_p;
192
193 base::string16 url16;
194 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &url16);
195 GURL url = manifest_url.Resolve(url16);
196 if (!url.is_valid())
197 continue;
// Strip any fragment ("#ref") before the URL is stored.
198 if (url.has_ref()) {
199 GURL::Replacements replacements;
200 replacements.ClearRef();
201 url = url.ReplaceComponents(replacements);
202 }
203
204 // Scheme component must be the same as the manifest URL's.
205 if (url.scheme() != manifest_url.scheme()) {
206 continue;
207 }
208
209 // See http://code.google.com/p/chromium/issues/detail?id=69594
210 // We willfully violate the HTML5 spec at this point in order
211 // to support the appcaching of cross-origin HTTPS resources.
212 // Per the spec, EXPLICIT cross-origin HTTPS resources should be
213 // ignored here. We've opted for a milder constraint and allow
214 // caching unless the resource has a "no-store" header. That
215 // condition is enforced in AppCacheUpdateJob.
216
217 if (mode == EXPLICIT) {
218 manifest.explicit_urls.insert(url.spec());
219 } else {
// NETWORK entry: the text after the URL may carry the isPattern annotation.
220 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
221 manifest.online_whitelist_namespaces.push_back(
222 Namespace(APPCACHE_NETWORK_NAMESPACE, url, GURL(), is_pattern));
223 }
224 } else if (mode == INTERCEPT) {
225 if (parse_mode != PARSE_MANIFEST_ALLOWING_INTERCEPTS) {
// Remember that intercept entries were present but not honoured.
226 manifest.did_ignore_intercept_namespaces = true;
227 continue;
228 }
229
230 // Lines of the form,
231 // <urlnamespace> <intercept_type> <targeturl>
232 const wchar_t* line_p = line.c_str();
233 const wchar_t* line_end = line_p + line.length();
234
235 // Look for first whitespace separating the url namespace from
236 // the intercept type.
237 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
238 ++line_p;
239
240 if (line_p == line_end)
241 continue; // There was no whitespace separating the URLs.
242
243 base::string16 namespace_url16;
244 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
245 GURL namespace_url = manifest_url.Resolve(namespace_url16);
246 if (!namespace_url.is_valid())
247 continue;
248 if (namespace_url.has_ref()) {
249 GURL::Replacements replacements;
250 replacements.ClearRef();
251 namespace_url = namespace_url.ReplaceComponents(replacements);
252 }
253
254 // The namespace URL must have the same scheme, host and port
255 // as the manifest's URL.
256 if (manifest_url.GetOrigin() != namespace_url.GetOrigin())
257 continue;
258
259 // Skip whitespace separating namespace from the type.
260 while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
261 ++line_p;
262
263 // Look for whitespace separating the type from the target url.
264 const wchar_t* type_start = line_p;
265 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
266 ++line_p;
267
268 // Look for a type value we understand, otherwise skip the line.
269 InterceptVerb verb = UNKNOWN_VERB;
270 std::wstring type(type_start, line_p - type_start);
271 if (type == L"return") {
272 verb = RETURN;
273 } else if (type == L"execute" &&
274 CommandLine::ForCurrentProcess()->HasSwitch(
275 kEnableExecutableHandlers)) {
276 verb = EXECUTE;
277 }
278 if (verb == UNKNOWN_VERB)
279 continue;
280
281 // Skip whitespace separating type from the target_url.
282 while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
283 ++line_p;
284
285 // Look for whitespace separating the URL from subsequent ignored tokens.
286 const wchar_t* target_url_start = line_p;
287 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
288 ++line_p;
289
// NOTE(review): when the line ends right after the verb, the target token is
// empty here and is still passed to Resolve(); confirm that the result of
// resolving an empty string is the intended behaviour for such lines.
290 base::string16 target_url16;
291 base::WideToUTF16(target_url_start, line_p - target_url_start,
292 &target_url16);
293 GURL target_url = manifest_url.Resolve(target_url16);
294 if (!target_url.is_valid())
295 continue;
296
297 if (target_url.has_ref()) {
298 GURL::Replacements replacements;
299 replacements.ClearRef();
300 target_url = target_url.ReplaceComponents(replacements);
301 }
// The target URL must also share the manifest's origin.
302 if (manifest_url.GetOrigin() != target_url.GetOrigin())
303 continue;
304
305 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
306 manifest.intercept_namespaces.push_back(
307 Namespace(APPCACHE_INTERCEPT_NAMESPACE, namespace_url,
308 target_url, is_pattern, verb == EXECUTE));
309 } else if (mode == FALLBACK) {
// Lines of the form,
// <urlnamespace> <fallbackurl>
310 const wchar_t* line_p = line.c_str();
311 const wchar_t* line_end = line_p + line.length();
312
313 // Look for whitespace separating the two URLs
314 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
315 ++line_p;
316
317 if (line_p == line_end) {
318 // There was no whitespace separating the URLs.
319 continue;
320 }
321
322 base::string16 namespace_url16;
323 base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
324 GURL namespace_url = manifest_url.Resolve(namespace_url16);
325 if (!namespace_url.is_valid())
326 continue;
327 if (namespace_url.has_ref()) {
328 GURL::Replacements replacements;
329 replacements.ClearRef();
330 namespace_url = namespace_url.ReplaceComponents(replacements);
331 }
332
333 // Fallback namespace URL must have the same scheme, host and port
334 // as the manifest's URL.
335 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) {
336 continue;
337 }
338
339 // Skip whitespace separating fallback namespace from URL.
// Trailing whitespace was trimmed from |line|, so a token always follows.
340 while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
341 ++line_p;
342
343 // Look for whitespace separating the URL from subsequent ignored tokens.
344 const wchar_t* fallback_start = line_p;
345 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
346 ++line_p;
347
348 base::string16 fallback_url16;
349 base::WideToUTF16(fallback_start, line_p - fallback_start,
350 &fallback_url16);
351 GURL fallback_url = manifest_url.Resolve(fallback_url16);
352 if (!fallback_url.is_valid())
353 continue;
354 if (fallback_url.has_ref()) {
355 GURL::Replacements replacements;
356 replacements.ClearRef();
357 fallback_url = fallback_url.ReplaceComponents(replacements);
358 }
359
360 // Fallback entry URL must have the same scheme, host and port
361 // as the manifest's URL.
362 if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) {
363 continue;
364 }
365
366 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
367
368 // Store regardless of duplicate namespace URL. Only first match
369 // will ever be used.
370 manifest.fallback_namespaces.push_back(
371 Namespace(APPCACHE_FALLBACK_NAMESPACE, namespace_url,
372 fallback_url, is_pattern));
373 } else {
// Every Mode value is handled by a branch above.
374 NOTREACHED();
375 }
376 }
377
378 return true;
379 }
380
381 } // namespace appcache
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698