OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache. | 5 // This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache. |
6 | 6 |
7 /* | 7 /* |
8 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 8 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
9 * | 9 * |
10 * Redistribution and use in source and binary forms, with or without | 10 * Redistribution and use in source and binary forms, with or without |
(...skipping 22 matching lines...) Expand all Loading... |
33 | 33 |
34 #include "base/i18n/icu_string_conversions.h" | 34 #include "base/i18n/icu_string_conversions.h" |
35 #include "base/logging.h" | 35 #include "base/logging.h" |
36 #include "base/utf_string_conversions.h" | 36 #include "base/utf_string_conversions.h" |
37 #include "googleurl/src/gurl.h" | 37 #include "googleurl/src/gurl.h" |
38 | 38 |
39 namespace appcache { | 39 namespace appcache { |
40 | 40 |
41 enum Mode { | 41 enum Mode { |
42 EXPLICIT, | 42 EXPLICIT, |
| 43 INTERCEPT, |
43 FALLBACK, | 44 FALLBACK, |
44 ONLINE_WHITELIST, | 45 ONLINE_WHITELIST, |
45 UNKNOWN, | 46 UNKNOWN, |
46 }; | 47 }; |
47 | 48 |
48 Manifest::Manifest() : online_whitelist_all(false) {} | 49 Manifest::Manifest() : online_whitelist_all(false) {} |
49 | 50 |
50 Manifest::~Manifest() {} | 51 Manifest::~Manifest() {} |
51 | 52 |
52 bool ParseManifest(const GURL& manifest_url, const char* data, int length, | 53 bool ParseManifest(const GURL& manifest_url, const char* data, int length, |
53 Manifest& manifest) { | 54 Manifest& manifest) { |
54 // This is an implementation of the parsing algorithm specified in | 55 // This is an implementation of the parsing algorithm specified in |
55 // the HTML5 offline web application docs: | 56 // the HTML5 offline web application docs: |
56 // http://www.w3.org/TR/html5/offline.html | 57 // http://www.w3.org/TR/html5/offline.html |
57 // Do not modify it without consulting those docs. | 58 // Do not modify it without consulting those docs. |
58 // Though you might be tempted to convert these wstrings to UTF-8 or | 59 // Though you might be tempted to convert these wstrings to UTF-8 or |
59 // string16, this implementation seems simpler given the constraints. | 60 // string16, this implementation seems simpler given the constraints. |
60 | 61 |
61 const wchar_t kSignature[] = L"CACHE MANIFEST"; | 62 const wchar_t kSignature[] = L"CACHE MANIFEST"; |
62 const size_t kSignatureLength = arraysize(kSignature) - 1; | 63 const size_t kSignatureLength = arraysize(kSignature) - 1; |
| 64 const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST"; |
| 65 const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1; |
63 | 66 |
64 DCHECK(manifest.explicit_urls.empty()); | 67 DCHECK(manifest.explicit_urls.empty()); |
65 DCHECK(manifest.fallback_namespaces.empty()); | 68 DCHECK(manifest.fallback_namespaces.empty()); |
66 DCHECK(manifest.online_whitelist_namespaces.empty()); | 69 DCHECK(manifest.online_whitelist_namespaces.empty()); |
67 DCHECK(!manifest.online_whitelist_all); | 70 DCHECK(!manifest.online_whitelist_all); |
68 | 71 |
69 Mode mode = EXPLICIT; | 72 Mode mode = EXPLICIT; |
70 | 73 |
71 std::wstring data_string; | 74 std::wstring data_string; |
72 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string); | 75 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string); |
73 // until UTF8ToWide uses 0xFFFD Unicode replacement character. | 76 // until UTF8ToWide uses 0xFFFD Unicode replacement character. |
74 base::CodepageToWide(std::string(data, length), base::kCodepageUTF8, | 77 base::CodepageToWide(std::string(data, length), base::kCodepageUTF8, |
75 base::OnStringConversionError::SUBSTITUTE, &data_string); | 78 base::OnStringConversionError::SUBSTITUTE, &data_string); |
76 const wchar_t* p = data_string.c_str(); | 79 const wchar_t* p = data_string.c_str(); |
77 const wchar_t* end = p + data_string.length(); | 80 const wchar_t* end = p + data_string.length(); |
78 | 81 |
79 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" | 82 // Look for the magic signature: "^\xFEFF?(CHROMIUM )?CACHE MANIFEST[ \t]?" |
80 // Example: "CACHE MANIFEST #comment" is a valid signature. | 83 // Example: "CACHE MANIFEST #comment" is a valid signature. |
81 // Example: "CACHE MANIFEST;V2" is not. | 84 // Example: "CACHE MANIFEST;V2" is not. |
82 | 85 |
83 // When the input data starts with a UTF-8 Byte-Order-Mark | 86 // When the input data starts with a UTF-8 Byte-Order-Mark |
84 // (0xEF, 0xBB, 0xBF), the CodepageToWide() call above converts it to a | 87 // (0xEF, 0xBB, 0xBF), the CodepageToWide() call above converts it to a |
85 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists. | 88 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists. |
86 int bom_offset = 0; | 89 int bom_offset = 0; |
87 if (!data_string.empty() && data_string[0] == 0xFEFF) { | 90 if (!data_string.empty() && data_string[0] == 0xFEFF) { |
88 bom_offset = 1; | 91 bom_offset = 1; |
89 ++p; | 92 ++p; |
90 } | 93 } |
91 | 94 |
92 if (p >= end || | 95 if (p >= end) |
93 data_string.compare(bom_offset, kSignatureLength, kSignature)) { | 96 return false; |
| 97 |
| 98 // Check for a supported signature and skip p past it. |
| 99 if (0 == data_string.compare(bom_offset, kSignatureLength, |
| 100 kSignature)) { |
| 101 p += kSignatureLength; |
| 102 } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength, |
| 103 kChromiumSignature)) { |
| 104 p += kChromiumSignatureLength; |
| 105 } else { |
94 return false; | 106 return false; |
95 } | 107 } |
96 | 108 |
97 p += kSignatureLength; // Skip past "CACHE MANIFEST" | |
98 | |
99 // Character after "CACHE MANIFEST" must be whitespace. | 109 // Character after "CACHE MANIFEST" must be whitespace. |
100 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') | 110 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') |
101 return false; | 111 return false; |
102 | 112 |
103 // Skip to the end of the line. | 113 // Skip to the end of the line. |
104 while (p < end && *p != '\r' && *p != '\n') | 114 while (p < end && *p != '\r' && *p != '\n') |
105 ++p; | 115 ++p; |
106 | 116 |
107 while (1) { | 117 while (1) { |
108 // Skip whitespace | 118 // Skip whitespace |
(...skipping 19 matching lines...) Expand all Loading... |
128 --tmp; | 138 --tmp; |
129 | 139 |
130 std::wstring line(line_start, tmp - line_start + 1); | 140 std::wstring line(line_start, tmp - line_start + 1); |
131 | 141 |
132 if (line == L"CACHE:") { | 142 if (line == L"CACHE:") { |
133 mode = EXPLICIT; | 143 mode = EXPLICIT; |
134 } else if (line == L"FALLBACK:") { | 144 } else if (line == L"FALLBACK:") { |
135 mode = FALLBACK; | 145 mode = FALLBACK; |
136 } else if (line == L"NETWORK:") { | 146 } else if (line == L"NETWORK:") { |
137 mode = ONLINE_WHITELIST; | 147 mode = ONLINE_WHITELIST; |
| 148 } else if (line == L"CHROMIUM-INTERCEPT:") { |
| 149 mode = INTERCEPT; |
138 } else if (*(line.end() - 1) == ':') { | 150 } else if (*(line.end() - 1) == ':') { |
139 mode = UNKNOWN; | 151 mode = UNKNOWN; |
140 } else if (mode == UNKNOWN) { | 152 } else if (mode == UNKNOWN) { |
141 continue; | 153 continue; |
142 } else if (line == L"*" && mode == ONLINE_WHITELIST) { | 154 } else if (line == L"*" && mode == ONLINE_WHITELIST) { |
143 manifest.online_whitelist_all = true; | 155 manifest.online_whitelist_all = true; |
144 continue; | 156 continue; |
145 } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) { | 157 } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) { |
146 const wchar_t *line_p = line.c_str(); | 158 const wchar_t *line_p = line.c_str(); |
147 const wchar_t *line_end = line_p + line.length(); | 159 const wchar_t *line_end = line_p + line.length(); |
(...skipping 24 matching lines...) Expand all Loading... |
172 // Per the spec, EXPLICIT cross-origin HTTPS resources should be | 184 // Per the spec, EXPLICIT cross-origin HTTPS resources should be |
173 // ignored here. We've opted for a milder constraint and allow | 185 // ignored here. We've opted for a milder constraint and allow |
174 // caching unless the resource has a "no-store" header. That | 186 // caching unless the resource has a "no-store" header. That |
175 // condition is enforced in AppCacheUpdateJob. | 187 // condition is enforced in AppCacheUpdateJob. |
176 | 188 |
177 if (mode == EXPLICIT) { | 189 if (mode == EXPLICIT) { |
178 manifest.explicit_urls.insert(url.spec()); | 190 manifest.explicit_urls.insert(url.spec()); |
179 } else { | 191 } else { |
180 manifest.online_whitelist_namespaces.push_back(url); | 192 manifest.online_whitelist_namespaces.push_back(url); |
181 } | 193 } |
| 194 } else if (mode == INTERCEPT) { |
| 195 // Lines of the form, |
| 196 // <urlnamespace> <intercept_type> <targeturl> |
| 197 const wchar_t* line_p = line.c_str(); |
| 198 const wchar_t* line_end = line_p + line.length(); |
| 199 |
| 200 // Look for first whitespace separating the url namespace from |
| 201 // the intercept type. |
| 202 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') |
| 203 ++line_p; |
| 204 |
| 205 if (line_p == line_end) |
| 206 continue; // There was no whitespace separating the URLs. |
| 207 |
| 208 string16 namespace_url16; |
| 209 WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16); |
| 210 GURL namespace_url = manifest_url.Resolve(namespace_url16); |
| 211 if (!namespace_url.is_valid()) |
| 212 continue; |
| 213 if (namespace_url.has_ref()) { |
| 214 GURL::Replacements replacements; |
| 215 replacements.ClearRef(); |
| 216 namespace_url = namespace_url.ReplaceComponents(replacements); |
| 217 } |
| 218 |
| 219 // The namespace URL must have the same scheme, host and port |
| 220 // as the manifest's URL. |
| 221 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) |
| 222 continue; |
| 223 |
| 224 // Skip whitespace separating namespace from the type. |
| 225 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) |
| 226 ++line_p; |
| 227 |
| 228 // Look for whitespace separating the type from the target url. |
| 229 const wchar_t* type_start = line_p; |
| 230 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') |
| 231 ++line_p; |
| 232 |
| 233 // Look for a type value we understand, otherwise skip the line. |
| 234 std::wstring type(type_start, line_p - type_start); |
| 235 if (type != L"return") |
| 236 continue; |
| 237 |
| 238 // Skip whitespace separating type from the target_url. |
| 239 while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) |
| 240 ++line_p; |
| 241 |
| 242 // Look for whitespace separating the URL from subsequent ignored tokens. |
| 243 const wchar_t* target_url_start = line_p; |
| 244 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') |
| 245 ++line_p; |
| 246 |
| 247 string16 target_url16; |
| 248 WideToUTF16(target_url_start, line_p - target_url_start, &target_url16); |
| 249 GURL target_url = manifest_url.Resolve(target_url16); |
| 250 if (!target_url.is_valid()) |
| 251 continue; |
| 252 |
| 253 if (target_url.has_ref()) { |
| 254 GURL::Replacements replacements; |
| 255 replacements.ClearRef(); |
| 256 target_url = target_url.ReplaceComponents(replacements); |
| 257 } |
| 258 if (manifest_url.GetOrigin() != target_url.GetOrigin()) |
| 259 continue; |
| 260 |
| 261 manifest.intercept_namespaces.push_back( |
| 262 Namespace(INTERCEPT_NAMESPACE, namespace_url, target_url)); |
182 } else if (mode == FALLBACK) { | 263 } else if (mode == FALLBACK) { |
183 const wchar_t* line_p = line.c_str(); | 264 const wchar_t* line_p = line.c_str(); |
184 const wchar_t* line_end = line_p + line.length(); | 265 const wchar_t* line_end = line_p + line.length(); |
185 | 266 |
186 // Look for whitespace separating the two URLs | 267 // Look for whitespace separating the two URLs |
187 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') | 268 while (line_p < line_end && *line_p != '\t' && *line_p != ' ') |
188 ++line_p; | 269 ++line_p; |
189 | 270 |
190 if (line_p == line_end) { | 271 if (line_p == line_end) { |
191 // There was no whitespace separating the URLs. | 272 // There was no whitespace separating the URLs. |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
231 | 312 |
232 // Fallback entry URL must have the same scheme, host and port | 313 // Fallback entry URL must have the same scheme, host and port |
233 // as the manifest's URL. | 314 // as the manifest's URL. |
234 if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) { | 315 if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) { |
235 continue; | 316 continue; |
236 } | 317 } |
237 | 318 |
238 // Store regardless of duplicate namespace URL. Only first match | 319 // Store regardless of duplicate namespace URL. Only first match |
239 // will ever be used. | 320 // will ever be used. |
240 manifest.fallback_namespaces.push_back( | 321 manifest.fallback_namespaces.push_back( |
241 FallbackNamespace(namespace_url, fallback_url)); | 322 Namespace(FALLBACK_NAMESPACE, namespace_url, fallback_url)); |
242 } else { | 323 } else { |
243 NOTREACHED(); | 324 NOTREACHED(); |
244 } | 325 } |
245 } | 326 } |
246 | 327 |
247 return true; | 328 return true; |
248 } | 329 } |
249 | 330 |
250 } // namespace appcache | 331 } // namespace appcache |
OLD | NEW |