OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
51 for (Iter it = a_begin; it != a_end; ++it, ++b) { | 51 for (Iter it = a_begin; it != a_end; ++it, ++b) { |
52 if (!*b || ToLowerASCII(*it) != *b) | 52 if (!*b || ToLowerASCII(*it) != *b) |
53 return false; | 53 return false; |
54 } | 54 } |
55 return *b == 0; | 55 return *b == 0; |
56 } | 56 } |
57 | 57 |
58 const char kFileScheme[] = "file"; // Used in a number of places. | 58 const char kFileScheme[] = "file"; // Used in a number of places. |
59 const char kMailtoScheme[] = "mailto"; | 59 const char kMailtoScheme[] = "mailto"; |
60 | 60 |
61 const int kNumStandardURLSchemes = 5; | 61 const int kNumStandardURLSchemes = 7; |
62 const char* kStandardURLSchemes[kNumStandardURLSchemes] = { | 62 const char* kStandardURLSchemes[kNumStandardURLSchemes] = { |
63 "http", | 63 "http", |
64 "https", | 64 "https", |
65 kFileScheme, // Yes, file urls can have a hostname! | 65 kFileScheme, // Yes, file urls can have a hostname! |
66 "ftp", | 66 "ftp", |
67 "gopher", | 67 "gopher", |
| 68 "ws", // WebSocket. |
| 69 "wss", // WebSocket secure. |
68 }; | 70 }; |
69 | 71 |
70 // List of the currently installed standard schemes. This list is lazily | 72 // List of the currently installed standard schemes. This list is lazily |
71 // initialized by InitStandardSchemes and is leaked on shutdown to prevent | 73 // initialized by InitStandardSchemes and is leaked on shutdown to prevent |
72 // any destructors from being called that will slow us down or cause problems. | 74 // any destructors from being called that will slow us down or cause problems. |
73 std::vector<const char*>* standard_schemes = NULL; | 75 std::vector<const char*>* standard_schemes = NULL; |
74 | 76 |
75 // Ensures that the standard_schemes list is initialized, does nothing if it | 77 // Ensures that the standard_schemes list is initialized, does nothing if it |
76 // already has values. | 78 // already has values. |
77 void InitStandardSchemes() { | 79 void InitStandardSchemes() { |
(...skipping 11 matching lines...) Expand all Loading... |
89 const url_parse::Component& component, | 91 const url_parse::Component& component, |
90 const char* compare_to) { | 92 const char* compare_to) { |
91 if (!component.is_nonempty()) | 93 if (!component.is_nonempty()) |
92 return compare_to[0] == 0; // When component is empty, match empty scheme. | 94 return compare_to[0] == 0; // When component is empty, match empty scheme. |
93 return LowerCaseEqualsASCII(&spec[component.begin], | 95 return LowerCaseEqualsASCII(&spec[component.begin], |
94 &spec[component.end()], | 96 &spec[component.end()], |
95 compare_to); | 97 compare_to); |
96 } | 98 } |
97 | 99 |
98 // Returns true if the given scheme identified by |scheme| within |spec| is one | 100 // Returns true if the given scheme identified by |scheme| within |spec| is one |
99 // of the registered "standard" schemes. Note that this does not check for | 101 // of the registered "standard" schemes. |
100 // "://", use IsStandard for that. | |
101 template<typename CHAR> | 102 template<typename CHAR> |
102 bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) { | 103 bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) { |
103 if (!scheme.is_nonempty()) | 104 if (!scheme.is_nonempty()) |
104 return false; // Empty or invalid schemes are non-standard. | 105 return false; // Empty or invalid schemes are non-standard. |
105 | 106 |
106 InitStandardSchemes(); | 107 InitStandardSchemes(); |
107 for (size_t i = 0; i < standard_schemes->size(); i++) { | 108 for (size_t i = 0; i < standard_schemes->size(); i++) { |
108 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], | 109 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], |
109 standard_schemes->at(i))) | 110 standard_schemes->at(i))) |
110 return true; | 111 return true; |
111 } | 112 } |
112 return false; | 113 return false; |
113 } | 114 } |
114 | 115 |
115 // Returns true if the stuff following the scheme in the given spec indicates | |
116 // a "standard" URL. The presence of "://" after the scheme indicates that | |
117 // there is a hostname, etc. which we call a standard URL. | |
118 template<typename CHAR> | |
119 bool HasStandardSchemeSeparator(const CHAR* spec, int spec_len, | |
120 const url_parse::Component& scheme) { | |
121 int after_scheme = scheme.end(); | |
122 if (spec_len < after_scheme + 3) | |
123 return false; | |
124 return spec[after_scheme] == ':' && | |
125 spec[after_scheme + 1] == '/' && | |
126 spec[after_scheme + 2] == '/'; | |
127 } | |
128 | |
129 template<typename CHAR> | |
130 bool DoIsStandard(const CHAR* spec, int spec_len, | |
131 const url_parse::Component& scheme) { | |
132 return HasStandardSchemeSeparator(spec, spec_len, scheme) || | |
133 IsStandardScheme(spec, scheme); | |
134 } | |
135 | |
136 template<typename CHAR> | 116 template<typename CHAR> |
137 bool DoFindAndCompareScheme(const CHAR* str, | 117 bool DoFindAndCompareScheme(const CHAR* str, |
138 int str_len, | 118 int str_len, |
139 const char* compare, | 119 const char* compare, |
140 url_parse::Component* found_scheme) { | 120 url_parse::Component* found_scheme) { |
141 url_parse::Component our_scheme; | 121 url_parse::Component our_scheme; |
142 if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) { | 122 if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) { |
143 // No scheme. | 123 // No scheme. |
144 if (found_scheme) | 124 if (found_scheme) |
145 *found_scheme = url_parse::Component(); | 125 *found_scheme = url_parse::Component(); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || | 157 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || |
178 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { | 158 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { |
179 url_parse::ParseFileURL(spec, spec_len, &parsed_input); | 159 url_parse::ParseFileURL(spec, spec_len, &parsed_input); |
180 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, | 160 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, |
181 charset_converter, | 161 charset_converter, |
182 output, output_parsed); | 162 output, output_parsed); |
183 } | 163 } |
184 #endif | 164 #endif |
185 | 165 |
186 url_parse::Component scheme; | 166 url_parse::Component scheme; |
187 if(!url_parse::ExtractScheme(spec, spec_len, &scheme)) | 167 if (!url_parse::ExtractScheme(spec, spec_len, &scheme)) |
188 return false; | 168 return false; |
189 | 169 |
190 // This is the parsed version of the input URL, we have to canonicalize it | 170 // This is the parsed version of the input URL, we have to canonicalize it |
191 // before storing it in our object. | 171 // before storing it in our object. |
192 bool success; | 172 bool success; |
193 if (CompareSchemeComponent(spec, scheme, kFileScheme)) { | 173 if (CompareSchemeComponent(spec, scheme, kFileScheme)) { |
194 // File URLs are special. | 174 // File URLs are special. |
195 url_parse::ParseFileURL(spec, spec_len, &parsed_input); | 175 url_parse::ParseFileURL(spec, spec_len, &parsed_input); |
196 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, | 176 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, |
197 charset_converter, | 177 charset_converter, |
198 output, output_parsed); | 178 output, output_parsed); |
199 | 179 |
200 } else if (IsStandard(spec, spec_len, scheme)) { | 180 } else if (DoIsStandard(spec, scheme)) { |
201 // All "normal" URLs. | 181 // All "normal" URLs. |
202 url_parse::ParseStandardURL(spec, spec_len, &parsed_input); | 182 url_parse::ParseStandardURL(spec, spec_len, &parsed_input); |
203 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, | 183 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, |
204 charset_converter, | 184 charset_converter, |
205 output, output_parsed); | 185 output, output_parsed); |
206 | 186 |
207 } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) { | 187 } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) { |
208 // Mailto URLs are treated like standard URLs with only scheme, path, query. | 188 // Mailto URLs are treated like standard URLs with only scheme, path, query. |
209 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); | 189 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); |
210 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, | 190 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, |
(...skipping 21 matching lines...) Expand all Loading... |
232 // copying to the new buffer. | 212 // copying to the new buffer. |
233 url_canon::RawCanonOutputT<CHAR> whitespace_buffer; | 213 url_canon::RawCanonOutputT<CHAR> whitespace_buffer; |
234 int relative_length; | 214 int relative_length; |
235 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, | 215 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, |
236 &whitespace_buffer, | 216 &whitespace_buffer, |
237 &relative_length); | 217 &relative_length); |
238 | 218 |
239 // See if our base URL should be treated as "standard". | 219 // See if our base URL should be treated as "standard". |
240 bool standard_base_scheme = | 220 bool standard_base_scheme = |
241 base_parsed.scheme.is_nonempty() && | 221 base_parsed.scheme.is_nonempty() && |
242 IsStandard(base_spec, base_spec_len, base_parsed.scheme); | 222 DoIsStandard(base_spec, base_parsed.scheme); |
243 | 223 |
244 bool is_relative; | 224 bool is_relative; |
245 url_parse::Component relative_component; | 225 url_parse::Component relative_component; |
246 if (!url_canon::IsRelativeURL(base_spec, base_parsed, | 226 if (!url_canon::IsRelativeURL(base_spec, base_parsed, |
247 relative, relative_length, | 227 relative, relative_length, |
248 standard_base_scheme, | 228 standard_base_scheme, |
249 &is_relative, | 229 &is_relative, |
250 &relative_component)) { | 230 &relative_component)) { |
251 // Error resolving. | 231 // Error resolving. |
252 return false; | 232 return false; |
(...skipping 15 matching lines...) Expand all Loading... |
268 } | 248 } |
269 | 249 |
270 template<typename CHAR> | 250 template<typename CHAR> |
271 bool DoReplaceComponents(const char* spec, | 251 bool DoReplaceComponents(const char* spec, |
272 int spec_len, | 252 int spec_len, |
273 const url_parse::Parsed& parsed, | 253 const url_parse::Parsed& parsed, |
274 const url_canon::Replacements<CHAR>& replacements, | 254 const url_canon::Replacements<CHAR>& replacements, |
275 url_canon::CharsetConverter* charset_converter, | 255 url_canon::CharsetConverter* charset_converter, |
276 url_canon::CanonOutput* output, | 256 url_canon::CanonOutput* output, |
277 url_parse::Parsed* out_parsed) { | 257 url_parse::Parsed* out_parsed) { |
278 // Note that we dispatch to the parser according to the scheme type of | 258 // If the scheme is overridden, just do a simple string substitution and |
279 // the OUTPUT URL. Normally, this is the same as our scheme, but if the | 259 // reparse the whole thing. There are lots of edge cases that we really don't |
280 // scheme is being overridden, we need to test that. | 260 // want to deal with. Like what happens if I replace "http://e:8080/foo" |
| 261 // with a file. Does it become "file:///E:/8080/foo" where the port number |
| 262 // becomes part of the path? Parsing that string as a file URL says "yes" |
| 263 // but almost no sane rule for dealing with the components individually would |
| 264 // come up with that. |
| 265 // |
| 266 // Why allow these crazy cases at all? Programmatically, there is almost no |
| 267 // case for replacing the scheme. The most common case for hitting this is |
| 268 // in JS when building up a URL using the location object. In this case, the |
| 269 // JS code expects the string substitution behavior: |
| 270 // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3 |
| 271 if (replacements.IsSchemeOverridden()) { |
| 272 // Canonicalize the new scheme so it is 8-bit and can be concatenated with |
| 273 // the existing spec. |
| 274 url_canon::RawCanonOutput<128> scheme_replaced; |
| 275 url_parse::Component scheme_replaced_parsed; |
| 276 url_canon::CanonicalizeScheme( |
| 277 replacements.sources().scheme, |
| 278 replacements.components().scheme, |
| 279 &scheme_replaced, &scheme_replaced_parsed); |
281 | 280 |
282 if (// Either the scheme is not replaced and the old one is a file, | 281 // We can assume that the input is canonicalized, which means it always has |
283 (!replacements.IsSchemeOverridden() && | 282 // a colon after the scheme (or where the scheme would be). |
284 CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) || | 283 int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1 |
285 // ...or it is being replaced and the new one is a file. | 284 : 1; |
286 (replacements.IsSchemeOverridden() && | 285 if (spec_len - spec_after_colon > 0) { |
287 CompareSchemeComponent(replacements.sources().scheme, | 286 scheme_replaced.Append(&spec[spec_after_colon], |
288 replacements.components().scheme, | 287 spec_len - spec_after_colon); |
289 kFileScheme))) { | 288 } |
| 289 |
| 290 // We now need to completely re-parse the resulting string since its meaning |
| 291 // may have changed with the different scheme. |
| 292 url_canon::RawCanonOutput<128> recanonicalized; |
| 293 url_parse::Parsed recanonicalized_parsed; |
| 294 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), |
| 295 charset_converter, |
| 296 &recanonicalized, &recanonicalized_parsed); |
| 297 |
| 298 // Recurse using the version with the scheme already replaced. This will now |
| 299 // use the replacement rules for the new scheme. |
| 300 // |
| 301 // Warning: this code assumes that ReplaceComponents will re-check all |
| 302 // components for validity. This is because we can't fail if DoCanonicalize |
| 303 // failed above since theoretically the thing making it fail could be |
| 304 // getting replaced here. If ReplaceComponents didn't re-check everything, |
| 305 // we wouldn't know if something *not* getting replaced is a problem. |
| 306 // If the scheme-specific replacers are made more intelligent so they don't |
| 307 // re-check everything, we should instead recanonicalize the whole thing |
| 308 // after this call to check validity (this assumes replacing the scheme is |
| 309 // much much less common than other types of replacements, like clearing the |
| 310 // ref). |
| 311 url_canon::Replacements<CHAR> replacements_no_scheme = replacements; |
| 312 replacements_no_scheme.SetScheme(NULL, url_parse::Component()); |
| 313 return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(), |
| 314 recanonicalized_parsed, replacements_no_scheme, |
| 315 charset_converter, output, out_parsed); |
| 316 } |
| 317 |
| 318 // If we get here, then we know the scheme doesn't need to be replaced, so can |
| 319 // just key off the scheme in the spec to know how to do the replacements. |
| 320 if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) { |
290 return url_canon::ReplaceFileURL(spec, parsed, replacements, | 321 return url_canon::ReplaceFileURL(spec, parsed, replacements, |
291 charset_converter, output, out_parsed); | 322 charset_converter, output, out_parsed); |
292 } | 323 } |
293 | 324 if (DoIsStandard(spec, parsed.scheme)) { |
294 if (// Either the scheme is not replaced and the old one is standard, | |
295 (!replacements.IsSchemeOverridden() && | |
296 IsStandard(spec, spec_len, parsed.scheme)) || | |
297 // ...or it is being replaced and the new one is standard. | |
298 (replacements.IsSchemeOverridden() && | |
299 IsStandardScheme(replacements.sources().scheme, | |
300 replacements.components().scheme))) { | |
301 // Standard URL with all parts. | |
302 return url_canon::ReplaceStandardURL(spec, parsed, replacements, | 325 return url_canon::ReplaceStandardURL(spec, parsed, replacements, |
303 charset_converter, output, out_parsed); | 326 charset_converter, output, out_parsed); |
304 } | 327 } |
305 | 328 if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) { |
306 if (// Either the scheme is not replaced and the old one is mailto, | |
307 (!replacements.IsSchemeOverridden() && | |
308 CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) || | |
309 // ...or it is being replaced and the new one is a mailto. | |
310 (replacements.IsSchemeOverridden() && | |
311 CompareSchemeComponent(replacements.sources().scheme, | |
312 replacements.components().scheme, | |
313 kMailtoScheme))) { | |
314 return url_canon::ReplaceMailtoURL(spec, parsed, replacements, | 329 return url_canon::ReplaceMailtoURL(spec, parsed, replacements, |
315 output, out_parsed); | 330 output, out_parsed); |
316 } | 331 } |
317 | 332 |
| 333 // Default is a path URL. |
318 return url_canon::ReplacePathURL(spec, parsed, replacements, | 334 return url_canon::ReplacePathURL(spec, parsed, replacements, |
319 output, out_parsed); | 335 output, out_parsed); |
320 } | 336 } |
321 | 337 |
322 } // namespace | 338 } // namespace |
323 | 339 |
324 void AddStandardScheme(const char* new_scheme) { | 340 void AddStandardScheme(const char* new_scheme) { |
325 size_t scheme_len = strlen(new_scheme); | 341 size_t scheme_len = strlen(new_scheme); |
326 if (scheme_len == 0) | 342 if (scheme_len == 0) |
327 return; | 343 return; |
328 | 344 |
329 // Duplicate the scheme into a new buffer and add it to the list of standard | 345 // Duplicate the scheme into a new buffer and add it to the list of standard |
330 // schemes. This pointer will be leaked on shutdown. | 346 // schemes. This pointer will be leaked on shutdown. |
331 char* dup_scheme = new char[scheme_len + 1]; | 347 char* dup_scheme = new char[scheme_len + 1]; |
332 memcpy(dup_scheme, new_scheme, scheme_len + 1); | 348 memcpy(dup_scheme, new_scheme, scheme_len + 1); |
333 | 349 |
334 InitStandardSchemes(); | 350 InitStandardSchemes(); |
335 standard_schemes->push_back(dup_scheme); | 351 standard_schemes->push_back(dup_scheme); |
336 } | 352 } |
337 | 353 |
338 bool IsStandard(const char* spec, int spec_len, | 354 bool IsStandard(const char* spec, const url_parse::Component& scheme) { |
339 const url_parse::Component& scheme) { | 355 return DoIsStandard(spec, scheme); |
340 return DoIsStandard(spec, spec_len, scheme); | |
341 } | 356 } |
342 | 357 |
343 bool IsStandard(const char16* spec, int spec_len, | 358 bool IsStandard(const char16* spec, const url_parse::Component& scheme) { |
344 const url_parse::Component& scheme) { | 359 return DoIsStandard(spec, scheme); |
345 return DoIsStandard(spec, spec_len, scheme); | |
346 } | 360 } |
347 | 361 |
348 bool FindAndCompareScheme(const char* str, | 362 bool FindAndCompareScheme(const char* str, |
349 int str_len, | 363 int str_len, |
350 const char* compare, | 364 const char* compare, |
351 url_parse::Component* found_scheme) { | 365 url_parse::Component* found_scheme) { |
352 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); | 366 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
353 } | 367 } |
354 | 368 |
355 bool FindAndCompareScheme(const char16* str, | 369 bool FindAndCompareScheme(const char16* str, |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
444 return a_begin == a_end && b_begin == b_end; | 458 return a_begin == a_end && b_begin == b_end; |
445 } | 459 } |
446 | 460 |
447 bool LowerCaseEqualsASCII(const char16* a_begin, | 461 bool LowerCaseEqualsASCII(const char16* a_begin, |
448 const char16* a_end, | 462 const char16* a_end, |
449 const char* b) { | 463 const char* b) { |
450 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | 464 return DoLowerCaseEqualsASCII(a_begin, a_end, b); |
451 } | 465 } |
452 | 466 |
453 } // namespace url_util | 467 } // namespace url_util |
OLD | NEW |