| OLD | NEW |
| 1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
| 2 // All rights reserved. | 2 // All rights reserved. |
| 3 // | 3 // |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
| 6 // met: | 6 // met: |
| 7 // | 7 // |
| 8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 51 for (Iter it = a_begin; it != a_end; ++it, ++b) { | 51 for (Iter it = a_begin; it != a_end; ++it, ++b) { |
| 52 if (!*b || ToLowerASCII(*it) != *b) | 52 if (!*b || ToLowerASCII(*it) != *b) |
| 53 return false; | 53 return false; |
| 54 } | 54 } |
| 55 return *b == 0; | 55 return *b == 0; |
| 56 } | 56 } |
| 57 | 57 |
| 58 const char kFileScheme[] = "file"; // Used in a number of places. | 58 const char kFileScheme[] = "file"; // Used in a number of places. |
| 59 const char kMailtoScheme[] = "mailto"; | 59 const char kMailtoScheme[] = "mailto"; |
| 60 | 60 |
| 61 const int kNumStandardURLSchemes = 5; | 61 const int kNumStandardURLSchemes = 7; |
| 62 const char* kStandardURLSchemes[kNumStandardURLSchemes] = { | 62 const char* kStandardURLSchemes[kNumStandardURLSchemes] = { |
| 63 "http", | 63 "http", |
| 64 "https", | 64 "https", |
| 65 kFileScheme, // Yes, file urls can have a hostname! | 65 kFileScheme, // Yes, file urls can have a hostname! |
| 66 "ftp", | 66 "ftp", |
| 67 "gopher", | 67 "gopher", |
| 68 "ws", // WebSocket. |
| 69 "wss", // WebSocket secure. |
| 68 }; | 70 }; |
| 69 | 71 |
| 70 // List of the currently installed standard schemes. This list is lazily | 72 // List of the currently installed standard schemes. This list is lazily |
| 71 // initialized by InitStandardSchemes and is leaked on shutdown to prevent | 73 // initialized by InitStandardSchemes and is leaked on shutdown to prevent |
| 72 // any destructors from being called that will slow us down or cause problems. | 74 // any destructors from being called that will slow us down or cause problems. |
| 73 std::vector<const char*>* standard_schemes = NULL; | 75 std::vector<const char*>* standard_schemes = NULL; |
| 74 | 76 |
| 75 // Ensures that the standard_schemes list is initialized, does nothing if it | 77 // Ensures that the standard_schemes list is initialized, does nothing if it |
| 76 // already has values. | 78 // already has values. |
| 77 void InitStandardSchemes() { | 79 void InitStandardSchemes() { |
| (...skipping 11 matching lines...) Expand all Loading... |
| 89 const url_parse::Component& component, | 91 const url_parse::Component& component, |
| 90 const char* compare_to) { | 92 const char* compare_to) { |
| 91 if (!component.is_nonempty()) | 93 if (!component.is_nonempty()) |
| 92 return compare_to[0] == 0; // When component is empty, match empty scheme. | 94 return compare_to[0] == 0; // When component is empty, match empty scheme. |
| 93 return LowerCaseEqualsASCII(&spec[component.begin], | 95 return LowerCaseEqualsASCII(&spec[component.begin], |
| 94 &spec[component.end()], | 96 &spec[component.end()], |
| 95 compare_to); | 97 compare_to); |
| 96 } | 98 } |
| 97 | 99 |
| 98 // Returns true if the given scheme identified by |scheme| within |spec| is one | 100 // Returns true if the given scheme identified by |scheme| within |spec| is one |
| 99 // of the registered "standard" schemes. Note that this does not check for | 101 // of the registered "standard" schemes. |
| 100 // "://", use IsStandard for that. | |
| 101 template<typename CHAR> | 102 template<typename CHAR> |
| 102 bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) { | 103 bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) { |
| 103 if (!scheme.is_nonempty()) | 104 if (!scheme.is_nonempty()) |
| 104 return false; // Empty or invalid schemes are non-standard. | 105 return false; // Empty or invalid schemes are non-standard. |
| 105 | 106 |
| 106 InitStandardSchemes(); | 107 InitStandardSchemes(); |
| 107 for (size_t i = 0; i < standard_schemes->size(); i++) { | 108 for (size_t i = 0; i < standard_schemes->size(); i++) { |
| 108 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], | 109 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], |
| 109 standard_schemes->at(i))) | 110 standard_schemes->at(i))) |
| 110 return true; | 111 return true; |
| 111 } | 112 } |
| 112 return false; | 113 return false; |
| 113 } | 114 } |
| 114 | 115 |
| 115 // Returns true if the stuff following the scheme in the given spec indicates | |
| 116 // a "standard" URL. The presence of "://" after the scheme indicates that | |
| 117 // there is a hostname, etc. which we call a standard URL. | |
| 118 template<typename CHAR> | |
| 119 bool HasStandardSchemeSeparator(const CHAR* spec, int spec_len, | |
| 120 const url_parse::Component& scheme) { | |
| 121 int after_scheme = scheme.end(); | |
| 122 if (spec_len < after_scheme + 3) | |
| 123 return false; | |
| 124 return spec[after_scheme] == ':' && | |
| 125 spec[after_scheme + 1] == '/' && | |
| 126 spec[after_scheme + 2] == '/'; | |
| 127 } | |
| 128 | |
| 129 template<typename CHAR> | |
| 130 bool DoIsStandard(const CHAR* spec, int spec_len, | |
| 131 const url_parse::Component& scheme) { | |
| 132 return HasStandardSchemeSeparator(spec, spec_len, scheme) || | |
| 133 IsStandardScheme(spec, scheme); | |
| 134 } | |
| 135 | |
| 136 template<typename CHAR> | 116 template<typename CHAR> |
| 137 bool DoFindAndCompareScheme(const CHAR* str, | 117 bool DoFindAndCompareScheme(const CHAR* str, |
| 138 int str_len, | 118 int str_len, |
| 139 const char* compare, | 119 const char* compare, |
| 140 url_parse::Component* found_scheme) { | 120 url_parse::Component* found_scheme) { |
| 141 url_parse::Component our_scheme; | 121 url_parse::Component our_scheme; |
| 142 if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) { | 122 if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) { |
| 143 // No scheme. | 123 // No scheme. |
| 144 if (found_scheme) | 124 if (found_scheme) |
| 145 *found_scheme = url_parse::Component(); | 125 *found_scheme = url_parse::Component(); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 177 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || | 157 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || |
| 178 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { | 158 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { |
| 179 url_parse::ParseFileURL(spec, spec_len, &parsed_input); | 159 url_parse::ParseFileURL(spec, spec_len, &parsed_input); |
| 180 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, | 160 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, |
| 181 charset_converter, | 161 charset_converter, |
| 182 output, output_parsed); | 162 output, output_parsed); |
| 183 } | 163 } |
| 184 #endif | 164 #endif |
| 185 | 165 |
| 186 url_parse::Component scheme; | 166 url_parse::Component scheme; |
| 187 if(!url_parse::ExtractScheme(spec, spec_len, &scheme)) | 167 if (!url_parse::ExtractScheme(spec, spec_len, &scheme)) |
| 188 return false; | 168 return false; |
| 189 | 169 |
| 190 // This is the parsed version of the input URL, we have to canonicalize it | 170 // This is the parsed version of the input URL, we have to canonicalize it |
| 191 // before storing it in our object. | 171 // before storing it in our object. |
| 192 bool success; | 172 bool success; |
| 193 if (CompareSchemeComponent(spec, scheme, kFileScheme)) { | 173 if (CompareSchemeComponent(spec, scheme, kFileScheme)) { |
| 194 // File URLs are special. | 174 // File URLs are special. |
| 195 url_parse::ParseFileURL(spec, spec_len, &parsed_input); | 175 url_parse::ParseFileURL(spec, spec_len, &parsed_input); |
| 196 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, | 176 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, |
| 197 charset_converter, | 177 charset_converter, |
| 198 output, output_parsed); | 178 output, output_parsed); |
| 199 | 179 |
| 200 } else if (IsStandard(spec, spec_len, scheme)) { | 180 } else if (DoIsStandard(spec, scheme)) { |
| 201 // All "normal" URLs. | 181 // All "normal" URLs. |
| 202 url_parse::ParseStandardURL(spec, spec_len, &parsed_input); | 182 url_parse::ParseStandardURL(spec, spec_len, &parsed_input); |
| 203 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, | 183 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, |
| 204 charset_converter, | 184 charset_converter, |
| 205 output, output_parsed); | 185 output, output_parsed); |
| 206 | 186 |
| 207 } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) { | 187 } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) { |
| 208 // Mailto are treated like a standard url with only a scheme, path, query | 188 // Mailto are treated like a standard url with only a scheme, path, query |
| 209 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); | 189 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); |
| 210 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, | 190 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 232 // copying to the new buffer. | 212 // copying to the new buffer. |
| 233 url_canon::RawCanonOutputT<CHAR> whitespace_buffer; | 213 url_canon::RawCanonOutputT<CHAR> whitespace_buffer; |
| 234 int relative_length; | 214 int relative_length; |
| 235 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, | 215 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, |
| 236 &whitespace_buffer, | 216 &whitespace_buffer, |
| 237 &relative_length); | 217 &relative_length); |
| 238 | 218 |
| 239 // See if our base URL should be treated as "standard". | 219 // See if our base URL should be treated as "standard". |
| 240 bool standard_base_scheme = | 220 bool standard_base_scheme = |
| 241 base_parsed.scheme.is_nonempty() && | 221 base_parsed.scheme.is_nonempty() && |
| 242 IsStandard(base_spec, base_spec_len, base_parsed.scheme); | 222 DoIsStandard(base_spec, base_parsed.scheme); |
| 243 | 223 |
| 244 bool is_relative; | 224 bool is_relative; |
| 245 url_parse::Component relative_component; | 225 url_parse::Component relative_component; |
| 246 if (!url_canon::IsRelativeURL(base_spec, base_parsed, | 226 if (!url_canon::IsRelativeURL(base_spec, base_parsed, |
| 247 relative, relative_length, | 227 relative, relative_length, |
| 248 standard_base_scheme, | 228 standard_base_scheme, |
| 249 &is_relative, | 229 &is_relative, |
| 250 &relative_component)) { | 230 &relative_component)) { |
| 251 // Error resolving. | 231 // Error resolving. |
| 252 return false; | 232 return false; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 268 } | 248 } |
| 269 | 249 |
| 270 template<typename CHAR> | 250 template<typename CHAR> |
| 271 bool DoReplaceComponents(const char* spec, | 251 bool DoReplaceComponents(const char* spec, |
| 272 int spec_len, | 252 int spec_len, |
| 273 const url_parse::Parsed& parsed, | 253 const url_parse::Parsed& parsed, |
| 274 const url_canon::Replacements<CHAR>& replacements, | 254 const url_canon::Replacements<CHAR>& replacements, |
| 275 url_canon::CharsetConverter* charset_converter, | 255 url_canon::CharsetConverter* charset_converter, |
| 276 url_canon::CanonOutput* output, | 256 url_canon::CanonOutput* output, |
| 277 url_parse::Parsed* out_parsed) { | 257 url_parse::Parsed* out_parsed) { |
| 278 // Note that we dispatch to the parser according to the scheme type of | 258 // If the scheme is overridden, just do a simple string substitution and |
| 279 // the OUTPUT URL. Normally, this is the same as our scheme, but if the | 259 // reparse the whole thing. There are lots of edge cases that we really don't |
| 280 // scheme is being overridden, we need to test that. | 260 // want to deal with. Like what happens if I replace "http://e:8080/foo" |
| 261 // with a file. Does it become "file:///E:/8080/foo" where the port number |
| 262 // becomes part of the path? Parsing that string as a file URL says "yes" |
| 263 // but almost no sane rule for dealing with the components individually would |
| 264 // come up with that. |
| 265 // |
| 266 // Why allow these crazy cases at all? Programmatically, there is almost no |
| 267 // case for replacing the scheme. The most common case for hitting this is |
| 268 // in JS when building up a URL using the location object. In this case, the |
| 269 // JS code expects the string substitution behavior: |
| 270 // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3 |
| 271 if (replacements.IsSchemeOverridden()) { |
| 272 // Canonicalize the new scheme so it is 8-bit and can be concatenated with |
| 273 // the existing spec. |
| 274 url_canon::RawCanonOutput<128> scheme_replaced; |
| 275 url_parse::Component scheme_replaced_parsed; |
| 276 url_canon::CanonicalizeScheme( |
| 277 replacements.sources().scheme, |
| 278 replacements.components().scheme, |
| 279 &scheme_replaced, &scheme_replaced_parsed); |
| 281 | 280 |
| 282 if (// Either the scheme is not replaced and the old one is a file, | 281 // We can assume that the input is canonicalized, which means it always has |
| 283 (!replacements.IsSchemeOverridden() && | 282 // a colon after the scheme (or where the scheme would be). |
| 284 CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) || | 283 int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1 |
| 285 // ...or it is being replaced and the new one is a file. | 284 : 1; |
| 286 (replacements.IsSchemeOverridden() && | 285 if (spec_len - spec_after_colon > 0) { |
| 287 CompareSchemeComponent(replacements.sources().scheme, | 286 scheme_replaced.Append(&spec[spec_after_colon], |
| 288 replacements.components().scheme, | 287 spec_len - spec_after_colon); |
| 289 kFileScheme))) { | 288 } |
| 289 |
| 290 // We now need to completely re-parse the resulting string since its meaning |
| 291 // may have changed with the different scheme. |
| 292 url_canon::RawCanonOutput<128> recanonicalized; |
| 293 url_parse::Parsed recanonicalized_parsed; |
| 294 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), |
| 295 charset_converter, |
| 296 &recanonicalized, &recanonicalized_parsed); |
| 297 |
| 298 // Recurse using the version with the scheme already replaced. This will now |
| 299 // use the replacement rules for the new scheme. |
| 300 // |
| 301 // Warning: this code assumes that ReplaceComponents will re-check all |
| 302 // components for validity. This is because we can't fail if DoCanonicalize |
| 303 // failed above since theoretically the thing making it fail could be |
| 304 // getting replaced here. If ReplaceComponents didn't re-check everything, |
| 305 // we wouldn't know if something *not* getting replaced is a problem. |
| 306 // If the scheme-specific replacers are made more intelligent so they don't |
| 307 // re-check everything, we should instead recanonicalize the whole thing |
| 308 // after this call to check validity (this assumes replacing the scheme is |
| 309 // much much less common than other types of replacements, like clearing the |
| 310 // ref). |
| 311 url_canon::Replacements<CHAR> replacements_no_scheme = replacements; |
| 312 replacements_no_scheme.SetScheme(NULL, url_parse::Component()); |
| 313 return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(), |
| 314 recanonicalized_parsed, replacements_no_scheme, |
| 315 charset_converter, output, out_parsed); |
| 316 } |
| 317 |
| 318 // If we get here, then we know the scheme doesn't need to be replaced, so can |
| 319 // just key off the scheme in the spec to know how to do the replacements. |
| 320 if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) { |
| 290 return url_canon::ReplaceFileURL(spec, parsed, replacements, | 321 return url_canon::ReplaceFileURL(spec, parsed, replacements, |
| 291 charset_converter, output, out_parsed); | 322 charset_converter, output, out_parsed); |
| 292 } | 323 } |
| 293 | 324 if (DoIsStandard(spec, parsed.scheme)) { |
| 294 if (// Either the scheme is not replaced and the old one is standard, | |
| 295 (!replacements.IsSchemeOverridden() && | |
| 296 IsStandard(spec, spec_len, parsed.scheme)) || | |
| 297 // ...or it is being replaced and the new one is standard. | |
| 298 (replacements.IsSchemeOverridden() && | |
| 299 IsStandardScheme(replacements.sources().scheme, | |
| 300 replacements.components().scheme))) { | |
| 301 // Standard URL with all parts. | |
| 302 return url_canon::ReplaceStandardURL(spec, parsed, replacements, | 325 return url_canon::ReplaceStandardURL(spec, parsed, replacements, |
| 303 charset_converter, output, out_parsed); | 326 charset_converter, output, out_parsed); |
| 304 } | 327 } |
| 305 | 328 if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) { |
| 306 if (// Either the scheme is not replaced and the old one is mailto, | |
| 307 (!replacements.IsSchemeOverridden() && | |
| 308 CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) || | |
| 309 // ...or it is being replaced and the new one is a mailto. | |
| 310 (replacements.IsSchemeOverridden() && | |
| 311 CompareSchemeComponent(replacements.sources().scheme, | |
| 312 replacements.components().scheme, | |
| 313 kMailtoScheme))) { | |
| 314 return url_canon::ReplaceMailtoURL(spec, parsed, replacements, | 329 return url_canon::ReplaceMailtoURL(spec, parsed, replacements, |
| 315 output, out_parsed); | 330 output, out_parsed); |
| 316 } | 331 } |
| 317 | 332 |
| 333 // Default is a path URL. |
| 318 return url_canon::ReplacePathURL(spec, parsed, replacements, | 334 return url_canon::ReplacePathURL(spec, parsed, replacements, |
| 319 output, out_parsed); | 335 output, out_parsed); |
| 320 } | 336 } |
| 321 | 337 |
| 322 } // namespace | 338 } // namespace |
| 323 | 339 |
| 324 void AddStandardScheme(const char* new_scheme) { | 340 void AddStandardScheme(const char* new_scheme) { |
| 325 size_t scheme_len = strlen(new_scheme); | 341 size_t scheme_len = strlen(new_scheme); |
| 326 if (scheme_len == 0) | 342 if (scheme_len == 0) |
| 327 return; | 343 return; |
| 328 | 344 |
| 329 // Duplicate the scheme into a new buffer and add it to the list of standard | 345 // Duplicate the scheme into a new buffer and add it to the list of standard |
| 330 // schemes. This pointer will be leaked on shutdown. | 346 // schemes. This pointer will be leaked on shutdown. |
| 331 char* dup_scheme = new char[scheme_len + 1]; | 347 char* dup_scheme = new char[scheme_len + 1]; |
| 332 memcpy(dup_scheme, new_scheme, scheme_len + 1); | 348 memcpy(dup_scheme, new_scheme, scheme_len + 1); |
| 333 | 349 |
| 334 InitStandardSchemes(); | 350 InitStandardSchemes(); |
| 335 standard_schemes->push_back(dup_scheme); | 351 standard_schemes->push_back(dup_scheme); |
| 336 } | 352 } |
| 337 | 353 |
| 338 bool IsStandard(const char* spec, int spec_len, | 354 bool IsStandard(const char* spec, const url_parse::Component& scheme) { |
| 339 const url_parse::Component& scheme) { | 355 return DoIsStandard(spec, scheme); |
| 340 return DoIsStandard(spec, spec_len, scheme); | |
| 341 } | 356 } |
| 342 | 357 |
| 343 bool IsStandard(const char16* spec, int spec_len, | 358 bool IsStandard(const char16* spec, const url_parse::Component& scheme) { |
| 344 const url_parse::Component& scheme) { | 359 return DoIsStandard(spec, scheme); |
| 345 return DoIsStandard(spec, spec_len, scheme); | |
| 346 } | 360 } |
| 347 | 361 |
| 348 bool FindAndCompareScheme(const char* str, | 362 bool FindAndCompareScheme(const char* str, |
| 349 int str_len, | 363 int str_len, |
| 350 const char* compare, | 364 const char* compare, |
| 351 url_parse::Component* found_scheme) { | 365 url_parse::Component* found_scheme) { |
| 352 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); | 366 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
| 353 } | 367 } |
| 354 | 368 |
| 355 bool FindAndCompareScheme(const char16* str, | 369 bool FindAndCompareScheme(const char16* str, |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 444 return a_begin == a_end && b_begin == b_end; | 458 return a_begin == a_end && b_begin == b_end; |
| 445 } | 459 } |
| 446 | 460 |
| 447 bool LowerCaseEqualsASCII(const char16* a_begin, | 461 bool LowerCaseEqualsASCII(const char16* a_begin, |
| 448 const char16* a_end, | 462 const char16* a_end, |
| 449 const char* b) { | 463 const char* b) { |
| 450 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | 464 return DoLowerCaseEqualsASCII(a_begin, a_end, b); |
| 451 } | 465 } |
| 452 | 466 |
| 453 } // namespace url_util | 467 } // namespace url_util |
| OLD | NEW |