| Index: src/url_util.cc
|
| ===================================================================
|
| --- src/url_util.cc (revision 122)
|
| +++ src/url_util.cc (working copy)
|
| @@ -58,13 +58,15 @@
|
| const char kFileScheme[] = "file"; // Used in a number of places.
|
| const char kMailtoScheme[] = "mailto";
|
|
|
| -const int kNumStandardURLSchemes = 5;
|
| +const int kNumStandardURLSchemes = 7;
|
| const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
|
| "http",
|
| "https",
|
| kFileScheme, // Yes, file urls can have a hostname!
|
| "ftp",
|
| "gopher",
|
| + "ws", // WebSocket.
|
| + "wss", // WebSocket secure.
|
| };
|
|
|
| // List of the currently installed standard schemes. This list is lazily
|
| @@ -96,10 +98,9 @@
|
| }
|
|
|
| // Returns true if the given scheme identified by |scheme| within |spec| is one
|
| -// of the registered "standard" schemes. Note that this does not check for
|
| -// "://", use IsStandard for that.
|
| +// of the registered "standard" schemes.
|
| template<typename CHAR>
|
| -bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) {
|
| +bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {
|
| if (!scheme.is_nonempty())
|
| return false; // Empty or invalid schemes are non-standard.
|
|
|
| @@ -112,28 +113,7 @@
|
| return false;
|
| }
|
|
|
| -// Returns true if the stuff following the scheme in the given spec indicates
|
| -// a "standard" URL. The presence of "://" after the scheme indicates that
|
| -// there is a hostname, etc. which we call a standard URL.
|
| template<typename CHAR>
|
| -bool HasStandardSchemeSeparator(const CHAR* spec, int spec_len,
|
| - const url_parse::Component& scheme) {
|
| - int after_scheme = scheme.end();
|
| - if (spec_len < after_scheme + 3)
|
| - return false;
|
| - return spec[after_scheme] == ':' &&
|
| - spec[after_scheme + 1] == '/' &&
|
| - spec[after_scheme + 2] == '/';
|
| -}
|
| -
|
| -template<typename CHAR>
|
| -bool DoIsStandard(const CHAR* spec, int spec_len,
|
| - const url_parse::Component& scheme) {
|
| - return HasStandardSchemeSeparator(spec, spec_len, scheme) ||
|
| - IsStandardScheme(spec, scheme);
|
| -}
|
| -
|
| -template<typename CHAR>
|
| bool DoFindAndCompareScheme(const CHAR* str,
|
| int str_len,
|
| const char* compare,
|
| @@ -184,7 +164,7 @@
|
| #endif
|
|
|
| url_parse::Component scheme;
|
| - if(!url_parse::ExtractScheme(spec, spec_len, &scheme))
|
| + if (!url_parse::ExtractScheme(spec, spec_len, &scheme))
|
| return false;
|
|
|
| // This is the parsed version of the input URL, we have to canonicalize it
|
| @@ -197,7 +177,7 @@
|
| charset_converter,
|
| output, output_parsed);
|
|
|
| - } else if (IsStandard(spec, spec_len, scheme)) {
|
| + } else if (DoIsStandard(spec, scheme)) {
|
| // All "normal" URLs.
|
| url_parse::ParseStandardURL(spec, spec_len, &parsed_input);
|
| success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,
|
| @@ -239,7 +219,7 @@
|
| // See if our base URL should be treated as "standard".
|
| bool standard_base_scheme =
|
| base_parsed.scheme.is_nonempty() &&
|
| - IsStandard(base_spec, base_spec_len, base_parsed.scheme);
|
| + DoIsStandard(base_spec, base_parsed.scheme);
|
|
|
| bool is_relative;
|
| url_parse::Component relative_component;
|
| @@ -275,46 +255,82 @@
|
| url_canon::CharsetConverter* charset_converter,
|
| url_canon::CanonOutput* output,
|
| url_parse::Parsed* out_parsed) {
|
| - // Note that we dispatch to the parser according the the scheme type of
|
| - // the OUTPUT URL. Normally, this is the same as our scheme, but if the
|
| - // scheme is being overridden, we need to test that.
|
| + // If the scheme is overridden, just do a simple string substitution and
|
| + // reparse the whole thing. There are lots of edge cases that we really don't
|
| + // want to deal with. Like what happens if I replace "http://e:8080/foo"
|
| + // with a file? Does it become "file:///E:/8080/foo" where the port number
|
| + // becomes part of the path? Parsing that string as a file URL says "yes"
|
| + // but almost no sane rule for dealing with the components individually would
|
| + // come up with that.
|
| + //
|
| + // Why allow these crazy cases at all? Programmatically, there is almost no
|
| + // case for replacing the scheme. The most common case for hitting this is
|
| + // in JS when building up a URL using the location object. In this case, the
|
| + // JS code expects the string substitution behavior:
|
| + // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
|
| + if (replacements.IsSchemeOverridden()) {
|
| + // Canonicalize the new scheme so it is 8-bit and can be concatenated with
|
| + // the existing spec.
|
| + url_canon::RawCanonOutput<128> scheme_replaced;
|
| + url_parse::Component scheme_replaced_parsed;
|
| + url_canon::CanonicalizeScheme(
|
| + replacements.sources().scheme,
|
| + replacements.components().scheme,
|
| + &scheme_replaced, &scheme_replaced_parsed);
|
|
|
| - if (// Either the scheme is not replaced and the old one is a file,
|
| - (!replacements.IsSchemeOverridden() &&
|
| - CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) ||
|
| - // ...or it is being replaced and the new one is a file.
|
| - (replacements.IsSchemeOverridden() &&
|
| - CompareSchemeComponent(replacements.sources().scheme,
|
| - replacements.components().scheme,
|
| - kFileScheme))) {
|
| + // We can assume that the input is canonicalized, which means it always has
|
| + // a colon after the scheme (or where the scheme would be).
|
| + int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
|
| + : 1;
|
| + if (spec_len - spec_after_colon > 0) {
|
| + scheme_replaced.Append(&spec[spec_after_colon],
|
| + spec_len - spec_after_colon);
|
| + }
|
| +
|
| + // We now need to completely re-parse the resulting string since its meaning
|
| + // may have changed with the different scheme.
|
| + url_canon::RawCanonOutput<128> recanonicalized;
|
| + url_parse::Parsed recanonicalized_parsed;
|
| + DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),
|
| + charset_converter,
|
| + &recanonicalized, &recanonicalized_parsed);
|
| +
|
| + // Recurse using the version with the scheme already replaced. This will now
|
| + // use the replacement rules for the new scheme.
|
| + //
|
| + // Warning: this code assumes that ReplaceComponents will re-check all
|
| + // components for validity. This is because we can't fail if DoCanonicalize
|
| + // failed above since theoretically the thing making it fail could be
|
| + // getting replaced here. If ReplaceComponents didn't re-check everything,
|
| + // we wouldn't know if something *not* getting replaced is a problem.
|
| + // If the scheme-specific replacers are made more intelligent so they don't
|
| + // re-check everything, we should instead recanonicalize the whole thing
|
| + // after this call to check validity (this assumes replacing the scheme is
|
| + // much much less common than other types of replacements, like clearing the
|
| + // ref).
|
| + url_canon::Replacements<CHAR> replacements_no_scheme = replacements;
|
| + replacements_no_scheme.SetScheme(NULL, url_parse::Component());
|
| + return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
|
| + recanonicalized_parsed, replacements_no_scheme,
|
| + charset_converter, output, out_parsed);
|
| + }
|
| +
|
| + // If we get here, then we know the scheme doesn't need to be replaced, so can
|
| + // just key off the scheme in the spec to know how to do the replacements.
|
| + if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {
|
| return url_canon::ReplaceFileURL(spec, parsed, replacements,
|
| charset_converter, output, out_parsed);
|
| }
|
| -
|
| - if (// Either the scheme is not replaced and the old one is standard,
|
| - (!replacements.IsSchemeOverridden() &&
|
| - IsStandard(spec, spec_len, parsed.scheme)) ||
|
| - // ...or it is being replaced and the new one is standard.
|
| - (replacements.IsSchemeOverridden() &&
|
| - IsStandardScheme(replacements.sources().scheme,
|
| - replacements.components().scheme))) {
|
| - // Standard URL with all parts.
|
| + if (DoIsStandard(spec, parsed.scheme)) {
|
| return url_canon::ReplaceStandardURL(spec, parsed, replacements,
|
| charset_converter, output, out_parsed);
|
| }
|
| -
|
| - if (// Either the scheme is not replaced and the old one is mailto,
|
| - (!replacements.IsSchemeOverridden() &&
|
| - CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) ||
|
| - // ...or it is being replaced and the new one is a mailto.
|
| - (replacements.IsSchemeOverridden() &&
|
| - CompareSchemeComponent(replacements.sources().scheme,
|
| - replacements.components().scheme,
|
| - kMailtoScheme))) {
|
| + if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {
|
| return url_canon::ReplaceMailtoURL(spec, parsed, replacements,
|
| output, out_parsed);
|
| }
|
|
|
| + // Default is a path URL.
|
| return url_canon::ReplacePathURL(spec, parsed, replacements,
|
| output, out_parsed);
|
| }
|
| @@ -335,14 +351,12 @@
|
| standard_schemes->push_back(dup_scheme);
|
| }
|
|
|
| -bool IsStandard(const char* spec, int spec_len,
|
| - const url_parse::Component& scheme) {
|
| - return DoIsStandard(spec, spec_len, scheme);
|
| +bool IsStandard(const char* spec, const url_parse::Component& scheme) {
|
| + return DoIsStandard(spec, scheme);
|
| }
|
|
|
| -bool IsStandard(const char16* spec, int spec_len,
|
| - const url_parse::Component& scheme) {
|
| - return DoIsStandard(spec, spec_len, scheme);
|
| +bool IsStandard(const char16* spec, const url_parse::Component& scheme) {
|
| + return DoIsStandard(spec, scheme);
|
| }
|
|
|
| bool FindAndCompareScheme(const char* str,
|
|
|