Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(810)

Unified Diff: src/url_util.cc

Issue 564011: Remove the rule that "://" means a standard URL. This fixes a number of bugs... (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/
Patch Set: '' Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/url_util.h ('k') | src/url_util_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/url_util.cc
===================================================================
--- src/url_util.cc (revision 122)
+++ src/url_util.cc (working copy)
@@ -58,13 +58,15 @@
const char kFileScheme[] = "file"; // Used in a number of places.
const char kMailtoScheme[] = "mailto";
-const int kNumStandardURLSchemes = 5;
+const int kNumStandardURLSchemes = 7;
const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
"http",
"https",
kFileScheme, // Yes, file urls can have a hostname!
"ftp",
"gopher",
+ "ws", // WebSocket.
+ "wss", // WebSocket secure.
};
// List of the currently installed standard schemes. This list is lazily
@@ -96,10 +98,9 @@
}
// Returns true if the given scheme identified by |scheme| within |spec| is one
-// of the registered "standard" schemes. Note that this does not check for
-// "://", use IsStandard for that.
+// of the registered "standard" schemes.
template<typename CHAR>
-bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) {
+bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {
if (!scheme.is_nonempty())
return false; // Empty or invalid schemes are non-standard.
@@ -112,28 +113,7 @@
return false;
}
-// Returns true if the stuff following the scheme in the given spec indicates
-// a "standard" URL. The presence of "://" after the scheme indicates that
-// there is a hostname, etc. which we call a standard URL.
template<typename CHAR>
-bool HasStandardSchemeSeparator(const CHAR* spec, int spec_len,
- const url_parse::Component& scheme) {
- int after_scheme = scheme.end();
- if (spec_len < after_scheme + 3)
- return false;
- return spec[after_scheme] == ':' &&
- spec[after_scheme + 1] == '/' &&
- spec[after_scheme + 2] == '/';
-}
-
-template<typename CHAR>
-bool DoIsStandard(const CHAR* spec, int spec_len,
- const url_parse::Component& scheme) {
- return HasStandardSchemeSeparator(spec, spec_len, scheme) ||
- IsStandardScheme(spec, scheme);
-}
-
-template<typename CHAR>
bool DoFindAndCompareScheme(const CHAR* str,
int str_len,
const char* compare,
@@ -184,7 +164,7 @@
#endif
url_parse::Component scheme;
- if(!url_parse::ExtractScheme(spec, spec_len, &scheme))
+ if (!url_parse::ExtractScheme(spec, spec_len, &scheme))
return false;
// This is the parsed version of the input URL, we have to canonicalize it
@@ -197,7 +177,7 @@
charset_converter,
output, output_parsed);
- } else if (IsStandard(spec, spec_len, scheme)) {
+ } else if (DoIsStandard(spec, scheme)) {
// All "normal" URLs.
url_parse::ParseStandardURL(spec, spec_len, &parsed_input);
success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,
@@ -239,7 +219,7 @@
// See if our base URL should be treated as "standard".
bool standard_base_scheme =
base_parsed.scheme.is_nonempty() &&
- IsStandard(base_spec, base_spec_len, base_parsed.scheme);
+ DoIsStandard(base_spec, base_parsed.scheme);
bool is_relative;
url_parse::Component relative_component;
@@ -275,46 +255,82 @@
url_canon::CharsetConverter* charset_converter,
url_canon::CanonOutput* output,
url_parse::Parsed* out_parsed) {
- // Note that we dispatch to the parser according the the scheme type of
- // the OUTPUT URL. Normally, this is the same as our scheme, but if the
- // scheme is being overridden, we need to test that.
+ // If the scheme is overridden, just do a simple string substitution and
+ // reparse the whole thing. There are lots of edge cases that we really don't
+ // want to deal with. Like what happens if I replace "http://e:8080/foo"
+ // with a file. Does it become "file:///E:/8080/foo" where the port number
+ // becomes part of the path? Parsing that string as a file URL says "yes"
+ // but almost no sane rule for dealing with the components individually would
+ // come up with that.
+ //
+ // Why allow these crazy cases at all? Programatically, there is almost no
+ // case for replacing the scheme. The most common case for hitting this is
+ // in JS when building up a URL using the location object. In this case, the
+ // JS code expects the string substitution behavior:
+ // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
+ if (replacements.IsSchemeOverridden()) {
+ // Canonicalize the new scheme so it is 8-bit and can be concatenated with
+ // the existing spec.
+ url_canon::RawCanonOutput<128> scheme_replaced;
+ url_parse::Component scheme_replaced_parsed;
+ url_canon::CanonicalizeScheme(
+ replacements.sources().scheme,
+ replacements.components().scheme,
+ &scheme_replaced, &scheme_replaced_parsed);
- if (// Either the scheme is not replaced and the old one is a file,
- (!replacements.IsSchemeOverridden() &&
- CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) ||
- // ...or it is being replaced and the new one is a file.
- (replacements.IsSchemeOverridden() &&
- CompareSchemeComponent(replacements.sources().scheme,
- replacements.components().scheme,
- kFileScheme))) {
+ // We can assume that the input is canonicalized, which means it always has
+ // a colon after the scheme (or where the scheme would be).
+ int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
+ : 1;
+ if (spec_len - spec_after_colon > 0) {
+ scheme_replaced.Append(&spec[spec_after_colon],
+ spec_len - spec_after_colon);
+ }
+
+ // We now need to completely re-parse the resulting string since its meaning
+ // may have changed with the different scheme.
+ url_canon::RawCanonOutput<128> recanonicalized;
+ url_parse::Parsed recanonicalized_parsed;
+ DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),
+ charset_converter,
+ &recanonicalized, &recanonicalized_parsed);
+
+ // Recurse using the version with the scheme already replaced. This will now
+ // use the replacement rules for the new scheme.
+ //
+ // Warning: this code assumes that ReplaceComponents will re-check all
+ // components for validity. This is because we can't fail if DoCanonicalize
+ // failed above since theoretically the thing making it fail could be
+ // getting replaced here. If ReplaceComponents didn't re-check everything,
+ // we wouldn't know if something *not* getting replaced is a problem.
+ // If the scheme-specific replacers are made more intelligent so they don't
+ // re-check everything, we should instead recanonicalize the whole thing
+ // after this call to check validity (this assumes replacing the scheme is
+ // much much less common than other types of replacements, like clearing the
+ // ref).
+ url_canon::Replacements<CHAR> replacements_no_scheme = replacements;
+ replacements_no_scheme.SetScheme(NULL, url_parse::Component());
+ return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
+ recanonicalized_parsed, replacements_no_scheme,
+ charset_converter, output, out_parsed);
+ }
+
+ // If we get here, then we know the scheme doesn't need to be replaced, so can
+ // just key off the scheme in the spec to know how to do the replacements.
+ if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {
return url_canon::ReplaceFileURL(spec, parsed, replacements,
charset_converter, output, out_parsed);
}
-
- if (// Either the scheme is not replaced and the old one is standard,
- (!replacements.IsSchemeOverridden() &&
- IsStandard(spec, spec_len, parsed.scheme)) ||
- // ...or it is being replaced and the new one is standard.
- (replacements.IsSchemeOverridden() &&
- IsStandardScheme(replacements.sources().scheme,
- replacements.components().scheme))) {
- // Standard URL with all parts.
+ if (DoIsStandard(spec, parsed.scheme)) {
return url_canon::ReplaceStandardURL(spec, parsed, replacements,
charset_converter, output, out_parsed);
}
-
- if (// Either the scheme is not replaced and the old one is mailto,
- (!replacements.IsSchemeOverridden() &&
- CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) ||
- // ...or it is being replaced and the new one is a mailto.
- (replacements.IsSchemeOverridden() &&
- CompareSchemeComponent(replacements.sources().scheme,
- replacements.components().scheme,
- kMailtoScheme))) {
+ if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {
return url_canon::ReplaceMailtoURL(spec, parsed, replacements,
output, out_parsed);
}
+ // Default is a path URL.
return url_canon::ReplacePathURL(spec, parsed, replacements,
output, out_parsed);
}
@@ -335,14 +351,12 @@
standard_schemes->push_back(dup_scheme);
}
-bool IsStandard(const char* spec, int spec_len,
- const url_parse::Component& scheme) {
- return DoIsStandard(spec, spec_len, scheme);
+bool IsStandard(const char* spec, const url_parse::Component& scheme) {
+ return DoIsStandard(spec, scheme);
}
-bool IsStandard(const char16* spec, int spec_len,
- const url_parse::Component& scheme) {
- return DoIsStandard(spec, spec_len, scheme);
+bool IsStandard(const char16* spec, const url_parse::Component& scheme) {
+ return DoIsStandard(spec, scheme);
}
bool FindAndCompareScheme(const char* str,
« no previous file with comments | « src/url_util.h ('k') | src/url_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698