Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(253)

Side by Side Diff: src/url_util.cc

Issue 564011: Remove the rule that "://" means a standard URL. This fixes a number of bugs... (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/
Patch Set: '' Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/url_util.h ('k') | src/url_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2007, Google Inc. 1 // Copyright 2007, Google Inc.
2 // All rights reserved. 2 // All rights reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are 5 // modification, are permitted provided that the following conditions are
6 // met: 6 // met:
7 // 7 //
8 // * Redistributions of source code must retain the above copyright 8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer. 9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above 10 // * Redistributions in binary form must reproduce the above
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 for (Iter it = a_begin; it != a_end; ++it, ++b) { 51 for (Iter it = a_begin; it != a_end; ++it, ++b) {
52 if (!*b || ToLowerASCII(*it) != *b) 52 if (!*b || ToLowerASCII(*it) != *b)
53 return false; 53 return false;
54 } 54 }
55 return *b == 0; 55 return *b == 0;
56 } 56 }
57 57
58 const char kFileScheme[] = "file"; // Used in a number of places. 58 const char kFileScheme[] = "file"; // Used in a number of places.
59 const char kMailtoScheme[] = "mailto"; 59 const char kMailtoScheme[] = "mailto";
60 60
61 const int kNumStandardURLSchemes = 5; 61 const int kNumStandardURLSchemes = 7;
62 const char* kStandardURLSchemes[kNumStandardURLSchemes] = { 62 const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
63 "http", 63 "http",
64 "https", 64 "https",
65 kFileScheme, // Yes, file urls can have a hostname! 65 kFileScheme, // Yes, file urls can have a hostname!
66 "ftp", 66 "ftp",
67 "gopher", 67 "gopher",
68 "ws", // WebSocket.
69 "wss", // WebSocket secure.
68 }; 70 };
69 71
70 // List of the currently installed standard schemes. This list is lazily 72 // List of the currently installed standard schemes. This list is lazily
71 // initialized by InitStandardSchemes and is leaked on shutdown to prevent 73 // initialized by InitStandardSchemes and is leaked on shutdown to prevent
72 // any destructors from being called that will slow us down or cause problems. 74 // any destructors from being called that will slow us down or cause problems.
73 std::vector<const char*>* standard_schemes = NULL; 75 std::vector<const char*>* standard_schemes = NULL;
74 76
75 // Ensures that the standard_schemes list is initialized, does nothing if it 77 // Ensures that the standard_schemes list is initialized, does nothing if it
76 // already has values. 78 // already has values.
77 void InitStandardSchemes() { 79 void InitStandardSchemes() {
(...skipping 11 matching lines...) Expand all
89 const url_parse::Component& component, 91 const url_parse::Component& component,
90 const char* compare_to) { 92 const char* compare_to) {
91 if (!component.is_nonempty()) 93 if (!component.is_nonempty())
92 return compare_to[0] == 0; // When component is empty, match empty scheme. 94 return compare_to[0] == 0; // When component is empty, match empty scheme.
93 return LowerCaseEqualsASCII(&spec[component.begin], 95 return LowerCaseEqualsASCII(&spec[component.begin],
94 &spec[component.end()], 96 &spec[component.end()],
95 compare_to); 97 compare_to);
96 } 98 }
97 99
98 // Returns true if the given scheme identified by |scheme| within |spec| is one 100 // Returns true if the given scheme identified by |scheme| within |spec| is one
99 // of the registered "standard" schemes. Note that this does not check for 101 // of the registered "standard" schemes.
100 // "://", use IsStandard for that.
101 template<typename CHAR> 102 template<typename CHAR>
102 bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) { 103 bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {
103 if (!scheme.is_nonempty()) 104 if (!scheme.is_nonempty())
104 return false; // Empty or invalid schemes are non-standard. 105 return false; // Empty or invalid schemes are non-standard.
105 106
106 InitStandardSchemes(); 107 InitStandardSchemes();
107 for (size_t i = 0; i < standard_schemes->size(); i++) { 108 for (size_t i = 0; i < standard_schemes->size(); i++) {
108 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], 109 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],
109 standard_schemes->at(i))) 110 standard_schemes->at(i)))
110 return true; 111 return true;
111 } 112 }
112 return false; 113 return false;
113 } 114 }
114 115
115 // Returns true if the stuff following the scheme in the given spec indicates
116 // a "standard" URL. The presence of "://" after the scheme indicates that
117 // there is a hostname, etc. which we call a standard URL.
118 template<typename CHAR>
119 bool HasStandardSchemeSeparator(const CHAR* spec, int spec_len,
120 const url_parse::Component& scheme) {
121 int after_scheme = scheme.end();
122 if (spec_len < after_scheme + 3)
123 return false;
124 return spec[after_scheme] == ':' &&
125 spec[after_scheme + 1] == '/' &&
126 spec[after_scheme + 2] == '/';
127 }
128
129 template<typename CHAR>
130 bool DoIsStandard(const CHAR* spec, int spec_len,
131 const url_parse::Component& scheme) {
132 return HasStandardSchemeSeparator(spec, spec_len, scheme) ||
133 IsStandardScheme(spec, scheme);
134 }
135
136 template<typename CHAR> 116 template<typename CHAR>
137 bool DoFindAndCompareScheme(const CHAR* str, 117 bool DoFindAndCompareScheme(const CHAR* str,
138 int str_len, 118 int str_len,
139 const char* compare, 119 const char* compare,
140 url_parse::Component* found_scheme) { 120 url_parse::Component* found_scheme) {
141 url_parse::Component our_scheme; 121 url_parse::Component our_scheme;
142 if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) { 122 if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) {
143 // No scheme. 123 // No scheme.
144 if (found_scheme) 124 if (found_scheme)
145 *found_scheme = url_parse::Component(); 125 *found_scheme = url_parse::Component();
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
177 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || 157 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) ||
178 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { 158 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {
179 url_parse::ParseFileURL(spec, spec_len, &parsed_input); 159 url_parse::ParseFileURL(spec, spec_len, &parsed_input);
180 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, 160 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,
181 charset_converter, 161 charset_converter,
182 output, output_parsed); 162 output, output_parsed);
183 } 163 }
184 #endif 164 #endif
185 165
186 url_parse::Component scheme; 166 url_parse::Component scheme;
187 if(!url_parse::ExtractScheme(spec, spec_len, &scheme)) 167 if (!url_parse::ExtractScheme(spec, spec_len, &scheme))
188 return false; 168 return false;
189 169
190 // This is the parsed version of the input URL, we have to canonicalize it 170 // This is the parsed version of the input URL, we have to canonicalize it
191 // before storing it in our object. 171 // before storing it in our object.
192 bool success; 172 bool success;
193 if (CompareSchemeComponent(spec, scheme, kFileScheme)) { 173 if (CompareSchemeComponent(spec, scheme, kFileScheme)) {
194 // File URLs are special. 174 // File URLs are special.
195 url_parse::ParseFileURL(spec, spec_len, &parsed_input); 175 url_parse::ParseFileURL(spec, spec_len, &parsed_input);
196 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, 176 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,
197 charset_converter, 177 charset_converter,
198 output, output_parsed); 178 output, output_parsed);
199 179
200 } else if (IsStandard(spec, spec_len, scheme)) { 180 } else if (DoIsStandard(spec, scheme)) {
201 // All "normal" URLs. 181 // All "normal" URLs.
202 url_parse::ParseStandardURL(spec, spec_len, &parsed_input); 182 url_parse::ParseStandardURL(spec, spec_len, &parsed_input);
203 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, 183 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,
204 charset_converter, 184 charset_converter,
205 output, output_parsed); 185 output, output_parsed);
206 186
207 } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) { 187 } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) {
208 // Mailto are treated like a standard url with only a scheme, path, query 188 // Mailto are treated like a standard url with only a scheme, path, query
209 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); 189 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input);
210 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, 190 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input,
(...skipping 21 matching lines...) Expand all
232 // copying to the new buffer. 212 // copying to the new buffer.
233 url_canon::RawCanonOutputT<CHAR> whitespace_buffer; 213 url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
234 int relative_length; 214 int relative_length;
235 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, 215 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
236 &whitespace_buffer, 216 &whitespace_buffer,
237 &relative_length); 217 &relative_length);
238 218
239 // See if our base URL should be treated as "standard". 219 // See if our base URL should be treated as "standard".
240 bool standard_base_scheme = 220 bool standard_base_scheme =
241 base_parsed.scheme.is_nonempty() && 221 base_parsed.scheme.is_nonempty() &&
242 IsStandard(base_spec, base_spec_len, base_parsed.scheme); 222 DoIsStandard(base_spec, base_parsed.scheme);
243 223
244 bool is_relative; 224 bool is_relative;
245 url_parse::Component relative_component; 225 url_parse::Component relative_component;
246 if (!url_canon::IsRelativeURL(base_spec, base_parsed, 226 if (!url_canon::IsRelativeURL(base_spec, base_parsed,
247 relative, relative_length, 227 relative, relative_length,
248 standard_base_scheme, 228 standard_base_scheme,
249 &is_relative, 229 &is_relative,
250 &relative_component)) { 230 &relative_component)) {
251 // Error resolving. 231 // Error resolving.
252 return false; 232 return false;
(...skipping 15 matching lines...) Expand all
268 } 248 }
269 249
270 template<typename CHAR> 250 template<typename CHAR>
271 bool DoReplaceComponents(const char* spec, 251 bool DoReplaceComponents(const char* spec,
272 int spec_len, 252 int spec_len,
273 const url_parse::Parsed& parsed, 253 const url_parse::Parsed& parsed,
274 const url_canon::Replacements<CHAR>& replacements, 254 const url_canon::Replacements<CHAR>& replacements,
275 url_canon::CharsetConverter* charset_converter, 255 url_canon::CharsetConverter* charset_converter,
276 url_canon::CanonOutput* output, 256 url_canon::CanonOutput* output,
277 url_parse::Parsed* out_parsed) { 257 url_parse::Parsed* out_parsed) {
278 // Note that we dispatch to the parser according to the scheme type of 258 // If the scheme is overridden, just do a simple string substitution and
279 // the OUTPUT URL. Normally, this is the same as our scheme, but if the 259 // reparse the whole thing. There are lots of edge cases that we really don't
280 // scheme is being overridden, we need to test that. 260 // want to deal with. Like what happens if I replace "http://e:8080/foo"
261 // with a file. Does it become "file:///E:/8080/foo" where the port number
262 // becomes part of the path? Parsing that string as a file URL says "yes"
263 // but almost no sane rule for dealing with the components individually would
264 // come up with that.
265 //
266 // Why allow these crazy cases at all? Programmatically, there is almost no
267 // case for replacing the scheme. The most common case for hitting this is
268 // in JS when building up a URL using the location object. In this case, the
269 // JS code expects the string substitution behavior:
270 // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
271 if (replacements.IsSchemeOverridden()) {
272 // Canonicalize the new scheme so it is 8-bit and can be concatenated with
273 // the existing spec.
274 url_canon::RawCanonOutput<128> scheme_replaced;
275 url_parse::Component scheme_replaced_parsed;
276 url_canon::CanonicalizeScheme(
277 replacements.sources().scheme,
278 replacements.components().scheme,
279 &scheme_replaced, &scheme_replaced_parsed);
281 280
282 if (// Either the scheme is not replaced and the old one is a file, 281 // We can assume that the input is canonicalized, which means it always has
283 (!replacements.IsSchemeOverridden() && 282 // a colon after the scheme (or where the scheme would be).
284 CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) || 283 int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
285 // ...or it is being replaced and the new one is a file. 284 : 1;
286 (replacements.IsSchemeOverridden() && 285 if (spec_len - spec_after_colon > 0) {
287 CompareSchemeComponent(replacements.sources().scheme, 286 scheme_replaced.Append(&spec[spec_after_colon],
288 replacements.components().scheme, 287 spec_len - spec_after_colon);
289 kFileScheme))) { 288 }
289
290 // We now need to completely re-parse the resulting string since its meaning
291 // may have changed with the different scheme.
292 url_canon::RawCanonOutput<128> recanonicalized;
293 url_parse::Parsed recanonicalized_parsed;
294 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),
295 charset_converter,
296 &recanonicalized, &recanonicalized_parsed);
297
298 // Recurse using the version with the scheme already replaced. This will now
299 // use the replacement rules for the new scheme.
300 //
301 // Warning: this code assumes that ReplaceComponents will re-check all
302 // components for validity. This is because we can't fail if DoCanonicalize
303 // failed above since theoretically the thing making it fail could be
304 // getting replaced here. If ReplaceComponents didn't re-check everything,
305 // we wouldn't know if something *not* getting replaced is a problem.
306 // If the scheme-specific replacers are made more intelligent so they don't
307 // re-check everything, we should instead recanonicalize the whole thing
308 // after this call to check validity (this assumes replacing the scheme is
309 // much much less common than other types of replacements, like clearing the
310 // ref).
311 url_canon::Replacements<CHAR> replacements_no_scheme = replacements;
312 replacements_no_scheme.SetScheme(NULL, url_parse::Component());
313 return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
314 recanonicalized_parsed, replacements_no_scheme,
315 charset_converter, output, out_parsed);
316 }
317
318 // If we get here, then we know the scheme doesn't need to be replaced, so can
319 // just key off the scheme in the spec to know how to do the replacements.
320 if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {
290 return url_canon::ReplaceFileURL(spec, parsed, replacements, 321 return url_canon::ReplaceFileURL(spec, parsed, replacements,
291 charset_converter, output, out_parsed); 322 charset_converter, output, out_parsed);
292 } 323 }
293 324 if (DoIsStandard(spec, parsed.scheme)) {
294 if (// Either the scheme is not replaced and the old one is standard,
295 (!replacements.IsSchemeOverridden() &&
296 IsStandard(spec, spec_len, parsed.scheme)) ||
297 // ...or it is being replaced and the new one is standard.
298 (replacements.IsSchemeOverridden() &&
299 IsStandardScheme(replacements.sources().scheme,
300 replacements.components().scheme))) {
301 // Standard URL with all parts.
302 return url_canon::ReplaceStandardURL(spec, parsed, replacements, 325 return url_canon::ReplaceStandardURL(spec, parsed, replacements,
303 charset_converter, output, out_parsed); 326 charset_converter, output, out_parsed);
304 } 327 }
305 328 if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {
306 if (// Either the scheme is not replaced and the old one is mailto,
307 (!replacements.IsSchemeOverridden() &&
308 CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) ||
309 // ...or it is being replaced and the new one is a mailto.
310 (replacements.IsSchemeOverridden() &&
311 CompareSchemeComponent(replacements.sources().scheme,
312 replacements.components().scheme,
313 kMailtoScheme))) {
314 return url_canon::ReplaceMailtoURL(spec, parsed, replacements, 329 return url_canon::ReplaceMailtoURL(spec, parsed, replacements,
315 output, out_parsed); 330 output, out_parsed);
316 } 331 }
317 332
333 // Default is a path URL.
318 return url_canon::ReplacePathURL(spec, parsed, replacements, 334 return url_canon::ReplacePathURL(spec, parsed, replacements,
319 output, out_parsed); 335 output, out_parsed);
320 } 336 }
321 337
322 } // namespace 338 } // namespace
323 339
324 void AddStandardScheme(const char* new_scheme) { 340 void AddStandardScheme(const char* new_scheme) {
325 size_t scheme_len = strlen(new_scheme); 341 size_t scheme_len = strlen(new_scheme);
326 if (scheme_len == 0) 342 if (scheme_len == 0)
327 return; 343 return;
328 344
329 // Duplicate the scheme into a new buffer and add it to the list of standard 345 // Duplicate the scheme into a new buffer and add it to the list of standard
330 // schemes. This pointer will be leaked on shutdown. 346 // schemes. This pointer will be leaked on shutdown.
331 char* dup_scheme = new char[scheme_len + 1]; 347 char* dup_scheme = new char[scheme_len + 1];
332 memcpy(dup_scheme, new_scheme, scheme_len + 1); 348 memcpy(dup_scheme, new_scheme, scheme_len + 1);
333 349
334 InitStandardSchemes(); 350 InitStandardSchemes();
335 standard_schemes->push_back(dup_scheme); 351 standard_schemes->push_back(dup_scheme);
336 } 352 }
337 353
338 bool IsStandard(const char* spec, int spec_len, 354 bool IsStandard(const char* spec, const url_parse::Component& scheme) {
339 const url_parse::Component& scheme) { 355 return DoIsStandard(spec, scheme);
340 return DoIsStandard(spec, spec_len, scheme);
341 } 356 }
342 357
343 bool IsStandard(const char16* spec, int spec_len, 358 bool IsStandard(const char16* spec, const url_parse::Component& scheme) {
344 const url_parse::Component& scheme) { 359 return DoIsStandard(spec, scheme);
345 return DoIsStandard(spec, spec_len, scheme);
346 } 360 }
347 361
348 bool FindAndCompareScheme(const char* str, 362 bool FindAndCompareScheme(const char* str,
349 int str_len, 363 int str_len,
350 const char* compare, 364 const char* compare,
351 url_parse::Component* found_scheme) { 365 url_parse::Component* found_scheme) {
352 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); 366 return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
353 } 367 }
354 368
355 bool FindAndCompareScheme(const char16* str, 369 bool FindAndCompareScheme(const char16* str,
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
444 return a_begin == a_end && b_begin == b_end; 458 return a_begin == a_end && b_begin == b_end;
445 } 459 }
446 460
447 bool LowerCaseEqualsASCII(const char16* a_begin, 461 bool LowerCaseEqualsASCII(const char16* a_begin,
448 const char16* a_end, 462 const char16* a_end,
449 const char* b) { 463 const char* b) {
450 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 464 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
451 } 465 }
452 466
453 } // namespace url_util 467 } // namespace url_util
OLDNEW
« no previous file with comments | « src/url_util.h ('k') | src/url_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698