Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(511)

Side by Side Diff: components/url_fixer/url_fixer.cc

Issue 320253004: Componentize URLFixerUpper. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/net/url_fixer_upper.h" 5 #include "components/url_fixer/url_fixer.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #if defined(OS_POSIX) 9 #if defined(OS_POSIX)
10 #include "base/environment.h" 10 #include "base/environment.h"
11 #endif 11 #endif
12 #include "base/file_util.h" 12 #include "base/file_util.h"
13 #include "base/logging.h" 13 #include "base/logging.h"
14 #include "base/strings/string_util.h" 14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
16 #include "chrome/common/url_constants.h"
17 #include "net/base/escape.h" 16 #include "net/base/escape.h"
18 #include "net/base/filename_util.h" 17 #include "net/base/filename_util.h"
19 #include "net/base/net_util.h" 18 #include "net/base/net_util.h"
20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 19 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
21 #include "url/url_file.h" 20 #include "url/url_file.h"
22 #include "url/url_parse.h" 21 #include "url/url_parse.h"
23 #include "url/url_util.h" 22 #include "url/url_util.h"
24 23
25 const char* URLFixerUpper::home_directory_override = NULL; 24 const char* url_fixer::home_directory_override = NULL;
26 25
27 namespace { 26 namespace {
28 27
28 // Hardcode these constants to avoid dependencies on //chrome and //content.
29 const char kChromeUIScheme[] = "chrome";
30 const char kChromeUIDefaultHost[] = "version";
31 const char kViewSourceScheme[] = "view-source";
32
29 // TODO(estade): Remove these ugly, ugly functions. They are only used in 33 // TODO(estade): Remove these ugly, ugly functions. They are only used in
30 // SegmentURL. A url::Parsed object keeps track of a bunch of indices into 34 // SegmentURL. A url::Parsed object keeps track of a bunch of indices into
31 // a url string, and these need to be updated when the URL is converted from 35 // a url string, and these need to be updated when the URL is converted from
32 // UTF8 to UTF16. Instead of this after-the-fact adjustment, we should parse it 36 // UTF8 to UTF16. Instead of this after-the-fact adjustment, we should parse it
33 // in the correct string format to begin with. 37 // in the correct string format to begin with.
34 url::Component UTF8ComponentToUTF16Component( 38 url::Component UTF8ComponentToUTF16Component(
35 const std::string& text_utf8, 39 const std::string& text_utf8,
36 const url::Component& component_utf8) { 40 const url::Component& component_utf8) {
37 if (component_utf8.len == -1) 41 if (component_utf8.len == -1)
38 return url::Component(); 42 return url::Component();
39 43
40 std::string before_component_string = 44 std::string before_component_string =
41 text_utf8.substr(0, component_utf8.begin); 45 text_utf8.substr(0, component_utf8.begin);
42 std::string component_string = text_utf8.substr(component_utf8.begin, 46 std::string component_string =
43 component_utf8.len); 47 text_utf8.substr(component_utf8.begin, component_utf8.len);
44 base::string16 before_component_string_16 = 48 base::string16 before_component_string_16 =
45 base::UTF8ToUTF16(before_component_string); 49 base::UTF8ToUTF16(before_component_string);
46 base::string16 component_string_16 = base::UTF8ToUTF16(component_string); 50 base::string16 component_string_16 = base::UTF8ToUTF16(component_string);
47 url::Component component_16(before_component_string_16.length(), 51 url::Component component_16(before_component_string_16.length(),
48 component_string_16.length()); 52 component_string_16.length());
49 return component_16; 53 return component_16;
50 } 54 }
51 55
52 void UTF8PartsToUTF16Parts(const std::string& text_utf8, 56 void UTF8PartsToUTF16Parts(const std::string& text_utf8,
53 const url::Parsed& parts_utf8, 57 const url::Parsed& parts_utf8,
54 url::Parsed* parts) { 58 url::Parsed* parts) {
55 if (base::IsStringASCII(text_utf8)) { 59 if (base::IsStringASCII(text_utf8)) {
56 *parts = parts_utf8; 60 *parts = parts_utf8;
57 return; 61 return;
58 } 62 }
59 63
60 parts->scheme = 64 parts->scheme = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.scheme);
61 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.scheme); 65 parts->username =
62 parts ->username =
63 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.username); 66 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.username);
64 parts->password = 67 parts->password =
65 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.password); 68 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.password);
66 parts->host = 69 parts->host = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.host);
67 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.host); 70 parts->port = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.port);
68 parts->port = 71 parts->path = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.path);
69 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.port); 72 parts->query = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.query);
70 parts->path = 73 parts->ref = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref);
71 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.path);
72 parts->query =
73 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.query);
74 parts->ref =
75 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref);
76 } 74 }
77 75
78 base::TrimPositions TrimWhitespaceUTF8(const std::string& input, 76 base::TrimPositions TrimWhitespaceUTF8(const std::string& input,
79 base::TrimPositions positions, 77 base::TrimPositions positions,
80 std::string* output) { 78 std::string* output) {
81 // This implementation is not so fast since it converts the text encoding 79 // This implementation is not so fast since it converts the text encoding
82 // twice. Please feel free to file a bug if this function hurts the 80 // twice. Please feel free to file a bug if this function hurts the
83 // performance of Chrome. 81 // performance of Chrome.
84 DCHECK(base::IsStringUTF8(input)); 82 DCHECK(base::IsStringUTF8(input));
85 base::string16 input16 = base::UTF8ToUTF16(input); 83 base::string16 input16 = base::UTF8ToUTF16(input);
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
118 } 116 }
119 117
120 #if defined(OS_POSIX) 118 #if defined(OS_POSIX)
121 // Given a path that starts with ~, return a path that starts with an 119 // Given a path that starts with ~, return a path that starts with an
122 // expanded-out /user/foobar directory. 120 // expanded-out /user/foobar directory.
123 std::string FixupHomedir(const std::string& text) { 121 std::string FixupHomedir(const std::string& text) {
124 DCHECK(text.length() > 0 && text[0] == '~'); 122 DCHECK(text.length() > 0 && text[0] == '~');
125 123
126 if (text.length() == 1 || text[1] == '/') { 124 if (text.length() == 1 || text[1] == '/') {
127 const char* home = getenv(base::env_vars::kHome); 125 const char* home = getenv(base::env_vars::kHome);
128 if (URLFixerUpper::home_directory_override) 126 if (url_fixer::home_directory_override)
129 home = URLFixerUpper::home_directory_override; 127 home = url_fixer::home_directory_override;
130 // We'll probably break elsewhere if $HOME is undefined, but check here 128 // We'll probably break elsewhere if $HOME is undefined, but check here
131 // just in case. 129 // just in case.
132 if (!home) 130 if (!home)
133 return text; 131 return text;
134 return home + text.substr(1); 132 return home + text.substr(1);
135 } 133 }
136 134
137 // Otherwise, this is a path like ~foobar/baz, where we must expand to 135 // Otherwise, this is a path like ~foobar/baz, where we must expand to
138 // user foobar's home directory. Officially, we should use getpwent(), 136 // user foobar's home directory. Officially, we should use getpwent(),
139 // but that is a nasty blocking call. 137 // but that is a nasty blocking call.
140 138
141 #if defined(OS_MACOSX) 139 #if defined(OS_MACOSX)
142 static const char kHome[] = "/Users/"; 140 static const char kHome[] = "/Users/";
143 #else 141 #else
144 static const char kHome[] = "/home/"; 142 static const char kHome[] = "/home/";
145 #endif 143 #endif
146 return kHome + text.substr(1); 144 return kHome + text.substr(1);
147 } 145 }
148 #endif 146 #endif
149 147
(...skipping 16 matching lines...) Expand all
166 #elif defined(OS_POSIX) 164 #elif defined(OS_POSIX)
167 base::FilePath input_path(text); 165 base::FilePath input_path(text);
168 PrepareStringForFileOps(input_path, &filename); 166 PrepareStringForFileOps(input_path, &filename);
169 if (filename.length() > 0 && filename[0] == '~') 167 if (filename.length() > 0 && filename[0] == '~')
170 filename = FixupHomedir(filename); 168 filename = FixupHomedir(filename);
171 #endif 169 #endif
172 170
173 // Here, we know the input looks like a file. 171 // Here, we know the input looks like a file.
174 GURL file_url = net::FilePathToFileURL(base::FilePath(filename)); 172 GURL file_url = net::FilePathToFileURL(base::FilePath(filename));
175 if (file_url.is_valid()) { 173 if (file_url.is_valid()) {
176 return base::UTF16ToUTF8(net::FormatUrl(file_url, std::string(), 174 return base::UTF16ToUTF8(net::FormatUrl(file_url,
177 net::kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, NULL, 175 std::string(),
178 NULL, NULL)); 176 net::kFormatUrlOmitUsernamePassword,
177 net::UnescapeRule::NORMAL,
178 NULL,
179 NULL,
180 NULL));
179 } 181 }
180 182
181 // Invalid file URL, just return the input. 183 // Invalid file URL, just return the input.
182 return text; 184 return text;
183 } 185 }
184 186
185 // Checks |domain| to see if a valid TLD is already present. If not, appends 187 // Checks |domain| to see if a valid TLD is already present. If not, appends
186 // |desired_tld| to the domain, and prepends "www." unless it's already present. 188 // |desired_tld| to the domain, and prepends "www." unless it's already present.
187 void AddDesiredTLD(const std::string& desired_tld, std::string* domain) { 189 void AddDesiredTLD(const std::string& desired_tld, std::string* domain) {
188 if (desired_tld.empty() || domain->empty()) 190 if (desired_tld.empty() || domain->empty())
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 // Try to extract a valid scheme from the beginning of |text|. 344 // Try to extract a valid scheme from the beginning of |text|.
343 // If successful, set |scheme_component| to the text range where the scheme 345 // If successful, set |scheme_component| to the text range where the scheme
344 // was located, and fill |canon_scheme| with its canonicalized form. 346 // was located, and fill |canon_scheme| with its canonicalized form.
345 // Otherwise, return false and leave the outputs in an indeterminate state. 347 // Otherwise, return false and leave the outputs in an indeterminate state.
346 bool GetValidScheme(const std::string& text, 348 bool GetValidScheme(const std::string& text,
347 url::Component* scheme_component, 349 url::Component* scheme_component,
348 std::string* canon_scheme) { 350 std::string* canon_scheme) {
349 canon_scheme->clear(); 351 canon_scheme->clear();
350 352
351 // Locate everything up to (but not including) the first ':' 353 // Locate everything up to (but not including) the first ':'
352 if (!url::ExtractScheme(text.data(), static_cast<int>(text.length()), 354 if (!url::ExtractScheme(
353 scheme_component)) { 355 text.data(), static_cast<int>(text.length()), scheme_component)) {
354 return false; 356 return false;
355 } 357 }
356 358
357 // Make sure the scheme contains only valid characters, and convert 359 // Make sure the scheme contains only valid characters, and convert
358 // to lowercase. This also catches IPv6 literals like [::1], because 360 // to lowercase. This also catches IPv6 literals like [::1], because
359 // brackets are not in the whitelist. 361 // brackets are not in the whitelist.
360 url::StdStringCanonOutput canon_scheme_output(canon_scheme); 362 url::StdStringCanonOutput canon_scheme_output(canon_scheme);
361 url::Component canon_scheme_component; 363 url::Component canon_scheme_component;
362 if (!url::CanonicalizeScheme(text.data(), *scheme_component, 364 if (!url::CanonicalizeScheme(text.data(),
363 &canon_scheme_output, &canon_scheme_component)) { 365 *scheme_component,
366 &canon_scheme_output,
367 &canon_scheme_component)) {
364 return false; 368 return false;
365 } 369 }
366 370
367 // Strip the ':', and any trailing buffer space. 371 // Strip the ':', and any trailing buffer space.
368 DCHECK_EQ(0, canon_scheme_component.begin); 372 DCHECK_EQ(0, canon_scheme_component.begin);
369 canon_scheme->erase(canon_scheme_component.len); 373 canon_scheme->erase(canon_scheme_component.len);
370 374
371 // We need to fix up the segmentation for "www.example.com:/". For this 375 // We need to fix up the segmentation for "www.example.com:/". For this
372 // case, we guess that schemes with a "." are not actually schemes. 376 // case, we guess that schemes with a "." are not actually schemes.
373 if (canon_scheme->find('.') != std::string::npos) 377 if (canon_scheme->find('.') != std::string::npos)
374 return false; 378 return false;
375 379
376 // We need to fix up the segmentation for "www:123/". For this case, we 380 // We need to fix up the segmentation for "www:123/". For this case, we
377 // will add an HTTP scheme later and make the URL parser happy. 381 // will add an HTTP scheme later and make the URL parser happy.
378 // TODO(pkasting): Maybe we should try to use GURL's parser for this? 382 // TODO(pkasting): Maybe we should try to use GURL's parser for this?
379 if (HasPort(text, *scheme_component)) 383 if (HasPort(text, *scheme_component))
380 return false; 384 return false;
381 385
382 // Everything checks out. 386 // Everything checks out.
383 return true; 387 return true;
384 } 388 }
385 389
386 // Performs the work for URLFixerUpper::SegmentURL. |text| may be modified on 390 // Performs the work for url_fixer::SegmentURL. |text| may be modified on
387 // output on success: a semicolon following a valid scheme is replaced with a 391 // output on success: a semicolon following a valid scheme is replaced with a
388 // colon. 392 // colon.
389 std::string SegmentURLInternal(std::string* text, url::Parsed* parts) { 393 std::string SegmentURLInternal(std::string* text, url::Parsed* parts) {
390 // Initialize the result. 394 // Initialize the result.
391 *parts = url::Parsed(); 395 *parts = url::Parsed();
392 396
393 std::string trimmed; 397 std::string trimmed;
394 TrimWhitespaceUTF8(*text, base::TRIM_ALL, &trimmed); 398 TrimWhitespaceUTF8(*text, base::TRIM_ALL, &trimmed);
395 if (trimmed.empty()) 399 if (trimmed.empty())
396 return std::string(); // Nothing to segment. 400 return std::string(); // Nothing to segment.
(...skipping 19 matching lines...) Expand all
416 if (semicolon != 0 && semicolon != std::string::npos) { 420 if (semicolon != 0 && semicolon != std::string::npos) {
417 (*text)[semicolon] = ':'; 421 (*text)[semicolon] = ':';
418 if (GetValidScheme(*text, &parts->scheme, &scheme)) 422 if (GetValidScheme(*text, &parts->scheme, &scheme))
419 found_scheme = true; 423 found_scheme = true;
420 else 424 else
421 (*text)[semicolon] = ';'; 425 (*text)[semicolon] = ';';
422 } 426 }
423 if (!found_scheme) { 427 if (!found_scheme) {
424 // Couldn't determine the scheme, so just pick one. 428 // Couldn't determine the scheme, so just pick one.
425 parts->scheme.reset(); 429 parts->scheme.reset();
426 scheme = StartsWithASCII(*text, "ftp.", false) ? 430 scheme = StartsWithASCII(*text, "ftp.", false) ? url::kFtpScheme
427 url::kFtpScheme : url::kHttpScheme; 431 : url::kHttpScheme;
428 } 432 }
429 } 433 }
430 434
431 // Proceed with about and chrome schemes, but not file or nonstandard schemes. 435 // Proceed with about and chrome schemes, but not file or nonstandard schemes.
432 if ((scheme != url::kAboutScheme) && (scheme != content::kChromeUIScheme) && 436 if ((scheme != url::kAboutScheme) && (scheme != kChromeUIScheme) &&
433 ((scheme == url::kFileScheme) || 437 ((scheme == url::kFileScheme) ||
434 !url::IsStandard( 438 !url::IsStandard(
435 scheme.c_str(), 439 scheme.c_str(),
436 url::Component(0, static_cast<int>(scheme.length()))))) { 440 url::Component(0, static_cast<int>(scheme.length()))))) {
437 return scheme; 441 return scheme;
438 } 442 }
439 443
440 if (scheme == url::kFileSystemScheme) { 444 if (scheme == url::kFileSystemScheme) {
441 // Have the GURL parser do the heavy lifting for us. 445 // Have the GURL parser do the heavy lifting for us.
442 url::ParseFileSystemURL(text->data(), static_cast<int>(text->length()), 446 url::ParseFileSystemURL(
443 parts); 447 text->data(), static_cast<int>(text->length()), parts);
444 return scheme; 448 return scheme;
445 } 449 }
446 450
447 if (parts->scheme.is_valid()) { 451 if (parts->scheme.is_valid()) {
448 // Have the GURL parser do the heavy lifting for us. 452 // Have the GURL parser do the heavy lifting for us.
449 url::ParseStandardURL(text->data(), static_cast<int>(text->length()), 453 url::ParseStandardURL(
450 parts); 454 text->data(), static_cast<int>(text->length()), parts);
451 return scheme; 455 return scheme;
452 } 456 }
453 457
454 // We need to add a scheme in order for ParseStandardURL to be happy. 458 // We need to add a scheme in order for ParseStandardURL to be happy.
455 // Find the first non-whitespace character. 459 // Find the first non-whitespace character.
456 std::string::iterator first_nonwhite = text->begin(); 460 std::string::iterator first_nonwhite = text->begin();
457 while ((first_nonwhite != text->end()) && IsWhitespace(*first_nonwhite)) 461 while ((first_nonwhite != text->end()) && IsWhitespace(*first_nonwhite))
458 ++first_nonwhite; 462 ++first_nonwhite;
459 463
460 // Construct the text to parse by inserting the scheme. 464 // Construct the text to parse by inserting the scheme.
461 std::string inserted_text(scheme); 465 std::string inserted_text(scheme);
462 inserted_text.append(url::kStandardSchemeSeparator); 466 inserted_text.append(url::kStandardSchemeSeparator);
463 std::string text_to_parse(text->begin(), first_nonwhite); 467 std::string text_to_parse(text->begin(), first_nonwhite);
464 text_to_parse.append(inserted_text); 468 text_to_parse.append(inserted_text);
465 text_to_parse.append(first_nonwhite, text->end()); 469 text_to_parse.append(first_nonwhite, text->end());
466 470
467 // Have the GURL parser do the heavy lifting for us. 471 // Have the GURL parser do the heavy lifting for us.
468 url::ParseStandardURL(text_to_parse.data(), 472 url::ParseStandardURL(
469 static_cast<int>(text_to_parse.length()), parts); 473 text_to_parse.data(), static_cast<int>(text_to_parse.length()), parts);
470 474
471 // Offset the results of the parse to match the original text. 475 // Offset the results of the parse to match the original text.
472 const int offset = -static_cast<int>(inserted_text.length()); 476 const int offset = -static_cast<int>(inserted_text.length());
473 URLFixerUpper::OffsetComponent(offset, &parts->scheme); 477 url_fixer::OffsetComponent(offset, &parts->scheme);
474 URLFixerUpper::OffsetComponent(offset, &parts->username); 478 url_fixer::OffsetComponent(offset, &parts->username);
475 URLFixerUpper::OffsetComponent(offset, &parts->password); 479 url_fixer::OffsetComponent(offset, &parts->password);
476 URLFixerUpper::OffsetComponent(offset, &parts->host); 480 url_fixer::OffsetComponent(offset, &parts->host);
477 URLFixerUpper::OffsetComponent(offset, &parts->port); 481 url_fixer::OffsetComponent(offset, &parts->port);
478 URLFixerUpper::OffsetComponent(offset, &parts->path); 482 url_fixer::OffsetComponent(offset, &parts->path);
479 URLFixerUpper::OffsetComponent(offset, &parts->query); 483 url_fixer::OffsetComponent(offset, &parts->query);
480 URLFixerUpper::OffsetComponent(offset, &parts->ref); 484 url_fixer::OffsetComponent(offset, &parts->ref);
481 485
482 return scheme; 486 return scheme;
483 } 487 }
484 488
485 } // namespace 489 } // namespace
486 490
487 std::string URLFixerUpper::SegmentURL(const std::string& text, 491 std::string url_fixer::SegmentURL(const std::string& text, url::Parsed* parts) {
488 url::Parsed* parts) {
489 std::string mutable_text(text); 492 std::string mutable_text(text);
490 return SegmentURLInternal(&mutable_text, parts); 493 return SegmentURLInternal(&mutable_text, parts);
491 } 494 }
492 495
493 base::string16 URLFixerUpper::SegmentURL(const base::string16& text, 496 base::string16 url_fixer::SegmentURL(const base::string16& text,
494 url::Parsed* parts) { 497 url::Parsed* parts) {
495 std::string text_utf8 = base::UTF16ToUTF8(text); 498 std::string text_utf8 = base::UTF16ToUTF8(text);
496 url::Parsed parts_utf8; 499 url::Parsed parts_utf8;
497 std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); 500 std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8);
498 UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); 501 UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts);
499 return base::UTF8ToUTF16(scheme_utf8); 502 return base::UTF8ToUTF16(scheme_utf8);
500 } 503 }
501 504
502 GURL URLFixerUpper::FixupURL(const std::string& text, 505 GURL url_fixer::FixupURL(const std::string& text,
503 const std::string& desired_tld) { 506 const std::string& desired_tld) {
504 std::string trimmed; 507 std::string trimmed;
505 TrimWhitespaceUTF8(text, base::TRIM_ALL, &trimmed); 508 TrimWhitespaceUTF8(text, base::TRIM_ALL, &trimmed);
506 if (trimmed.empty()) 509 if (trimmed.empty())
507 return GURL(); // Nothing here. 510 return GURL(); // Nothing here.
508 511
509 // Segment the URL. 512 // Segment the URL.
510 url::Parsed parts; 513 url::Parsed parts;
511 std::string scheme(SegmentURLInternal(&trimmed, &parts)); 514 std::string scheme(SegmentURLInternal(&trimmed, &parts));
512 515
513 // For view-source: URLs, we strip "view-source:", do fixup, and stick it back 516 // For view-source: URLs, we strip "view-source:", do fixup, and stick it back
514 // on. This allows us to handle things like "view-source:google.com". 517 // on. This allows us to handle things like "view-source:google.com".
515 if (scheme == content::kViewSourceScheme) { 518 if (scheme == kViewSourceScheme) {
516 // Reject "view-source:view-source:..." to avoid deep recursion. 519 // Reject "view-source:view-source:..." to avoid deep recursion.
517 std::string view_source(content::kViewSourceScheme + std::string(":")); 520 std::string view_source(kViewSourceScheme + std::string(":"));
518 if (!StartsWithASCII(text, view_source + view_source, false)) { 521 if (!StartsWithASCII(text, view_source + view_source, false)) {
519 return GURL(content::kViewSourceScheme + std::string(":") + 522 return GURL(kViewSourceScheme + std::string(":") +
520 FixupURL(trimmed.substr(scheme.length() + 1), 523 FixupURL(trimmed.substr(scheme.length() + 1), desired_tld)
521 desired_tld).possibly_invalid_spec()); 524 .possibly_invalid_spec());
522 } 525 }
523 } 526 }
524 527
525 // We handle the file scheme separately. 528 // We handle the file scheme separately.
526 if (scheme == url::kFileScheme) 529 if (scheme == url::kFileScheme)
527 return GURL(parts.scheme.is_valid() ? text : FixupPath(text)); 530 return GURL(parts.scheme.is_valid() ? text : FixupPath(text));
528 531
529 // We handle the filesystem scheme separately. 532 // We handle the filesystem scheme separately.
530 if (scheme == url::kFileSystemScheme) { 533 if (scheme == url::kFileSystemScheme) {
531 if (parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid()) 534 if (parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid())
532 return GURL(text); 535 return GURL(text);
533 return GURL(); 536 return GURL();
534 } 537 }
535 538
536 // Parse and rebuild about: and chrome: URLs, except about:blank. 539 // Parse and rebuild about: and chrome: URLs, except about:blank.
537 bool chrome_url = 540 bool chrome_url =
538 !LowerCaseEqualsASCII(trimmed, url::kAboutBlankURL) && 541 !LowerCaseEqualsASCII(trimmed, url::kAboutBlankURL) &&
539 ((scheme == url::kAboutScheme) || (scheme == content::kChromeUIScheme)); 542 ((scheme == url::kAboutScheme) || (scheme == kChromeUIScheme));
540 543
541 // For some schemes whose layouts we understand, we rebuild it. 544 // For some schemes whose layouts we understand, we rebuild it.
542 if (chrome_url || 545 if (chrome_url ||
543 url::IsStandard(scheme.c_str(), 546 url::IsStandard(scheme.c_str(),
544 url::Component(0, static_cast<int>(scheme.length())))) { 547 url::Component(0, static_cast<int>(scheme.length())))) {
545 // Replace the about: scheme with the chrome: scheme. 548 // Replace the about: scheme with the chrome: scheme.
546 std::string url(chrome_url ? content::kChromeUIScheme : scheme); 549 std::string url(chrome_url ? kChromeUIScheme : scheme);
547 url.append(url::kStandardSchemeSeparator); 550 url.append(url::kStandardSchemeSeparator);
548 551
549 // We need to check whether the |username| is valid because it is our 552 // We need to check whether the |username| is valid because it is our
550 // responsibility to append the '@' to delineate the user information from 553 // responsibility to append the '@' to delineate the user information from
551 // the host portion of the URL. 554 // the host portion of the URL.
552 if (parts.username.is_valid()) { 555 if (parts.username.is_valid()) {
553 FixupUsername(trimmed, parts.username, &url); 556 FixupUsername(trimmed, parts.username, &url);
554 FixupPassword(trimmed, parts.password, &url); 557 FixupPassword(trimmed, parts.password, &url);
555 url.append("@"); 558 url.append("@");
556 } 559 }
557 560
558 FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url); 561 FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url);
559 if (chrome_url && !parts.host.is_valid()) 562 if (chrome_url && !parts.host.is_valid())
560 url.append(chrome::kChromeUIDefaultHost); 563 url.append(kChromeUIDefaultHost);
561 FixupPort(trimmed, parts.port, &url); 564 FixupPort(trimmed, parts.port, &url);
562 FixupPath(trimmed, parts.path, &url); 565 FixupPath(trimmed, parts.path, &url);
563 FixupQuery(trimmed, parts.query, &url); 566 FixupQuery(trimmed, parts.query, &url);
564 FixupRef(trimmed, parts.ref, &url); 567 FixupRef(trimmed, parts.ref, &url);
565 568
566 return GURL(url); 569 return GURL(url);
567 } 570 }
568 571
569 // In the worst-case, we insert a scheme if the URL lacks one. 572 // In the worst-case, we insert a scheme if the URL lacks one.
570 if (!parts.scheme.is_valid()) { 573 if (!parts.scheme.is_valid()) {
571 std::string fixed_scheme(scheme); 574 std::string fixed_scheme(scheme);
572 fixed_scheme.append(url::kStandardSchemeSeparator); 575 fixed_scheme.append(url::kStandardSchemeSeparator);
573 trimmed.insert(0, fixed_scheme); 576 trimmed.insert(0, fixed_scheme);
574 } 577 }
575 578
576 return GURL(trimmed); 579 return GURL(trimmed);
577 } 580 }
578 581
579 // The rules are different here than for regular fixup, since we need to handle 582 // The rules are different here than for regular fixup, since we need to handle
580 // input like "hello.html" and know to look in the current directory. Regular 583 // input like "hello.html" and know to look in the current directory. Regular
581 // fixup will look for cues that it is actually a file path before trying to 584 // fixup will look for cues that it is actually a file path before trying to
582 // figure out what file it is. If our logic doesn't work, we will fall back on 585 // figure out what file it is. If our logic doesn't work, we will fall back on
583 // regular fixup. 586 // regular fixup.
584 GURL URLFixerUpper::FixupRelativeFile(const base::FilePath& base_dir, 587 GURL url_fixer::FixupRelativeFile(const base::FilePath& base_dir,
585 const base::FilePath& text) { 588 const base::FilePath& text) {
586 base::FilePath old_cur_directory; 589 base::FilePath old_cur_directory;
587 if (!base_dir.empty()) { 590 if (!base_dir.empty()) {
588 // Save the old current directory before we move to the new one. 591 // Save the old current directory before we move to the new one.
589 base::GetCurrentDirectory(&old_cur_directory); 592 base::GetCurrentDirectory(&old_cur_directory);
590 base::SetCurrentDirectory(base_dir); 593 base::SetCurrentDirectory(base_dir);
591 } 594 }
592 595
593 // Allow funny input with extra whitespace and the wrong kind of slashes. 596 // Allow funny input with extra whitespace and the wrong kind of slashes.
594 base::FilePath::StringType trimmed; 597 base::FilePath::StringType trimmed;
595 PrepareStringForFileOps(text, &trimmed); 598 PrepareStringForFileOps(text, &trimmed);
596 599
597 bool is_file = true; 600 bool is_file = true;
598 // Avoid recognizing definite non-file URLs as file paths. 601 // Avoid recognizing definite non-file URLs as file paths.
599 GURL gurl(trimmed); 602 GURL gurl(trimmed);
600 if (gurl.is_valid() && gurl.IsStandard()) 603 if (gurl.is_valid() && gurl.IsStandard())
601 is_file = false; 604 is_file = false;
602 base::FilePath full_path; 605 base::FilePath full_path;
603 if (is_file && !ValidPathForFile(trimmed, &full_path)) { 606 if (is_file && !ValidPathForFile(trimmed, &full_path)) {
604 // Not a path as entered, try unescaping it in case the user has 607 // Not a path as entered, try unescaping it in case the user has
605 // escaped things. We need to go through 8-bit since the escaped values 608 // escaped things. We need to go through 8-bit since the escaped values
606 // only represent 8-bit values. 609 // only represent 8-bit values.
607 #if defined(OS_WIN) 610 #if defined(OS_WIN)
608 std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent( 611 std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent(
609 base::WideToUTF8(trimmed), 612 base::WideToUTF8(trimmed),
610 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS)); 613 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS));
611 #elif defined(OS_POSIX) 614 #elif defined(OS_POSIX)
612 std::string unescaped = net::UnescapeURLComponent( 615 std::string unescaped = net::UnescapeURLComponent(
613 trimmed, 616 trimmed,
614 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); 617 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
615 #endif 618 #endif
616 619
617 if (!ValidPathForFile(unescaped, &full_path)) 620 if (!ValidPathForFile(unescaped, &full_path))
618 is_file = false; 621 is_file = false;
619 } 622 }
620 623
621 // Put back the current directory if we saved it. 624 // Put back the current directory if we saved it.
622 if (!base_dir.empty()) 625 if (!base_dir.empty())
623 base::SetCurrentDirectory(old_cur_directory); 626 base::SetCurrentDirectory(old_cur_directory);
624 627
625 if (is_file) { 628 if (is_file) {
626 GURL file_url = net::FilePathToFileURL(full_path); 629 GURL file_url = net::FilePathToFileURL(full_path);
627 if (file_url.is_valid()) 630 if (file_url.is_valid())
628 return GURL(base::UTF16ToUTF8(net::FormatUrl(file_url, std::string(), 631 return GURL(
629 net::kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, NULL, 632 base::UTF16ToUTF8(net::FormatUrl(file_url,
630 NULL, NULL))); 633 std::string(),
634 net::kFormatUrlOmitUsernamePassword,
635 net::UnescapeRule::NORMAL,
636 NULL,
637 NULL,
638 NULL)));
631 // Invalid files fall through to regular processing. 639 // Invalid files fall through to regular processing.
632 } 640 }
633 641
634 // Fall back on regular fixup for this input. 642 // Fall back on regular fixup for this input.
635 #if defined(OS_WIN) 643 #if defined(OS_WIN)
636 std::string text_utf8 = base::WideToUTF8(text.value()); 644 std::string text_utf8 = base::WideToUTF8(text.value());
637 #elif defined(OS_POSIX) 645 #elif defined(OS_POSIX)
638 std::string text_utf8 = text.value(); 646 std::string text_utf8 = text.value();
639 #endif 647 #endif
640 return FixupURL(text_utf8, std::string()); 648 return FixupURL(text_utf8, std::string());
641 } 649 }
642 650
643 void URLFixerUpper::OffsetComponent(int offset, url::Component* part) { 651 void url_fixer::OffsetComponent(int offset, url::Component* part) {
644 DCHECK(part); 652 DCHECK(part);
645 653
646 if (part->is_valid()) { 654 if (part->is_valid()) {
647 // Offset the location of this component. 655 // Offset the location of this component.
648 part->begin += offset; 656 part->begin += offset;
649 657
650 // This part might not have existed in the original text. 658 // This part might not have existed in the original text.
651 if (part->begin < 0) 659 if (part->begin < 0)
652 part->reset(); 660 part->reset();
653 } 661 }
654 } 662 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698