| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/common/net/url_fixer_upper.h" | 5 #include "chrome/common/net/url_fixer_upper.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #if defined(OS_POSIX) | 9 #if defined(OS_POSIX) |
| 10 #include "base/environment.h" | 10 #include "base/environment.h" |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 const std::string& text_utf8, | 34 const std::string& text_utf8, |
| 35 const url_parse::Component& component_utf8) { | 35 const url_parse::Component& component_utf8) { |
| 36 if (component_utf8.len == -1) | 36 if (component_utf8.len == -1) |
| 37 return url_parse::Component(); | 37 return url_parse::Component(); |
| 38 | 38 |
| 39 std::string before_component_string = | 39 std::string before_component_string = |
| 40 text_utf8.substr(0, component_utf8.begin); | 40 text_utf8.substr(0, component_utf8.begin); |
| 41 std::string component_string = text_utf8.substr(component_utf8.begin, | 41 std::string component_string = text_utf8.substr(component_utf8.begin, |
| 42 component_utf8.len); | 42 component_utf8.len); |
| 43 base::string16 before_component_string_16 = | 43 base::string16 before_component_string_16 = |
| 44 UTF8ToUTF16(before_component_string); | 44 base::UTF8ToUTF16(before_component_string); |
| 45 base::string16 component_string_16 = UTF8ToUTF16(component_string); | 45 base::string16 component_string_16 = base::UTF8ToUTF16(component_string); |
| 46 url_parse::Component component_16(before_component_string_16.length(), | 46 url_parse::Component component_16(before_component_string_16.length(), |
| 47 component_string_16.length()); | 47 component_string_16.length()); |
| 48 return component_16; | 48 return component_16; |
| 49 } | 49 } |
| 50 | 50 |
| 51 void UTF8PartsToUTF16Parts(const std::string& text_utf8, | 51 void UTF8PartsToUTF16Parts(const std::string& text_utf8, |
| 52 const url_parse::Parsed& parts_utf8, | 52 const url_parse::Parsed& parts_utf8, |
| 53 url_parse::Parsed* parts) { | 53 url_parse::Parsed* parts) { |
| 54 if (IsStringASCII(text_utf8)) { | 54 if (IsStringASCII(text_utf8)) { |
| 55 *parts = parts_utf8; | 55 *parts = parts_utf8; |
| (...skipping 18 matching lines...) Expand all Loading... |
| 74 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref); | 74 UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref); |
| 75 } | 75 } |
| 76 | 76 |
| 77 TrimPositions TrimWhitespaceUTF8(const std::string& input, | 77 TrimPositions TrimWhitespaceUTF8(const std::string& input, |
| 78 TrimPositions positions, | 78 TrimPositions positions, |
| 79 std::string* output) { | 79 std::string* output) { |
| 80 // This implementation is not so fast since it converts the text encoding | 80 // This implementation is not so fast since it converts the text encoding |
| 81 // twice. Please feel free to file a bug if this function hurts the | 81 // twice. Please feel free to file a bug if this function hurts the |
| 82 // performance of Chrome. | 82 // performance of Chrome. |
| 83 DCHECK(IsStringUTF8(input)); | 83 DCHECK(IsStringUTF8(input)); |
| 84 base::string16 input16 = UTF8ToUTF16(input); | 84 base::string16 input16 = base::UTF8ToUTF16(input); |
| 85 base::string16 output16; | 85 base::string16 output16; |
| 86 TrimPositions result = TrimWhitespace(input16, positions, &output16); | 86 TrimPositions result = TrimWhitespace(input16, positions, &output16); |
| 87 *output = UTF16ToUTF8(output16); | 87 *output = base::UTF16ToUTF8(output16); |
| 88 return result; | 88 return result; |
| 89 } | 89 } |
| 90 | 90 |
| 91 // does some basic fixes for input that we want to test for file-ness | 91 // does some basic fixes for input that we want to test for file-ness |
| 92 void PrepareStringForFileOps(const base::FilePath& text, | 92 void PrepareStringForFileOps(const base::FilePath& text, |
| 93 base::FilePath::StringType* output) { | 93 base::FilePath::StringType* output) { |
| 94 #if defined(OS_WIN) | 94 #if defined(OS_WIN) |
| 95 TrimWhitespace(text.value(), TRIM_ALL, output); | 95 TrimWhitespace(text.value(), TRIM_ALL, output); |
| 96 replace(output->begin(), output->end(), '/', '\\'); | 96 replace(output->begin(), output->end(), '/', '\\'); |
| 97 #else | 97 #else |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 148 // Tries to create a file: URL from |text| if it looks like a filename, even if | 148 // Tries to create a file: URL from |text| if it looks like a filename, even if |
| 149 // it doesn't resolve as a valid path or to an existing file. Returns a | 149 // it doesn't resolve as a valid path or to an existing file. Returns a |
| 150 // (possibly invalid) file: URL in |fixed_up_url| for input beginning | 150 // (possibly invalid) file: URL in |fixed_up_url| for input beginning |
| 151 // with a drive specifier or "\\". Returns the unchanged input in other cases | 151 // with a drive specifier or "\\". Returns the unchanged input in other cases |
| 152 // (including file: URLs: these don't look like filenames). | 152 // (including file: URLs: these don't look like filenames). |
| 153 std::string FixupPath(const std::string& text) { | 153 std::string FixupPath(const std::string& text) { |
| 154 DCHECK(!text.empty()); | 154 DCHECK(!text.empty()); |
| 155 | 155 |
| 156 base::FilePath::StringType filename; | 156 base::FilePath::StringType filename; |
| 157 #if defined(OS_WIN) | 157 #if defined(OS_WIN) |
| 158 base::FilePath input_path(UTF8ToWide(text)); | 158 base::FilePath input_path(base::UTF8ToWide(text)); |
| 159 PrepareStringForFileOps(input_path, &filename); | 159 PrepareStringForFileOps(input_path, &filename); |
| 160 | 160 |
| 161 // Fixup Windows-style drive letters, where "C:" gets rewritten to "C|". | 161 // Fixup Windows-style drive letters, where "C:" gets rewritten to "C|". |
| 162 if (filename.length() > 1 && filename[1] == '|') | 162 if (filename.length() > 1 && filename[1] == '|') |
| 163 filename[1] = ':'; | 163 filename[1] = ':'; |
| 164 #elif defined(OS_POSIX) | 164 #elif defined(OS_POSIX) |
| 165 base::FilePath input_path(text); | 165 base::FilePath input_path(text); |
| 166 PrepareStringForFileOps(input_path, &filename); | 166 PrepareStringForFileOps(input_path, &filename); |
| 167 if (filename.length() > 0 && filename[0] == '~') | 167 if (filename.length() > 0 && filename[0] == '~') |
| 168 filename = FixupHomedir(filename); | 168 filename = FixupHomedir(filename); |
| 169 #endif | 169 #endif |
| 170 | 170 |
| 171 // Here, we know the input looks like a file. | 171 // Here, we know the input looks like a file. |
| 172 GURL file_url = net::FilePathToFileURL(base::FilePath(filename)); | 172 GURL file_url = net::FilePathToFileURL(base::FilePath(filename)); |
| 173 if (file_url.is_valid()) { | 173 if (file_url.is_valid()) { |
| 174 return UTF16ToUTF8(net::FormatUrl(file_url, std::string(), | 174 return base::UTF16ToUTF8(net::FormatUrl(file_url, std::string(), |
| 175 net::kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, NULL, | 175 net::kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, NULL, |
| 176 NULL, NULL)); | 176 NULL, NULL)); |
| 177 } | 177 } |
| 178 | 178 |
| 179 // Invalid file URL, just return the input. | 179 // Invalid file URL, just return the input. |
| 180 return text; | 180 return text; |
| 181 } | 181 } |
| 182 | 182 |
| 183 // Checks |domain| to see if a valid TLD is already present. If not, appends | 183 // Checks |domain| to see if a valid TLD is already present. If not, appends |
| 184 // |desired_tld| to the domain, and prepends "www." unless it's already present. | 184 // |desired_tld| to the domain, and prepends "www." unless it's already present. |
| (...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 479 } // namespace | 479 } // namespace |
| 480 | 480 |
| 481 std::string URLFixerUpper::SegmentURL(const std::string& text, | 481 std::string URLFixerUpper::SegmentURL(const std::string& text, |
| 482 url_parse::Parsed* parts) { | 482 url_parse::Parsed* parts) { |
| 483 std::string mutable_text(text); | 483 std::string mutable_text(text); |
| 484 return SegmentURLInternal(&mutable_text, parts); | 484 return SegmentURLInternal(&mutable_text, parts); |
| 485 } | 485 } |
| 486 | 486 |
| 487 base::string16 URLFixerUpper::SegmentURL(const base::string16& text, | 487 base::string16 URLFixerUpper::SegmentURL(const base::string16& text, |
| 488 url_parse::Parsed* parts) { | 488 url_parse::Parsed* parts) { |
| 489 std::string text_utf8 = UTF16ToUTF8(text); | 489 std::string text_utf8 = base::UTF16ToUTF8(text); |
| 490 url_parse::Parsed parts_utf8; | 490 url_parse::Parsed parts_utf8; |
| 491 std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); | 491 std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); |
| 492 UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); | 492 UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); |
| 493 return UTF8ToUTF16(scheme_utf8); | 493 return base::UTF8ToUTF16(scheme_utf8); |
| 494 } | 494 } |
| 495 | 495 |
| 496 GURL URLFixerUpper::FixupURL(const std::string& text, | 496 GURL URLFixerUpper::FixupURL(const std::string& text, |
| 497 const std::string& desired_tld) { | 497 const std::string& desired_tld) { |
| 498 std::string trimmed; | 498 std::string trimmed; |
| 499 TrimWhitespaceUTF8(text, TRIM_ALL, &trimmed); | 499 TrimWhitespaceUTF8(text, TRIM_ALL, &trimmed); |
| 500 if (trimmed.empty()) | 500 if (trimmed.empty()) |
| 501 return GURL(); // Nothing here. | 501 return GURL(); // Nothing here. |
| 502 | 502 |
| 503 // Segment the URL. | 503 // Segment the URL. |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 590 // Avoid recognizing definite non-file URLs as file paths. | 590 // Avoid recognizing definite non-file URLs as file paths. |
| 591 GURL gurl(trimmed); | 591 GURL gurl(trimmed); |
| 592 if (gurl.is_valid() && gurl.IsStandard()) | 592 if (gurl.is_valid() && gurl.IsStandard()) |
| 593 is_file = false; | 593 is_file = false; |
| 594 base::FilePath full_path; | 594 base::FilePath full_path; |
| 595 if (is_file && !ValidPathForFile(trimmed, &full_path)) { | 595 if (is_file && !ValidPathForFile(trimmed, &full_path)) { |
| 596 // Not a path as entered, try unescaping it in case the user has | 596 // Not a path as entered, try unescaping it in case the user has |
| 597 // escaped things. We need to go through 8-bit since the escaped values | 597 // escaped things. We need to go through 8-bit since the escaped values |
| 598 // only represent 8-bit values. | 598 // only represent 8-bit values. |
| 599 #if defined(OS_WIN) | 599 #if defined(OS_WIN) |
| 600 std::wstring unescaped = UTF8ToWide(net::UnescapeURLComponent( | 600 std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent( |
| 601 WideToUTF8(trimmed), | 601 base::WideToUTF8(trimmed), |
| 602 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS)); | 602 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS)); |
| 603 #elif defined(OS_POSIX) | 603 #elif defined(OS_POSIX) |
| 604 std::string unescaped = net::UnescapeURLComponent( | 604 std::string unescaped = net::UnescapeURLComponent( |
| 605 trimmed, | 605 trimmed, |
| 606 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 606 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
| 607 #endif | 607 #endif |
| 608 | 608 |
| 609 if (!ValidPathForFile(unescaped, &full_path)) | 609 if (!ValidPathForFile(unescaped, &full_path)) |
| 610 is_file = false; | 610 is_file = false; |
| 611 } | 611 } |
| 612 | 612 |
| 613 // Put back the current directory if we saved it. | 613 // Put back the current directory if we saved it. |
| 614 if (!base_dir.empty()) | 614 if (!base_dir.empty()) |
| 615 file_util::SetCurrentDirectory(old_cur_directory); | 615 file_util::SetCurrentDirectory(old_cur_directory); |
| 616 | 616 |
| 617 if (is_file) { | 617 if (is_file) { |
| 618 GURL file_url = net::FilePathToFileURL(full_path); | 618 GURL file_url = net::FilePathToFileURL(full_path); |
| 619 if (file_url.is_valid()) | 619 if (file_url.is_valid()) |
| 620 return GURL(UTF16ToUTF8(net::FormatUrl(file_url, std::string(), | 620 return GURL(base::UTF16ToUTF8(net::FormatUrl(file_url, std::string(), |
| 621 net::kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, NULL, | 621 net::kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, NULL, |
| 622 NULL, NULL))); | 622 NULL, NULL))); |
| 623 // Invalid files fall through to regular processing. | 623 // Invalid files fall through to regular processing. |
| 624 } | 624 } |
| 625 | 625 |
| 626 // Fall back on regular fixup for this input. | 626 // Fall back on regular fixup for this input. |
| 627 #if defined(OS_WIN) | 627 #if defined(OS_WIN) |
| 628 std::string text_utf8 = WideToUTF8(text.value()); | 628 std::string text_utf8 = base::WideToUTF8(text.value()); |
| 629 #elif defined(OS_POSIX) | 629 #elif defined(OS_POSIX) |
| 630 std::string text_utf8 = text.value(); | 630 std::string text_utf8 = text.value(); |
| 631 #endif | 631 #endif |
| 632 return FixupURL(text_utf8, std::string()); | 632 return FixupURL(text_utf8, std::string()); |
| 633 } | 633 } |
| 634 | 634 |
| 635 void URLFixerUpper::OffsetComponent(int offset, url_parse::Component* part) { | 635 void URLFixerUpper::OffsetComponent(int offset, url_parse::Component* part) { |
| 636 DCHECK(part); | 636 DCHECK(part); |
| 637 | 637 |
| 638 if (part->is_valid()) { | 638 if (part->is_valid()) { |
| 639 // Offset the location of this component. | 639 // Offset the location of this component. |
| 640 part->begin += offset; | 640 part->begin += offset; |
| 641 | 641 |
| 642 // This part might not have existed in the original text. | 642 // This part might not have existed in the original text. |
| 643 if (part->begin < 0) | 643 if (part->begin < 0) |
| 644 part->reset(); | 644 part->reset(); |
| 645 } | 645 } |
| 646 } | 646 } |
| OLD | NEW |