Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/i18n/time_formatting.h" | 10 #include "base/i18n/time_formatting.h" |
| (...skipping 558 matching lines...) | |
| 569 output_component->reset(); | 569 output_component->reset(); |
| 570 } | 570 } |
| 571 } | 571 } |
| 572 | 572 |
| 573 } // namespace | 573 } // namespace |
| 574 | 574 |
| 575 const FormatUrlType kFormatUrlOmitNothing = 0; | 575 const FormatUrlType kFormatUrlOmitNothing = 0; |
| 576 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 576 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
| 577 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 577 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
| 578 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 578 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
| 579 const FormatUrlType kFormatUrlOmitScheme = 1 << 3; | |
| 580 const FormatUrlType kFormatUrlOmitPort = 1 << 4; | |
| 579 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 581 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
| 580 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 582 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
| 581 | 583 |
| 582 base::string16 IDNToUnicode(const std::string& host, | 584 base::string16 IDNToUnicode(const std::string& host, |
| 583 const std::string& languages) { | 585 const std::string& languages) { |
| 584 return IDNToUnicodeWithAdjustments(host, languages, NULL); | 586 return IDNToUnicodeWithAdjustments(host, languages, NULL); |
| 585 } | 587 } |
| 586 | 588 |
| 587 std::string GetDirectoryListingEntry(const base::string16& name, | 589 std::string GetDirectoryListingEntry(const base::string16& name, |
| 588 const std::string& raw_bytes, | 590 const std::string& raw_bytes, |
| (...skipping 92 matching lines...) | |
| 681 unescape_rules, new_parsed, prefix_end, | 683 unescape_rules, new_parsed, prefix_end, |
| 682 adjustments); | 684 adjustments); |
| 683 } | 685 } |
| 684 | 686 |
| 685 // We handle both valid and invalid URLs (this will give us the spec | 687 // We handle both valid and invalid URLs (this will give us the spec |
| 686 // regardless of validity). | 688 // regardless of validity). |
| 687 const std::string& spec = url.possibly_invalid_spec(); | 689 const std::string& spec = url.possibly_invalid_spec(); |
| 688 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | 690 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
| 689 | 691 |
| 690 // Scheme & separators. These are ASCII. | 692 // Scheme & separators. These are ASCII. |
| 693 // Scheme removal occurs at the end. | |
| 691 base::string16 url_string; | 694 base::string16 url_string; |
| 692 url_string.insert( | 695 url_string.insert( |
| 693 url_string.end(), spec.begin(), | 696 url_string.end(), spec.begin(), |
| 694 spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); | 697 spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); |
| 695 const char kHTTP[] = "http://"; | |
| 696 const char kFTP[] = "ftp."; | |
| 697 // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
| 698 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
| 699 // the user inputs this into any field subject to fixup (which is basically | |
| 700 // all input fields), the meaning would be changed. (In fact, often the | |
| 701 // formatted URL is directly pre-filled into an input field.) For this reason | |
| 702 // we avoid stripping "http://" in this case. | |
| 703 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
| 704 EqualsASCII(url_string, kHTTP) && | |
| 705 !StartsWithASCII(url.host(), kFTP, true); | |
| 706 new_parsed->scheme = parsed.scheme; | 698 new_parsed->scheme = parsed.scheme; |
| 707 | 699 |
| 708 // Username & password. | 700 // Username & password. |
| 709 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | 701 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { |
| 710 // Remove the username and password fields. We don't want to display those | 702 // Remove the username and password fields. We don't want to display those |
| 711 // to the user since they can be used for attacks, | 703 // to the user since they can be used for attacks, |
| 712 // e.g. "http://google.com:search@evil.ru/" | 704 // e.g. "http://google.com:search@evil.ru/" |
| 713 new_parsed->username.reset(); | 705 new_parsed->username.reset(); |
| 714 new_parsed->password.reset(); | 706 new_parsed->password.reset(); |
| 715 // Update the adjustments based on removed username and/or password. | 707 // Update the adjustments based on removed username and/or password. |
| (...skipping 29 matching lines...) | |
| 745 url_string.push_back('@'); | 737 url_string.push_back('@'); |
| 746 } | 738 } |
| 747 if (prefix_end) | 739 if (prefix_end) |
| 748 *prefix_end = static_cast<size_t>(url_string.length()); | 740 *prefix_end = static_cast<size_t>(url_string.length()); |
| 749 | 741 |
| 750 // Host. | 742 // Host. |
| 751 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages), | 743 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages), |
| 752 &url_string, &new_parsed->host, adjustments); | 744 &url_string, &new_parsed->host, adjustments); |
| 753 | 745 |
| 754 // Port. | 746 // Port. |
| 755 if (parsed.port.is_nonempty()) { | 747 if (parsed.port.is_nonempty() && !(format_types & kFormatUrlOmitPort)) { |
| 756 url_string.push_back(':'); | 748 url_string.push_back(':'); |
| 757 new_parsed->port.begin = url_string.length(); | 749 new_parsed->port.begin = url_string.length(); |
| 758 url_string.insert(url_string.end(), | 750 url_string.insert(url_string.end(), |
| 759 spec.begin() + parsed.port.begin, | 751 spec.begin() + parsed.port.begin, |
| 760 spec.begin() + parsed.port.end()); | 752 spec.begin() + parsed.port.end()); |
| 761 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | 753 new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
| 762 } else { | 754 } else { |
| 763 new_parsed->port.reset(); | 755 new_parsed->port.reset(); |
|
asanka
2015/05/11 23:39:44
The else clause now includes the 'parsed.port.is_n...
palmer
2015/05/12 22:20:53
Done.
| |
| 764 } | 756 } |
| 765 | 757 |
| 766 // Path & query. Both get the same general unescape & convert treatment. | 758 // Path & query. Both get the same general unescape & convert treatment. |
| 767 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | 759 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || |
| 768 !CanStripTrailingSlash(url)) { | 760 !CanStripTrailingSlash(url)) { |
| 769 AppendFormattedComponent(spec, parsed.path, | 761 AppendFormattedComponent(spec, parsed.path, |
| 770 NonHostComponentTransform(unescape_rules), | 762 NonHostComponentTransform(unescape_rules), |
| 771 &url_string, &new_parsed->path, adjustments); | 763 &url_string, &new_parsed->path, adjustments); |
| 772 } else { | 764 } else { |
| 773 if (parsed.path.len > 0) { | 765 if (parsed.path.len > 0) { |
| 774 adjustments->push_back(base::OffsetAdjuster::Adjustment( | 766 adjustments->push_back(base::OffsetAdjuster::Adjustment( |
| 775 parsed.path.begin, parsed.path.len, 0)); | 767 parsed.path.begin, parsed.path.len, 0)); |
| 776 } | 768 } |
| 777 } | 769 } |
| 778 if (parsed.query.is_valid()) | 770 if (parsed.query.is_valid()) |
| 779 url_string.push_back('?'); | 771 url_string.push_back('?'); |
| 780 AppendFormattedComponent(spec, parsed.query, | 772 AppendFormattedComponent(spec, parsed.query, |
| 781 NonHostComponentTransform(unescape_rules), | 773 NonHostComponentTransform(unescape_rules), |
| 782 &url_string, &new_parsed->query, adjustments); | 774 &url_string, &new_parsed->query, adjustments); |
| 783 | 775 |
| 784 // Ref. This is valid, unescaped UTF-8, so we can just convert. | 776 // Ref. This is valid, unescaped UTF-8, so we can just convert. |
| 785 if (parsed.ref.is_valid()) | 777 if (parsed.ref.is_valid()) |
| 786 url_string.push_back('#'); | 778 url_string.push_back('#'); |
| 787 AppendFormattedComponent(spec, parsed.ref, | 779 AppendFormattedComponent(spec, parsed.ref, |
| 788 NonHostComponentTransform(UnescapeRule::NONE), | 780 NonHostComponentTransform(UnescapeRule::NONE), |
| 789 &url_string, &new_parsed->ref, adjustments); | 781 &url_string, &new_parsed->ref, adjustments); |
| 790 | 782 |
| 791 // If we need to strip out http do it after the fact. | 783 // Strip out the scheme, after the fact. |
| 792 if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) { | 784 bool omit_all_schemes = !!(format_types & kFormatUrlOmitScheme); |
| 793 const size_t kHTTPSize = arraysize(kHTTP) - 1; | 785 // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This |
| 794 url_string = url_string.substr(kHTTPSize); | 786 // means that if we trim "http://" off a URL whose host starts with "ftp." and |
| 787 // the user inputs this into any field subject to fixup (which is basically | |
| 788 // all input fields), the meaning would be changed. (In fact, often the | |
| 789 // formatted URL is directly pre-filled into an input field.) For this reason | |
| 790 // we avoid stripping "http://" in this case. | |
| 791 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
| 792 url.SchemeIs("http") && | |
| 793 !StartsWithASCII(url.host(), "ftp.", true); | |
| 794 if (omit_all_schemes || omit_http) { | |
| 795 const int scheme_size = | |
| 796 parsed.CountCharactersBefore(url::Parsed::USERNAME, true); | |
| 797 url_string = url_string.substr(scheme_size); | |
| 795 // Because offsets in the |adjustments| are already calculated with respect | 798 // Because offsets in the |adjustments| are already calculated with respect |
| 796 // to the string with the http:// prefix in it, those offsets remain correct | 799 // to the string with the scheme prefix in it, those offsets remain correct |
| 797 // after stripping the prefix. The only thing necessary is to add an | 800 // after stripping the prefix. The only thing necessary is to add an |
| 798 // adjustment to reflect the stripped prefix. | 801 // adjustment to reflect the stripped prefix. |
| 799 adjustments->insert(adjustments->begin(), | 802 adjustments->insert(adjustments->begin(), |
| 800 base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); | 803 base::OffsetAdjuster::Adjustment(0, scheme_size, 0)); |
| 801 | 804 |
| 802 if (prefix_end) | 805 if (prefix_end) |
| 803 *prefix_end -= kHTTPSize; | 806 *prefix_end -= scheme_size; |
| 804 | 807 |
| 805 // Adjust new_parsed. | |
| 806 DCHECK(new_parsed->scheme.is_valid()); | 808 DCHECK(new_parsed->scheme.is_valid()); |
| 807 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | 809 DCHECK_EQ(scheme_size, new_parsed->scheme.len + 3); |
| 808 new_parsed->scheme.reset(); | 810 new_parsed->scheme.reset(); |
| 809 AdjustAllComponentsButScheme(delta, new_parsed); | 811 AdjustAllComponentsButScheme(-scheme_size, new_parsed); |
| 810 } | 812 } |
| 811 | 813 |
| 812 return url_string; | 814 return url_string; |
| 813 } | 815 } |
| 814 | 816 |
| 815 base::string16 FormatUrl(const GURL& url, | 817 base::string16 FormatUrl(const GURL& url, |
| 816 const std::string& languages, | 818 const std::string& languages, |
| 817 FormatUrlTypes format_types, | 819 FormatUrlTypes format_types, |
| 818 UnescapeRule::Type unescape_rules, | 820 UnescapeRule::Type unescape_rules, |
| 819 url::Parsed* new_parsed, | 821 url::Parsed* new_parsed, |
| 820 size_t* prefix_end, | 822 size_t* prefix_end, |
| 821 size_t* offset_for_adjustment) { | 823 size_t* offset_for_adjustment) { |
| 822 Offsets offsets; | 824 Offsets offsets; |
| 823 if (offset_for_adjustment) | 825 if (offset_for_adjustment) |
| 824 offsets.push_back(*offset_for_adjustment); | 826 offsets.push_back(*offset_for_adjustment); |
| 825 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, | 827 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
| 826 unescape_rules, new_parsed, prefix_end, &offsets); | 828 unescape_rules, new_parsed, prefix_end, &offsets); |
| 827 if (offset_for_adjustment) | 829 if (offset_for_adjustment) |
| 828 *offset_for_adjustment = offsets[0]; | 830 *offset_for_adjustment = offsets[0]; |
| 829 return result; | 831 return result; |
| 830 } | 832 } |
| 831 | 833 |
| 834 base::string16 FormatOriginForDisplay(const GURL& url, | |
| 835 const std::string& languages, | |
| 836 bool omit_scheme) { | |
| 837 if (!url.IsStandard()) | |
| 838 return FormatUrl(url, languages); | |
| 839 | |
| 840 if (url.SchemeIsFile()) { | |
| 841 // TODO(palmer): Determine whether to encode this policy in GURL::GetOrigin. | |
| 842 return (omit_scheme ? base::ASCIIToUTF16("") | |
| 843 : base::ASCIIToUTF16("file://")) + | |
| 844 base::UTF8ToUTF16(url.path()); | |
|
asanka
2015/05/11 23:39:44
It is (or should be) possible for url.host() to be...
palmer
2015/05/12 22:20:53
For file: URLs, the path is the origin, so that's...
| |
| 845 } | |
| 846 | |
| 847 if (url.SchemeIsFileSystem()) { | |
| 848 // TODO(palmer): Determine whether to encode this policy in GURL::GetOrigin. | |
| 849 // | |
| 850 // TODO(palmer): Determine whether GURL::IsStandard should return false for | |
|
asanka
2015/05/11 23:39:44
This is a good point. You should file a bug for it
palmer
2015/05/12 22:20:53
Done.
| |
| 851 // filesystem: URLs. Per | |
| 852 // http://www.html5rocks.com/en/tutorials/file/filesystem/, they are not | |
| 853 // standard. | |
| 854 const GURL* inner_url = url.inner_url(); | |
| 855 return base::ASCIIToUTF16("filesystem:") + | |
| 856 FormatOriginForDisplay(*inner_url, languages, omit_scheme); | |
|
asanka
2015/05/11 23:39:44
Perhaps add a comment that this method deals corre...
palmer
2015/05/12 22:20:53
Yeah, wow. Either GURL::inner_url is broken, or my...
| |
| 857 } | |
| 858 | |
| 859 GURL display_origin = url.GetOrigin(); | |
| 860 | |
| 861 FormatUrlTypes format_types = kFormatUrlOmitUsernamePassword | | |
| 862 kFormatUrlOmitTrailingSlashOnBareHostname; | |
| 863 if (omit_scheme) | |
| 864 format_types |= kFormatUrlOmitScheme; | |
| 865 | |
| 866 const int default_port = url::DefaultPortForScheme( | |
| 867 display_origin.scheme().c_str(), display_origin.scheme().length()); | |
| 868 if (display_origin.EffectiveIntPort() == default_port) | |
|
asanka
2015/05/11 23:39:44
Nit: use IntPort().
palmer
2015/05/12 22:20:53
Done.
| |
| 869 format_types |= kFormatUrlOmitPort; | |
| 870 | |
| 871 return FormatUrl(display_origin, languages, format_types, | |
| 872 UnescapeRule::SPACES, NULL, NULL, NULL); | |
|
asanka
2015/05/11 23:39:44
NULL -> nullptr
palmer
2015/05/12 22:20:53
Done.
| |
| 873 } | |
| 874 | |
| 832 } // namespace net | 875 } // namespace net |
| OLD | NEW |