OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/i18n/time_formatting.h" | 10 #include "base/i18n/time_formatting.h" |
(...skipping 558 matching lines...) | |
569 output_component->reset(); | 569 output_component->reset(); |
570 } | 570 } |
571 } | 571 } |
572 | 572 |
573 } // namespace | 573 } // namespace |
574 | 574 |
575 const FormatUrlType kFormatUrlOmitNothing = 0; | 575 const FormatUrlType kFormatUrlOmitNothing = 0; |
576 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 576 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
577 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 577 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
578 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 578 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
579 const FormatUrlType kFormatUrlOmitScheme = 1 << 3; | |
580 const FormatUrlType kFormatUrlOmitPort = 1 << 4; | |
579 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 581 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
580 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 582 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
581 | 583 |
582 base::string16 IDNToUnicode(const std::string& host, | 584 base::string16 IDNToUnicode(const std::string& host, |
583 const std::string& languages) { | 585 const std::string& languages) { |
584 return IDNToUnicodeWithAdjustments(host, languages, NULL); | 586 return IDNToUnicodeWithAdjustments(host, languages, NULL); |
585 } | 587 } |
586 | 588 |
587 std::string GetDirectoryListingEntry(const base::string16& name, | 589 std::string GetDirectoryListingEntry(const base::string16& name, |
588 const std::string& raw_bytes, | 590 const std::string& raw_bytes, |
(...skipping 92 matching lines...) | |
681 unescape_rules, new_parsed, prefix_end, | 683 unescape_rules, new_parsed, prefix_end, |
682 adjustments); | 684 adjustments); |
683 } | 685 } |
684 | 686 |
685 // We handle both valid and invalid URLs (this will give us the spec | 687 // We handle both valid and invalid URLs (this will give us the spec |
686 // regardless of validity). | 688 // regardless of validity). |
687 const std::string& spec = url.possibly_invalid_spec(); | 689 const std::string& spec = url.possibly_invalid_spec(); |
688 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | 690 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
689 | 691 |
690 // Scheme & separators. These are ASCII. | 692 // Scheme & separators. These are ASCII. |
693 // Scheme removal occurs at the end. | |
691 base::string16 url_string; | 694 base::string16 url_string; |
692 url_string.insert( | 695 url_string.insert( |
693 url_string.end(), spec.begin(), | 696 url_string.end(), spec.begin(), |
694 spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); | 697 spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); |
695 const char kHTTP[] = "http://"; | |
696 const char kFTP[] = "ftp."; | |
697 // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
698 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
699 // the user inputs this into any field subject to fixup (which is basically | |
700 // all input fields), the meaning would be changed. (In fact, often the | |
701 // formatted URL is directly pre-filled into an input field.) For this reason | |
702 // we avoid stripping "http://" in this case. | |
703 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
704 EqualsASCII(url_string, kHTTP) && | |
705 !StartsWithASCII(url.host(), kFTP, true); | |
706 new_parsed->scheme = parsed.scheme; | 698 new_parsed->scheme = parsed.scheme; |
707 | 699 |
708 // Username & password. | 700 // Username & password. |
709 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | 701 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { |
710 // Remove the username and password fields. We don't want to display those | 702 // Remove the username and password fields. We don't want to display those |
711 // to the user since they can be used for attacks, | 703 // to the user since they can be used for attacks, |
712 // e.g. "http://google.com:search@evil.ru/" | 704 // e.g. "http://google.com:search@evil.ru/" |
713 new_parsed->username.reset(); | 705 new_parsed->username.reset(); |
714 new_parsed->password.reset(); | 706 new_parsed->password.reset(); |
715 // Update the adjustments based on removed username and/or password. | 707 // Update the adjustments based on removed username and/or password. |
(...skipping 29 matching lines...) | |
745 url_string.push_back('@'); | 737 url_string.push_back('@'); |
746 } | 738 } |
747 if (prefix_end) | 739 if (prefix_end) |
748 *prefix_end = static_cast<size_t>(url_string.length()); | 740 *prefix_end = static_cast<size_t>(url_string.length()); |
749 | 741 |
750 // Host. | 742 // Host. |
751 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages), | 743 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages), |
752 &url_string, &new_parsed->host, adjustments); | 744 &url_string, &new_parsed->host, adjustments); |
753 | 745 |
754 // Port. | 746 // Port. |
755 if (parsed.port.is_nonempty()) { | 747 if (parsed.port.is_nonempty() && !(format_types & kFormatUrlOmitPort)) { |
756 url_string.push_back(':'); | 748 url_string.push_back(':'); |
757 new_parsed->port.begin = url_string.length(); | 749 new_parsed->port.begin = url_string.length(); |
758 url_string.insert(url_string.end(), | 750 url_string.insert(url_string.end(), |
759 spec.begin() + parsed.port.begin, | 751 spec.begin() + parsed.port.begin, |
760 spec.begin() + parsed.port.end()); | 752 spec.begin() + parsed.port.end()); |
761 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | 753 new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
762 } else { | 754 } else { |
763 new_parsed->port.reset(); | 755 new_parsed->port.reset(); |
asanka
2015/05/11 23:39:44
The else clause now includes the 'parsed.port.is_nonempty()' case [... remainder of comment truncated in this capture]
palmer
2015/05/12 22:20:53
Done.
| |
764 } | 756 } |
765 | 757 |
766 // Path & query. Both get the same general unescape & convert treatment. | 758 // Path & query. Both get the same general unescape & convert treatment. |
767 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | 759 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || |
768 !CanStripTrailingSlash(url)) { | 760 !CanStripTrailingSlash(url)) { |
769 AppendFormattedComponent(spec, parsed.path, | 761 AppendFormattedComponent(spec, parsed.path, |
770 NonHostComponentTransform(unescape_rules), | 762 NonHostComponentTransform(unescape_rules), |
771 &url_string, &new_parsed->path, adjustments); | 763 &url_string, &new_parsed->path, adjustments); |
772 } else { | 764 } else { |
773 if (parsed.path.len > 0) { | 765 if (parsed.path.len > 0) { |
774 adjustments->push_back(base::OffsetAdjuster::Adjustment( | 766 adjustments->push_back(base::OffsetAdjuster::Adjustment( |
775 parsed.path.begin, parsed.path.len, 0)); | 767 parsed.path.begin, parsed.path.len, 0)); |
776 } | 768 } |
777 } | 769 } |
778 if (parsed.query.is_valid()) | 770 if (parsed.query.is_valid()) |
779 url_string.push_back('?'); | 771 url_string.push_back('?'); |
780 AppendFormattedComponent(spec, parsed.query, | 772 AppendFormattedComponent(spec, parsed.query, |
781 NonHostComponentTransform(unescape_rules), | 773 NonHostComponentTransform(unescape_rules), |
782 &url_string, &new_parsed->query, adjustments); | 774 &url_string, &new_parsed->query, adjustments); |
783 | 775 |
784 // Ref. This is valid, unescaped UTF-8, so we can just convert. | 776 // Ref. This is valid, unescaped UTF-8, so we can just convert. |
785 if (parsed.ref.is_valid()) | 777 if (parsed.ref.is_valid()) |
786 url_string.push_back('#'); | 778 url_string.push_back('#'); |
787 AppendFormattedComponent(spec, parsed.ref, | 779 AppendFormattedComponent(spec, parsed.ref, |
788 NonHostComponentTransform(UnescapeRule::NONE), | 780 NonHostComponentTransform(UnescapeRule::NONE), |
789 &url_string, &new_parsed->ref, adjustments); | 781 &url_string, &new_parsed->ref, adjustments); |
790 | 782 |
791 // If we need to strip out http do it after the fact. | 783 // Strip out the scheme, after the fact. |
792 if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) { | 784 bool omit_all_schemes = !!(format_types & kFormatUrlOmitScheme); |
793 const size_t kHTTPSize = arraysize(kHTTP) - 1; | 785 // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This |
794 url_string = url_string.substr(kHTTPSize); | 786 // means that if we trim "http://" off a URL whose host starts with "ftp." and |
787 // the user inputs this into any field subject to fixup (which is basically | |
788 // all input fields), the meaning would be changed. (In fact, often the | |
789 // formatted URL is directly pre-filled into an input field.) For this reason | |
790 // we avoid stripping "http://" in this case. | |
791 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
792 url.SchemeIs("http") && | |
793 !StartsWithASCII(url.host(), "ftp.", true); | |
794 if (omit_all_schemes || omit_http) { | |
795 const int scheme_size = | |
796 parsed.CountCharactersBefore(url::Parsed::USERNAME, true); | |
797 url_string = url_string.substr(scheme_size); | |
795 // Because offsets in the |adjustments| are already calculated with respect | 798 // Because offsets in the |adjustments| are already calculated with respect |
796 // to the string with the http:// prefix in it, those offsets remain correct | 799 // to the string with the scheme prefix in it, those offsets remain correct |
797 // after stripping the prefix. The only thing necessary is to add an | 800 // after stripping the prefix. The only thing necessary is to add an |
798 // adjustment to reflect the stripped prefix. | 801 // adjustment to reflect the stripped prefix. |
799 adjustments->insert(adjustments->begin(), | 802 adjustments->insert(adjustments->begin(), |
800 base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); | 803 base::OffsetAdjuster::Adjustment(0, scheme_size, 0)); |
801 | 804 |
802 if (prefix_end) | 805 if (prefix_end) |
803 *prefix_end -= kHTTPSize; | 806 *prefix_end -= scheme_size; |
804 | 807 |
805 // Adjust new_parsed. | |
806 DCHECK(new_parsed->scheme.is_valid()); | 808 DCHECK(new_parsed->scheme.is_valid()); |
807 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | 809 DCHECK_EQ(scheme_size, new_parsed->scheme.len + 3); |
808 new_parsed->scheme.reset(); | 810 new_parsed->scheme.reset(); |
809 AdjustAllComponentsButScheme(delta, new_parsed); | 811 AdjustAllComponentsButScheme(-scheme_size, new_parsed); |
810 } | 812 } |
811 | 813 |
812 return url_string; | 814 return url_string; |
813 } | 815 } |
814 | 816 |
815 base::string16 FormatUrl(const GURL& url, | 817 base::string16 FormatUrl(const GURL& url, |
816 const std::string& languages, | 818 const std::string& languages, |
817 FormatUrlTypes format_types, | 819 FormatUrlTypes format_types, |
818 UnescapeRule::Type unescape_rules, | 820 UnescapeRule::Type unescape_rules, |
819 url::Parsed* new_parsed, | 821 url::Parsed* new_parsed, |
820 size_t* prefix_end, | 822 size_t* prefix_end, |
821 size_t* offset_for_adjustment) { | 823 size_t* offset_for_adjustment) { |
822 Offsets offsets; | 824 Offsets offsets; |
823 if (offset_for_adjustment) | 825 if (offset_for_adjustment) |
824 offsets.push_back(*offset_for_adjustment); | 826 offsets.push_back(*offset_for_adjustment); |
825 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, | 827 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
826 unescape_rules, new_parsed, prefix_end, &offsets); | 828 unescape_rules, new_parsed, prefix_end, &offsets); |
827 if (offset_for_adjustment) | 829 if (offset_for_adjustment) |
828 *offset_for_adjustment = offsets[0]; | 830 *offset_for_adjustment = offsets[0]; |
829 return result; | 831 return result; |
830 } | 832 } |
831 | 833 |
834 base::string16 FormatOriginForDisplay(const GURL& url, | |
835 const std::string& languages, | |
836 bool omit_scheme) { | |
837 if (!url.IsStandard()) | |
838 return FormatUrl(url, languages); | |
839 | |
840 if (url.SchemeIsFile()) { | |
841 // TODO(palmer): Determine whether to encode this policy in GURL::GetOrigin. | |
842 return (omit_scheme ? base::ASCIIToUTF16("") | |
843 : base::ASCIIToUTF16("file://")) + | |
844 base::UTF8ToUTF16(url.path()); | |
asanka
2015/05/11 23:39:44
It is (or should be) possible for url.host() to be [... remainder of comment truncated in this capture]
palmer
2015/05/12 22:20:53
For file: URLs, the path is the origin, so that's [... remainder of comment truncated in this capture]
| |
845 } | |
846 | |
847 if (url.SchemeIsFileSystem()) { | |
848 // TODO(palmer): Determine whether to encode this policy in GURL::GetOrigin. | |
849 // | |
850 // TODO(palmer): Determine whether GURL::IsStandard should return false for | |
asanka
2015/05/11 23:39:44
This is a good point. You should file a bug for it
palmer
2015/05/12 22:20:53
Done.
| |
851 // filesystem: URLs. Per | |
852 // http://www.html5rocks.com/en/tutorials/file/filesystem/, they are not | |
853 // standard. | |
854 const GURL* inner_url = url.inner_url(); | |
855 return base::ASCIIToUTF16("filesystem:") + | |
856 FormatOriginForDisplay(*inner_url, languages, omit_scheme); | |
asanka
2015/05/11 23:39:44
Perhaps add a comment that this method deals correctly [... remainder of comment truncated in this capture]
palmer
2015/05/12 22:20:53
Yeah, wow. Either GURL::inner_url is broken, or my [... remainder of comment truncated in this capture]
| |
857 } | |
858 | |
859 GURL display_origin = url.GetOrigin(); | |
860 | |
861 FormatUrlTypes format_types = kFormatUrlOmitUsernamePassword | | |
862 kFormatUrlOmitTrailingSlashOnBareHostname; | |
863 if (omit_scheme) | |
864 format_types |= kFormatUrlOmitScheme; | |
865 | |
866 const int default_port = url::DefaultPortForScheme( | |
867 display_origin.scheme().c_str(), display_origin.scheme().length()); | |
868 if (display_origin.EffectiveIntPort() == default_port) | |
asanka
2015/05/11 23:39:44
Nit: use IntPort().
palmer
2015/05/12 22:20:53
Done.
| |
869 format_types |= kFormatUrlOmitPort; | |
870 | |
871 return FormatUrl(display_origin, languages, format_types, | |
872 UnescapeRule::SPACES, NULL, NULL, NULL); | |
asanka
2015/05/11 23:39:44
NULL -> nullptr
palmer
2015/05/12 22:20:53
Done.
| |
873 } | |
874 | |
832 } // namespace net | 875 } // namespace net |
OLD | NEW |