OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <map> | 8 #include <map> |
9 #include <unicode/ucnv.h> | 9 #include <unicode/ucnv.h> |
10 #include <unicode/uidna.h> | 10 #include <unicode/uidna.h> |
(...skipping 632 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
643 std::string languages_list(WideToASCII(languages)); | 643 std::string languages_list(WideToASCII(languages)); |
644 StringTokenizer t(languages_list, ","); | 644 StringTokenizer t(languages_list, ","); |
645 while (t.GetNext()) { | 645 while (t.GetNext()) { |
646 if (IsComponentCoveredByLang(component_characters, t.token())) | 646 if (IsComponentCoveredByLang(component_characters, t.token())) |
647 return true; | 647 return true; |
648 } | 648 } |
649 return false; | 649 return false; |
650 } | 650 } |
651 | 651 |
652 // Converts one component of a host (between dots) to IDN if safe. The result | 652 // Converts one component of a host (between dots) to IDN if safe. The result |
653 // will be APPENDED to the given output string and will be the same as the | 653 // will be APPENDED to the given output string and will be the same as the input |
654 // input if it is not IDN or the IDN is unsafe to display. | 654 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
655 void IDNToUnicodeOneComponent(const char16* comp, | 655 // conversion was performed. |
656 int comp_len, | 656 bool IDNToUnicodeOneComponent(const char16* comp, |
| 657 size_t comp_len, |
657 const std::wstring& languages, | 658 const std::wstring& languages, |
658 string16* out) { | 659 string16* out) { |
659 DCHECK(comp_len >= 0); | 660 DCHECK(out); |
660 if (comp_len == 0) | 661 if (comp_len == 0) |
661 return; | 662 return false; |
662 | 663 |
663 // Expand the output string to make room for a possibly longer string | 664 // Only transform if the input can be an IDN component. |
664 // (we'll expand if it's still not big enough below). | 665 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
665 int extra_space = 64; | 666 if ((comp_len > arraysize(kIdnPrefix)) && |
666 size_t host_begin_in_output = out->size(); | 667 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { |
| 668 // Repeatedly expand the output string until it's big enough. It looks like |
| 669 // ICU will return the required size of the buffer, but that's not |
| 670 // documented, so we'll just grow by 2x. This should be rare and is not on a |
| 671 // critical path. |
| 672 size_t original_length = out->length(); |
| 673 for (int extra_space = 64; ; extra_space *= 2) { |
| 674 UErrorCode status = U_ZERO_ERROR; |
| 675 out->resize(out->length() + extra_space); |
| 676 int output_chars = uidna_IDNToUnicode(comp, |
| 677 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, |
| 678 UIDNA_DEFAULT, NULL, &status); |
| 679 if (status == U_ZERO_ERROR) { |
| 680 // Converted successfully. |
| 681 out->resize(original_length + output_chars); |
| 682 if (IsIDNComponentSafe(out->data() + original_length, output_chars, |
| 683 languages)) |
| 684 return true; |
| 685 } |
667 | 686 |
668 // Just copy the input if it can't be an IDN component. | 687 if (status != U_BUFFER_OVERFLOW_ERROR) |
669 if (comp_len < 4 || | 688 break; |
670 comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') { | 689 } |
671 out->resize(host_begin_in_output + comp_len); | 690 // Failed, revert back to original string. |
672 for (int i = 0; i < comp_len; i++) | 691 out->resize(original_length); |
673 (*out)[host_begin_in_output + i] = comp[i]; | |
674 return; | |
675 } | 692 } |
676 | 693 |
677 while (true) { | 694 // We get here with no IDN or on error, in which case we just append the |
678 UErrorCode status = U_ZERO_ERROR; | 695 // literal input. |
679 out->resize(out->size() + extra_space); | 696 out->append(comp, comp_len); |
680 int output_chars = | 697 return false; |
681 uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output], | |
682 extra_space, UIDNA_DEFAULT, NULL, &status); | |
683 if (status == U_ZERO_ERROR) { | |
684 // Converted successfully. | |
685 out->resize(host_begin_in_output + output_chars); | |
686 if (!IsIDNComponentSafe(&out->data()[host_begin_in_output], | |
687 output_chars, | |
688 languages)) | |
689 break; // The error handling below will undo the IDN. | |
690 return; | |
691 } | |
692 if (status != U_BUFFER_OVERFLOW_ERROR) | |
693 break; | |
694 | |
695 // Need to loop again with a bigger buffer. It looks like ICU will | |
696 // return the required size of the buffer, but that's not documented, | |
697 // so we'll just grow by 2x. This should be rare and is not on a | |
698 // critical path. | |
699 extra_space *= 2; | |
700 } | |
701 | |
702 // We get here on error, in which case we replace anything that was added | |
703 // with the literal input. | |
704 out->resize(host_begin_in_output + comp_len); | |
705 for (int i = 0; i < comp_len; i++) | |
706 (*out)[host_begin_in_output + i] = comp[i]; | |
707 } | 698 } |
708 | 699 |
709 // Helper for FormatUrl(). | 700 // Helper for FormatUrl(). |
710 std::wstring FormatViewSourceUrl(const GURL& url, | 701 std::wstring FormatViewSourceUrl(const GURL& url, |
711 const std::wstring& languages, | 702 const std::wstring& languages, |
712 bool omit_username_password, | 703 bool omit_username_password, |
713 UnescapeRule::Type unescape_rules, | 704 UnescapeRule::Type unescape_rules, |
714 url_parse::Parsed* new_parsed, | 705 url_parse::Parsed* new_parsed, |
715 size_t* prefix_end) { | 706 size_t* prefix_end, |
| 707 size_t* offset_for_adjustment) { |
716 DCHECK(new_parsed); | 708 DCHECK(new_parsed); |
717 const wchar_t* const kWideViewSource = L"view-source:"; | 709 const wchar_t* const kWideViewSource = L"view-source:"; |
718 const size_t kViewSourceLengthPlus1 = 12; | 710 const size_t kViewSourceLengthPlus1 = 12; |
719 | 711 |
720 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); | 712 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); |
| 713 size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ? |
| 714 std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1); |
| 715 size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ? |
| 716 NULL : &temp_offset; |
721 std::wstring result = net::FormatUrl(real_url, languages, | 717 std::wstring result = net::FormatUrl(real_url, languages, |
722 omit_username_password, unescape_rules, new_parsed, prefix_end); | 718 omit_username_password, unescape_rules, new_parsed, prefix_end, |
| 719 temp_offset_ptr); |
723 result.insert(0, kWideViewSource); | 720 result.insert(0, kWideViewSource); |
724 | 721 |
725 // Adjust position values. | 722 // Adjust position values. |
726 if (prefix_end) | |
727 *prefix_end += kViewSourceLengthPlus1; | |
728 if (new_parsed->scheme.is_nonempty()) { | 723 if (new_parsed->scheme.is_nonempty()) { |
729 // Assume "view-source:real-scheme" as a scheme. | 724 // Assume "view-source:real-scheme" as a scheme. |
730 new_parsed->scheme.len += kViewSourceLengthPlus1; | 725 new_parsed->scheme.len += kViewSourceLengthPlus1; |
731 } else { | 726 } else { |
732 new_parsed->scheme.begin = 0; | 727 new_parsed->scheme.begin = 0; |
733 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; | 728 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; |
734 } | 729 } |
735 if (new_parsed->username.is_nonempty()) | 730 if (new_parsed->username.is_nonempty()) |
736 new_parsed->username.begin += kViewSourceLengthPlus1; | 731 new_parsed->username.begin += kViewSourceLengthPlus1; |
737 if (new_parsed->password.is_nonempty()) | 732 if (new_parsed->password.is_nonempty()) |
738 new_parsed->password.begin += kViewSourceLengthPlus1; | 733 new_parsed->password.begin += kViewSourceLengthPlus1; |
739 if (new_parsed->host.is_nonempty()) | 734 if (new_parsed->host.is_nonempty()) |
740 new_parsed->host.begin += kViewSourceLengthPlus1; | 735 new_parsed->host.begin += kViewSourceLengthPlus1; |
741 if (new_parsed->port.is_nonempty()) | 736 if (new_parsed->port.is_nonempty()) |
742 new_parsed->port.begin += kViewSourceLengthPlus1; | 737 new_parsed->port.begin += kViewSourceLengthPlus1; |
743 if (new_parsed->path.is_nonempty()) | 738 if (new_parsed->path.is_nonempty()) |
744 new_parsed->path.begin += kViewSourceLengthPlus1; | 739 new_parsed->path.begin += kViewSourceLengthPlus1; |
745 if (new_parsed->query.is_nonempty()) | 740 if (new_parsed->query.is_nonempty()) |
746 new_parsed->query.begin += kViewSourceLengthPlus1; | 741 new_parsed->query.begin += kViewSourceLengthPlus1; |
747 if (new_parsed->ref.is_nonempty()) | 742 if (new_parsed->ref.is_nonempty()) |
748 new_parsed->ref.begin += kViewSourceLengthPlus1; | 743 new_parsed->ref.begin += kViewSourceLengthPlus1; |
| 744 if (prefix_end) |
| 745 *prefix_end += kViewSourceLengthPlus1; |
| 746 if (temp_offset_ptr) { |
| 747 *offset_for_adjustment = (temp_offset == std::wstring::npos) ? |
| 748 std::wstring::npos : (temp_offset + kViewSourceLengthPlus1); |
| 749 } |
749 return result; | 750 return result; |
750 } | 751 } |
751 | 752 |
752 // Converts a UTF-8 string to a FilePath string type. | 753 // Converts a UTF-8 string to a FilePath string type. |
753 // | 754 // |
754 // This is inline with the hope that the function will be "free" on non-Windows | 755 // This is inline with the hope that the function will be "free" on non-Windows |
755 // platforms. | 756 // platforms. |
756 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) { | 757 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) { |
757 #if defined(OS_WIN) | 758 #if defined(OS_WIN) |
758 return FilePath::StringType(UTF8ToUTF16(utf8)); | 759 return FilePath::StringType(UTF8ToUTF16(utf8)); |
759 #else | 760 #else |
760 return utf8; | 761 return utf8; |
761 #endif | 762 #endif |
762 } | 763 } |
763 | 764 |
764 } // namespace | 765 } // namespace |
765 | 766 |
766 namespace net { | 767 namespace net { |
767 | 768 |
768 std::set<int> explicitly_allowed_ports; | 769 std::set<int> explicitly_allowed_ports; |
769 | 770 |
770 // Appends the substring |in_component| inside of the URL |spec| to |output|, | 771 // Appends the substring |in_component| inside of the URL |spec| to |output|, |
771 // and the resulting range will be filled into |out_component|. |unescape_rules| | 772 // and the resulting range will be filled into |out_component|. |unescape_rules| |
772 // defines how to clean the URL for human readability. | 773 // defines how to clean the URL for human readability. |offset_for_adjustment| |
| 774 // is an offset into |output| which will be adjusted based on how it maps to the |
| 775 // component being converted; if it is less than output->length(), it will be |
| 776 // untouched, and if it is greater than output->length() + in_component.len it |
| 777 // will be shortened by the difference in lengths between the input and output |
| 778 // components. Otherwise it points into the component being converted, and is |
| 779 // adjusted to point to the same logical place in |output|. |
| 780 // |offset_for_adjustment| may not be NULL. |
773 static void AppendFormattedComponent(const std::string& spec, | 781 static void AppendFormattedComponent(const std::string& spec, |
774 const url_parse::Component& in_component, | 782 const url_parse::Component& in_component, |
775 UnescapeRule::Type unescape_rules, | 783 UnescapeRule::Type unescape_rules, |
776 std::wstring* output, | 784 std::wstring* output, |
777 url_parse::Component* out_component); | 785 url_parse::Component* out_component, |
| 786 size_t* offset_for_adjustment); |
778 | 787 |
779 GURL FilePathToFileURL(const FilePath& path) { | 788 GURL FilePathToFileURL(const FilePath& path) { |
780 // Produce a URL like "file:///C:/foo" for a regular file, or | 789 // Produce a URL like "file:///C:/foo" for a regular file, or |
781 // "file://///server/path" for UNC. The URL canonicalizer will fix up the | 790 // "file://///server/path" for UNC. The URL canonicalizer will fix up the |
782 // latter case to be the canonical UNC form: "file://server/path" | 791 // latter case to be the canonical UNC form: "file://server/path" |
783 FilePath::StringType url_string(kFileURLPrefix); | 792 FilePath::StringType url_string(kFileURLPrefix); |
784 url_string.append(path.value()); | 793 url_string.append(path.value()); |
785 | 794 |
786 // Now do replacement of some characters. Since we assume the input is a | 795 // Now do replacement of some characters. Since we assume the input is a |
787 // literal filename, anything the URL parser might consider special should | 796 // literal filename, anything the URL parser might consider special should |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
842 return GetHeaderParamValueT(field, param_name); | 851 return GetHeaderParamValueT(field, param_name); |
843 } | 852 } |
844 | 853 |
845 // TODO(brettw) bug 734373: check the scripts for each host component and | 854 // TODO(brettw) bug 734373: check the scripts for each host component and |
846 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | 855 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
847 // scripts that the user has installed. For now, just put the entire | 856 // scripts that the user has installed. For now, just put the entire |
848 // path through IDN. Maybe this feature can be implemented in ICU itself? | 857 // path through IDN. Maybe this feature can be implemented in ICU itself? |
849 // | 858 // |
850 // We may want to skip this step in the case of file URLs to allow unicode | 859 // We may want to skip this step in the case of file URLs to allow unicode |
851 // UNC hostnames regardless of encodings. | 860 // UNC hostnames regardless of encodings. |
852 void IDNToUnicode(const char* host, | 861 std::wstring IDNToUnicode(const char* host, |
853 int host_len, | 862 size_t host_len, |
854 const std::wstring& languages, | 863 const std::wstring& languages, |
855 std::wstring* out) { | 864 size_t* offset_for_adjustment) { |
856 // Convert the ASCII input to a wide string for ICU. | 865 // Convert the ASCII input to a wide string for ICU. |
857 string16 input16; | 866 string16 input16; |
858 input16.reserve(host_len); | 867 input16.reserve(host_len); |
859 for (int i = 0; i < host_len; i++) | 868 std::copy(host, host + host_len, std::back_inserter(input16)); |
860 input16.push_back(host[i]); | |
861 | 869 |
862 string16 out16; | 870 string16 out16; |
863 // The output string is appended to, so convert what's already there if | 871 size_t output_offset = offset_for_adjustment ? |
864 // needed. | 872 *offset_for_adjustment : std::wstring::npos; |
865 #if defined(WCHAR_T_IS_UTF32) | |
866 WideToUTF16(out->data(), out->length(), &out16); | |
867 out->clear(); // for equivalence with the swap below | |
868 #elif defined(WCHAR_T_IS_UTF16) | |
869 out->swap(out16); | |
870 #endif | |
871 | 873 |
872 // Do each component of the host separately, since we enforce script matching | 874 // Do each component of the host separately, since we enforce script matching |
873 // on a per-component basis. | 875 // on a per-component basis. |
874 size_t cur_begin = 0; // Beginning of the current component (inclusive). | 876 for (size_t component_start = 0, component_end; |
875 while (cur_begin < input16.size()) { | 877 component_start < input16.length(); |
876 // Find the next dot or the end of the string. | 878 component_start = component_end + 1) { |
877 size_t next_dot = input16.find_first_of('.', cur_begin); | 879 // Find the end of the component. |
878 if (next_dot == std::wstring::npos) | 880 component_end = input16.find('.', component_start); |
879 next_dot = input16.size(); // For getting the last component. | 881 if (component_end == string16::npos) |
| 882 component_end = input16.length(); // For getting the last component. |
| 883 size_t component_length = component_end - component_start; |
880 | 884 |
881 if (next_dot > cur_begin) { | 885 size_t output_component_start = out16.length(); |
| 886 bool converted_idn = false; |
| 887 if (component_end > component_start) { |
882 // Add the substring that we just found. | 888 // Add the substring that we just found. |
883 IDNToUnicodeOneComponent(&input16[cur_begin], | 889 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, |
884 static_cast<int>(next_dot - cur_begin), | 890 component_length, languages, &out16); |
885 languages, | 891 } |
886 &out16); | 892 size_t output_component_length = out16.length() - output_component_start; |
| 893 |
| 894 if ((output_offset != std::wstring::npos) && |
| 895 (*offset_for_adjustment > component_start)) { |
| 896 if ((*offset_for_adjustment < component_end) && converted_idn) |
| 897 output_offset = std::wstring::npos; |
| 898 else |
| 899 output_offset += output_component_length - component_length; |
887 } | 900 } |
888 | 901 |
889 // Need to add the dot we just found (if we found one). This needs to be | 902 // Need to add the dot we just found (if we found one). |
890 // done before we break out below in case the URL ends in a dot. | 903 if (component_end < input16.length()) |
891 if (next_dot < input16.size()) | |
892 out16.push_back('.'); | 904 out16.push_back('.'); |
893 else | |
894 break; // No more components left. | |
895 | |
896 cur_begin = next_dot + 1; | |
897 } | 905 } |
898 | 906 |
899 #if defined(WCHAR_T_IS_UTF32) | 907 if (offset_for_adjustment) |
900 UTF16ToWide(out16.data(), out16.length(), out); | 908 *offset_for_adjustment = output_offset; |
901 #elif defined(WCHAR_T_IS_UTF16) | 909 |
902 out->swap(out16); | 910 return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment); |
903 #endif | |
904 } | 911 } |
905 | 912 |
906 std::string CanonicalizeHost(const std::string& host, | 913 std::string CanonicalizeHost(const std::string& host, |
907 url_canon::CanonHostInfo* host_info) { | 914 url_canon::CanonHostInfo* host_info) { |
908 // Try to canonicalize the host. | 915 // Try to canonicalize the host. |
909 const url_parse::Component raw_host_component( | 916 const url_parse::Component raw_host_component( |
910 0, static_cast<int>(host.length())); | 917 0, static_cast<int>(host.length())); |
911 std::string canon_host; | 918 std::string canon_host; |
912 url_canon::StdStringCanonOutput canon_host_output(&canon_host); | 919 url_canon::StdStringCanonOutput canon_host_output(&canon_host); |
913 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, | 920 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, |
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1255 DLOG(INFO) << "gethostname() failed with " << result; | 1262 DLOG(INFO) << "gethostname() failed with " << result; |
1256 buffer[0] = '\0'; | 1263 buffer[0] = '\0'; |
1257 } | 1264 } |
1258 return std::string(buffer); | 1265 return std::string(buffer); |
1259 } | 1266 } |
1260 | 1267 |
1261 void GetIdentityFromURL(const GURL& url, | 1268 void GetIdentityFromURL(const GURL& url, |
1262 std::wstring* username, | 1269 std::wstring* username, |
1263 std::wstring* password) { | 1270 std::wstring* password) { |
1264 UnescapeRule::Type flags = UnescapeRule::SPACES; | 1271 UnescapeRule::Type flags = UnescapeRule::SPACES; |
1265 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags); | 1272 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); |
1266 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags); | 1273 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); |
1267 } | 1274 } |
1268 | 1275 |
1269 void AppendFormattedHost(const GURL& url, | 1276 void AppendFormattedHost(const GURL& url, |
1270 const std::wstring& languages, | 1277 const std::wstring& languages, |
1271 std::wstring* output, | 1278 std::wstring* output, |
1272 url_parse::Parsed* new_parsed) { | 1279 url_parse::Parsed* new_parsed, |
| 1280 size_t* offset_for_adjustment) { |
| 1281 DCHECK(output); |
1273 const url_parse::Component& host = | 1282 const url_parse::Component& host = |
1274 url.parsed_for_possibly_invalid_spec().host; | 1283 url.parsed_for_possibly_invalid_spec().host; |
1275 | 1284 |
1276 if (host.is_nonempty()) { | 1285 if (host.is_nonempty()) { |
1277 // Handle possible IDN in the host name. | 1286 // Handle possible IDN in the host name. |
| 1287 int new_host_begin = static_cast<int>(output->length()); |
1278 if (new_parsed) | 1288 if (new_parsed) |
1279 new_parsed->host.begin = static_cast<int>(output->length()); | 1289 new_parsed->host.begin = new_host_begin; |
| 1290 size_t offset_past_current_output = |
| 1291 (!offset_for_adjustment || |
| 1292 (*offset_for_adjustment == std::wstring::npos) || |
| 1293 (*offset_for_adjustment < output->length())) ? |
| 1294 std::wstring::npos : (*offset_for_adjustment - output->length()); |
| 1295 size_t* offset_into_host = |
| 1296 (offset_past_current_output >= static_cast<size_t>(host.len)) ? |
| 1297 NULL : &offset_past_current_output; |
1280 | 1298 |
1281 const std::string& spec = url.possibly_invalid_spec(); | 1299 const std::string& spec = url.possibly_invalid_spec(); |
1282 DCHECK(host.begin >= 0 && | 1300 DCHECK(host.begin >= 0 && |
1283 ((spec.length() == 0 && host.begin == 0) || | 1301 ((spec.length() == 0 && host.begin == 0) || |
1284 host.begin < static_cast<int>(spec.length()))); | 1302 host.begin < static_cast<int>(spec.length()))); |
1285 net::IDNToUnicode(&spec[host.begin], host.len, languages, output); | 1303 output->append(net::IDNToUnicode(&spec[host.begin], |
| 1304 static_cast<size_t>(host.len), languages, offset_into_host)); |
1286 | 1305 |
1287 if (new_parsed) { | 1306 int new_host_len = static_cast<int>(output->length()) - new_host_begin; |
1288 new_parsed->host.len = | 1307 if (new_parsed) |
1289 static_cast<int>(output->length()) - new_parsed->host.begin; | 1308 new_parsed->host.len = new_host_len; |
| 1309 if (offset_into_host) { |
| 1310 *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ? |
| 1311 std::wstring::npos : (new_host_begin + *offset_into_host); |
| 1312 } else if (offset_past_current_output != std::wstring::npos) { |
| 1313 *offset_for_adjustment += new_host_len - host.len; |
1290 } | 1314 } |
1291 } else if (new_parsed) { | 1315 } else if (new_parsed) { |
1292 new_parsed->host.reset(); | 1316 new_parsed->host.reset(); |
1293 } | 1317 } |
1294 } | 1318 } |
1295 | 1319 |
1296 /* static */ | 1320 /* static */ |
1297 void AppendFormattedComponent(const std::string& spec, | 1321 void AppendFormattedComponent(const std::string& spec, |
1298 const url_parse::Component& in_component, | 1322 const url_parse::Component& in_component, |
1299 UnescapeRule::Type unescape_rules, | 1323 UnescapeRule::Type unescape_rules, |
1300 std::wstring* output, | 1324 std::wstring* output, |
1301 url_parse::Component* out_component) { | 1325 url_parse::Component* out_component, |
| 1326 size_t* offset_for_adjustment) { |
| 1327 DCHECK(output); |
| 1328 DCHECK(offset_for_adjustment); |
1302 if (in_component.is_nonempty()) { | 1329 if (in_component.is_nonempty()) { |
1303 out_component->begin = static_cast<int>(output->length()); | 1330 out_component->begin = static_cast<int>(output->length()); |
| 1331 size_t offset_past_current_output = |
| 1332 ((*offset_for_adjustment == std::wstring::npos) || |
| 1333 (*offset_for_adjustment < output->length())) ? |
| 1334 std::wstring::npos : (*offset_for_adjustment - output->length()); |
| 1335 size_t* offset_into_component = |
| 1336 (offset_past_current_output >= static_cast<size_t>(in_component.len)) ? |
| 1337 NULL : &offset_past_current_output; |
1304 if (unescape_rules == UnescapeRule::NONE) { | 1338 if (unescape_rules == UnescapeRule::NONE) { |
1305 output->append(UTF8ToWide(spec.substr( | 1339 output->append(UTF8ToWideAndAdjustOffset( |
1306 in_component.begin, in_component.len))); | 1340 spec.substr(in_component.begin, in_component.len), |
| 1341 offset_into_component)); |
1307 } else { | 1342 } else { |
1308 output->append(UnescapeAndDecodeUTF8URLComponent( | 1343 output->append(UnescapeAndDecodeUTF8URLComponent( |
1309 spec.substr(in_component.begin, in_component.len), | 1344 spec.substr(in_component.begin, in_component.len), unescape_rules, |
1310 unescape_rules)); | 1345 offset_into_component)); |
1311 } | 1346 } |
1312 out_component->len = | 1347 out_component->len = |
1313 static_cast<int>(output->length()) - out_component->begin; | 1348 static_cast<int>(output->length()) - out_component->begin; |
| 1349 if (offset_into_component) { |
| 1350 *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ? |
| 1351 std::wstring::npos : (out_component->begin + *offset_into_component); |
| 1352 } else if (offset_past_current_output != std::wstring::npos) { |
| 1353 *offset_for_adjustment += out_component->len - in_component.len; |
| 1354 } |
1314 } else { | 1355 } else { |
1315 out_component->reset(); | 1356 out_component->reset(); |
1316 } | 1357 } |
1317 } | 1358 } |
1318 | 1359 |
1319 std::wstring FormatUrl(const GURL& url, | 1360 std::wstring FormatUrl(const GURL& url, |
1320 const std::wstring& languages, | 1361 const std::wstring& languages, |
1321 bool omit_username_password, | 1362 bool omit_username_password, |
1322 UnescapeRule::Type unescape_rules, | 1363 UnescapeRule::Type unescape_rules, |
1323 url_parse::Parsed* new_parsed, | 1364 url_parse::Parsed* new_parsed, |
1324 size_t* prefix_end) { | 1365 size_t* prefix_end, |
| 1366 size_t* offset_for_adjustment) { |
1325 url_parse::Parsed parsed_temp; | 1367 url_parse::Parsed parsed_temp; |
1326 if (!new_parsed) | 1368 if (!new_parsed) |
1327 new_parsed = &parsed_temp; | 1369 new_parsed = &parsed_temp; |
| 1370 size_t offset_temp = std::wstring::npos; |
| 1371 if (!offset_for_adjustment) |
| 1372 offset_for_adjustment = &offset_temp; |
1328 | 1373 |
1329 std::wstring url_string; | 1374 std::wstring url_string; |
1330 | 1375 |
1331 // Check for empty URLs or 0 available text width. | 1376 // Check for empty URLs or 0 available text width. |
1332 if (url.is_empty()) { | 1377 if (url.is_empty()) { |
1333 if (prefix_end) | 1378 if (prefix_end) |
1334 *prefix_end = 0; | 1379 *prefix_end = 0; |
| 1380 *offset_for_adjustment = std::wstring::npos; |
1335 return url_string; | 1381 return url_string; |
1336 } | 1382 } |
1337 | 1383 |
1338 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | 1384 // Special handling for view-source:. Don't use chrome::kViewSourceScheme |
1339 // because this library shouldn't depend on chrome. | 1385 // because this library shouldn't depend on chrome. |
1340 const char* const kViewSource = "view-source"; | 1386 const char* const kViewSource = "view-source"; |
1341 const char* const kViewSourceTwice = "view-source:view-source:"; | 1387 const char* const kViewSourceTwice = "view-source:view-source:"; |
1342 // Rejects view-source:view-source:... to avoid deep recursive call. | 1388 // Rejects view-source:view-source:... to avoid deep recursive call. |
1343 if (url.SchemeIs(kViewSource) && | 1389 if (url.SchemeIs(kViewSource) && |
1344 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | 1390 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { |
1345 return FormatViewSourceUrl(url, languages, omit_username_password, | 1391 return FormatViewSourceUrl(url, languages, omit_username_password, |
1346 unescape_rules, new_parsed, prefix_end); | 1392 unescape_rules, new_parsed, prefix_end, offset_for_adjustment); |
1347 } | 1393 } |
1348 | 1394 |
1349 // We handle both valid and invalid URLs (this will give us the spec | 1395 // We handle both valid and invalid URLs (this will give us the spec |
1350 // regardless of validity). | 1396 // regardless of validity). |
1351 const std::string& spec = url.possibly_invalid_spec(); | 1397 const std::string& spec = url.possibly_invalid_spec(); |
1352 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | 1398 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
| 1399 if (*offset_for_adjustment >= spec.length()) |
| 1400 *offset_for_adjustment = std::wstring::npos; |
1353 | 1401 |
1354 // Copy everything before the username (the scheme and the separators.) | 1402 // Copy everything before the username (the scheme and the separators.) |
1355 // These are ASCII. | 1403 // These are ASCII. |
1356 int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); | 1404 std::copy(spec.begin(), |
1357 for (int i = 0; i < pre_end; ++i) | 1405 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, |
1358 url_string.push_back(spec[i]); | 1406 true), |
| 1407 std::back_inserter(url_string)); |
1359 new_parsed->scheme = parsed.scheme; | 1408 new_parsed->scheme = parsed.scheme; |
1360 | 1409 |
1361 if (omit_username_password) { | 1410 if (omit_username_password) { |
1362 // Remove the username and password fields. We don't want to display those | 1411 // Remove the username and password fields. We don't want to display those |
1363 // to the user since they can be used for attacks, | 1412 // to the user since they can be used for attacks, |
1364 // e.g. "http://google.com:search@evil.ru/" | 1413 // e.g. "http://google.com:search@evil.ru/" |
1365 new_parsed->username.reset(); | 1414 new_parsed->username.reset(); |
1366 new_parsed->password.reset(); | 1415 new_parsed->password.reset(); |
| 1416 if ((*offset_for_adjustment != std::wstring::npos) && |
| 1417 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { |
| 1418 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { |
| 1419 // The seeming off-by-one and off-by-two in these first two lines are to |
| 1420 // account for the ':' after the username and '@' after the password. |
| 1421 if (*offset_for_adjustment > |
| 1422 static_cast<size_t>(parsed.password.end())) { |
| 1423 *offset_for_adjustment -= |
| 1424 (parsed.username.len + parsed.password.len + 2); |
| 1425 } else if (*offset_for_adjustment > |
| 1426 static_cast<size_t>(parsed.username.begin)) { |
| 1427 *offset_for_adjustment = std::wstring::npos; |
| 1428 } |
| 1429 } else { |
| 1430 const url_parse::Component* nonempty_component = |
| 1431 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; |
| 1432 // The seeming off-by-one in these first two lines is to account for the |
| 1433 // '@' after the username/password. |
| 1434 if (*offset_for_adjustment > |
| 1435 static_cast<size_t>(nonempty_component->end())) { |
| 1436 *offset_for_adjustment -= (nonempty_component->len + 1); |
| 1437 } else if (*offset_for_adjustment > |
| 1438 static_cast<size_t>(nonempty_component->begin)) { |
| 1439 *offset_for_adjustment = std::wstring::npos; |
| 1440 } |
| 1441 } |
| 1442 } |
1367 } else { | 1443 } else { |
1368 AppendFormattedComponent( | 1444 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, |
1369 spec, parsed.username, unescape_rules, | 1445 &new_parsed->username, offset_for_adjustment); |
1370 &url_string, &new_parsed->username); | |
1371 if (parsed.password.is_valid()) { | 1446 if (parsed.password.is_valid()) { |
1372 url_string.push_back(':'); | 1447 url_string.push_back(':'); |
1373 } | 1448 } |
1374 AppendFormattedComponent( | 1449 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, |
1375 spec, parsed.password, unescape_rules, | 1450 &new_parsed->password, offset_for_adjustment); |
1376 &url_string, &new_parsed->password); | |
1377 if (parsed.username.is_valid() || parsed.password.is_valid()) { | 1451 if (parsed.username.is_valid() || parsed.password.is_valid()) { |
1378 url_string.push_back('@'); | 1452 url_string.push_back('@'); |
1379 } | 1453 } |
1380 } | 1454 } |
1381 if (prefix_end) | 1455 if (prefix_end) |
1382 *prefix_end = static_cast<size_t>(url_string.length()); | 1456 *prefix_end = static_cast<size_t>(url_string.length()); |
1383 | 1457 |
1384 AppendFormattedHost(url, languages, &url_string, new_parsed); | 1458 AppendFormattedHost(url, languages, &url_string, new_parsed, |
| 1459 offset_for_adjustment); |
1385 | 1460 |
1386 // Port. | 1461 // Port. |
1387 if (parsed.port.is_nonempty()) { | 1462 if (parsed.port.is_nonempty()) { |
1388 url_string.push_back(':'); | 1463 url_string.push_back(':'); |
1389 int begin = url_string.length(); | 1464 new_parsed->port.begin = url_string.length(); |
1390 for (int i = parsed.port.begin; i < parsed.port.end(); ++i) | 1465 std::copy(spec.begin() + parsed.port.begin, |
1391 url_string.push_back(spec[i]); | 1466 spec.begin() + parsed.port.end(), std::back_inserter(url_string)); |
1392 new_parsed->port.begin = begin; | 1467 new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
1393 new_parsed->port.len = url_string.length() - begin; | |
1394 } else { | 1468 } else { |
1395 new_parsed->port.reset(); | 1469 new_parsed->port.reset(); |
1396 } | 1470 } |
1397 | 1471 |
1398 // Path and query both get the same general unescape & convert treatment. | 1472 // Path and query both get the same general unescape & convert treatment. |
1399 AppendFormattedComponent( | 1473 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, |
1400 spec, parsed.path, unescape_rules, &url_string, | 1474 &new_parsed->path, offset_for_adjustment); |
1401 &new_parsed->path); | |
1402 if (parsed.query.is_valid()) | 1475 if (parsed.query.is_valid()) |
1403 url_string.push_back('?'); | 1476 url_string.push_back('?'); |
1404 AppendFormattedComponent( | 1477 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, |
1405 spec, parsed.query, unescape_rules, &url_string, | 1478 &new_parsed->query, offset_for_adjustment); |
1406 &new_parsed->query); | |
1407 | 1479 |
1408 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | 1480 // Reference is stored in valid, unescaped UTF-8, so we can just convert. |
1409 if (parsed.ref.is_valid()) { | 1481 if (parsed.ref.is_valid()) { |
1410 url_string.push_back('#'); | 1482 url_string.push_back('#'); |
1411 int begin = url_string.length(); | 1483 new_parsed->ref.begin = url_string.length(); |
1412 if (parsed.ref.len > 0) | 1484 size_t offset_past_current_output = |
1413 url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], | 1485 ((*offset_for_adjustment == std::wstring::npos) || |
1414 parsed.ref.len))); | 1486 (*offset_for_adjustment < url_string.length())) ? |
1415 new_parsed->ref.begin = begin; | 1487 std::wstring::npos : (*offset_for_adjustment - url_string.length()); |
1416 new_parsed->ref.len = url_string.length() - begin; | 1488 size_t* offset_into_ref = |
| 1489 (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ? |
| 1490 NULL : &offset_past_current_output; |
| 1491 if (parsed.ref.len > 0) { |
| 1492 url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin, |
| 1493 parsed.ref.len), |
| 1494 offset_into_ref)); |
| 1495 } |
| 1496 new_parsed->ref.len = url_string.length() - new_parsed->ref.begin; |
| 1497 if (offset_into_ref) { |
| 1498 *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ? |
| 1499 std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref); |
| 1500 } else if (offset_past_current_output != std::wstring::npos) { |
| 1501 // We clamped the offset near the beginning of this function to ensure it |
| 1502 // was within the input URL. If we reach here, the input was something |
| 1503 // invalid and non-parseable such that the offset was past any component |
| 1504 // we could figure out. In this case it won't be represented in the |
| 1505 // output string, so reset it. |
| 1506 *offset_for_adjustment = std::wstring::npos; |
| 1507 } |
1417 } | 1508 } |
1418 | 1509 |
1419 return url_string; | 1510 return url_string; |
1420 } | 1511 } |
1421 | 1512 |
1422 GURL SimplifyUrlForRequest(const GURL& url) { | 1513 GURL SimplifyUrlForRequest(const GURL& url) { |
1423 DCHECK(url.is_valid()); | 1514 DCHECK(url.is_valid()); |
1424 GURL::Replacements replacements; | 1515 GURL::Replacements replacements; |
1425 replacements.ClearUsername(); | 1516 replacements.ClearUsername(); |
1426 replacements.ClearPassword(); | 1517 replacements.ClearPassword(); |
(...skipping 24 matching lines...) Expand all Loading... |
1451 if (length > 0) | 1542 if (length > 0) |
1452 ports.insert(StringToInt(WideToASCII( | 1543 ports.insert(StringToInt(WideToASCII( |
1453 allowed_ports.substr(last, length)))); | 1544 allowed_ports.substr(last, length)))); |
1454 last = i + 1; | 1545 last = i + 1; |
1455 } | 1546 } |
1456 } | 1547 } |
1457 explicitly_allowed_ports = ports; | 1548 explicitly_allowed_ports = ports; |
1458 } | 1549 } |
1459 | 1550 |
1460 } // namespace net | 1551 } // namespace net |
OLD | NEW |