| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <map> | 8 #include <map> |
| 9 #include <unicode/ucnv.h> | 9 #include <unicode/ucnv.h> |
| 10 #include <unicode/uidna.h> | 10 #include <unicode/uidna.h> |
| (...skipping 632 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 643 std::string languages_list(WideToASCII(languages)); | 643 std::string languages_list(WideToASCII(languages)); |
| 644 StringTokenizer t(languages_list, ","); | 644 StringTokenizer t(languages_list, ","); |
| 645 while (t.GetNext()) { | 645 while (t.GetNext()) { |
| 646 if (IsComponentCoveredByLang(component_characters, t.token())) | 646 if (IsComponentCoveredByLang(component_characters, t.token())) |
| 647 return true; | 647 return true; |
| 648 } | 648 } |
| 649 return false; | 649 return false; |
| 650 } | 650 } |
| 651 | 651 |
| 652 // Converts one component of a host (between dots) to IDN if safe. The result | 652 // Converts one component of a host (between dots) to IDN if safe. The result |
| 653 // will be APPENDED to the given output string and will be the same as the | 653 // will be APPENDED to the given output string and will be the same as the input |
| 654 // input if it is not IDN or the IDN is unsafe to display. | 654 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
| 655 void IDNToUnicodeOneComponent(const char16* comp, | 655 // conversion was performed. |
| 656 int comp_len, | 656 bool IDNToUnicodeOneComponent(const char16* comp, |
| 657 size_t comp_len, |
| 657 const std::wstring& languages, | 658 const std::wstring& languages, |
| 658 string16* out) { | 659 string16* out) { |
| 659 DCHECK(comp_len >= 0); | 660 DCHECK(out); |
| 660 if (comp_len == 0) | 661 if (comp_len == 0) |
| 661 return; | 662 return false; |
| 662 | 663 |
| 663 // Expand the output string to make room for a possibly longer string | 664 // Only transform if the input can be an IDN component. |
| 664 // (we'll expand if it's still not big enough below). | 665 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
| 665 int extra_space = 64; | 666 if ((comp_len > arraysize(kIdnPrefix)) && |
| 666 size_t host_begin_in_output = out->size(); | 667 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { |
| 668 // Repeatedly expand the output string until it's big enough. It looks like |
| 669 // ICU will return the required size of the buffer, but that's not |
| 670 // documented, so we'll just grow by 2x. This should be rare and is not on a |
| 671 // critical path. |
| 672 size_t original_length = out->length(); |
| 673 for (int extra_space = 64; ; extra_space *= 2) { |
| 674 UErrorCode status = U_ZERO_ERROR; |
| 675 out->resize(out->length() + extra_space); |
| 676 int output_chars = uidna_IDNToUnicode(comp, |
| 677 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, |
| 678 UIDNA_DEFAULT, NULL, &status); |
| 679 if (status == U_ZERO_ERROR) { |
| 680 // Converted successfully. |
| 681 out->resize(original_length + output_chars); |
| 682 if (IsIDNComponentSafe(out->data() + original_length, output_chars, |
| 683 languages)) |
| 684 return true; |
| 685 } |
| 667 | 686 |
| 668 // Just copy the input if it can't be an IDN component. | 687 if (status != U_BUFFER_OVERFLOW_ERROR) |
| 669 if (comp_len < 4 || | 688 break; |
| 670 comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') { | 689 } |
| 671 out->resize(host_begin_in_output + comp_len); | 690 // Failed, revert back to original string. |
| 672 for (int i = 0; i < comp_len; i++) | 691 out->resize(original_length); |
| 673 (*out)[host_begin_in_output + i] = comp[i]; | |
| 674 return; | |
| 675 } | 692 } |
| 676 | 693 |
| 677 while (true) { | 694 // We get here with no IDN or on error, in which case we just append the |
| 678 UErrorCode status = U_ZERO_ERROR; | 695 // literal input. |
| 679 out->resize(out->size() + extra_space); | 696 out->append(comp, comp_len); |
| 680 int output_chars = | 697 return false; |
| 681 uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output], | |
| 682 extra_space, UIDNA_DEFAULT, NULL, &status); | |
| 683 if (status == U_ZERO_ERROR) { | |
| 684 // Converted successfully. | |
| 685 out->resize(host_begin_in_output + output_chars); | |
| 686 if (!IsIDNComponentSafe(&out->data()[host_begin_in_output], | |
| 687 output_chars, | |
| 688 languages)) | |
| 689 break; // The error handling below will undo the IDN. | |
| 690 return; | |
| 691 } | |
| 692 if (status != U_BUFFER_OVERFLOW_ERROR) | |
| 693 break; | |
| 694 | |
| 695 // Need to loop again with a bigger buffer. It looks like ICU will | |
| 696 // return the required size of the buffer, but that's not documented, | |
| 697 // so we'll just grow by 2x. This should be rare and is not on a | |
| 698 // critical path. | |
| 699 extra_space *= 2; | |
| 700 } | |
| 701 | |
| 702 // We get here on error, in which case we replace anything that was added | |
| 703 // with the literal input. | |
| 704 out->resize(host_begin_in_output + comp_len); | |
| 705 for (int i = 0; i < comp_len; i++) | |
| 706 (*out)[host_begin_in_output + i] = comp[i]; | |
| 707 } | 698 } |
| 708 | 699 |
| 709 // Helper for FormatUrl(). | 700 // Helper for FormatUrl(). |
| 710 std::wstring FormatViewSourceUrl(const GURL& url, | 701 std::wstring FormatViewSourceUrl(const GURL& url, |
| 711 const std::wstring& languages, | 702 const std::wstring& languages, |
| 712 bool omit_username_password, | 703 bool omit_username_password, |
| 713 UnescapeRule::Type unescape_rules, | 704 UnescapeRule::Type unescape_rules, |
| 714 url_parse::Parsed* new_parsed, | 705 url_parse::Parsed* new_parsed, |
| 715 size_t* prefix_end) { | 706 size_t* prefix_end, |
| 707 size_t* offset_for_adjustment) { |
| 716 DCHECK(new_parsed); | 708 DCHECK(new_parsed); |
| 717 const wchar_t* const kWideViewSource = L"view-source:"; | 709 const wchar_t* const kWideViewSource = L"view-source:"; |
| 718 const size_t kViewSourceLengthPlus1 = 12; | 710 const size_t kViewSourceLengthPlus1 = 12; |
| 719 | 711 |
| 720 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); | 712 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); |
| 713 size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ? |
| 714 std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1); |
| 715 size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ? |
| 716 NULL : &temp_offset; |
| 721 std::wstring result = net::FormatUrl(real_url, languages, | 717 std::wstring result = net::FormatUrl(real_url, languages, |
| 722 omit_username_password, unescape_rules, new_parsed, prefix_end); | 718 omit_username_password, unescape_rules, new_parsed, prefix_end, |
| 719 temp_offset_ptr); |
| 723 result.insert(0, kWideViewSource); | 720 result.insert(0, kWideViewSource); |
| 724 | 721 |
| 725 // Adjust position values. | 722 // Adjust position values. |
| 726 if (prefix_end) | |
| 727 *prefix_end += kViewSourceLengthPlus1; | |
| 728 if (new_parsed->scheme.is_nonempty()) { | 723 if (new_parsed->scheme.is_nonempty()) { |
| 729 // Assume "view-source:real-scheme" as a scheme. | 724 // Assume "view-source:real-scheme" as a scheme. |
| 730 new_parsed->scheme.len += kViewSourceLengthPlus1; | 725 new_parsed->scheme.len += kViewSourceLengthPlus1; |
| 731 } else { | 726 } else { |
| 732 new_parsed->scheme.begin = 0; | 727 new_parsed->scheme.begin = 0; |
| 733 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; | 728 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; |
| 734 } | 729 } |
| 735 if (new_parsed->username.is_nonempty()) | 730 if (new_parsed->username.is_nonempty()) |
| 736 new_parsed->username.begin += kViewSourceLengthPlus1; | 731 new_parsed->username.begin += kViewSourceLengthPlus1; |
| 737 if (new_parsed->password.is_nonempty()) | 732 if (new_parsed->password.is_nonempty()) |
| 738 new_parsed->password.begin += kViewSourceLengthPlus1; | 733 new_parsed->password.begin += kViewSourceLengthPlus1; |
| 739 if (new_parsed->host.is_nonempty()) | 734 if (new_parsed->host.is_nonempty()) |
| 740 new_parsed->host.begin += kViewSourceLengthPlus1; | 735 new_parsed->host.begin += kViewSourceLengthPlus1; |
| 741 if (new_parsed->port.is_nonempty()) | 736 if (new_parsed->port.is_nonempty()) |
| 742 new_parsed->port.begin += kViewSourceLengthPlus1; | 737 new_parsed->port.begin += kViewSourceLengthPlus1; |
| 743 if (new_parsed->path.is_nonempty()) | 738 if (new_parsed->path.is_nonempty()) |
| 744 new_parsed->path.begin += kViewSourceLengthPlus1; | 739 new_parsed->path.begin += kViewSourceLengthPlus1; |
| 745 if (new_parsed->query.is_nonempty()) | 740 if (new_parsed->query.is_nonempty()) |
| 746 new_parsed->query.begin += kViewSourceLengthPlus1; | 741 new_parsed->query.begin += kViewSourceLengthPlus1; |
| 747 if (new_parsed->ref.is_nonempty()) | 742 if (new_parsed->ref.is_nonempty()) |
| 748 new_parsed->ref.begin += kViewSourceLengthPlus1; | 743 new_parsed->ref.begin += kViewSourceLengthPlus1; |
| 744 if (prefix_end) |
| 745 *prefix_end += kViewSourceLengthPlus1; |
| 746 if (temp_offset_ptr) { |
| 747 *offset_for_adjustment = (temp_offset == std::wstring::npos) ? |
| 748 std::wstring::npos : (temp_offset + kViewSourceLengthPlus1); |
| 749 } |
| 749 return result; | 750 return result; |
| 750 } | 751 } |
| 751 | 752 |
| 752 // Converts a UTF-8 string to a FilePath string type. | 753 // Converts a UTF-8 string to a FilePath string type. |
| 753 // | 754 // |
| 754 // This is inline with the hope that the function will be "free" on non-Windows | 755 // This is inline with the hope that the function will be "free" on non-Windows |
| 755 // platforms. | 756 // platforms. |
| 756 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) { | 757 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) { |
| 757 #if defined(OS_WIN) | 758 #if defined(OS_WIN) |
| 758 return FilePath::StringType(UTF8ToUTF16(utf8)); | 759 return FilePath::StringType(UTF8ToUTF16(utf8)); |
| 759 #else | 760 #else |
| 760 return utf8; | 761 return utf8; |
| 761 #endif | 762 #endif |
| 762 } | 763 } |
| 763 | 764 |
| 764 } // namespace | 765 } // namespace |
| 765 | 766 |
| 766 namespace net { | 767 namespace net { |
| 767 | 768 |
| 768 std::set<int> explicitly_allowed_ports; | 769 std::set<int> explicitly_allowed_ports; |
| 769 | 770 |
| 770 // Appends the substring |in_component| inside of the URL |spec| to |output|, | 771 // Appends the substring |in_component| inside of the URL |spec| to |output|, |
| 771 // and the resulting range will be filled into |out_component|. |unescape_rules| | 772 // and the resulting range will be filled into |out_component|. |unescape_rules| |
| 772 // defines how to clean the URL for human readability. | 773 // defines how to clean the URL for human readability. |offset_for_adjustment| |
| 774 // is an offset into |output| which will be adjusted based on how it maps to the |
| 775 // component being converted; if it is less than output->length(), it will be |
| 776 // untouched, and if it is greater than output->length() + in_component.len it |
| 777 // will be shortened by the difference in lengths between the input and output |
| 778 // components. Otherwise it points into the component being converted, and is |
| 779 // adjusted to point to the same logical place in |output|. |
| 780 // |offset_for_adjustment| may not be NULL. |
| 773 static void AppendFormattedComponent(const std::string& spec, | 781 static void AppendFormattedComponent(const std::string& spec, |
| 774 const url_parse::Component& in_component, | 782 const url_parse::Component& in_component, |
| 775 UnescapeRule::Type unescape_rules, | 783 UnescapeRule::Type unescape_rules, |
| 776 std::wstring* output, | 784 std::wstring* output, |
| 777 url_parse::Component* out_component); | 785 url_parse::Component* out_component, |
| 786 size_t* offset_for_adjustment); |
| 778 | 787 |
| 779 GURL FilePathToFileURL(const FilePath& path) { | 788 GURL FilePathToFileURL(const FilePath& path) { |
| 780 // Produce a URL like "file:///C:/foo" for a regular file, or | 789 // Produce a URL like "file:///C:/foo" for a regular file, or |
| 781 // "file://///server/path" for UNC. The URL canonicalizer will fix up the | 790 // "file://///server/path" for UNC. The URL canonicalizer will fix up the |
| 782 // latter case to be the canonical UNC form: "file://server/path" | 791 // latter case to be the canonical UNC form: "file://server/path" |
| 783 FilePath::StringType url_string(kFileURLPrefix); | 792 FilePath::StringType url_string(kFileURLPrefix); |
| 784 url_string.append(path.value()); | 793 url_string.append(path.value()); |
| 785 | 794 |
| 786 // Now do replacement of some characters. Since we assume the input is a | 795 // Now do replacement of some characters. Since we assume the input is a |
| 787 // literal filename, anything the URL parser might consider special should | 796 // literal filename, anything the URL parser might consider special should |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 842 return GetHeaderParamValueT(field, param_name); | 851 return GetHeaderParamValueT(field, param_name); |
| 843 } | 852 } |
| 844 | 853 |
| 845 // TODO(brettw) bug 734373: check the scripts for each host component and | 854 // TODO(brettw) bug 734373: check the scripts for each host component and |
| 846 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | 855 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
| 847 // scripts that the user has installed. For now, just put the entire | 856 // scripts that the user has installed. For now, just put the entire |
| 848 // path through IDN. Maybe this feature can be implemented in ICU itself? | 857 // path through IDN. Maybe this feature can be implemented in ICU itself? |
| 849 // | 858 // |
| 850 // We may want to skip this step in the case of file URLs to allow unicode | 859 // We may want to skip this step in the case of file URLs to allow unicode |
| 851 // UNC hostnames regardless of encodings. | 860 // UNC hostnames regardless of encodings. |
| 852 void IDNToUnicode(const char* host, | 861 std::wstring IDNToUnicode(const char* host, |
| 853 int host_len, | 862 size_t host_len, |
| 854 const std::wstring& languages, | 863 const std::wstring& languages, |
| 855 std::wstring* out) { | 864 size_t* offset_for_adjustment) { |
| 856 // Convert the ASCII input to a wide string for ICU. | 865 // Convert the ASCII input to a wide string for ICU. |
| 857 string16 input16; | 866 string16 input16; |
| 858 input16.reserve(host_len); | 867 input16.reserve(host_len); |
| 859 for (int i = 0; i < host_len; i++) | 868 std::copy(host, host + host_len, std::back_inserter(input16)); |
| 860 input16.push_back(host[i]); | |
| 861 | 869 |
| 862 string16 out16; | 870 string16 out16; |
| 863 // The output string is appended to, so convert what's already there if | 871 size_t output_offset = offset_for_adjustment ? |
| 864 // needed. | 872 *offset_for_adjustment : std::wstring::npos; |
| 865 #if defined(WCHAR_T_IS_UTF32) | |
| 866 WideToUTF16(out->data(), out->length(), &out16); | |
| 867 out->clear(); // for equivalence with the swap below | |
| 868 #elif defined(WCHAR_T_IS_UTF16) | |
| 869 out->swap(out16); | |
| 870 #endif | |
| 871 | 873 |
| 872 // Do each component of the host separately, since we enforce script matching | 874 // Do each component of the host separately, since we enforce script matching |
| 873 // on a per-component basis. | 875 // on a per-component basis. |
| 874 size_t cur_begin = 0; // Beginning of the current component (inclusive). | 876 for (size_t component_start = 0, component_end; |
| 875 while (cur_begin < input16.size()) { | 877 component_start < input16.length(); |
| 876 // Find the next dot or the end of the string. | 878 component_start = component_end + 1) { |
| 877 size_t next_dot = input16.find_first_of('.', cur_begin); | 879 // Find the end of the component. |
| 878 if (next_dot == std::wstring::npos) | 880 component_end = input16.find('.', component_start); |
| 879 next_dot = input16.size(); // For getting the last component. | 881 if (component_end == string16::npos) |
| 882 component_end = input16.length(); // For getting the last component. |
| 883 size_t component_length = component_end - component_start; |
| 880 | 884 |
| 881 if (next_dot > cur_begin) { | 885 size_t output_component_start = out16.length(); |
| 886 bool converted_idn = false; |
| 887 if (component_end > component_start) { |
| 882 // Add the substring that we just found. | 888 // Add the substring that we just found. |
| 883 IDNToUnicodeOneComponent(&input16[cur_begin], | 889 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, |
| 884 static_cast<int>(next_dot - cur_begin), | 890 component_length, languages, &out16); |
| 885 languages, | 891 } |
| 886 &out16); | 892 size_t output_component_length = out16.length() - output_component_start; |
| 893 |
| 894 if ((output_offset != std::wstring::npos) && |
| 895 (*offset_for_adjustment > component_start)) { |
| 896 if ((*offset_for_adjustment < component_end) && converted_idn) |
| 897 output_offset = std::wstring::npos; |
| 898 else |
| 899 output_offset += output_component_length - component_length; |
| 887 } | 900 } |
| 888 | 901 |
| 889 // Need to add the dot we just found (if we found one). This needs to be | 902 // Need to add the dot we just found (if we found one). |
| 890 // done before we break out below in case the URL ends in a dot. | 903 if (component_end < input16.length()) |
| 891 if (next_dot < input16.size()) | |
| 892 out16.push_back('.'); | 904 out16.push_back('.'); |
| 893 else | |
| 894 break; // No more components left. | |
| 895 | |
| 896 cur_begin = next_dot + 1; | |
| 897 } | 905 } |
| 898 | 906 |
| 899 #if defined(WCHAR_T_IS_UTF32) | 907 if (offset_for_adjustment) |
| 900 UTF16ToWide(out16.data(), out16.length(), out); | 908 *offset_for_adjustment = output_offset; |
| 901 #elif defined(WCHAR_T_IS_UTF16) | 909 |
| 902 out->swap(out16); | 910 return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment); |
| 903 #endif | |
| 904 } | 911 } |
| 905 | 912 |
| 906 std::string CanonicalizeHost(const std::string& host, | 913 std::string CanonicalizeHost(const std::string& host, |
| 907 url_canon::CanonHostInfo* host_info) { | 914 url_canon::CanonHostInfo* host_info) { |
| 908 // Try to canonicalize the host. | 915 // Try to canonicalize the host. |
| 909 const url_parse::Component raw_host_component( | 916 const url_parse::Component raw_host_component( |
| 910 0, static_cast<int>(host.length())); | 917 0, static_cast<int>(host.length())); |
| 911 std::string canon_host; | 918 std::string canon_host; |
| 912 url_canon::StdStringCanonOutput canon_host_output(&canon_host); | 919 url_canon::StdStringCanonOutput canon_host_output(&canon_host); |
| 913 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, | 920 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, |
| (...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1255 DLOG(INFO) << "gethostname() failed with " << result; | 1262 DLOG(INFO) << "gethostname() failed with " << result; |
| 1256 buffer[0] = '\0'; | 1263 buffer[0] = '\0'; |
| 1257 } | 1264 } |
| 1258 return std::string(buffer); | 1265 return std::string(buffer); |
| 1259 } | 1266 } |
| 1260 | 1267 |
| 1261 void GetIdentityFromURL(const GURL& url, | 1268 void GetIdentityFromURL(const GURL& url, |
| 1262 std::wstring* username, | 1269 std::wstring* username, |
| 1263 std::wstring* password) { | 1270 std::wstring* password) { |
| 1264 UnescapeRule::Type flags = UnescapeRule::SPACES; | 1271 UnescapeRule::Type flags = UnescapeRule::SPACES; |
| 1265 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags); | 1272 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); |
| 1266 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags); | 1273 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); |
| 1267 } | 1274 } |
| 1268 | 1275 |
| 1269 void AppendFormattedHost(const GURL& url, | 1276 void AppendFormattedHost(const GURL& url, |
| 1270 const std::wstring& languages, | 1277 const std::wstring& languages, |
| 1271 std::wstring* output, | 1278 std::wstring* output, |
| 1272 url_parse::Parsed* new_parsed) { | 1279 url_parse::Parsed* new_parsed, |
| 1280 size_t* offset_for_adjustment) { |
| 1281 DCHECK(output); |
| 1273 const url_parse::Component& host = | 1282 const url_parse::Component& host = |
| 1274 url.parsed_for_possibly_invalid_spec().host; | 1283 url.parsed_for_possibly_invalid_spec().host; |
| 1275 | 1284 |
| 1276 if (host.is_nonempty()) { | 1285 if (host.is_nonempty()) { |
| 1277 // Handle possible IDN in the host name. | 1286 // Handle possible IDN in the host name. |
| 1287 int new_host_begin = static_cast<int>(output->length()); |
| 1278 if (new_parsed) | 1288 if (new_parsed) |
| 1279 new_parsed->host.begin = static_cast<int>(output->length()); | 1289 new_parsed->host.begin = new_host_begin; |
| 1290 size_t offset_past_current_output = |
| 1291 (!offset_for_adjustment || |
| 1292 (*offset_for_adjustment == std::wstring::npos) || |
| 1293 (*offset_for_adjustment < output->length())) ? |
| 1294 std::wstring::npos : (*offset_for_adjustment - output->length()); |
| 1295 size_t* offset_into_host = |
| 1296 (offset_past_current_output >= static_cast<size_t>(host.len)) ? |
| 1297 NULL : &offset_past_current_output; |
| 1280 | 1298 |
| 1281 const std::string& spec = url.possibly_invalid_spec(); | 1299 const std::string& spec = url.possibly_invalid_spec(); |
| 1282 DCHECK(host.begin >= 0 && | 1300 DCHECK(host.begin >= 0 && |
| 1283 ((spec.length() == 0 && host.begin == 0) || | 1301 ((spec.length() == 0 && host.begin == 0) || |
| 1284 host.begin < static_cast<int>(spec.length()))); | 1302 host.begin < static_cast<int>(spec.length()))); |
| 1285 net::IDNToUnicode(&spec[host.begin], host.len, languages, output); | 1303 output->append(net::IDNToUnicode(&spec[host.begin], |
| 1304 static_cast<size_t>(host.len), languages, offset_into_host)); |
| 1286 | 1305 |
| 1287 if (new_parsed) { | 1306 int new_host_len = static_cast<int>(output->length()) - new_host_begin; |
| 1288 new_parsed->host.len = | 1307 if (new_parsed) |
| 1289 static_cast<int>(output->length()) - new_parsed->host.begin; | 1308 new_parsed->host.len = new_host_len; |
| 1309 if (offset_into_host) { |
| 1310 *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ? |
| 1311 std::wstring::npos : (new_host_begin + *offset_into_host); |
| 1312 } else if (offset_past_current_output != std::wstring::npos) { |
| 1313 *offset_for_adjustment += new_host_len - host.len; |
| 1290 } | 1314 } |
| 1291 } else if (new_parsed) { | 1315 } else if (new_parsed) { |
| 1292 new_parsed->host.reset(); | 1316 new_parsed->host.reset(); |
| 1293 } | 1317 } |
| 1294 } | 1318 } |
| 1295 | 1319 |
| 1296 /* static */ | 1320 /* static */ |
| 1297 void AppendFormattedComponent(const std::string& spec, | 1321 void AppendFormattedComponent(const std::string& spec, |
| 1298 const url_parse::Component& in_component, | 1322 const url_parse::Component& in_component, |
| 1299 UnescapeRule::Type unescape_rules, | 1323 UnescapeRule::Type unescape_rules, |
| 1300 std::wstring* output, | 1324 std::wstring* output, |
| 1301 url_parse::Component* out_component) { | 1325 url_parse::Component* out_component, |
| 1326 size_t* offset_for_adjustment) { |
| 1327 DCHECK(output); |
| 1328 DCHECK(offset_for_adjustment); |
| 1302 if (in_component.is_nonempty()) { | 1329 if (in_component.is_nonempty()) { |
| 1303 out_component->begin = static_cast<int>(output->length()); | 1330 out_component->begin = static_cast<int>(output->length()); |
| 1331 size_t offset_past_current_output = |
| 1332 ((*offset_for_adjustment == std::wstring::npos) || |
| 1333 (*offset_for_adjustment < output->length())) ? |
| 1334 std::wstring::npos : (*offset_for_adjustment - output->length()); |
| 1335 size_t* offset_into_component = |
| 1336 (offset_past_current_output >= static_cast<size_t>(in_component.len)) ? |
| 1337 NULL : &offset_past_current_output; |
| 1304 if (unescape_rules == UnescapeRule::NONE) { | 1338 if (unescape_rules == UnescapeRule::NONE) { |
| 1305 output->append(UTF8ToWide(spec.substr( | 1339 output->append(UTF8ToWideAndAdjustOffset( |
| 1306 in_component.begin, in_component.len))); | 1340 spec.substr(in_component.begin, in_component.len), |
| 1341 offset_into_component)); |
| 1307 } else { | 1342 } else { |
| 1308 output->append(UnescapeAndDecodeUTF8URLComponent( | 1343 output->append(UnescapeAndDecodeUTF8URLComponent( |
| 1309 spec.substr(in_component.begin, in_component.len), | 1344 spec.substr(in_component.begin, in_component.len), unescape_rules, |
| 1310 unescape_rules)); | 1345 offset_into_component)); |
| 1311 } | 1346 } |
| 1312 out_component->len = | 1347 out_component->len = |
| 1313 static_cast<int>(output->length()) - out_component->begin; | 1348 static_cast<int>(output->length()) - out_component->begin; |
| 1349 if (offset_into_component) { |
| 1350 *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ? |
| 1351 std::wstring::npos : (out_component->begin + *offset_into_component); |
| 1352 } else if (offset_past_current_output != std::wstring::npos) { |
| 1353 *offset_for_adjustment += out_component->len - in_component.len; |
| 1354 } |
| 1314 } else { | 1355 } else { |
| 1315 out_component->reset(); | 1356 out_component->reset(); |
| 1316 } | 1357 } |
| 1317 } | 1358 } |
| 1318 | 1359 |
| 1319 std::wstring FormatUrl(const GURL& url, | 1360 std::wstring FormatUrl(const GURL& url, |
| 1320 const std::wstring& languages, | 1361 const std::wstring& languages, |
| 1321 bool omit_username_password, | 1362 bool omit_username_password, |
| 1322 UnescapeRule::Type unescape_rules, | 1363 UnescapeRule::Type unescape_rules, |
| 1323 url_parse::Parsed* new_parsed, | 1364 url_parse::Parsed* new_parsed, |
| 1324 size_t* prefix_end) { | 1365 size_t* prefix_end, |
| 1366 size_t* offset_for_adjustment) { |
| 1325 url_parse::Parsed parsed_temp; | 1367 url_parse::Parsed parsed_temp; |
| 1326 if (!new_parsed) | 1368 if (!new_parsed) |
| 1327 new_parsed = &parsed_temp; | 1369 new_parsed = &parsed_temp; |
| 1370 size_t offset_temp = std::wstring::npos; |
| 1371 if (!offset_for_adjustment) |
| 1372 offset_for_adjustment = &offset_temp; |
| 1328 | 1373 |
| 1329 std::wstring url_string; | 1374 std::wstring url_string; |
| 1330 | 1375 |
| 1331 // Check for empty URLs or 0 available text width. | 1376 // Check for empty URLs or 0 available text width. |
| 1332 if (url.is_empty()) { | 1377 if (url.is_empty()) { |
| 1333 if (prefix_end) | 1378 if (prefix_end) |
| 1334 *prefix_end = 0; | 1379 *prefix_end = 0; |
| 1380 *offset_for_adjustment = std::wstring::npos; |
| 1335 return url_string; | 1381 return url_string; |
| 1336 } | 1382 } |
| 1337 | 1383 |
| 1338 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | 1384 // Special handling for view-source:. Don't use chrome::kViewSourceScheme |
| 1339 // because this library shouldn't depend on chrome. | 1385 // because this library shouldn't depend on chrome. |
| 1340 const char* const kViewSource = "view-source"; | 1386 const char* const kViewSource = "view-source"; |
| 1341 const char* const kViewSourceTwice = "view-source:view-source:"; | 1387 const char* const kViewSourceTwice = "view-source:view-source:"; |
| 1342 // Rejects view-source:view-source:... to avoid deep recursive call. | 1388 // Rejects view-source:view-source:... to avoid deep recursive call. |
| 1343 if (url.SchemeIs(kViewSource) && | 1389 if (url.SchemeIs(kViewSource) && |
| 1344 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | 1390 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { |
| 1345 return FormatViewSourceUrl(url, languages, omit_username_password, | 1391 return FormatViewSourceUrl(url, languages, omit_username_password, |
| 1346 unescape_rules, new_parsed, prefix_end); | 1392 unescape_rules, new_parsed, prefix_end, offset_for_adjustment); |
| 1347 } | 1393 } |
| 1348 | 1394 |
| 1349 // We handle both valid and invalid URLs (this will give us the spec | 1395 // We handle both valid and invalid URLs (this will give us the spec |
| 1350 // regardless of validity). | 1396 // regardless of validity). |
| 1351 const std::string& spec = url.possibly_invalid_spec(); | 1397 const std::string& spec = url.possibly_invalid_spec(); |
| 1352 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | 1398 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
| 1399 if (*offset_for_adjustment >= spec.length()) |
| 1400 *offset_for_adjustment = std::wstring::npos; |
| 1353 | 1401 |
| 1354 // Copy everything before the username (the scheme and the separators.) | 1402 // Copy everything before the username (the scheme and the separators.) |
| 1355 // These are ASCII. | 1403 // These are ASCII. |
| 1356 int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); | 1404 std::copy(spec.begin(), |
| 1357 for (int i = 0; i < pre_end; ++i) | 1405 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, |
| 1358 url_string.push_back(spec[i]); | 1406 true), |
| 1407 std::back_inserter(url_string)); |
| 1359 new_parsed->scheme = parsed.scheme; | 1408 new_parsed->scheme = parsed.scheme; |
| 1360 | 1409 |
| 1361 if (omit_username_password) { | 1410 if (omit_username_password) { |
| 1362 // Remove the username and password fields. We don't want to display those | 1411 // Remove the username and password fields. We don't want to display those |
| 1363 // to the user since they can be used for attacks, | 1412 // to the user since they can be used for attacks, |
| 1364 // e.g. "http://google.com:search@evil.ru/" | 1413 // e.g. "http://google.com:search@evil.ru/" |
| 1365 new_parsed->username.reset(); | 1414 new_parsed->username.reset(); |
| 1366 new_parsed->password.reset(); | 1415 new_parsed->password.reset(); |
| 1416 if ((*offset_for_adjustment != std::wstring::npos) && |
| 1417 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { |
| 1418 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { |
| 1419 // The seeming off-by-one and off-by-two in these first two lines are to |
| 1420 // account for the ':' after the username and '@' after the password. |
| 1421 if (*offset_for_adjustment > |
| 1422 static_cast<size_t>(parsed.password.end())) { |
| 1423 *offset_for_adjustment -= |
| 1424 (parsed.username.len + parsed.password.len + 2); |
| 1425 } else if (*offset_for_adjustment > |
| 1426 static_cast<size_t>(parsed.username.begin)) { |
| 1427 *offset_for_adjustment = std::wstring::npos; |
| 1428 } |
| 1429 } else { |
| 1430 const url_parse::Component* nonempty_component = |
| 1431 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; |
| 1432 // The seeming off-by-one in these first two lines is to account for the |
| 1433 // '@' after the username/password. |
| 1434 if (*offset_for_adjustment > |
| 1435 static_cast<size_t>(nonempty_component->end())) { |
| 1436 *offset_for_adjustment -= (nonempty_component->len + 1); |
| 1437 } else if (*offset_for_adjustment > |
| 1438 static_cast<size_t>(nonempty_component->begin)) { |
| 1439 *offset_for_adjustment = std::wstring::npos; |
| 1440 } |
| 1441 } |
| 1442 } |
| 1367 } else { | 1443 } else { |
| 1368 AppendFormattedComponent( | 1444 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, |
| 1369 spec, parsed.username, unescape_rules, | 1445 &new_parsed->username, offset_for_adjustment); |
| 1370 &url_string, &new_parsed->username); | |
| 1371 if (parsed.password.is_valid()) { | 1446 if (parsed.password.is_valid()) { |
| 1372 url_string.push_back(':'); | 1447 url_string.push_back(':'); |
| 1373 } | 1448 } |
| 1374 AppendFormattedComponent( | 1449 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, |
| 1375 spec, parsed.password, unescape_rules, | 1450 &new_parsed->password, offset_for_adjustment); |
| 1376 &url_string, &new_parsed->password); | |
| 1377 if (parsed.username.is_valid() || parsed.password.is_valid()) { | 1451 if (parsed.username.is_valid() || parsed.password.is_valid()) { |
| 1378 url_string.push_back('@'); | 1452 url_string.push_back('@'); |
| 1379 } | 1453 } |
| 1380 } | 1454 } |
| 1381 if (prefix_end) | 1455 if (prefix_end) |
| 1382 *prefix_end = static_cast<size_t>(url_string.length()); | 1456 *prefix_end = static_cast<size_t>(url_string.length()); |
| 1383 | 1457 |
| 1384 AppendFormattedHost(url, languages, &url_string, new_parsed); | 1458 AppendFormattedHost(url, languages, &url_string, new_parsed, |
| 1459 offset_for_adjustment); |
| 1385 | 1460 |
| 1386 // Port. | 1461 // Port. |
| 1387 if (parsed.port.is_nonempty()) { | 1462 if (parsed.port.is_nonempty()) { |
| 1388 url_string.push_back(':'); | 1463 url_string.push_back(':'); |
| 1389 int begin = url_string.length(); | 1464 new_parsed->port.begin = url_string.length(); |
| 1390 for (int i = parsed.port.begin; i < parsed.port.end(); ++i) | 1465 std::copy(spec.begin() + parsed.port.begin, |
| 1391 url_string.push_back(spec[i]); | 1466 spec.begin() + parsed.port.end(), std::back_inserter(url_string)); |
| 1392 new_parsed->port.begin = begin; | 1467 new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
| 1393 new_parsed->port.len = url_string.length() - begin; | |
| 1394 } else { | 1468 } else { |
| 1395 new_parsed->port.reset(); | 1469 new_parsed->port.reset(); |
| 1396 } | 1470 } |
| 1397 | 1471 |
| 1398 // Path and query both get the same general unescape & convert treatment. | 1472 // Path and query both get the same general unescape & convert treatment. |
| 1399 AppendFormattedComponent( | 1473 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, |
| 1400 spec, parsed.path, unescape_rules, &url_string, | 1474 &new_parsed->path, offset_for_adjustment); |
| 1401 &new_parsed->path); | |
| 1402 if (parsed.query.is_valid()) | 1475 if (parsed.query.is_valid()) |
| 1403 url_string.push_back('?'); | 1476 url_string.push_back('?'); |
| 1404 AppendFormattedComponent( | 1477 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, |
| 1405 spec, parsed.query, unescape_rules, &url_string, | 1478 &new_parsed->query, offset_for_adjustment); |
| 1406 &new_parsed->query); | |
| 1407 | 1479 |
| 1408 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | 1480 // Reference is stored in valid, unescaped UTF-8, so we can just convert. |
| 1409 if (parsed.ref.is_valid()) { | 1481 if (parsed.ref.is_valid()) { |
| 1410 url_string.push_back('#'); | 1482 url_string.push_back('#'); |
| 1411 int begin = url_string.length(); | 1483 new_parsed->ref.begin = url_string.length(); |
| 1412 if (parsed.ref.len > 0) | 1484 size_t offset_past_current_output = |
| 1413 url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], | 1485 ((*offset_for_adjustment == std::wstring::npos) || |
| 1414 parsed.ref.len))); | 1486 (*offset_for_adjustment < url_string.length())) ? |
| 1415 new_parsed->ref.begin = begin; | 1487 std::wstring::npos : (*offset_for_adjustment - url_string.length()); |
| 1416 new_parsed->ref.len = url_string.length() - begin; | 1488 size_t* offset_into_ref = |
| 1489 (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ? |
| 1490 NULL : &offset_past_current_output; |
| 1491 if (parsed.ref.len > 0) { |
| 1492 url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin, |
| 1493 parsed.ref.len), |
| 1494 offset_into_ref)); |
| 1495 } |
| 1496 new_parsed->ref.len = url_string.length() - new_parsed->ref.begin; |
| 1497 if (offset_into_ref) { |
| 1498 *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ? |
| 1499 std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref); |
| 1500 } else if (offset_past_current_output != std::wstring::npos) { |
| 1501 // We clamped the offset near the beginning of this function to ensure it |
| 1502 // was within the input URL. If we reach here, the input was something |
| 1503 // invalid and non-parseable such that the offset was past any component |
| 1504 // we could figure out. In this case it won't be represented in the |
| 1505 // output string, so reset it. |
| 1506 *offset_for_adjustment = std::wstring::npos; |
| 1507 } |
| 1417 } | 1508 } |
| 1418 | 1509 |
| 1419 return url_string; | 1510 return url_string; |
| 1420 } | 1511 } |
| 1421 | 1512 |
| 1422 GURL SimplifyUrlForRequest(const GURL& url) { | 1513 GURL SimplifyUrlForRequest(const GURL& url) { |
| 1423 DCHECK(url.is_valid()); | 1514 DCHECK(url.is_valid()); |
| 1424 GURL::Replacements replacements; | 1515 GURL::Replacements replacements; |
| 1425 replacements.ClearUsername(); | 1516 replacements.ClearUsername(); |
| 1426 replacements.ClearPassword(); | 1517 replacements.ClearPassword(); |
| (...skipping 24 matching lines...) Expand all Loading... |
| 1451 if (length > 0) | 1542 if (length > 0) |
| 1452 ports.insert(StringToInt(WideToASCII( | 1543 ports.insert(StringToInt(WideToASCII( |
| 1453 allowed_ports.substr(last, length)))); | 1544 allowed_ports.substr(last, length)))); |
| 1454 last = i + 1; | 1545 last = i + 1; |
| 1455 } | 1546 } |
| 1456 } | 1547 } |
| 1457 explicitly_allowed_ports = ports; | 1548 explicitly_allowed_ports = ports; |
| 1458 } | 1549 } |
| 1459 | 1550 |
| 1460 } // namespace net | 1551 } // namespace net |
| OLD | NEW |