Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(208)

Side by Side Diff: net/base/net_util.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <map> 8 #include <map>
9 #include <unicode/ucnv.h> 9 #include <unicode/ucnv.h>
10 #include <unicode/uidna.h> 10 #include <unicode/uidna.h>
(...skipping 632 matching lines...) Expand 10 before | Expand all | Expand 10 after
643 std::string languages_list(WideToASCII(languages)); 643 std::string languages_list(WideToASCII(languages));
644 StringTokenizer t(languages_list, ","); 644 StringTokenizer t(languages_list, ",");
645 while (t.GetNext()) { 645 while (t.GetNext()) {
646 if (IsComponentCoveredByLang(component_characters, t.token())) 646 if (IsComponentCoveredByLang(component_characters, t.token()))
647 return true; 647 return true;
648 } 648 }
649 return false; 649 return false;
650 } 650 }
651 651
652 // Converts one component of a host (between dots) to IDN if safe. The result 652 // Converts one component of a host (between dots) to IDN if safe. The result
653 // will be APPENDED to the given output string and will be the same as the 653 // will be APPENDED to the given output string and will be the same as the input
654 // input if it is not IDN or the IDN is unsafe to display. 654 // if it is not IDN or the IDN is unsafe to display. Returns whether any
655 void IDNToUnicodeOneComponent(const char16* comp, 655 // conversion was performed.
656 int comp_len, 656 bool IDNToUnicodeOneComponent(const char16* comp,
657 size_t comp_len,
657 const std::wstring& languages, 658 const std::wstring& languages,
658 string16* out) { 659 string16* out) {
659 DCHECK(comp_len >= 0); 660 DCHECK(out);
660 if (comp_len == 0) 661 if (comp_len == 0)
661 return; 662 return false;
662 663
663 // Expand the output string to make room for a possibly longer string 664 // Only transform if the input can be an IDN component.
664 // (we'll expand if it's still not big enough below). 665 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
665 int extra_space = 64; 666 if ((comp_len > arraysize(kIdnPrefix)) &&
666 size_t host_begin_in_output = out->size(); 667 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {
668 // Repeatedly expand the output string until it's big enough. It looks like
669 // ICU will return the required size of the buffer, but that's not
670 // documented, so we'll just grow by 2x. This should be rare and is not on a
671 // critical path.
672 size_t original_length = out->length();
673 for (int extra_space = 64; ; extra_space *= 2) {
674 UErrorCode status = U_ZERO_ERROR;
675 out->resize(out->length() + extra_space);
676 int output_chars = uidna_IDNToUnicode(comp,
677 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
678 UIDNA_DEFAULT, NULL, &status);
679 if (status == U_ZERO_ERROR) {
680 // Converted successfully.
681 out->resize(original_length + output_chars);
682 if (IsIDNComponentSafe(out->data() + original_length, output_chars,
683 languages))
684 return true;
685 }
667 686
668 // Just copy the input if it can't be an IDN component. 687 if (status != U_BUFFER_OVERFLOW_ERROR)
669 if (comp_len < 4 || 688 break;
670 comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') { 689 }
671 out->resize(host_begin_in_output + comp_len); 690 // Failed, revert back to original string.
672 for (int i = 0; i < comp_len; i++) 691 out->resize(original_length);
673 (*out)[host_begin_in_output + i] = comp[i];
674 return;
675 } 692 }
676 693
677 while (true) { 694 // We get here with no IDN or on error, in which case we just append the
678 UErrorCode status = U_ZERO_ERROR; 695 // literal input.
679 out->resize(out->size() + extra_space); 696 out->append(comp, comp_len);
680 int output_chars = 697 return false;
681 uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output],
682 extra_space, UIDNA_DEFAULT, NULL, &status);
683 if (status == U_ZERO_ERROR) {
684 // Converted successfully.
685 out->resize(host_begin_in_output + output_chars);
686 if (!IsIDNComponentSafe(&out->data()[host_begin_in_output],
687 output_chars,
688 languages))
689 break; // The error handling below will undo the IDN.
690 return;
691 }
692 if (status != U_BUFFER_OVERFLOW_ERROR)
693 break;
694
695 // Need to loop again with a bigger buffer. It looks like ICU will
696 // return the required size of the buffer, but that's not documented,
697 // so we'll just grow by 2x. This should be rare and is not on a
698 // critical path.
699 extra_space *= 2;
700 }
701
702 // We get here on error, in which case we replace anything that was added
703 // with the literal input.
704 out->resize(host_begin_in_output + comp_len);
705 for (int i = 0; i < comp_len; i++)
706 (*out)[host_begin_in_output + i] = comp[i];
707 } 698 }
708 699
709 // Helper for FormatUrl(). 700 // Helper for FormatUrl().
710 std::wstring FormatViewSourceUrl(const GURL& url, 701 std::wstring FormatViewSourceUrl(const GURL& url,
711 const std::wstring& languages, 702 const std::wstring& languages,
712 bool omit_username_password, 703 bool omit_username_password,
713 UnescapeRule::Type unescape_rules, 704 UnescapeRule::Type unescape_rules,
714 url_parse::Parsed* new_parsed, 705 url_parse::Parsed* new_parsed,
715 size_t* prefix_end) { 706 size_t* prefix_end,
707 size_t* offset_for_adjustment) {
716 DCHECK(new_parsed); 708 DCHECK(new_parsed);
717 const wchar_t* const kWideViewSource = L"view-source:"; 709 const wchar_t* const kWideViewSource = L"view-source:";
718 const size_t kViewSourceLengthPlus1 = 12; 710 const size_t kViewSourceLengthPlus1 = 12;
719 711
720 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); 712 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));
713 size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ?
714 std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1);
715 size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ?
716 NULL : &temp_offset;
721 std::wstring result = net::FormatUrl(real_url, languages, 717 std::wstring result = net::FormatUrl(real_url, languages,
722 omit_username_password, unescape_rules, new_parsed, prefix_end); 718 omit_username_password, unescape_rules, new_parsed, prefix_end,
719 temp_offset_ptr);
723 result.insert(0, kWideViewSource); 720 result.insert(0, kWideViewSource);
724 721
725 // Adjust position values. 722 // Adjust position values.
726 if (prefix_end)
727 *prefix_end += kViewSourceLengthPlus1;
728 if (new_parsed->scheme.is_nonempty()) { 723 if (new_parsed->scheme.is_nonempty()) {
729 // Assume "view-source:real-scheme" as a scheme. 724 // Assume "view-source:real-scheme" as a scheme.
730 new_parsed->scheme.len += kViewSourceLengthPlus1; 725 new_parsed->scheme.len += kViewSourceLengthPlus1;
731 } else { 726 } else {
732 new_parsed->scheme.begin = 0; 727 new_parsed->scheme.begin = 0;
733 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; 728 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1;
734 } 729 }
735 if (new_parsed->username.is_nonempty()) 730 if (new_parsed->username.is_nonempty())
736 new_parsed->username.begin += kViewSourceLengthPlus1; 731 new_parsed->username.begin += kViewSourceLengthPlus1;
737 if (new_parsed->password.is_nonempty()) 732 if (new_parsed->password.is_nonempty())
738 new_parsed->password.begin += kViewSourceLengthPlus1; 733 new_parsed->password.begin += kViewSourceLengthPlus1;
739 if (new_parsed->host.is_nonempty()) 734 if (new_parsed->host.is_nonempty())
740 new_parsed->host.begin += kViewSourceLengthPlus1; 735 new_parsed->host.begin += kViewSourceLengthPlus1;
741 if (new_parsed->port.is_nonempty()) 736 if (new_parsed->port.is_nonempty())
742 new_parsed->port.begin += kViewSourceLengthPlus1; 737 new_parsed->port.begin += kViewSourceLengthPlus1;
743 if (new_parsed->path.is_nonempty()) 738 if (new_parsed->path.is_nonempty())
744 new_parsed->path.begin += kViewSourceLengthPlus1; 739 new_parsed->path.begin += kViewSourceLengthPlus1;
745 if (new_parsed->query.is_nonempty()) 740 if (new_parsed->query.is_nonempty())
746 new_parsed->query.begin += kViewSourceLengthPlus1; 741 new_parsed->query.begin += kViewSourceLengthPlus1;
747 if (new_parsed->ref.is_nonempty()) 742 if (new_parsed->ref.is_nonempty())
748 new_parsed->ref.begin += kViewSourceLengthPlus1; 743 new_parsed->ref.begin += kViewSourceLengthPlus1;
744 if (prefix_end)
745 *prefix_end += kViewSourceLengthPlus1;
746 if (temp_offset_ptr) {
747 *offset_for_adjustment = (temp_offset == std::wstring::npos) ?
748 std::wstring::npos : (temp_offset + kViewSourceLengthPlus1);
749 }
749 return result; 750 return result;
750 } 751 }
751 752
752 // Converts a UTF-8 string to a FilePath string type. 753 // Converts a UTF-8 string to a FilePath string type.
753 // 754 //
754 // This is inline with the hope that the function will be "free" on non-Windows 755 // This is inline with the hope that the function will be "free" on non-Windows
755 // platforms. 756 // platforms.
756 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) { 757 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) {
757 #if defined(OS_WIN) 758 #if defined(OS_WIN)
758 return FilePath::StringType(UTF8ToUTF16(utf8)); 759 return FilePath::StringType(UTF8ToUTF16(utf8));
759 #else 760 #else
760 return utf8; 761 return utf8;
761 #endif 762 #endif
762 } 763 }
763 764
764 } // namespace 765 } // namespace
765 766
766 namespace net { 767 namespace net {
767 768
768 std::set<int> explicitly_allowed_ports; 769 std::set<int> explicitly_allowed_ports;
769 770
770 // Appends the substring |in_component| inside of the URL |spec| to |output|, 771 // Appends the substring |in_component| inside of the URL |spec| to |output|,
771 // and the resulting range will be filled into |out_component|. |unescape_rules| 772 // and the resulting range will be filled into |out_component|. |unescape_rules|
772 // defines how to clean the URL for human readability. 773 // defines how to clean the URL for human readability. |offset_for_adjustment|
774 // is an offset into |output| which will be adjusted based on how it maps to the
775 // component being converted; if it is less than output->length(), it will be
776 // untouched, and if it is greater than output->length() + in_component.len it
777 // will be shortened by the difference in lengths between the input and output
778 // components. Otherwise it points into the component being converted, and is
779 // adjusted to point to the same logical place in |output|.
780 // |offset_for_adjustment| may not be NULL.
773 static void AppendFormattedComponent(const std::string& spec, 781 static void AppendFormattedComponent(const std::string& spec,
774 const url_parse::Component& in_component, 782 const url_parse::Component& in_component,
775 UnescapeRule::Type unescape_rules, 783 UnescapeRule::Type unescape_rules,
776 std::wstring* output, 784 std::wstring* output,
777 url_parse::Component* out_component); 785 url_parse::Component* out_component,
786 size_t* offset_for_adjustment);
778 787
779 GURL FilePathToFileURL(const FilePath& path) { 788 GURL FilePathToFileURL(const FilePath& path) {
780 // Produce a URL like "file:///C:/foo" for a regular file, or 789 // Produce a URL like "file:///C:/foo" for a regular file, or
781 // "file://///server/path" for UNC. The URL canonicalizer will fix up the 790 // "file://///server/path" for UNC. The URL canonicalizer will fix up the
782 // latter case to be the canonical UNC form: "file://server/path" 791 // latter case to be the canonical UNC form: "file://server/path"
783 FilePath::StringType url_string(kFileURLPrefix); 792 FilePath::StringType url_string(kFileURLPrefix);
784 url_string.append(path.value()); 793 url_string.append(path.value());
785 794
786 // Now do replacement of some characters. Since we assume the input is a 795 // Now do replacement of some characters. Since we assume the input is a
787 // literal filename, anything the URL parser might consider special should 796 // literal filename, anything the URL parser might consider special should
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
842 return GetHeaderParamValueT(field, param_name); 851 return GetHeaderParamValueT(field, param_name);
843 } 852 }
844 853
845 // TODO(brettw) bug 734373: check the scripts for each host component and 854 // TODO(brettw) bug 734373: check the scripts for each host component and
846 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for 855 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
847 // scripts that the user has installed. For now, just put the entire 856 // scripts that the user has installed. For now, just put the entire
848 // path through IDN. Maybe this feature can be implemented in ICU itself? 857 // path through IDN. Maybe this feature can be implemented in ICU itself?
849 // 858 //
850 // We may want to skip this step in the case of file URLs to allow unicode 859 // We may want to skip this step in the case of file URLs to allow unicode
851 // UNC hostnames regardless of encodings. 860 // UNC hostnames regardless of encodings.
852 void IDNToUnicode(const char* host, 861 std::wstring IDNToUnicode(const char* host,
853 int host_len, 862 size_t host_len,
854 const std::wstring& languages, 863 const std::wstring& languages,
855 std::wstring* out) { 864 size_t* offset_for_adjustment) {
856 // Convert the ASCII input to a wide string for ICU. 865 // Convert the ASCII input to a wide string for ICU.
857 string16 input16; 866 string16 input16;
858 input16.reserve(host_len); 867 input16.reserve(host_len);
859 for (int i = 0; i < host_len; i++) 868 std::copy(host, host + host_len, std::back_inserter(input16));
860 input16.push_back(host[i]);
861 869
862 string16 out16; 870 string16 out16;
863 // The output string is appended to, so convert what's already there if 871 size_t output_offset = offset_for_adjustment ?
864 // needed. 872 *offset_for_adjustment : std::wstring::npos;
865 #if defined(WCHAR_T_IS_UTF32)
866 WideToUTF16(out->data(), out->length(), &out16);
867 out->clear(); // for equivalence with the swap below
868 #elif defined(WCHAR_T_IS_UTF16)
869 out->swap(out16);
870 #endif
871 873
872 // Do each component of the host separately, since we enforce script matching 874 // Do each component of the host separately, since we enforce script matching
873 // on a per-component basis. 875 // on a per-component basis.
874 size_t cur_begin = 0; // Beginning of the current component (inclusive). 876 for (size_t component_start = 0, component_end;
875 while (cur_begin < input16.size()) { 877 component_start < input16.length();
876 // Find the next dot or the end of the string. 878 component_start = component_end + 1) {
877 size_t next_dot = input16.find_first_of('.', cur_begin); 879 // Find the end of the component.
878 if (next_dot == std::wstring::npos) 880 component_end = input16.find('.', component_start);
879 next_dot = input16.size(); // For getting the last component. 881 if (component_end == string16::npos)
882 component_end = input16.length(); // For getting the last component.
883 size_t component_length = component_end - component_start;
880 884
881 if (next_dot > cur_begin) { 885 size_t output_component_start = out16.length();
886 bool converted_idn = false;
887 if (component_end > component_start) {
882 // Add the substring that we just found. 888 // Add the substring that we just found.
883 IDNToUnicodeOneComponent(&input16[cur_begin], 889 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
884 static_cast<int>(next_dot - cur_begin), 890 component_length, languages, &out16);
885 languages, 891 }
886 &out16); 892 size_t output_component_length = out16.length() - output_component_start;
893
894 if ((output_offset != std::wstring::npos) &&
895 (*offset_for_adjustment > component_start)) {
896 if ((*offset_for_adjustment < component_end) && converted_idn)
897 output_offset = std::wstring::npos;
898 else
899 output_offset += output_component_length - component_length;
887 } 900 }
888 901
889 // Need to add the dot we just found (if we found one). This needs to be 902 // Need to add the dot we just found (if we found one).
890 // done before we break out below in case the URL ends in a dot. 903 if (component_end < input16.length())
891 if (next_dot < input16.size())
892 out16.push_back('.'); 904 out16.push_back('.');
893 else
894 break; // No more components left.
895
896 cur_begin = next_dot + 1;
897 } 905 }
898 906
899 #if defined(WCHAR_T_IS_UTF32) 907 if (offset_for_adjustment)
900 UTF16ToWide(out16.data(), out16.length(), out); 908 *offset_for_adjustment = output_offset;
901 #elif defined(WCHAR_T_IS_UTF16) 909
902 out->swap(out16); 910 return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment);
903 #endif
904 } 911 }
905 912
906 std::string CanonicalizeHost(const std::string& host, 913 std::string CanonicalizeHost(const std::string& host,
907 url_canon::CanonHostInfo* host_info) { 914 url_canon::CanonHostInfo* host_info) {
908 // Try to canonicalize the host. 915 // Try to canonicalize the host.
909 const url_parse::Component raw_host_component( 916 const url_parse::Component raw_host_component(
910 0, static_cast<int>(host.length())); 917 0, static_cast<int>(host.length()));
911 std::string canon_host; 918 std::string canon_host;
912 url_canon::StdStringCanonOutput canon_host_output(&canon_host); 919 url_canon::StdStringCanonOutput canon_host_output(&canon_host);
913 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, 920 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after
1255 DLOG(INFO) << "gethostname() failed with " << result; 1262 DLOG(INFO) << "gethostname() failed with " << result;
1256 buffer[0] = '\0'; 1263 buffer[0] = '\0';
1257 } 1264 }
1258 return std::string(buffer); 1265 return std::string(buffer);
1259 } 1266 }
1260 1267
1261 void GetIdentityFromURL(const GURL& url, 1268 void GetIdentityFromURL(const GURL& url,
1262 std::wstring* username, 1269 std::wstring* username,
1263 std::wstring* password) { 1270 std::wstring* password) {
1264 UnescapeRule::Type flags = UnescapeRule::SPACES; 1271 UnescapeRule::Type flags = UnescapeRule::SPACES;
1265 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags); 1272 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);
1266 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags); 1273 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);
1267 } 1274 }
1268 1275
1269 void AppendFormattedHost(const GURL& url, 1276 void AppendFormattedHost(const GURL& url,
1270 const std::wstring& languages, 1277 const std::wstring& languages,
1271 std::wstring* output, 1278 std::wstring* output,
1272 url_parse::Parsed* new_parsed) { 1279 url_parse::Parsed* new_parsed,
1280 size_t* offset_for_adjustment) {
1281 DCHECK(output);
1273 const url_parse::Component& host = 1282 const url_parse::Component& host =
1274 url.parsed_for_possibly_invalid_spec().host; 1283 url.parsed_for_possibly_invalid_spec().host;
1275 1284
1276 if (host.is_nonempty()) { 1285 if (host.is_nonempty()) {
1277 // Handle possible IDN in the host name. 1286 // Handle possible IDN in the host name.
1287 int new_host_begin = static_cast<int>(output->length());
1278 if (new_parsed) 1288 if (new_parsed)
1279 new_parsed->host.begin = static_cast<int>(output->length()); 1289 new_parsed->host.begin = new_host_begin;
1290 size_t offset_past_current_output =
1291 (!offset_for_adjustment ||
1292 (*offset_for_adjustment == std::wstring::npos) ||
1293 (*offset_for_adjustment < output->length())) ?
1294 std::wstring::npos : (*offset_for_adjustment - output->length());
1295 size_t* offset_into_host =
1296 (offset_past_current_output >= static_cast<size_t>(host.len)) ?
1297 NULL : &offset_past_current_output;
1280 1298
1281 const std::string& spec = url.possibly_invalid_spec(); 1299 const std::string& spec = url.possibly_invalid_spec();
1282 DCHECK(host.begin >= 0 && 1300 DCHECK(host.begin >= 0 &&
1283 ((spec.length() == 0 && host.begin == 0) || 1301 ((spec.length() == 0 && host.begin == 0) ||
1284 host.begin < static_cast<int>(spec.length()))); 1302 host.begin < static_cast<int>(spec.length())));
1285 net::IDNToUnicode(&spec[host.begin], host.len, languages, output); 1303 output->append(net::IDNToUnicode(&spec[host.begin],
1304 static_cast<size_t>(host.len), languages, offset_into_host));
1286 1305
1287 if (new_parsed) { 1306 int new_host_len = static_cast<int>(output->length()) - new_host_begin;
1288 new_parsed->host.len = 1307 if (new_parsed)
1289 static_cast<int>(output->length()) - new_parsed->host.begin; 1308 new_parsed->host.len = new_host_len;
1309 if (offset_into_host) {
1310 *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ?
1311 std::wstring::npos : (new_host_begin + *offset_into_host);
1312 } else if (offset_past_current_output != std::wstring::npos) {
1313 *offset_for_adjustment += new_host_len - host.len;
1290 } 1314 }
1291 } else if (new_parsed) { 1315 } else if (new_parsed) {
1292 new_parsed->host.reset(); 1316 new_parsed->host.reset();
1293 } 1317 }
1294 } 1318 }
1295 1319
1296 /* static */ 1320 /* static */
1297 void AppendFormattedComponent(const std::string& spec, 1321 void AppendFormattedComponent(const std::string& spec,
1298 const url_parse::Component& in_component, 1322 const url_parse::Component& in_component,
1299 UnescapeRule::Type unescape_rules, 1323 UnescapeRule::Type unescape_rules,
1300 std::wstring* output, 1324 std::wstring* output,
1301 url_parse::Component* out_component) { 1325 url_parse::Component* out_component,
1326 size_t* offset_for_adjustment) {
1327 DCHECK(output);
1328 DCHECK(offset_for_adjustment);
1302 if (in_component.is_nonempty()) { 1329 if (in_component.is_nonempty()) {
1303 out_component->begin = static_cast<int>(output->length()); 1330 out_component->begin = static_cast<int>(output->length());
1331 size_t offset_past_current_output =
1332 ((*offset_for_adjustment == std::wstring::npos) ||
1333 (*offset_for_adjustment < output->length())) ?
1334 std::wstring::npos : (*offset_for_adjustment - output->length());
1335 size_t* offset_into_component =
1336 (offset_past_current_output >= static_cast<size_t>(in_component.len)) ?
1337 NULL : &offset_past_current_output;
1304 if (unescape_rules == UnescapeRule::NONE) { 1338 if (unescape_rules == UnescapeRule::NONE) {
1305 output->append(UTF8ToWide(spec.substr( 1339 output->append(UTF8ToWideAndAdjustOffset(
1306 in_component.begin, in_component.len))); 1340 spec.substr(in_component.begin, in_component.len),
1341 offset_into_component));
1307 } else { 1342 } else {
1308 output->append(UnescapeAndDecodeUTF8URLComponent( 1343 output->append(UnescapeAndDecodeUTF8URLComponent(
1309 spec.substr(in_component.begin, in_component.len), 1344 spec.substr(in_component.begin, in_component.len), unescape_rules,
1310 unescape_rules)); 1345 offset_into_component));
1311 } 1346 }
1312 out_component->len = 1347 out_component->len =
1313 static_cast<int>(output->length()) - out_component->begin; 1348 static_cast<int>(output->length()) - out_component->begin;
1349 if (offset_into_component) {
1350 *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ?
1351 std::wstring::npos : (out_component->begin + *offset_into_component);
1352 } else if (offset_past_current_output != std::wstring::npos) {
1353 *offset_for_adjustment += out_component->len - in_component.len;
1354 }
1314 } else { 1355 } else {
1315 out_component->reset(); 1356 out_component->reset();
1316 } 1357 }
1317 } 1358 }
1318 1359
1319 std::wstring FormatUrl(const GURL& url, 1360 std::wstring FormatUrl(const GURL& url,
1320 const std::wstring& languages, 1361 const std::wstring& languages,
1321 bool omit_username_password, 1362 bool omit_username_password,
1322 UnescapeRule::Type unescape_rules, 1363 UnescapeRule::Type unescape_rules,
1323 url_parse::Parsed* new_parsed, 1364 url_parse::Parsed* new_parsed,
1324 size_t* prefix_end) { 1365 size_t* prefix_end,
1366 size_t* offset_for_adjustment) {
1325 url_parse::Parsed parsed_temp; 1367 url_parse::Parsed parsed_temp;
1326 if (!new_parsed) 1368 if (!new_parsed)
1327 new_parsed = &parsed_temp; 1369 new_parsed = &parsed_temp;
1370 size_t offset_temp = std::wstring::npos;
1371 if (!offset_for_adjustment)
1372 offset_for_adjustment = &offset_temp;
1328 1373
1329 std::wstring url_string; 1374 std::wstring url_string;
1330 1375
1331 // Check for empty URLs or 0 available text width. 1376 // Check for empty URLs or 0 available text width.
1332 if (url.is_empty()) { 1377 if (url.is_empty()) {
1333 if (prefix_end) 1378 if (prefix_end)
1334 *prefix_end = 0; 1379 *prefix_end = 0;
1380 *offset_for_adjustment = std::wstring::npos;
1335 return url_string; 1381 return url_string;
1336 } 1382 }
1337 1383
1338 // Special handling for view-source:. Don't use chrome::kViewSourceScheme 1384 // Special handling for view-source:. Don't use chrome::kViewSourceScheme
1339 // because this library shouldn't depend on chrome. 1385 // because this library shouldn't depend on chrome.
1340 const char* const kViewSource = "view-source"; 1386 const char* const kViewSource = "view-source";
1341 const char* const kViewSourceTwice = "view-source:view-source:"; 1387 const char* const kViewSourceTwice = "view-source:view-source:";
1342 // Rejects view-source:view-source:... to avoid deep recursive call. 1388 // Rejects view-source:view-source:... to avoid deep recursive call.
1343 if (url.SchemeIs(kViewSource) && 1389 if (url.SchemeIs(kViewSource) &&
1344 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { 1390 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
1345 return FormatViewSourceUrl(url, languages, omit_username_password, 1391 return FormatViewSourceUrl(url, languages, omit_username_password,
1346 unescape_rules, new_parsed, prefix_end); 1392 unescape_rules, new_parsed, prefix_end, offset_for_adjustment);
1347 } 1393 }
1348 1394
1349 // We handle both valid and invalid URLs (this will give us the spec 1395 // We handle both valid and invalid URLs (this will give us the spec
1350 // regardless of validity). 1396 // regardless of validity).
1351 const std::string& spec = url.possibly_invalid_spec(); 1397 const std::string& spec = url.possibly_invalid_spec();
1352 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 1398 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
1399 if (*offset_for_adjustment >= spec.length())
1400 *offset_for_adjustment = std::wstring::npos;
1353 1401
1354 // Copy everything before the username (the scheme and the separators.) 1402 // Copy everything before the username (the scheme and the separators.)
1355 // These are ASCII. 1403 // These are ASCII.
1356 int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); 1404 std::copy(spec.begin(),
1357 for (int i = 0; i < pre_end; ++i) 1405 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
1358 url_string.push_back(spec[i]); 1406 true),
1407 std::back_inserter(url_string));
1359 new_parsed->scheme = parsed.scheme; 1408 new_parsed->scheme = parsed.scheme;
1360 1409
1361 if (omit_username_password) { 1410 if (omit_username_password) {
1362 // Remove the username and password fields. We don't want to display those 1411 // Remove the username and password fields. We don't want to display those
1363 // to the user since they can be used for attacks, 1412 // to the user since they can be used for attacks,
1364 // e.g. "http://google.com:search@evil.ru/" 1413 // e.g. "http://google.com:search@evil.ru/"
1365 new_parsed->username.reset(); 1414 new_parsed->username.reset();
1366 new_parsed->password.reset(); 1415 new_parsed->password.reset();
1416 if ((*offset_for_adjustment != std::wstring::npos) &&
1417 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
1418 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
1419 // The seeming off-by-one and off-by-two in these first two lines are to
1420 // account for the ':' after the username and '@' after the password.
1421 if (*offset_for_adjustment >
1422 static_cast<size_t>(parsed.password.end())) {
1423 *offset_for_adjustment -=
1424 (parsed.username.len + parsed.password.len + 2);
1425 } else if (*offset_for_adjustment >
1426 static_cast<size_t>(parsed.username.begin)) {
1427 *offset_for_adjustment = std::wstring::npos;
1428 }
1429 } else {
1430 const url_parse::Component* nonempty_component =
1431 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
1432 // The seeming off-by-one in these first two lines is to account for the
1433 // '@' after the username/password.
1434 if (*offset_for_adjustment >
1435 static_cast<size_t>(nonempty_component->end())) {
1436 *offset_for_adjustment -= (nonempty_component->len + 1);
1437 } else if (*offset_for_adjustment >
1438 static_cast<size_t>(nonempty_component->begin)) {
1439 *offset_for_adjustment = std::wstring::npos;
1440 }
1441 }
1442 }
1367 } else { 1443 } else {
1368 AppendFormattedComponent( 1444 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
1369 spec, parsed.username, unescape_rules, 1445 &new_parsed->username, offset_for_adjustment);
1370 &url_string, &new_parsed->username);
1371 if (parsed.password.is_valid()) { 1446 if (parsed.password.is_valid()) {
1372 url_string.push_back(':'); 1447 url_string.push_back(':');
1373 } 1448 }
1374 AppendFormattedComponent( 1449 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
1375 spec, parsed.password, unescape_rules, 1450 &new_parsed->password, offset_for_adjustment);
1376 &url_string, &new_parsed->password);
1377 if (parsed.username.is_valid() || parsed.password.is_valid()) { 1451 if (parsed.username.is_valid() || parsed.password.is_valid()) {
1378 url_string.push_back('@'); 1452 url_string.push_back('@');
1379 } 1453 }
1380 } 1454 }
1381 if (prefix_end) 1455 if (prefix_end)
1382 *prefix_end = static_cast<size_t>(url_string.length()); 1456 *prefix_end = static_cast<size_t>(url_string.length());
1383 1457
1384 AppendFormattedHost(url, languages, &url_string, new_parsed); 1458 AppendFormattedHost(url, languages, &url_string, new_parsed,
1459 offset_for_adjustment);
1385 1460
1386 // Port. 1461 // Port.
1387 if (parsed.port.is_nonempty()) { 1462 if (parsed.port.is_nonempty()) {
1388 url_string.push_back(':'); 1463 url_string.push_back(':');
1389 int begin = url_string.length(); 1464 new_parsed->port.begin = url_string.length();
1390 for (int i = parsed.port.begin; i < parsed.port.end(); ++i) 1465 std::copy(spec.begin() + parsed.port.begin,
1391 url_string.push_back(spec[i]); 1466 spec.begin() + parsed.port.end(), std::back_inserter(url_string));
1392 new_parsed->port.begin = begin; 1467 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
1393 new_parsed->port.len = url_string.length() - begin;
1394 } else { 1468 } else {
1395 new_parsed->port.reset(); 1469 new_parsed->port.reset();
1396 } 1470 }
1397 1471
1398 // Path and query both get the same general unescape & convert treatment. 1472 // Path and query both get the same general unescape & convert treatment.
1399 AppendFormattedComponent( 1473 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
1400 spec, parsed.path, unescape_rules, &url_string, 1474 &new_parsed->path, offset_for_adjustment);
1401 &new_parsed->path);
1402 if (parsed.query.is_valid()) 1475 if (parsed.query.is_valid())
1403 url_string.push_back('?'); 1476 url_string.push_back('?');
1404 AppendFormattedComponent( 1477 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
1405 spec, parsed.query, unescape_rules, &url_string, 1478 &new_parsed->query, offset_for_adjustment);
1406 &new_parsed->query);
1407 1479
1408 // Reference is stored in valid, unescaped UTF-8, so we can just convert. 1480 // Reference is stored in valid, unescaped UTF-8, so we can just convert.
1409 if (parsed.ref.is_valid()) { 1481 if (parsed.ref.is_valid()) {
1410 url_string.push_back('#'); 1482 url_string.push_back('#');
1411 int begin = url_string.length(); 1483 new_parsed->ref.begin = url_string.length();
1412 if (parsed.ref.len > 0) 1484 size_t offset_past_current_output =
1413 url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], 1485 ((*offset_for_adjustment == std::wstring::npos) ||
1414 parsed.ref.len))); 1486 (*offset_for_adjustment < url_string.length())) ?
1415 new_parsed->ref.begin = begin; 1487 std::wstring::npos : (*offset_for_adjustment - url_string.length());
1416 new_parsed->ref.len = url_string.length() - begin; 1488 size_t* offset_into_ref =
1489 (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ?
1490 NULL : &offset_past_current_output;
1491 if (parsed.ref.len > 0) {
1492 url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin,
1493 parsed.ref.len),
1494 offset_into_ref));
1495 }
1496 new_parsed->ref.len = url_string.length() - new_parsed->ref.begin;
1497 if (offset_into_ref) {
1498 *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ?
1499 std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref);
1500 } else if (offset_past_current_output != std::wstring::npos) {
1501 // We clamped the offset near the beginning of this function to ensure it
1502 // was within the input URL. If we reach here, the input was something
1503 // invalid and non-parseable such that the offset was past any component
1504 // we could figure out. In this case it won't be represented in the
1505 // output string, so reset it.
1506 *offset_for_adjustment = std::wstring::npos;
1507 }
1417 } 1508 }
1418 1509
1419 return url_string; 1510 return url_string;
1420 } 1511 }
1421 1512
1422 GURL SimplifyUrlForRequest(const GURL& url) { 1513 GURL SimplifyUrlForRequest(const GURL& url) {
1423 DCHECK(url.is_valid()); 1514 DCHECK(url.is_valid());
1424 GURL::Replacements replacements; 1515 GURL::Replacements replacements;
1425 replacements.ClearUsername(); 1516 replacements.ClearUsername();
1426 replacements.ClearPassword(); 1517 replacements.ClearPassword();
(...skipping 24 matching lines...) Expand all
1451 if (length > 0) 1542 if (length > 0)
1452 ports.insert(StringToInt(WideToASCII( 1543 ports.insert(StringToInt(WideToASCII(
1453 allowed_ports.substr(last, length)))); 1544 allowed_ports.substr(last, length))));
1454 last = i + 1; 1545 last = i + 1;
1455 } 1546 }
1456 } 1547 }
1457 explicitly_allowed_ports = ports; 1548 explicitly_allowed_ports = ports;
1458 } 1549 }
1459 1550
1460 } // namespace net 1551 } // namespace net
OLDNEW
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698