net/base/net_util.cc - Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur...

Side by Side Diff: net/base/net_util.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <map>	8 #include <map>

9 #include <unicode/ucnv.h>	9 #include <unicode/ucnv.h>

10 #include <unicode/uidna.h>	10 #include <unicode/uidna.h>

(...skipping 632 matching lines...) Expand 10 before | Expand all | Expand 10 after
643 std::string languages_list(WideToASCII(languages));	643 std::string languages_list(WideToASCII(languages));

644 StringTokenizer t(languages_list, ",");	644 StringTokenizer t(languages_list, ",");

645 while (t.GetNext()) {	645 while (t.GetNext()) {

646 if (IsComponentCoveredByLang(component_characters, t.token()))	646 if (IsComponentCoveredByLang(component_characters, t.token()))

647 return true;	647 return true;

648 }	648 }

649 return false;	649 return false;

650 }	650 }

651	651

652 // Converts one component of a host (between dots) to IDN if safe. The result	652 // Converts one component of a host (between dots) to IDN if safe. The result

653 // will be APPENDED to the given output string and will be the same as the	653 // will be APPENDED to the given output string and will be the same as the input

654 // input if it is not IDN or the IDN is unsafe to display.	654 // if it is not IDN or the IDN is unsafe to display. Returns whether any

655 void IDNToUnicodeOneComponent(const char16* comp,	655 // conversion was performed.

656 int comp_len,	656 bool IDNToUnicodeOneComponent(const char16* comp,

	657 size_t comp_len,

657 const std::wstring& languages,	658 const std::wstring& languages,

658 string16* out) {	659 string16* out) {

659 DCHECK(comp_len >= 0);	660 DCHECK(out);

660 if (comp_len == 0)	661 if (comp_len == 0)

661 return;	662 return false;

662	663

663 // Expand the output string to make room for a possibly longer string	664 // Only transform if the input can be an IDN component.

664 // (we'll expand if it's still not big enough below).	665 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

665 int extra_space = 64;	666 if ((comp_len > arraysize(kIdnPrefix)) &&

666 size_t host_begin_in_output = out->size();	667 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {

	668 // Repeatedly expand the output string until it's big enough. It looks like

	669 // ICU will return the required size of the buffer, but that's not

	670 // documented, so we'll just grow by 2x. This should be rare and is not on a

	671 // critical path.

	672 size_t original_length = out->length();

	673 for (int extra_space = 64; ; extra_space *= 2) {

	674 UErrorCode status = U_ZERO_ERROR;

	675 out->resize(out->length() + extra_space);

	676 int output_chars = uidna_IDNToUnicode(comp,

	677 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,

	678 UIDNA_DEFAULT, NULL, &status);

	679 if (status == U_ZERO_ERROR) {

	680 // Converted successfully.

	681 out->resize(original_length + output_chars);

	682 if (IsIDNComponentSafe(out->data() + original_length, output_chars,

	683 languages))

	684 return true;

	685 }

667	686

668 // Just copy the input if it can't be an IDN component.	687 if (status != U_BUFFER_OVERFLOW_ERROR)

669 if (comp_len < 4 \|\|	688 break;

670 comp[0] != 'x' \|\| comp[1] != 'n' \|\| comp[2] != '-' \|\| comp[3] != '-') {	689 }

671 out->resize(host_begin_in_output + comp_len);	690 // Failed, revert back to original string.

672 for (int i = 0; i < comp_len; i++)	691 out->resize(original_length);

673 (*out)[host_begin_in_output + i] = comp[i];

674 return;

675 }	692 }

676	693

677 while (true) {	694 // We get here with no IDN or on error, in which case we just append the

678 UErrorCode status = U_ZERO_ERROR;	695 // literal input.

679 out->resize(out->size() + extra_space);	696 out->append(comp, comp_len);

680 int output_chars =	697 return false;

681 uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output],

682 extra_space, UIDNA_DEFAULT, NULL, &status);

683 if (status == U_ZERO_ERROR) {

684 // Converted successfully.

685 out->resize(host_begin_in_output + output_chars);

686 if (!IsIDNComponentSafe(&out->data()[host_begin_in_output],

687 output_chars,

688 languages))

689 break; // The error handling below will undo the IDN.

690 return;

691 }

692 if (status != U_BUFFER_OVERFLOW_ERROR)

693 break;

694

695 // Need to loop again with a bigger buffer. It looks like ICU will

696 // return the required size of the buffer, but that's not documented,

697 // so we'll just grow by 2x. This should be rare and is not on a

698 // critical path.

699 extra_space *= 2;

700 }

701

702 // We get here on error, in which case we replace anything that was added

703 // with the literal input.

704 out->resize(host_begin_in_output + comp_len);

705 for (int i = 0; i < comp_len; i++)

706 (*out)[host_begin_in_output + i] = comp[i];

707 }	698 }

708	699

709 // Helper for FormatUrl().	700 // Helper for FormatUrl().

710 std::wstring FormatViewSourceUrl(const GURL& url,	701 std::wstring FormatViewSourceUrl(const GURL& url,

711 const std::wstring& languages,	702 const std::wstring& languages,

712 bool omit_username_password,	703 bool omit_username_password,

713 UnescapeRule::Type unescape_rules,	704 UnescapeRule::Type unescape_rules,

714 url_parse::Parsed* new_parsed,	705 url_parse::Parsed* new_parsed,

715 size_t* prefix_end) {	706 size_t* prefix_end,

	707 size_t* offset_for_adjustment) {

716 DCHECK(new_parsed);	708 DCHECK(new_parsed);

717 const wchar_t* const kWideViewSource = L"view-source:";	709 const wchar_t* const kWideViewSource = L"view-source:";

718 const size_t kViewSourceLengthPlus1 = 12;	710 const size_t kViewSourceLengthPlus1 = 12;

719	711

720 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));	712 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));

	713 size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ?

	714 std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1);

	715 size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ?

	716 NULL : &temp_offset;

721 std::wstring result = net::FormatUrl(real_url, languages,	717 std::wstring result = net::FormatUrl(real_url, languages,

722 omit_username_password, unescape_rules, new_parsed, prefix_end);	718 omit_username_password, unescape_rules, new_parsed, prefix_end,

	719 temp_offset_ptr);

723 result.insert(0, kWideViewSource);	720 result.insert(0, kWideViewSource);

724	721

725 // Adjust position values.	722 // Adjust position values.

726 if (prefix_end)

727 *prefix_end += kViewSourceLengthPlus1;

728 if (new_parsed->scheme.is_nonempty()) {	723 if (new_parsed->scheme.is_nonempty()) {

729 // Assume "view-source:real-scheme" as a scheme.	724 // Assume "view-source:real-scheme" as a scheme.

730 new_parsed->scheme.len += kViewSourceLengthPlus1;	725 new_parsed->scheme.len += kViewSourceLengthPlus1;

731 } else {	726 } else {

732 new_parsed->scheme.begin = 0;	727 new_parsed->scheme.begin = 0;

733 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1;	728 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1;

734 }	729 }

735 if (new_parsed->username.is_nonempty())	730 if (new_parsed->username.is_nonempty())

736 new_parsed->username.begin += kViewSourceLengthPlus1;	731 new_parsed->username.begin += kViewSourceLengthPlus1;

737 if (new_parsed->password.is_nonempty())	732 if (new_parsed->password.is_nonempty())

738 new_parsed->password.begin += kViewSourceLengthPlus1;	733 new_parsed->password.begin += kViewSourceLengthPlus1;

739 if (new_parsed->host.is_nonempty())	734 if (new_parsed->host.is_nonempty())

740 new_parsed->host.begin += kViewSourceLengthPlus1;	735 new_parsed->host.begin += kViewSourceLengthPlus1;

741 if (new_parsed->port.is_nonempty())	736 if (new_parsed->port.is_nonempty())

742 new_parsed->port.begin += kViewSourceLengthPlus1;	737 new_parsed->port.begin += kViewSourceLengthPlus1;

743 if (new_parsed->path.is_nonempty())	738 if (new_parsed->path.is_nonempty())

744 new_parsed->path.begin += kViewSourceLengthPlus1;	739 new_parsed->path.begin += kViewSourceLengthPlus1;

745 if (new_parsed->query.is_nonempty())	740 if (new_parsed->query.is_nonempty())

746 new_parsed->query.begin += kViewSourceLengthPlus1;	741 new_parsed->query.begin += kViewSourceLengthPlus1;

747 if (new_parsed->ref.is_nonempty())	742 if (new_parsed->ref.is_nonempty())

748 new_parsed->ref.begin += kViewSourceLengthPlus1;	743 new_parsed->ref.begin += kViewSourceLengthPlus1;

	744 if (prefix_end)

	745 *prefix_end += kViewSourceLengthPlus1;

	746 if (temp_offset_ptr) {

	747 *offset_for_adjustment = (temp_offset == std::wstring::npos) ?

	748 std::wstring::npos : (temp_offset + kViewSourceLengthPlus1);

	749 }

749 return result;	750 return result;

750 }	751 }

751	752

752 // Converts a UTF-8 string to a FilePath string type.	753 // Converts a UTF-8 string to a FilePath string type.

753 //	754 //

754 // This is inline with the hope that the function will be "free" on non-Windows	755 // This is inline with the hope that the function will be "free" on non-Windows

755 // platforms.	756 // platforms.

756 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) {	757 inline FilePath::StringType UTF8ToFilePathString(const std::string& utf8) {

757 #if defined(OS_WIN)	758 #if defined(OS_WIN)

758 return FilePath::StringType(UTF8ToUTF16(utf8));	759 return FilePath::StringType(UTF8ToUTF16(utf8));

759 #else	760 #else

760 return utf8;	761 return utf8;

761 #endif	762 #endif

762 }	763 }

763	764

764 } // namespace	765 } // namespace

765	766

766 namespace net {	767 namespace net {

767	768

768 std::set<int> explicitly_allowed_ports;	769 std::set<int> explicitly_allowed_ports;

769	770

770 // Appends the substring |in_component| inside of the URL |spec| to |output|,	771 // Appends the substring |in_component| inside of the URL |spec| to |output|,

771 // and the resulting range will be filled into |out_component|. |unescape_rules|	772 // and the resulting range will be filled into |out_component|. |unescape_rules|

772 // defines how to clean the URL for human readability.	773 // defines how to clean the URL for human readability. |offset_for_adjustment|

	774 // is an offset into |output| which will be adjusted based on how it maps to the

	775 // component being converted; if it is less than output->length(), it will be

	776 // untouched, and if it is greater than output->length() + in_component.len it

	777 // will be shortened by the difference in lengths between the input and output

	778 // components. Otherwise it points into the component being converted, and is

	779 // adjusted to point to the same logical place in |output|.

	780 // |offset_for_adjustment| may not be NULL.

773 static void AppendFormattedComponent(const std::string& spec,	781 static void AppendFormattedComponent(const std::string& spec,

774 const url_parse::Component& in_component,	782 const url_parse::Component& in_component,

775 UnescapeRule::Type unescape_rules,	783 UnescapeRule::Type unescape_rules,

776 std::wstring* output,	784 std::wstring* output,

777 url_parse::Component* out_component);	785 url_parse::Component* out_component,

	786 size_t* offset_for_adjustment);

778	787

779 GURL FilePathToFileURL(const FilePath& path) {	788 GURL FilePathToFileURL(const FilePath& path) {

780 // Produce a URL like "file:///C:/foo" for a regular file, or	789 // Produce a URL like "file:///C:/foo" for a regular file, or

781 // "file://///server/path" for UNC. The URL canonicalizer will fix up the	790 // "file://///server/path" for UNC. The URL canonicalizer will fix up the

782 // latter case to be the canonical UNC form: "file://server/path"	791 // latter case to be the canonical UNC form: "file://server/path"

783 FilePath::StringType url_string(kFileURLPrefix);	792 FilePath::StringType url_string(kFileURLPrefix);

784 url_string.append(path.value());	793 url_string.append(path.value());

785	794

786 // Now do replacement of some characters. Since we assume the input is a	795 // Now do replacement of some characters. Since we assume the input is a

787 // literal filename, anything the URL parser might consider special should	796 // literal filename, anything the URL parser might consider special should

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
842 return GetHeaderParamValueT(field, param_name);	851 return GetHeaderParamValueT(field, param_name);

843 }	852 }

844	853

845 // TODO(brettw) bug 734373: check the scripts for each host component and	854 // TODO(brettw) bug 734373: check the scripts for each host component and

846 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for	855 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for

847 // scripts that the user has installed. For now, just put the entire	856 // scripts that the user has installed. For now, just put the entire

848 // path through IDN. Maybe this feature can be implemented in ICU itself?	857 // path through IDN. Maybe this feature can be implemented in ICU itself?

849 //	858 //

850 // We may want to skip this step in the case of file URLs to allow unicode	859 // We may want to skip this step in the case of file URLs to allow unicode

851 // UNC hostnames regardless of encodings.	860 // UNC hostnames regardless of encodings.

852 void IDNToUnicode(const char* host,	861 std::wstring IDNToUnicode(const char* host,

853 int host_len,	862 size_t host_len,

854 const std::wstring& languages,	863 const std::wstring& languages,

855 std::wstring* out) {	864 size_t* offset_for_adjustment) {

856 // Convert the ASCII input to a wide string for ICU.	865 // Convert the ASCII input to a wide string for ICU.

857 string16 input16;	866 string16 input16;

858 input16.reserve(host_len);	867 input16.reserve(host_len);

859 for (int i = 0; i < host_len; i++)	868 std::copy(host, host + host_len, std::back_inserter(input16));

860 input16.push_back(host[i]);

861	869

862 string16 out16;	870 string16 out16;

863 // The output string is appended to, so convert what's already there if	871 size_t output_offset = offset_for_adjustment ?

864 // needed.	872 *offset_for_adjustment : std::wstring::npos;

865 #if defined(WCHAR_T_IS_UTF32)

866 WideToUTF16(out->data(), out->length(), &out16);

867 out->clear(); // for equivalence with the swap below

868 #elif defined(WCHAR_T_IS_UTF16)

869 out->swap(out16);

870 #endif

871	873

872 // Do each component of the host separately, since we enforce script matching	874 // Do each component of the host separately, since we enforce script matching

873 // on a per-component basis.	875 // on a per-component basis.

874 size_t cur_begin = 0; // Beginning of the current component (inclusive).	876 for (size_t component_start = 0, component_end;

875 while (cur_begin < input16.size()) {	877 component_start < input16.length();

876 // Find the next dot or the end of the string.	878 component_start = component_end + 1) {

877 size_t next_dot = input16.find_first_of('.', cur_begin);	879 // Find the end of the component.

878 if (next_dot == std::wstring::npos)	880 component_end = input16.find('.', component_start);

879 next_dot = input16.size(); // For getting the last component.	881 if (component_end == string16::npos)

	882 component_end = input16.length(); // For getting the last component.

	883 size_t component_length = component_end - component_start;

880	884

881 if (next_dot > cur_begin) {	885 size_t output_component_start = out16.length();

	886 bool converted_idn = false;

	887 if (component_end > component_start) {

882 // Add the substring that we just found.	888 // Add the substring that we just found.

883 IDNToUnicodeOneComponent(&input16[cur_begin],	889 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,

884 static_cast<int>(next_dot - cur_begin),	890 component_length, languages, &out16);

885 languages,	891 }

886 &out16);	892 size_t output_component_length = out16.length() - output_component_start;

	893

	894 if ((output_offset != std::wstring::npos) &&

	895 (*offset_for_adjustment > component_start)) {

	896 if ((*offset_for_adjustment < component_end) && converted_idn)

	897 output_offset = std::wstring::npos;

	898 else

	899 output_offset += output_component_length - component_length;

887 }	900 }

888	901

889 // Need to add the dot we just found (if we found one). This needs to be	902 // Need to add the dot we just found (if we found one).

890 // done before we break out below in case the URL ends in a dot.	903 if (component_end < input16.length())

891 if (next_dot < input16.size())

892 out16.push_back('.');	904 out16.push_back('.');

893 else

894 break; // No more components left.

895

896 cur_begin = next_dot + 1;

897 }	905 }

898	906

899 #if defined(WCHAR_T_IS_UTF32)	907 if (offset_for_adjustment)

900 UTF16ToWide(out16.data(), out16.length(), out);	908 *offset_for_adjustment = output_offset;

901 #elif defined(WCHAR_T_IS_UTF16)	909

902 out->swap(out16);	910 return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment);

903 #endif

904 }	911 }

905	912

906 std::string CanonicalizeHost(const std::string& host,	913 std::string CanonicalizeHost(const std::string& host,

907 url_canon::CanonHostInfo* host_info) {	914 url_canon::CanonHostInfo* host_info) {

908 // Try to canonicalize the host.	915 // Try to canonicalize the host.

909 const url_parse::Component raw_host_component(	916 const url_parse::Component raw_host_component(

910 0, static_cast<int>(host.length()));	917 0, static_cast<int>(host.length()));

911 std::string canon_host;	918 std::string canon_host;

912 url_canon::StdStringCanonOutput canon_host_output(&canon_host);	919 url_canon::StdStringCanonOutput canon_host_output(&canon_host);

913 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,	920 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,

(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after
1255 DLOG(INFO) << "gethostname() failed with " << result;	1262 DLOG(INFO) << "gethostname() failed with " << result;

1256 buffer[0] = '\0';	1263 buffer[0] = '\0';

1257 }	1264 }

1258 return std::string(buffer);	1265 return std::string(buffer);

1259 }	1266 }

1260	1267

1261 void GetIdentityFromURL(const GURL& url,	1268 void GetIdentityFromURL(const GURL& url,

1262 std::wstring* username,	1269 std::wstring* username,

1263 std::wstring* password) {	1270 std::wstring* password) {

1264 UnescapeRule::Type flags = UnescapeRule::SPACES;	1271 UnescapeRule::Type flags = UnescapeRule::SPACES;

1265 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags);	1272 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);

1266 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags);	1273 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);

1267 }	1274 }

1268	1275

1269 void AppendFormattedHost(const GURL& url,	1276 void AppendFormattedHost(const GURL& url,

1270 const std::wstring& languages,	1277 const std::wstring& languages,

1271 std::wstring* output,	1278 std::wstring* output,

1272 url_parse::Parsed* new_parsed) {	1279 url_parse::Parsed* new_parsed,

	1280 size_t* offset_for_adjustment) {

	1281 DCHECK(output);

1273 const url_parse::Component& host =	1282 const url_parse::Component& host =

1274 url.parsed_for_possibly_invalid_spec().host;	1283 url.parsed_for_possibly_invalid_spec().host;

1275	1284

1276 if (host.is_nonempty()) {	1285 if (host.is_nonempty()) {

1277 // Handle possible IDN in the host name.	1286 // Handle possible IDN in the host name.

	1287 int new_host_begin = static_cast<int>(output->length());

1278 if (new_parsed)	1288 if (new_parsed)

1279 new_parsed->host.begin = static_cast<int>(output->length());	1289 new_parsed->host.begin = new_host_begin;

	1290 size_t offset_past_current_output =

	1291 (!offset_for_adjustment \|\|

	1292 (*offset_for_adjustment == std::wstring::npos) \|\|

	1293 (*offset_for_adjustment < output->length())) ?

	1294 std::wstring::npos : (*offset_for_adjustment - output->length());

	1295 size_t* offset_into_host =

	1296 (offset_past_current_output >= static_cast<size_t>(host.len)) ?

	1297 NULL : &offset_past_current_output;

1280	1298

1281 const std::string& spec = url.possibly_invalid_spec();	1299 const std::string& spec = url.possibly_invalid_spec();

1282 DCHECK(host.begin >= 0 &&	1300 DCHECK(host.begin >= 0 &&

1283 ((spec.length() == 0 && host.begin == 0) \|\|	1301 ((spec.length() == 0 && host.begin == 0) \|\|

1284 host.begin < static_cast<int>(spec.length())));	1302 host.begin < static_cast<int>(spec.length())));

1285 net::IDNToUnicode(&spec[host.begin], host.len, languages, output);	1303 output->append(net::IDNToUnicode(&spec[host.begin],

	1304 static_cast<size_t>(host.len), languages, offset_into_host));

1286	1305

1287 if (new_parsed) {	1306 int new_host_len = static_cast<int>(output->length()) - new_host_begin;

1288 new_parsed->host.len =	1307 if (new_parsed)

1289 static_cast<int>(output->length()) - new_parsed->host.begin;	1308 new_parsed->host.len = new_host_len;

	1309 if (offset_into_host) {

	1310 offset_for_adjustment = (offset_into_host == std::wstring::npos) ?

	1311 std::wstring::npos : (new_host_begin + *offset_into_host);

	1312 } else if (offset_past_current_output != std::wstring::npos) {

	1313 *offset_for_adjustment += new_host_len - host.len;

1290 }	1314 }

1291 } else if (new_parsed) {	1315 } else if (new_parsed) {

1292 new_parsed->host.reset();	1316 new_parsed->host.reset();

1293 }	1317 }

1294 }	1318 }

1295	1319

1296 /* static */	1320 /* static */

1297 void AppendFormattedComponent(const std::string& spec,	1321 void AppendFormattedComponent(const std::string& spec,

1298 const url_parse::Component& in_component,	1322 const url_parse::Component& in_component,

1299 UnescapeRule::Type unescape_rules,	1323 UnescapeRule::Type unescape_rules,

1300 std::wstring* output,	1324 std::wstring* output,

1301 url_parse::Component* out_component) {	1325 url_parse::Component* out_component,

	1326 size_t* offset_for_adjustment) {

	1327 DCHECK(output);

	1328 DCHECK(offset_for_adjustment);

1302 if (in_component.is_nonempty()) {	1329 if (in_component.is_nonempty()) {

1303 out_component->begin = static_cast<int>(output->length());	1330 out_component->begin = static_cast<int>(output->length());

	1331 size_t offset_past_current_output =

	1332 ((*offset_for_adjustment == std::wstring::npos) \|\|

	1333 (*offset_for_adjustment < output->length())) ?

	1334 std::wstring::npos : (*offset_for_adjustment - output->length());

	1335 size_t* offset_into_component =

	1336 (offset_past_current_output >= static_cast<size_t>(in_component.len)) ?

	1337 NULL : &offset_past_current_output;

1304 if (unescape_rules == UnescapeRule::NONE) {	1338 if (unescape_rules == UnescapeRule::NONE) {

1305 output->append(UTF8ToWide(spec.substr(	1339 output->append(UTF8ToWideAndAdjustOffset(

1306 in_component.begin, in_component.len)));	1340 spec.substr(in_component.begin, in_component.len),

	1341 offset_into_component));

1307 } else {	1342 } else {

1308 output->append(UnescapeAndDecodeUTF8URLComponent(	1343 output->append(UnescapeAndDecodeUTF8URLComponent(

1309 spec.substr(in_component.begin, in_component.len),	1344 spec.substr(in_component.begin, in_component.len), unescape_rules,

1310 unescape_rules));	1345 offset_into_component));

1311 }	1346 }

1312 out_component->len =	1347 out_component->len =

1313 static_cast<int>(output->length()) - out_component->begin;	1348 static_cast<int>(output->length()) - out_component->begin;

	1349 if (offset_into_component) {

	1350 offset_for_adjustment = (offset_into_component == std::wstring::npos) ?

	1351 std::wstring::npos : (out_component->begin + *offset_into_component);

	1352 } else if (offset_past_current_output != std::wstring::npos) {

	1353 *offset_for_adjustment += out_component->len - in_component.len;

	1354 }

1314 } else {	1355 } else {

1315 out_component->reset();	1356 out_component->reset();

1316 }	1357 }

1317 }	1358 }

1318	1359

1319 std::wstring FormatUrl(const GURL& url,	1360 std::wstring FormatUrl(const GURL& url,

1320 const std::wstring& languages,	1361 const std::wstring& languages,

1321 bool omit_username_password,	1362 bool omit_username_password,

1322 UnescapeRule::Type unescape_rules,	1363 UnescapeRule::Type unescape_rules,

1323 url_parse::Parsed* new_parsed,	1364 url_parse::Parsed* new_parsed,

1324 size_t* prefix_end) {	1365 size_t* prefix_end,

	1366 size_t* offset_for_adjustment) {

1325 url_parse::Parsed parsed_temp;	1367 url_parse::Parsed parsed_temp;

1326 if (!new_parsed)	1368 if (!new_parsed)

1327 new_parsed = &parsed_temp;	1369 new_parsed = &parsed_temp;

	1370 size_t offset_temp = std::wstring::npos;

	1371 if (!offset_for_adjustment)

	1372 offset_for_adjustment = &offset_temp;

1328	1373

1329 std::wstring url_string;	1374 std::wstring url_string;

1330	1375

1331 // Check for empty URLs or 0 available text width.	1376 // Check for empty URLs or 0 available text width.

1332 if (url.is_empty()) {	1377 if (url.is_empty()) {

1333 if (prefix_end)	1378 if (prefix_end)

1334 *prefix_end = 0;	1379 *prefix_end = 0;

	1380 *offset_for_adjustment = std::wstring::npos;

1335 return url_string;	1381 return url_string;

1336 }	1382 }

1337	1383

1338 // Special handling for view-source:. Don't use chrome::kViewSourceScheme	1384 // Special handling for view-source:. Don't use chrome::kViewSourceScheme

1339 // because this library shouldn't depend on chrome.	1385 // because this library shouldn't depend on chrome.

1340 const char* const kViewSource = "view-source";	1386 const char* const kViewSource = "view-source";

1341 const char* const kViewSourceTwice = "view-source:view-source:";	1387 const char* const kViewSourceTwice = "view-source:view-source:";

1342 // Rejects view-source:view-source:... to avoid deep recursive call.	1388 // Rejects view-source:view-source:... to avoid deep recursive call.

1343 if (url.SchemeIs(kViewSource) &&	1389 if (url.SchemeIs(kViewSource) &&

1344 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {	1390 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {

1345 return FormatViewSourceUrl(url, languages, omit_username_password,	1391 return FormatViewSourceUrl(url, languages, omit_username_password,

1346 unescape_rules, new_parsed, prefix_end);	1392 unescape_rules, new_parsed, prefix_end, offset_for_adjustment);

1347 }	1393 }

1348	1394

1349 // We handle both valid and invalid URLs (this will give us the spec	1395 // We handle both valid and invalid URLs (this will give us the spec

1350 // regardless of validity).	1396 // regardless of validity).

1351 const std::string& spec = url.possibly_invalid_spec();	1397 const std::string& spec = url.possibly_invalid_spec();

1352 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();	1398 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

	1399 if (*offset_for_adjustment >= spec.length())

	1400 *offset_for_adjustment = std::wstring::npos;

1353	1401

1354 // Copy everything before the username (the scheme and the separators.)	1402 // Copy everything before the username (the scheme and the separators.)

1355 // These are ASCII.	1403 // These are ASCII.

1356 int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true);	1404 std::copy(spec.begin(),

1357 for (int i = 0; i < pre_end; ++i)	1405 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,

1358 url_string.push_back(spec[i]);	1406 true),

	1407 std::back_inserter(url_string));

1359 new_parsed->scheme = parsed.scheme;	1408 new_parsed->scheme = parsed.scheme;

1360	1409

1361 if (omit_username_password) {	1410 if (omit_username_password) {

1362 // Remove the username and password fields. We don't want to display those	1411 // Remove the username and password fields. We don't want to display those

1363 // to the user since they can be used for attacks,	1412 // to the user since they can be used for attacks,

1364 // e.g. "http://google.com:search@evil.ru/"	1413 // e.g. "http://google.com:search@evil.ru/"

1365 new_parsed->username.reset();	1414 new_parsed->username.reset();

1366 new_parsed->password.reset();	1415 new_parsed->password.reset();

	1416 if ((*offset_for_adjustment != std::wstring::npos) &&

	1417 (parsed.username.is_nonempty() \|\| parsed.password.is_nonempty())) {

	1418 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {

	1419 // The seeming off-by-one and off-by-two in these first two lines are to

	1420 // account for the ':' after the username and '@' after the password.

	1421 if (*offset_for_adjustment >

	1422 static_cast<size_t>(parsed.password.end())) {

	1423 *offset_for_adjustment -=

	1424 (parsed.username.len + parsed.password.len + 2);

	1425 } else if (*offset_for_adjustment >

	1426 static_cast<size_t>(parsed.username.begin)) {

	1427 *offset_for_adjustment = std::wstring::npos;

	1428 }

	1429 } else {

	1430 const url_parse::Component* nonempty_component =

	1431 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;

	1432 // The seeming off-by-one in these first two lines is to account for the

	1433 // '@' after the username/password.

	1434 if (*offset_for_adjustment >

	1435 static_cast<size_t>(nonempty_component->end())) {

	1436 *offset_for_adjustment -= (nonempty_component->len + 1);

	1437 } else if (*offset_for_adjustment >

	1438 static_cast<size_t>(nonempty_component->begin)) {

	1439 *offset_for_adjustment = std::wstring::npos;

	1440 }

	1441 }

	1442 }

1367 } else {	1443 } else {

1368 AppendFormattedComponent(	1444 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,

1369 spec, parsed.username, unescape_rules,	1445 &new_parsed->username, offset_for_adjustment);

1370 &url_string, &new_parsed->username);

1371 if (parsed.password.is_valid()) {	1446 if (parsed.password.is_valid()) {

1372 url_string.push_back(':');	1447 url_string.push_back(':');

1373 }	1448 }

1374 AppendFormattedComponent(	1449 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,

1375 spec, parsed.password, unescape_rules,	1450 &new_parsed->password, offset_for_adjustment);

1376 &url_string, &new_parsed->password);

1377 if (parsed.username.is_valid() \|\| parsed.password.is_valid()) {	1451 if (parsed.username.is_valid() \|\| parsed.password.is_valid()) {

1378 url_string.push_back('@');	1452 url_string.push_back('@');

1379 }	1453 }

1380 }	1454 }

1381 if (prefix_end)	1455 if (prefix_end)

1382 *prefix_end = static_cast<size_t>(url_string.length());	1456 *prefix_end = static_cast<size_t>(url_string.length());

1383	1457

1384 AppendFormattedHost(url, languages, &url_string, new_parsed);	1458 AppendFormattedHost(url, languages, &url_string, new_parsed,

	1459 offset_for_adjustment);

1385	1460

1386 // Port.	1461 // Port.

1387 if (parsed.port.is_nonempty()) {	1462 if (parsed.port.is_nonempty()) {

1388 url_string.push_back(':');	1463 url_string.push_back(':');

1389 int begin = url_string.length();	1464 new_parsed->port.begin = url_string.length();

1390 for (int i = parsed.port.begin; i < parsed.port.end(); ++i)	1465 std::copy(spec.begin() + parsed.port.begin,

1391 url_string.push_back(spec[i]);	1466 spec.begin() + parsed.port.end(), std::back_inserter(url_string));

1392 new_parsed->port.begin = begin;	1467 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

1393 new_parsed->port.len = url_string.length() - begin;

1394 } else {	1468 } else {

1395 new_parsed->port.reset();	1469 new_parsed->port.reset();

1396 }	1470 }

1397	1471

1398 // Path and query both get the same general unescape & convert treatment.	1472 // Path and query both get the same general unescape & convert treatment.

1399 AppendFormattedComponent(	1473 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,

1400 spec, parsed.path, unescape_rules, &url_string,	1474 &new_parsed->path, offset_for_adjustment);

1401 &new_parsed->path);

1402 if (parsed.query.is_valid())	1475 if (parsed.query.is_valid())

1403 url_string.push_back('?');	1476 url_string.push_back('?');

1404 AppendFormattedComponent(	1477 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,

1405 spec, parsed.query, unescape_rules, &url_string,	1478 &new_parsed->query, offset_for_adjustment);

1406 &new_parsed->query);

1407	1479

1408 // Reference is stored in valid, unescaped UTF-8, so we can just convert.	1480 // Reference is stored in valid, unescaped UTF-8, so we can just convert.

1409 if (parsed.ref.is_valid()) {	1481 if (parsed.ref.is_valid()) {

1410 url_string.push_back('#');	1482 url_string.push_back('#');

1411 int begin = url_string.length();	1483 new_parsed->ref.begin = url_string.length();

1412 if (parsed.ref.len > 0)	1484 size_t offset_past_current_output =

1413 url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin],	1485 ((*offset_for_adjustment == std::wstring::npos) ||

1414 parsed.ref.len)));	1486 (*offset_for_adjustment < url_string.length())) ?

1415 new_parsed->ref.begin = begin;	1487 std::wstring::npos : (*offset_for_adjustment - url_string.length());

1416 new_parsed->ref.len = url_string.length() - begin;	1488 size_t* offset_into_ref =

	1489 (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ?

	1490 NULL : &offset_past_current_output;

	1491 if (parsed.ref.len > 0) {

	1492 url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin,

	1493 parsed.ref.len),

	1494 offset_into_ref));

	1495 }

	1496 new_parsed->ref.len = url_string.length() - new_parsed->ref.begin;

	1497 if (offset_into_ref) {

	1498 offset_for_adjustment = (offset_into_ref == std::wstring::npos) ?

	1499 std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref);

	1500 } else if (offset_past_current_output != std::wstring::npos) {

	1501 // We clamped the offset near the beginning of this function to ensure it

	1502 // was within the input URL. If we reach here, the input was something

	1503 // invalid and non-parseable such that the offset was past any component

	1504 // we could figure out. In this case it won't be represented in the

	1505 // output string, so reset it.

	1506 *offset_for_adjustment = std::wstring::npos;

	1507 }

1417 }	1508 }

1418	1509

1419 return url_string;	1510 return url_string;

1420 }	1511 }

1421	1512

1422 GURL SimplifyUrlForRequest(const GURL& url) {	1513 GURL SimplifyUrlForRequest(const GURL& url) {

1423 DCHECK(url.is_valid());	1514 DCHECK(url.is_valid());

1424 GURL::Replacements replacements;	1515 GURL::Replacements replacements;

1425 replacements.ClearUsername();	1516 replacements.ClearUsername();

1426 replacements.ClearPassword();	1517 replacements.ClearPassword();

(...skipping 24 matching lines...) Expand all Loading...
1451 if (length > 0)	1542 if (length > 0)

1452 ports.insert(StringToInt(WideToASCII(	1543 ports.insert(StringToInt(WideToASCII(

1453 allowed_ports.substr(last, length))));	1544 allowed_ports.substr(last, length))));

1454 last = i + 1;	1545 last = i + 1;

1455 }	1546 }

1456 }	1547 }

1457 explicitly_allowed_ports = ports;	1548 explicitly_allowed_ports = ports;

1458 }	1549 }

1459	1550

1460 } // namespace net	1551 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »