net/base/net_util.cc - Issue 115346: Convert Punycode domains to Unicode in URLs shown in the followings:...

Side by Side Diff: net/base/net_util.cc

Issue 115346: Convert Punycode domains to Unicode in URLs shown in the followings:... (Closed) Base URL: http://src.chromium.org/svn/trunk/src/

Patch Set: '' Created 11 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <algorithm>	5 #include <algorithm>

6 #include <unicode/ucnv.h>	6 #include <unicode/ucnv.h>

7 #include <unicode/uidna.h>	7 #include <unicode/uidna.h>

8 #include <unicode/ulocdata.h>	8 #include <unicode/ulocdata.h>

9 #include <unicode/uniset.h>	9 #include <unicode/uniset.h>

10 #include <unicode/uscript.h>	10 #include <unicode/uscript.h>

(...skipping 637 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
648 // with the literal input.	648 // with the literal input.

649 out->resize(host_begin_in_output + comp_len);	649 out->resize(host_begin_in_output + comp_len);

650 for (int i = 0; i < comp_len; i++)	650 for (int i = 0; i < comp_len; i++)

651 (*out)[host_begin_in_output + i] = comp[i];	651 (*out)[host_begin_in_output + i] = comp[i];

652 }	652 }

653	653

654 } // namespace	654 } // namespace

655	655

656 namespace net {	656 namespace net {

657	657

	658 // Appends the substring |in_component| of the URL |spec| to |output|,

	659 // and stores the resulting range in |out_component|. Unescapes the

	660 // substring before appending it if |unescape| is true.

	661 static void AppendFormattedComponent(const std::string& spec,

	662 const url_parse::Component& in_component,

	663 bool unescape,

	664 std::wstring* output,

	665 url_parse::Component* out_component);

	666

658 GURL FilePathToFileURL(const FilePath& path) {	667 GURL FilePathToFileURL(const FilePath& path) {

659 // Produce a URL like "file:///C:/foo" for a regular file, or	668 // Produce a URL like "file:///C:/foo" for a regular file, or

660 // "file://///server/path" for UNC. The URL canonicalizer will fix up the	669 // "file://///server/path" for UNC. The URL canonicalizer will fix up the

661 // latter case to be the canonical UNC form: "file://server/path"	670 // latter case to be the canonical UNC form: "file://server/path"

662 FilePath::StringType url_string(kFileURLPrefix);	671 FilePath::StringType url_string(kFileURLPrefix);

663 url_string.append(path.value());	672 url_string.append(path.value());

664	673

665 // Now do replacement of some characters. Since we assume the input is a	674 // Now do replacement of some characters. Since we assume the input is a

666 // literal filename, anything the URL parser might consider special should	675 // literal filename, anything the URL parser might consider special should

667 // be escaped here.	676 // be escaped here.

(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1024 // Host names are limited to 255 bytes.	1033 // Host names are limited to 255 bytes.

1025 char buffer[256];	1034 char buffer[256];

1026 int result = gethostname(buffer, sizeof(buffer));	1035 int result = gethostname(buffer, sizeof(buffer));

1027 if (result != 0) {	1036 if (result != 0) {

1028 DLOG(INFO) << "gethostname() failed with " << result;	1037 DLOG(INFO) << "gethostname() failed with " << result;

1029 buffer[0] = '\0';	1038 buffer[0] = '\0';

1030 }	1039 }

1031 return std::string(buffer);	1040 return std::string(buffer);

1032 }	1041 }

1033	1042

	1043 void AppendFormattedHost(const GURL& url,

	1044 const std::wstring& languages,

	1045 std::wstring* output,

	1046 url_parse::Parsed* new_parsed) {

	1047 const url_parse::Component& host =

	1048 url.parsed_for_possibly_invalid_spec().host;

	1049

	1050 if (host.is_nonempty()) {

	1051 // Handle possible IDN in the host name.

	1052 if (new_parsed)

	1053 new_parsed->host.begin = static_cast<int>(output->length());

	1054

	1055 const std::string& spec = url.possibly_invalid_spec();

	1056 DCHECK(host.begin >= 0 &&

	1057 ((spec.length() == 0 && host.begin == 0) ||

	1058 host.begin < static_cast<int>(spec.length())));

	1059 net::IDNToUnicode(&spec[host.begin], host.len, languages, output);

	1060

	1061 if (new_parsed) {

	1062 new_parsed->host.len =

	1063 static_cast<int>(output->length()) - new_parsed->host.begin;

	1064 }

	1065 } else if (new_parsed) {

	1066 new_parsed->host.reset();

	1067 }

	1068 }

	1069

	1070 /* static */

	1071 void AppendFormattedComponent(const std::string& spec,

	1072 const url_parse::Component& in_component,

	1073 bool unescape,

	1074 std::wstring* output,

	1075 url_parse::Component* out_component) {

	1076 if (in_component.is_nonempty()) {

	1077 out_component->begin = static_cast<int>(output->length());

	1078 if (unescape) {

	1079 output->append(UnescapeAndDecodeUTF8URLComponent(

	1080 spec.substr(in_component.begin, in_component.len),

	1081 UnescapeRule::NORMAL));

	1082 } else {

	1083 output->append(UTF8ToWide(spec.substr(

	1084 in_component.begin, in_component.len)));

	1085 }

	1086 out_component->len =

	1087 static_cast<int>(output->length()) - out_component->begin;

	1088 } else {

	1089 out_component->reset();

	1090 }

	1091 }

	1092

	1093 std::wstring FormatUrl(

	1094 const GURL& url, const std::wstring& languages, bool omit_username_password,

	1095 bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) {

	1096 url_parse::Parsed parsed_temp;

	1097 if (!new_parsed)

	1098 new_parsed = &parsed_temp;

	1099

	1100 std::wstring url_string;

	1101

	1102 // Check for empty URLs.

	1103 if (url.is_empty()) {

	1104 if (prefix_end)

	1105 *prefix_end = 0;

	1106 return url_string;

	1107 }

	1108

	1109 // We handle both valid and invalid URLs (this will give us the spec

	1110 // regardless of validity).

	1111 const std::string& spec = url.possibly_invalid_spec();

	1112 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

	1113

	1114 // Copy everything before the username (the scheme and the separators.)

	1115 // These are ASCII.

	1116 int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true);

	1117 for (int i = 0; i < pre_end; ++i)

	1118 url_string.push_back(spec[i]);

	1119 new_parsed->scheme = parsed.scheme;

	1120

	1121 if (omit_username_password) {

	1122 // Remove the username and password fields. We don't want to display those

	1123 // to the user since they can be used for attacks,

	1124 // e.g. "http://google.com:search@evil.ru/"

	1125 new_parsed->username.reset();

	1126 new_parsed->password.reset();

	1127 } else {

	1128 AppendFormattedComponent(

	1129 spec, parsed.username, unescape, &url_string, &new_parsed->username);

	1130 if (parsed.password.is_valid()) {

	1131 url_string.push_back(':');

	1132 }

	1133 AppendFormattedComponent(

	1134 spec, parsed.password, unescape, &url_string, &new_parsed->password);

	1135 if (parsed.username.is_valid() || parsed.password.is_valid()) {

	1136 url_string.push_back('@');

	1137 }

	1138 }

	1139 if (prefix_end)

	1140 *prefix_end = static_cast<size_t>(url_string.length());

	1141

	1142 AppendFormattedHost(url, languages, &url_string, new_parsed);

	1143

	1144 // Port.

	1145 if (parsed.port.is_nonempty()) {

	1146 url_string.push_back(':');

	1147 int begin = url_string.length();

	1148 for (int i = parsed.port.begin; i < parsed.port.end(); ++i)

	1149 url_string.push_back(spec[i]);

	1150 new_parsed->port.begin = begin;

	1151 new_parsed->port.len = url_string.length() - begin;

	1152 } else {

	1153 new_parsed->port.reset();

	1154 }

	1155

	1156 // Path and query both get the same general unescape & convert treatment.

	1157 AppendFormattedComponent(

	1158 spec, parsed.path, unescape, &url_string, &new_parsed->path);

	1159 if (parsed.query.is_valid())

	1160 url_string.push_back('?');

	1161 AppendFormattedComponent(

	1162 spec, parsed.query, unescape, &url_string, &new_parsed->query);

	1163

	1164 // Reference is stored in valid, unescaped UTF-8, so we can just convert.

	1165 if (parsed.ref.is_valid()) {

	1166 url_string.push_back('#');

	1167 int begin = url_string.length();

	1168 if (parsed.ref.len > 0)

	1169 url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin],

	1170 parsed.ref.len)));

	1171 new_parsed->ref.begin = begin;

	1172 new_parsed->ref.len = url_string.length() - begin;

	1173 }

	1174

	1175 return url_string;

	1176 }

	1177

1034 } // namespace net	1178 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »