Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Side by Side Diff: net/base/net_util.cc

Issue 115346: Convert Punycode domains to Unicode in URLs shown in the followings:... (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: '' Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <algorithm> 5 #include <algorithm>
6 #include <unicode/ucnv.h> 6 #include <unicode/ucnv.h>
7 #include <unicode/uidna.h> 7 #include <unicode/uidna.h>
8 #include <unicode/ulocdata.h> 8 #include <unicode/ulocdata.h>
9 #include <unicode/uniset.h> 9 #include <unicode/uniset.h>
10 #include <unicode/uscript.h> 10 #include <unicode/uscript.h>
(...skipping 637 matching lines...) Expand 10 before | Expand all | Expand 10 after
648 // with the literal input. 648 // with the literal input.
649 out->resize(host_begin_in_output + comp_len); 649 out->resize(host_begin_in_output + comp_len);
650 for (int i = 0; i < comp_len; i++) 650 for (int i = 0; i < comp_len; i++)
651 (*out)[host_begin_in_output + i] = comp[i]; 651 (*out)[host_begin_in_output + i] = comp[i];
652 } 652 }
653 653
654 } // namespace 654 } // namespace
655 655
656 namespace net { 656 namespace net {
657 657
658 // Appends the substring |in_component| inside of the URL |spec| to |output|,
659 // and the resulting range will be filled into |out_component|. Calls the
660 // unescaper for the substring if |unescape| is true.
661 static void AppendFormattedComponent(const std::string& spec,
662 const url_parse::Component& in_component,
663 bool unescape,
664 std::wstring* output,
665 url_parse::Component* out_component);
666
658 GURL FilePathToFileURL(const FilePath& path) { 667 GURL FilePathToFileURL(const FilePath& path) {
659 // Produce a URL like "file:///C:/foo" for a regular file, or 668 // Produce a URL like "file:///C:/foo" for a regular file, or
660 // "file://///server/path" for UNC. The URL canonicalizer will fix up the 669 // "file://///server/path" for UNC. The URL canonicalizer will fix up the
661 // latter case to be the canonical UNC form: "file://server/path" 670 // latter case to be the canonical UNC form: "file://server/path"
662 FilePath::StringType url_string(kFileURLPrefix); 671 FilePath::StringType url_string(kFileURLPrefix);
663 url_string.append(path.value()); 672 url_string.append(path.value());
664 673
665 // Now do replacement of some characters. Since we assume the input is a 674 // Now do replacement of some characters. Since we assume the input is a
666 // literal filename, anything the URL parser might consider special should 675 // literal filename, anything the URL parser might consider special should
667 // be escaped here. 676 // be escaped here.
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after
1024 // Host names are limited to 255 bytes. 1033 // Host names are limited to 255 bytes.
1025 char buffer[256]; 1034 char buffer[256];
1026 int result = gethostname(buffer, sizeof(buffer)); 1035 int result = gethostname(buffer, sizeof(buffer));
1027 if (result != 0) { 1036 if (result != 0) {
1028 DLOG(INFO) << "gethostname() failed with " << result; 1037 DLOG(INFO) << "gethostname() failed with " << result;
1029 buffer[0] = '\0'; 1038 buffer[0] = '\0';
1030 } 1039 }
1031 return std::string(buffer); 1040 return std::string(buffer);
1032 } 1041 }
1033 1042
1043 void AppendFormattedHost(const GURL& url,
1044 const std::wstring& languages,
1045 std::wstring* output,
1046 url_parse::Parsed* new_parsed) {
1047 const url_parse::Component& host =
1048 url.parsed_for_possibly_invalid_spec().host;
1049
1050 if (host.is_nonempty()) {
1051 // Handle possible IDN in the host name.
1052 if (new_parsed)
1053 new_parsed->host.begin = static_cast<int>(output->length());
1054
1055 const std::string& spec = url.possibly_invalid_spec();
1056 DCHECK(host.begin >= 0 &&
1057 ((spec.length() == 0 && host.begin == 0) ||
1058 host.begin < static_cast<int>(spec.length())));
1059 net::IDNToUnicode(&spec[host.begin], host.len, languages, output);
1060
1061 if (new_parsed) {
1062 new_parsed->host.len =
1063 static_cast<int>(output->length()) - new_parsed->host.begin;
1064 }
1065 } else if (new_parsed) {
1066 new_parsed->host.reset();
1067 }
1068 }
1069
1070 /* static */
1071 void AppendFormattedComponent(const std::string& spec,
1072 const url_parse::Component& in_component,
1073 bool unescape,
1074 std::wstring* output,
1075 url_parse::Component* out_component) {
1076 if (in_component.is_nonempty()) {
1077 out_component->begin = static_cast<int>(output->length());
1078 if (unescape) {
1079 output->append(UnescapeAndDecodeUTF8URLComponent(
1080 spec.substr(in_component.begin, in_component.len),
1081 UnescapeRule::NORMAL));
1082 } else {
1083 output->append(UTF8ToWide(spec.substr(
1084 in_component.begin, in_component.len)));
1085 }
1086 out_component->len =
1087 static_cast<int>(output->length()) - out_component->begin;
1088 } else {
1089 out_component->reset();
1090 }
1091 }
1092
1093 std::wstring FormatUrl(
1094 const GURL& url, const std::wstring& languages, bool omit_username_password,
1095 bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) {
1096 url_parse::Parsed parsed_temp;
1097 if (!new_parsed)
1098 new_parsed = &parsed_temp;
1099
1100 std::wstring url_string;
1101
1102 // Check for empty URLs or 0 available text width.
1103 if (url.is_empty()) {
1104 if (prefix_end)
1105 *prefix_end = 0;
1106 return url_string;
1107 }
1108
1109 // We handle both valid and invalid URLs (this will give us the spec
1110 // regardless of validity).
1111 const std::string& spec = url.possibly_invalid_spec();
1112 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
1113
1114 // Copy everything before the username (the scheme and the separators.)
1115 // These are ASCII.
1116 int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true);
1117 for (int i = 0; i < pre_end; ++i)
1118 url_string.push_back(spec[i]);
1119 new_parsed->scheme = parsed.scheme;
1120
1121 if (omit_username_password) {
1122 // Remove the username and password fields. We don't want to display those
1123 // to the user since they can be used for attacks,
1124 // e.g. "http://google.com:search@evil.ru/"
1125 new_parsed->username.reset();
1126 new_parsed->password.reset();
1127 } else {
1128 AppendFormattedComponent(
1129 spec, parsed.username, unescape, &url_string, &new_parsed->username);
1130 if (parsed.password.is_valid()) {
1131 url_string.push_back(':');
1132 }
1133 AppendFormattedComponent(
1134 spec, parsed.password, unescape, &url_string, &new_parsed->password);
1135 if (parsed.username.is_valid() || parsed.password.is_valid()) {
1136 url_string.push_back('@');
1137 }
1138 }
1139 if (prefix_end)
1140 *prefix_end = static_cast<size_t>(url_string.length());
1141
1142 AppendFormattedHost(url, languages, &url_string, new_parsed);
1143
1144 // Port.
1145 if (parsed.port.is_nonempty()) {
1146 url_string.push_back(':');
1147 int begin = url_string.length();
1148 for (int i = parsed.port.begin; i < parsed.port.end(); ++i)
1149 url_string.push_back(spec[i]);
1150 new_parsed->port.begin = begin;
1151 new_parsed->port.len = url_string.length() - begin;
1152 } else {
1153 new_parsed->port.reset();
1154 }
1155
1156 // Path and query both get the same general unescape & convert treatment.
1157 AppendFormattedComponent(
1158 spec, parsed.path, unescape, &url_string, &new_parsed->path);
1159 if (parsed.query.is_valid())
1160 url_string.push_back('?');
1161 AppendFormattedComponent(
1162 spec, parsed.query, unescape, &url_string, &new_parsed->query);
1163
1164 // Reference is stored in valid, unescaped UTF-8, so we can just convert.
1165 if (parsed.ref.is_valid()) {
1166 url_string.push_back('#');
1167 int begin = url_string.length();
1168 if (parsed.ref.len > 0)
1169 url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin],
1170 parsed.ref.len)));
1171 new_parsed->ref.begin = begin;
1172 new_parsed->ref.len = url_string.length() - begin;
1173 }
1174
1175 return url_string;
1176 }
1177
1034 } // namespace net 1178 } // namespace net
OLDNEW
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698