OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <map> | 9 #include <map> |
10 | 10 |
11 #include "build/build_config.h" | 11 #include "build/build_config.h" |
12 | 12 |
13 #if defined(OS_WIN) | 13 #if defined(OS_WIN) |
14 #include <windows.h> | 14 #include <windows.h> |
15 #include <winsock2.h> | 15 #include <winsock2.h> |
16 #include <iphlpapi.h> | 16 #include <iphlpapi.h> |
17 #pragma comment(lib, "iphlpapi.lib") | 17 #pragma comment(lib, "iphlpapi.lib") |
18 #elif defined(OS_POSIX) | 18 #elif defined(OS_POSIX) |
19 #include <fcntl.h> | 19 #include <fcntl.h> |
20 #if !defined(OS_ANDROID) | 20 #if !defined(OS_ANDROID) |
21 #include <ifaddrs.h> | 21 #include <ifaddrs.h> |
22 #endif | 22 #endif |
23 #include <netdb.h> | 23 #include <netdb.h> |
24 #include <net/if.h> | 24 #include <net/if.h> |
25 #include <netinet/in.h> | 25 #include <netinet/in.h> |
26 #endif | 26 #endif |
27 | 27 |
28 #include "base/base64.h" | |
29 #include "base/basictypes.h" | 28 #include "base/basictypes.h" |
30 #include "base/file_path.h" | 29 #include "base/file_path.h" |
31 #include "base/file_util.h" | 30 #include "base/file_util.h" |
32 #include "base/i18n/file_util_icu.h" | 31 #include "base/i18n/file_util_icu.h" |
33 #include "base/i18n/icu_string_conversions.h" | 32 #include "base/i18n/icu_string_conversions.h" |
34 #include "base/i18n/time_formatting.h" | 33 #include "base/i18n/time_formatting.h" |
35 #include "base/json/string_escape.h" | 34 #include "base/json/string_escape.h" |
36 #include "base/lazy_instance.h" | 35 #include "base/lazy_instance.h" |
37 #include "base/logging.h" | 36 #include "base/logging.h" |
38 #include "base/memory/singleton.h" | 37 #include "base/memory/singleton.h" |
(...skipping 25 matching lines...) Expand all Loading... |
64 #include "net/base/dns_util.h" | 63 #include "net/base/dns_util.h" |
65 #include "net/base/escape.h" | 64 #include "net/base/escape.h" |
66 #include "net/base/mime_util.h" | 65 #include "net/base/mime_util.h" |
67 #include "net/base/net_module.h" | 66 #include "net/base/net_module.h" |
68 #if defined(OS_WIN) | 67 #if defined(OS_WIN) |
69 #include "net/base/winsock_init.h" | 68 #include "net/base/winsock_init.h" |
70 #endif | 69 #endif |
71 #include "net/http/http_content_disposition.h" | 70 #include "net/http/http_content_disposition.h" |
72 #include "unicode/datefmt.h" | 71 #include "unicode/datefmt.h" |
73 #include "unicode/regex.h" | 72 #include "unicode/regex.h" |
74 #include "unicode/ucnv.h" | |
75 #include "unicode/uidna.h" | 73 #include "unicode/uidna.h" |
76 #include "unicode/ulocdata.h" | 74 #include "unicode/ulocdata.h" |
77 #include "unicode/uniset.h" | 75 #include "unicode/uniset.h" |
78 #include "unicode/uscript.h" | 76 #include "unicode/uscript.h" |
79 #include "unicode/uset.h" | 77 #include "unicode/uset.h" |
80 | 78 |
81 using base::Time; | 79 using base::Time; |
82 | 80 |
83 namespace net { | 81 namespace net { |
84 | 82 |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
168 #if defined(OS_WIN) | 166 #if defined(OS_WIN) |
169 std::string::size_type CountTrailingChars( | 167 std::string::size_type CountTrailingChars( |
170 const std::string& input, | 168 const std::string& input, |
171 const std::string::value_type trailing_chars[]) { | 169 const std::string::value_type trailing_chars[]) { |
172 const size_t last_good_char = input.find_last_not_of(trailing_chars); | 170 const size_t last_good_char = input.find_last_not_of(trailing_chars); |
173 return (last_good_char == std::string::npos) ? | 171 return (last_good_char == std::string::npos) ? |
174 input.length() : (input.length() - last_good_char - 1); | 172 input.length() : (input.length() - last_good_char - 1); |
175 } | 173 } |
176 #endif | 174 #endif |
177 | 175 |
178 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence | |
179 // of bytes. If input is invalid, return false. | |
180 bool QPDecode(const std::string& input, std::string* output) { | |
181 std::string temp; | |
182 temp.reserve(input.size()); | |
183 for (std::string::const_iterator it = input.begin(); it != input.end(); | |
184 ++it) { | |
185 if (*it == '_') { | |
186 temp.push_back(' '); | |
187 } else if (*it == '=') { | |
188 if ((input.end() - it < 3) || | |
189 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) || | |
190 !IsHexDigit(static_cast<unsigned char>(*(it + 2)))) | |
191 return false; | |
192 unsigned char ch = HexDigitToInt(*(it + 1)) * 16 + | |
193 HexDigitToInt(*(it + 2)); | |
194 temp.push_back(static_cast<char>(ch)); | |
195 ++it; | |
196 ++it; | |
197 } else if (0x20 < *it && *it < 0x7F) { | |
198 // In a Q-encoded word, only printable ASCII characters | |
199 // represent themselves. Besides, space, '=', '_' and '?' are | |
200 // not allowed, but they're already filtered out. | |
201 DCHECK_NE('=', *it); | |
202 DCHECK_NE('?', *it); | |
203 DCHECK_NE('_', *it); | |
204 temp.push_back(*it); | |
205 } else { | |
206 return false; | |
207 } | |
208 } | |
209 output->swap(temp); | |
210 return true; | |
211 } | |
212 | |
213 enum RFC2047EncodingType {Q_ENCODING, B_ENCODING}; | |
214 bool DecodeBQEncoding(const std::string& part, | |
215 RFC2047EncodingType enc_type, | |
216 const std::string& charset, | |
217 std::string* output) { | |
218 std::string decoded; | |
219 if (!((enc_type == B_ENCODING) ? | |
220 base::Base64Decode(part, &decoded) : QPDecode(part, &decoded))) | |
221 return false; | |
222 | |
223 if (decoded.empty()) { | |
224 output->clear(); | |
225 return true; | |
226 } | |
227 | |
228 UErrorCode err = U_ZERO_ERROR; | |
229 UConverter* converter(ucnv_open(charset.c_str(), &err)); | |
230 if (U_FAILURE(err)) | |
231 return false; | |
232 | |
233 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. | |
234 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes | |
235 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a | |
236 // trailing '\0'. | |
237 size_t output_length = decoded.length() * 3 + 1; | |
238 char* buf = WriteInto(output, output_length); | |
239 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length, | |
240 decoded.data(), decoded.length(), &err); | |
241 ucnv_close(converter); | |
242 if (U_FAILURE(err)) | |
243 return false; | |
244 output->resize(output_length); | |
245 return true; | |
246 } | |
247 | |
248 bool DecodeWord(const std::string& encoded_word, | |
249 const std::string& referrer_charset, | |
250 bool* is_rfc2047, | |
251 std::string* output) { | |
252 *is_rfc2047 = false; | |
253 output->clear(); | |
254 if (encoded_word.empty()) | |
255 return true; | |
256 | |
257 if (!IsStringASCII(encoded_word)) { | |
258 // Try UTF-8, referrer_charset and the native OS default charset in turn. | |
259 if (IsStringUTF8(encoded_word)) { | |
260 *output = encoded_word; | |
261 } else { | |
262 string16 utf16_output; | |
263 if (!referrer_charset.empty() && | |
264 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | |
265 base::OnStringConversionError::FAIL, | |
266 &utf16_output)) { | |
267 *output = UTF16ToUTF8(utf16_output); | |
268 } else { | |
269 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | |
270 } | |
271 } | |
272 | |
273 return true; | |
274 } | |
275 | |
276 // RFC 2047 : one of encoding methods supported by Firefox and relatively | |
277 // widely used by web servers. | |
278 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. | |
279 // We don't care about the length restriction (72 bytes) because | |
280 // many web servers generate encoded words longer than the limit. | |
281 std::string tmp; | |
282 *is_rfc2047 = true; | |
283 int part_index = 0; | |
284 std::string charset; | |
285 StringTokenizer t(encoded_word, "?"); | |
286 RFC2047EncodingType enc_type = Q_ENCODING; | |
287 while (*is_rfc2047 && t.GetNext()) { | |
288 std::string part = t.token(); | |
289 switch (part_index) { | |
290 case 0: | |
291 if (part != "=") { | |
292 *is_rfc2047 = false; | |
293 break; | |
294 } | |
295 ++part_index; | |
296 break; | |
297 case 1: | |
298 // Do we need charset validity check here? | |
299 charset = part; | |
300 ++part_index; | |
301 break; | |
302 case 2: | |
303 if (part.size() > 1 || | |
304 part.find_first_of("bBqQ") == std::string::npos) { | |
305 *is_rfc2047 = false; | |
306 break; | |
307 } | |
308 if (part[0] == 'b' || part[0] == 'B') { | |
309 enc_type = B_ENCODING; | |
310 } | |
311 ++part_index; | |
312 break; | |
313 case 3: | |
314 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); | |
315 if (!*is_rfc2047) { | |
316 // Last minute failure. Invalid B/Q encoding. Rather than | |
317 // passing it through, return now. | |
318 return false; | |
319 } | |
320 ++part_index; | |
321 break; | |
322 case 4: | |
323 if (part != "=") { | |
324 // Another last minute failure ! | |
325 // Likely to be a case of two encoded-words in a row or | |
326 // an encoded word followed by a non-encoded word. We can be | |
327 // generous, but it does not help much in terms of compatibility, | |
328 // I believe. Return immediately. | |
329 *is_rfc2047 = false; | |
330 return false; | |
331 } | |
332 ++part_index; | |
333 break; | |
334 default: | |
335 *is_rfc2047 = false; | |
336 return false; | |
337 } | |
338 } | |
339 | |
340 if (*is_rfc2047) { | |
341 if (*(encoded_word.end() - 1) == '=') { | |
342 output->swap(tmp); | |
343 return true; | |
344 } | |
345 // encoded_word ending prematurelly with '?' or extra '?' | |
346 *is_rfc2047 = false; | |
347 return false; | |
348 } | |
349 | |
350 // We're not handling 'especial' characters quoted with '\', but | |
351 // it should be Ok because we're not an email client but a | |
352 // web browser. | |
353 | |
354 // What IE6/7 does: %-escaped UTF-8. | |
355 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); | |
356 if (IsStringUTF8(tmp)) { | |
357 output->swap(tmp); | |
358 return true; | |
359 // We can try either the OS default charset or 'origin charset' here, | |
360 // As far as I can tell, IE does not support it. However, I've seen | |
361 // web servers emit %-escaped string in a legacy encoding (usually | |
362 // origin charset). | |
363 // TODO(jungshik) : Test IE further and consider adding a fallback here. | |
364 } | |
365 return false; | |
366 } | |
367 | |
368 // Does some simple normalization of scripts so we can allow certain scripts | 176 // Does some simple normalization of scripts so we can allow certain scripts |
369 // to exist together. | 177 // to exist together. |
370 // TODO(brettw) bug 880223: we should allow some other languages to be | 178 // TODO(brettw) bug 880223: we should allow some other languages to be |
371 // oombined such as Chinese and Latin. We will probably need a more | 179 // oombined such as Chinese and Latin. We will probably need a more |
372 // complicated system of language pairs to have more fine-grained control. | 180 // complicated system of language pairs to have more fine-grained control. |
373 UScriptCode NormalizeScript(UScriptCode code) { | 181 UScriptCode NormalizeScript(UScriptCode code) { |
374 switch (code) { | 182 switch (code) { |
375 case USCRIPT_KATAKANA: | 183 case USCRIPT_KATAKANA: |
376 case USCRIPT_HIRAGANA: | 184 case USCRIPT_HIRAGANA: |
377 case USCRIPT_KATAKANA_OR_HIRAGANA: | 185 case USCRIPT_KATAKANA_OR_HIRAGANA: |
(...skipping 554 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
932 // don't attempt to divine a file name out of them. | 740 // don't attempt to divine a file name out of them. |
933 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) | 741 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) |
934 return std::string(); | 742 return std::string(); |
935 | 743 |
936 const std::string unescaped_url_filename = UnescapeURLComponent( | 744 const std::string unescaped_url_filename = UnescapeURLComponent( |
937 url.ExtractFileName(), | 745 url.ExtractFileName(), |
938 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); | 746 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
939 | 747 |
940 // The URL's path should be escaped UTF-8, but may not be. | 748 // The URL's path should be escaped UTF-8, but may not be. |
941 std::string decoded_filename = unescaped_url_filename; | 749 std::string decoded_filename = unescaped_url_filename; |
942 if (!IsStringASCII(decoded_filename)) { | 750 if (!IsStringUTF8(decoded_filename)) { |
943 bool ignore; | |
944 // TODO(jshin): this is probably not robust enough. To be sure, we need | 751 // TODO(jshin): this is probably not robust enough. To be sure, we need |
945 // encoding detection. | 752 // encoding detection. |
946 DecodeWord(unescaped_url_filename, referrer_charset, &ignore, | 753 string16 utf16_output; |
947 &decoded_filename); | 754 if (!referrer_charset.empty() && |
| 755 base::CodepageToUTF16(unescaped_url_filename, |
| 756 referrer_charset.c_str(), |
| 757 base::OnStringConversionError::FAIL, |
| 758 &utf16_output)) { |
| 759 decoded_filename = UTF16ToUTF8(utf16_output); |
| 760 } else { |
| 761 decoded_filename = WideToUTF8( |
| 762 base::SysNativeMBToWide(unescaped_url_filename)); |
| 763 } |
948 } | 764 } |
949 // If the URL contains a (possibly empty) query, assume it is a generator, and | 765 // If the URL contains a (possibly empty) query, assume it is a generator, and |
950 // allow the determined extension to be overwritten. | 766 // allow the determined extension to be overwritten. |
951 *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); | 767 *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); |
952 | 768 |
953 return decoded_filename; | 769 return decoded_filename; |
954 } | 770 } |
955 | 771 |
956 #if defined(OS_WIN) | 772 #if defined(OS_WIN) |
957 // Returns whether the specified extension is automatically integrated into the | 773 // Returns whether the specified extension is automatically integrated into the |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1151 return std::string(); | 967 return std::string(); |
1152 | 968 |
1153 begin += match.length(); | 969 begin += match.length(); |
1154 | 970 |
1155 std::string ret; | 971 std::string ret; |
1156 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')), | 972 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')), |
1157 TRIM_ALL, &ret); | 973 TRIM_ALL, &ret); |
1158 return ret; | 974 return ret; |
1159 } | 975 } |
1160 | 976 |
1161 bool DecodeCharset(const std::string& input, | |
1162 std::string* decoded_charset, | |
1163 std::string* value) { | |
1164 StringTokenizer t(input, "'"); | |
1165 t.set_options(StringTokenizer::RETURN_DELIMS); | |
1166 std::string temp_charset; | |
1167 std::string temp_value; | |
1168 int numDelimsSeen = 0; | |
1169 while (t.GetNext()) { | |
1170 if (t.token_is_delim()) { | |
1171 ++numDelimsSeen; | |
1172 continue; | |
1173 } else { | |
1174 switch (numDelimsSeen) { | |
1175 case 0: | |
1176 temp_charset = t.token(); | |
1177 break; | |
1178 case 1: | |
1179 // Language is ignored. | |
1180 break; | |
1181 case 2: | |
1182 temp_value = t.token(); | |
1183 break; | |
1184 default: | |
1185 return false; | |
1186 } | |
1187 } | |
1188 } | |
1189 if (numDelimsSeen != 2) | |
1190 return false; | |
1191 if (temp_charset.empty() || temp_value.empty()) | |
1192 return false; | |
1193 decoded_charset->swap(temp_charset); | |
1194 value->swap(temp_value); | |
1195 return true; | |
1196 } | |
1197 | |
1198 bool DecodeFilenameValue(const std::string& input, | |
1199 const std::string& referrer_charset, | |
1200 std::string* output) { | |
1201 std::string tmp; | |
1202 // Tokenize with whitespace characters. | |
1203 StringTokenizer t(input, " \t\n\r"); | |
1204 t.set_options(StringTokenizer::RETURN_DELIMS); | |
1205 bool is_previous_token_rfc2047 = true; | |
1206 while (t.GetNext()) { | |
1207 if (t.token_is_delim()) { | |
1208 // If the previous non-delimeter token is not RFC2047-encoded, | |
1209 // put in a space in its place. Otheriwse, skip over it. | |
1210 if (!is_previous_token_rfc2047) { | |
1211 tmp.push_back(' '); | |
1212 } | |
1213 continue; | |
1214 } | |
1215 // We don't support a single multibyte character split into | |
1216 // adjacent encoded words. Some broken mail clients emit headers | |
1217 // with that problem, but most web servers usually encode a filename | |
1218 // in a single encoded-word. Firefox/Thunderbird do not support | |
1219 // it, either. | |
1220 std::string decoded; | |
1221 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | |
1222 &decoded)) | |
1223 return false; | |
1224 tmp.append(decoded); | |
1225 } | |
1226 output->swap(tmp); | |
1227 return true; | |
1228 } | |
1229 | |
1230 bool DecodeExtValue(const std::string& param_value, std::string* decoded) { | |
1231 if (param_value.find('"') != std::string::npos) | |
1232 return false; | |
1233 | |
1234 std::string charset; | |
1235 std::string value; | |
1236 if (!DecodeCharset(param_value, &charset, &value)) | |
1237 return false; | |
1238 | |
1239 // RFC 5987 value should be ASCII-only. | |
1240 if (!IsStringASCII(value)) { | |
1241 decoded->clear(); | |
1242 return true; | |
1243 } | |
1244 | |
1245 std::string unescaped = UnescapeURLComponent(value, | |
1246 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); | |
1247 | |
1248 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | |
1249 } | |
1250 | |
1251 string16 IDNToUnicode(const std::string& host, | 977 string16 IDNToUnicode(const std::string& host, |
1252 const std::string& languages) { | 978 const std::string& languages) { |
1253 return IDNToUnicodeWithOffsets(host, languages, NULL); | 979 return IDNToUnicodeWithOffsets(host, languages, NULL); |
1254 } | 980 } |
1255 | 981 |
1256 std::string CanonicalizeHost(const std::string& host, | 982 std::string CanonicalizeHost(const std::string& host, |
1257 url_canon::CanonHostInfo* host_info) { | 983 url_canon::CanonHostInfo* host_info) { |
1258 // Try to canonicalize the host. | 984 // Try to canonicalize the host. |
1259 const url_parse::Component raw_host_component( | 985 const url_parse::Component raw_host_component( |
1260 0, static_cast<int>(host.length())); | 986 0, static_cast<int>(host.length())); |
(...skipping 1186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2447 | 2173 |
2448 NetworkInterface::NetworkInterface(const std::string& name, | 2174 NetworkInterface::NetworkInterface(const std::string& name, |
2449 const IPAddressNumber& address) | 2175 const IPAddressNumber& address) |
2450 : name(name), address(address) { | 2176 : name(name), address(address) { |
2451 } | 2177 } |
2452 | 2178 |
2453 NetworkInterface::~NetworkInterface() { | 2179 NetworkInterface::~NetworkInterface() { |
2454 } | 2180 } |
2455 | 2181 |
2456 } // namespace net | 2182 } // namespace net |
OLD | NEW |