Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(294)

Side by Side Diff: net/base/net_util.cc

Issue 11471041: Move DecodeFilenameValue and DecodeExt value into http_content_disposition. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Move non-net code out of net namespace. Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 #include <map> 9 #include <map>
10 10
11 #include "build/build_config.h" 11 #include "build/build_config.h"
12 12
13 #if defined(OS_WIN) 13 #if defined(OS_WIN)
14 #include <windows.h> 14 #include <windows.h>
15 #include <winsock2.h> 15 #include <winsock2.h>
16 #include <iphlpapi.h> 16 #include <iphlpapi.h>
17 #pragma comment(lib, "iphlpapi.lib") 17 #pragma comment(lib, "iphlpapi.lib")
18 #elif defined(OS_POSIX) 18 #elif defined(OS_POSIX)
19 #include <fcntl.h> 19 #include <fcntl.h>
20 #if !defined(OS_ANDROID) 20 #if !defined(OS_ANDROID)
21 #include <ifaddrs.h> 21 #include <ifaddrs.h>
22 #endif 22 #endif
23 #include <netdb.h> 23 #include <netdb.h>
24 #include <net/if.h> 24 #include <net/if.h>
25 #include <netinet/in.h> 25 #include <netinet/in.h>
26 #endif 26 #endif
27 27
28 #include "base/base64.h"
29 #include "base/basictypes.h" 28 #include "base/basictypes.h"
30 #include "base/file_path.h" 29 #include "base/file_path.h"
31 #include "base/file_util.h" 30 #include "base/file_util.h"
32 #include "base/i18n/file_util_icu.h" 31 #include "base/i18n/file_util_icu.h"
33 #include "base/i18n/icu_string_conversions.h" 32 #include "base/i18n/icu_string_conversions.h"
34 #include "base/i18n/time_formatting.h" 33 #include "base/i18n/time_formatting.h"
35 #include "base/json/string_escape.h" 34 #include "base/json/string_escape.h"
36 #include "base/lazy_instance.h" 35 #include "base/lazy_instance.h"
37 #include "base/logging.h" 36 #include "base/logging.h"
38 #include "base/memory/singleton.h" 37 #include "base/memory/singleton.h"
(...skipping 25 matching lines...) Expand all
64 #include "net/base/dns_util.h" 63 #include "net/base/dns_util.h"
65 #include "net/base/escape.h" 64 #include "net/base/escape.h"
66 #include "net/base/mime_util.h" 65 #include "net/base/mime_util.h"
67 #include "net/base/net_module.h" 66 #include "net/base/net_module.h"
68 #if defined(OS_WIN) 67 #if defined(OS_WIN)
69 #include "net/base/winsock_init.h" 68 #include "net/base/winsock_init.h"
70 #endif 69 #endif
71 #include "net/http/http_content_disposition.h" 70 #include "net/http/http_content_disposition.h"
72 #include "unicode/datefmt.h" 71 #include "unicode/datefmt.h"
73 #include "unicode/regex.h" 72 #include "unicode/regex.h"
74 #include "unicode/ucnv.h"
75 #include "unicode/uidna.h" 73 #include "unicode/uidna.h"
76 #include "unicode/ulocdata.h" 74 #include "unicode/ulocdata.h"
77 #include "unicode/uniset.h" 75 #include "unicode/uniset.h"
78 #include "unicode/uscript.h" 76 #include "unicode/uscript.h"
79 #include "unicode/uset.h" 77 #include "unicode/uset.h"
80 78
81 using base::Time; 79 using base::Time;
82 80
83 namespace net { 81 namespace net {
84 82
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 #if defined(OS_WIN) 166 #if defined(OS_WIN)
169 std::string::size_type CountTrailingChars( 167 std::string::size_type CountTrailingChars(
170 const std::string& input, 168 const std::string& input,
171 const std::string::value_type trailing_chars[]) { 169 const std::string::value_type trailing_chars[]) {
172 const size_t last_good_char = input.find_last_not_of(trailing_chars); 170 const size_t last_good_char = input.find_last_not_of(trailing_chars);
173 return (last_good_char == std::string::npos) ? 171 return (last_good_char == std::string::npos) ?
174 input.length() : (input.length() - last_good_char - 1); 172 input.length() : (input.length() - last_good_char - 1);
175 } 173 }
176 #endif 174 #endif
177 175
178 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence
179 // of bytes. If input is invalid, return false.
180 bool QPDecode(const std::string& input, std::string* output) {
181 std::string temp;
182 temp.reserve(input.size());
183 for (std::string::const_iterator it = input.begin(); it != input.end();
184 ++it) {
185 if (*it == '_') {
186 temp.push_back(' ');
187 } else if (*it == '=') {
188 if ((input.end() - it < 3) ||
189 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) ||
190 !IsHexDigit(static_cast<unsigned char>(*(it + 2))))
191 return false;
192 unsigned char ch = HexDigitToInt(*(it + 1)) * 16 +
193 HexDigitToInt(*(it + 2));
194 temp.push_back(static_cast<char>(ch));
195 ++it;
196 ++it;
197 } else if (0x20 < *it && *it < 0x7F) {
198 // In a Q-encoded word, only printable ASCII characters
199 // represent themselves. Besides, space, '=', '_' and '?' are
200 // not allowed, but they're already filtered out.
201 DCHECK_NE('=', *it);
202 DCHECK_NE('?', *it);
203 DCHECK_NE('_', *it);
204 temp.push_back(*it);
205 } else {
206 return false;
207 }
208 }
209 output->swap(temp);
210 return true;
211 }
212
213 enum RFC2047EncodingType {Q_ENCODING, B_ENCODING};
214 bool DecodeBQEncoding(const std::string& part,
215 RFC2047EncodingType enc_type,
216 const std::string& charset,
217 std::string* output) {
218 std::string decoded;
219 if (!((enc_type == B_ENCODING) ?
220 base::Base64Decode(part, &decoded) : QPDecode(part, &decoded)))
221 return false;
222
223 if (decoded.empty()) {
224 output->clear();
225 return true;
226 }
227
228 UErrorCode err = U_ZERO_ERROR;
229 UConverter* converter(ucnv_open(charset.c_str(), &err));
230 if (U_FAILURE(err))
231 return false;
232
233 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
234 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
235 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
236 // trailing '\0'.
237 size_t output_length = decoded.length() * 3 + 1;
238 char* buf = WriteInto(output, output_length);
239 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,
240 decoded.data(), decoded.length(), &err);
241 ucnv_close(converter);
242 if (U_FAILURE(err))
243 return false;
244 output->resize(output_length);
245 return true;
246 }
247
248 bool DecodeWord(const std::string& encoded_word,
249 const std::string& referrer_charset,
250 bool* is_rfc2047,
251 std::string* output) {
252 *is_rfc2047 = false;
253 output->clear();
254 if (encoded_word.empty())
255 return true;
256
257 if (!IsStringASCII(encoded_word)) {
258 // Try UTF-8, referrer_charset and the native OS default charset in turn.
259 if (IsStringUTF8(encoded_word)) {
260 *output = encoded_word;
261 } else {
262 string16 utf16_output;
263 if (!referrer_charset.empty() &&
264 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),
265 base::OnStringConversionError::FAIL,
266 &utf16_output)) {
267 *output = UTF16ToUTF8(utf16_output);
268 } else {
269 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
270 }
271 }
272
273 return true;
274 }
275
276 // RFC 2047 : one of encoding methods supported by Firefox and relatively
277 // widely used by web servers.
278 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
279 // We don't care about the length restriction (72 bytes) because
280 // many web servers generate encoded words longer than the limit.
281 std::string tmp;
282 *is_rfc2047 = true;
283 int part_index = 0;
284 std::string charset;
285 StringTokenizer t(encoded_word, "?");
286 RFC2047EncodingType enc_type = Q_ENCODING;
287 while (*is_rfc2047 && t.GetNext()) {
288 std::string part = t.token();
289 switch (part_index) {
290 case 0:
291 if (part != "=") {
292 *is_rfc2047 = false;
293 break;
294 }
295 ++part_index;
296 break;
297 case 1:
298 // Do we need charset validity check here?
299 charset = part;
300 ++part_index;
301 break;
302 case 2:
303 if (part.size() > 1 ||
304 part.find_first_of("bBqQ") == std::string::npos) {
305 *is_rfc2047 = false;
306 break;
307 }
308 if (part[0] == 'b' || part[0] == 'B') {
309 enc_type = B_ENCODING;
310 }
311 ++part_index;
312 break;
313 case 3:
314 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);
315 if (!*is_rfc2047) {
316 // Last minute failure. Invalid B/Q encoding. Rather than
317 // passing it through, return now.
318 return false;
319 }
320 ++part_index;
321 break;
322 case 4:
323 if (part != "=") {
324 // Another last minute failure !
325 // Likely to be a case of two encoded-words in a row or
326 // an encoded word followed by a non-encoded word. We can be
327 // generous, but it does not help much in terms of compatibility,
328 // I believe. Return immediately.
329 *is_rfc2047 = false;
330 return false;
331 }
332 ++part_index;
333 break;
334 default:
335 *is_rfc2047 = false;
336 return false;
337 }
338 }
339
340 if (*is_rfc2047) {
341 if (*(encoded_word.end() - 1) == '=') {
342 output->swap(tmp);
343 return true;
344 }
345 // encoded_word ending prematurely with '?' or extra '?'
346 *is_rfc2047 = false;
347 return false;
348 }
349
350 // We're not handling 'especial' characters quoted with '\', but
351 // it should be Ok because we're not an email client but a
352 // web browser.
353
354 // What IE6/7 does: %-escaped UTF-8.
355 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);
356 if (IsStringUTF8(tmp)) {
357 output->swap(tmp);
358 return true;
359 // We can try either the OS default charset or 'origin charset' here,
360 // As far as I can tell, IE does not support it. However, I've seen
361 // web servers emit %-escaped string in a legacy encoding (usually
362 // origin charset).
363 // TODO(jungshik) : Test IE further and consider adding a fallback here.
364 }
365 return false;
366 }
367
368 // Does some simple normalization of scripts so we can allow certain scripts 176 // Does some simple normalization of scripts so we can allow certain scripts
369 // to exist together. 177 // to exist together.
370 // TODO(brettw) bug 880223: we should allow some other languages to be 178 // TODO(brettw) bug 880223: we should allow some other languages to be
371 // combined such as Chinese and Latin. We will probably need a more 179 // combined such as Chinese and Latin. We will probably need a more
372 // complicated system of language pairs to have more fine-grained control. 180 // complicated system of language pairs to have more fine-grained control.
373 UScriptCode NormalizeScript(UScriptCode code) { 181 UScriptCode NormalizeScript(UScriptCode code) {
374 switch (code) { 182 switch (code) {
375 case USCRIPT_KATAKANA: 183 case USCRIPT_KATAKANA:
376 case USCRIPT_HIRAGANA: 184 case USCRIPT_HIRAGANA:
377 case USCRIPT_KATAKANA_OR_HIRAGANA: 185 case USCRIPT_KATAKANA_OR_HIRAGANA:
(...skipping 554 matching lines...) Expand 10 before | Expand all | Expand 10 after
932 // don't attempt to divine a file name out of them. 740 // don't attempt to divine a file name out of them.
933 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) 741 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))
934 return std::string(); 742 return std::string();
935 743
936 const std::string unescaped_url_filename = UnescapeURLComponent( 744 const std::string unescaped_url_filename = UnescapeURLComponent(
937 url.ExtractFileName(), 745 url.ExtractFileName(),
938 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 746 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
939 747
940 // The URL's path should be escaped UTF-8, but may not be. 748 // The URL's path should be escaped UTF-8, but may not be.
941 std::string decoded_filename = unescaped_url_filename; 749 std::string decoded_filename = unescaped_url_filename;
942 if (!IsStringASCII(decoded_filename)) { 750 if (!IsStringUTF8(decoded_filename)) {
943 bool ignore;
944 // TODO(jshin): this is probably not robust enough. To be sure, we need 751 // TODO(jshin): this is probably not robust enough. To be sure, we need
945 // encoding detection. 752 // encoding detection.
946 DecodeWord(unescaped_url_filename, referrer_charset, &ignore, 753 string16 utf16_output;
947 &decoded_filename); 754 if (!referrer_charset.empty() &&
755 base::CodepageToUTF16(unescaped_url_filename,
756 referrer_charset.c_str(),
757 base::OnStringConversionError::FAIL,
758 &utf16_output)) {
759 decoded_filename = UTF16ToUTF8(utf16_output);
760 } else {
761 decoded_filename = WideToUTF8(
762 base::SysNativeMBToWide(unescaped_url_filename));
763 }
948 } 764 }
949 // If the URL contains a (possibly empty) query, assume it is a generator, and 765 // If the URL contains a (possibly empty) query, assume it is a generator, and
950 // allow the determined extension to be overwritten. 766 // allow the determined extension to be overwritten.
951 *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); 767 *should_overwrite_extension = !decoded_filename.empty() && url.has_query();
952 768
953 return decoded_filename; 769 return decoded_filename;
954 } 770 }
955 771
956 #if defined(OS_WIN) 772 #if defined(OS_WIN)
957 // Returns whether the specified extension is automatically integrated into the 773 // Returns whether the specified extension is automatically integrated into the
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
1151 return std::string(); 967 return std::string();
1152 968
1153 begin += match.length(); 969 begin += match.length();
1154 970
1155 std::string ret; 971 std::string ret;
1156 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')), 972 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')),
1157 TRIM_ALL, &ret); 973 TRIM_ALL, &ret);
1158 return ret; 974 return ret;
1159 } 975 }
1160 976
1161 bool DecodeCharset(const std::string& input,
1162 std::string* decoded_charset,
1163 std::string* value) {
1164 StringTokenizer t(input, "'");
1165 t.set_options(StringTokenizer::RETURN_DELIMS);
1166 std::string temp_charset;
1167 std::string temp_value;
1168 int numDelimsSeen = 0;
1169 while (t.GetNext()) {
1170 if (t.token_is_delim()) {
1171 ++numDelimsSeen;
1172 continue;
1173 } else {
1174 switch (numDelimsSeen) {
1175 case 0:
1176 temp_charset = t.token();
1177 break;
1178 case 1:
1179 // Language is ignored.
1180 break;
1181 case 2:
1182 temp_value = t.token();
1183 break;
1184 default:
1185 return false;
1186 }
1187 }
1188 }
1189 if (numDelimsSeen != 2)
1190 return false;
1191 if (temp_charset.empty() || temp_value.empty())
1192 return false;
1193 decoded_charset->swap(temp_charset);
1194 value->swap(temp_value);
1195 return true;
1196 }
1197
1198 bool DecodeFilenameValue(const std::string& input,
1199 const std::string& referrer_charset,
1200 std::string* output) {
1201 std::string tmp;
1202 // Tokenize with whitespace characters.
1203 StringTokenizer t(input, " \t\n\r");
1204 t.set_options(StringTokenizer::RETURN_DELIMS);
1205 bool is_previous_token_rfc2047 = true;
1206 while (t.GetNext()) {
1207 if (t.token_is_delim()) {
1208 // If the previous non-delimiter token is not RFC2047-encoded,
1209 // put in a space in its place. Otherwise, skip over it.
1210 if (!is_previous_token_rfc2047) {
1211 tmp.push_back(' ');
1212 }
1213 continue;
1214 }
1215 // We don't support a single multibyte character split into
1216 // adjacent encoded words. Some broken mail clients emit headers
1217 // with that problem, but most web servers usually encode a filename
1218 // in a single encoded-word. Firefox/Thunderbird do not support
1219 // it, either.
1220 std::string decoded;
1221 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
1222 &decoded))
1223 return false;
1224 tmp.append(decoded);
1225 }
1226 output->swap(tmp);
1227 return true;
1228 }
1229
1230 bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
1231 if (param_value.find('"') != std::string::npos)
1232 return false;
1233
1234 std::string charset;
1235 std::string value;
1236 if (!DecodeCharset(param_value, &charset, &value))
1237 return false;
1238
1239 // RFC 5987 value should be ASCII-only.
1240 if (!IsStringASCII(value)) {
1241 decoded->clear();
1242 return true;
1243 }
1244
1245 std::string unescaped = UnescapeURLComponent(value,
1246 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
1247
1248 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded);
1249 }
1250
1251 string16 IDNToUnicode(const std::string& host, 977 string16 IDNToUnicode(const std::string& host,
1252 const std::string& languages) { 978 const std::string& languages) {
1253 return IDNToUnicodeWithOffsets(host, languages, NULL); 979 return IDNToUnicodeWithOffsets(host, languages, NULL);
1254 } 980 }
1255 981
1256 std::string CanonicalizeHost(const std::string& host, 982 std::string CanonicalizeHost(const std::string& host,
1257 url_canon::CanonHostInfo* host_info) { 983 url_canon::CanonHostInfo* host_info) {
1258 // Try to canonicalize the host. 984 // Try to canonicalize the host.
1259 const url_parse::Component raw_host_component( 985 const url_parse::Component raw_host_component(
1260 0, static_cast<int>(host.length())); 986 0, static_cast<int>(host.length()));
(...skipping 1186 matching lines...) Expand 10 before | Expand all | Expand 10 after
2447 2173
2448 NetworkInterface::NetworkInterface(const std::string& name, 2174 NetworkInterface::NetworkInterface(const std::string& name,
2449 const IPAddressNumber& address) 2175 const IPAddressNumber& address)
2450 : name(name), address(address) { 2176 : name(name), address(address) {
2451 } 2177 }
2452 2178
2453 NetworkInterface::~NetworkInterface() { 2179 NetworkInterface::~NetworkInterface() {
2454 } 2180 }
2455 2181
2456 } // namespace net 2182 } // namespace net
OLDNEW
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698