net/base/net_util.cc - Issue 11471041: Move DecodeFilenameValue and DecodeExtValue into http_content_disposition.

Side by Side Diff: net/base/net_util.cc

Issue 11471041: Move DecodeFilenameValue and DecodeExtValue into http_content_disposition. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <iterator>	8 #include <iterator>

9 #include <map>	9 #include <map>

10	10

11 #include "build/build_config.h"	11 #include "build/build_config.h"

12	12

13 #if defined(OS_WIN)	13 #if defined(OS_WIN)

14 #include <windows.h>	14 #include <windows.h>

15 #include <winsock2.h>	15 #include <winsock2.h>

16 #include <iphlpapi.h>	16 #include <iphlpapi.h>

17 #pragma comment(lib, "iphlpapi.lib")	17 #pragma comment(lib, "iphlpapi.lib")

18 #elif defined(OS_POSIX)	18 #elif defined(OS_POSIX)

19 #include <fcntl.h>	19 #include <fcntl.h>

20 #if !defined(OS_ANDROID)	20 #if !defined(OS_ANDROID)

21 #include <ifaddrs.h>	21 #include <ifaddrs.h>

22 #endif	22 #endif

23 #include <netdb.h>	23 #include <netdb.h>

24 #include <net/if.h>	24 #include <net/if.h>

25 #include <netinet/in.h>	25 #include <netinet/in.h>

26 #endif	26 #endif

27	27

28 #include "base/base64.h"

29 #include "base/basictypes.h"	28 #include "base/basictypes.h"

30 #include "base/file_path.h"	29 #include "base/file_path.h"

31 #include "base/file_util.h"	30 #include "base/file_util.h"

32 #include "base/i18n/file_util_icu.h"	31 #include "base/i18n/file_util_icu.h"

33 #include "base/i18n/icu_string_conversions.h"	32 #include "base/i18n/icu_string_conversions.h"

34 #include "base/i18n/time_formatting.h"	33 #include "base/i18n/time_formatting.h"

35 #include "base/json/string_escape.h"	34 #include "base/json/string_escape.h"

36 #include "base/lazy_instance.h"	35 #include "base/lazy_instance.h"

37 #include "base/logging.h"	36 #include "base/logging.h"

38 #include "base/memory/singleton.h"	37 #include "base/memory/singleton.h"

(...skipping 25 matching lines...) Expand all Loading...
64 #include "net/base/dns_util.h"	63 #include "net/base/dns_util.h"

65 #include "net/base/escape.h"	64 #include "net/base/escape.h"

66 #include "net/base/mime_util.h"	65 #include "net/base/mime_util.h"

67 #include "net/base/net_module.h"	66 #include "net/base/net_module.h"

68 #if defined(OS_WIN)	67 #if defined(OS_WIN)

69 #include "net/base/winsock_init.h"	68 #include "net/base/winsock_init.h"

70 #endif	69 #endif

71 #include "net/http/http_content_disposition.h"	70 #include "net/http/http_content_disposition.h"

72 #include "unicode/datefmt.h"	71 #include "unicode/datefmt.h"

73 #include "unicode/regex.h"	72 #include "unicode/regex.h"

74 #include "unicode/ucnv.h"

75 #include "unicode/uidna.h"	73 #include "unicode/uidna.h"

76 #include "unicode/ulocdata.h"	74 #include "unicode/ulocdata.h"

77 #include "unicode/uniset.h"	75 #include "unicode/uniset.h"

78 #include "unicode/uscript.h"	76 #include "unicode/uscript.h"

79 #include "unicode/uset.h"	77 #include "unicode/uset.h"

80	78

81 using base::Time;	79 using base::Time;

82	80

83 namespace net {	81 namespace net {

84	82

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
168 #if defined(OS_WIN)	166 #if defined(OS_WIN)

169 std::string::size_type CountTrailingChars(	167 std::string::size_type CountTrailingChars(

170 const std::string& input,	168 const std::string& input,

171 const std::string::value_type trailing_chars[]) {	169 const std::string::value_type trailing_chars[]) {

172 const size_t last_good_char = input.find_last_not_of(trailing_chars);	170 const size_t last_good_char = input.find_last_not_of(trailing_chars);

173 return (last_good_char == std::string::npos) ?	171 return (last_good_char == std::string::npos) ?

174 input.length() : (input.length() - last_good_char - 1);	172 input.length() : (input.length() - last_good_char - 1);

175 }	173 }

176 #endif	174 #endif

177	175

178 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence

179 // of bytes. If input is invalid, return false.

180 bool QPDecode(const std::string& input, std::string* output) {

181 std::string temp;

182 temp.reserve(input.size());

183 for (std::string::const_iterator it = input.begin(); it != input.end();

184 ++it) {

185 if (*it == '_') {

186 temp.push_back(' ');

187 } else if (*it == '=') {

188 if ((input.end() - it < 3) ||

189 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) ||

190 !IsHexDigit(static_cast<unsigned char>(*(it + 2))))

191 return false;

192 unsigned char ch = HexDigitToInt(*(it + 1)) * 16 +

193 HexDigitToInt(*(it + 2));

194 temp.push_back(static_cast<char>(ch));

195 ++it;

196 ++it;

197 } else if (0x20 < *it && *it < 0x7F) {

198 // In a Q-encoded word, only printable ASCII characters

199 // represent themselves. Besides, space, '=', '_' and '?' are

200 // not allowed, but they're already filtered out.

201 DCHECK_NE('=', *it);

202 DCHECK_NE('?', *it);

203 DCHECK_NE('_', *it);

204 temp.push_back(*it);

205 } else {

206 return false;

207 }

208 }

209 output->swap(temp);

210 return true;

211 }

212

213 enum RFC2047EncodingType {Q_ENCODING, B_ENCODING};

214 bool DecodeBQEncoding(const std::string& part,

215 RFC2047EncodingType enc_type,

216 const std::string& charset,

217 std::string* output) {

218 std::string decoded;

219 if (!((enc_type == B_ENCODING) ?

220 base::Base64Decode(part, &decoded) : QPDecode(part, &decoded)))

221 return false;

222

223 if (decoded.empty()) {

224 output->clear();

225 return true;

226 }

227

228 UErrorCode err = U_ZERO_ERROR;

229 UConverter* converter(ucnv_open(charset.c_str(), &err));

230 if (U_FAILURE(err))

231 return false;

232

233 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.

234 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes

235 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a

236 // trailing '\0'.

237 size_t output_length = decoded.length() * 3 + 1;

238 char* buf = WriteInto(output, output_length);

239 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,

240 decoded.data(), decoded.length(), &err);

241 ucnv_close(converter);

242 if (U_FAILURE(err))

243 return false;

244 output->resize(output_length);

245 return true;

246 }

247

248 bool DecodeWord(const std::string& encoded_word,

249 const std::string& referrer_charset,

250 bool* is_rfc2047,

251 std::string* output) {

252 *is_rfc2047 = false;

253 output->clear();

254 if (encoded_word.empty())

255 return true;

256

257 if (!IsStringASCII(encoded_word)) {

258 // Try UTF-8, referrer_charset and the native OS default charset in turn.

259 if (IsStringUTF8(encoded_word)) {

260 *output = encoded_word;

261 } else {

262 string16 utf16_output;

263 if (!referrer_charset.empty() &&

264 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),

265 base::OnStringConversionError::FAIL,

266 &utf16_output)) {

267 *output = UTF16ToUTF8(utf16_output);

268 } else {

269 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));

270 }

271 }

272

273 return true;

274 }

275

276 // RFC 2047 : one of encoding methods supported by Firefox and relatively

277 // widely used by web servers.

278 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.

279 // We don't care about the length restriction (72 bytes) because

280 // many web servers generate encoded words longer than the limit.

281 std::string tmp;

282 *is_rfc2047 = true;

283 int part_index = 0;

284 std::string charset;

285 StringTokenizer t(encoded_word, "?");

286 RFC2047EncodingType enc_type = Q_ENCODING;

287 while (*is_rfc2047 && t.GetNext()) {

288 std::string part = t.token();

289 switch (part_index) {

290 case 0:

291 if (part != "=") {

292 *is_rfc2047 = false;

293 break;

294 }

295 ++part_index;

296 break;

297 case 1:

298 // Do we need charset validity check here?

299 charset = part;

300 ++part_index;

301 break;

302 case 2:

303 if (part.size() > 1 ||

304 part.find_first_of("bBqQ") == std::string::npos) {

305 *is_rfc2047 = false;

306 break;

307 }

308 if (part[0] == 'b' || part[0] == 'B') {

309 enc_type = B_ENCODING;

310 }

311 ++part_index;

312 break;

313 case 3:

314 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);

315 if (!*is_rfc2047) {

316 // Last minute failure. Invalid B/Q encoding. Rather than

317 // passing it through, return now.

318 return false;

319 }

320 ++part_index;

321 break;

322 case 4:

323 if (part != "=") {

324 // Another last minute failure !

325 // Likely to be a case of two encoded-words in a row or

326 // an encoded word followed by a non-encoded word. We can be

327 // generous, but it does not help much in terms of compatibility,

328 // I believe. Return immediately.

329 *is_rfc2047 = false;

330 return false;

331 }

332 ++part_index;

333 break;

334 default:

335 *is_rfc2047 = false;

336 return false;

337 }

338 }

339

340 if (*is_rfc2047) {

341 if (*(encoded_word.end() - 1) == '=') {

342 output->swap(tmp);

343 return true;

344 }

345 // encoded_word ending prematurely with '?' or extra '?'

346 *is_rfc2047 = false;

347 return false;

348 }

349

350 // We're not handling 'especial' characters quoted with '\', but

351 // it should be Ok because we're not an email client but a

352 // web browser.

353

354 // What IE6/7 does: %-escaped UTF-8.

355 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);

356 if (IsStringUTF8(tmp)) {

357 output->swap(tmp);

358 return true;

359 // We can try either the OS default charset or 'origin charset' here,

360 // As far as I can tell, IE does not support it. However, I've seen

361 // web servers emit %-escaped string in a legacy encoding (usually

362 // origin charset).

363 // TODO(jungshik) : Test IE further and consider adding a fallback here.

364 }

365 return false;

366 }

367

368 // Does some simple normalization of scripts so we can allow certain scripts	176 // Does some simple normalization of scripts so we can allow certain scripts

369 // to exist together.	177 // to exist together.

370 // TODO(brettw) bug 880223: we should allow some other languages to be	178 // TODO(brettw) bug 880223: we should allow some other languages to be

371 // combined such as Chinese and Latin. We will probably need a more	179 // combined such as Chinese and Latin. We will probably need a more

372 // complicated system of language pairs to have more fine-grained control.	180 // complicated system of language pairs to have more fine-grained control.

373 UScriptCode NormalizeScript(UScriptCode code) {	181 UScriptCode NormalizeScript(UScriptCode code) {

374 switch (code) {	182 switch (code) {

375 case USCRIPT_KATAKANA:	183 case USCRIPT_KATAKANA:

376 case USCRIPT_HIRAGANA:	184 case USCRIPT_HIRAGANA:

377 case USCRIPT_KATAKANA_OR_HIRAGANA:	185 case USCRIPT_KATAKANA_OR_HIRAGANA:

(...skipping 554 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
932 // don't attempt to divine a file name out of them.	740 // don't attempt to divine a file name out of them.

933 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))	741 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))

934 return std::string();	742 return std::string();

935	743

936 const std::string unescaped_url_filename = UnescapeURLComponent(	744 const std::string unescaped_url_filename = UnescapeURLComponent(

937 url.ExtractFileName(),	745 url.ExtractFileName(),

938 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);	746 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

939	747

940 // The URL's path should be escaped UTF-8, but may not be.	748 // The URL's path should be escaped UTF-8, but may not be.

941 std::string decoded_filename = unescaped_url_filename;	749 std::string decoded_filename = unescaped_url_filename;

942 if (!IsStringASCII(decoded_filename)) {	750 if (!IsStringUTF8(decoded_filename)) {

943 bool ignore;

944 // TODO(jshin): this is probably not robust enough. To be sure, we need	751 // TODO(jshin): this is probably not robust enough. To be sure, we need

945 // encoding detection.	752 // encoding detection.

946 DecodeWord(unescaped_url_filename, referrer_charset, &ignore,	753 string16 utf16_output;

947 &decoded_filename);	754 if (!referrer_charset.empty() &&

	755 base::CodepageToUTF16(unescaped_url_filename,

	756 referrer_charset.c_str(),

	757 base::OnStringConversionError::FAIL,

	758 &utf16_output)) {

	759 decoded_filename = UTF16ToUTF8(utf16_output);

	760 } else {

	761 decoded_filename = WideToUTF8(

	762 base::SysNativeMBToWide(unescaped_url_filename));

	763 }

948 }	764 }

949 // If the URL contains a (possibly empty) query, assume it is a generator, and	765 // If the URL contains a (possibly empty) query, assume it is a generator, and

950 // allow the determined extension to be overwritten.	766 // allow the determined extension to be overwritten.

951 *should_overwrite_extension = !decoded_filename.empty() && url.has_query();	767 *should_overwrite_extension = !decoded_filename.empty() && url.has_query();

952	768

953 return decoded_filename;	769 return decoded_filename;

954 }	770 }

955	771

956 #if defined(OS_WIN)	772 #if defined(OS_WIN)

957 // Returns whether the specified extension is automatically integrated into the	773 // Returns whether the specified extension is automatically integrated into the

(...skipping 193 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1151 return std::string();	967 return std::string();

1152	968

1153 begin += match.length();	969 begin += match.length();

1154	970

1155 std::string ret;	971 std::string ret;

1156 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')),	972 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')),

1157 TRIM_ALL, &ret);	973 TRIM_ALL, &ret);

1158 return ret;	974 return ret;

1159 }	975 }

1160	976

1161 bool DecodeCharset(const std::string& input,

1162 std::string* decoded_charset,

1163 std::string* value) {

1164 StringTokenizer t(input, "'");

1165 t.set_options(StringTokenizer::RETURN_DELIMS);

1166 std::string temp_charset;

1167 std::string temp_value;

1168 int numDelimsSeen = 0;

1169 while (t.GetNext()) {

1170 if (t.token_is_delim()) {

1171 ++numDelimsSeen;

1172 continue;

1173 } else {

1174 switch (numDelimsSeen) {

1175 case 0:

1176 temp_charset = t.token();

1177 break;

1178 case 1:

1179 // Language is ignored.

1180 break;

1181 case 2:

1182 temp_value = t.token();

1183 break;

1184 default:

1185 return false;

1186 }

1187 }

1188 }

1189 if (numDelimsSeen != 2)

1190 return false;

1191 if (temp_charset.empty() || temp_value.empty())

1192 return false;

1193 decoded_charset->swap(temp_charset);

1194 value->swap(temp_value);

1195 return true;

1196 }

1197

1198 bool DecodeFilenameValue(const std::string& input,

1199 const std::string& referrer_charset,

1200 std::string* output) {

1201 std::string tmp;

1202 // Tokenize with whitespace characters.

1203 StringTokenizer t(input, " \t\n\r");

1204 t.set_options(StringTokenizer::RETURN_DELIMS);

1205 bool is_previous_token_rfc2047 = true;

1206 while (t.GetNext()) {

1207 if (t.token_is_delim()) {

1208 // If the previous non-delimiter token is not RFC2047-encoded,

1209 // put in a space in its place. Otherwise, skip over it.

1210 if (!is_previous_token_rfc2047) {

1211 tmp.push_back(' ');

1212 }

1213 continue;

1214 }

1215 // We don't support a single multibyte character split into

1216 // adjacent encoded words. Some broken mail clients emit headers

1217 // with that problem, but most web servers usually encode a filename

1218 // in a single encoded-word. Firefox/Thunderbird do not support

1219 // it, either.

1220 std::string decoded;

1221 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

1222 &decoded))

1223 return false;

1224 tmp.append(decoded);

1225 }

1226 output->swap(tmp);

1227 return true;

1228 }

1229

1230 bool DecodeExtValue(const std::string& param_value, std::string* decoded) {

1231 if (param_value.find('"') != std::string::npos)

1232 return false;

1233

1234 std::string charset;

1235 std::string value;

1236 if (!DecodeCharset(param_value, &charset, &value))

1237 return false;

1238

1239 // RFC 5987 value should be ASCII-only.

1240 if (!IsStringASCII(value)) {

1241 decoded->clear();

1242 return true;

1243 }

1244

1245 std::string unescaped = UnescapeURLComponent(value,

1246 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

1247

1248 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded);

1249 }

1250

1251 string16 IDNToUnicode(const std::string& host,	977 string16 IDNToUnicode(const std::string& host,

1252 const std::string& languages) {	978 const std::string& languages) {

1253 return IDNToUnicodeWithOffsets(host, languages, NULL);	979 return IDNToUnicodeWithOffsets(host, languages, NULL);

1254 }	980 }

1255	981

1256 std::string CanonicalizeHost(const std::string& host,	982 std::string CanonicalizeHost(const std::string& host,

1257 url_canon::CanonHostInfo* host_info) {	983 url_canon::CanonHostInfo* host_info) {

1258 // Try to canonicalize the host.	984 // Try to canonicalize the host.

1259 const url_parse::Component raw_host_component(	985 const url_parse::Component raw_host_component(

1260 0, static_cast<int>(host.length()));	986 0, static_cast<int>(host.length()));

(...skipping 1186 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2447	2173

2448 NetworkInterface::NetworkInterface(const std::string& name,	2174 NetworkInterface::NetworkInterface(const std::string& name,

2449 const IPAddressNumber& address)	2175 const IPAddressNumber& address)

2450 : name(name), address(address) {	2176 : name(name), address(address) {

2451 }	2177 }

2452	2178

2453 NetworkInterface::~NetworkInterface() {	2179 NetworkInterface::~NetworkInterface() {

2454 }	2180 }

2455	2181

2456 } // namespace net	2182 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | net/http/http_content_disposition.cc » ('J')