net/base/net_util.cc - Issue 83002: download filename fix

Side by Side Diff: net/base/net_util.cc

Issue 83002: download filename fix (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <algorithm>	5 #include <algorithm>

6 #include <unicode/ucnv.h>	6 #include <unicode/ucnv.h>

7 #include <unicode/uidna.h>	7 #include <unicode/uidna.h>

8 #include <unicode/ulocdata.h>	8 #include <unicode/ulocdata.h>

9 #include <unicode/uniset.h>	9 #include <unicode/uniset.h>

10 #include <unicode/uscript.h>	10 #include <unicode/uscript.h>

(...skipping 229 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
240 decoded.data(), length, &err);	240 decoded.data(), length, &err);

241 ucnv_close(converter);	241 ucnv_close(converter);

242 if (U_FAILURE(err)) {	242 if (U_FAILURE(err)) {

243 return false;	243 return false;

244 }	244 }

245 output->resize(length);	245 output->resize(length);

246 return true;	246 return true;

247 }	247 }

248	248

249 bool DecodeWord(const std::string& encoded_word,	249 bool DecodeWord(const std::string& encoded_word,

	250 const std::string& referrer_charset,

250 bool *is_rfc2047,	251 bool *is_rfc2047,

251 std::string* output) {	252 std::string* output) {

252 // TODO(jungshik) : Revisit this later. Do we want to pass through non-ASCII

253 // strings which can be mojibake? WinHTTP converts a raw 8bit string

254 // UTF-16 assuming it's in the OS default encoding.

255 if (!IsStringASCII(encoded_word)) {	253 if (!IsStringASCII(encoded_word)) {

256 // Try falling back to the NativeMB encoding if the raw input is not UTF-8.	254 // Try UTF-8, referrer_charset and the native OS default charset in turn.

257 if (IsStringUTF8(encoded_word)) {	255 if (IsStringUTF8(encoded_word)) {

258 *output = encoded_word;	256 *output = encoded_word;

259 } else {	257 } else {

260 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));	258 std::wstring wide_output;

	259 if (!referrer_charset.empty() &&

	260 CodepageToWide(encoded_word, referrer_charset.c_str(),

	261 OnStringUtilConversionError::FAIL, &wide_output)) {

	262 *output = WideToUTF8(wide_output);

	263 } else {

	264 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));

	265 }

261 }	266 }

262 *is_rfc2047 = false;	267 *is_rfc2047 = false;

263 return true;	268 return true;

264 }	269 }

265	270

266 // RFC 2047 : one of encoding methods supported by Firefox and relatively	271 // RFC 2047 : one of encoding methods supported by Firefox and relatively

267 // widely used by web servers.	272 // widely used by web servers.

268 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.	273 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.

269 // We don't care about the length restriction (72 bytes) because	274 // We don't care about the length restriction (72 bytes) because

270 // many web servers generate encoded words longer than the limit.	275 // many web servers generate encoded words longer than the limit.

(...skipping 79 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
350 return true;	355 return true;

351 // We can try either the OS default charset or 'origin charset' here,	356 // We can try either the OS default charset or 'origin charset' here,

352 // As far as I can tell, IE does not support it. However, I've seen	357 // As far as I can tell, IE does not support it. However, I've seen

353 // web servers emit %-escaped string in a legacy encoding (usually	358 // web servers emit %-escaped string in a legacy encoding (usually

354 // origin charset).	359 // origin charset).

355 // TODO(jungshik) : Test IE further and consider adding a fallback here.	360 // TODO(jungshik) : Test IE further and consider adding a fallback here.

356 }	361 }

357 return false;	362 return false;

358 }	363 }

359	364

360 bool DecodeParamValue(const std::string& input, std::string* output) {	365 bool DecodeParamValue(const std::string& input,

	366 const std::string& referrer_charset,

	367 std::string* output) {

361 std::string tmp;	368 std::string tmp;

362 // Tokenize with whitespace characters.	369 // Tokenize with whitespace characters.

363 StringTokenizer t(input, " \t\n\r");	370 StringTokenizer t(input, " \t\n\r");

364 t.set_options(StringTokenizer::RETURN_DELIMS);	371 t.set_options(StringTokenizer::RETURN_DELIMS);

365 bool is_previous_token_rfc2047 = true;	372 bool is_previous_token_rfc2047 = true;

366 while (t.GetNext()) {	373 while (t.GetNext()) {

367 if (t.token_is_delim()) {	374 if (t.token_is_delim()) {

368 // If the previous non-delimiter token is not RFC2047-encoded,	375 // If the previous non-delimiter token is not RFC2047-encoded,

369 // put in a space in its place. Otherwise, skip over it.	376 // put in a space in its place. Otherwise, skip over it.

370 if (!is_previous_token_rfc2047) {	377 if (!is_previous_token_rfc2047) {

371 tmp.push_back(' ');	378 tmp.push_back(' ');

372 }	379 }

373 continue;	380 continue;

374 }	381 }

375 // We don't support a single multibyte character split into	382 // We don't support a single multibyte character split into

376 // adjacent encoded words. Some broken mail clients emit headers	383 // adjacent encoded words. Some broken mail clients emit headers

377 // with that problem, but most web servers usually encode a filename	384 // with that problem, but most web servers usually encode a filename

378 // in a single encoded-word. Firefox/Thunderbird do not support	385 // in a single encoded-word. Firefox/Thunderbird do not support

379 // it, either.	386 // it, either.

380 std::string decoded;	387 std::string decoded;

381 if (!DecodeWord(t.token(), &is_previous_token_rfc2047, &decoded))	388 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

	389 &decoded))

382 return false;	390 return false;

383 tmp.append(decoded);	391 tmp.append(decoded);

384 }	392 }

385 output->swap(tmp);	393 output->swap(tmp);

386 return true;	394 return true;

387 }	395 }

388	396

389 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm	397 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm

390 // sure this doesn't properly handle all (most?) cases.	398 // sure this doesn't properly handle all (most?) cases.

391 template<typename STR>	399 template<typename STR>

(...skipping 284 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
676 std::wstring GetSpecificHeader(const std::wstring& headers,	684 std::wstring GetSpecificHeader(const std::wstring& headers,

677 const std::wstring& name) {	685 const std::wstring& name) {

678 return GetSpecificHeaderT(headers, name);	686 return GetSpecificHeaderT(headers, name);

679 }	687 }

680	688

681 std::string GetSpecificHeader(const std::string& headers,	689 std::string GetSpecificHeader(const std::string& headers,

682 const std::string& name) {	690 const std::string& name) {

683 return GetSpecificHeaderT(headers, name);	691 return GetSpecificHeaderT(headers, name);

684 }	692 }

685	693

686 std::wstring GetFileNameFromCD(const std::string& header) {	694 std::wstring GetFileNameFromCD(const std::string& header,

	695 const std::string& referrer_charset) {

687 std::string param_value = GetHeaderParamValue(header, "filename");	696 std::string param_value = GetHeaderParamValue(header, "filename");

688 if (param_value.empty()) {	697 if (param_value.empty()) {

689 // Some servers use 'name' parameter.	698 // Some servers use 'name' parameter.

690 param_value = GetHeaderParamValue(header, "name");	699 param_value = GetHeaderParamValue(header, "name");

691 }	700 }

692 if (param_value.empty())	701 if (param_value.empty())

693 return std::wstring();	702 return std::wstring();

694 std::string decoded;	703 std::string decoded;

695 if (DecodeParamValue(param_value, &decoded))	704 if (DecodeParamValue(param_value, referrer_charset, &decoded))

696 return UTF8ToWide(decoded);	705 return UTF8ToWide(decoded);

697 return std::wstring();	706 return std::wstring();

698 }	707 }

699	708

700 std::wstring GetHeaderParamValue(const std::wstring& field,	709 std::wstring GetHeaderParamValue(const std::wstring& field,

701 const std::wstring& param_name) {	710 const std::wstring& param_name) {

702 return GetHeaderParamValueT(field, param_name);	711 return GetHeaderParamValueT(field, param_name);

703 }	712 }

704	713

705 std::string GetHeaderParamValue(const std::string& field,	714 std::string GetHeaderParamValue(const std::string& field,

(...skipping 150 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
856 }	865 }

857	866

858 std::wstring StripWWW(const std::wstring& text) {	867 std::wstring StripWWW(const std::wstring& text) {

859 const std::wstring www(L"www.");	868 const std::wstring www(L"www.");

860 return (text.compare(0, www.length(), www) == 0) ?	869 return (text.compare(0, www.length(), www) == 0) ?

861 text.substr(www.length()) : text;	870 text.substr(www.length()) : text;

862 }	871 }

863	872

864 std::wstring GetSuggestedFilename(const GURL& url,	873 std::wstring GetSuggestedFilename(const GURL& url,

865 const std::string& content_disposition,	874 const std::string& content_disposition,

	875 const std::string& referrer_charset,

866 const std::wstring& default_name) {	876 const std::wstring& default_name) {

867 std::wstring filename = GetFileNameFromCD(content_disposition);	877 std::wstring filename = GetFileNameFromCD(content_disposition,

	878 referrer_charset);

868 if (!filename.empty()) {	879 if (!filename.empty()) {

869 // Remove any path information the server may have sent, take the name	880 // Remove any path information the server may have sent, take the name

870 // only.	881 // only.

871 filename = file_util::GetFilenameFromPath(filename);	882 filename = file_util::GetFilenameFromPath(filename);

872 // Next, remove "." from the beginning and end of the file name to avoid	883 // Next, remove "." from the beginning and end of the file name to avoid

873 // tricks with hidden files, "..", and "."	884 // tricks with hidden files, "..", and "."

874 TrimString(filename, L".", &filename);	885 TrimString(filename, L".", &filename);

875 }	886 }

876 if (filename.empty()) {	887 if (filename.empty()) {

877 if (url.is_valid()) {	888 if (url.is_valid()) {

(...skipping 16 matching lines...) Expand all Loading...
894 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)	905 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)

895 filename = url.host().empty() ? L"download" : UTF8ToWide(url.host());	906 filename = url.host().empty() ? L"download" : UTF8ToWide(url.host());

896 } else	907 } else

897 NOTREACHED();	908 NOTREACHED();

898 }	909 }

899	910

900 file_util::ReplaceIllegalCharacters(&filename, '-');	911 file_util::ReplaceIllegalCharacters(&filename, '-');

901 return filename;	912 return filename;

902 }	913 }

903	914

904 std::wstring GetSuggestedFilename(const GURL& url,

905 const std::wstring& content_disposition,

906 const std::wstring& default_name) {

907 return GetSuggestedFilename(

908 url, WideToUTF8(content_disposition), default_name);

909 }

910

911 bool IsPortAllowedByDefault(int port) {	915 bool IsPortAllowedByDefault(int port) {

912 int array_size = arraysize(kRestrictedPorts);	916 int array_size = arraysize(kRestrictedPorts);

913 for (int i = 0; i < array_size; i++) {	917 for (int i = 0; i < array_size; i++) {

914 if (kRestrictedPorts[i] == port) {	918 if (kRestrictedPorts[i] == port) {

915 return false;	919 return false;

916 }	920 }

917 }	921 }

918 return true;	922 return true;

919 }	923 }

920	924

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1021 char buffer[256];	1025 char buffer[256];

1022 int result = gethostname(buffer, sizeof(buffer));	1026 int result = gethostname(buffer, sizeof(buffer));

1023 if (result != 0) {	1027 if (result != 0) {

1024 DLOG(INFO) << "gethostname() failed with " << result;	1028 DLOG(INFO) << "gethostname() failed with " << result;

1025 buffer[0] = '\0';	1029 buffer[0] = '\0';

1026 }	1030 }

1027 return std::string(buffer);	1031 return std::string(buffer);

1028 }	1032 }

1029	1033

1030 } // namespace net	1034 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »