Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(416)

Unified Diff: net/url_request/url_request_new_ftp_job.cc

Issue 151065: Fix the local file listing and FTP file listing. For the former, use the OS f... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« base/string_util_unittest.cc ('K') | « net/url_request/url_request_new_ftp_job.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/url_request/url_request_new_ftp_job.cc
===================================================================
--- net/url_request/url_request_new_ftp_job.cc (revision 20014)
+++ net/url_request/url_request_new_ftp_job.cc (working copy)
@@ -7,6 +7,7 @@
#include "base/compiler_specific.h"
#include "base/file_version_info.h"
#include "base/message_loop.h"
+#include "base/sys_string_conversions.h"
#include "net/base/escape.h"
#include "net/base/net_errors.h"
#include "net/base/net_util.h"
@@ -16,7 +17,47 @@
#include "net/url_request/url_request.h"
#include "net/url_request/url_request_context.h"
#include "net/url_request/url_request_error_job.h"
+#include "unicode/ucsdet.h"
+namespace {
+
+// A very simple-minded character encoding detection.
+// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
+// like TLD, the UI language/default encoding of a client, etc). In that case,
+// this should be pulled out of here and moved somewhere in base because there
+// can be other use cases.
+std::string DetectEncoding(const char*input, size_t len) {
wtc 2009/07/15 23:53:23 Nit: add a space before 'input'.
+ if (IsStringASCII(std::string(input, len)))
+ return std::string();
+ UErrorCode status = U_ZERO_ERROR;
+ UCharsetDetector* detector = ucsdet_open(&status);
+ ucsdet_setText(detector, input, static_cast<int32_t>(len), &status);
+ const UCharsetMatch* match = ucsdet_detect(detector, &status);
+ const char* encoding = ucsdet_getName(match, &status);
+ // Should we check the quality of the match? A rather arbitrary number is
+ // assigned by ICU and it's hard to come up with a lower limit.
+ if (U_FAILURE(status))
+ return std::string();
+ return encoding;
+}
+
+string16 RawByteSequenceToFilename(const char* raw_filename,
+ const std::string& encoding) {
+ if (encoding.empty())
+ return ASCIIToUTF16(raw_filename);
+
+ // Try the detected encoding before falling back to the native codepage.
+ // Using the native codepage does not make much sense, but we don't have
+ // much else to resort to.
+ string16 filename;
+ if (!CodepageToUTF16(raw_filename, encoding.c_str(),
+ OnStringUtilConversionError::SUBSTITUTE, &filename))
+ filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename));
+ return filename;
+}
+
+} // namespace
+
URLRequestNewFtpJob::URLRequestNewFtpJob(URLRequest* request)
: URLRequestJob(request),
server_auth_state_(net::AUTH_STATE_DONT_NEED_AUTH),
@@ -69,17 +110,36 @@
if (response_info_ == NULL) {
response_info_ = transaction_->GetResponseInfo();
if (response_info_->is_directory_listing) {
- // Unescape the URL path and pass the raw 8bit directly to the browser.
- directory_html_ = net::GetDirectoryListingHeader(
+ std::string escaped_path =
UnescapeURLComponent(request_->url().path(),
- UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
+ string16 path_utf16;
+ // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII,
+ // but many old FTP servers use legacy encodings. Try UTF-8 first and
+ // detect the encoding.
+ if (IsStringUTF8(escaped_path)) {
+ path_utf16 = UTF8ToUTF16(escaped_path);
+ } else {
+ std::string encoding = DetectEncoding(escaped_path.c_str(),
+ escaped_path.size());
+ // Try the detected encoding. If it fails, resort to the
+ // OS native encoding.
+ if (encoding.empty() ||
+ !CodepageToUTF16(escaped_path, encoding.c_str(),
+ OnStringUtilConversionError::SUBSTITUTE,
+ &path_utf16))
+ path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(escaped_path));
+ }
+
+ directory_html_ = net::GetDirectoryListingHeader(path_utf16);
// If this isn't top level directory (i.e. the path isn't "/",)
// add a link to the parent directory.
if (request_->url().path().length() > 1)
- directory_html_.append(net::GetDirectoryListingEntry("..",
- false,
- 0,
- base::Time()));
+ directory_html_.append(
+ net::GetDirectoryListingEntry(ASCIIToUTF16(".."),
+ std::string(),
+ false, 0,
+ base::Time()));
}
}
if (!directory_html_.empty()) {
@@ -121,6 +181,20 @@
std::string file_entry;
std::string line;
buf->data()[bytes_read] = 0;
+
+ // If all we've seen so far is ASCII, encoding_ is empty. Try to detect the
+ // encoding. We don't do the separate UTF-8 check here because the encoding
+ // detection with a longer chunk (as opposed to the relatively short path
+ // component of the url) is unlikely to mistake UTF-8 for a legacy encoding.
+ // If it turns out to be wrong, a separate UTF-8 check has to be added.
+ //
+ // TODO(jungshik): UTF-8 has to be 'enforced' without any heuristics when
+ // we're talking to an FTP server compliant to RFC 2640 (that is, its response
+ // to FEAT command includes 'UTF8').
+ // See http://wiki.filezilla-project.org/Character_Set
+ if (encoding_.empty())
+ encoding_ = DetectEncoding(buf->data(), bytes_read);
+
int64 file_size;
std::istringstream iss(buf->data());
while (getline(iss, line)) {
@@ -144,6 +218,7 @@
et.day_of_week = result.fe_time.tm_wday;
file_entry.append(net::GetDirectoryListingEntry(
+ RawByteSequenceToFilename(result.fe_fname, encoding_),
result.fe_fname, true, 0, base::Time::FromLocalExploded(et)));
break;
case net::FTP_TYPE_FILE:
@@ -163,6 +238,7 @@
// It returns wrong date/time (Differnce is 1 day and 17 Hours).
if (StringToInt64(result.fe_size, &file_size))
file_entry.append(net::GetDirectoryListingEntry(
+ RawByteSequenceToFilename(result.fe_fname, encoding_),
result.fe_fname, false, file_size,
base::Time::FromLocalExploded(et)));
break;
Property changes on: net\url_request\url_request_new_ftp_job.cc
___________________________________________________________________
Added: svn:eol-style
+ LF
« base/string_util_unittest.cc ('K') | « net/url_request/url_request_new_ftp_job.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698