Index: webkit/glue/ftp_directory_listing_response_delegate.cc |
diff --git a/webkit/glue/ftp_directory_listing_response_delegate.cc b/webkit/glue/ftp_directory_listing_response_delegate.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..a0ee89dd764a5a9cbe6f489a7786719f5e17a7cc |
--- /dev/null |
+++ b/webkit/glue/ftp_directory_listing_response_delegate.cc |
@@ -0,0 +1,253 @@ |
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "webkit/glue/ftp_directory_listing_response_delegate.h" |
+ |
+#include <vector> |
+ |
+#include "base/logging.h" |
+#include "base/string_util.h" |
+#include "base/sys_string_conversions.h" |
+#include "base/time.h" |
+#include "net/base/escape.h" |
+#include "net/base/net_util.h" |
+#include "net/ftp/ftp_server_type_histograms.h" |
+#include "unicode/ucsdet.h" |
+#include "webkit/api/public/WebURL.h" |
+#include "webkit/api/public/WebURLLoaderClient.h" |
+ |
+using WebKit::WebURLLoader; |
+using WebKit::WebURLLoaderClient; |
+using WebKit::WebURLResponse; |
+ |
+namespace { |
+ |
+// A very simple-minded character encoding detection built on ICU's charset |
+// detector. |
+// |
+// Returns an empty string when the text is pure ASCII (nothing to detect) or |
+// when ICU reports a failure; otherwise returns the name of the best match. |
+// |
+// TODO(jungshik): We can apply more heuristics here (e.g. using various hints |
+// like TLD, the UI language/default encoding of a client, etc). In that case, |
+// this should be pulled out of here and moved somewhere in base because there |
+// can be other use cases. |
+std::string DetectEncoding(const std::string& text) { |
+  if (IsStringASCII(text)) |
+    return std::string(); |
+ |
+  UErrorCode status = U_ZERO_ERROR; |
+  UCharsetDetector* detector = ucsdet_open(&status); |
+  ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), |
+                 &status); |
+  const UCharsetMatch* match = ucsdet_detect(detector, &status); |
+  const char* name = ucsdet_getName(match, &status); |
+ |
+  // Should we check the quality of the match? A rather arbitrary number is |
+  // assigned by ICU and it's hard to come up with a lower limit. |
+  // |
+  // The name's storage belongs to the match, which in turn belongs to the |
+  // detector, so it has to be copied before the detector is closed. |
+  std::string encoding; |
+  if (U_SUCCESS(status) && name) |
+    encoding = name; |
+ |
+  // Every successful ucsdet_open() must be paired with ucsdet_close(); |
+  // otherwise the detector leaks on each call. |
+  if (detector) |
+    ucsdet_close(detector); |
+  return encoding; |
+} |
+ |
+// Converts a raw filename taken from the server listing to UTF-16. |
+// |
+// raw_filename: NUL-terminated bytes exactly as received. |
+// encoding:     name of the detected encoding; empty means the bytes are |
+//               treated as ASCII. |
+string16 RawByteSequenceToFilename(const char* raw_filename, |
+                                   const std::string& encoding) { |
+  if (encoding.empty()) |
+    return ASCIIToUTF16(raw_filename); |
+ |
+  // The detected encoding gets the first shot. |
+  string16 converted; |
+  const bool ok = |
+      CodepageToUTF16(raw_filename, encoding.c_str(), |
+                      OnStringUtilConversionError::SUBSTITUTE, &converted); |
+  if (ok) |
+    return converted; |
+ |
+  // Using the native codepage does not make much sense, but we don't have |
+  // much else to resort to. |
+  return WideToUTF16Hack(base::SysNativeMBToWide(raw_filename)); |
+} |
+ |
+// Moves every complete CRLF-terminated line out of the buffer. |
+// |
+// buffer: pending input; on return only the bytes after the last "\r\n" |
+//         remain (an unterminated tail, a lone '\r' or a lone '\n' is kept). |
+// lines:  receives the extracted lines, in order, without their "\r\n". |
+void ExtractFullLinesFromBuffer(std::string* buffer, |
+                                std::vector<std::string>* lines) { |
+  // Offsets use the string's own size_type so they cannot be narrowed or |
+  // compared across signedness the way an int would be. |
+  std::string::size_type line_start = 0; |
+  for (;;) { |
+    const std::string::size_type crlf = buffer->find("\r\n", line_start); |
+    if (crlf == std::string::npos) |
+      break; |
+    lines->push_back(buffer->substr(line_start, crlf - line_start)); |
+    line_start = crlf + 2;  // Skip over the "\r\n" itself. |
+  } |
+  buffer->erase(0, line_start); |
+} |
+ |
+// Reports the listing style chosen by the LIST parser to the FTP server type |
+// histograms. Any style character that is not listed below is reported as |
+// net::SERVER_UNKNOWN. |
+void LogFtpServerType(char server_type) { |
+  if (server_type == 'E') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_EPLF); |
+  } else if (server_type == 'V') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_VMS); |
+  } else if (server_type == 'C') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_CMS); |
+  } else if (server_type == 'W') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_DOS); |
+  } else if (server_type == 'O') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_OS2); |
+  } else if (server_type == 'U') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_LSL); |
+  } else if (server_type == 'w') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_W16); |
+  } else if (server_type == 'D') { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_DLS); |
+  } else { |
+    net::UpdateFtpServerTypeHistograms(net::SERVER_UNKNOWN); |
+  } |
+} |
+ |
+} // namespace |
+ |
+namespace webkit_glue { |
+ |
+// Stores the client, the loader and a copy of the original response, then |
+// calls Init() to prepare the parser state and the initial response buffer. |
+// The client and loader pointers are kept as given. |
+FtpDirectoryListingResponseDelegate::FtpDirectoryListingResponseDelegate( |
+ WebURLLoaderClient* client, |
+ WebURLLoader* loader, |
+ const WebURLResponse& response) |
+ : client_(client), |
+ loader_(loader), |
+ original_response_(response) { |
+ Init(); |
+} |
+ |
+// Consumes one chunk of the raw FTP LIST response. |
+// |
+// The bytes are appended to the pending input, every complete CRLF-terminated |
+// line is parsed with net::ParseFTPList(), and each directory, file or symlink |
+// entry is rendered with net::GetDirectoryListingEntry() and sent to the |
+// client. An unterminated trailing line stays buffered for the next call. |
+// |
+// data:     start of the received bytes. |
+// data_len: number of bytes available at data. |
+void FtpDirectoryListingResponseDelegate::OnReceivedData(const char* data, |
+                                                         int data_len) { |
+  input_buffer_.append(data, data_len); |
+ |
+  // If all we've seen so far is ASCII, encoding_ is empty. Try to detect the |
+  // encoding. We don't do the separate UTF-8 check here because the encoding |
+  // detection with a longer chunk (as opposed to the relatively short path |
+  // component of the url) is unlikely to mistake UTF-8 for a legacy encoding. |
+  // If it turns out to be wrong, a separate UTF-8 check has to be added. |
+  // |
+  // TODO(jungshik): UTF-8 has to be 'enforced' without any heuristics when |
+  // we're talking to an FTP server compliant to RFC 2640 (that is, its response |
+  // to FEAT command includes 'UTF8'). |
+  // See http://wiki.filezilla-project.org/Character_Set |
+  if (encoding_.empty()) |
+    encoding_ = DetectEncoding(input_buffer_); |
+ |
+  std::vector<std::string> lines; |
+  ExtractFullLinesFromBuffer(&input_buffer_, &lines); |
+ |
+  for (std::vector<std::string>::const_iterator line = lines.begin(); |
+       line != lines.end(); ++line) { |
+    struct net::list_result result; |
+    int line_type = net::ParseFTPList(line->c_str(), &parse_state_, &result); |
+ |
+    // The original code assumed months are in range 0-11 (PRExplodedTime), |
+    // but our Time class expects a 1-12 range. Adjust it here, because |
+    // the third-party parsing code uses bit-shifting on the month, |
+    // and it'd be too easy to break that logic. |
+    result.fe_time.month++; |
+    DCHECK_LE(1, result.fe_time.month); |
+    DCHECK_GE(12, result.fe_time.month); |
+ |
+    // In every entry branch below the name is built from fe_fname together |
+    // with fe_fnlen. fe_fname points into the parsed line, so reading it as a |
+    // NUL-terminated string would also pick up whatever follows the name on |
+    // that line. |
+    int64 file_size; |
+    switch (line_type) { |
+      case 'd': {  // Directory entry. |
+        std::string filename(result.fe_fname, result.fe_fnlen); |
+        response_buffer_.append(net::GetDirectoryListingEntry( |
+            RawByteSequenceToFilename(filename.c_str(), encoding_), |
+            filename, true, 0, |
+            base::Time::FromLocalExploded(result.fe_time))); |
+        break; |
+      } |
+      case 'f': {  // File entry. |
+        // Entries whose size field cannot be parsed are dropped. |
+        if (StringToInt64(result.fe_size, &file_size)) { |
+          std::string filename(result.fe_fname, result.fe_fnlen); |
+          response_buffer_.append(net::GetDirectoryListingEntry( |
+              RawByteSequenceToFilename(filename.c_str(), encoding_), |
+              filename, false, file_size, |
+              base::Time::FromLocalExploded(result.fe_time))); |
+        } |
+        break; |
+      } |
+      case 'l': {  // Symlink entry. |
+        std::string filename(result.fe_fname, result.fe_fnlen); |
+ |
+        // Parsers for styles 'U' and 'W' handle " -> " themselves. |
+        if (parse_state_.lstyle != 'U' && parse_state_.lstyle != 'W') { |
+          std::string::size_type offset = filename.find(" -> "); |
+          if (offset != std::string::npos) |
+            filename = filename.substr(0, offset); |
+        } |
+ |
+        if (StringToInt64(result.fe_size, &file_size)) { |
+          response_buffer_.append(net::GetDirectoryListingEntry( |
+              RawByteSequenceToFilename(filename.c_str(), encoding_), |
+              filename, false, file_size, |
+              base::Time::FromLocalExploded(result.fe_time))); |
+        } |
+        break; |
+      } |
+      case '?':  // Junk entry. |
+      case '"':  // Comment entry. |
+        break; |
+      default: |
+        NOTREACHED(); |
+        break; |
+    } |
+  } |
+ |
+  SendResponseBufferToClient(); |
+} |
+ |
+// Called when the request has finished: sends whatever is still waiting in |
+// the response buffer and then reports the detected server type. |
+void FtpDirectoryListingResponseDelegate::OnCompletedRequest() { |
+  SendResponseBufferToClient(); |
+ |
+  // Without at least one parsed entry the listing style is not reliable, so |
+  // nothing is logged in that case. |
+  if (!parse_state_.parsed_one) |
+    return; |
+ |
+  LogFtpServerType(parse_state_.lstyle); |
+} |
+ |
+// Zeroes the LIST parser state and seeds the response buffer with the |
+// directory listing header for the path of the original response URL, |
+// followed by a ".." entry whenever that path is longer than one character. |
+void FtpDirectoryListingResponseDelegate::Init() { |
+  memset(&parse_state_, 0, sizeof(parse_state_)); |
+ |
+  GURL listing_url(original_response_.url()); |
+  UnescapeRule::Type rules = |
+      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; |
+  std::string raw_path = UnescapeURLComponent(listing_url.path(), rules); |
+ |
+  // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII, |
+  // but many old FTP servers use legacy encodings. UTF-8 is tried first; |
+  // otherwise the encoding is detected and used for the conversion. |
+  string16 display_path; |
+  bool converted = false; |
+  if (IsStringUTF8(raw_path)) { |
+    display_path = UTF8ToUTF16(raw_path); |
+    converted = true; |
+  } else { |
+    std::string detected = DetectEncoding(raw_path); |
+    converted = !detected.empty() && |
+                CodepageToUTF16(raw_path, detected.c_str(), |
+                                OnStringUtilConversionError::SUBSTITUTE, |
+                                &display_path); |
+  } |
+ |
+  // Neither UTF-8 nor the detected encoding worked: resort to the OS native |
+  // encoding. |
+  if (!converted) |
+    display_path = WideToUTF16Hack(base::SysNativeMBToWide(raw_path)); |
+ |
+  response_buffer_ = net::GetDirectoryListingHeader(display_path); |
+ |
+  // The top level directory (i.e. the path "/") gets no parent link. |
+  if (listing_url.path().length() <= 1) |
+    return; |
+ |
+  response_buffer_.append(net::GetDirectoryListingEntry( |
+      ASCIIToUTF16(".."), std::string(), false, 0, base::Time())); |
+} |
+ |
+// Hands the accumulated listing output to the client and empties the buffer. |
+// Does nothing when there is no pending output. |
+void FtpDirectoryListingResponseDelegate::SendResponseBufferToClient() { |
+  if (response_buffer_.empty()) |
+    return; |
+ |
+  client_->didReceiveData(loader_, response_buffer_.data(), |
+                          response_buffer_.length(), -1); |
+  response_buffer_.clear(); |
+} |
+ |
+} // namespace webkit_glue |