| Index: webkit/glue/ftp_directory_listing_response_delegate.cc
|
| diff --git a/webkit/glue/ftp_directory_listing_response_delegate.cc b/webkit/glue/ftp_directory_listing_response_delegate.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..a0ee89dd764a5a9cbe6f489a7786719f5e17a7cc
|
| --- /dev/null
|
| +++ b/webkit/glue/ftp_directory_listing_response_delegate.cc
|
| @@ -0,0 +1,253 @@
|
| +// Copyright (c) 2009 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "webkit/glue/ftp_directory_listing_response_delegate.h"
|
| +
|
| +#include <vector>
|
| +
|
| +#include "base/logging.h"
|
| +#include "base/string_util.h"
|
| +#include "base/sys_string_conversions.h"
|
| +#include "base/time.h"
|
| +#include "net/base/escape.h"
|
| +#include "net/base/net_util.h"
|
| +#include "net/ftp/ftp_server_type_histograms.h"
|
| +#include "unicode/ucsdet.h"
|
| +#include "webkit/api/public/WebURL.h"
|
| +#include "webkit/api/public/WebURLLoaderClient.h"
|
| +
|
| +using WebKit::WebURLLoader;
|
| +using WebKit::WebURLLoaderClient;
|
| +using WebKit::WebURLResponse;
|
| +
|
| +namespace {
|
| +
|
| +// A very simple-minded character encoding detection.
|
| +// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
|
| +// like TLD, the UI language/default encoding of a client, etc). In that case,
|
| +// this should be pulled out of here and moved somewhere in base because there
|
| +// can be other use cases.
|
| +std::string DetectEncoding(const std::string& text) {
|
| + if (IsStringASCII(text))
|
| + return std::string();
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UCharsetDetector* detector = ucsdet_open(&status);
|
| + ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
|
| + &status);
|
| + const UCharsetMatch* match = ucsdet_detect(detector, &status);
|
| + const char* encoding = ucsdet_getName(match, &status);
|
| + // Should we check the quality of the match? A rather arbitrary number is
|
| + // assigned by ICU and it's hard to come up with a lower limit.
|
| + if (U_FAILURE(status))
|
| + return std::string();
|
| + return encoding;
|
| +}
|
| +
|
| +string16 RawByteSequenceToFilename(const char* raw_filename,
|
| + const std::string& encoding) {
|
| + if (encoding.empty())
|
| + return ASCIIToUTF16(raw_filename);
|
| +
|
| + // Try the detected encoding before falling back to the native codepage.
|
| + // Using the native codepage does not make much sense, but we don't have
|
| + // much else to resort to.
|
| + string16 filename;
|
| + if (!CodepageToUTF16(raw_filename, encoding.c_str(),
|
| + OnStringUtilConversionError::SUBSTITUTE, &filename))
|
| + filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename));
|
| + return filename;
|
| +}
|
| +
|
| +void ExtractFullLinesFromBuffer(std::string* buffer,
|
| + std::vector<std::string>* lines) {
|
| + int cut_pos = 0;
|
| + for (size_t i = 0; i < buffer->length(); i++) {
|
| + if (i >= 1 && (*buffer)[i - 1] == '\r' && (*buffer)[i] == '\n') {
|
| + lines->push_back(buffer->substr(cut_pos, i - cut_pos - 1));
|
| + cut_pos = i + 1;
|
| + }
|
| + }
|
| + buffer->erase(0, cut_pos);
|
| +}
|
| +
|
| +void LogFtpServerType(char server_type) {
|
| + switch (server_type) {
|
| + case 'E':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_EPLF);
|
| + break;
|
| + case 'V':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_VMS);
|
| + break;
|
| + case 'C':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_CMS);
|
| + break;
|
| + case 'W':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_DOS);
|
| + break;
|
| + case 'O':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_OS2);
|
| + break;
|
| + case 'U':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_LSL);
|
| + break;
|
| + case 'w':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_W16);
|
| + break;
|
| + case 'D':
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_DLS);
|
| + break;
|
| + default:
|
| + net::UpdateFtpServerTypeHistograms(net::SERVER_UNKNOWN);
|
| + break;
|
| + }
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +namespace webkit_glue {
|
| +
|
| +FtpDirectoryListingResponseDelegate::FtpDirectoryListingResponseDelegate(
|
| + WebURLLoaderClient* client,
|
| + WebURLLoader* loader,
|
| + const WebURLResponse& response)
|
| + : client_(client),
|
| + loader_(loader),
|
| + original_response_(response) {
|
| + Init();
|
| +}
|
| +
|
| +void FtpDirectoryListingResponseDelegate::OnReceivedData(const char* data,
|
| + int data_len) {
|
| + input_buffer_.append(data, data_len);
|
| +
|
| + // If all we've seen so far is ASCII, encoding_ is empty. Try to detect the
|
| + // encoding. We don't do the separate UTF-8 check here because the encoding
|
| + // detection with a longer chunk (as opposed to the relatively short path
|
| + // component of the url) is unlikely to mistake UTF-8 for a legacy encoding.
|
| + // If it turns out to be wrong, a separate UTF-8 check has to be added.
|
| + //
|
| + // TODO(jungshik): UTF-8 has to be 'enforced' without any heuristics when
|
| + // we're talking to an FTP server compliant to RFC 2640 (that is, its response
|
| + // to FEAT command includes 'UTF8').
|
| + // See http://wiki.filezilla-project.org/Character_Set
|
| + if (encoding_.empty())
|
| + encoding_ = DetectEncoding(input_buffer_);
|
| +
|
| + std::vector<std::string> lines;
|
| + ExtractFullLinesFromBuffer(&input_buffer_, &lines);
|
| +
|
| + for (std::vector<std::string>::const_iterator line = lines.begin();
|
| + line != lines.end(); ++line) {
|
| + struct net::list_result result;
|
| + int line_type = net::ParseFTPList(line->c_str(), &parse_state_, &result);
|
| +
|
| + // The original code assumed months are in range 0-11 (PRExplodedTime),
|
| + // but our Time class expects a 1-12 range. Adjust it here, because
|
| + // the third-party parsing code uses bit-shifting on the month,
|
| + // and it'd be too easy to break that logic.
|
| + result.fe_time.month++;
|
| + DCHECK_LE(1, result.fe_time.month);
|
| + DCHECK_GE(12, result.fe_time.month);
|
| +
|
| + int64 file_size;
|
| + switch (line_type) {
|
| + case 'd': // Directory entry.
|
| + response_buffer_.append(net::GetDirectoryListingEntry(
|
| + RawByteSequenceToFilename(result.fe_fname, encoding_),
|
| + result.fe_fname, true, 0,
|
| + base::Time::FromLocalExploded(result.fe_time)));
|
| + break;
|
| + case 'f': // File entry.
|
| + if (StringToInt64(result.fe_size, &file_size)) {
|
| + response_buffer_.append(net::GetDirectoryListingEntry(
|
| + RawByteSequenceToFilename(result.fe_fname, encoding_),
|
| + result.fe_fname, false, file_size,
|
| + base::Time::FromLocalExploded(result.fe_time)));
|
| + }
|
| + break;
|
| + case 'l': { // Symlink entry.
|
| + std::string filename(result.fe_fname, result.fe_fnlen);
|
| +
|
| + // Parsers for styles 'U' and 'W' handle " -> " themselves.
|
| + if (parse_state_.lstyle != 'U' && parse_state_.lstyle != 'W') {
|
| + std::string::size_type offset = filename.find(" -> ");
|
| + if (offset != std::string::npos)
|
| + filename = filename.substr(0, offset);
|
| + }
|
| +
|
| + if (StringToInt64(result.fe_size, &file_size)) {
|
| + response_buffer_.append(net::GetDirectoryListingEntry(
|
| + RawByteSequenceToFilename(filename.c_str(), encoding_),
|
| + filename, false, file_size,
|
| + base::Time::FromLocalExploded(result.fe_time)));
|
| + }
|
| + }
|
| + break;
|
| + case '?': // Junk entry.
|
| + case '"': // Comment entry.
|
| + break;
|
| + default:
|
| + NOTREACHED();
|
| + break;
|
| + }
|
| + }
|
| +
|
| + SendResponseBufferToClient();
|
| +}
|
| +
|
| +void FtpDirectoryListingResponseDelegate::OnCompletedRequest() {
|
| + SendResponseBufferToClient();
|
| +
|
| + // Only log the server type if we got enough data to reliably detect it.
|
| + if (parse_state_.parsed_one)
|
| + LogFtpServerType(parse_state_.lstyle);
|
| +}
|
| +
|
| +void FtpDirectoryListingResponseDelegate::Init() {
|
| + memset(&parse_state_, 0, sizeof(parse_state_));
|
| +
|
| + GURL response_url(original_response_.url());
|
| + UnescapeRule::Type unescape_rules = UnescapeRule::SPACES |
|
| + UnescapeRule::URL_SPECIAL_CHARS;
|
| + std::string unescaped_path = UnescapeURLComponent(response_url.path(),
|
| + unescape_rules);
|
| + string16 path_utf16;
|
| + // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII,
|
| + // but many old FTP servers use legacy encodings. Try UTF-8 first and
|
| + // detect the encoding.
|
| + if (IsStringUTF8(unescaped_path)) {
|
| + path_utf16 = UTF8ToUTF16(unescaped_path);
|
| + } else {
|
| + std::string encoding = DetectEncoding(unescaped_path);
|
| + // Try the detected encoding. If it fails, resort to the
|
| + // OS native encoding.
|
| + if (encoding.empty() ||
|
| + !CodepageToUTF16(unescaped_path, encoding.c_str(),
|
| + OnStringUtilConversionError::SUBSTITUTE,
|
| + &path_utf16))
|
| + path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(unescaped_path));
|
| + }
|
| +
|
| + response_buffer_ = net::GetDirectoryListingHeader(path_utf16);
|
| +
|
| + // If this isn't top level directory (i.e. the path isn't "/",)
|
| + // add a link to the parent directory.
|
| + if (response_url.path().length() > 1) {
|
| + response_buffer_.append(
|
| + net::GetDirectoryListingEntry(ASCIIToUTF16(".."),
|
| + std::string(),
|
| + false, 0,
|
| + base::Time()));
|
| + }
|
| +}
|
| +
|
| +void FtpDirectoryListingResponseDelegate::SendResponseBufferToClient() {
|
| + if (!response_buffer_.empty()) {
|
| + client_->didReceiveData(loader_, response_buffer_.data(),
|
| + response_buffer_.length(), -1);
|
| + response_buffer_.clear();
|
| + }
|
| +}
|
| +
|
| +} // namespace webkit_glue
|
|
|