OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "webkit/glue/ftp_directory_listing_response_delegate.h" |
| 6 |
| 7 #include <vector> |
| 8 |
| 9 #include "base/logging.h" |
| 10 #include "base/string_util.h" |
| 11 #include "base/sys_string_conversions.h" |
| 12 #include "base/time.h" |
| 13 #include "net/base/escape.h" |
| 14 #include "net/base/net_util.h" |
| 15 #include "net/ftp/ftp_server_type_histograms.h" |
| 16 #include "unicode/ucsdet.h" |
| 17 #include "webkit/api/public/WebURL.h" |
| 18 #include "webkit/api/public/WebURLLoaderClient.h" |
| 19 |
| 20 using WebKit::WebURLLoader; |
| 21 using WebKit::WebURLLoaderClient; |
| 22 using WebKit::WebURLResponse; |
| 23 |
| 24 namespace { |
| 25 |
| 26 // A very simple-minded character encoding detection. |
| 27 // TODO(jungshik): We can apply more heuristics here (e.g. using various hints |
| 28 // like TLD, the UI language/default encoding of a client, etc). In that case, |
| 29 // this should be pulled out of here and moved somewhere in base because there |
| 30 // can be other use cases. |
| 31 std::string DetectEncoding(const std::string& text) { |
| 32 if (IsStringASCII(text)) |
| 33 return std::string(); |
| 34 UErrorCode status = U_ZERO_ERROR; |
| 35 UCharsetDetector* detector = ucsdet_open(&status); |
| 36 ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), |
| 37 &status); |
| 38 const UCharsetMatch* match = ucsdet_detect(detector, &status); |
| 39 const char* encoding = ucsdet_getName(match, &status); |
| 40 // Should we check the quality of the match? A rather arbitrary number is |
| 41 // assigned by ICU and it's hard to come up with a lower limit. |
| 42 if (U_FAILURE(status)) |
| 43 return std::string(); |
| 44 return encoding; |
| 45 } |
| 46 |
| 47 string16 RawByteSequenceToFilename(const char* raw_filename, |
| 48 const std::string& encoding) { |
| 49 if (encoding.empty()) |
| 50 return ASCIIToUTF16(raw_filename); |
| 51 |
| 52 // Try the detected encoding before falling back to the native codepage. |
| 53 // Using the native codepage does not make much sense, but we don't have |
| 54 // much else to resort to. |
| 55 string16 filename; |
| 56 if (!CodepageToUTF16(raw_filename, encoding.c_str(), |
| 57 OnStringUtilConversionError::SUBSTITUTE, &filename)) |
| 58 filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename)); |
| 59 return filename; |
| 60 } |
| 61 |
// Moves every complete line out of |buffer| and appends it to |lines|.
//
// A line is complete when it is terminated by "\r\n"; the terminator is not
// included in the extracted line. Anything after the last terminator
// (including a lone trailing '\r') stays in |buffer| so that it can be
// completed by data appended later. A bare '\n' does not end a line.
void ExtractFullLinesFromBuffer(std::string* buffer,
                                std::vector<std::string>* lines) {
  // Use the string's own size type for offsets; an int here would be mixed
  // with unsigned indices and could be narrowed for very large buffers.
  std::string::size_type line_start = 0;
  for (;;) {
    const std::string::size_type terminator = buffer->find("\r\n", line_start);
    if (terminator == std::string::npos)
      break;
    lines->push_back(buffer->substr(line_start, terminator - line_start));
    line_start = terminator + 2;  // Skip over the "\r\n" itself.
  }
  buffer->erase(0, line_start);
}
| 73 |
| 74 void LogFtpServerType(char server_type) { |
| 75 switch (server_type) { |
| 76 case 'E': |
| 77 net::UpdateFtpServerTypeHistograms(net::SERVER_EPLF); |
| 78 break; |
| 79 case 'V': |
| 80 net::UpdateFtpServerTypeHistograms(net::SERVER_VMS); |
| 81 break; |
| 82 case 'C': |
| 83 net::UpdateFtpServerTypeHistograms(net::SERVER_CMS); |
| 84 break; |
| 85 case 'W': |
| 86 net::UpdateFtpServerTypeHistograms(net::SERVER_DOS); |
| 87 break; |
| 88 case 'O': |
| 89 net::UpdateFtpServerTypeHistograms(net::SERVER_OS2); |
| 90 break; |
| 91 case 'U': |
| 92 net::UpdateFtpServerTypeHistograms(net::SERVER_LSL); |
| 93 break; |
| 94 case 'w': |
| 95 net::UpdateFtpServerTypeHistograms(net::SERVER_W16); |
| 96 break; |
| 97 case 'D': |
| 98 net::UpdateFtpServerTypeHistograms(net::SERVER_DLS); |
| 99 break; |
| 100 default: |
| 101 net::UpdateFtpServerTypeHistograms(net::SERVER_UNKNOWN); |
| 102 break; |
| 103 } |
| 104 } |
| 105 |
| 106 } // namespace |
| 107 |
| 108 namespace webkit_glue { |
| 109 |
| 110 FtpDirectoryListingResponseDelegate::FtpDirectoryListingResponseDelegate( |
| 111 WebURLLoaderClient* client, |
| 112 WebURLLoader* loader, |
| 113 const WebURLResponse& response) |
| 114 : client_(client), |
| 115 loader_(loader), |
| 116 original_response_(response) { |
| 117 Init(); |
| 118 } |
| 119 |
| 120 void FtpDirectoryListingResponseDelegate::OnReceivedData(const char* data, |
| 121 int data_len) { |
| 122 input_buffer_.append(data, data_len); |
| 123 |
| 124 // If all we've seen so far is ASCII, encoding_ is empty. Try to detect the |
| 125 // encoding. We don't do the separate UTF-8 check here because the encoding |
| 126 // detection with a longer chunk (as opposed to the relatively short path |
| 127 // component of the url) is unlikely to mistake UTF-8 for a legacy encoding. |
| 128 // If it turns out to be wrong, a separate UTF-8 check has to be added. |
| 129 // |
| 130 // TODO(jungshik): UTF-8 has to be 'enforced' without any heuristics when |
| 131 // we're talking to an FTP server compliant to RFC 2640 (that is, its response |
| 132 // to FEAT command includes 'UTF8'). |
| 133 // See http://wiki.filezilla-project.org/Character_Set |
| 134 if (encoding_.empty()) |
| 135 encoding_ = DetectEncoding(input_buffer_); |
| 136 |
| 137 std::vector<std::string> lines; |
| 138 ExtractFullLinesFromBuffer(&input_buffer_, &lines); |
| 139 |
| 140 for (std::vector<std::string>::const_iterator line = lines.begin(); |
| 141 line != lines.end(); ++line) { |
| 142 struct net::list_result result; |
| 143 int line_type = net::ParseFTPList(line->c_str(), &parse_state_, &result); |
| 144 |
| 145 // The original code assumed months are in range 0-11 (PRExplodedTime), |
| 146 // but our Time class expects a 1-12 range. Adjust it here, because |
| 147 // the third-party parsing code uses bit-shifting on the month, |
| 148 // and it'd be too easy to break that logic. |
| 149 result.fe_time.month++; |
| 150 DCHECK_LE(1, result.fe_time.month); |
| 151 DCHECK_GE(12, result.fe_time.month); |
| 152 |
| 153 int64 file_size; |
| 154 switch (line_type) { |
| 155 case 'd': // Directory entry. |
| 156 response_buffer_.append(net::GetDirectoryListingEntry( |
| 157 RawByteSequenceToFilename(result.fe_fname, encoding_), |
| 158 result.fe_fname, true, 0, |
| 159 base::Time::FromLocalExploded(result.fe_time))); |
| 160 break; |
| 161 case 'f': // File entry. |
| 162 if (StringToInt64(result.fe_size, &file_size)) { |
| 163 response_buffer_.append(net::GetDirectoryListingEntry( |
| 164 RawByteSequenceToFilename(result.fe_fname, encoding_), |
| 165 result.fe_fname, false, file_size, |
| 166 base::Time::FromLocalExploded(result.fe_time))); |
| 167 } |
| 168 break; |
| 169 case 'l': { // Symlink entry. |
| 170 std::string filename(result.fe_fname, result.fe_fnlen); |
| 171 |
| 172 // Parsers for styles 'U' and 'W' handle " -> " themselves. |
| 173 if (parse_state_.lstyle != 'U' && parse_state_.lstyle != 'W') { |
| 174 std::string::size_type offset = filename.find(" -> "); |
| 175 if (offset != std::string::npos) |
| 176 filename = filename.substr(0, offset); |
| 177 } |
| 178 |
| 179 if (StringToInt64(result.fe_size, &file_size)) { |
| 180 response_buffer_.append(net::GetDirectoryListingEntry( |
| 181 RawByteSequenceToFilename(filename.c_str(), encoding_), |
| 182 filename, false, file_size, |
| 183 base::Time::FromLocalExploded(result.fe_time))); |
| 184 } |
| 185 } |
| 186 break; |
| 187 case '?': // Junk entry. |
| 188 case '"': // Comment entry. |
| 189 break; |
| 190 default: |
| 191 NOTREACHED(); |
| 192 break; |
| 193 } |
| 194 } |
| 195 |
| 196 SendResponseBufferToClient(); |
| 197 } |
| 198 |
| 199 void FtpDirectoryListingResponseDelegate::OnCompletedRequest() { |
| 200 SendResponseBufferToClient(); |
| 201 |
| 202 // Only log the server type if we got enough data to reliably detect it. |
| 203 if (parse_state_.parsed_one) |
| 204 LogFtpServerType(parse_state_.lstyle); |
| 205 } |
| 206 |
| 207 void FtpDirectoryListingResponseDelegate::Init() { |
| 208 memset(&parse_state_, 0, sizeof(parse_state_)); |
| 209 |
| 210 GURL response_url(original_response_.url()); |
| 211 UnescapeRule::Type unescape_rules = UnescapeRule::SPACES | |
| 212 UnescapeRule::URL_SPECIAL_CHARS; |
| 213 std::string unescaped_path = UnescapeURLComponent(response_url.path(), |
| 214 unescape_rules); |
| 215 string16 path_utf16; |
| 216 // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII, |
| 217 // but many old FTP servers use legacy encodings. Try UTF-8 first and |
| 218 // detect the encoding. |
| 219 if (IsStringUTF8(unescaped_path)) { |
| 220 path_utf16 = UTF8ToUTF16(unescaped_path); |
| 221 } else { |
| 222 std::string encoding = DetectEncoding(unescaped_path); |
| 223 // Try the detected encoding. If it fails, resort to the |
| 224 // OS native encoding. |
| 225 if (encoding.empty() || |
| 226 !CodepageToUTF16(unescaped_path, encoding.c_str(), |
| 227 OnStringUtilConversionError::SUBSTITUTE, |
| 228 &path_utf16)) |
| 229 path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(unescaped_path)); |
| 230 } |
| 231 |
| 232 response_buffer_ = net::GetDirectoryListingHeader(path_utf16); |
| 233 |
| 234 // If this isn't top level directory (i.e. the path isn't "/",) |
| 235 // add a link to the parent directory. |
| 236 if (response_url.path().length() > 1) { |
| 237 response_buffer_.append( |
| 238 net::GetDirectoryListingEntry(ASCIIToUTF16(".."), |
| 239 std::string(), |
| 240 false, 0, |
| 241 base::Time())); |
| 242 } |
| 243 } |
| 244 |
| 245 void FtpDirectoryListingResponseDelegate::SendResponseBufferToClient() { |
| 246 if (!response_buffer_.empty()) { |
| 247 client_->didReceiveData(loader_, response_buffer_.data(), |
| 248 response_buffer_.length(), -1); |
| 249 response_buffer_.clear(); |
| 250 } |
| 251 } |
| 252 |
| 253 } // namespace webkit_glue |
OLD | NEW |