Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(373)

Side by Side Diff: webkit/glue/ftp_directory_listing_response_delegate.cc

Issue 210027: Move FTP LIST parsing code to the renderer process. (Closed)
Patch Set: fixes Created 11 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "webkit/glue/ftp_directory_listing_response_delegate.h"
6
7 #include <vector>
8
9 #include "base/logging.h"
10 #include "base/string_util.h"
11 #include "base/sys_string_conversions.h"
12 #include "base/time.h"
13 #include "net/base/escape.h"
14 #include "net/base/net_util.h"
15 #include "net/ftp/ftp_server_type_histograms.h"
16 #include "unicode/ucsdet.h"
17 #include "webkit/api/public/WebURL.h"
18 #include "webkit/api/public/WebURLLoaderClient.h"
19
20 using WebKit::WebURLLoader;
21 using WebKit::WebURLLoaderClient;
22 using WebKit::WebURLResponse;
23
24 namespace {
25
26 // A very simple-minded character encoding detection.
27 // TODO(jungshik): We can apply more heuristics here (e.g. using various hints
28 // like TLD, the UI language/default encoding of a client, etc). In that case,
29 // this should be pulled out of here and moved somewhere in base because there
30 // can be other use cases.
31 std::string DetectEncoding(const std::string& text) {
32 if (IsStringASCII(text))
33 return std::string();
34 UErrorCode status = U_ZERO_ERROR;
35 UCharsetDetector* detector = ucsdet_open(&status);
36 ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
37 &status);
38 const UCharsetMatch* match = ucsdet_detect(detector, &status);
39 const char* encoding = ucsdet_getName(match, &status);
40 // Should we check the quality of the match? A rather arbitrary number is
41 // assigned by ICU and it's hard to come up with a lower limit.
42 if (U_FAILURE(status))
43 return std::string();
44 return encoding;
45 }
46
47 string16 RawByteSequenceToFilename(const char* raw_filename,
48 const std::string& encoding) {
49 if (encoding.empty())
50 return ASCIIToUTF16(raw_filename);
51
52 // Try the detected encoding before falling back to the native codepage.
53 // Using the native codepage does not make much sense, but we don't have
54 // much else to resort to.
55 string16 filename;
56 if (!CodepageToUTF16(raw_filename, encoding.c_str(),
57 OnStringUtilConversionError::SUBSTITUTE, &filename))
58 filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename));
59 return filename;
60 }
61
// Moves every complete line out of |buffer| and appends it to |lines|.
//
// A line is complete when it is terminated by "\r\n"; the terminator itself
// is not included in the extracted line. A lone '\r' or '\n' does not end a
// line. Whatever follows the last terminator (possibly a partial line, or a
// trailing '\r' whose '\n' has not arrived yet) is left in |buffer| so that
// it can be completed by data appended later.
void ExtractFullLinesFromBuffer(std::string* buffer,
                                std::vector<std::string>* lines) {
  // Use the string's own size type throughout: the previous |int| offset was
  // mixed with size_t indices and narrowed on assignment.
  std::string::size_type line_start = 0;
  for (;;) {
    const std::string::size_type terminator = buffer->find("\r\n", line_start);
    if (terminator == std::string::npos)
      break;
    lines->push_back(buffer->substr(line_start, terminator - line_start));
    line_start = terminator + 2;  // Skip past "\r\n".
  }
  buffer->erase(0, line_start);
}
73
74 void LogFtpServerType(char server_type) {
75 switch (server_type) {
76 case 'E':
77 net::UpdateFtpServerTypeHistograms(net::SERVER_EPLF);
78 break;
79 case 'V':
80 net::UpdateFtpServerTypeHistograms(net::SERVER_VMS);
81 break;
82 case 'C':
83 net::UpdateFtpServerTypeHistograms(net::SERVER_CMS);
84 break;
85 case 'W':
86 net::UpdateFtpServerTypeHistograms(net::SERVER_DOS);
87 break;
88 case 'O':
89 net::UpdateFtpServerTypeHistograms(net::SERVER_OS2);
90 break;
91 case 'U':
92 net::UpdateFtpServerTypeHistograms(net::SERVER_LSL);
93 break;
94 case 'w':
95 net::UpdateFtpServerTypeHistograms(net::SERVER_W16);
96 break;
97 case 'D':
98 net::UpdateFtpServerTypeHistograms(net::SERVER_DLS);
99 break;
100 default:
101 net::UpdateFtpServerTypeHistograms(net::SERVER_UNKNOWN);
102 break;
103 }
104 }
105
106 } // namespace
107
108 namespace webkit_glue {
109
110 FtpDirectoryListingResponseDelegate::FtpDirectoryListingResponseDelegate(
111 WebURLLoaderClient* client,
112 WebURLLoader* loader,
113 const WebURLResponse& response)
114 : client_(client),
115 loader_(loader),
116 original_response_(response) {
117 Init();
118 }
119
120 void FtpDirectoryListingResponseDelegate::OnReceivedData(const char* data,
121 int data_len) {
122 input_buffer_.append(data, data_len);
123
124 // If all we've seen so far is ASCII, encoding_ is empty. Try to detect the
125 // encoding. We don't do the separate UTF-8 check here because the encoding
126 // detection with a longer chunk (as opposed to the relatively short path
127 // component of the url) is unlikely to mistake UTF-8 for a legacy encoding.
128 // If it turns out to be wrong, a separate UTF-8 check has to be added.
129 //
130 // TODO(jungshik): UTF-8 has to be 'enforced' without any heuristics when
131 // we're talking to an FTP server compliant to RFC 2640 (that is, its response
132 // to FEAT command includes 'UTF8').
133 // See http://wiki.filezilla-project.org/Character_Set
134 if (encoding_.empty())
135 encoding_ = DetectEncoding(input_buffer_);
136
137 std::vector<std::string> lines;
138 ExtractFullLinesFromBuffer(&input_buffer_, &lines);
139
140 for (std::vector<std::string>::const_iterator line = lines.begin();
141 line != lines.end(); ++line) {
142 struct net::list_result result;
143 int line_type = net::ParseFTPList(line->c_str(), &parse_state_, &result);
144
145 // The original code assumed months are in range 0-11 (PRExplodedTime),
146 // but our Time class expects a 1-12 range. Adjust it here, because
147 // the third-party parsing code uses bit-shifting on the month,
148 // and it'd be too easy to break that logic.
149 result.fe_time.month++;
150 DCHECK_LE(1, result.fe_time.month);
151 DCHECK_GE(12, result.fe_time.month);
152
153 int64 file_size;
154 switch (line_type) {
155 case 'd': // Directory entry.
156 response_buffer_.append(net::GetDirectoryListingEntry(
157 RawByteSequenceToFilename(result.fe_fname, encoding_),
158 result.fe_fname, true, 0,
159 base::Time::FromLocalExploded(result.fe_time)));
160 break;
161 case 'f': // File entry.
162 if (StringToInt64(result.fe_size, &file_size)) {
163 response_buffer_.append(net::GetDirectoryListingEntry(
164 RawByteSequenceToFilename(result.fe_fname, encoding_),
165 result.fe_fname, false, file_size,
166 base::Time::FromLocalExploded(result.fe_time)));
167 }
168 break;
169 case 'l': { // Symlink entry.
170 std::string filename(result.fe_fname, result.fe_fnlen);
171
172 // Parsers for styles 'U' and 'W' handle " -> " themselves.
173 if (parse_state_.lstyle != 'U' && parse_state_.lstyle != 'W') {
174 std::string::size_type offset = filename.find(" -> ");
175 if (offset != std::string::npos)
176 filename = filename.substr(0, offset);
177 }
178
179 if (StringToInt64(result.fe_size, &file_size)) {
180 response_buffer_.append(net::GetDirectoryListingEntry(
181 RawByteSequenceToFilename(filename.c_str(), encoding_),
182 filename, false, file_size,
183 base::Time::FromLocalExploded(result.fe_time)));
184 }
185 }
186 break;
187 case '?': // Junk entry.
188 case '"': // Comment entry.
189 break;
190 default:
191 NOTREACHED();
192 break;
193 }
194 }
195
196 SendResponseBufferToClient();
197 }
198
199 void FtpDirectoryListingResponseDelegate::OnCompletedRequest() {
200 SendResponseBufferToClient();
201
202 // Only log the server type if we got enough data to reliably detect it.
203 if (parse_state_.parsed_one)
204 LogFtpServerType(parse_state_.lstyle);
205 }
206
207 void FtpDirectoryListingResponseDelegate::Init() {
208 memset(&parse_state_, 0, sizeof(parse_state_));
209
210 GURL response_url(original_response_.url());
211 UnescapeRule::Type unescape_rules = UnescapeRule::SPACES |
212 UnescapeRule::URL_SPECIAL_CHARS;
213 std::string unescaped_path = UnescapeURLComponent(response_url.path(),
214 unescape_rules);
215 string16 path_utf16;
216 // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII,
217 // but many old FTP servers use legacy encodings. Try UTF-8 first and
218 // detect the encoding.
219 if (IsStringUTF8(unescaped_path)) {
220 path_utf16 = UTF8ToUTF16(unescaped_path);
221 } else {
222 std::string encoding = DetectEncoding(unescaped_path);
223 // Try the detected encoding. If it fails, resort to the
224 // OS native encoding.
225 if (encoding.empty() ||
226 !CodepageToUTF16(unescaped_path, encoding.c_str(),
227 OnStringUtilConversionError::SUBSTITUTE,
228 &path_utf16))
229 path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(unescaped_path));
230 }
231
232 response_buffer_ = net::GetDirectoryListingHeader(path_utf16);
233
234 // If this isn't top level directory (i.e. the path isn't "/",)
235 // add a link to the parent directory.
236 if (response_url.path().length() > 1) {
237 response_buffer_.append(
238 net::GetDirectoryListingEntry(ASCIIToUTF16(".."),
239 std::string(),
240 false, 0,
241 base::Time()));
242 }
243 }
244
245 void FtpDirectoryListingResponseDelegate::SendResponseBufferToClient() {
246 if (!response_buffer_.empty()) {
247 client_->didReceiveData(loader_, response_buffer_.data(),
248 response_buffer_.length(), -1);
249 response_buffer_.clear();
250 }
251 }
252
253 } // namespace webkit_glue
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698