Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Side by Side Diff: net/ftp/ftp_directory_listing_parser_vms.cc

Issue 6670085: FTP: Detect the character encoding only after the entire listing is received. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: update histograms for SERVER_UNKNOWN Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/ftp/ftp_directory_listing_parser_vms.h" 5 #include "net/ftp/ftp_directory_listing_parser_vms.h"
6 6
7 #include <vector> 7 #include <vector>
8 8
9 #include "base/string_number_conversions.h" 9 #include "base/string_number_conversions.h"
10 #include "base/string_split.h" 10 #include "base/string_split.h"
11 #include "base/string_util.h" 11 #include "base/string_util.h"
12 #include "base/time.h"
12 #include "base/utf_string_conversions.h" 13 #include "base/utf_string_conversions.h"
14 #include "net/ftp/ftp_directory_listing_parser.h"
13 #include "net/ftp/ftp_util.h" 15 #include "net/ftp/ftp_util.h"
14 16
15 namespace { 17 namespace {
16 18
17 // Converts the filename component in listing to the filename we can display. 19 // Converts the filename component in listing to the filename we can display.
18 // Returns true on success. 20 // Returns true on success.
19 bool ParseVmsFilename(const string16& raw_filename, string16* parsed_filename, 21 bool ParseVmsFilename(const string16& raw_filename, string16* parsed_filename,
20 bool* is_directory) { 22 net::FtpDirectoryListingEntry::Type* type) {
21 // On VMS, the files and directories are versioned. The version number is 23 // On VMS, the files and directories are versioned. The version number is
22 // separated from the file name by a semicolon. Example: ANNOUNCE.TXT;2. 24 // separated from the file name by a semicolon. Example: ANNOUNCE.TXT;2.
23 std::vector<string16> listing_parts; 25 std::vector<string16> listing_parts;
24 base::SplitString(raw_filename, ';', &listing_parts); 26 base::SplitString(raw_filename, ';', &listing_parts);
25 if (listing_parts.size() != 2) 27 if (listing_parts.size() != 2)
26 return false; 28 return false;
27 int version_number; 29 int version_number;
28 if (!base::StringToInt(listing_parts[1], &version_number)) 30 if (!base::StringToInt(listing_parts[1], &version_number))
29 return false; 31 return false;
30 if (version_number < 0) 32 if (version_number < 0)
31 return false; 33 return false;
32 34
33 // Even directories have extensions in the listings. Don't display extensions 35 // Even directories have extensions in the listings. Don't display extensions
34 // for directories; it's awkward for non-VMS users. Also, VMS is 36 // for directories; it's awkward for non-VMS users. Also, VMS is
35 // case-insensitive, but generally uses uppercase characters. This may look 37 // case-insensitive, but generally uses uppercase characters. This may look
36 // awkward, so we convert them to lower case. 38 // awkward, so we convert them to lower case.
37 std::vector<string16> filename_parts; 39 std::vector<string16> filename_parts;
38 base::SplitString(listing_parts[0], '.', &filename_parts); 40 base::SplitString(listing_parts[0], '.', &filename_parts);
39 if (filename_parts.size() != 2) 41 if (filename_parts.size() != 2)
40 return false; 42 return false;
41 if (EqualsASCII(filename_parts[1], "DIR")) { 43 if (EqualsASCII(filename_parts[1], "DIR")) {
42 *parsed_filename = StringToLowerASCII(filename_parts[0]); 44 *parsed_filename = StringToLowerASCII(filename_parts[0]);
43 *is_directory = true; 45 *type = net::FtpDirectoryListingEntry::DIRECTORY;
44 } else { 46 } else {
45 *parsed_filename = StringToLowerASCII(listing_parts[0]); 47 *parsed_filename = StringToLowerASCII(listing_parts[0]);
46 *is_directory = false; 48 *type = net::FtpDirectoryListingEntry::FILE;
47 } 49 }
48 return true; 50 return true;
49 } 51 }
50 52
51 bool ParseVmsFilesize(const string16& input, int64* size) { 53 bool ParseVmsFilesize(const string16& input, int64* size) {
52 // VMS's directory listing gives us file size in blocks. We assume that 54 // VMS's directory listing gives us file size in blocks. We assume that
53 // the block size is 512 bytes. It doesn't give accurate file size, but is the 55 // the block size is 512 bytes. It doesn't give accurate file size, but is the
54 // best information we have. 56 // best information we have.
55 const int kBlockSize = 512; 57 const int kBlockSize = 512;
56 58
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
110 LooksLikeVmsFileProtectionListingPart(parts[2]) && 112 LooksLikeVmsFileProtectionListingPart(parts[2]) &&
111 LooksLikeVmsFileProtectionListingPart(parts[3]); 113 LooksLikeVmsFileProtectionListingPart(parts[3]);
112 } 114 }
113 115
114 bool LooksLikeVmsUserIdentificationCode(const string16& input) { 116 bool LooksLikeVmsUserIdentificationCode(const string16& input) {
115 if (input.length() < 2) 117 if (input.length() < 2)
116 return false; 118 return false;
117 return input[0] == '[' && input[input.length() - 1] == ']'; 119 return input[0] == '[' && input[input.length() - 1] == ']';
118 } 120 }
119 121
122 bool LooksLikePermissionDeniedError(const string16& text) {
123 static const char* kPermissionDeniedMessages[] = {
124 "%RMS-E-PRV",
125 "privilege",
126 };
127
128 for (size_t i = 0; i < arraysize(kPermissionDeniedMessages); i++) {
129 if (text.find(ASCIIToUTF16(kPermissionDeniedMessages[i])) != string16::npos)
130 return true;
131 }
132
133 return false;
134 }
135
120 bool VmsDateListingToTime(const std::vector<string16>& columns, 136 bool VmsDateListingToTime(const std::vector<string16>& columns,
121 base::Time* time) { 137 base::Time* time) {
122 DCHECK_EQ(3U, columns.size()); 138 DCHECK_EQ(4U, columns.size());
123 139
124 base::Time::Exploded time_exploded = { 0 }; 140 base::Time::Exploded time_exploded = { 0 };
125 141
126 // Date should be in format DD-MMM-YYYY. 142 // Date should be in format DD-MMM-YYYY.
127 std::vector<string16> date_parts; 143 std::vector<string16> date_parts;
128 base::SplitString(columns[1], '-', &date_parts); 144 base::SplitString(columns[2], '-', &date_parts);
129 if (date_parts.size() != 3) 145 if (date_parts.size() != 3)
130 return false; 146 return false;
131 if (!base::StringToInt(date_parts[0], &time_exploded.day_of_month)) 147 if (!base::StringToInt(date_parts[0], &time_exploded.day_of_month))
132 return false; 148 return false;
133 if (!net::FtpUtil::ThreeLetterMonthToNumber(date_parts[1], 149 if (!net::FtpUtil::ThreeLetterMonthToNumber(date_parts[1],
134 &time_exploded.month)) 150 &time_exploded.month))
135 return false; 151 return false;
136 if (!base::StringToInt(date_parts[2], &time_exploded.year)) 152 if (!base::StringToInt(date_parts[2], &time_exploded.year))
137 return false; 153 return false;
138 154
139 // Time can be in format HH:MM, HH:MM:SS, or HH:MM:SS.mm. Try to recognize the 155 // Time can be in format HH:MM, HH:MM:SS, or HH:MM:SS.mm. Try to recognize the
140 // last type first. Do not parse the seconds, they will be ignored anyway. 156 // last type first. Do not parse the seconds, they will be ignored anyway.
141 string16 time_column(columns[2]); 157 string16 time_column(columns[3]);
142 if (time_column.length() == 11 && time_column[8] == '.') 158 if (time_column.length() == 11 && time_column[8] == '.')
143 time_column = time_column.substr(0, 8); 159 time_column = time_column.substr(0, 8);
144 if (time_column.length() == 8 && time_column[5] == ':') 160 if (time_column.length() == 8 && time_column[5] == ':')
145 time_column = time_column.substr(0, 5); 161 time_column = time_column.substr(0, 5);
146 if (time_column.length() != 5) 162 if (time_column.length() != 5)
147 return false; 163 return false;
148 std::vector<string16> time_parts; 164 std::vector<string16> time_parts;
149 base::SplitString(time_column, ':', &time_parts); 165 base::SplitString(time_column, ':', &time_parts);
150 if (time_parts.size() != 2) 166 if (time_parts.size() != 2)
151 return false; 167 return false;
152 if (!base::StringToInt(time_parts[0], &time_exploded.hour)) 168 if (!base::StringToInt(time_parts[0], &time_exploded.hour))
153 return false; 169 return false;
154 if (!base::StringToInt(time_parts[1], &time_exploded.minute)) 170 if (!base::StringToInt(time_parts[1], &time_exploded.minute))
155 return false; 171 return false;
156 172
157 // We don't know the time zone of the server, so just use local time. 173 // We don't know the time zone of the server, so just use local time.
158 *time = base::Time::FromLocalExploded(time_exploded); 174 *time = base::Time::FromLocalExploded(time_exploded);
159 return true; 175 return true;
160 } 176 }
161 177
162 } // namespace 178 } // namespace
163 179
164 namespace net { 180 namespace net {
eroman 2011/03/24 23:09:35 nit: I suggest moving this up, so the anonymous na… [comment preview truncated]
Paweł Hajdan Jr. 2011/03/26 09:47:50 Done.
165 181
166 FtpDirectoryListingParserVms::FtpDirectoryListingParserVms() 182 bool ParseFtpDirectoryListingVms(
167 : state_(STATE_INITIAL), 183 const std::vector<string16>& lines,
168 last_is_directory_(false) { 184 std::vector<FtpDirectoryListingEntry>* entries) {
169 } 185 // The first non-empty line is the listing header. It often
186 // starts with "Directory ", but not always. We set a flag after
187 // seeing the header.
188 bool seen_header = false;
170 189
171 FtpDirectoryListingParserVms::~FtpDirectoryListingParserVms() {} 190 for (size_t i = 0; i < lines.size(); i++) {
191 if (lines[i].empty())
192 continue;
172 193
173 FtpServerType FtpDirectoryListingParserVms::GetServerType() const { 194 if (StartsWith(lines[i], ASCIIToUTF16("Total of "), true)) {
174 return SERVER_VMS; 195 // After the "total" line, all following lines must be empty.
175 } 196 for (size_t j = i + 1; j < lines.size(); j++)
197 if (!lines[j].empty())
198 return false;
176 199
177 bool FtpDirectoryListingParserVms::ConsumeLine(const string16& line) {
178 switch (state_) {
179 case STATE_INITIAL:
180 DCHECK(last_filename_.empty());
181 if (line.empty())
182 return true;
183 if (StartsWith(line, ASCIIToUTF16("Total of "), true)) {
184 state_ = STATE_END;
185 return true;
186 }
187 // We assume that the first non-empty line is the listing header. It often
188 // starts with "Directory ", but not always.
189 state_ = STATE_RECEIVED_HEADER;
190 return true; 200 return true;
191 case STATE_RECEIVED_HEADER: 201 }
192 DCHECK(last_filename_.empty()); 202
193 if (line.empty()) 203 if (!seen_header) {
194 return true; 204 seen_header = true;
195 state_ = STATE_ENTRIES; 205 continue;
196 return ConsumeEntryLine(line); 206 }
197 case STATE_ENTRIES: 207
198 if (line.empty()) { 208 if (LooksLikePermissionDeniedError(lines[i]))
199 if (!last_filename_.empty()) 209 continue;
200 return false; 210
201 state_ = STATE_RECEIVED_LAST_ENTRY; 211 std::vector<string16> columns;
202 return true; 212 base::SplitString(CollapseWhitespace(lines[i], false), ' ', &columns);
203 } 213
204 return ConsumeEntryLine(line); 214 if (columns.size() == 1) {
205 case STATE_RECEIVED_LAST_ENTRY: 215 // There can be no continuation if the current line is the last one.
206 DCHECK(last_filename_.empty()); 216 if (i == lines.size() - 1)
207 if (line.empty())
208 return true;
209 if (!StartsWith(line, ASCIIToUTF16("Total of "), true))
210 return false; 217 return false;
211 state_ = STATE_END; 218
212 return true; 219 // Join the current and next line and split them into columns.
213 case STATE_END: 220 columns.clear();
214 DCHECK(last_filename_.empty()); 221 base::SplitString(
222 CollapseWhitespace(lines[i] + ASCIIToUTF16(" ") + lines[i + 1],
223 false),
224 ' ',
225 &columns);
226 i++;
227 }
228
229 FtpDirectoryListingEntry entry;
230 if (!ParseVmsFilename(columns[0], &entry.name, &entry.type))
215 return false; 231 return false;
216 default: 232
217 NOTREACHED(); 233 // There are different variants of a VMS listing. Some display
234 // the protection listing and user identification code, some do not.
235 if (columns.size() == 6) {
236 if (!LooksLikeVmsFileProtectionListing(columns[5]))
237 return false;
238 if (!LooksLikeVmsUserIdentificationCode(columns[4]))
239 return false;
240
241 // Drop the unneeded data, so that the following code can always expect
242 // just four columns.
243 columns.resize(4);
244 }
245
246 if (columns.size() != 4)
218 return false; 247 return false;
219 }
220 }
221 248
222 bool FtpDirectoryListingParserVms::OnEndOfInput() { 249 if (!ParseVmsFilesize(columns[1], &entry.size))
223 return (state_ == STATE_END); 250 return false;
224 } 251 if (entry.size < 0)
252 return false;
253 if (entry.type != FtpDirectoryListingEntry::FILE)
254 entry.size = -1;
255 if (!VmsDateListingToTime(columns, &entry.last_modified))
256 return false;
225 257
226 bool FtpDirectoryListingParserVms::EntryAvailable() const { 258 entries->push_back(entry);
227 return !entries_.empty();
228 }
229
230 FtpDirectoryListingEntry FtpDirectoryListingParserVms::PopEntry() {
231 FtpDirectoryListingEntry entry = entries_.front();
232 entries_.pop();
233 return entry;
234 }
235
236 bool FtpDirectoryListingParserVms::ConsumeEntryLine(const string16& line) {
237 std::vector<string16> columns;
238 base::SplitString(CollapseWhitespace(line, false), ' ', &columns);
239
240 if (columns.size() == 1) {
241 if (!last_filename_.empty())
242 return false;
243 return ParseVmsFilename(columns[0], &last_filename_, &last_is_directory_);
244 } 259 }
245 260
246 // Recognize listing entries which generate "access denied" message even when 261 // The only place where we return true is after receiving the "Total" line,
247 // trying to list them. We don't display them in the final listing. 262 // that should be present in every VMS listing.
248 static const char* kAccessDeniedMessages[] = { 263 return false;
249 "%RMS-E-PRV",
250 "privilege",
251 };
252 for (size_t i = 0; i < arraysize(kAccessDeniedMessages); i++) {
253 if (line.find(ASCIIToUTF16(kAccessDeniedMessages[i])) != string16::npos) {
254 last_filename_.clear();
255 last_is_directory_ = false;
256 return true;
257 }
258 }
259
260 string16 filename;
261 bool is_directory = false;
262 if (last_filename_.empty()) {
263 if (!ParseVmsFilename(columns[0], &filename, &is_directory))
264 return false;
265 columns.erase(columns.begin());
266 } else {
267 filename = last_filename_;
268 is_directory = last_is_directory_;
269 last_filename_.clear();
270 last_is_directory_ = false;
271 }
272
273 if (columns.size() > 5)
274 return false;
275
276 if (columns.size() == 5) {
277 if (!LooksLikeVmsFileProtectionListing(columns[4]))
278 return false;
279 if (!LooksLikeVmsUserIdentificationCode(columns[3]))
280 return false;
281 columns.resize(3);
282 }
283
284 if (columns.size() != 3)
285 return false;
286
287 FtpDirectoryListingEntry entry;
288 entry.name = filename;
289 entry.type = is_directory ? FtpDirectoryListingEntry::DIRECTORY
290 : FtpDirectoryListingEntry::FILE;
291 if (!ParseVmsFilesize(columns[0], &entry.size))
292 return false;
293 if (entry.size < 0)
294 return false;
295 if (entry.type != FtpDirectoryListingEntry::FILE)
296 entry.size = -1;
297 if (!VmsDateListingToTime(columns, &entry.last_modified))
298 return false;
299
300 entries_.push(entry);
301 return true;
302 } 264 }
303 265
304 } // namespace net 266 } // namespace net
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698