Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(111)

Side by Side Diff: net/ftp/ftp_directory_listing_parsers.cc

Issue 465035: Split FTP LIST parsing code into individual files for each listing style. (Closed)
Patch Set: fix Created 11 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
2 // source code is governed by a BSD-style license that can be found in the
3 // LICENSE file.
4
5 #include "net/ftp/ftp_directory_listing_parsers.h"
6
7 #include <ctype.h>
8
9 #include "base/string_util.h"
10
11 namespace {
12
13 bool LooksLikeUnixPermission(const string16& text) {
14 if (text.length() != 3)
15 return false;
16
17 // Meaning of the flags:
18 // r - file is readable
19 // w - file is writable
20 // x - file is executable
21 // s or S - setuid/setgid bit set
22 // t or T - "sticky" bit set
23 return ((text[0] == 'r' || text[0] == '-') &&
24 (text[1] == 'w' || text[1] == '-') &&
25 (text[2] == 'x' || text[2] == 's' || text[2] == 'S' ||
26 text[2] == 't' || text[2] == 'T' || text[2] == '-'));
27 }
28
29 bool LooksLikeUnixPermissionsListing(const string16& text) {
30 if (text.length() != 10)
31 return false;
32
33 if (text[0] != 'b' && text[0] != 'c' && text[0] != 'd' &&
34 text[0] != 'l' && text[0] != 'p' && text[0] != 's' &&
35 text[0] != '-')
36 return false;
37
38 return (LooksLikeUnixPermission(text.substr(1, 3)) &&
39 LooksLikeUnixPermission(text.substr(4, 3)) &&
40 LooksLikeUnixPermission(text.substr(7, 3)));
41 }
42
43 bool IsStringNonNegativeInteger(const string16& text) {
44 int number;
45 if (!StringToInt(text, &number))
46 return false;
47
48 return number >= 0;
49 }
50
51 string16 GetStringPartAfterColumns(const string16& text, int columns) {
52 DCHECK_LE(1, columns);
53 int columns_so_far = 0;
54 size_t last = 0;
55 for (size_t i = 1; i < text.length(); ++i) {
56 if (!isspace(text[i - 1]) && isspace(text[i])) {
57 last = i;
58 if (++columns_so_far == columns)
59 break;
60 }
61 }
62 string16 result(text.substr(last));
63 TrimWhitespace(result, TRIM_ALL, &result);
64 return result;
65 }
66
67 bool ThreeLetterMonthToNumber(const string16& text, int* number) {
68 const static char* months[] = { "jan", "feb", "mar", "apr", "may", "jun",
69 "jul", "aug", "sep", "oct", "nov", "dec" };
70
71 for (size_t i = 0; i < arraysize(months); i++) {
72 if (LowerCaseEqualsASCII(text, months[i])) {
73 *number = i + 1;
74 return true;
75 }
76 }
77
78 // Special cases for listings in German (other three-letter month
79 // abbreviations are the same as in English). Note that we don't need to do
80 // a case-insensitive compare here. Only "ls -l" style listings may use
81 // localized month names, and they will always start capitalized. Also,
82 // converting non-ASCII characters to lowercase would be more complicated.
83 if (text == UTF8ToUTF16("M\xc3\xa4r")) {
84 // The full month name is M-(a-umlaut)-rz (March), which is M-(a-umlaut)r
85 // when abbreviated.
86 *number = 3;
87 return true;
88 }
89 if (text == ASCIIToUTF16("Mai")) {
90 *number = 5;
91 return true;
92 }
93 if (text == ASCIIToUTF16("Okt")) {
94 *number = 10;
95 return true;
96 }
97 if (text == ASCIIToUTF16("Dez")) {
98 *number = 12;
99 return true;
100 }
101
102 return false;
103 }
104
105 bool UnixDateListingToTime(const std::vector<string16>& columns,
106 base::Time* time) {
107 DCHECK_LE(9U, columns.size());
108
109 base::Time::Exploded time_exploded = { 0 };
110
111 if (!ThreeLetterMonthToNumber(columns[5], &time_exploded.month))
112 return false;
113
114 if (!StringToInt(columns[6], &time_exploded.day_of_month))
115 return false;
116
117 if (!StringToInt(columns[7], &time_exploded.year)) {
118 // Maybe it's time. Does it look like time (MM:HH)?
119 if (columns[7].length() != 5 || columns[7][2] != ':')
120 return false;
121
122 if (!StringToInt(columns[7].substr(0, 2), &time_exploded.hour))
123 return false;
124
125 if (!StringToInt(columns[7].substr(3, 2), &time_exploded.minute))
126 return false;
127
128 // Use current year.
129 base::Time::Exploded now_exploded;
130 base::Time::Now().LocalExplode(&now_exploded);
131 time_exploded.year = now_exploded.year;
132 }
133
134 // We don't know the time zone of the server, so just use local time.
135 *time = base::Time::FromLocalExploded(time_exploded);
136 return true;
137 }
138
139 bool WindowsDateListingToTime(const std::vector<string16>& columns,
140 base::Time* time) {
141 DCHECK_EQ(4U, columns.size());
142
143 base::Time::Exploded time_exploded = { 0 };
144
145 // Date should be in format MM-DD-YY[YY].
146 std::vector<string16> date_parts;
147 SplitString(columns[0], '-', &date_parts);
148 if (date_parts.size() != 3)
149 return false;
150 if (!StringToInt(date_parts[0], &time_exploded.month))
151 return false;
152 if (!StringToInt(date_parts[1], &time_exploded.day_of_month))
153 return false;
154 if (!StringToInt(date_parts[2], &time_exploded.year))
155 return false;
156 if (time_exploded.year < 0)
157 return false;
158 // If year has only two digits then assume that 00-79 is 2000-2079,
159 // and 80-99 is 1980-1999.
160 if (time_exploded.year < 80)
161 time_exploded.year += 2000;
162 else if (time_exploded.year < 100)
163 time_exploded.year += 1900;
164
165 // Time should be in format HH:MM(AM|PM)
166 if (columns[1].length() != 7)
167 return false;
168 std::vector<string16> time_parts;
169 SplitString(columns[1].substr(0, 5), ':', &time_parts);
170 if (time_parts.size() != 2)
171 return false;
172 if (!StringToInt(time_parts[0], &time_exploded.hour))
173 return false;
174 if (!StringToInt(time_parts[1], &time_exploded.minute))
175 return false;
176 string16 am_or_pm(columns[1].substr(5, 2));
177 if (EqualsASCII(am_or_pm, "PM"))
178 time_exploded.hour += 12;
179 else if (!EqualsASCII(am_or_pm, "AM"))
180 return false;
181
182 // We don't know the time zone of the server, so just use local time.
183 *time = base::Time::FromLocalExploded(time_exploded);
184 return true;
185 }
186
187 // Converts the filename component in listing to the filename we can display.
188 // Returns true on success.
189 bool ParseVmsFilename(const string16& raw_filename, string16* parsed_filename,
190 bool* is_directory) {
191 // On VMS, the files and directories are versioned. The version number is
192 // separated from the file name by a semicolon. Example: ANNOUNCE.TXT;2.
193 std::vector<string16> listing_parts;
194 SplitString(raw_filename, ';', &listing_parts);
195 if (listing_parts.size() != 2)
196 return false;
197 if (!IsStringNonNegativeInteger(listing_parts[1]))
198 return false;
199
200 // Even directories have extensions in the listings. Don't display extensions
201 // for directories; it's awkward for non-VMS users. Also, VMS is
202 // case-insensitive, but generally uses uppercase characters. This may look
203 // awkward, so we convert them to lower case.
204 std::vector<string16> filename_parts;
205 SplitString(listing_parts[0], '.', &filename_parts);
206 if (filename_parts.size() != 2)
207 return false;
208 if (EqualsASCII(filename_parts[1], "DIR")) {
209 *parsed_filename = StringToLowerASCII(filename_parts[0]);
210 *is_directory = true;
211 } else {
212 *parsed_filename = StringToLowerASCII(listing_parts[0]);
213 *is_directory = false;
214 }
215 return true;
216 }
217
218 bool ParseVmsFilesize(const string16& input, int64* size) {
219 // VMS's directory listing gives us file size in blocks. We assume that
220 // the block size is 512 bytes. It doesn't give accurate file size, but is the
221 // best information we have.
222 const int kBlockSize = 512;
223
224 if (StringToInt64(input, size)) {
225 *size *= kBlockSize;
226 return true;
227 }
228
229 std::vector<string16> parts;
230 SplitString(input, '/', &parts);
231 if (parts.size() != 2)
232 return false;
233
234 int64 blocks_used, blocks_allocated;
235 if (!StringToInt64(parts[0], &blocks_used))
236 return false;
237 if (!StringToInt64(parts[1], &blocks_allocated))
238 return false;
239 if (blocks_used > blocks_allocated)
240 return false;
241
242 *size = blocks_used * kBlockSize;
243 return true;
244 }
245
246 bool LooksLikeVmsFileProtectionListingPart(const string16& input) {
247 if (input.length() > 4)
248 return false;
249
250 // On VMS there are four different permission bits: Read, Write, Execute,
251 // and Delete. They appear in that order in the permission listing.
252 std::string pattern("RWED");
253 string16 match(input);
254 while (!match.empty() && !pattern.empty()) {
255 if (match[0] == pattern[0])
256 match = match.substr(1);
257 pattern = pattern.substr(1);
258 }
259 return match.empty();
260 }
261
262 bool LooksLikeVmsFileProtectionListing(const string16& input) {
263 if (input.length() < 2)
264 return false;
265 if (input[0] != '(' || input[input.length() - 1] != ')')
266 return false;
267
268 // We expect four parts of the file protection listing: for System, Owner,
269 // Group, and World.
270 std::vector<string16> parts;
271 SplitString(input.substr(1, input.length() - 2), ',', &parts);
272 if (parts.size() != 4)
273 return false;
274
275 return LooksLikeVmsFileProtectionListingPart(parts[0]) &&
276 LooksLikeVmsFileProtectionListingPart(parts[1]) &&
277 LooksLikeVmsFileProtectionListingPart(parts[2]) &&
278 LooksLikeVmsFileProtectionListingPart(parts[3]);
279 }
280
281 bool LooksLikeVmsUserIdentificationCode(const string16& input) {
282 if (input.length() < 2)
283 return false;
284 return input[0] == '[' && input[input.length() - 1] == ']';
285 }
286
287 bool VmsDateListingToTime(const std::vector<string16>& columns,
288 base::Time* time) {
289 DCHECK_EQ(3U, columns.size());
290
291 base::Time::Exploded time_exploded = { 0 };
292
293 // Date should be in format DD-MMM-YYYY.
294 std::vector<string16> date_parts;
295 SplitString(columns[1], '-', &date_parts);
296 if (date_parts.size() != 3)
297 return false;
298 if (!StringToInt(date_parts[0], &time_exploded.day_of_month))
299 return false;
300 if (!ThreeLetterMonthToNumber(date_parts[1], &time_exploded.month))
301 return false;
302 if (!StringToInt(date_parts[2], &time_exploded.year))
303 return false;
304
305 // Time can be in format HH:MM, HH:MM:SS, or HH:MM:SS.mm. Try to recognize the
306 // last type first. Do not parse the seconds, they will be ignored anyway.
307 string16 time_column(columns[2]);
308 if (time_column.length() == 11 && time_column[8] == '.')
309 time_column = time_column.substr(0, 8);
310 if (time_column.length() == 8 && time_column[5] == ':')
311 time_column = time_column.substr(0, 5);
312 if (time_column.length() != 5)
313 return false;
314 std::vector<string16> time_parts;
315 SplitString(time_column, ':', &time_parts);
316 if (time_parts.size() != 2)
317 return false;
318 if (!StringToInt(time_parts[0], &time_exploded.hour))
319 return false;
320 if (!StringToInt(time_parts[1], &time_exploded.minute))
321 return false;
322
323 // We don't know the time zone of the server, so just use local time.
324 *time = base::Time::FromLocalExploded(time_exploded);
325 return true;
326 }
327
328 } // namespace
329
330 namespace net {
331
332 FtpDirectoryListingParser::~FtpDirectoryListingParser() {
333 }
334
335 FtpLsDirectoryListingParser::FtpLsDirectoryListingParser()
336 : received_nonempty_line_(false) {
337 }
338
339 bool FtpLsDirectoryListingParser::ConsumeLine(const string16& line) {
340 if (StartsWith(line, ASCIIToUTF16("total "), true) ||
341 StartsWith(line, ASCIIToUTF16("Gesamt "), true)) {
342 // Some FTP servers put a "total n" line at the beginning of the listing
343 // (n is an integer). Allow such a line, but only once, and only if it's
344 // the first non-empty line.
345 //
346 // Note: "Gesamt" is a German word for "total". The case is important here:
347 // for "ls -l" style listings, "total" will be lowercase, and Gesamt will be
348 // capitalized. This helps us distinguish that from a VMS-style listing,
349 // which would use "Total" (note the uppercase first letter).
350
351 if (received_nonempty_line_)
352 return false;
353
354 received_nonempty_line_ = true;
355 return true;
356 }
357 if (line.empty() && !received_nonempty_line_) {
358 // Allow empty lines only at the beginning of the listing. For example VMS
359 // systems in Unix emulation mode add an empty line before the first listing
360 // entry.
361 return true;
362 }
363 received_nonempty_line_ = true;
364
365 std::vector<string16> columns;
366 SplitString(CollapseWhitespace(line, false), ' ', &columns);
367
368 // We may receive file names containing spaces, which can make the number of
369 // columns arbitrarily large. We will handle that later. For now just make
370 // sure we have all the columns that should normally be there.
371 if (columns.size() < 9)
372 return false;
373
374 if (!LooksLikeUnixPermissionsListing(columns[0]))
375 return false;
376
377 FtpDirectoryListingEntry entry;
378 if (columns[0][0] == 'l') {
379 entry.type = FtpDirectoryListingEntry::SYMLINK;
380 } else if (columns[0][0] == 'd') {
381 entry.type = FtpDirectoryListingEntry::DIRECTORY;
382 } else {
383 entry.type = FtpDirectoryListingEntry::FILE;
384 }
385
386 if (!IsStringNonNegativeInteger(columns[1]))
387 return false;
388
389 if (!StringToInt64(columns[4], &entry.size))
390 return false;
391 if (entry.size < 0)
392 return false;
393 if (entry.type != FtpDirectoryListingEntry::FILE)
394 entry.size = -1;
395
396 if (!UnixDateListingToTime(columns, &entry.last_modified))
397 return false;
398
399 entry.name = GetStringPartAfterColumns(line, 8);
400 if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
401 string16::size_type pos = entry.name.rfind(ASCIIToUTF16(" -> "));
402 if (pos == string16::npos)
403 return false;
404 entry.name = entry.name.substr(0, pos);
405 }
406
407 entries_.push(entry);
408 return true;
409 }
410
411 bool FtpLsDirectoryListingParser::OnEndOfInput() {
412 return true;
413 }
414
415 bool FtpLsDirectoryListingParser::EntryAvailable() const {
416 return !entries_.empty();
417 }
418
419 FtpDirectoryListingEntry FtpLsDirectoryListingParser::PopEntry() {
420 FtpDirectoryListingEntry entry = entries_.front();
421 entries_.pop();
422 return entry;
423 }
424
425 FtpWindowsDirectoryListingParser::FtpWindowsDirectoryListingParser() {
426 }
427
428 bool FtpWindowsDirectoryListingParser::ConsumeLine(const string16& line) {
429 std::vector<string16> columns;
430 SplitString(CollapseWhitespace(line, false), ' ', &columns);
431 if (columns.size() != 4)
432 return false;
433
434 FtpDirectoryListingEntry entry;
435 entry.name = columns[3];
436
437 if (EqualsASCII(columns[2], "<DIR>")) {
438 entry.type = FtpDirectoryListingEntry::DIRECTORY;
439 entry.size = -1;
440 } else {
441 entry.type = FtpDirectoryListingEntry::FILE;
442 if (!StringToInt64(columns[2], &entry.size))
443 return false;
444 if (entry.size < 0)
445 return false;
446 }
447
448 if (!WindowsDateListingToTime(columns, &entry.last_modified))
449 return false;
450
451 entries_.push(entry);
452 return true;
453 }
454
455 bool FtpWindowsDirectoryListingParser::OnEndOfInput() {
456 return true;
457 }
458
459 bool FtpWindowsDirectoryListingParser::EntryAvailable() const {
460 return !entries_.empty();
461 }
462
463 FtpDirectoryListingEntry FtpWindowsDirectoryListingParser::PopEntry() {
464 FtpDirectoryListingEntry entry = entries_.front();
465 entries_.pop();
466 return entry;
467 }
468
469 FtpVmsDirectoryListingParser::FtpVmsDirectoryListingParser()
470 : state_(STATE_INITIAL),
471 last_is_directory_(false) {
472 }
473
474 bool FtpVmsDirectoryListingParser::ConsumeLine(const string16& line) {
475 switch (state_) {
476 case STATE_INITIAL:
477 DCHECK(last_filename_.empty());
478 if (line.empty())
479 return true;
480 if (StartsWith(line, ASCIIToUTF16("Total of "), true)) {
481 state_ = STATE_END;
482 return true;
483 }
484 // We assume that the first non-empty line is the listing header. It often
485 // starts with "Directory ", but not always.
486 state_ = STATE_RECEIVED_HEADER;
487 return true;
488 case STATE_RECEIVED_HEADER:
489 DCHECK(last_filename_.empty());
490 if (line.empty())
491 return true;
492 state_ = STATE_ENTRIES;
493 return ConsumeEntryLine(line);
494 case STATE_ENTRIES:
495 if (line.empty()) {
496 if (!last_filename_.empty())
497 return false;
498 state_ = STATE_RECEIVED_LAST_ENTRY;
499 return true;
500 }
501 return ConsumeEntryLine(line);
502 case STATE_RECEIVED_LAST_ENTRY:
503 DCHECK(last_filename_.empty());
504 if (line.empty())
505 return true;
506 if (!StartsWith(line, ASCIIToUTF16("Total of "), true))
507 return false;
508 state_ = STATE_END;
509 return true;
510 case STATE_END:
511 DCHECK(last_filename_.empty());
512 return false;
513 default:
514 NOTREACHED();
515 return false;
516 }
517 }
518
519 bool FtpVmsDirectoryListingParser::OnEndOfInput() {
520 return (state_ == STATE_END);
521 }
522
523 bool FtpVmsDirectoryListingParser::EntryAvailable() const {
524 return !entries_.empty();
525 }
526
527 FtpDirectoryListingEntry FtpVmsDirectoryListingParser::PopEntry() {
528 FtpDirectoryListingEntry entry = entries_.front();
529 entries_.pop();
530 return entry;
531 }
532
533 bool FtpVmsDirectoryListingParser::ConsumeEntryLine(const string16& line) {
534 std::vector<string16> columns;
535 SplitString(CollapseWhitespace(line, false), ' ', &columns);
536
537 if (columns.size() == 1) {
538 if (!last_filename_.empty())
539 return false;
540 return ParseVmsFilename(columns[0], &last_filename_, &last_is_directory_);
541 }
542
543 // Recognize listing entries which generate "access denied" message even when
544 // trying to list them. We don't display them in the final listing.
545 static const char* kAccessDeniedMessages[] = {
546 "%RMS-E-PRV",
547 "privilege",
548 };
549 for (size_t i = 0; i < arraysize(kAccessDeniedMessages); i++) {
550 if (line.find(ASCIIToUTF16(kAccessDeniedMessages[i])) != string16::npos) {
551 last_filename_.clear();
552 last_is_directory_ = false;
553 return true;
554 }
555 }
556
557 string16 filename;
558 bool is_directory = false;
559 if (last_filename_.empty()) {
560 if (!ParseVmsFilename(columns[0], &filename, &is_directory))
561 return false;
562 columns.erase(columns.begin());
563 } else {
564 filename = last_filename_;
565 is_directory = last_is_directory_;
566 last_filename_.clear();
567 last_is_directory_ = false;
568 }
569
570 if (columns.size() > 5)
571 return false;
572
573 if (columns.size() == 5) {
574 if (!LooksLikeVmsFileProtectionListing(columns[4]))
575 return false;
576 if (!LooksLikeVmsUserIdentificationCode(columns[3]))
577 return false;
578 columns.resize(3);
579 }
580
581 if (columns.size() != 3)
582 return false;
583
584 FtpDirectoryListingEntry entry;
585 entry.name = filename;
586 entry.type = is_directory ? FtpDirectoryListingEntry::DIRECTORY
587 : FtpDirectoryListingEntry::FILE;
588 if (!ParseVmsFilesize(columns[0], &entry.size))
589 return false;
590 if (entry.size < 0)
591 return false;
592 if (entry.type != FtpDirectoryListingEntry::FILE)
593 entry.size = -1;
594 if (!VmsDateListingToTime(columns, &entry.last_modified))
595 return false;
596
597 entries_.push(entry);
598 return true;
599 }
600
601 } // namespace net
OLDNEW
« no previous file with comments | « net/ftp/ftp_directory_listing_parsers.h ('k') | net/ftp/ftp_directory_listing_parsers_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698