OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this |
| 2 // source code is governed by a BSD-style license that can be found in the |
| 3 // LICENSE file. |
| 4 |
| 5 #include "net/ftp/ftp_directory_listing_buffer.h" |
| 6 |
| 7 #include "base/i18n/icu_string_conversions.h" |
| 8 #include "base/stl_util-inl.h" |
| 9 #include "base/string_util.h" |
| 10 #include "net/base/net_errors.h" |
| 11 #include "net/ftp/ftp_directory_listing_parsers.h" |
| 12 #include "unicode/ucsdet.h" |
| 13 |
| 14 namespace { |
| 15 |
| 16 // A very simple-minded character encoding detection. |
| 17 // TODO(jungshik): We can apply more heuristics here (e.g. using various hints |
| 18 // like TLD, the UI language/default encoding of a client, etc). In that case, |
| 19 // this should be pulled out of here and moved somewhere in base because there |
| 20 // can be other use cases. |
| 21 std::string DetectEncoding(const std::string& text) { |
| 22 if (IsStringASCII(text)) |
| 23 return std::string(); |
| 24 UErrorCode status = U_ZERO_ERROR; |
| 25 UCharsetDetector* detector = ucsdet_open(&status); |
| 26 ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), |
| 27 &status); |
| 28 const UCharsetMatch* match = ucsdet_detect(detector, &status); |
| 29 const char* encoding = ucsdet_getName(match, &status); |
| 30 // Should we check the quality of the match? A rather arbitrary number is |
| 31 // assigned by ICU and it's hard to come up with a lower limit. |
| 32 if (U_FAILURE(status)) |
| 33 return std::string(); |
| 34 return encoding; |
| 35 } |
| 36 |
| 37 } // namespace |
| 38 |
| 39 namespace net { |
| 40 |
| 41 FtpDirectoryListingBuffer::FtpDirectoryListingBuffer() |
| 42 : current_parser_(NULL) { |
| 43 parsers_.insert(new FtpLsDirectoryListingParser()); |
| 44 } |
| 45 |
| 46 FtpDirectoryListingBuffer::~FtpDirectoryListingBuffer() { |
| 47 STLDeleteElements(&parsers_); |
| 48 } |
| 49 |
| 50 int FtpDirectoryListingBuffer::ConsumeData(const char* data, int data_length) { |
| 51 buffer_.append(data, data_length); |
| 52 |
| 53 if (!encoding_.empty() || buffer_.length() > 1024) { |
| 54 int rv = ExtractFullLinesFromBuffer(); |
| 55 if (rv != OK) |
| 56 return rv; |
| 57 } |
| 58 |
| 59 return ParseLines(); |
| 60 } |
| 61 |
| 62 int FtpDirectoryListingBuffer::ProcessRemainingData() { |
| 63 int rv = ExtractFullLinesFromBuffer(); |
| 64 if (rv != OK) |
| 65 return rv; |
| 66 |
| 67 return ParseLines(); |
| 68 } |
| 69 |
| 70 bool FtpDirectoryListingBuffer::EntryAvailable() const { |
| 71 return (current_parser_ ? current_parser_->EntryAvailable() : false); |
| 72 } |
| 73 |
| 74 FtpDirectoryListingEntry FtpDirectoryListingBuffer::PopEntry() { |
| 75 DCHECK(EntryAvailable()); |
| 76 return current_parser_->PopEntry(); |
| 77 } |
| 78 |
| 79 bool FtpDirectoryListingBuffer::ConvertToDetectedEncoding( |
| 80 const std::string& from, string16* to) { |
| 81 std::string encoding(encoding_.empty() ? "ascii" : encoding_); |
| 82 return base::CodepageToUTF16(from, encoding.c_str(), |
| 83 base::OnStringConversionError::FAIL, to); |
| 84 } |
| 85 |
| 86 int FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() { |
| 87 if (encoding_.empty()) |
| 88 encoding_ = DetectEncoding(buffer_); |
| 89 |
| 90 int cut_pos = 0; |
| 91 for (size_t i = 0; i < buffer_.length(); ++i) { |
| 92 if (i >= 1 && buffer_[i - 1] == '\r' && buffer_[i] == '\n') { |
| 93 std::string line(buffer_.substr(cut_pos, i - cut_pos - 1)); |
| 94 cut_pos = i + 1; |
| 95 string16 line_converted; |
| 96 if (!ConvertToDetectedEncoding(line, &line_converted)) { |
| 97 buffer_.erase(0, cut_pos); |
| 98 return ERR_ENCODING_CONVERSION_FAILED; |
| 99 } |
| 100 lines_.push_back(line_converted); |
| 101 } |
| 102 } |
| 103 buffer_.erase(0, cut_pos); |
| 104 return OK; |
| 105 } |
| 106 |
| 107 int FtpDirectoryListingBuffer::ParseLines() { |
| 108 while (!lines_.empty()) { |
| 109 string16 line = lines_.front(); |
| 110 lines_.pop_front(); |
| 111 if (current_parser_) { |
| 112 if (!current_parser_->ConsumeLine(line)) |
| 113 return ERR_FAILED; |
| 114 } else { |
| 115 ParserSet::iterator i = parsers_.begin(); |
| 116 while (i != parsers_.end()) { |
| 117 if ((*i)->ConsumeLine(line)) { |
| 118 i++; |
| 119 } else { |
| 120 delete *i; |
| 121 parsers_.erase(i++); |
| 122 } |
| 123 } |
| 124 if (parsers_.empty()) |
| 125 return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; |
| 126 if (parsers_.size() == 1) |
| 127 current_parser_ = *parsers_.begin(); |
| 128 } |
| 129 } |
| 130 |
| 131 return OK; |
| 132 } |
| 133 |
| 134 } // namespace net |
OLD | NEW |