| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/ftp/ftp_util.h" | |
| 6 | |
| 7 #include <map> | |
| 8 #include <vector> | |
| 9 | |
| 10 #include "base/i18n/case_conversion.h" | |
| 11 #include "base/i18n/char_iterator.h" | |
| 12 #include "base/logging.h" | |
| 13 #include "base/memory/singleton.h" | |
| 14 #include "base/strings/string_number_conversions.h" | |
| 15 #include "base/strings/string_piece.h" | |
| 16 #include "base/strings/string_split.h" | |
| 17 #include "base/strings/string_tokenizer.h" | |
| 18 #include "base/strings/string_util.h" | |
| 19 #include "base/strings/utf_string_conversions.h" | |
| 20 #include "base/time/time.h" | |
| 21 #include "third_party/icu/source/common/unicode/uchar.h" | |
| 22 #include "third_party/icu/source/i18n/unicode/datefmt.h" | |
| 23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h" | |
| 24 | |
| 25 using base::ASCIIToUTF16; | |
| 26 using base::StringPiece16; | |
| 27 | |
| 28 // For examples of Unix<->VMS path conversions, see the unit test file. On VMS, | |
| 29 // a path looks different depending on whether it names a file or a directory. | |
| 30 | |
| 31 namespace net { | |
| 32 | |
| 33 // static | |
| 34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) { | |
| 35 if (unix_path.empty()) | |
| 36 return std::string(); | |
| 37 | |
| 38 base::StringTokenizer tokenizer(unix_path, "/"); | |
| 39 std::vector<std::string> tokens; | |
| 40 while (tokenizer.GetNext()) | |
| 41 tokens.push_back(tokenizer.token()); | |
| 42 | |
| 43 if (unix_path[0] == '/') { | |
| 44 // It's an absolute path. | |
| 45 | |
| 46 if (tokens.empty()) { | |
| 47 DCHECK_EQ(1U, unix_path.length()); | |
| 48 return "[]"; | |
| 49 } | |
| 50 | |
| 51 if (tokens.size() == 1) | |
| 52 return unix_path.substr(1); // Drop the leading slash. | |
| 53 | |
| 54 std::string result(tokens[0] + ":["); | |
| 55 if (tokens.size() == 2) { | |
| 56 // Don't ask why, it just works that way on VMS. | |
| 57 result.append("000000"); | |
| 58 } else { | |
| 59 result.append(tokens[1]); | |
| 60 for (size_t i = 2; i < tokens.size() - 1; i++) | |
| 61 result.append("." + tokens[i]); | |
| 62 } | |
| 63 result.append("]" + tokens[tokens.size() - 1]); | |
| 64 return result; | |
| 65 } | |
| 66 | |
| 67 if (tokens.size() == 1) | |
| 68 return unix_path; | |
| 69 | |
| 70 std::string result("["); | |
| 71 for (size_t i = 0; i < tokens.size() - 1; i++) | |
| 72 result.append("." + tokens[i]); | |
| 73 result.append("]" + tokens[tokens.size() - 1]); | |
| 74 return result; | |
| 75 } | |
| 76 | |
| 77 // static | |
| 78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) { | |
| 79 if (unix_path.empty()) | |
| 80 return std::string(); | |
| 81 | |
| 82 std::string path(unix_path); | |
| 83 | |
| 84 if (path[path.length() - 1] != '/') | |
| 85 path.append("/"); | |
| 86 | |
| 87 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the | |
| 88 // real path and removing it after conversion. | |
| 89 path.append("x"); | |
| 90 path = UnixFilePathToVMS(path); | |
| 91 return path.substr(0, path.length() - 1); | |
| 92 } | |
| 93 | |
| 94 // static | |
| 95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) { | |
| 96 if (vms_path.empty()) | |
| 97 return "."; | |
| 98 | |
| 99 if (vms_path[0] == '/') { | |
| 100 // This is not really a VMS path. Most likely the server is emulating UNIX. | |
| 101 // Return path as-is. | |
| 102 return vms_path; | |
| 103 } | |
| 104 | |
| 105 if (vms_path == "[]") | |
| 106 return "/"; | |
| 107 | |
| 108 std::string result(vms_path); | |
| 109 if (vms_path[0] == '[') { | |
| 110 // It's a relative path. | |
| 111 ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string()); | |
| 112 } else { | |
| 113 // It's an absolute path. | |
| 114 result.insert(0, "/"); | |
| 115 ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/"); | |
| 116 ReplaceSubstringsAfterOffset(&result, 0, ":[", "/"); | |
| 117 } | |
| 118 std::replace(result.begin(), result.end(), '.', '/'); | |
| 119 std::replace(result.begin(), result.end(), ']', '/'); | |
| 120 | |
| 121 // Make sure the result doesn't end with a slash. | |
| 122 if (result.length() && result[result.length() - 1] == '/') | |
| 123 result = result.substr(0, result.length() - 1); | |
| 124 | |
| 125 return result; | |
| 126 } | |
| 127 | |
| 128 namespace { | |
| 129 | |
| 130 // Lazy-initialized map of abbreviated month names. | |
| 131 class AbbreviatedMonthsMap { | |
| 132 public: | |
| 133 static AbbreviatedMonthsMap* GetInstance() { | |
| 134 return Singleton<AbbreviatedMonthsMap>::get(); | |
| 135 } | |
| 136 | |
| 137 // Converts abbreviated month name |text| to its number (in range 1-12). | |
| 138 // On success returns true and puts the number in |number|. | |
| 139 bool GetMonthNumber(const base::string16& text, int* number) { | |
| 140 // Ignore the case of the month names. The simplest way to handle that | |
| 141 // is to make everything lowercase. | |
| 142 base::string16 text_lower(base::i18n::ToLower(text)); | |
| 143 | |
| 144 if (map_.find(text_lower) == map_.end()) | |
| 145 return false; | |
| 146 | |
| 147 *number = map_[text_lower]; | |
| 148 return true; | |
| 149 } | |
| 150 | |
| 151 private: | |
| 152 friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>; | |
| 153 | |
| 154 // Constructor, initializes the map based on ICU data. It is much faster | |
| 155 // to do that just once. | |
| 156 AbbreviatedMonthsMap() { | |
| 157 int32_t locales_count; | |
| 158 const icu::Locale* locales = | |
| 159 icu::DateFormat::getAvailableLocales(locales_count); | |
| 160 | |
| 161 for (int32_t locale = 0; locale < locales_count; locale++) { | |
| 162 UErrorCode status(U_ZERO_ERROR); | |
| 163 | |
| 164 icu::DateFormatSymbols format_symbols(locales[locale], status); | |
| 165 | |
| 166 // If we cannot get format symbols for some locale, it's not a fatal | |
| 167 // error. Just try another one. | |
| 168 if (U_FAILURE(status)) | |
| 169 continue; | |
| 170 | |
| 171 int32_t months_count; | |
| 172 const icu::UnicodeString* months = | |
| 173 format_symbols.getShortMonths(months_count); | |
| 174 | |
| 175 for (int32_t month = 0; month < months_count; month++) { | |
| 176 base::string16 month_name(months[month].getBuffer(), | |
| 177 static_cast<size_t>(months[month].length())); | |
| 178 | |
| 179 // Ignore the case of the month names. The simplest way to handle that | |
| 180 // is to make everything lowercase. | |
| 181 month_name = base::i18n::ToLower(month_name); | |
| 182 | |
| 183 map_[month_name] = month + 1; | |
| 184 | |
| 185 // Sometimes ICU returns longer strings, but in FTP listings a shorter | |
| 186 // abbreviation is used (for example for the Russian locale). Make sure | |
| 187 // we always have a map entry for a three-letter abbreviation. | |
| 188 map_[month_name.substr(0, 3)] = month + 1; | |
| 189 } | |
| 190 } | |
| 191 | |
| 192 // Fail loudly if the data returned by ICU is obviously incomplete. | |
| 193 // This is intended to catch cases like http://crbug.com/177428 | |
| 194 // much earlier. Note that the issue above turned out to be non-trivial | |
| 195 // to reproduce - crash data is much better indicator of a problem | |
| 196 // than incomplete bug reports. | |
| 197 CHECK_EQ(1, map_[ASCIIToUTF16("jan")]); | |
| 198 CHECK_EQ(2, map_[ASCIIToUTF16("feb")]); | |
| 199 CHECK_EQ(3, map_[ASCIIToUTF16("mar")]); | |
| 200 CHECK_EQ(4, map_[ASCIIToUTF16("apr")]); | |
| 201 CHECK_EQ(5, map_[ASCIIToUTF16("may")]); | |
| 202 CHECK_EQ(6, map_[ASCIIToUTF16("jun")]); | |
| 203 CHECK_EQ(7, map_[ASCIIToUTF16("jul")]); | |
| 204 CHECK_EQ(8, map_[ASCIIToUTF16("aug")]); | |
| 205 CHECK_EQ(9, map_[ASCIIToUTF16("sep")]); | |
| 206 CHECK_EQ(10, map_[ASCIIToUTF16("oct")]); | |
| 207 CHECK_EQ(11, map_[ASCIIToUTF16("nov")]); | |
| 208 CHECK_EQ(12, map_[ASCIIToUTF16("dec")]); | |
| 209 } | |
| 210 | |
| 211 // Maps lowercase month names to numbers in range 1-12. | |
| 212 std::map<base::string16, int> map_; | |
| 213 | |
| 214 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap); | |
| 215 }; | |
| 216 | |
| 217 } // namespace | |
| 218 | |
| 219 // static | |
| 220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text, | |
| 221 int* number) { | |
| 222 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number); | |
| 223 } | |
| 224 | |
| 225 // static | |
| 226 bool FtpUtil::LsDateListingToTime(const base::string16& month, | |
| 227 const base::string16& day, | |
| 228 const base::string16& rest, | |
| 229 const base::Time& current_time, | |
| 230 base::Time* result) { | |
| 231 base::Time::Exploded time_exploded = { 0 }; | |
| 232 | |
| 233 if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) { | |
| 234 // Work around garbage sent by some servers in the same column | |
| 235 // as the month. Take just last 3 characters of the string. | |
| 236 if (month.length() < 3 || | |
| 237 !AbbreviatedMonthToNumber(month.substr(month.length() - 3), | |
| 238 &time_exploded.month)) { | |
| 239 return false; | |
| 240 } | |
| 241 } | |
| 242 | |
| 243 if (!base::StringToInt(day, &time_exploded.day_of_month)) | |
| 244 return false; | |
| 245 if (time_exploded.day_of_month > 31) | |
| 246 return false; | |
| 247 | |
| 248 if (!base::StringToInt(rest, &time_exploded.year)) { | |
| 249 // Maybe it's time. Does it look like time? Note that it can be any of | |
| 250 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M". | |
| 251 if (rest.length() > 5) | |
| 252 return false; | |
| 253 | |
| 254 size_t colon_pos = rest.find(':'); | |
| 255 if (colon_pos == base::string16::npos) | |
| 256 return false; | |
| 257 if (colon_pos > 2) | |
| 258 return false; | |
| 259 | |
| 260 if (!base::StringToInt( | |
| 261 StringPiece16(rest.begin(), rest.begin() + colon_pos), | |
| 262 &time_exploded.hour)) { | |
| 263 return false; | |
| 264 } | |
| 265 if (!base::StringToInt( | |
| 266 StringPiece16(rest.begin() + colon_pos + 1, rest.end()), | |
| 267 &time_exploded.minute)) { | |
| 268 return false; | |
| 269 } | |
| 270 | |
| 271 // Guess the year. | |
| 272 base::Time::Exploded current_exploded; | |
| 273 current_time.LocalExplode(¤t_exploded); | |
| 274 | |
| 275 // If it's not possible for the parsed date to be in the current year, | |
| 276 // use the previous year. | |
| 277 if (time_exploded.month > current_exploded.month || | |
| 278 (time_exploded.month == current_exploded.month && | |
| 279 time_exploded.day_of_month > current_exploded.day_of_month)) { | |
| 280 time_exploded.year = current_exploded.year - 1; | |
| 281 } else { | |
| 282 time_exploded.year = current_exploded.year; | |
| 283 } | |
| 284 } | |
| 285 | |
| 286 // We don't know the time zone of the listing, so just use local time. | |
| 287 *result = base::Time::FromLocalExploded(time_exploded); | |
| 288 return true; | |
| 289 } | |
| 290 | |
| 291 // static | |
| 292 bool FtpUtil::WindowsDateListingToTime(const base::string16& date, | |
| 293 const base::string16& time, | |
| 294 base::Time* result) { | |
| 295 base::Time::Exploded time_exploded = { 0 }; | |
| 296 | |
| 297 // Date should be in format MM-DD-YY[YY]. | |
| 298 std::vector<base::string16> date_parts; | |
| 299 base::SplitString(date, '-', &date_parts); | |
| 300 if (date_parts.size() != 3) | |
| 301 return false; | |
| 302 if (!base::StringToInt(date_parts[0], &time_exploded.month)) | |
| 303 return false; | |
| 304 if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month)) | |
| 305 return false; | |
| 306 if (!base::StringToInt(date_parts[2], &time_exploded.year)) | |
| 307 return false; | |
| 308 if (time_exploded.year < 0) | |
| 309 return false; | |
| 310 // If year has only two digits then assume that 00-79 is 2000-2079, | |
| 311 // and 80-99 is 1980-1999. | |
| 312 if (time_exploded.year < 80) | |
| 313 time_exploded.year += 2000; | |
| 314 else if (time_exploded.year < 100) | |
| 315 time_exploded.year += 1900; | |
| 316 | |
| 317 // Time should be in format HH:MM[(AM|PM)] | |
| 318 if (time.length() < 5) | |
| 319 return false; | |
| 320 | |
| 321 std::vector<base::string16> time_parts; | |
| 322 base::SplitString(time.substr(0, 5), ':', &time_parts); | |
| 323 if (time_parts.size() != 2) | |
| 324 return false; | |
| 325 if (!base::StringToInt(time_parts[0], &time_exploded.hour)) | |
| 326 return false; | |
| 327 if (!base::StringToInt(time_parts[1], &time_exploded.minute)) | |
| 328 return false; | |
| 329 if (!time_exploded.HasValidValues()) | |
| 330 return false; | |
| 331 | |
| 332 if (time.length() > 5) { | |
| 333 if (time.length() != 7) | |
| 334 return false; | |
| 335 base::string16 am_or_pm(time.substr(5, 2)); | |
| 336 if (EqualsASCII(am_or_pm, "PM")) { | |
| 337 if (time_exploded.hour < 12) | |
| 338 time_exploded.hour += 12; | |
| 339 } else if (EqualsASCII(am_or_pm, "AM")) { | |
| 340 if (time_exploded.hour == 12) | |
| 341 time_exploded.hour = 0; | |
| 342 } else { | |
| 343 return false; | |
| 344 } | |
| 345 } | |
| 346 | |
| 347 // We don't know the time zone of the server, so just use local time. | |
| 348 *result = base::Time::FromLocalExploded(time_exploded); | |
| 349 return true; | |
| 350 } | |
| 351 | |
| 352 // static | |
| 353 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text, | |
| 354 int columns) { | |
| 355 base::i18n::UTF16CharIterator iter(&text); | |
| 356 | |
| 357 // TODO(jshin): Is u_isspace the right function to use here? | |
| 358 for (int i = 0; i < columns; i++) { | |
| 359 // Skip the leading whitespace. | |
| 360 while (!iter.end() && u_isspace(iter.get())) | |
| 361 iter.Advance(); | |
| 362 | |
| 363 // Skip the actual text of i-th column. | |
| 364 while (!iter.end() && !u_isspace(iter.get())) | |
| 365 iter.Advance(); | |
| 366 } | |
| 367 | |
| 368 base::string16 result(text.substr(iter.array_pos())); | |
| 369 base::TrimWhitespace(result, base::TRIM_ALL, &result); | |
| 370 return result; | |
| 371 } | |
| 372 | |
| 373 } // namespace net | |
| OLD | NEW |