OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/ftp/ftp_util.h" | |
6 | |
7 #include <map> | |
8 #include <vector> | |
9 | |
10 #include "base/i18n/case_conversion.h" | |
11 #include "base/i18n/char_iterator.h" | |
12 #include "base/logging.h" | |
13 #include "base/memory/singleton.h" | |
14 #include "base/strings/string_number_conversions.h" | |
15 #include "base/strings/string_piece.h" | |
16 #include "base/strings/string_split.h" | |
17 #include "base/strings/string_tokenizer.h" | |
18 #include "base/strings/string_util.h" | |
19 #include "base/strings/utf_string_conversions.h" | |
20 #include "base/time/time.h" | |
21 #include "third_party/icu/source/common/unicode/uchar.h" | |
22 #include "third_party/icu/source/i18n/unicode/datefmt.h" | |
23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h" | |
24 | |
25 using base::ASCIIToUTF16; | |
26 using base::StringPiece16; | |
27 | |
// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it's a file or a directory.
30 | |
31 namespace net { | |
32 | |
33 // static | |
34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) { | |
35 if (unix_path.empty()) | |
36 return std::string(); | |
37 | |
38 base::StringTokenizer tokenizer(unix_path, "/"); | |
39 std::vector<std::string> tokens; | |
40 while (tokenizer.GetNext()) | |
41 tokens.push_back(tokenizer.token()); | |
42 | |
43 if (unix_path[0] == '/') { | |
44 // It's an absolute path. | |
45 | |
46 if (tokens.empty()) { | |
47 DCHECK_EQ(1U, unix_path.length()); | |
48 return "[]"; | |
49 } | |
50 | |
51 if (tokens.size() == 1) | |
52 return unix_path.substr(1); // Drop the leading slash. | |
53 | |
54 std::string result(tokens[0] + ":["); | |
55 if (tokens.size() == 2) { | |
56 // Don't ask why, it just works that way on VMS. | |
57 result.append("000000"); | |
58 } else { | |
59 result.append(tokens[1]); | |
60 for (size_t i = 2; i < tokens.size() - 1; i++) | |
61 result.append("." + tokens[i]); | |
62 } | |
63 result.append("]" + tokens[tokens.size() - 1]); | |
64 return result; | |
65 } | |
66 | |
67 if (tokens.size() == 1) | |
68 return unix_path; | |
69 | |
70 std::string result("["); | |
71 for (size_t i = 0; i < tokens.size() - 1; i++) | |
72 result.append("." + tokens[i]); | |
73 result.append("]" + tokens[tokens.size() - 1]); | |
74 return result; | |
75 } | |
76 | |
77 // static | |
78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) { | |
79 if (unix_path.empty()) | |
80 return std::string(); | |
81 | |
82 std::string path(unix_path); | |
83 | |
84 if (path[path.length() - 1] != '/') | |
85 path.append("/"); | |
86 | |
87 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the | |
88 // real path and removing it after conversion. | |
89 path.append("x"); | |
90 path = UnixFilePathToVMS(path); | |
91 return path.substr(0, path.length() - 1); | |
92 } | |
93 | |
94 // static | |
95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) { | |
96 if (vms_path.empty()) | |
97 return "."; | |
98 | |
99 if (vms_path[0] == '/') { | |
100 // This is not really a VMS path. Most likely the server is emulating UNIX. | |
101 // Return path as-is. | |
102 return vms_path; | |
103 } | |
104 | |
105 if (vms_path == "[]") | |
106 return "/"; | |
107 | |
108 std::string result(vms_path); | |
109 if (vms_path[0] == '[') { | |
110 // It's a relative path. | |
111 ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string()); | |
112 } else { | |
113 // It's an absolute path. | |
114 result.insert(0, "/"); | |
115 ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/"); | |
116 ReplaceSubstringsAfterOffset(&result, 0, ":[", "/"); | |
117 } | |
118 std::replace(result.begin(), result.end(), '.', '/'); | |
119 std::replace(result.begin(), result.end(), ']', '/'); | |
120 | |
121 // Make sure the result doesn't end with a slash. | |
122 if (result.length() && result[result.length() - 1] == '/') | |
123 result = result.substr(0, result.length() - 1); | |
124 | |
125 return result; | |
126 } | |
127 | |
128 namespace { | |
129 | |
130 // Lazy-initialized map of abbreviated month names. | |
131 class AbbreviatedMonthsMap { | |
132 public: | |
133 static AbbreviatedMonthsMap* GetInstance() { | |
134 return Singleton<AbbreviatedMonthsMap>::get(); | |
135 } | |
136 | |
137 // Converts abbreviated month name |text| to its number (in range 1-12). | |
138 // On success returns true and puts the number in |number|. | |
139 bool GetMonthNumber(const base::string16& text, int* number) { | |
140 // Ignore the case of the month names. The simplest way to handle that | |
141 // is to make everything lowercase. | |
142 base::string16 text_lower(base::i18n::ToLower(text)); | |
143 | |
144 if (map_.find(text_lower) == map_.end()) | |
145 return false; | |
146 | |
147 *number = map_[text_lower]; | |
148 return true; | |
149 } | |
150 | |
151 private: | |
152 friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>; | |
153 | |
154 // Constructor, initializes the map based on ICU data. It is much faster | |
155 // to do that just once. | |
156 AbbreviatedMonthsMap() { | |
157 int32_t locales_count; | |
158 const icu::Locale* locales = | |
159 icu::DateFormat::getAvailableLocales(locales_count); | |
160 | |
161 for (int32_t locale = 0; locale < locales_count; locale++) { | |
162 UErrorCode status(U_ZERO_ERROR); | |
163 | |
164 icu::DateFormatSymbols format_symbols(locales[locale], status); | |
165 | |
166 // If we cannot get format symbols for some locale, it's not a fatal | |
167 // error. Just try another one. | |
168 if (U_FAILURE(status)) | |
169 continue; | |
170 | |
171 int32_t months_count; | |
172 const icu::UnicodeString* months = | |
173 format_symbols.getShortMonths(months_count); | |
174 | |
175 for (int32_t month = 0; month < months_count; month++) { | |
176 base::string16 month_name(months[month].getBuffer(), | |
177 static_cast<size_t>(months[month].length())); | |
178 | |
179 // Ignore the case of the month names. The simplest way to handle that | |
180 // is to make everything lowercase. | |
181 month_name = base::i18n::ToLower(month_name); | |
182 | |
183 map_[month_name] = month + 1; | |
184 | |
185 // Sometimes ICU returns longer strings, but in FTP listings a shorter | |
186 // abbreviation is used (for example for the Russian locale). Make sure | |
187 // we always have a map entry for a three-letter abbreviation. | |
188 map_[month_name.substr(0, 3)] = month + 1; | |
189 } | |
190 } | |
191 | |
192 // Fail loudly if the data returned by ICU is obviously incomplete. | |
193 // This is intended to catch cases like http://crbug.com/177428 | |
194 // much earlier. Note that the issue above turned out to be non-trivial | |
195 // to reproduce - crash data is much better indicator of a problem | |
196 // than incomplete bug reports. | |
197 CHECK_EQ(1, map_[ASCIIToUTF16("jan")]); | |
198 CHECK_EQ(2, map_[ASCIIToUTF16("feb")]); | |
199 CHECK_EQ(3, map_[ASCIIToUTF16("mar")]); | |
200 CHECK_EQ(4, map_[ASCIIToUTF16("apr")]); | |
201 CHECK_EQ(5, map_[ASCIIToUTF16("may")]); | |
202 CHECK_EQ(6, map_[ASCIIToUTF16("jun")]); | |
203 CHECK_EQ(7, map_[ASCIIToUTF16("jul")]); | |
204 CHECK_EQ(8, map_[ASCIIToUTF16("aug")]); | |
205 CHECK_EQ(9, map_[ASCIIToUTF16("sep")]); | |
206 CHECK_EQ(10, map_[ASCIIToUTF16("oct")]); | |
207 CHECK_EQ(11, map_[ASCIIToUTF16("nov")]); | |
208 CHECK_EQ(12, map_[ASCIIToUTF16("dec")]); | |
209 } | |
210 | |
211 // Maps lowercase month names to numbers in range 1-12. | |
212 std::map<base::string16, int> map_; | |
213 | |
214 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap); | |
215 }; | |
216 | |
217 } // namespace | |
218 | |
219 // static | |
220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text, | |
221 int* number) { | |
222 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number); | |
223 } | |
224 | |
225 // static | |
226 bool FtpUtil::LsDateListingToTime(const base::string16& month, | |
227 const base::string16& day, | |
228 const base::string16& rest, | |
229 const base::Time& current_time, | |
230 base::Time* result) { | |
231 base::Time::Exploded time_exploded = { 0 }; | |
232 | |
233 if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) { | |
234 // Work around garbage sent by some servers in the same column | |
235 // as the month. Take just last 3 characters of the string. | |
236 if (month.length() < 3 || | |
237 !AbbreviatedMonthToNumber(month.substr(month.length() - 3), | |
238 &time_exploded.month)) { | |
239 return false; | |
240 } | |
241 } | |
242 | |
243 if (!base::StringToInt(day, &time_exploded.day_of_month)) | |
244 return false; | |
245 if (time_exploded.day_of_month > 31) | |
246 return false; | |
247 | |
248 if (!base::StringToInt(rest, &time_exploded.year)) { | |
249 // Maybe it's time. Does it look like time? Note that it can be any of | |
250 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M". | |
251 if (rest.length() > 5) | |
252 return false; | |
253 | |
254 size_t colon_pos = rest.find(':'); | |
255 if (colon_pos == base::string16::npos) | |
256 return false; | |
257 if (colon_pos > 2) | |
258 return false; | |
259 | |
260 if (!base::StringToInt( | |
261 StringPiece16(rest.begin(), rest.begin() + colon_pos), | |
262 &time_exploded.hour)) { | |
263 return false; | |
264 } | |
265 if (!base::StringToInt( | |
266 StringPiece16(rest.begin() + colon_pos + 1, rest.end()), | |
267 &time_exploded.minute)) { | |
268 return false; | |
269 } | |
270 | |
271 // Guess the year. | |
272 base::Time::Exploded current_exploded; | |
273 current_time.LocalExplode(¤t_exploded); | |
274 | |
275 // If it's not possible for the parsed date to be in the current year, | |
276 // use the previous year. | |
277 if (time_exploded.month > current_exploded.month || | |
278 (time_exploded.month == current_exploded.month && | |
279 time_exploded.day_of_month > current_exploded.day_of_month)) { | |
280 time_exploded.year = current_exploded.year - 1; | |
281 } else { | |
282 time_exploded.year = current_exploded.year; | |
283 } | |
284 } | |
285 | |
286 // We don't know the time zone of the listing, so just use local time. | |
287 *result = base::Time::FromLocalExploded(time_exploded); | |
288 return true; | |
289 } | |
290 | |
291 // static | |
292 bool FtpUtil::WindowsDateListingToTime(const base::string16& date, | |
293 const base::string16& time, | |
294 base::Time* result) { | |
295 base::Time::Exploded time_exploded = { 0 }; | |
296 | |
297 // Date should be in format MM-DD-YY[YY]. | |
298 std::vector<base::string16> date_parts; | |
299 base::SplitString(date, '-', &date_parts); | |
300 if (date_parts.size() != 3) | |
301 return false; | |
302 if (!base::StringToInt(date_parts[0], &time_exploded.month)) | |
303 return false; | |
304 if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month)) | |
305 return false; | |
306 if (!base::StringToInt(date_parts[2], &time_exploded.year)) | |
307 return false; | |
308 if (time_exploded.year < 0) | |
309 return false; | |
310 // If year has only two digits then assume that 00-79 is 2000-2079, | |
311 // and 80-99 is 1980-1999. | |
312 if (time_exploded.year < 80) | |
313 time_exploded.year += 2000; | |
314 else if (time_exploded.year < 100) | |
315 time_exploded.year += 1900; | |
316 | |
317 // Time should be in format HH:MM[(AM|PM)] | |
318 if (time.length() < 5) | |
319 return false; | |
320 | |
321 std::vector<base::string16> time_parts; | |
322 base::SplitString(time.substr(0, 5), ':', &time_parts); | |
323 if (time_parts.size() != 2) | |
324 return false; | |
325 if (!base::StringToInt(time_parts[0], &time_exploded.hour)) | |
326 return false; | |
327 if (!base::StringToInt(time_parts[1], &time_exploded.minute)) | |
328 return false; | |
329 if (!time_exploded.HasValidValues()) | |
330 return false; | |
331 | |
332 if (time.length() > 5) { | |
333 if (time.length() != 7) | |
334 return false; | |
335 base::string16 am_or_pm(time.substr(5, 2)); | |
336 if (EqualsASCII(am_or_pm, "PM")) { | |
337 if (time_exploded.hour < 12) | |
338 time_exploded.hour += 12; | |
339 } else if (EqualsASCII(am_or_pm, "AM")) { | |
340 if (time_exploded.hour == 12) | |
341 time_exploded.hour = 0; | |
342 } else { | |
343 return false; | |
344 } | |
345 } | |
346 | |
347 // We don't know the time zone of the server, so just use local time. | |
348 *result = base::Time::FromLocalExploded(time_exploded); | |
349 return true; | |
350 } | |
351 | |
352 // static | |
353 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text, | |
354 int columns) { | |
355 base::i18n::UTF16CharIterator iter(&text); | |
356 | |
357 // TODO(jshin): Is u_isspace the right function to use here? | |
358 for (int i = 0; i < columns; i++) { | |
359 // Skip the leading whitespace. | |
360 while (!iter.end() && u_isspace(iter.get())) | |
361 iter.Advance(); | |
362 | |
363 // Skip the actual text of i-th column. | |
364 while (!iter.end() && !u_isspace(iter.get())) | |
365 iter.Advance(); | |
366 } | |
367 | |
368 base::string16 result(text.substr(iter.array_pos())); | |
369 base::TrimWhitespace(result, base::TRIM_ALL, &result); | |
370 return result; | |
371 } | |
372 | |
}  // namespace net
OLD | NEW |