OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/string_util.h" | 5 #include "base/string_util_static.h" |
6 | |
7 #include "build/build_config.h" | |
8 | |
9 #include <ctype.h> | |
10 #include <errno.h> | |
11 #include <math.h> | |
12 #include <stdarg.h> | |
13 #include <stdio.h> | |
14 #include <stdlib.h> | |
15 #include <string.h> | |
16 #include <time.h> | |
17 #include <wchar.h> | |
18 #include <wctype.h> | |
19 | |
20 #include <algorithm> | |
21 #include <vector> | |
22 | |
23 #include "base/basictypes.h" | |
24 #include "base/logging.h" | |
25 #include "base/memory/singleton.h" | |
26 #include "base/third_party/dmg_fp/dmg_fp.h" | |
27 #include "base/utf_string_conversion_utils.h" | |
28 #include "base/utf_string_conversions.h" | |
29 #include "base/third_party/icu/icu_utf.h" | |
30 | |
31 namespace { | |
32 | |
33 // Force the singleton used by Empty[W]String[16] to be a unique type. This | |
34 // prevents other code that might accidentally use Singleton<string> from | |
35 // getting our internal one. | |
36 struct EmptyStrings { | |
37 EmptyStrings() {} | |
38 const std::string s; | |
39 const std::wstring ws; | |
40 const string16 s16; | |
41 | |
42 static EmptyStrings* GetInstance() { | |
43 return Singleton<EmptyStrings>::get(); | |
44 } | |
45 }; | |
46 | |
47 // Used by ReplaceStringPlaceholders to track the position in the string of | |
48 // replaced parameters. | |
49 struct ReplacementOffset { | |
50 ReplacementOffset(uintptr_t parameter, size_t offset) | |
51 : parameter(parameter), | |
52 offset(offset) {} | |
53 | |
54 // Index of the parameter. | |
55 uintptr_t parameter; | |
56 | |
57 // Starting position in the string. | |
58 size_t offset; | |
59 }; | |
60 | |
61 static bool CompareParameter(const ReplacementOffset& elem1, | |
62 const ReplacementOffset& elem2) { | |
63 return elem1.parameter < elem2.parameter; | |
64 } | |
65 | |
66 } // namespace | |
67 | |
namespace base {

// Scans a wprintf-style |format| string and reports whether every conversion
// specification in it is portable. A specification is rejected when it uses
// 's' or 'c' without a preceding 'l' modifier, or when it uses any of
// 'S', 'C', 'F', 'D', 'O' or 'U'. A format string that ends in the middle of
// a specification is reported as portable, since it is equally broken
// everywhere.
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that terminate a conversion specification.
  static const wchar_t kConversionChars[] = L"diouxXeEfgGaAcspn%";
  // Characters that are never portable inside a specification.
  static const wchar_t kNonPortableChars[] = L"SCFDOU";

  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      ++cursor;
      continue;
    }

    // Inside a specification: consume characters up to the conversion char.
    bool has_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // Truncated specification; nothing unportable was seen.
        return true;
      }

      if (ch == L'l') {
        // Only 'l' can make the 's' and 'c' specifiers portable.
        has_l_modifier = true;
      } else {
        const bool is_text_specifier = (ch == L's' || ch == L'c');
        if ((is_text_specifier && !has_l_modifier) ||
            wcschr(kNonPortableChars, ch)) {
          return false;
        }
      }

      if (wcschr(kConversionChars, ch))
        break;  // Specification complete and portable.
    }

    // Step past the conversion character and keep scanning.
    ++cursor;
  }

  return true;
}

}  // namespace base
106 | |
107 | |
108 const std::string& EmptyString() { | |
109 return EmptyStrings::GetInstance()->s; | |
110 } | |
111 | |
112 const std::wstring& EmptyWString() { | |
113 return EmptyStrings::GetInstance()->ws; | |
114 } | |
115 | |
116 const string16& EmptyString16() { | |
117 return EmptyStrings::GetInstance()->s16; | |
118 } | |
119 | 6 |
120 #define WHITESPACE_UNICODE \ | 7 #define WHITESPACE_UNICODE \ |
121 0x0009, /* <control-0009> to <control-000D> */ \ | 8 0x0009, /* <control-0009> to <control-000D> */ \ |
122 0x000A, \ | 9 0x000A, \ |
123 0x000B, \ | 10 0x000B, \ |
124 0x000C, \ | 11 0x000C, \ |
125 0x000D, \ | 12 0x000D, \ |
126 0x0020, /* Space */ \ | 13 0x0020, /* Space */ \ |
127 0x0085, /* <control-0085> */ \ | 14 0x0085, /* <control-0085> */ \ |
128 0x00A0, /* No-Break Space */ \ | 15 0x00A0, /* No-Break Space */ \ |
(...skipping 28 matching lines...) Expand all Loading... |
157 0x09, // <control-0009> to <control-000D> | 44 0x09, // <control-0009> to <control-000D> |
158 0x0A, | 45 0x0A, |
159 0x0B, | 46 0x0B, |
160 0x0C, | 47 0x0C, |
161 0x0D, | 48 0x0D, |
162 0x20, // Space | 49 0x20, // Space |
163 0 | 50 0 |
164 }; | 51 }; |
165 | 52 |
166 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; | 53 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; |
167 | |
// Copies |input| into |output| and then deletes from |output| every character
// that occurs in the NUL-terminated set |remove_chars|. Returns true if at
// least one character was removed.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  *output = input;

  bool any_removed = false;
  for (size_t pos = output->find_first_of(remove_chars);
       pos != STR::npos;
       pos = output->find_first_of(remove_chars, pos)) {
    // Erasing shifts the tail left, so the next search resumes at |pos|.
    output->erase(pos, 1);
    any_removed = true;
  }

  return any_removed;
}
186 | |
187 bool RemoveChars(const std::wstring& input, | |
188 const wchar_t remove_chars[], | |
189 std::wstring* output) { | |
190 return RemoveCharsT(input, remove_chars, output); | |
191 } | |
192 | |
193 #if !defined(WCHAR_T_IS_UTF16) | |
194 bool RemoveChars(const string16& input, | |
195 const char16 remove_chars[], | |
196 string16* output) { | |
197 return RemoveCharsT(input, remove_chars, output); | |
198 } | |
199 #endif | |
200 | |
201 bool RemoveChars(const std::string& input, | |
202 const char remove_chars[], | |
203 std::string* output) { | |
204 return RemoveCharsT(input, remove_chars, output); | |
205 } | |
206 | |
207 template<typename STR> | |
208 TrimPositions TrimStringT(const STR& input, | |
209 const typename STR::value_type trim_chars[], | |
210 TrimPositions positions, | |
211 STR* output) { | |
212 // Find the edges of leading/trailing whitespace as desired. | |
213 const typename STR::size_type last_char = input.length() - 1; | |
214 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? | |
215 input.find_first_not_of(trim_chars) : 0; | |
216 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? | |
217 input.find_last_not_of(trim_chars) : last_char; | |
218 | |
219 // When the string was all whitespace, report that we stripped off whitespace | |
220 // from whichever position the caller was interested in. For empty input, we | |
221 // stripped no whitespace, but we still need to clear |output|. | |
222 if (input.empty() || | |
223 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { | |
224 bool input_was_empty = input.empty(); // in case output == &input | |
225 output->clear(); | |
226 return input_was_empty ? TRIM_NONE : positions; | |
227 } | |
228 | |
229 // Trim the whitespace. | |
230 *output = | |
231 input.substr(first_good_char, last_good_char - first_good_char + 1); | |
232 | |
233 // Return where we trimmed from. | |
234 return static_cast<TrimPositions>( | |
235 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | | |
236 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); | |
237 } | |
238 | |
239 bool TrimString(const std::wstring& input, | |
240 const wchar_t trim_chars[], | |
241 std::wstring* output) { | |
242 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
243 } | |
244 | |
245 #if !defined(WCHAR_T_IS_UTF16) | |
246 bool TrimString(const string16& input, | |
247 const char16 trim_chars[], | |
248 string16* output) { | |
249 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
250 } | |
251 #endif | |
252 | |
253 bool TrimString(const std::string& input, | |
254 const char trim_chars[], | |
255 std::string* output) { | |
256 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
257 } | |
258 | |
259 void TruncateUTF8ToByteSize(const std::string& input, | |
260 const size_t byte_size, | |
261 std::string* output) { | |
262 DCHECK(output); | |
263 if (byte_size > input.length()) { | |
264 *output = input; | |
265 return; | |
266 } | |
267 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); | |
268 // Note: This cast is necessary because CBU8_NEXT uses int32s. | |
269 int32 truncation_length = static_cast<int32>(byte_size); | |
270 int32 char_index = truncation_length - 1; | |
271 const char* data = input.data(); | |
272 | |
273 // Using CBU8, we will move backwards from the truncation point | |
274 // to the beginning of the string looking for a valid UTF8 | |
275 // character. Once a full UTF8 character is found, we will | |
276 // truncate the string to the end of that character. | |
277 while (char_index >= 0) { | |
278 int32 prev = char_index; | |
279 uint32 code_point = 0; | |
280 CBU8_NEXT(data, char_index, truncation_length, code_point); | |
281 if (!base::IsValidCharacter(code_point) || | |
282 !base::IsValidCodepoint(code_point)) { | |
283 char_index = prev - 1; | |
284 } else { | |
285 break; | |
286 } | |
287 } | |
288 | |
289 if (char_index >= 0 ) | |
290 *output = input.substr(0, char_index); | |
291 else | |
292 output->clear(); | |
293 } | |
294 | |
295 TrimPositions TrimWhitespace(const std::wstring& input, | |
296 TrimPositions positions, | |
297 std::wstring* output) { | |
298 return TrimStringT(input, kWhitespaceWide, positions, output); | |
299 } | |
300 | |
301 #if !defined(WCHAR_T_IS_UTF16) | |
302 TrimPositions TrimWhitespace(const string16& input, | |
303 TrimPositions positions, | |
304 string16* output) { | |
305 return TrimStringT(input, kWhitespaceUTF16, positions, output); | |
306 } | |
307 #endif | |
308 | |
309 TrimPositions TrimWhitespaceASCII(const std::string& input, | |
310 TrimPositions positions, | |
311 std::string* output) { | |
312 return TrimStringT(input, kWhitespaceASCII, positions, output); | |
313 } | |
314 | |
315 // This function is only for backward-compatibility. | |
316 // To be removed when all callers are updated. | |
317 TrimPositions TrimWhitespace(const std::string& input, | |
318 TrimPositions positions, | |
319 std::string* output) { | |
320 return TrimWhitespaceASCII(input, positions, output); | |
321 } | |
322 | |
// Returns a copy of |text| in which leading and trailing whitespace is removed
// and every interior run of whitespace is reduced to a single space. When
// |trim_sequences_with_line_breaks| is true, an interior run that contains a
// CR or LF is removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  // The output is never longer than the input, so size it once and shrink it
  // at the end.
  STR collapsed;
  collapsed.resize(text.size());

  // Start out as if a whitespace run had already been dropped, so that any
  // leading whitespace is discarded.
  bool inside_run = true;
  bool run_dropped = true;

  int out_len = 0;
  for (typename STR::size_type n = 0; n < text.size(); ++n) {
    const typename STR::value_type ch = text[n];

    if (!IsWhitespace(ch)) {
      // Non-whitespace characters are copied straight across.
      inside_run = false;
      run_dropped = false;
      collapsed[out_len++] = ch;
      continue;
    }

    if (!inside_run) {
      // First whitespace of a run: emit the single replacement space.
      inside_run = true;
      collapsed[out_len++] = L' ';
    }

    const bool is_line_break = (ch == '\n') || (ch == '\r');
    if (trim_sequences_with_line_breaks && !run_dropped && is_line_break) {
      // Runs containing CR or LF are eliminated entirely: take back the space
      // written for this run.
      run_dropped = true;
      --out_len;
    }
  }

  if (inside_run && !run_dropped) {
    // The text ended inside a run; drop the space emitted for it.
    --out_len;
  }

  collapsed.resize(out_len);
  return collapsed;
}
364 | |
365 std::wstring CollapseWhitespace(const std::wstring& text, | |
366 bool trim_sequences_with_line_breaks) { | |
367 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
368 } | |
369 | |
370 #if !defined(WCHAR_T_IS_UTF16) | |
371 string16 CollapseWhitespace(const string16& text, | |
372 bool trim_sequences_with_line_breaks) { | |
373 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
374 } | |
375 #endif | |
376 | |
377 std::string CollapseWhitespaceASCII(const std::string& text, | |
378 bool trim_sequences_with_line_breaks) { | |
379 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
380 } | |
381 | |
382 bool ContainsOnlyWhitespaceASCII(const std::string& str) { | |
383 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { | |
384 if (!IsAsciiWhitespace(*i)) | |
385 return false; | |
386 } | |
387 return true; | |
388 } | |
389 | |
390 bool ContainsOnlyWhitespace(const string16& str) { | |
391 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) { | |
392 if (!IsWhitespace(*i)) | |
393 return false; | |
394 } | |
395 return true; | |
396 } | |
397 | |
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| always yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // |input| qualifies exactly when it has no character outside the set.
  return input.find_first_not_of(characters) == STR::npos;
}
407 | |
408 bool ContainsOnlyChars(const std::wstring& input, | |
409 const std::wstring& characters) { | |
410 return ContainsOnlyCharsT(input, characters); | |
411 } | |
412 | |
413 #if !defined(WCHAR_T_IS_UTF16) | |
414 bool ContainsOnlyChars(const string16& input, const string16& characters) { | |
415 return ContainsOnlyCharsT(input, characters); | |
416 } | |
417 #endif | |
418 | |
419 bool ContainsOnlyChars(const std::string& input, | |
420 const std::string& characters) { | |
421 return ContainsOnlyCharsT(input, characters); | |
422 } | |
423 | |
424 std::string WideToASCII(const std::wstring& wide) { | |
425 DCHECK(IsStringASCII(wide)) << wide; | |
426 return std::string(wide.begin(), wide.end()); | |
427 } | |
428 | |
429 std::string UTF16ToASCII(const string16& utf16) { | |
430 DCHECK(IsStringASCII(utf16)) << utf16; | |
431 return std::string(utf16.begin(), utf16.end()); | |
432 } | |
433 | |
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| to Latin1 and stores it in |latin1|. Returns false, leaving
// |latin1| empty, if any element of |wide| lies outside the range 0..255.
// |latin1| is cleared up front and only receives the result on success.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // wchar_t is a signed type on some platforms. Compare as unsigned so that
    // a negative element is rejected rather than slipping past the range
    // check and being truncated into an arbitrary byte.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  latin1->swap(output);
  return true;
}
447 | |
448 template<class STR> | |
449 static bool DoIsStringASCII(const STR& str) { | |
450 for (size_t i = 0; i < str.length(); i++) { | |
451 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; | |
452 if (c > 0x7F) | |
453 return false; | |
454 } | |
455 return true; | |
456 } | |
457 | |
458 bool IsStringASCII(const std::wstring& str) { | |
459 return DoIsStringASCII(str); | |
460 } | |
461 | |
462 #if !defined(WCHAR_T_IS_UTF16) | |
463 bool IsStringASCII(const string16& str) { | |
464 return DoIsStringASCII(str); | |
465 } | |
466 #endif | |
467 | |
468 bool IsStringASCII(const base::StringPiece& str) { | |
469 return DoIsStringASCII(str); | |
470 } | |
471 | |
472 bool IsStringUTF8(const std::string& str) { | |
473 const char *src = str.data(); | |
474 int32 src_len = static_cast<int32>(str.length()); | |
475 int32 char_index = 0; | |
476 | |
477 while (char_index < src_len) { | |
478 int32 code_point; | |
479 CBU8_NEXT(src, char_index, src_len, code_point); | |
480 if (!base::IsValidCharacter(code_point)) | |
481 return false; | |
482 } | |
483 return true; | |
484 } | |
485 | |
486 template<typename Iter> | |
487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin, | |
488 Iter a_end, | |
489 const char* b) { | |
490 for (Iter it = a_begin; it != a_end; ++it, ++b) { | |
491 if (!*b || base::ToLowerASCII(*it) != *b) | |
492 return false; | |
493 } | |
494 return *b == 0; | |
495 } | |
496 | |
497 // Front-ends for LowerCaseEqualsASCII. | |
498 bool LowerCaseEqualsASCII(const std::string& a, const char* b) { | |
499 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
500 } | |
501 | |
502 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) { | |
503 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
504 } | |
505 | |
506 #if !defined(WCHAR_T_IS_UTF16) | |
507 bool LowerCaseEqualsASCII(const string16& a, const char* b) { | |
508 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
509 } | |
510 #endif | |
511 | |
512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | |
513 std::string::const_iterator a_end, | |
514 const char* b) { | |
515 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
516 } | |
517 | |
518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
519 std::wstring::const_iterator a_end, | |
520 const char* b) { | |
521 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
522 } | |
523 | |
524 #if !defined(WCHAR_T_IS_UTF16) | |
525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | |
526 string16::const_iterator a_end, | |
527 const char* b) { | |
528 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
529 } | |
530 #endif | |
531 | |
532 bool LowerCaseEqualsASCII(const char* a_begin, | |
533 const char* a_end, | |
534 const char* b) { | |
535 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
536 } | |
537 | |
538 bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
539 const wchar_t* a_end, | |
540 const char* b) { | |
541 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
542 } | |
543 | |
544 #if !defined(WCHAR_T_IS_UTF16) | |
545 bool LowerCaseEqualsASCII(const char16* a_begin, | |
546 const char16* a_end, | |
547 const char* b) { | |
548 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
549 } | |
550 #endif | |
551 | |
552 bool EqualsASCII(const string16& a, const base::StringPiece& b) { | |
553 if (a.length() != b.length()) | |
554 return false; | |
555 return std::equal(b.begin(), b.end(), a.begin()); | |
556 } | |
557 | |
558 bool StartsWithASCII(const std::string& str, | |
559 const std::string& search, | |
560 bool case_sensitive) { | |
561 if (case_sensitive) | |
562 return str.compare(0, search.length(), search) == 0; | |
563 else | |
564 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; | |
565 } | |
566 | |
567 template <typename STR> | |
568 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { | |
569 if (case_sensitive) { | |
570 return str.compare(0, search.length(), search) == 0; | |
571 } else { | |
572 if (search.size() > str.size()) | |
573 return false; | |
574 return std::equal(search.begin(), search.end(), str.begin(), | |
575 base::CaseInsensitiveCompare<typename STR::value_type>()); | |
576 } | |
577 } | |
578 | |
579 bool StartsWith(const std::wstring& str, const std::wstring& search, | |
580 bool case_sensitive) { | |
581 return StartsWithT(str, search, case_sensitive); | |
582 } | |
583 | |
584 #if !defined(WCHAR_T_IS_UTF16) | |
585 bool StartsWith(const string16& str, const string16& search, | |
586 bool case_sensitive) { | |
587 return StartsWithT(str, search, case_sensitive); | |
588 } | |
589 #endif | |
590 | |
591 template <typename STR> | |
592 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { | |
593 typename STR::size_type str_length = str.length(); | |
594 typename STR::size_type search_length = search.length(); | |
595 if (search_length > str_length) | |
596 return false; | |
597 if (case_sensitive) { | |
598 return str.compare(str_length - search_length, search_length, search) == 0; | |
599 } else { | |
600 return std::equal(search.begin(), search.end(), | |
601 str.begin() + (str_length - search_length), | |
602 base::CaseInsensitiveCompare<typename STR::value_type>()); | |
603 } | |
604 } | |
605 | |
606 bool EndsWith(const std::string& str, const std::string& search, | |
607 bool case_sensitive) { | |
608 return EndsWithT(str, search, case_sensitive); | |
609 } | |
610 | |
611 bool EndsWith(const std::wstring& str, const std::wstring& search, | |
612 bool case_sensitive) { | |
613 return EndsWithT(str, search, case_sensitive); | |
614 } | |
615 | |
616 #if !defined(WCHAR_T_IS_UTF16) | |
617 bool EndsWith(const string16& str, const string16& search, | |
618 bool case_sensitive) { | |
619 return EndsWithT(str, search, case_sensitive); | |
620 } | |
621 #endif | |
622 | |
623 DataUnits GetByteDisplayUnits(int64 bytes) { | |
624 // The byte thresholds at which we display amounts. A byte count is displayed | |
625 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1]. | |
626 // This must match the DataUnits enum. | |
627 static const int64 kUnitThresholds[] = { | |
628 0, // DATA_UNITS_BYTE, | |
629 3*1024, // DATA_UNITS_KIBIBYTE, | |
630 2*1024*1024, // DATA_UNITS_MEBIBYTE, | |
631 1024*1024*1024 // DATA_UNITS_GIBIBYTE, | |
632 }; | |
633 | |
634 if (bytes < 0) { | |
635 NOTREACHED() << "Negative bytes value"; | |
636 return DATA_UNITS_BYTE; | |
637 } | |
638 | |
639 int unit_index = arraysize(kUnitThresholds); | |
640 while (--unit_index > 0) { | |
641 if (bytes >= kUnitThresholds[unit_index]) | |
642 break; | |
643 } | |
644 | |
645 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE); | |
646 return DataUnits(unit_index); | |
647 } | |
648 | |
649 // TODO(mpcomplete): deal with locale | |
650 // Byte suffixes. This must match the DataUnits enum. | |
651 static const char* const kByteStrings[] = { | |
652 "B", | |
653 "kB", | |
654 "MB", | |
655 "GB" | |
656 }; | |
657 | |
658 static const char* const kSpeedStrings[] = { | |
659 "B/s", | |
660 "kB/s", | |
661 "MB/s", | |
662 "GB/s" | |
663 }; | |
664 | |
665 string16 FormatBytesInternal(int64 bytes, | |
666 DataUnits units, | |
667 bool show_units, | |
668 const char* const* suffix) { | |
669 if (bytes < 0) { | |
670 NOTREACHED() << "Negative bytes value"; | |
671 return string16(); | |
672 } | |
673 | |
674 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE); | |
675 | |
676 // Put the quantity in the right units. | |
677 double unit_amount = static_cast<double>(bytes); | |
678 for (int i = 0; i < units; ++i) | |
679 unit_amount /= 1024.0; | |
680 | |
681 char buf[64]; | |
682 if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100) | |
683 base::snprintf(buf, arraysize(buf), "%.1lf", unit_amount); | |
684 else | |
685 base::snprintf(buf, arraysize(buf), "%.0lf", unit_amount); | |
686 | |
687 std::string ret(buf); | |
688 if (show_units) { | |
689 ret += " "; | |
690 ret += suffix[units]; | |
691 } | |
692 | |
693 return ASCIIToUTF16(ret); | |
694 } | |
695 | |
696 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units) { | |
697 return FormatBytesInternal(bytes, units, show_units, kByteStrings); | |
698 } | |
699 | |
700 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units) { | |
701 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings); | |
702 } | |
703 | |
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. Does nothing when |start_offset| is npos or is not
// inside the string, or when |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty pattern matches at every position, so the loop below would never
  // terminate. Bail out even when the DCHECK above is compiled out.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume after the inserted text so it is never rescanned.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
723 | |
724 void ReplaceFirstSubstringAfterOffset(string16* str, | |
725 string16::size_type start_offset, | |
726 const string16& find_this, | |
727 const string16& replace_with) { | |
728 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
729 false); // replace first instance | |
730 } | |
731 | |
732 void ReplaceFirstSubstringAfterOffset(std::string* str, | |
733 std::string::size_type start_offset, | |
734 const std::string& find_this, | |
735 const std::string& replace_with) { | |
736 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
737 false); // replace first instance | |
738 } | |
739 | |
740 void ReplaceSubstringsAfterOffset(string16* str, | |
741 string16::size_type start_offset, | |
742 const string16& find_this, | |
743 const string16& replace_with) { | |
744 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
745 true); // replace all instances | |
746 } | |
747 | |
748 void ReplaceSubstringsAfterOffset(std::string* str, | |
749 std::string::size_type start_offset, | |
750 const std::string& find_this, | |
751 const std::string& replace_with) { | |
752 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
753 true); // replace all instances | |
754 } | |
755 | |
756 | |
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |tokens| (which is cleared first). Runs of delimiters
// never produce empty tokens. Returns the number of tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type Pos;

  tokens->clear();

  for (Pos token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos; ) {
    const Pos token_end = str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // Last token: it runs to the end of the string.
      tokens->push_back(str.substr(token_begin));
      break;
    }

    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    // Skip the delimiter run that follows this token.
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
777 | |
778 size_t Tokenize(const std::wstring& str, | |
779 const std::wstring& delimiters, | |
780 std::vector<std::wstring>* tokens) { | |
781 return TokenizeT(str, delimiters, tokens); | |
782 } | |
783 | |
784 #if !defined(WCHAR_T_IS_UTF16) | |
785 size_t Tokenize(const string16& str, | |
786 const string16& delimiters, | |
787 std::vector<string16>* tokens) { | |
788 return TokenizeT(str, delimiters, tokens); | |
789 } | |
790 #endif | |
791 | |
792 size_t Tokenize(const std::string& str, | |
793 const std::string& delimiters, | |
794 std::vector<std::string>* tokens) { | |
795 return TokenizeT(str, delimiters, tokens); | |
796 } | |
797 | |
798 size_t Tokenize(const base::StringPiece& str, | |
799 const base::StringPiece& delimiters, | |
800 std::vector<base::StringPiece>* tokens) { | |
801 return TokenizeT(str, delimiters, tokens); | |
802 } | |
803 | |
// Concatenates |parts|, inserting |sep| between consecutive elements.
// Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  STR joined;
  for (size_t n = 0; n < parts.size(); ++n) {
    // The separator goes before every element except the first.
    if (n != 0)
      joined += sep;
    joined += parts[n];
  }
  return joined;
}
821 | |
822 std::string JoinString(const std::vector<std::string>& parts, char sep) { | |
823 return JoinStringT(parts, sep); | |
824 } | |
825 | |
826 string16 JoinString(const std::vector<string16>& parts, char16 sep) { | |
827 return JoinStringT(parts, sep); | |
828 } | |
829 | |
830 template<class FormatStringType, class OutStringType> | |
831 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, | |
832 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { | |
833 size_t substitutions = subst.size(); | |
834 DCHECK(substitutions < 10); | |
835 | |
836 size_t sub_length = 0; | |
837 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); | |
838 iter != subst.end(); ++iter) { | |
839 sub_length += iter->length(); | |
840 } | |
841 | |
842 OutStringType formatted; | |
843 formatted.reserve(format_string.length() + sub_length); | |
844 | |
845 std::vector<ReplacementOffset> r_offsets; | |
846 for (typename FormatStringType::const_iterator i = format_string.begin(); | |
847 i != format_string.end(); ++i) { | |
848 if ('$' == *i) { | |
849 if (i + 1 != format_string.end()) { | |
850 ++i; | |
851 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; | |
852 if ('$' == *i) { | |
853 while (i != format_string.end() && '$' == *i) { | |
854 formatted.push_back('$'); | |
855 ++i; | |
856 } | |
857 --i; | |
858 } else { | |
859 uintptr_t index = *i - '1'; | |
860 if (offsets) { | |
861 ReplacementOffset r_offset(index, | |
862 static_cast<int>(formatted.size())); | |
863 r_offsets.insert(std::lower_bound(r_offsets.begin(), | |
864 r_offsets.end(), | |
865 r_offset, | |
866 &CompareParameter), | |
867 r_offset); | |
868 } | |
869 if (index < substitutions) | |
870 formatted.append(subst.at(index)); | |
871 } | |
872 } | |
873 } else { | |
874 formatted.push_back(*i); | |
875 } | |
876 } | |
877 if (offsets) { | |
878 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); | |
879 i != r_offsets.end(); ++i) { | |
880 offsets->push_back(i->offset); | |
881 } | |
882 } | |
883 return formatted; | |
884 } | |
885 | |
886 string16 ReplaceStringPlaceholders(const string16& format_string, | |
887 const std::vector<string16>& subst, | |
888 std::vector<size_t>* offsets) { | |
889 return DoReplaceStringPlaceholders(format_string, subst, offsets); | |
890 } | |
891 | |
892 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, | |
893 const std::vector<std::string>& subst, | |
894 std::vector<size_t>* offsets) { | |
895 return DoReplaceStringPlaceholders(format_string, subst, offsets); | |
896 } | |
897 | |
898 string16 ReplaceStringPlaceholders(const string16& format_string, | |
899 const string16& a, | |
900 size_t* offset) { | |
901 std::vector<size_t> offsets; | |
902 std::vector<string16> subst; | |
903 subst.push_back(a); | |
904 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); | |
905 | |
906 DCHECK(offsets.size() == 1); | |
907 if (offset) { | |
908 *offset = offsets[0]; | |
909 } | |
910 return result; | |
911 } | |
912 | |
913 static bool IsWildcard(base_icu::UChar32 character) { | |
914 return character == '*' || character == '?'; | |
915 } | |
916 | |
917 // Move the strings pointers to the point where they start to differ. | |
918 template <typename CHAR, typename NEXT> | |
919 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, | |
920 const CHAR** string, const CHAR* string_end, | |
921 NEXT next) { | |
922 const CHAR* escape = NULL; | |
923 while (*pattern != pattern_end && *string != string_end) { | |
924 if (!escape && IsWildcard(**pattern)) { | |
925 // We don't want to match wildcard here, except if it's escaped. | |
926 return; | |
927 } | |
928 | |
929 // Check if the escapement char is found. If so, skip it and move to the | |
930 // next character. | |
931 if (!escape && **pattern == '\\') { | |
932 escape = *pattern; | |
933 next(pattern, pattern_end); | |
934 continue; | |
935 } | |
936 | |
937 // Check if the chars match, if so, increment the ptrs. | |
938 const CHAR* pattern_next = *pattern; | |
939 const CHAR* string_next = *string; | |
940 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | |
941 if (pattern_char == next(&string_next, string_end) && | |
942 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { | |
943 *pattern = pattern_next; | |
944 *string = string_next; | |
945 } else { | |
946 // Uh ho, it did not match, we are done. If the last char was an | |
947 // escapement, that means that it was an error to advance the ptr here, | |
948 // let's put it back where it was. This also mean that the MatchPattern | |
949 // function will return false because if we can't match an escape char | |
950 // here, then no one will. | |
951 if (escape) { | |
952 *pattern = escape; | |
953 } | |
954 return; | |
955 } | |
956 | |
957 escape = NULL; | |
958 } | |
959 } | |
960 | |
// Advances |*pattern| past any run of consecutive wildcard characters,
// stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
969 | |
// Recursive wildcard matcher over the ranges [eval, eval_end) and
// [pattern, pattern_end). |next| reads one character from a range and
// advances the pointer it is given. |depth| is the current recursion depth;
// once it exceeds kMaxDepth the match is reported as failed.
//
// In the pattern, '?' is tried against zero characters and then against one
// character, and '*' is tried against every suffix of the remaining string.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Skip over the common literal prefix.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // String exhausted: it is a match only if what is left of the pattern is
  // nothing but wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // String remains but the pattern is used up: no match.
  if (pattern == pattern_end)
    return false;

  // The pattern with its first character removed.
  const CHAR* pattern_tail = pattern;
  next(&pattern_tail, pattern_end);

  if (pattern[0] == '?') {
    // First let the question mark consume nothing...
    if (MatchPatternT(eval, eval_end, pattern_tail, pattern_end,
                      depth + 1, next)) {
      return true;
    }
    // ...then let it consume exactly one character of the string.
    const CHAR* eval_tail = eval;
    next(&eval_tail, eval_end);
    return MatchPatternT(eval_tail, eval_end, pattern_tail, pattern_end,
                         depth + 1, next);
  }

  // EatSameChars stopped on something that is neither '?' nor '*': a plain
  // mismatch.
  if (pattern[0] != '*')
    return false;

  // Collapse duplicate wild cards (********** into *) so that the method
  // does not recurse unnecessarily. http://crbug.com/52839
  EatWildcard(&pattern_tail, pattern_end, next);

  // Try the remainder of the pattern against every suffix of the string.
  // Note that |eval| moves forward one CHAR unit at a time here.
  for (; eval != eval_end; ++eval) {
    if (MatchPatternT(eval, eval_end, pattern_tail, pattern_end,
                      depth + 1, next)) {
      return true;
    }
  }

  // The whole string was consumed by the '*'; this is a match only if the
  // pattern holds nothing but wildcards from here on.
  EatWildcard(&pattern, pattern_end, next);
  return pattern == pattern_end;
}
1034 | |
1035 struct NextCharUTF8 { | |
1036 base_icu::UChar32 operator()(const char** p, const char* end) { | |
1037 base_icu::UChar32 c; | |
1038 int offset = 0; | |
1039 CBU8_NEXT(*p, offset, end - *p, c); | |
1040 *p += offset; | |
1041 return c; | |
1042 } | |
1043 }; | |
1044 | |
1045 struct NextCharUTF16 { | |
1046 base_icu::UChar32 operator()(const char16** p, const char16* end) { | |
1047 base_icu::UChar32 c; | |
1048 int offset = 0; | |
1049 CBU16_NEXT(*p, offset, end - *p, c); | |
1050 *p += offset; | |
1051 return c; | |
1052 } | |
1053 }; | |
1054 | |
1055 bool MatchPattern(const base::StringPiece& eval, | |
1056 const base::StringPiece& pattern) { | |
1057 return MatchPatternT(eval.data(), eval.data() + eval.size(), | |
1058 pattern.data(), pattern.data() + pattern.size(), | |
1059 0, NextCharUTF8()); | |
1060 } | |
1061 | |
1062 bool MatchPattern(const string16& eval, const string16& pattern) { | |
1063 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), | |
1064 pattern.c_str(), pattern.c_str() + pattern.size(), | |
1065 0, NextCharUTF16()); | |
1066 } | |
1067 | |
1068 // The following code is compatible with the OpenBSD lcpy interface. See: | |
1069 // http://www.gratisoft.us/todd/papers/strlcpy.html | |
1070 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c | |
1071 | |
namespace {

// Shared implementation of the strlcpy-style copy for any character type.
//
// Copies |src| into |dst|, writing at most |dst_size| characters including
// the terminator. When |dst_size| is non-zero the result is always
// terminated, truncating |src| if necessary; when it is zero |dst| is not
// touched. Returns the length of |src| in characters, not counting its
// terminator, so a return value >= |dst_size| means truncation occurred.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t pos = 0;
  while (pos < dst_size) {
    const CHAR copied = src[pos];
    dst[pos] = copied;
    if (copied == 0)  // The terminator was reached and copied; all done.
      return pos;
    ++pos;
  }

  // |dst| filled up before the end of |src|. The last slot currently holds a
  // source character; overwrite it with the terminator.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Keep walking |src| to find its full length, which is the return value.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}

}  // namespace
1091 | |
1092 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { | |
1093 return lcpyT<char>(dst, src, dst_size); | |
1094 } | |
1095 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { | |
1096 return lcpyT<wchar_t>(dst, src, dst_size); | |
1097 } | |
OLD | NEW |