Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Side by Side Diff: base/string_util_static.cc

Issue 6877053: Base: More adjustments to BASE_API and project dependencies to (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/string_util.h" 5 #include "base/string_util_static.h"
6
7 #include "build/build_config.h"
8
9 #include <ctype.h>
10 #include <errno.h>
11 #include <math.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <wchar.h>
18 #include <wctype.h>
19
20 #include <algorithm>
21 #include <vector>
22
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/third_party/dmg_fp/dmg_fp.h"
27 #include "base/utf_string_conversion_utils.h"
28 #include "base/utf_string_conversions.h"
29 #include "base/third_party/icu/icu_utf.h"
30
31 namespace {
32
33 // Force the singleton used by Empty[W]String[16] to be a unique type. This
34 // prevents other code that might accidentally use Singleton<string> from
35 // getting our internal one.
36 struct EmptyStrings {
37 EmptyStrings() {}
38 const std::string s;
39 const std::wstring ws;
40 const string16 s16;
41
42 static EmptyStrings* GetInstance() {
43 return Singleton<EmptyStrings>::get();
44 }
45 };
46
// Pairs a placeholder's parameter index with the position in the output
// string where its replacement starts. Used by ReplaceStringPlaceholders.
struct ReplacementOffset {
  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;

  ReplacementOffset(uintptr_t param_index, size_t start_offset)
      : parameter(param_index),
        offset(start_offset) {}
};
60
61 static bool CompareParameter(const ReplacementOffset& elem1,
62 const ReplacementOffset& elem2) {
63 return elem1.parameter < elem2.parameter;
64 }
65
66 } // namespace
67
68 namespace base {
69
// Returns true if the wprintf-style |format| uses only conversion
// specifications that this function treats as portable. The 's' and 'c'
// conversions are accepted only when preceded by an 'l' modifier, and the
// 'S', 'C', 'F', 'D', 'O' and 'U' conversions are always rejected. A format
// that ends in the middle of a specification is reported as portable, since
// it is equally broken everywhere.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor != '%') {
      ++cursor;
      continue;
    }

    // Scan one conversion specification, starting after the '%'.
    bool seen_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == '\0') {
        // Truncated specification: nothing unportable was found.
        return true;
      }

      if (ch == 'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        seen_l_modifier = true;
      } else {
        const bool needs_l = (ch == 's' || ch == 'c');
        if ((needs_l && !seen_l_modifier) ||
            ch == 'S' || ch == 'C' || ch == 'F' ||
            ch == 'D' || ch == 'O' || ch == 'U') {
          return false;
        }
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", ch)) {
        // A known conversion character ends this specification.
        break;
      }
    }

    // Step past the conversion character and keep scanning.
    ++cursor;
  }

  return true;
}
104
105 } // namespace base
106
107
108 const std::string& EmptyString() {
109 return EmptyStrings::GetInstance()->s;
110 }
111
112 const std::wstring& EmptyWString() {
113 return EmptyStrings::GetInstance()->ws;
114 }
115
116 const string16& EmptyString16() {
117 return EmptyStrings::GetInstance()->s16;
118 }
119 6
120 #define WHITESPACE_UNICODE \ 7 #define WHITESPACE_UNICODE \
121 0x0009, /* <control-0009> to <control-000D> */ \ 8 0x0009, /* <control-0009> to <control-000D> */ \
122 0x000A, \ 9 0x000A, \
123 0x000B, \ 10 0x000B, \
124 0x000C, \ 11 0x000C, \
125 0x000D, \ 12 0x000D, \
126 0x0020, /* Space */ \ 13 0x0020, /* Space */ \
127 0x0085, /* <control-0085> */ \ 14 0x0085, /* <control-0085> */ \
128 0x00A0, /* No-Break Space */ \ 15 0x00A0, /* No-Break Space */ \
(...skipping 28 matching lines...) Expand all
157 0x09, // <control-0009> to <control-000D> 44 0x09, // <control-0009> to <control-000D>
158 0x0A, 45 0x0A,
159 0x0B, 46 0x0B,
160 0x0C, 47 0x0C,
161 0x0D, 48 0x0D,
162 0x20, // Space 49 0x20, // Space
163 0 50 0
164 }; 51 };
165 52
166 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; 53 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
167
// Copies |input| into |output| and erases from the copy every character that
// appears in the NUL-terminated set |remove_chars|. Returns true if at least
// one character was erased.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  *output = input;

  bool any_removed = false;
  for (size_t pos = output->find_first_of(remove_chars);
       pos != STR::npos;
       pos = output->find_first_of(remove_chars, pos)) {
    // Erasing shifts the tail left, so the next search resumes at |pos|.
    output->erase(pos, 1);
    any_removed = true;
  }

  return any_removed;
}
186
187 bool RemoveChars(const std::wstring& input,
188 const wchar_t remove_chars[],
189 std::wstring* output) {
190 return RemoveCharsT(input, remove_chars, output);
191 }
192
193 #if !defined(WCHAR_T_IS_UTF16)
194 bool RemoveChars(const string16& input,
195 const char16 remove_chars[],
196 string16* output) {
197 return RemoveCharsT(input, remove_chars, output);
198 }
199 #endif
200
201 bool RemoveChars(const std::string& input,
202 const char remove_chars[],
203 std::string* output) {
204 return RemoveCharsT(input, remove_chars, output);
205 }
206
207 template<typename STR>
208 TrimPositions TrimStringT(const STR& input,
209 const typename STR::value_type trim_chars[],
210 TrimPositions positions,
211 STR* output) {
212 // Find the edges of leading/trailing whitespace as desired.
213 const typename STR::size_type last_char = input.length() - 1;
214 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
215 input.find_first_not_of(trim_chars) : 0;
216 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
217 input.find_last_not_of(trim_chars) : last_char;
218
219 // When the string was all whitespace, report that we stripped off whitespace
220 // from whichever position the caller was interested in. For empty input, we
221 // stripped no whitespace, but we still need to clear |output|.
222 if (input.empty() ||
223 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
224 bool input_was_empty = input.empty(); // in case output == &input
225 output->clear();
226 return input_was_empty ? TRIM_NONE : positions;
227 }
228
229 // Trim the whitespace.
230 *output =
231 input.substr(first_good_char, last_good_char - first_good_char + 1);
232
233 // Return where we trimmed from.
234 return static_cast<TrimPositions>(
235 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
236 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
237 }
238
239 bool TrimString(const std::wstring& input,
240 const wchar_t trim_chars[],
241 std::wstring* output) {
242 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
243 }
244
245 #if !defined(WCHAR_T_IS_UTF16)
246 bool TrimString(const string16& input,
247 const char16 trim_chars[],
248 string16* output) {
249 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
250 }
251 #endif
252
253 bool TrimString(const std::string& input,
254 const char trim_chars[],
255 std::string* output) {
256 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
257 }
258
259 void TruncateUTF8ToByteSize(const std::string& input,
260 const size_t byte_size,
261 std::string* output) {
262 DCHECK(output);
263 if (byte_size > input.length()) {
264 *output = input;
265 return;
266 }
267 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
268 // Note: This cast is necessary because CBU8_NEXT uses int32s.
269 int32 truncation_length = static_cast<int32>(byte_size);
270 int32 char_index = truncation_length - 1;
271 const char* data = input.data();
272
273 // Using CBU8, we will move backwards from the truncation point
274 // to the beginning of the string looking for a valid UTF8
275 // character. Once a full UTF8 character is found, we will
276 // truncate the string to the end of that character.
277 while (char_index >= 0) {
278 int32 prev = char_index;
279 uint32 code_point = 0;
280 CBU8_NEXT(data, char_index, truncation_length, code_point);
281 if (!base::IsValidCharacter(code_point) ||
282 !base::IsValidCodepoint(code_point)) {
283 char_index = prev - 1;
284 } else {
285 break;
286 }
287 }
288
289 if (char_index >= 0 )
290 *output = input.substr(0, char_index);
291 else
292 output->clear();
293 }
294
295 TrimPositions TrimWhitespace(const std::wstring& input,
296 TrimPositions positions,
297 std::wstring* output) {
298 return TrimStringT(input, kWhitespaceWide, positions, output);
299 }
300
301 #if !defined(WCHAR_T_IS_UTF16)
302 TrimPositions TrimWhitespace(const string16& input,
303 TrimPositions positions,
304 string16* output) {
305 return TrimStringT(input, kWhitespaceUTF16, positions, output);
306 }
307 #endif
308
309 TrimPositions TrimWhitespaceASCII(const std::string& input,
310 TrimPositions positions,
311 std::string* output) {
312 return TrimStringT(input, kWhitespaceASCII, positions, output);
313 }
314
315 // This function is only for backward-compatibility.
316 // To be removed when all callers are updated.
317 TrimPositions TrimWhitespace(const std::string& input,
318 TrimPositions positions,
319 std::string* output) {
320 return TrimWhitespaceASCII(input, positions, output);
321 }
322
// Returns a copy of |text| in which leading and trailing whitespace is
// removed and every interior run of whitespace becomes a single space. When
// |trim_sequences_with_line_breaks| is true, an interior run that contains a
// CR or LF is removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  // The output is never longer than the input; it is shrunk at the end.
  STR collapsed;
  collapsed.resize(text.size());

  // Start as if already inside a dropped whitespace run, so that leading
  // whitespace produces no output.
  bool inside_run = true;
  bool run_dropped = true;

  int out_len = 0;
  const typename STR::const_iterator text_end = text.end();
  for (typename STR::const_iterator it = text.begin(); it != text_end; ++it) {
    if (!IsWhitespace(*it)) {
      // Non-whitespace characters are copied straight across.
      inside_run = false;
      run_dropped = false;
      collapsed[out_len++] = *it;
      continue;
    }

    if (!inside_run) {
      // First whitespace of a run: emit the single replacement space.
      inside_run = true;
      collapsed[out_len++] = L' ';
    }

    const bool is_line_break = (*it == '\n') || (*it == '\r');
    if (trim_sequences_with_line_breaks && !run_dropped && is_line_break) {
      // The run contains CR or LF: take back the space emitted for it.
      run_dropped = true;
      --out_len;
    }
  }

  if (inside_run && !run_dropped) {
    // The text ended inside a run whose space was emitted: drop it.
    --out_len;
  }

  collapsed.resize(out_len);
  return collapsed;
}
364
365 std::wstring CollapseWhitespace(const std::wstring& text,
366 bool trim_sequences_with_line_breaks) {
367 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
368 }
369
370 #if !defined(WCHAR_T_IS_UTF16)
371 string16 CollapseWhitespace(const string16& text,
372 bool trim_sequences_with_line_breaks) {
373 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
374 }
375 #endif
376
377 std::string CollapseWhitespaceASCII(const std::string& text,
378 bool trim_sequences_with_line_breaks) {
379 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
380 }
381
382 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
383 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
384 if (!IsAsciiWhitespace(*i))
385 return false;
386 }
387 return true;
388 }
389
390 bool ContainsOnlyWhitespace(const string16& str) {
391 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {
392 if (!IsWhitespace(*i))
393 return false;
394 }
395 return true;
396 }
397
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| returns true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // The input qualifies exactly when it has no character outside the set.
  const typename STR::size_type first_outsider =
      input.find_first_not_of(characters);
  return first_outsider == STR::npos;
}
407
408 bool ContainsOnlyChars(const std::wstring& input,
409 const std::wstring& characters) {
410 return ContainsOnlyCharsT(input, characters);
411 }
412
413 #if !defined(WCHAR_T_IS_UTF16)
414 bool ContainsOnlyChars(const string16& input, const string16& characters) {
415 return ContainsOnlyCharsT(input, characters);
416 }
417 #endif
418
419 bool ContainsOnlyChars(const std::string& input,
420 const std::string& characters) {
421 return ContainsOnlyCharsT(input, characters);
422 }
423
424 std::string WideToASCII(const std::wstring& wide) {
425 DCHECK(IsStringASCII(wide)) << wide;
426 return std::string(wide.begin(), wide.end());
427 }
428
429 std::string UTF16ToASCII(const string16& utf16) {
430 DCHECK(IsStringASCII(utf16)) << utf16;
431 return std::string(utf16.begin(), utf16.end());
432 }
433
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| to Latin1 and stores the result in |latin1|. Returns false,
// leaving |latin1| empty, if any character lies outside [0, 255].
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // Compare as unsigned: where wchar_t is a signed type, a negative value
    // would otherwise pass the "> 255" test and be silently truncated.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  // Publish the result only after every character has been validated.
  latin1->swap(output);
  return true;
}
447
448 template<class STR>
449 static bool DoIsStringASCII(const STR& str) {
450 for (size_t i = 0; i < str.length(); i++) {
451 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
452 if (c > 0x7F)
453 return false;
454 }
455 return true;
456 }
457
458 bool IsStringASCII(const std::wstring& str) {
459 return DoIsStringASCII(str);
460 }
461
462 #if !defined(WCHAR_T_IS_UTF16)
463 bool IsStringASCII(const string16& str) {
464 return DoIsStringASCII(str);
465 }
466 #endif
467
468 bool IsStringASCII(const base::StringPiece& str) {
469 return DoIsStringASCII(str);
470 }
471
472 bool IsStringUTF8(const std::string& str) {
473 const char *src = str.data();
474 int32 src_len = static_cast<int32>(str.length());
475 int32 char_index = 0;
476
477 while (char_index < src_len) {
478 int32 code_point;
479 CBU8_NEXT(src, char_index, src_len, code_point);
480 if (!base::IsValidCharacter(code_point))
481 return false;
482 }
483 return true;
484 }
485
486 template<typename Iter>
487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
488 Iter a_end,
489 const char* b) {
490 for (Iter it = a_begin; it != a_end; ++it, ++b) {
491 if (!*b || base::ToLowerASCII(*it) != *b)
492 return false;
493 }
494 return *b == 0;
495 }
496
497 // Front-ends for LowerCaseEqualsASCII.
498 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
499 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
500 }
501
502 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
503 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
504 }
505
506 #if !defined(WCHAR_T_IS_UTF16)
507 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
508 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
509 }
510 #endif
511
512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
513 std::string::const_iterator a_end,
514 const char* b) {
515 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
516 }
517
518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
519 std::wstring::const_iterator a_end,
520 const char* b) {
521 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
522 }
523
524 #if !defined(WCHAR_T_IS_UTF16)
525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
526 string16::const_iterator a_end,
527 const char* b) {
528 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
529 }
530 #endif
531
532 bool LowerCaseEqualsASCII(const char* a_begin,
533 const char* a_end,
534 const char* b) {
535 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
536 }
537
538 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
539 const wchar_t* a_end,
540 const char* b) {
541 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
542 }
543
544 #if !defined(WCHAR_T_IS_UTF16)
545 bool LowerCaseEqualsASCII(const char16* a_begin,
546 const char16* a_end,
547 const char* b) {
548 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
549 }
550 #endif
551
552 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
553 if (a.length() != b.length())
554 return false;
555 return std::equal(b.begin(), b.end(), a.begin());
556 }
557
558 bool StartsWithASCII(const std::string& str,
559 const std::string& search,
560 bool case_sensitive) {
561 if (case_sensitive)
562 return str.compare(0, search.length(), search) == 0;
563 else
564 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
565 }
566
567 template <typename STR>
568 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
569 if (case_sensitive) {
570 return str.compare(0, search.length(), search) == 0;
571 } else {
572 if (search.size() > str.size())
573 return false;
574 return std::equal(search.begin(), search.end(), str.begin(),
575 base::CaseInsensitiveCompare<typename STR::value_type>());
576 }
577 }
578
579 bool StartsWith(const std::wstring& str, const std::wstring& search,
580 bool case_sensitive) {
581 return StartsWithT(str, search, case_sensitive);
582 }
583
584 #if !defined(WCHAR_T_IS_UTF16)
585 bool StartsWith(const string16& str, const string16& search,
586 bool case_sensitive) {
587 return StartsWithT(str, search, case_sensitive);
588 }
589 #endif
590
591 template <typename STR>
592 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
593 typename STR::size_type str_length = str.length();
594 typename STR::size_type search_length = search.length();
595 if (search_length > str_length)
596 return false;
597 if (case_sensitive) {
598 return str.compare(str_length - search_length, search_length, search) == 0;
599 } else {
600 return std::equal(search.begin(), search.end(),
601 str.begin() + (str_length - search_length),
602 base::CaseInsensitiveCompare<typename STR::value_type>());
603 }
604 }
605
606 bool EndsWith(const std::string& str, const std::string& search,
607 bool case_sensitive) {
608 return EndsWithT(str, search, case_sensitive);
609 }
610
611 bool EndsWith(const std::wstring& str, const std::wstring& search,
612 bool case_sensitive) {
613 return EndsWithT(str, search, case_sensitive);
614 }
615
616 #if !defined(WCHAR_T_IS_UTF16)
617 bool EndsWith(const string16& str, const string16& search,
618 bool case_sensitive) {
619 return EndsWithT(str, search, case_sensitive);
620 }
621 #endif
622
623 DataUnits GetByteDisplayUnits(int64 bytes) {
624 // The byte thresholds at which we display amounts. A byte count is displayed
625 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
626 // This must match the DataUnits enum.
627 static const int64 kUnitThresholds[] = {
628 0, // DATA_UNITS_BYTE,
629 3*1024, // DATA_UNITS_KIBIBYTE,
630 2*1024*1024, // DATA_UNITS_MEBIBYTE,
631 1024*1024*1024 // DATA_UNITS_GIBIBYTE,
632 };
633
634 if (bytes < 0) {
635 NOTREACHED() << "Negative bytes value";
636 return DATA_UNITS_BYTE;
637 }
638
639 int unit_index = arraysize(kUnitThresholds);
640 while (--unit_index > 0) {
641 if (bytes >= kUnitThresholds[unit_index])
642 break;
643 }
644
645 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE);
646 return DataUnits(unit_index);
647 }
648
// TODO(mpcomplete): deal with locale
// Suffix tables indexed by DataUnits value; the entry order must match the
// DataUnits enum.

// Byte suffixes.
static const char* const kByteStrings[] = { "B", "kB", "MB", "GB" };

// Byte-per-second suffixes.
static const char* const kSpeedStrings[] = { "B/s", "kB/s", "MB/s", "GB/s" };
664
665 string16 FormatBytesInternal(int64 bytes,
666 DataUnits units,
667 bool show_units,
668 const char* const* suffix) {
669 if (bytes < 0) {
670 NOTREACHED() << "Negative bytes value";
671 return string16();
672 }
673
674 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE);
675
676 // Put the quantity in the right units.
677 double unit_amount = static_cast<double>(bytes);
678 for (int i = 0; i < units; ++i)
679 unit_amount /= 1024.0;
680
681 char buf[64];
682 if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100)
683 base::snprintf(buf, arraysize(buf), "%.1lf", unit_amount);
684 else
685 base::snprintf(buf, arraysize(buf), "%.0lf", unit_amount);
686
687 std::string ret(buf);
688 if (show_units) {
689 ret += " ";
690 ret += suffix[units];
691 }
692
693 return ASCIIToUTF16(ret);
694 }
695
696 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units) {
697 return FormatBytesInternal(bytes, units, show_units, kByteStrings);
698 }
699
700 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
701 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
702 }
703
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. Does nothing when |start_offset| is npos or not less
// than the string length, or when |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty needle matches at every position, so with |replace_all| the loop
  // below would never terminate (and would grow |*str| without bound when
  // |replace_with| is non-empty). Bail out in builds where DCHECK is a no-op.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
       offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume after the inserted text so replacements are never rescanned.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
723
724 void ReplaceFirstSubstringAfterOffset(string16* str,
725 string16::size_type start_offset,
726 const string16& find_this,
727 const string16& replace_with) {
728 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
729 false); // replace first instance
730 }
731
732 void ReplaceFirstSubstringAfterOffset(std::string* str,
733 std::string::size_type start_offset,
734 const std::string& find_this,
735 const std::string& replace_with) {
736 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
737 false); // replace first instance
738 }
739
740 void ReplaceSubstringsAfterOffset(string16* str,
741 string16::size_type start_offset,
742 const string16& find_this,
743 const string16& replace_with) {
744 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
745 true); // replace all instances
746 }
747
748 void ReplaceSubstringsAfterOffset(std::string* str,
749 std::string::size_type start_offset,
750 const std::string& find_this,
751 const std::string& replace_with) {
752 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
753 true); // replace all instances
754 }
755
756
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |tokens|, replacing its previous contents. Returns the
// number of tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type Pos;

  tokens->clear();

  // |token_begin| always points at the first character of the next token.
  for (Pos token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos; ) {
    const Pos token_end = str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // The last token runs to the end of the string.
      tokens->push_back(str.substr(token_begin));
      break;
    }
    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
777
778 size_t Tokenize(const std::wstring& str,
779 const std::wstring& delimiters,
780 std::vector<std::wstring>* tokens) {
781 return TokenizeT(str, delimiters, tokens);
782 }
783
784 #if !defined(WCHAR_T_IS_UTF16)
785 size_t Tokenize(const string16& str,
786 const string16& delimiters,
787 std::vector<string16>* tokens) {
788 return TokenizeT(str, delimiters, tokens);
789 }
790 #endif
791
792 size_t Tokenize(const std::string& str,
793 const std::string& delimiters,
794 std::vector<std::string>* tokens) {
795 return TokenizeT(str, delimiters, tokens);
796 }
797
798 size_t Tokenize(const base::StringPiece& str,
799 const base::StringPiece& delimiters,
800 std::vector<base::StringPiece>* tokens) {
801 return TokenizeT(str, delimiters, tokens);
802 }
803
// Concatenates |parts| with |sep| inserted between consecutive elements.
// Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  STR joined;
  typename std::vector<STR>::const_iterator part = parts.begin();
  if (part == parts.end())
    return joined;

  // The first element has no separator in front of it; every later one does.
  joined = *part;
  for (++part; part != parts.end(); ++part) {
    joined += sep;
    joined += *part;
  }

  return joined;
}
821
822 std::string JoinString(const std::vector<std::string>& parts, char sep) {
823 return JoinStringT(parts, sep);
824 }
825
826 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
827 return JoinStringT(parts, sep);
828 }
829
830 template<class FormatStringType, class OutStringType>
831 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
832 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
833 size_t substitutions = subst.size();
834 DCHECK(substitutions < 10);
835
836 size_t sub_length = 0;
837 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
838 iter != subst.end(); ++iter) {
839 sub_length += iter->length();
840 }
841
842 OutStringType formatted;
843 formatted.reserve(format_string.length() + sub_length);
844
845 std::vector<ReplacementOffset> r_offsets;
846 for (typename FormatStringType::const_iterator i = format_string.begin();
847 i != format_string.end(); ++i) {
848 if ('$' == *i) {
849 if (i + 1 != format_string.end()) {
850 ++i;
851 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
852 if ('$' == *i) {
853 while (i != format_string.end() && '$' == *i) {
854 formatted.push_back('$');
855 ++i;
856 }
857 --i;
858 } else {
859 uintptr_t index = *i - '1';
860 if (offsets) {
861 ReplacementOffset r_offset(index,
862 static_cast<int>(formatted.size()));
863 r_offsets.insert(std::lower_bound(r_offsets.begin(),
864 r_offsets.end(),
865 r_offset,
866 &CompareParameter),
867 r_offset);
868 }
869 if (index < substitutions)
870 formatted.append(subst.at(index));
871 }
872 }
873 } else {
874 formatted.push_back(*i);
875 }
876 }
877 if (offsets) {
878 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
879 i != r_offsets.end(); ++i) {
880 offsets->push_back(i->offset);
881 }
882 }
883 return formatted;
884 }
885
886 string16 ReplaceStringPlaceholders(const string16& format_string,
887 const std::vector<string16>& subst,
888 std::vector<size_t>* offsets) {
889 return DoReplaceStringPlaceholders(format_string, subst, offsets);
890 }
891
892 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
893 const std::vector<std::string>& subst,
894 std::vector<size_t>* offsets) {
895 return DoReplaceStringPlaceholders(format_string, subst, offsets);
896 }
897
898 string16 ReplaceStringPlaceholders(const string16& format_string,
899 const string16& a,
900 size_t* offset) {
901 std::vector<size_t> offsets;
902 std::vector<string16> subst;
903 subst.push_back(a);
904 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
905
906 DCHECK(offsets.size() == 1);
907 if (offset) {
908 *offset = offsets[0];
909 }
910 return result;
911 }
912
913 static bool IsWildcard(base_icu::UChar32 character) {
914 return character == '*' || character == '?';
915 }
916
917 // Move the strings pointers to the point where they start to differ.
918 template <typename CHAR, typename NEXT>
919 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
920 const CHAR** string, const CHAR* string_end,
921 NEXT next) {
922 const CHAR* escape = NULL;
923 while (*pattern != pattern_end && *string != string_end) {
924 if (!escape && IsWildcard(**pattern)) {
925 // We don't want to match wildcard here, except if it's escaped.
926 return;
927 }
928
929 // Check if the escapement char is found. If so, skip it and move to the
930 // next character.
931 if (!escape && **pattern == '\\') {
932 escape = *pattern;
933 next(pattern, pattern_end);
934 continue;
935 }
936
937 // Check if the chars match, if so, increment the ptrs.
938 const CHAR* pattern_next = *pattern;
939 const CHAR* string_next = *string;
940 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
941 if (pattern_char == next(&string_next, string_end) &&
942 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
943 *pattern = pattern_next;
944 *string = string_next;
945 } else {
946 // Uh ho, it did not match, we are done. If the last char was an
947 // escapement, that means that it was an error to advance the ptr here,
948 // let's put it back where it was. This also mean that the MatchPattern
949 // function will return false because if we can't match an escape char
950 // here, then no one will.
951 if (escape) {
952 *pattern = escape;
953 }
954 return;
955 }
956
957 escape = NULL;
958 }
959 }
960
// Advances |*pattern| past any run of consecutive wildcard characters,
// stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
969
// Recursive matcher behind MatchPattern. Returns true if the range
// [eval, eval_end) matches [pattern, pattern_end). In this implementation
// '?' matches zero or one character and '*' matches any number of
// characters. |next| decodes one character and advances the pointer it is
// given. Recursion deeper than kMaxDepth is reported as a failed match.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contain
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // |next_pattern| is the pattern with its first character consumed.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    return MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                         depth + 1, next);
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      // Advance by one whole character, as the '?' branch does, so a retry
      // never starts in the middle of a multi-unit character.
      next(&eval, eval_end);
    }

    // We reached the end of the string; it is a match only if the rest of
    // the pattern contains nothing but wildcards.
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  return false;
}
1034
1035 struct NextCharUTF8 {
1036 base_icu::UChar32 operator()(const char** p, const char* end) {
1037 base_icu::UChar32 c;
1038 int offset = 0;
1039 CBU8_NEXT(*p, offset, end - *p, c);
1040 *p += offset;
1041 return c;
1042 }
1043 };
1044
1045 struct NextCharUTF16 {
1046 base_icu::UChar32 operator()(const char16** p, const char16* end) {
1047 base_icu::UChar32 c;
1048 int offset = 0;
1049 CBU16_NEXT(*p, offset, end - *p, c);
1050 *p += offset;
1051 return c;
1052 }
1053 };
1054
1055 bool MatchPattern(const base::StringPiece& eval,
1056 const base::StringPiece& pattern) {
1057 return MatchPatternT(eval.data(), eval.data() + eval.size(),
1058 pattern.data(), pattern.data() + pattern.size(),
1059 0, NextCharUTF8());
1060 }
1061
1062 bool MatchPattern(const string16& eval, const string16& pattern) {
1063 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
1064 pattern.c_str(), pattern.c_str() + pattern.size(),
1065 0, NextCharUTF16());
1066 }
1067
1068 // The following code is compatible with the OpenBSD lcpy interface. See:
1069 // http://www.gratisoft.us/todd/papers/strlcpy.html
1070 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1071
1072 namespace {
1073
// Copies the NUL-terminated |src| into |dst|, writing at most |dst_size|
// characters including the terminator. When |dst_size| is non-zero the result
// is always NUL-terminated. Returns the length of |src| in characters, so a
// return value >= |dst_size| means the copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR ch = src[copied];
    dst[copied] = ch;
    if (ch == 0)  // The terminator was copied: |src| fit completely.
      return copied;
    ++copied;
  }

  // |dst| is full and the last slot holds a source character; overwrite it
  // with the terminator.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of |src|, and return its length in characters.
  size_t src_length = dst_size;
  while (src[src_length])
    ++src_length;
  return src_length;
}
1089
1090 } // namespace
1091
1092 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1093 return lcpyT<char>(dst, src, dst_size);
1094 }
1095 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1096 return lcpyT<wchar_t>(dst, src, dst_size);
1097 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698