| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/string_util.h" | 5 #include "base/string_util_static.h" |
| 6 | |
| 7 #include "build/build_config.h" | |
| 8 | |
| 9 #include <ctype.h> | |
| 10 #include <errno.h> | |
| 11 #include <math.h> | |
| 12 #include <stdarg.h> | |
| 13 #include <stdio.h> | |
| 14 #include <stdlib.h> | |
| 15 #include <string.h> | |
| 16 #include <time.h> | |
| 17 #include <wchar.h> | |
| 18 #include <wctype.h> | |
| 19 | |
| 20 #include <algorithm> | |
| 21 #include <vector> | |
| 22 | |
| 23 #include "base/basictypes.h" | |
| 24 #include "base/logging.h" | |
| 25 #include "base/memory/singleton.h" | |
| 26 #include "base/third_party/dmg_fp/dmg_fp.h" | |
| 27 #include "base/utf_string_conversion_utils.h" | |
| 28 #include "base/utf_string_conversions.h" | |
| 29 #include "base/third_party/icu/icu_utf.h" | |
| 30 | |
| 31 namespace { | |
| 32 | |
| 33 // Force the singleton used by Empty[W]String[16] to be a unique type. This | |
| 34 // prevents other code that might accidentally use Singleton<string> from | |
| 35 // getting our internal one. | |
| 36 struct EmptyStrings { | |
| 37 EmptyStrings() {} | |
| 38 const std::string s; | |
| 39 const std::wstring ws; | |
| 40 const string16 s16; | |
| 41 | |
| 42 static EmptyStrings* GetInstance() { | |
| 43 return Singleton<EmptyStrings>::get(); | |
| 44 } | |
| 45 }; | |
| 46 | |
| 47 // Used by ReplaceStringPlaceholders to track the position in the string of | |
| 48 // replaced parameters. | |
| 49 struct ReplacementOffset { | |
| 50 ReplacementOffset(uintptr_t parameter, size_t offset) | |
| 51 : parameter(parameter), | |
| 52 offset(offset) {} | |
| 53 | |
| 54 // Index of the parameter. | |
| 55 uintptr_t parameter; | |
| 56 | |
| 57 // Starting position in the string. | |
| 58 size_t offset; | |
| 59 }; | |
| 60 | |
| 61 static bool CompareParameter(const ReplacementOffset& elem1, | |
| 62 const ReplacementOffset& elem2) { | |
| 63 return elem1.parameter < elem2.parameter; | |
| 64 } | |
| 65 | |
| 66 } // namespace | |
| 67 | |
| 68 namespace base { | |
| 69 | |
| 70 bool IsWprintfFormatPortable(const wchar_t* format) { | |
| 71 for (const wchar_t* position = format; *position != '\0'; ++position) { | |
| 72 if (*position == '%') { | |
| 73 bool in_specification = true; | |
| 74 bool modifier_l = false; | |
| 75 while (in_specification) { | |
| 76 // Eat up characters until reaching a known specifier. | |
| 77 if (*++position == '\0') { | |
| 78 // The format string ended in the middle of a specification. Call | |
| 79 // it portable because no unportable specifications were found. The | |
| 80 // string is equally broken on all platforms. | |
| 81 return true; | |
| 82 } | |
| 83 | |
| 84 if (*position == 'l') { | |
| 85 // 'l' is the only thing that can save the 's' and 'c' specifiers. | |
| 86 modifier_l = true; | |
| 87 } else if (((*position == 's' || *position == 'c') && !modifier_l) || | |
| 88 *position == 'S' || *position == 'C' || *position == 'F' || | |
| 89 *position == 'D' || *position == 'O' || *position == 'U') { | |
| 90 // Not portable. | |
| 91 return false; | |
| 92 } | |
| 93 | |
| 94 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { | |
| 95 // Portable, keep scanning the rest of the format string. | |
| 96 in_specification = false; | |
| 97 } | |
| 98 } | |
| 99 } | |
| 100 } | |
| 101 | |
| 102 return true; | |
| 103 } | |
| 104 | |
| 105 } // namespace base | |
| 106 | |
| 107 | |
| 108 const std::string& EmptyString() { | |
| 109 return EmptyStrings::GetInstance()->s; | |
| 110 } | |
| 111 | |
| 112 const std::wstring& EmptyWString() { | |
| 113 return EmptyStrings::GetInstance()->ws; | |
| 114 } | |
| 115 | |
| 116 const string16& EmptyString16() { | |
| 117 return EmptyStrings::GetInstance()->s16; | |
| 118 } | |
| 119 | 6 |
| 120 #define WHITESPACE_UNICODE \ | 7 #define WHITESPACE_UNICODE \ |
| 121 0x0009, /* <control-0009> to <control-000D> */ \ | 8 0x0009, /* <control-0009> to <control-000D> */ \ |
| 122 0x000A, \ | 9 0x000A, \ |
| 123 0x000B, \ | 10 0x000B, \ |
| 124 0x000C, \ | 11 0x000C, \ |
| 125 0x000D, \ | 12 0x000D, \ |
| 126 0x0020, /* Space */ \ | 13 0x0020, /* Space */ \ |
| 127 0x0085, /* <control-0085> */ \ | 14 0x0085, /* <control-0085> */ \ |
| 128 0x00A0, /* No-Break Space */ \ | 15 0x00A0, /* No-Break Space */ \ |
| (...skipping 28 matching lines...) |
| 157 0x09, // <control-0009> to <control-000D> | 44 0x09, // <control-0009> to <control-000D> |
| 158 0x0A, | 45 0x0A, |
| 159 0x0B, | 46 0x0B, |
| 160 0x0C, | 47 0x0C, |
| 161 0x0D, | 48 0x0D, |
| 162 0x20, // Space | 49 0x20, // Space |
| 163 0 | 50 0 |
| 164 }; | 51 }; |
| 165 | 52 |
| 166 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; | 53 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; |
| 167 | |
| 168 template<typename STR> | |
| 169 bool RemoveCharsT(const STR& input, | |
| 170 const typename STR::value_type remove_chars[], | |
| 171 STR* output) { | |
| 172 bool removed = false; | |
| 173 size_t found; | |
| 174 | |
| 175 *output = input; | |
| 176 | |
| 177 found = output->find_first_of(remove_chars); | |
| 178 while (found != STR::npos) { | |
| 179 removed = true; | |
| 180 output->replace(found, 1, STR()); | |
| 181 found = output->find_first_of(remove_chars, found); | |
| 182 } | |
| 183 | |
| 184 return removed; | |
| 185 } | |
| 186 | |
| 187 bool RemoveChars(const std::wstring& input, | |
| 188 const wchar_t remove_chars[], | |
| 189 std::wstring* output) { | |
| 190 return RemoveCharsT(input, remove_chars, output); | |
| 191 } | |
| 192 | |
| 193 #if !defined(WCHAR_T_IS_UTF16) | |
| 194 bool RemoveChars(const string16& input, | |
| 195 const char16 remove_chars[], | |
| 196 string16* output) { | |
| 197 return RemoveCharsT(input, remove_chars, output); | |
| 198 } | |
| 199 #endif | |
| 200 | |
| 201 bool RemoveChars(const std::string& input, | |
| 202 const char remove_chars[], | |
| 203 std::string* output) { | |
| 204 return RemoveCharsT(input, remove_chars, output); | |
| 205 } | |
| 206 | |
| 207 template<typename STR> | |
| 208 TrimPositions TrimStringT(const STR& input, | |
| 209 const typename STR::value_type trim_chars[], | |
| 210 TrimPositions positions, | |
| 211 STR* output) { | |
| 212 // Find the edges of leading/trailing whitespace as desired. | |
| 213 const typename STR::size_type last_char = input.length() - 1; | |
| 214 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? | |
| 215 input.find_first_not_of(trim_chars) : 0; | |
| 216 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? | |
| 217 input.find_last_not_of(trim_chars) : last_char; | |
| 218 | |
| 219 // When the string was all whitespace, report that we stripped off whitespace | |
| 220 // from whichever position the caller was interested in. For empty input, we | |
| 221 // stripped no whitespace, but we still need to clear |output|. | |
| 222 if (input.empty() || | |
| 223 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { | |
| 224 bool input_was_empty = input.empty(); // in case output == &input | |
| 225 output->clear(); | |
| 226 return input_was_empty ? TRIM_NONE : positions; | |
| 227 } | |
| 228 | |
| 229 // Trim the whitespace. | |
| 230 *output = | |
| 231 input.substr(first_good_char, last_good_char - first_good_char + 1); | |
| 232 | |
| 233 // Return where we trimmed from. | |
| 234 return static_cast<TrimPositions>( | |
| 235 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | | |
| 236 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); | |
| 237 } | |
| 238 | |
| 239 bool TrimString(const std::wstring& input, | |
| 240 const wchar_t trim_chars[], | |
| 241 std::wstring* output) { | |
| 242 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
| 243 } | |
| 244 | |
| 245 #if !defined(WCHAR_T_IS_UTF16) | |
| 246 bool TrimString(const string16& input, | |
| 247 const char16 trim_chars[], | |
| 248 string16* output) { | |
| 249 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
| 250 } | |
| 251 #endif | |
| 252 | |
| 253 bool TrimString(const std::string& input, | |
| 254 const char trim_chars[], | |
| 255 std::string* output) { | |
| 256 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
| 257 } | |
| 258 | |
| 259 void TruncateUTF8ToByteSize(const std::string& input, | |
| 260 const size_t byte_size, | |
| 261 std::string* output) { | |
| 262 DCHECK(output); | |
| 263 if (byte_size > input.length()) { | |
| 264 *output = input; | |
| 265 return; | |
| 266 } | |
| 267 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); | |
| 268 // Note: This cast is necessary because CBU8_NEXT uses int32s. | |
| 269 int32 truncation_length = static_cast<int32>(byte_size); | |
| 270 int32 char_index = truncation_length - 1; | |
| 271 const char* data = input.data(); | |
| 272 | |
| 273 // Using CBU8, we will move backwards from the truncation point | |
| 274 // to the beginning of the string looking for a valid UTF8 | |
| 275 // character. Once a full UTF8 character is found, we will | |
| 276 // truncate the string to the end of that character. | |
| 277 while (char_index >= 0) { | |
| 278 int32 prev = char_index; | |
| 279 uint32 code_point = 0; | |
| 280 CBU8_NEXT(data, char_index, truncation_length, code_point); | |
| 281 if (!base::IsValidCharacter(code_point) || | |
| 282 !base::IsValidCodepoint(code_point)) { | |
| 283 char_index = prev - 1; | |
| 284 } else { | |
| 285 break; | |
| 286 } | |
| 287 } | |
| 288 | |
| 289 if (char_index >= 0 ) | |
| 290 *output = input.substr(0, char_index); | |
| 291 else | |
| 292 output->clear(); | |
| 293 } | |
| 294 | |
| 295 TrimPositions TrimWhitespace(const std::wstring& input, | |
| 296 TrimPositions positions, | |
| 297 std::wstring* output) { | |
| 298 return TrimStringT(input, kWhitespaceWide, positions, output); | |
| 299 } | |
| 300 | |
| 301 #if !defined(WCHAR_T_IS_UTF16) | |
| 302 TrimPositions TrimWhitespace(const string16& input, | |
| 303 TrimPositions positions, | |
| 304 string16* output) { | |
| 305 return TrimStringT(input, kWhitespaceUTF16, positions, output); | |
| 306 } | |
| 307 #endif | |
| 308 | |
| 309 TrimPositions TrimWhitespaceASCII(const std::string& input, | |
| 310 TrimPositions positions, | |
| 311 std::string* output) { | |
| 312 return TrimStringT(input, kWhitespaceASCII, positions, output); | |
| 313 } | |
| 314 | |
| 315 // This function is only for backward-compatibility. | |
| 316 // To be removed when all callers are updated. | |
| 317 TrimPositions TrimWhitespace(const std::string& input, | |
| 318 TrimPositions positions, | |
| 319 std::string* output) { | |
| 320 return TrimWhitespaceASCII(input, positions, output); | |
| 321 } | |
| 322 | |
| 323 template<typename STR> | |
| 324 STR CollapseWhitespaceT(const STR& text, | |
| 325 bool trim_sequences_with_line_breaks) { | |
| 326 STR result; | |
| 327 result.resize(text.size()); | |
| 328 | |
| 329 // Set flags to pretend we're already in a trimmed whitespace sequence, so we | |
| 330 // will trim any leading whitespace. | |
| 331 bool in_whitespace = true; | |
| 332 bool already_trimmed = true; | |
| 333 | |
| 334 int chars_written = 0; | |
| 335 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { | |
| 336 if (IsWhitespace(*i)) { | |
| 337 if (!in_whitespace) { | |
| 338 // Reduce all whitespace sequences to a single space. | |
| 339 in_whitespace = true; | |
| 340 result[chars_written++] = L' '; | |
| 341 } | |
| 342 if (trim_sequences_with_line_breaks && !already_trimmed && | |
| 343 ((*i == '\n') || (*i == '\r'))) { | |
| 344 // Whitespace sequences containing CR or LF are eliminated entirely. | |
| 345 already_trimmed = true; | |
| 346 --chars_written; | |
| 347 } | |
| 348 } else { | |
| 349 // Non-whitespace characters are copied straight across. | |
| 350 in_whitespace = false; | |
| 351 already_trimmed = false; | |
| 352 result[chars_written++] = *i; | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 if (in_whitespace && !already_trimmed) { | |
| 357 // Any trailing whitespace is eliminated. | |
| 358 --chars_written; | |
| 359 } | |
| 360 | |
| 361 result.resize(chars_written); | |
| 362 return result; | |
| 363 } | |
| 364 | |
| 365 std::wstring CollapseWhitespace(const std::wstring& text, | |
| 366 bool trim_sequences_with_line_breaks) { | |
| 367 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
| 368 } | |
| 369 | |
| 370 #if !defined(WCHAR_T_IS_UTF16) | |
| 371 string16 CollapseWhitespace(const string16& text, | |
| 372 bool trim_sequences_with_line_breaks) { | |
| 373 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
| 374 } | |
| 375 #endif | |
| 376 | |
| 377 std::string CollapseWhitespaceASCII(const std::string& text, | |
| 378 bool trim_sequences_with_line_breaks) { | |
| 379 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
| 380 } | |
| 381 | |
| 382 bool ContainsOnlyWhitespaceASCII(const std::string& str) { | |
| 383 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { | |
| 384 if (!IsAsciiWhitespace(*i)) | |
| 385 return false; | |
| 386 } | |
| 387 return true; | |
| 388 } | |
| 389 | |
| 390 bool ContainsOnlyWhitespace(const string16& str) { | |
| 391 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) { | |
| 392 if (!IsWhitespace(*i)) | |
| 393 return false; | |
| 394 } | |
| 395 return true; | |
| 396 } | |
| 397 | |
| 398 template<typename STR> | |
| 399 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) { | |
| 400 for (typename STR::const_iterator iter = input.begin(); | |
| 401 iter != input.end(); ++iter) { | |
| 402 if (characters.find(*iter) == STR::npos) | |
| 403 return false; | |
| 404 } | |
| 405 return true; | |
| 406 } | |
| 407 | |
| 408 bool ContainsOnlyChars(const std::wstring& input, | |
| 409 const std::wstring& characters) { | |
| 410 return ContainsOnlyCharsT(input, characters); | |
| 411 } | |
| 412 | |
| 413 #if !defined(WCHAR_T_IS_UTF16) | |
| 414 bool ContainsOnlyChars(const string16& input, const string16& characters) { | |
| 415 return ContainsOnlyCharsT(input, characters); | |
| 416 } | |
| 417 #endif | |
| 418 | |
| 419 bool ContainsOnlyChars(const std::string& input, | |
| 420 const std::string& characters) { | |
| 421 return ContainsOnlyCharsT(input, characters); | |
| 422 } | |
| 423 | |
| 424 std::string WideToASCII(const std::wstring& wide) { | |
| 425 DCHECK(IsStringASCII(wide)) << wide; | |
| 426 return std::string(wide.begin(), wide.end()); | |
| 427 } | |
| 428 | |
| 429 std::string UTF16ToASCII(const string16& utf16) { | |
| 430 DCHECK(IsStringASCII(utf16)) << utf16; | |
| 431 return std::string(utf16.begin(), utf16.end()); | |
| 432 } | |
| 433 | |
| 434 // Latin1 is just the low range of Unicode, so we can copy directly to convert. | |
| 435 bool WideToLatin1(const std::wstring& wide, std::string* latin1) { | |
| 436 std::string output; | |
| 437 output.resize(wide.size()); | |
| 438 latin1->clear(); | |
| 439 for (size_t i = 0; i < wide.size(); i++) { | |
| 440 if (wide[i] > 255) | |
| 441 return false; | |
| 442 output[i] = static_cast<char>(wide[i]); | |
| 443 } | |
| 444 latin1->swap(output); | |
| 445 return true; | |
| 446 } | |
| 447 | |
| 448 template<class STR> | |
| 449 static bool DoIsStringASCII(const STR& str) { | |
| 450 for (size_t i = 0; i < str.length(); i++) { | |
| 451 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; | |
| 452 if (c > 0x7F) | |
| 453 return false; | |
| 454 } | |
| 455 return true; | |
| 456 } | |
| 457 | |
| 458 bool IsStringASCII(const std::wstring& str) { | |
| 459 return DoIsStringASCII(str); | |
| 460 } | |
| 461 | |
| 462 #if !defined(WCHAR_T_IS_UTF16) | |
| 463 bool IsStringASCII(const string16& str) { | |
| 464 return DoIsStringASCII(str); | |
| 465 } | |
| 466 #endif | |
| 467 | |
| 468 bool IsStringASCII(const base::StringPiece& str) { | |
| 469 return DoIsStringASCII(str); | |
| 470 } | |
| 471 | |
| 472 bool IsStringUTF8(const std::string& str) { | |
| 473 const char *src = str.data(); | |
| 474 int32 src_len = static_cast<int32>(str.length()); | |
| 475 int32 char_index = 0; | |
| 476 | |
| 477 while (char_index < src_len) { | |
| 478 int32 code_point; | |
| 479 CBU8_NEXT(src, char_index, src_len, code_point); | |
| 480 if (!base::IsValidCharacter(code_point)) | |
| 481 return false; | |
| 482 } | |
| 483 return true; | |
| 484 } | |
| 485 | |
| 486 template<typename Iter> | |
| 487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin, | |
| 488 Iter a_end, | |
| 489 const char* b) { | |
| 490 for (Iter it = a_begin; it != a_end; ++it, ++b) { | |
| 491 if (!*b || base::ToLowerASCII(*it) != *b) | |
| 492 return false; | |
| 493 } | |
| 494 return *b == 0; | |
| 495 } | |
| 496 | |
| 497 // Front-ends for LowerCaseEqualsASCII. | |
| 498 bool LowerCaseEqualsASCII(const std::string& a, const char* b) { | |
| 499 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
| 500 } | |
| 501 | |
| 502 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) { | |
| 503 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
| 504 } | |
| 505 | |
| 506 #if !defined(WCHAR_T_IS_UTF16) | |
| 507 bool LowerCaseEqualsASCII(const string16& a, const char* b) { | |
| 508 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
| 509 } | |
| 510 #endif | |
| 511 | |
| 512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | |
| 513 std::string::const_iterator a_end, | |
| 514 const char* b) { | |
| 515 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
| 516 } | |
| 517 | |
| 518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
| 519 std::wstring::const_iterator a_end, | |
| 520 const char* b) { | |
| 521 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
| 522 } | |
| 523 | |
| 524 #if !defined(WCHAR_T_IS_UTF16) | |
| 525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | |
| 526 string16::const_iterator a_end, | |
| 527 const char* b) { | |
| 528 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
| 529 } | |
| 530 #endif | |
| 531 | |
| 532 bool LowerCaseEqualsASCII(const char* a_begin, | |
| 533 const char* a_end, | |
| 534 const char* b) { | |
| 535 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
| 536 } | |
| 537 | |
| 538 bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
| 539 const wchar_t* a_end, | |
| 540 const char* b) { | |
| 541 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
| 542 } | |
| 543 | |
| 544 #if !defined(WCHAR_T_IS_UTF16) | |
| 545 bool LowerCaseEqualsASCII(const char16* a_begin, | |
| 546 const char16* a_end, | |
| 547 const char* b) { | |
| 548 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
| 549 } | |
| 550 #endif | |
| 551 | |
| 552 bool EqualsASCII(const string16& a, const base::StringPiece& b) { | |
| 553 if (a.length() != b.length()) | |
| 554 return false; | |
| 555 return std::equal(b.begin(), b.end(), a.begin()); | |
| 556 } | |
| 557 | |
| 558 bool StartsWithASCII(const std::string& str, | |
| 559 const std::string& search, | |
| 560 bool case_sensitive) { | |
| 561 if (case_sensitive) | |
| 562 return str.compare(0, search.length(), search) == 0; | |
| 563 else | |
| 564 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; | |
| 565 } | |
| 566 | |
| 567 template <typename STR> | |
| 568 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { | |
| 569 if (case_sensitive) { | |
| 570 return str.compare(0, search.length(), search) == 0; | |
| 571 } else { | |
| 572 if (search.size() > str.size()) | |
| 573 return false; | |
| 574 return std::equal(search.begin(), search.end(), str.begin(), | |
| 575 base::CaseInsensitiveCompare<typename STR::value_type>()); | |
| 576 } | |
| 577 } | |
| 578 | |
| 579 bool StartsWith(const std::wstring& str, const std::wstring& search, | |
| 580 bool case_sensitive) { | |
| 581 return StartsWithT(str, search, case_sensitive); | |
| 582 } | |
| 583 | |
| 584 #if !defined(WCHAR_T_IS_UTF16) | |
| 585 bool StartsWith(const string16& str, const string16& search, | |
| 586 bool case_sensitive) { | |
| 587 return StartsWithT(str, search, case_sensitive); | |
| 588 } | |
| 589 #endif | |
| 590 | |
| 591 template <typename STR> | |
| 592 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { | |
| 593 typename STR::size_type str_length = str.length(); | |
| 594 typename STR::size_type search_length = search.length(); | |
| 595 if (search_length > str_length) | |
| 596 return false; | |
| 597 if (case_sensitive) { | |
| 598 return str.compare(str_length - search_length, search_length, search) == 0; | |
| 599 } else { | |
| 600 return std::equal(search.begin(), search.end(), | |
| 601 str.begin() + (str_length - search_length), | |
| 602 base::CaseInsensitiveCompare<typename STR::value_type>()); | |
| 603 } | |
| 604 } | |
| 605 | |
| 606 bool EndsWith(const std::string& str, const std::string& search, | |
| 607 bool case_sensitive) { | |
| 608 return EndsWithT(str, search, case_sensitive); | |
| 609 } | |
| 610 | |
| 611 bool EndsWith(const std::wstring& str, const std::wstring& search, | |
| 612 bool case_sensitive) { | |
| 613 return EndsWithT(str, search, case_sensitive); | |
| 614 } | |
| 615 | |
| 616 #if !defined(WCHAR_T_IS_UTF16) | |
| 617 bool EndsWith(const string16& str, const string16& search, | |
| 618 bool case_sensitive) { | |
| 619 return EndsWithT(str, search, case_sensitive); | |
| 620 } | |
| 621 #endif | |
| 622 | |
| 623 DataUnits GetByteDisplayUnits(int64 bytes) { | |
| 624 // The byte thresholds at which we display amounts. A byte count is displayed | |
| 625 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1]. | |
| 626 // This must match the DataUnits enum. | |
| 627 static const int64 kUnitThresholds[] = { | |
| 628 0, // DATA_UNITS_BYTE, | |
| 629 3*1024, // DATA_UNITS_KIBIBYTE, | |
| 630 2*1024*1024, // DATA_UNITS_MEBIBYTE, | |
| 631 1024*1024*1024 // DATA_UNITS_GIBIBYTE, | |
| 632 }; | |
| 633 | |
| 634 if (bytes < 0) { | |
| 635 NOTREACHED() << "Negative bytes value"; | |
| 636 return DATA_UNITS_BYTE; | |
| 637 } | |
| 638 | |
| 639 int unit_index = arraysize(kUnitThresholds); | |
| 640 while (--unit_index > 0) { | |
| 641 if (bytes >= kUnitThresholds[unit_index]) | |
| 642 break; | |
| 643 } | |
| 644 | |
| 645 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE); | |
| 646 return DataUnits(unit_index); | |
| 647 } | |
| 648 | |
| 649 // TODO(mpcomplete): deal with locale | |
| 650 // Byte suffixes. This must match the DataUnits enum. | |
| 651 static const char* const kByteStrings[] = { | |
| 652 "B", | |
| 653 "kB", | |
| 654 "MB", | |
| 655 "GB" | |
| 656 }; | |
| 657 | |
| 658 static const char* const kSpeedStrings[] = { | |
| 659 "B/s", | |
| 660 "kB/s", | |
| 661 "MB/s", | |
| 662 "GB/s" | |
| 663 }; | |
| 664 | |
| 665 string16 FormatBytesInternal(int64 bytes, | |
| 666 DataUnits units, | |
| 667 bool show_units, | |
| 668 const char* const* suffix) { | |
| 669 if (bytes < 0) { | |
| 670 NOTREACHED() << "Negative bytes value"; | |
| 671 return string16(); | |
| 672 } | |
| 673 | |
| 674 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE); | |
| 675 | |
| 676 // Put the quantity in the right units. | |
| 677 double unit_amount = static_cast<double>(bytes); | |
| 678 for (int i = 0; i < units; ++i) | |
| 679 unit_amount /= 1024.0; | |
| 680 | |
| 681 char buf[64]; | |
| 682 if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100) | |
| 683 base::snprintf(buf, arraysize(buf), "%.1lf", unit_amount); | |
| 684 else | |
| 685 base::snprintf(buf, arraysize(buf), "%.0lf", unit_amount); | |
| 686 | |
| 687 std::string ret(buf); | |
| 688 if (show_units) { | |
| 689 ret += " "; | |
| 690 ret += suffix[units]; | |
| 691 } | |
| 692 | |
| 693 return ASCIIToUTF16(ret); | |
| 694 } | |
| 695 | |
| 696 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units) { | |
| 697 return FormatBytesInternal(bytes, units, show_units, kByteStrings); | |
| 698 } | |
| 699 | |
| 700 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units) { | |
| 701 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings); | |
| 702 } | |
| 703 | |
| 704 template<class StringType> | |
| 705 void DoReplaceSubstringsAfterOffset(StringType* str, | |
| 706 typename StringType::size_type start_offset, | |
| 707 const StringType& find_this, | |
| 708 const StringType& replace_with, | |
| 709 bool replace_all) { | |
| 710 if ((start_offset == StringType::npos) || (start_offset >= str->length())) | |
| 711 return; | |
| 712 | |
| 713 DCHECK(!find_this.empty()); | |
| 714 for (typename StringType::size_type offs(str->find(find_this, start_offset)); | |
| 715 offs != StringType::npos; offs = str->find(find_this, offs)) { | |
| 716 str->replace(offs, find_this.length(), replace_with); | |
| 717 offs += replace_with.length(); | |
| 718 | |
| 719 if (!replace_all) | |
| 720 break; | |
| 721 } | |
| 722 } | |
| 723 | |
| 724 void ReplaceFirstSubstringAfterOffset(string16* str, | |
| 725 string16::size_type start_offset, | |
| 726 const string16& find_this, | |
| 727 const string16& replace_with) { | |
| 728 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
| 729 false); // replace first instance | |
| 730 } | |
| 731 | |
| 732 void ReplaceFirstSubstringAfterOffset(std::string* str, | |
| 733 std::string::size_type start_offset, | |
| 734 const std::string& find_this, | |
| 735 const std::string& replace_with) { | |
| 736 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
| 737 false); // replace first instance | |
| 738 } | |
| 739 | |
| 740 void ReplaceSubstringsAfterOffset(string16* str, | |
| 741 string16::size_type start_offset, | |
| 742 const string16& find_this, | |
| 743 const string16& replace_with) { | |
| 744 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
| 745 true); // replace all instances | |
| 746 } | |
| 747 | |
| 748 void ReplaceSubstringsAfterOffset(std::string* str, | |
| 749 std::string::size_type start_offset, | |
| 750 const std::string& find_this, | |
| 751 const std::string& replace_with) { | |
| 752 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
| 753 true); // replace all instances | |
| 754 } | |
| 755 | |
| 756 | |
| 757 template<typename STR> | |
| 758 static size_t TokenizeT(const STR& str, | |
| 759 const STR& delimiters, | |
| 760 std::vector<STR>* tokens) { | |
| 761 tokens->clear(); | |
| 762 | |
| 763 typename STR::size_type start = str.find_first_not_of(delimiters); | |
| 764 while (start != STR::npos) { | |
| 765 typename STR::size_type end = str.find_first_of(delimiters, start + 1); | |
| 766 if (end == STR::npos) { | |
| 767 tokens->push_back(str.substr(start)); | |
| 768 break; | |
| 769 } else { | |
| 770 tokens->push_back(str.substr(start, end - start)); | |
| 771 start = str.find_first_not_of(delimiters, end + 1); | |
| 772 } | |
| 773 } | |
| 774 | |
| 775 return tokens->size(); | |
| 776 } | |
| 777 | |
| 778 size_t Tokenize(const std::wstring& str, | |
| 779 const std::wstring& delimiters, | |
| 780 std::vector<std::wstring>* tokens) { | |
| 781 return TokenizeT(str, delimiters, tokens); | |
| 782 } | |
| 783 | |
| 784 #if !defined(WCHAR_T_IS_UTF16) | |
| 785 size_t Tokenize(const string16& str, | |
| 786 const string16& delimiters, | |
| 787 std::vector<string16>* tokens) { | |
| 788 return TokenizeT(str, delimiters, tokens); | |
| 789 } | |
| 790 #endif | |
| 791 | |
| 792 size_t Tokenize(const std::string& str, | |
| 793 const std::string& delimiters, | |
| 794 std::vector<std::string>* tokens) { | |
| 795 return TokenizeT(str, delimiters, tokens); | |
| 796 } | |
| 797 | |
| 798 size_t Tokenize(const base::StringPiece& str, | |
| 799 const base::StringPiece& delimiters, | |
| 800 std::vector<base::StringPiece>* tokens) { | |
| 801 return TokenizeT(str, delimiters, tokens); | |
| 802 } | |
| 803 | |
| 804 template<typename STR> | |
| 805 static STR JoinStringT(const std::vector<STR>& parts, | |
| 806 typename STR::value_type sep) { | |
| 807 if (parts.empty()) | |
| 808 return STR(); | |
| 809 | |
| 810 STR result(parts[0]); | |
| 811 typename std::vector<STR>::const_iterator iter = parts.begin(); | |
| 812 ++iter; | |
| 813 | |
| 814 for (; iter != parts.end(); ++iter) { | |
| 815 result += sep; | |
| 816 result += *iter; | |
| 817 } | |
| 818 | |
| 819 return result; | |
| 820 } | |
| 821 | |
| 822 std::string JoinString(const std::vector<std::string>& parts, char sep) { | |
| 823 return JoinStringT(parts, sep); | |
| 824 } | |
| 825 | |
| 826 string16 JoinString(const std::vector<string16>& parts, char16 sep) { | |
| 827 return JoinStringT(parts, sep); | |
| 828 } | |
| 829 | |
| 830 template<class FormatStringType, class OutStringType> | |
| 831 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, | |
| 832 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { | |
| 833 size_t substitutions = subst.size(); | |
| 834 DCHECK(substitutions < 10); | |
| 835 | |
| 836 size_t sub_length = 0; | |
| 837 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); | |
| 838 iter != subst.end(); ++iter) { | |
| 839 sub_length += iter->length(); | |
| 840 } | |
| 841 | |
| 842 OutStringType formatted; | |
| 843 formatted.reserve(format_string.length() + sub_length); | |
| 844 | |
| 845 std::vector<ReplacementOffset> r_offsets; | |
| 846 for (typename FormatStringType::const_iterator i = format_string.begin(); | |
| 847 i != format_string.end(); ++i) { | |
| 848 if ('$' == *i) { | |
| 849 if (i + 1 != format_string.end()) { | |
| 850 ++i; | |
| 851 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; | |
| 852 if ('$' == *i) { | |
| 853 while (i != format_string.end() && '$' == *i) { | |
| 854 formatted.push_back('$'); | |
| 855 ++i; | |
| 856 } | |
| 857 --i; | |
| 858 } else { | |
| 859 uintptr_t index = *i - '1'; | |
| 860 if (offsets) { | |
| 861 ReplacementOffset r_offset(index, | |
| 862 static_cast<int>(formatted.size())); | |
| 863 r_offsets.insert(std::lower_bound(r_offsets.begin(), | |
| 864 r_offsets.end(), | |
| 865 r_offset, | |
| 866 &CompareParameter), | |
| 867 r_offset); | |
| 868 } | |
| 869 if (index < substitutions) | |
| 870 formatted.append(subst.at(index)); | |
| 871 } | |
| 872 } | |
| 873 } else { | |
| 874 formatted.push_back(*i); | |
| 875 } | |
| 876 } | |
| 877 if (offsets) { | |
| 878 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); | |
| 879 i != r_offsets.end(); ++i) { | |
| 880 offsets->push_back(i->offset); | |
| 881 } | |
| 882 } | |
| 883 return formatted; | |
| 884 } | |
| 885 | |
| 886 string16 ReplaceStringPlaceholders(const string16& format_string, | |
| 887 const std::vector<string16>& subst, | |
| 888 std::vector<size_t>* offsets) { | |
| 889 return DoReplaceStringPlaceholders(format_string, subst, offsets); | |
| 890 } | |
| 891 | |
| 892 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, | |
| 893 const std::vector<std::string>& subst, | |
| 894 std::vector<size_t>* offsets) { | |
| 895 return DoReplaceStringPlaceholders(format_string, subst, offsets); | |
| 896 } | |
| 897 | |
| 898 string16 ReplaceStringPlaceholders(const string16& format_string, | |
| 899 const string16& a, | |
| 900 size_t* offset) { | |
| 901 std::vector<size_t> offsets; | |
| 902 std::vector<string16> subst; | |
| 903 subst.push_back(a); | |
| 904 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); | |
| 905 | |
| 906 DCHECK(offsets.size() == 1); | |
| 907 if (offset) { | |
| 908 *offset = offsets[0]; | |
| 909 } | |
| 910 return result; | |
| 911 } | |
| 912 | |
| 913 static bool IsWildcard(base_icu::UChar32 character) { | |
| 914 return character == '*' || character == '?'; | |
| 915 } | |
| 916 | |
| 917 // Move the strings pointers to the point where they start to differ. | |
| 918 template <typename CHAR, typename NEXT> | |
| 919 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, | |
| 920 const CHAR** string, const CHAR* string_end, | |
| 921 NEXT next) { | |
| 922 const CHAR* escape = NULL; | |
| 923 while (*pattern != pattern_end && *string != string_end) { | |
| 924 if (!escape && IsWildcard(**pattern)) { | |
| 925 // We don't want to match wildcard here, except if it's escaped. | |
| 926 return; | |
| 927 } | |
| 928 | |
| 929 // Check if the escapement char is found. If so, skip it and move to the | |
| 930 // next character. | |
| 931 if (!escape && **pattern == '\\') { | |
| 932 escape = *pattern; | |
| 933 next(pattern, pattern_end); | |
| 934 continue; | |
| 935 } | |
| 936 | |
| 937 // Check if the chars match, if so, increment the ptrs. | |
| 938 const CHAR* pattern_next = *pattern; | |
| 939 const CHAR* string_next = *string; | |
| 940 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | |
| 941 if (pattern_char == next(&string_next, string_end) && | |
| 942 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { | |
| 943 *pattern = pattern_next; | |
| 944 *string = string_next; | |
| 945 } else { | |
| 946 // Uh ho, it did not match, we are done. If the last char was an | |
| 947 // escapement, that means that it was an error to advance the ptr here, | |
| 948 // let's put it back where it was. This also mean that the MatchPattern | |
| 949 // function will return false because if we can't match an escape char | |
| 950 // here, then no one will. | |
| 951 if (escape) { | |
| 952 *pattern = escape; | |
| 953 } | |
| 954 return; | |
| 955 } | |
| 956 | |
| 957 escape = NULL; | |
| 958 } | |
| 959 } | |
| 960 | |
// Advances |*pattern| past a run of consecutive wildcard characters, stopping
// at the first non-wildcard or at |end|. |next| is used to step the cursor.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
| 969 | |
// Recursive wildcard matcher shared by the MatchPattern overloads.
//
// Returns true when [eval, eval_end) matches [pattern, pattern_end). In the
// pattern, '?' matches zero or one character and '*' matches any run of
// characters (including none); a backslash escapes the following character.
// |depth| is the current recursion depth; once it exceeds kMaxDepth the match
// is abandoned and false is returned. |next| decodes one character from a
// cursor and advances it.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix both inputs have in common.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // Input exhausted: it is a match only if whatever remains of the pattern is
  // nothing but wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Input remains but the pattern is used up: no match.
  if (pattern == pattern_end)
    return false;

  // |rest_of_pattern| is the pattern with its first character removed.
  const CHAR* rest_of_pattern = pattern;
  next(&rest_of_pattern, pattern_end);

  if (pattern[0] == '?') {
    // First try letting '?' match nothing...
    if (MatchPatternT(eval, eval_end, rest_of_pattern, pattern_end,
                      depth + 1, next)) {
      return true;
    }
    // ...then try letting it consume exactly one character of the input.
    const CHAR* eval_after_one = eval;
    next(&eval_after_one, eval_end);
    return MatchPatternT(eval_after_one, eval_end, rest_of_pattern,
                         pattern_end, depth + 1, next);
  }

  // Any other leading character is a literal that EatSameChars could not
  // match.
  if (pattern[0] != '*')
    return false;

  // Collapse duplicate wild cards (********** into *) so that the
  // method does not recurse unnecessarily. http://crbug.com/52839
  EatWildcard(&rest_of_pattern, pattern_end, next);

  // Try the remainder of the pattern against every suffix of the input.
  for (; eval != eval_end; eval++) {
    if (MatchPatternT(eval, eval_end, rest_of_pattern, pattern_end,
                      depth + 1, next)) {
      return true;
    }
  }

  // The whole input was consumed by '*'; succeed only if the pattern holds
  // nothing but wildcards from here on.
  EatWildcard(&pattern, pattern_end, next);
  return pattern == pattern_end;
}
| 1034 | |
| 1035 struct NextCharUTF8 { | |
| 1036 base_icu::UChar32 operator()(const char** p, const char* end) { | |
| 1037 base_icu::UChar32 c; | |
| 1038 int offset = 0; | |
| 1039 CBU8_NEXT(*p, offset, end - *p, c); | |
| 1040 *p += offset; | |
| 1041 return c; | |
| 1042 } | |
| 1043 }; | |
| 1044 | |
| 1045 struct NextCharUTF16 { | |
| 1046 base_icu::UChar32 operator()(const char16** p, const char16* end) { | |
| 1047 base_icu::UChar32 c; | |
| 1048 int offset = 0; | |
| 1049 CBU16_NEXT(*p, offset, end - *p, c); | |
| 1050 *p += offset; | |
| 1051 return c; | |
| 1052 } | |
| 1053 }; | |
| 1054 | |
| 1055 bool MatchPattern(const base::StringPiece& eval, | |
| 1056 const base::StringPiece& pattern) { | |
| 1057 return MatchPatternT(eval.data(), eval.data() + eval.size(), | |
| 1058 pattern.data(), pattern.data() + pattern.size(), | |
| 1059 0, NextCharUTF8()); | |
| 1060 } | |
| 1061 | |
| 1062 bool MatchPattern(const string16& eval, const string16& pattern) { | |
| 1063 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), | |
| 1064 pattern.c_str(), pattern.c_str() + pattern.size(), | |
| 1065 0, NextCharUTF16()); | |
| 1066 } | |
| 1067 | |
| 1068 // The following code is compatible with the OpenBSD lcpy interface. See: | |
| 1069 // http://www.gratisoft.us/todd/papers/strlcpy.html | |
| 1070 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c | |
| 1071 | |
namespace {

// Character-type-generic implementation of the strlcpy contract.
//
// Copies |src| into |dst|, writing at most |dst_size| characters including the
// terminator. When |dst_size| is non-zero, |dst| is always NUL-terminated,
// truncating |src| if necessary. Returns the length of |src| in characters
// (not counting the terminator), so a return value >= |dst_size| means the
// copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR current = src[copied];
    dst[copied] = current;
    if (current == 0) {
      // The terminator was reached and copied; |copied| is the length of
      // |src|.
      return copied;
    }
    ++copied;
  }

  // |dst| filled up before the terminator was seen. Overwrite the last
  // character copied with the terminator.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Keep scanning |src| to find its full length in characters.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}

}  // namespace
| 1091 | |
| 1092 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { | |
| 1093 return lcpyT<char>(dst, src, dst_size); | |
| 1094 } | |
| 1095 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { | |
| 1096 return lcpyT<wchar_t>(dst, src, dst_size); | |
| 1097 } | |
| OLD | NEW |