OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/string_util.h" | 5 #include "base/string_util.h" |
6 | 6 |
7 #include "build/build_config.h" | 7 #include "build/build_config.h" |
8 | 8 |
9 #include <ctype.h> | 9 #include <ctype.h> |
10 #include <errno.h> | 10 #include <errno.h> |
(...skipping 432 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
443 std::string WideToASCII(const std::wstring& wide) { | 443 std::string WideToASCII(const std::wstring& wide) { |
444 DCHECK(IsStringASCII(wide)); | 444 DCHECK(IsStringASCII(wide)); |
445 return std::string(wide.begin(), wide.end()); | 445 return std::string(wide.begin(), wide.end()); |
446 } | 446 } |
447 | 447 |
448 std::wstring ASCIIToWide(const std::string& ascii) { | 448 std::wstring ASCIIToWide(const std::string& ascii) { |
449 DCHECK(IsStringASCII(ascii)); | 449 DCHECK(IsStringASCII(ascii)); |
450 return std::wstring(ascii.begin(), ascii.end()); | 450 return std::wstring(ascii.begin(), ascii.end()); |
451 } | 451 } |
452 | 452 |
453 std::string UTF16ToASCII(const string16& utf16) { | |
454 DCHECK(IsStringASCII(utf16)); | |
455 return std::string(utf16.begin(), utf16.end()); | |
456 } | |
457 | |
458 string16 ASCIIToUTF16(const std::string& ascii) { | |
459 DCHECK(IsStringASCII(ascii)); | |
460 return string16(ascii.begin(), ascii.end()); | |
461 } | |
462 | |
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| into |latin1| one code unit at a time. |latin1| must be
// non-null; it is always cleared first. Returns true and stores the converted
// string when every code unit lies in [0, 255]. Returns false and leaves
// |latin1| empty as soon as a code unit outside that range is found.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // wchar_t is a signed type on some platforms. Compare as unsigned so that
    // a negative code unit is rejected instead of slipping past the upper
    // bound check and being silently truncated to a char.
    const unsigned long code_unit = static_cast<unsigned long>(wide[i]);
    if (code_unit > 255)
      return false;
    output[i] = static_cast<char>(code_unit);
  }
  latin1->swap(output);
  return true;
}
476 | 466 |
// Returns true if every code unit of |str| lies in [0, 255], i.e. the string
// can be represented with 8-bit characters. An empty string returns true.
bool IsString8Bit(const std::wstring& str) {
  for (size_t i = 0; i < str.length(); i++) {
    // wchar_t is a signed type on some platforms. Compare as unsigned so that
    // negative code units are reported as not fitting in 8 bits.
    if (static_cast<unsigned long>(str[i]) > 255)
      return false;
  }
  return true;
}
484 | 474 |
// Returns true if every code unit of |str| lies in [0, 0x7F]. An empty string
// returns true.
bool IsStringASCII(const std::wstring& str) {
  for (size_t i = 0; i < str.length(); i++) {
    // wchar_t is a signed type on some platforms. Compare as unsigned so that
    // negative code units are not mistaken for ASCII.
    if (static_cast<unsigned long>(str[i]) > 0x7F)
      return false;
  }
  return true;
}
494 | 482 |
495 bool IsStringASCII(const std::wstring& str) { | |
496 return DoIsStringASCII(str); | |
497 } | |
498 | |
499 #if !defined(WCHAR_T_IS_UTF16) | |
500 bool IsStringASCII(const string16& str) { | |
501 return DoIsStringASCII(str); | |
502 } | |
503 #endif | |
504 | |
// Returns true if no byte of |str| has its high bit set, i.e. every byte,
// read as unsigned, is at most 0x7F. An empty string returns true.
bool IsStringASCII(const std::string& str) {
  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
    const unsigned char byte = static_cast<unsigned char>(*it);
    if ((byte & 0x80) != 0)
      return false;
  }
  return true;
}
508 | 490 |
509 // Helper functions that determine whether the given character begins a | 491 // Helper functions that determine whether the given character begins a |
510 // UTF-8 sequence of bytes with the given length. A character satisfies | 492 // UTF-8 sequence of bytes with the given length. A character satisfies |
511 // "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte | 493 // "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte |
512 // character. | 494 // character. |
// True when the top three bits of the low byte of |c| are 110.
static inline bool IsBegin2ByteUTF8(int c) {
  const int lead_bits = c & 0xE0;
  return lead_bits == 0xC0;
}
516 static inline bool IsBegin3ByteUTF8(int c) { | 498 static inline bool IsBegin3ByteUTF8(int c) { |
(...skipping 1107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1624 // Each input byte creates two output hex characters. | 1606 // Each input byte creates two output hex characters. |
1625 std::string ret(size * 2, '\0'); | 1607 std::string ret(size * 2, '\0'); |
1626 | 1608 |
1627 for (size_t i = 0; i < size; ++i) { | 1609 for (size_t i = 0; i < size; ++i) { |
1628 char b = reinterpret_cast<const char*>(bytes)[i]; | 1610 char b = reinterpret_cast<const char*>(bytes)[i]; |
1629 ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; | 1611 ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; |
1630 ret[(i * 2) + 1] = kHexChars[b & 0xf]; | 1612 ret[(i * 2) + 1] = kHexChars[b & 0xf]; |
1631 } | 1613 } |
1632 return ret; | 1614 return ret; |
1633 } | 1615 } |
OLD | NEW |