Chromium Code Reviews| Index: base/strings/string_util.cc |
| diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc |
| index ab9570f6741a5f8d981dbfd278fa37512320da50..87d51336ffacbf604823c9ab7a2dd08614bf4bf9 100644 |
| --- a/base/strings/string_util.cc |
| +++ b/base/strings/string_util.cc |
| @@ -64,6 +64,34 @@ static bool CompareParameter(const ReplacementOffset& elem1, |
| return elem1.parameter < elem2.parameter; |
| } |
| +// Assumes that a pointer is the size of a machine word, so that uintptr_t |
| +// (an integer type wide enough to hold a pointer) is one machine word wide. |
| +typedef uintptr_t MachineWord; |
| +const uintptr_t machineWordAlignmentMask = sizeof(MachineWord) - 1; |
|
brettw
2014/09/16 17:14:28
Constant naming: kMachineWord....
mnaganov (inactive)
2014/09/17 15:15:01
Done.
|
| + |
| +inline bool IsAlignedToMachineWord(const void* pointer) { |
| + return !(reinterpret_cast<MachineWord>(pointer) & machineWordAlignmentMask); |
| +} |
| + |
| +template<typename T> inline T* AlignToMachineWord(T* pointer) { |
| + return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & |
| + ~machineWordAlignmentMask); |
| +} |
| + |
| +template<size_t size, typename CharacterType> struct NonASCIIMask; |
| +template<> struct NonASCIIMask<4, base::char16> { |
| + static inline uint32_t value() { return 0xFF80FF80U; } |
| +}; |
| +template<> struct NonASCIIMask<4, char> { |
| + static inline uint32_t value() { return 0x80808080U; } |
| +}; |
| +template<> struct NonASCIIMask<8, base::char16> { |
| + static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } |
| +}; |
| +template<> struct NonASCIIMask<8, char> { |
| + static inline uint64_t value() { return 0x8080808080808080ULL; } |
| +}; |
| + |
| } // namespace |
| namespace base { |
| @@ -322,22 +350,50 @@ bool ContainsOnlyChars(const StringPiece16& input, |
| return input.find_first_not_of(characters) == StringPiece16::npos; |
| } |
| -template<class STR> |
| -static bool DoIsStringASCII(const STR& str) { |
| - for (size_t i = 0; i < str.length(); i++) { |
| - typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; |
| - if (c > 0x7F) |
| - return false; |
| +template <class Char> |
| +inline bool DoIsStringASCII(const Char* characters, size_t length) { |
| + MachineWord all_char_bits = 0; |
| + const Char* end = characters + length; |
| + |
| + // Prologue: align the input. |
| + while (!IsAlignedToMachineWord(characters) && characters != end) { |
| + all_char_bits |= *characters; |
| + ++characters; |
| } |
| - return true; |
| + |
| + // Main loop: OR one full machine word of characters at a time into the accumulator. |
| + const Char* word_end = AlignToMachineWord(end); |
| + const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); |
| + while (characters < word_end) { |
| + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); |
| + characters += loop_increment; |
| + } |
| + |
| + // Epilogue: process the remaining characters one at a time. |
| + while (characters != end) { |
| + all_char_bits |= *characters; |
| + ++characters; |
| + } |
| + |
| + MachineWord non_ascii_bit_mask = |
| + NonASCIIMask<sizeof(MachineWord), Char>::value(); |
| + return !(all_char_bits & non_ascii_bit_mask); |
| } |
| bool IsStringASCII(const StringPiece& str) { |
| - return DoIsStringASCII(str); |
| + return DoIsStringASCII(str.data(), str.length()); |
| } |
| bool IsStringASCII(const string16& str) { |
| - return DoIsStringASCII(str); |
| + return DoIsStringASCII(str.data(), str.length()); |
| +} |
| + |
| +bool IsStringASCII(const char* src, size_t src_len) { |
| + return DoIsStringASCII(src, src_len); |
| +} |
| + |
| +bool IsStringASCII(const char16* src, size_t src_len) { |
| + return DoIsStringASCII(src, src_len); |
| } |
| bool IsStringUTF8(const std::string& str) { |