| Index: base/strings/string_util.cc
|
| diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
|
| index ab9570f6741a5f8d981dbfd278fa37512320da50..65eeacb34a33bf973136eb12f4a525739fac99f0 100644
|
| --- a/base/strings/string_util.cc
|
| +++ b/base/strings/string_util.cc
|
| @@ -64,6 +64,34 @@ static bool CompareParameter(const ReplacementOffset& elem1,
|
| return elem1.parameter < elem2.parameter;
|
| }
|
|
|
| +// MachineWord: uintptr_t, assuming a pointer is the size of a machine word.
|
| +// kMachineWordAlignmentMask: low address bits; all zero when word-aligned.
|
| +typedef uintptr_t MachineWord;
|
| +const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
|
| +
|
| +inline bool IsAlignedToMachineWord(const void* pointer) {  // True iff no alignment-mask bit is set in the address.
|
| + return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
|
| +}
|
| +
|
| +template<typename T> inline T* AlignToMachineWord(T* pointer) {  // Returns |pointer| with the alignment-mask bits cleared.
|
| + return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
|
| + ~kMachineWordAlignmentMask);  // Clearing the low bits rounds the address down, never up.
|
| +}
|
| +
|
| +template<size_t size, typename CharacterType> struct NonASCIIMask;  // Bits that no ASCII (<= 0x7F) character has set; |size| is the word size in bytes.
|
| +template<> struct NonASCIIMask<4, base::char16> {
|
| + static inline uint32_t value() { return 0xFF80FF80U; }  // Two 16-bit lanes, bits 7-15 of each.
|
| +};
|
| +template<> struct NonASCIIMask<4, char> {
|
| + static inline uint32_t value() { return 0x80808080U; }  // Four 8-bit lanes, bit 7 of each.
|
| +};
|
| +template<> struct NonASCIIMask<8, base::char16> {
|
| + static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }  // Four 16-bit lanes, bits 7-15 of each.
|
| +};
|
| +template<> struct NonASCIIMask<8, char> {
|
| + static inline uint64_t value() { return 0x8080808080808080ULL; }  // Eight 8-bit lanes, bit 7 of each.
|
| +};
|
| +
|
| } // namespace
|
|
|
| namespace base {
|
| @@ -322,22 +350,46 @@ bool ContainsOnlyChars(const StringPiece16& input,
|
| return input.find_first_not_of(characters) == StringPiece16::npos;
|
| }
|
|
|
| -template<class STR>
|
| -static bool DoIsStringASCII(const STR& str) {
|
| - for (size_t i = 0; i < str.length(); i++) {
|
| - typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
|
| - if (c > 0x7F)
|
| - return false;
|
| +template <class Char>  // Returns true iff none of the |length| characters has a non-ASCII bit set.
|
| +inline bool DoIsStringASCII(const Char* characters, size_t length) {
|
| + MachineWord all_char_bits = 0;  // OR of everything read; tested once at the end (no early exit).
|
| + const Char* end = characters + length;
|
| +
|
| + // Prologue: read single characters until word-aligned or the input ends.
|
| + while (!IsAlignedToMachineWord(characters) && characters != end) {
|
| + all_char_bits |= *characters;
|
| + ++characters;
|
| }
|
| - return true;
|
| +
|
| + // Main loop: OR in one aligned machine word (several characters) at a time.
|
| + const Char* word_end = AlignToMachineWord(end);  // |end| rounded down to a word boundary.
|
| + const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);  // Characters per machine word.
|
| + while (characters < word_end) {
|
| + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));  // NOTE(review): type-punned load; confirm aliasing is OK.
|
| + characters += loop_increment;
|
| + }
|
| +
|
| + // Epilogue: read the characters left over after the last whole word.
|
| + while (characters != end) {
|
| + all_char_bits |= *characters;
|
| + ++characters;
|
| + }
|
| +
|
| + MachineWord non_ascii_bit_mask =
|
| + NonASCIIMask<sizeof(MachineWord), Char>::value();
|
| + return !(all_char_bits & non_ascii_bit_mask);  // ASCII iff no masked bit was ever set; empty input yields true.
|
| }
|
|
|
| bool IsStringASCII(const StringPiece& str) {
|
| - return DoIsStringASCII(str);
|
| + return DoIsStringASCII(str.data(), str.length());  // Passes the character pointer and length.
|
| +}
|
| +
|
| +bool IsStringASCII(const StringPiece16& str) {  // Overload for StringPiece16.
|
| + return DoIsStringASCII(str.data(), str.length());
|
| }
|
|
|
| bool IsStringASCII(const string16& str) {
|
| - return DoIsStringASCII(str);
|
| + return DoIsStringASCII(str.data(), str.length());  // Passes the character pointer and length.
|
| }
|
|
|
| bool IsStringUTF8(const std::string& str) {
|
|
|