Index: base/strings/string_util.cc |
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc |
index ab9570f6741a5f8d981dbfd278fa37512320da50..65eeacb34a33bf973136eb12f4a525739fac99f0 100644 |
--- a/base/strings/string_util.cc |
+++ b/base/strings/string_util.cc |
@@ -64,6 +64,34 @@ static bool CompareParameter(const ReplacementOffset& elem1, |
return elem1.parameter < elem2.parameter; |
} |
+// Assuming that a pointer is the size of a "machine word", then |
+// uintptr_t is an integer type that is also a machine word. |
+// DoIsStringASCII() uses it to accumulate a whole word of characters with a |
+// single load. |
+typedef uintptr_t MachineWord; |
+// Low-order address bits that are all zero when a pointer is aligned to |
+// sizeof(MachineWord). Relies on sizeof(MachineWord) being a power of two. |
+const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; |
+ |
+// Returns true if the address held in pointer is a multiple of |
+// sizeof(MachineWord), i.e. none of the kMachineWordAlignmentMask bits are |
+// set in it. |
+inline bool IsAlignedToMachineWord(const void* pointer) { |
+ return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask); |
+} |
+ |
+// Rounds the address held in pointer down to a multiple of |
+// sizeof(MachineWord) by clearing the kMachineWordAlignmentMask bits. An |
+// already aligned pointer is returned unchanged. |
+template<typename T> inline T* AlignToMachineWord(T* pointer) { |
+ return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & |
+ ~kMachineWordAlignmentMask); |
+} |
+ |
+// NonASCIIMask<size, CharacterType>::value() is a size-byte mask of the bits |
+// that must all be zero in a word made up of ASCII (<= 0x7F) characters: the |
+// top bit of every byte for char, and the top nine bits of every 16-bit unit |
+// for base::char16. Only the primary template is declared, never defined, so |
+// using any other size/type combination fails to compile. |
+template<size_t size, typename CharacterType> struct NonASCIIMask; |
+template<> struct NonASCIIMask<4, base::char16> { |
+ static inline uint32_t value() { return 0xFF80FF80U; } |
+}; |
+template<> struct NonASCIIMask<4, char> { |
+ static inline uint32_t value() { return 0x80808080U; } |
+}; |
+template<> struct NonASCIIMask<8, base::char16> { |
+ static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } |
+}; |
+template<> struct NonASCIIMask<8, char> { |
+ static inline uint64_t value() { return 0x8080808080808080ULL; } |
+}; |
+ |
} // namespace |
namespace base { |
@@ -322,22 +350,46 @@ bool ContainsOnlyChars(const StringPiece16& input, |
return input.find_first_not_of(characters) == StringPiece16::npos; |
} |
-template<class STR> |
-static bool DoIsStringASCII(const STR& str) { |
- for (size_t i = 0; i < str.length(); i++) { |
- typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; |
- if (c > 0x7F) |
- return false; |
+// Returns true if none of the length Char elements starting at characters |
+// has a bit set outside the 7-bit ASCII range. Instead of testing each |
+// character, every character is ORed into a single accumulator (a machine |
+// word at a time where alignment allows) and the accumulator is tested |
+// against the non-ASCII mask once at the end. Unlike the per-character loop |
+// it replaces, this always scans the whole input; it does not stop at the |
+// first non-ASCII character. |
+template <class Char> |
+inline bool DoIsStringASCII(const Char* characters, size_t length) { |
+ MachineWord all_char_bits = 0; |
+ const Char* end = characters + length; |
+ |
+ // Prologue: accumulate one character at a time until the pointer is |
+ // aligned to a machine word or the input is exhausted. |
+ while (!IsAlignedToMachineWord(characters) && characters != end) { |
+ all_char_bits |= *characters; |
+ ++characters; |
} |
- return true; |
+ |
+ // Main loop: OR in one whole machine word per iteration, stopping at |
+ // word_end, which is end rounded down to a word boundary. |
+ // NOTE(review): the load below reads Char storage through a MachineWord |
+ // lvalue; presumably the build does not rely on strict aliasing - confirm. |
+ const Char* word_end = AlignToMachineWord(end); |
+ const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); |
+ while (characters < word_end) { |
+ all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); |
+ characters += loop_increment; |
+ } |
+ |
+ // Epilogue: accumulate the characters left over after the last whole word. |
+ while (characters != end) { |
+ all_char_bits |= *characters; |
+ ++characters; |
+ } |
+ |
+ // Any accumulated bit under the mask came from a non-ASCII character. |
+ MachineWord non_ascii_bit_mask = |
+ NonASCIIMask<sizeof(MachineWord), Char>::value(); |
+ return !(all_char_bits & non_ascii_bit_mask); |
} |
+// Each IsStringASCII() overload forwards the string's data pointer and |
+// length to DoIsStringASCII(), which is instantiated for that string's |
+// character type. |
bool IsStringASCII(const StringPiece& str) { |
- return DoIsStringASCII(str); |
+ return DoIsStringASCII(str.data(), str.length()); |
+} |
+ |
+bool IsStringASCII(const StringPiece16& str) { |
+ return DoIsStringASCII(str.data(), str.length()); |
} |
bool IsStringASCII(const string16& str) { |
- return DoIsStringASCII(str); |
+ return DoIsStringASCII(str.data(), str.length()); |
} |
bool IsStringUTF8(const std::string& str) { |