Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Unified Diff: base/strings/string_util.cc

Issue 543043002: Implement fast path in UTF8ToUTF16 for pure ASCII strings (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebased Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/strings/string_util.cc
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index ab9570f6741a5f8d981dbfd278fa37512320da50..65eeacb34a33bf973136eb12f4a525739fac99f0 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc
@@ -64,6 +64,34 @@ static bool CompareParameter(const ReplacementOffset& elem1,
return elem1.parameter < elem2.parameter;
}
+// MachineWord is an unsigned integer type as wide as a pointer; the code
+// below assumes that a pointer is the size of a "machine word".
+typedef uintptr_t MachineWord;
+const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;  // Address bits that are all zero in a word-aligned pointer (assumes sizeof(MachineWord) is a power of two).
+
+inline bool IsAlignedToMachineWord(const void* pointer) {  // True when |pointer|'s address has none of the kMachineWordAlignmentMask bits set.
+ return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);  // The address is viewed as an integer so its low bits can be tested.
+}
+
+template<typename T> inline T* AlignToMachineWord(T* pointer) {  // Rounds |pointer| DOWN to a word-aligned address; an already aligned pointer is returned unchanged.
+ return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
+ ~kMachineWordAlignmentMask);  // Clearing the mask bits can only lower the address, never raise it.
+}
+
+template<size_t size, typename CharacterType> struct NonASCIIMask;  // Keyed on (word size in bytes, character type); only declared here, so any combination without a specialization below fails to compile.
+template<> struct NonASCIIMask<4, base::char16> {  // 4-byte word, base::char16 characters.
+ static inline uint32_t value() { return 0xFF80FF80U; }  // 0xFF80 per 16-bit lane: every bit above 0x7F.
+};
+template<> struct NonASCIIMask<4, char> {  // 4-byte word, char characters.
+ static inline uint32_t value() { return 0x80808080U; }  // 0x80 per 8-bit lane: the only bit above 0x7F.
+};
+template<> struct NonASCIIMask<8, base::char16> {  // 8-byte word, base::char16 characters.
+ static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }  // Same 0xFF80 lane pattern, repeated four times.
+};
+template<> struct NonASCIIMask<8, char> {  // 8-byte word, char characters.
+ static inline uint64_t value() { return 0x8080808080808080ULL; }  // Same 0x80 lane pattern, repeated eight times.
+};
+
} // namespace
namespace base {
@@ -322,22 +350,46 @@ bool ContainsOnlyChars(const StringPiece16& input,
return input.find_first_not_of(characters) == StringPiece16::npos;
}
-template<class STR>
-static bool DoIsStringASCII(const STR& str) {
- for (size_t i = 0; i < str.length(); i++) {
- typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
- if (c > 0x7F)
- return false;
+template <class Char>
+inline bool DoIsStringASCII(const Char* characters, size_t length) {  // Returns true when none of the |length| characters has a bit above 0x7F set (so also for length 0).
+ MachineWord all_char_bits = 0;  // Bitwise OR of everything read so far; there is no early exit, the whole input is always scanned.
+ const Char* end = characters + length;
+
+ // Prologue: OR in characters one at a time until |characters| is word-aligned or the input ends.
+ while (!IsAlignedToMachineWord(characters) && characters != end) {
+ all_char_bits |= *characters;
+ ++characters;
 }
- return true;
+
+ // Main loop: OR in one whole machine word (loop_increment characters) per iteration; nothing is compared until the end.
+ const Char* word_end = AlignToMachineWord(end);  // Last word-aligned address that is <= end.
+ const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);  // Number of characters held by one machine word.
+ while (characters < word_end) {
+ all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));  // NOTE(review): type-punned word load; confirm the build settings permit this aliasing.
+ characters += loop_increment;
+ }
+
+ // Epilogue: OR in the trailing characters that do not fill a whole word.
+ while (characters != end) {
+ all_char_bits |= *characters;
+ ++characters;
+ }
+
+ MachineWord non_ascii_bit_mask =
+ NonASCIIMask<sizeof(MachineWord), Char>::value();  // Selects, in every character lane of the word, the bits above 0x7F.
+ return !(all_char_bits & non_ascii_bit_mask);  // ASCII only if no accumulated bit falls under the mask.
 }
bool IsStringASCII(const StringPiece& str) {  // Forwards the piece's buffer and length to DoIsStringASCII.
- return DoIsStringASCII(str);
+ return DoIsStringASCII(str.data(), str.length());
+}
+
+bool IsStringASCII(const StringPiece16& str) {  // Overload added by this patch; same forwarding for StringPiece16.
+ return DoIsStringASCII(str.data(), str.length());
}
bool IsStringASCII(const string16& str) {  // string16 overload, now also forwarding data() and length().
- return DoIsStringASCII(str);
+ return DoIsStringASCII(str.data(), str.length());
}
bool IsStringUTF8(const std::string& str) {
« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698