Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1808)

Side by Side Diff: base/strings/string_util.cc

Issue 543043002: Implement fast path in UTF8ToUTF16 for pure ASCII strings (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebased Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/strings/string_util.h" 5 #include "base/strings/string_util.h"
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <errno.h> 8 #include <errno.h>
9 #include <math.h> 9 #include <math.h>
10 #include <stdarg.h> 10 #include <stdarg.h>
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
57 57
58 // Starting position in the string. 58 // Starting position in the string.
59 size_t offset; 59 size_t offset;
60 }; 60 };
61 61
62 static bool CompareParameter(const ReplacementOffset& elem1, 62 static bool CompareParameter(const ReplacementOffset& elem1,
63 const ReplacementOffset& elem2) { 63 const ReplacementOffset& elem2) {
64 return elem1.parameter < elem2.parameter; 64 return elem1.parameter < elem2.parameter;
65 } 65 }
66 66
67 // Assuming that a pointer is the size of a "machine word", then
68 // uintptr_t is an integer type that is also a machine word.
69 typedef uintptr_t MachineWord;
70 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
71
72 inline bool IsAlignedToMachineWord(const void* pointer) {
73 return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
74 }
75
76 template<typename T> inline T* AlignToMachineWord(T* pointer) {
77 return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
78 ~kMachineWordAlignmentMask);
79 }
80
81 template<size_t size, typename CharacterType> struct NonASCIIMask;
82 template<> struct NonASCIIMask<4, base::char16> {
83 static inline uint32_t value() { return 0xFF80FF80U; }
84 };
85 template<> struct NonASCIIMask<4, char> {
86 static inline uint32_t value() { return 0x80808080U; }
87 };
88 template<> struct NonASCIIMask<8, base::char16> {
89 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
90 };
91 template<> struct NonASCIIMask<8, char> {
92 static inline uint64_t value() { return 0x8080808080808080ULL; }
93 };
94
67 } // namespace 95 } // namespace
68 96
69 namespace base { 97 namespace base {
70 98
71 bool IsWprintfFormatPortable(const wchar_t* format) { 99 bool IsWprintfFormatPortable(const wchar_t* format) {
72 for (const wchar_t* position = format; *position != '\0'; ++position) { 100 for (const wchar_t* position = format; *position != '\0'; ++position) {
73 if (*position == '%') { 101 if (*position == '%') {
74 bool in_specification = true; 102 bool in_specification = true;
75 bool modifier_l = false; 103 bool modifier_l = false;
76 while (in_specification) { 104 while (in_specification) {
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 bool ContainsOnlyChars(const StringPiece& input, 343 bool ContainsOnlyChars(const StringPiece& input,
316 const StringPiece& characters) { 344 const StringPiece& characters) {
317 return input.find_first_not_of(characters) == StringPiece::npos; 345 return input.find_first_not_of(characters) == StringPiece::npos;
318 } 346 }
319 347
320 bool ContainsOnlyChars(const StringPiece16& input, 348 bool ContainsOnlyChars(const StringPiece16& input,
321 const StringPiece16& characters) { 349 const StringPiece16& characters) {
322 return input.find_first_not_of(characters) == StringPiece16::npos; 350 return input.find_first_not_of(characters) == StringPiece16::npos;
323 } 351 }
324 352
325 template<class STR> 353 template <class Char>
326 static bool DoIsStringASCII(const STR& str) { 354 inline bool DoIsStringASCII(const Char* characters, size_t length) {
327 for (size_t i = 0; i < str.length(); i++) { 355 MachineWord all_char_bits = 0;
328 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 356 const Char* end = characters + length;
329 if (c > 0x7F) 357
330 return false; 358 // Prologue: align the input.
359 while (!IsAlignedToMachineWord(characters) && characters != end) {
360 all_char_bits |= *characters;
361 ++characters;
331 } 362 }
332 return true; 363
364 // Main loop: OR one full machine word of input into the accumulator per iteration.
365 const Char* word_end = AlignToMachineWord(end);
366 const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
367 while (characters < word_end) {
368 all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
369 characters += loop_increment;
370 }
371
372 // Epilogue: process the remaining characters one at a time.
373 while (characters != end) {
374 all_char_bits |= *characters;
375 ++characters;
376 }
377
378 MachineWord non_ascii_bit_mask =
379 NonASCIIMask<sizeof(MachineWord), Char>::value();
380 return !(all_char_bits & non_ascii_bit_mask);
333 } 381 }
334 382
335 bool IsStringASCII(const StringPiece& str) { 383 bool IsStringASCII(const StringPiece& str) {
336 return DoIsStringASCII(str); 384 return DoIsStringASCII(str.data(), str.length());
385 }
386
387 bool IsStringASCII(const StringPiece16& str) {
388 return DoIsStringASCII(str.data(), str.length());
337 } 389 }
338 390
339 bool IsStringASCII(const string16& str) { 391 bool IsStringASCII(const string16& str) {
340 return DoIsStringASCII(str); 392 return DoIsStringASCII(str.data(), str.length());
341 } 393 }
342 394
343 bool IsStringUTF8(const std::string& str) { 395 bool IsStringUTF8(const std::string& str) {
344 const char *src = str.data(); 396 const char *src = str.data();
345 int32 src_len = static_cast<int32>(str.length()); 397 int32 src_len = static_cast<int32>(str.length());
346 int32 char_index = 0; 398 int32 char_index = 0;
347 399
348 while (char_index < src_len) { 400 while (char_index < src_len) {
349 int32 code_point; 401 int32 code_point;
350 CBU8_NEXT(src, char_index, src_len, code_point); 402 CBU8_NEXT(src, char_index, src_len, code_point);
(...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after
883 } 935 }
884 936
885 } // namespace 937 } // namespace
886 938
887 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 939 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
888 return lcpyT<char>(dst, src, dst_size); 940 return lcpyT<char>(dst, src, dst_size);
889 } 941 }
890 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 942 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
891 return lcpyT<wchar_t>(dst, src, dst_size); 943 return lcpyT<wchar_t>(dst, src, dst_size);
892 } 944 }
OLDNEW
« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698