Chromium Code Reviews| Index: src/runtime/runtime-i18n.cc |
| diff --git a/src/runtime/runtime-i18n.cc b/src/runtime/runtime-i18n.cc |
| index 75e0952581b3723aea67e4e9b108534d926fec86..ae86acc606b9a33169f6b6ad27df65fe9a3de8a2 100644 |
| --- a/src/runtime/runtime-i18n.cc |
| +++ b/src/runtime/runtime-i18n.cc |
| @@ -8,13 +8,14 @@ |
| #include <memory> |
| -#include "src/api.h" |
| #include "src/api-natives.h" |
| +#include "src/api.h" |
| #include "src/arguments.h" |
| #include "src/factory.h" |
| #include "src/i18n.h" |
| #include "src/isolate-inl.h" |
| #include "src/messages.h" |
| +#include "src/utils.h" |
| #include "unicode/brkiter.h" |
| #include "unicode/calendar.h" |
| @@ -1091,23 +1092,25 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) { |
| int length = s->length(); |
| s = String::Flatten(s); |
| - // First scan the string for uppercase and non-ASCII characters: |
| + |
| + bool is_ascii = true; |
| if (s->HasOnlyOneByteChars()) { |
| - int first_index_to_lower = length; |
| - for (int index = 0; index < length; ++index) { |
| - // Blink specializes this path for one-byte strings, so it |
| - // does not need to do a generic get, but can do the equivalent |
| - // of SeqOneByteStringGet. |
| - uint16_t ch = s->Get(index); |
| - if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| - first_index_to_lower = index; |
| - break; |
| + // Scan the string for uppercase and non-ASCII characters for strings |
| + // shorter than a machine-word without any memory allocation overhead. |
|
Yang
2016/12/05 19:19:29
What is the rationale for doing this only to short
|
| + int index_to_first_upper = length; |
| + if (static_cast<size_t>(length) < sizeof(uintptr_t)) { |
| + for (int index = 0; index < length; ++index) { |
| + uint16_t ch = s->Get(index); |
| + if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| + is_ascii = !(ch & ~0x7F); |
| + index_to_first_upper = index; |
| + break; |
| + } |
| } |
| + // Nothing to do if the string is all ASCII with no uppercase. |
| + if (index_to_first_upper == length) return *s; |
| } |
|
Yang
2016/12/05 19:19:29
So if the string is longer than a word, we always
|
| - // Nothing to do if the string is all ASCII with no uppercase. |
| - if (first_index_to_lower == length) return *s; |
| - |
| // We depend here on the invariant that the length of a Latin1 |
| // string is invariant under ToLowerCase, and the result always |
| // fits in the Latin1 range in the *root locale*. It does not hold |
| @@ -1118,19 +1121,45 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) { |
| DisallowHeapAllocation no_gc; |
| String::FlatContent flat = s->GetFlatContent(); |
| + uint8_t* dest = result->GetChars(); |
| + // Instead of checking is_ascii here, we'd better modify FastAsciiConvert |
|
Yang
2016/12/05 19:19:29
Is this a TODO?
|
| + // to return the index to the first non-ASCII character. |
| + if (flat.IsOneByte() && is_ascii) { |
| + const uint8_t* src = flat.ToOneByteVector().start(); |
| + bool has_changed_character = false; |
| + bool is_ascii = FastAsciiConvert<true>(reinterpret_cast<char*>(dest), |
| + reinterpret_cast<const char*>(src), |
| + length, &has_changed_character); |
| + // If not ASCII, we discard the result and start anew. |
| + if (is_ascii) return has_changed_character ? *result : *s; |
| + } |
| + |
| + if (index_to_first_upper == length) { |
| + for (int index = 0; index < length; ++index) { |
| + uint16_t ch = s->Get(index); |
| + if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| + index_to_first_upper = index; |
| + break; |
| + } |
| + } |
| + } |
| + |
| if (flat.IsOneByte()) { |
| + // An ASCII input without any uppercase characters is already handled by |
| + // the short-string scanner and FastAsciiConvert. |
| + DCHECK(index_to_first_upper < length); |
| const uint8_t* src = flat.ToOneByteVector().start(); |
| - CopyChars(result->GetChars(), src, |
| - static_cast<size_t>(first_index_to_lower)); |
| - for (int index = first_index_to_lower; index < length; ++index) { |
| + CopyChars(dest, src, static_cast<size_t>(index_to_first_upper)); |
| + for (int index = index_to_first_upper; index < length; ++index) { |
| uint16_t ch = static_cast<uint16_t>(src[index]); |
| result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); |
| } |
| } else { |
| + // Nothing to do if the string is all ASCII with no uppercase. |
| + if (index_to_first_upper == length) return *s; |
| const uint16_t* src = flat.ToUC16Vector().start(); |
| - CopyChars(result->GetChars(), src, |
| - static_cast<size_t>(first_index_to_lower)); |
| - for (int index = first_index_to_lower; index < length; ++index) { |
| + CopyChars(dest, src, static_cast<size_t>(index_to_first_upper)); |
| + for (int index = index_to_first_upper; index < length; ++index) { |
| uint16_t ch = src[index]; |
| result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); |
| } |
| @@ -1152,29 +1181,32 @@ RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) { |
| DCHECK_EQ(args.length(), 1); |
| CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| - // This function could be optimized for no-op cases the way lowercase |
| - // counterpart is, but in empirical testing, few actual calls to upper() |
| - // are no-ops. So, it wouldn't be worth the extra time for pre-scanning. |
| - |
| int32_t length = s->length(); |
| s = String::Flatten(s); |
| if (s->HasOnlyOneByteChars()) { |
| +#if 0 |
| Handle<SeqOneByteString> result; |
| ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| isolate, result, isolate->factory()->NewRawOneByteString(length)); |
| +#endif |
| + Handle<SeqOneByteString> result = |
| + isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
|
jungshik at Google
2016/12/02 06:52:38
The first part of Runtime_StringToUpperCaseI18N fo
Dan Ehrenberg
2016/12/02 23:35:08
I think we would crash if out of memory, and the o
Yang
2016/12/05 19:19:30
Sounds right to me as well. No need to check for e
|
| int sharp_s_count; |
| bool is_result_single_byte; |
| { |
| DisallowHeapAllocation no_gc; |
| String::FlatContent flat = s->GetFlatContent(); |
| - // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII |
| - // could be removed because ToUpperOneByte is pretty fast now (it |
| - // does not call ICU API any more.). |
| if (flat.IsOneByte()) { |
| Vector<const uint8_t> src = flat.ToOneByteVector(); |
| - if (ToUpperFastASCII(src, result)) return *result; |
| + bool has_changed_character = false; |
| + bool is_ascii = |
| + FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()), |
| + reinterpret_cast<const char*>(src.start()), |
| + length, &has_changed_character); |
| + // If not ASCII, we discard the result and use the table for Latin1. |
| + if (is_ascii) return has_changed_character ? *result : *s; |
| is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); |
| } else { |
| DCHECK(flat.IsTwoByte()); |