Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Unified Diff: src/runtime/runtime-i18n.cc

Issue 2533983006: Optimize case conversion with icu_case_mapping (Closed)
Patch Set: do not use ASSIGN_RETURN_FAILURE_ON_EXCEPTION in ToUpper Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/runtime/runtime-i18n.cc
diff --git a/src/runtime/runtime-i18n.cc b/src/runtime/runtime-i18n.cc
index 75e0952581b3723aea67e4e9b108534d926fec86..ae86acc606b9a33169f6b6ad27df65fe9a3de8a2 100644
--- a/src/runtime/runtime-i18n.cc
+++ b/src/runtime/runtime-i18n.cc
@@ -8,13 +8,14 @@
#include <memory>
-#include "src/api.h"
#include "src/api-natives.h"
+#include "src/api.h"
#include "src/arguments.h"
#include "src/factory.h"
#include "src/i18n.h"
#include "src/isolate-inl.h"
#include "src/messages.h"
+#include "src/utils.h"
#include "unicode/brkiter.h"
#include "unicode/calendar.h"
@@ -1091,23 +1092,25 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
int length = s->length();
s = String::Flatten(s);
- // First scan the string for uppercase and non-ASCII characters:
+
+ bool is_ascii = true;
if (s->HasOnlyOneByteChars()) {
- int first_index_to_lower = length;
- for (int index = 0; index < length; ++index) {
- // Blink specializes this path for one-byte strings, so it
- // does not need to do a generic get, but can do the equivalent
- // of SeqOneByteStringGet.
- uint16_t ch = s->Get(index);
- if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
- first_index_to_lower = index;
- break;
+ // Scan the string for uppercase and non-ASCII characters for strings
+ // shorter than a machine-word without any memory allocation overhead.
Yang 2016/12/05 19:19:29 What is the rationale for doing this only to short
+ int index_to_first_upper = length;
+ if (static_cast<size_t>(length) < sizeof(uintptr_t)) {
+ for (int index = 0; index < length; ++index) {
+ uint16_t ch = s->Get(index);
+ if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
+ is_ascii = !(ch & ~0x7F);
+ index_to_first_upper = index;
+ break;
+ }
}
+ // Nothing to do if the string is all ASCII with no uppercase.
+ if (index_to_first_upper == length) return *s;
}
Yang 2016/12/05 19:19:29 So if the string is longer than a word, we always
- // Nothing to do if the string is all ASCII with no uppercase.
- if (first_index_to_lower == length) return *s;
-
// We depend here on the invariant that the length of a Latin1
// string is invariant under ToLowerCase, and the result always
// fits in the Latin1 range in the *root locale*. It does not hold
@@ -1118,19 +1121,45 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
+ uint8_t* dest = result->GetChars();
+ // Instead of checking is_ascii here, we'd better modify FastAsciiConvert
Yang 2016/12/05 19:19:29 Is this a TODO?
+ // to return the index to the first non-ASCII character.
+ if (flat.IsOneByte() && is_ascii) {
+ const uint8_t* src = flat.ToOneByteVector().start();
+ bool has_changed_character = false;
+ bool is_ascii = FastAsciiConvert<true>(reinterpret_cast<char*>(dest),
+ reinterpret_cast<const char*>(src),
+ length, &has_changed_character);
+ // If not ASCII, we discard the result and start anew.
+ if (is_ascii) return has_changed_character ? *result : *s;
+ }
+
+ if (index_to_first_upper == length) {
+ for (int index = 0; index < length; ++index) {
+ uint16_t ch = s->Get(index);
+ if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
+ index_to_first_upper = index;
+ break;
+ }
+ }
+ }
+
if (flat.IsOneByte()) {
+ // An ASCII input without any uppercase characters is already handled by
+ // the short-string scanner and FastAsciiConvert.
+ DCHECK(index_to_first_upper < length);
const uint8_t* src = flat.ToOneByteVector().start();
- CopyChars(result->GetChars(), src,
- static_cast<size_t>(first_index_to_lower));
- for (int index = first_index_to_lower; index < length; ++index) {
+ CopyChars(dest, src, static_cast<size_t>(index_to_first_upper));
+ for (int index = index_to_first_upper; index < length; ++index) {
uint16_t ch = static_cast<uint16_t>(src[index]);
result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
}
} else {
+ // Nothing to do if the string is all ASCII with no uppercase.
+ if (index_to_first_upper == length) return *s;
const uint16_t* src = flat.ToUC16Vector().start();
- CopyChars(result->GetChars(), src,
- static_cast<size_t>(first_index_to_lower));
- for (int index = first_index_to_lower; index < length; ++index) {
+ CopyChars(dest, src, static_cast<size_t>(index_to_first_upper));
+ for (int index = index_to_first_upper; index < length; ++index) {
uint16_t ch = src[index];
result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
}
@@ -1152,29 +1181,32 @@ RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
DCHECK_EQ(args.length(), 1);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
- // This function could be optimized for no-op cases the way lowercase
- // counterpart is, but in empirical testing, few actual calls to upper()
- // are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
-
int32_t length = s->length();
s = String::Flatten(s);
if (s->HasOnlyOneByteChars()) {
+#if 0
Handle<SeqOneByteString> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawOneByteString(length));
+#endif
+ Handle<SeqOneByteString> result =
+ isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
jungshik at Google 2016/12/02 06:52:38 The first part of Runtime_StringToUpperCaseI18N fo
Dan Ehrenberg 2016/12/02 23:35:08 I think we would crash if out of memory, and the o
Yang 2016/12/05 19:19:30 Sounds right to me as well. No need to check for e
int sharp_s_count;
bool is_result_single_byte;
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
- // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
- // could be removed because ToUpperOneByte is pretty fast now (it
- // does not call ICU API any more.).
if (flat.IsOneByte()) {
Vector<const uint8_t> src = flat.ToOneByteVector();
- if (ToUpperFastASCII(src, result)) return *result;
+ bool has_changed_character = false;
+ bool is_ascii =
+ FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()),
+ reinterpret_cast<const char*>(src.start()),
+ length, &has_changed_character);
+ // If not ASCII, we discard the result and use the table for Latin1.
+ if (is_ascii) return has_changed_character ? *result : *s;
is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
} else {
DCHECK(flat.IsTwoByte());

Powered by Google App Engine
This is Rietveld 408576698