Index: src/runtime/runtime-i18n.cc |
diff --git a/src/runtime/runtime-i18n.cc b/src/runtime/runtime-i18n.cc |
index 8f05809c50c7c87007fb27d2b18d93b6299edd60..f4a3394c4c4d2efabfe81235c485f2230735797e 100644 |
--- a/src/runtime/runtime-i18n.cc |
+++ b/src/runtime/runtime-i18n.cc |
@@ -753,13 +753,6 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) { |
} |
namespace { |
-inline void LocaleConvertCaseHelper(icu::UnicodeString* s, bool is_to_upper, |
- const icu::Locale& locale) { |
- if (is_to_upper) |
- s->toUpper(locale); |
- else |
- s->toLower(locale); |
-} |
void ConvertCaseWithTransliterator(icu::UnicodeString* input, |
const char* transliterator_id) { |
UErrorCode status = U_ZERO_ERROR; |
@@ -778,43 +771,79 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, |
}; |
RUNTIME_ASSERT(locale_id >= -1 && |
locale_id < static_cast<int>(arraysize(conversion_locales))); |
- int32_t length = s->length(); |
- icu::UnicodeString converted; |
- { |
- DisallowHeapAllocation no_gc; |
- DCHECK(s->IsFlat()); |
- String::FlatContent flat = s->GetFlatContent(); |
+ int32_t src_length = s->length(); |
+ const UChar* src = nullptr; |
- const UChar* src; |
- if (flat.IsOneByte()) { |
- base::SmartArrayPointer<uc16> sap = s->ToWideCString(); |
- src = reinterpret_cast<const UChar*>(sap.get()); |
- converted = icu::UnicodeString(src, length); |
- } else { |
- src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
- converted = icu::UnicodeString(src, length); |
- } |
+ base::SmartArrayPointer<uc16> sap; |
+ if (s->IsOneByteRepresentationUnderneath()) { |
+ sap = s->ToWideCString(); |
+ src = reinterpret_cast<const UChar*>(sap.get()); |
} |
- if (locale_id == -1) { |
- LocaleConvertCaseHelper(&converted, is_to_upper, icu::Locale::getRoot()); |
- } else if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) { |
- // TODO(jshin): Once http://bugs.icu-project.org/trac/ticket/10582 is |
- // fixed, remove this special-casing for uppercasing in Greek(el) locale. |
- // This is ~500 times slower than using the case conversion API. |
- ConvertCaseWithTransliterator(&converted, "el-Upper"); |
- } else { |
- LocaleConvertCaseHelper(&converted, is_to_upper, |
- icu::Locale(conversion_locales[locale_id])); |
+ // Greek (id == 1) uppercasing has to be done via transliteration. |
+ // TODO(jshin): Drop this special-casing once ICU's regular case conversion |
+ // API supports Greek uppercasing. See |
+ // http://bugs.icu-project.org/trac/ticket/10582 . |
+ // ICU's C API for transliteration is cumbersome, so we use the C++ API here. |
+ if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) { |
+ icu::UnicodeString converted; |
+ { |
+ DisallowHeapAllocation no_gc; |
+ String::FlatContent flat = s->GetFlatContent(); |
+ if (src == nullptr) { |
+ DCHECK(flat.IsTwoByte()); |
+ src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
+ } |
+ // |converted| starts out holding the source string and will be replaced |
+ // by the converted result. |
+ converted.fastCopyFrom(icu::UnicodeString(false, src, src_length)); |
+ ConvertCaseWithTransliterator(&converted, "el-Upper"); |
+ } |
+ Handle<String> result; |
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
+ isolate, result, |
+ isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
+ reinterpret_cast<const uint16_t*>(converted.getBuffer()), |
+ converted.length()))); |
+ return *result; |
} |
- Handle<String> result; |
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
- isolate, result, |
- isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
- reinterpret_cast<const uint16_t*>(converted.getBuffer()), |
- converted.length()))); |
- return *result; |
+ typedef int32_t (*case_conversion_fn)( |
+ UChar * dest, int32_t destCapacity, const UChar* src, int32_t srcLength, |
+ const char* locale, UErrorCode* pErrorCode); |
+ case_conversion_fn fn = is_to_upper ? u_strToUpper : u_strToLower; |
+ const char* locale = locale_id == -1 ? "" : conversion_locales[locale_id]; |
+ |
+ int32_t dest_length = src_length; |
+ UErrorCode error; |
+ Handle<SeqTwoByteString> result; |
+ do { |
+ result = |
+ isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked(); |
+ base::SmartArrayPointer<uc16> sap; |
+ DisallowHeapAllocation no_gc; |
+ String::FlatContent flat = s->GetFlatContent(); |
+ // For OneByteString, |src| is already obtained with |sap| outside the loop. |
+ if (flat.IsTwoByte()) |
+ src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
+ error = U_ZERO_ERROR; |
+ dest_length = fn(reinterpret_cast<UChar*>(result->GetChars()), dest_length, |
+ src, src_length, locale, &error); |
+ } while (error == U_BUFFER_OVERFLOW_ERROR); |
+ |
+ // In most cases, the output will fill the destination buffer completely |
+ // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING). |
+ // Only in rare cases, it'll be shorter than the destination buffer and |
+ // |result| has to be truncated. |
+ DCHECK(U_SUCCESS(error)); |
+ if (U_SUCCESS(error)) { |
+ if (V8_UNLIKELY(error != U_STRING_NOT_TERMINATED_WARNING)) { |
+ result = Handle<SeqTwoByteString>::cast( |
+ SeqString::Truncate(result, dest_length)); |
+ } |
+ return *result; |
+ } |
+ return *s; |
} |
inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; } |