Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(937)

Unified Diff: src/runtime/runtime-i18n.cc

Issue 1875263006: Experimental CL on top of https://codereview.chromium.org/1812673005/ (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@caseconv
Patch Set: back to do-while loop with DisallowHeapAlloc inside the loop, per adamk@. Created 4 years, 8 months ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/intl/general/case-mapping.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/runtime/runtime-i18n.cc
diff --git a/src/runtime/runtime-i18n.cc b/src/runtime/runtime-i18n.cc
index 8f05809c50c7c87007fb27d2b18d93b6299edd60..f4a3394c4c4d2efabfe81235c485f2230735797e 100644
--- a/src/runtime/runtime-i18n.cc
+++ b/src/runtime/runtime-i18n.cc
@@ -753,13 +753,6 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
}
namespace {
-inline void LocaleConvertCaseHelper(icu::UnicodeString* s, bool is_to_upper,
- const icu::Locale& locale) {
- if (is_to_upper)
- s->toUpper(locale);
- else
- s->toLower(locale);
-}
void ConvertCaseWithTransliterator(icu::UnicodeString* input,
const char* transliterator_id) {
UErrorCode status = U_ZERO_ERROR;
@@ -778,43 +771,79 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
};
RUNTIME_ASSERT(locale_id >= -1 &&
locale_id < static_cast<int>(arraysize(conversion_locales)));
- int32_t length = s->length();
- icu::UnicodeString converted;
- {
- DisallowHeapAllocation no_gc;
- DCHECK(s->IsFlat());
- String::FlatContent flat = s->GetFlatContent();
+ int32_t src_length = s->length();
+ const UChar* src = nullptr;
- const UChar* src;
- if (flat.IsOneByte()) {
- base::SmartArrayPointer<uc16> sap = s->ToWideCString();
- src = reinterpret_cast<const UChar*>(sap.get());
- converted = icu::UnicodeString(src, length);
- } else {
- src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
- converted = icu::UnicodeString(src, length);
- }
+ base::SmartArrayPointer<uc16> sap;
+ if (s->IsOneByteRepresentationUnderneath()) {
+ sap = s->ToWideCString();
+ src = reinterpret_cast<const UChar*>(sap.get());
}
- if (locale_id == -1) {
- LocaleConvertCaseHelper(&converted, is_to_upper, icu::Locale::getRoot());
- } else if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) {
- // TODO(jshin): Once http://bugs.icu-project.org/trac/ticket/10582 is
- // fixed, remove this special-casing for uppercasing in Greek(el) locale.
- // This is ~500 times slower than using the case conversion API.
- ConvertCaseWithTransliterator(&converted, "el-Upper");
- } else {
- LocaleConvertCaseHelper(&converted, is_to_upper,
- icu::Locale(conversion_locales[locale_id]));
+ // Greek (id == 1) uppercasing has to be done via transliteration.
+ // TODO(jshin): Drop this special-casing once ICU's regular case conversion
+ // API supports Greek uppercasing. See
+ // http://bugs.icu-project.org/trac/ticket/10582 .
+ // ICU's C API for transliteration is nasty and we just use C++ API.
+ if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) {
+ icu::UnicodeString converted;
+ {
+ DisallowHeapAllocation no_gc;
+ String::FlatContent flat = s->GetFlatContent();
+ if (src == nullptr) {
+ DCHECK(flat.IsTwoByte());
+ src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
+ }
+ // Starts with the source string and will be replaced by the converted
+ // result.
+ converted.fastCopyFrom(icu::UnicodeString(false, src, src_length));
+ ConvertCaseWithTransliterator(&converted, "el-Upper");
+ }
+ Handle<String> result;
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+ isolate, result,
+ isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
+ reinterpret_cast<const uint16_t*>(converted.getBuffer()),
+ converted.length())));
+ return *result;
}
- Handle<String> result;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, result,
- isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
- reinterpret_cast<const uint16_t*>(converted.getBuffer()),
- converted.length())));
- return *result;
+ typedef int32_t (*case_conversion_fn)(
+ UChar * dest, int32_t destCapacity, const UChar* src, int32_t srcLength,
+ const char* locale, UErrorCode* pErrorCode);
+ case_conversion_fn fn = is_to_upper ? u_strToUpper : u_strToLower;
+ const char* locale = locale_id == -1 ? "" : conversion_locales[locale_id];
+
+ int32_t dest_length = src_length;
+ UErrorCode error;
+ Handle<SeqTwoByteString> result;
+ do {
+ result =
+ isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked();
+ base::SmartArrayPointer<uc16> sap;
+ DisallowHeapAllocation no_gc;
+ String::FlatContent flat = s->GetFlatContent();
+ // For OneByteString, |src| is already obtained with |sap| outside the loop.
+ if (flat.IsTwoByte())
+ src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
+ error = U_ZERO_ERROR;
+ dest_length = fn(reinterpret_cast<UChar*>(result->GetChars()), dest_length,
+ src, src_length, locale, &error);
+ } while (error == U_BUFFER_OVERFLOW_ERROR);
+
+ // In most cases, the output will fill the destination buffer completely
+ // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
+ // Only in rare cases, it'll be shorter than the destination buffer and
+ // |result| has to be truncated.
+ DCHECK(U_SUCCESS(error));
+ if (U_SUCCESS(error)) {
+ if (V8_UNLIKELY(error != U_STRING_NOT_TERMINATED_WARNING)) {
+ result = Handle<SeqTwoByteString>::cast(
+ SeqString::Truncate(result, dest_length));
+ }
+ return *result;
+ }
+ return *s;
}
// Returns true when |ch| is one of the ASCII uppercase letters 'A' through
// 'Z' (inclusive); every other code unit yields false.
inline bool IsASCIIUpper(uint16_t ch) {
  const bool outside_upper_range = (ch < 'A') || (ch > 'Z');
  return !outside_upper_range;
}
« no previous file with comments | « no previous file | test/intl/general/case-mapping.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698