Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Side by Side Diff: src/runtime/runtime-i18n.cc

Issue 1875263006: Experimental CL on top of https://codereview.chromium.org/1812673005/ (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@caseconv
Patch Set: back to do-while loop with DisallowHeapAlloc inside the loop per adamk@ | Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/intl/general/case-mapping.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 5
6 #ifdef V8_I18N_SUPPORT 6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h" 7 #include "src/runtime/runtime-utils.h"
8 8
9 #include "src/api.h" 9 #include "src/api.h"
10 #include "src/api-natives.h" 10 #include "src/api-natives.h"
(...skipping 735 matching lines...) Expand 10 before | Expand all | Expand 10 after
746 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { 746 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
747 return *isolate->factory()->NewStringFromStaticChars("kana"); 747 return *isolate->factory()->NewStringFromStaticChars("kana");
748 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { 748 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
749 return *isolate->factory()->NewStringFromStaticChars("ideo"); 749 return *isolate->factory()->NewStringFromStaticChars("ideo");
750 } else { 750 } else {
751 return *isolate->factory()->NewStringFromStaticChars("unknown"); 751 return *isolate->factory()->NewStringFromStaticChars("unknown");
752 } 752 }
753 } 753 }
754 754
755 namespace { 755 namespace {
756 inline void LocaleConvertCaseHelper(icu::UnicodeString* s, bool is_to_upper,
757 const icu::Locale& locale) {
758 if (is_to_upper)
759 s->toUpper(locale);
760 else
761 s->toLower(locale);
762 }
763 void ConvertCaseWithTransliterator(icu::UnicodeString* input, 756 void ConvertCaseWithTransliterator(icu::UnicodeString* input,
764 const char* transliterator_id) { 757 const char* transliterator_id) {
765 UErrorCode status = U_ZERO_ERROR; 758 UErrorCode status = U_ZERO_ERROR;
766 base::SmartPointer<icu::Transliterator> translit( 759 base::SmartPointer<icu::Transliterator> translit(
767 icu::Transliterator::createInstance( 760 icu::Transliterator::createInstance(
768 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, 761 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD,
769 status)); 762 status));
770 if (U_FAILURE(status)) return; 763 if (U_FAILURE(status)) return;
771 translit->transliterate(*input); 764 translit->transliterate(*input);
772 } 765 }
773 766
774 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, 767 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
775 bool is_to_upper, int locale_id) { 768 bool is_to_upper, int locale_id) {
776 static const char* conversion_locales[] = { 769 static const char* conversion_locales[] = {
777 "az", "el", "lt", "tr", 770 "az", "el", "lt", "tr",
778 }; 771 };
779 RUNTIME_ASSERT(locale_id >= -1 && 772 RUNTIME_ASSERT(locale_id >= -1 &&
780 locale_id < static_cast<int>(arraysize(conversion_locales))); 773 locale_id < static_cast<int>(arraysize(conversion_locales)));
781 int32_t length = s->length(); 774 int32_t src_length = s->length();
782 icu::UnicodeString converted; 775 const UChar* src = nullptr;
783 {
784 DisallowHeapAllocation no_gc;
785 DCHECK(s->IsFlat());
786 String::FlatContent flat = s->GetFlatContent();
787 776
788 const UChar* src; 777 base::SmartArrayPointer<uc16> sap;
789 if (flat.IsOneByte()) { 778 if (s->IsOneByteRepresentationUnderneath()) {
790 base::SmartArrayPointer<uc16> sap = s->ToWideCString(); 779 sap = s->ToWideCString();
791 src = reinterpret_cast<const UChar*>(sap.get()); 780 src = reinterpret_cast<const UChar*>(sap.get());
792 converted = icu::UnicodeString(src, length);
793 } else {
794 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
795 converted = icu::UnicodeString(src, length);
796 }
797 } 781 }
798 782
799 if (locale_id == -1) { 783 // Greek (id == 1) uppercasing has to be done via transliteration.
800 LocaleConvertCaseHelper(&converted, is_to_upper, icu::Locale::getRoot()); 784 // TODO(jshin): Drop this special-casing once ICU's regular case conversion
801 } else if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) { 785 // API supports Greek uppercasing. See
802 // TODO(jshin): Once http://bugs.icu-project.org/trac/ticket/10582 is 786 // http://bugs.icu-project.org/trac/ticket/10582 .
803 // fixed, remove this special-casing for uppercasing in Greek(el) locale. 787 // ICU's C API for transliteration is nasty and we just use C++ API.
804 // This is ~500 times slower than using the case conversion API. 788 if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) {
805 ConvertCaseWithTransliterator(&converted, "el-Upper"); 789 icu::UnicodeString converted;
806 } else { 790 {
807 LocaleConvertCaseHelper(&converted, is_to_upper, 791 DisallowHeapAllocation no_gc;
808 icu::Locale(conversion_locales[locale_id])); 792 String::FlatContent flat = s->GetFlatContent();
793 if (src == nullptr) {
794 DCHECK(flat.IsTwoByte());
795 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
796 }
797 // Starts with the source string and will be replaced by the converted
798 // result.
799 converted.fastCopyFrom(icu::UnicodeString(false, src, src_length));
800 ConvertCaseWithTransliterator(&converted, "el-Upper");
801 }
802 Handle<String> result;
803 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
804 isolate, result,
805 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
806 reinterpret_cast<const uint16_t*>(converted.getBuffer()),
807 converted.length())));
808 return *result;
809 } 809 }
810 810
811 Handle<String> result; 811 typedef int32_t (*case_conversion_fn)(
812 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 812 UChar * dest, int32_t destCapacity, const UChar* src, int32_t srcLength,
813 isolate, result, 813 const char* locale, UErrorCode* pErrorCode);
814 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( 814 case_conversion_fn fn = is_to_upper ? u_strToUpper : u_strToLower;
815 reinterpret_cast<const uint16_t*>(converted.getBuffer()), 815 const char* locale = locale_id == -1 ? "" : conversion_locales[locale_id];
816 converted.length()))); 816
817 return *result; 817 int32_t dest_length = src_length;
818 UErrorCode error;
819 Handle<SeqTwoByteString> result;
820 do {
821 result =
822 isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked();
823 base::SmartArrayPointer<uc16> sap;
824 DisallowHeapAllocation no_gc;
825 String::FlatContent flat = s->GetFlatContent();
826 // For OneByteString, |src| is already obtained with |sap| outside the loop.
827 if (flat.IsTwoByte())
828 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
829 error = U_ZERO_ERROR;
830 dest_length = fn(reinterpret_cast<UChar*>(result->GetChars()), dest_length,
831 src, src_length, locale, &error);
832 } while (error == U_BUFFER_OVERFLOW_ERROR);
833
834 // In most cases, the output will fill the destination buffer completely
835 // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
836 // Only in rare cases, it'll be shorter than the destination buffer and
837 // |result| has to be truncated.
838 DCHECK(U_SUCCESS(error));
839 if (U_SUCCESS(error)) {
840 if (V8_UNLIKELY(error != U_STRING_NOT_TERMINATED_WARNING)) {
841 result = Handle<SeqTwoByteString>::cast(
842 SeqString::Truncate(result, dest_length));
843 }
844 return *result;
845 }
846 return *s;
818 } 847 }
819 848
820 inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; } 849 inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; }
821 850
822 inline uint16_t ToASCIILower(uint16_t ch) { 851 inline uint16_t ToASCIILower(uint16_t ch) {
823 return ch | ((ch >= 'A' && ch <= 'Z') << 5); 852 return ch | ((ch >= 'A' && ch <= 'Z') << 5);
824 } 853 }
825 854
826 inline uint16_t ToASCIIUpper(uint16_t ch) { 855 inline uint16_t ToASCIIUpper(uint16_t ch) {
827 return ch & ~((ch >= 'a' && ch <= 'z') << 5); 856 return ch & ~((ch >= 'a' && ch <= 'z') << 5);
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
970 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1); 999 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
971 CONVERT_NUMBER_CHECKED(int, lang_id, Int32, args[2]); 1000 CONVERT_NUMBER_CHECKED(int, lang_id, Int32, args[2]);
972 1001
973 return LocaleConvertCase(s, isolate, is_upper, lang_id); 1002 return LocaleConvertCase(s, isolate, is_upper, lang_id);
974 } 1003 }
975 1004
976 } // namespace internal 1005 } // namespace internal
977 } // namespace v8 1006 } // namespace v8
978 1007
979 #endif // V8_I18N_SUPPORT 1008 #endif // V8_I18N_SUPPORT
OLD | NEW
« no previous file with comments | « no previous file | test/intl/general/case-mapping.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698