OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 #ifdef V8_I18N_SUPPORT | 6 #ifdef V8_I18N_SUPPORT |
7 #include "src/runtime/runtime-utils.h" | 7 #include "src/runtime/runtime-utils.h" |
8 | 8 |
9 #include "src/api.h" | 9 #include "src/api.h" |
10 #include "src/api-natives.h" | 10 #include "src/api-natives.h" |
(...skipping 735 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
746 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { | 746 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { |
747 return *isolate->factory()->NewStringFromStaticChars("kana"); | 747 return *isolate->factory()->NewStringFromStaticChars("kana"); |
748 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { | 748 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { |
749 return *isolate->factory()->NewStringFromStaticChars("ideo"); | 749 return *isolate->factory()->NewStringFromStaticChars("ideo"); |
750 } else { | 750 } else { |
751 return *isolate->factory()->NewStringFromStaticChars("unknown"); | 751 return *isolate->factory()->NewStringFromStaticChars("unknown"); |
752 } | 752 } |
753 } | 753 } |
754 | 754 |
755 namespace { | 755 namespace { |
756 inline void LocaleConvertCaseHelper(icu::UnicodeString* s, bool is_to_upper, | |
757 const icu::Locale& locale) { | |
758 if (is_to_upper) | |
759 s->toUpper(locale); | |
760 else | |
761 s->toLower(locale); | |
762 } | |
763 void ConvertCaseWithTransliterator(icu::UnicodeString* input, | 756 void ConvertCaseWithTransliterator(icu::UnicodeString* input, |
764 const char* transliterator_id) { | 757 const char* transliterator_id) { |
765 UErrorCode status = U_ZERO_ERROR; | 758 UErrorCode status = U_ZERO_ERROR; |
766 base::SmartPointer<icu::Transliterator> translit( | 759 base::SmartPointer<icu::Transliterator> translit( |
767 icu::Transliterator::createInstance( | 760 icu::Transliterator::createInstance( |
768 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, | 761 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, |
769 status)); | 762 status)); |
770 if (U_FAILURE(status)) return; | 763 if (U_FAILURE(status)) return; |
771 translit->transliterate(*input); | 764 translit->transliterate(*input); |
772 } | 765 } |
773 | 766 |
774 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, | 767 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, |
775 bool is_to_upper, int locale_id) { | 768 bool is_to_upper, int locale_id) { |
776 static const char* conversion_locales[] = { | 769 static const char* conversion_locales[] = { |
777 "az", "el", "lt", "tr", | 770 "az", "el", "lt", "tr", |
778 }; | 771 }; |
779 RUNTIME_ASSERT(locale_id >= -1 && | 772 RUNTIME_ASSERT(locale_id >= -1 && |
780 locale_id < static_cast<int>(arraysize(conversion_locales))); | 773 locale_id < static_cast<int>(arraysize(conversion_locales))); |
781 int32_t length = s->length(); | 774 int32_t src_length = s->length(); |
782 icu::UnicodeString converted; | 775 const UChar* src = nullptr; |
783 { | |
784 DisallowHeapAllocation no_gc; | |
785 DCHECK(s->IsFlat()); | |
786 String::FlatContent flat = s->GetFlatContent(); | |
787 | 776 |
788 const UChar* src; | 777 base::SmartArrayPointer<uc16> sap; |
789 if (flat.IsOneByte()) { | 778 if (s->IsOneByteRepresentationUnderneath()) { |
790 base::SmartArrayPointer<uc16> sap = s->ToWideCString(); | 779 sap = s->ToWideCString(); |
791 src = reinterpret_cast<const UChar*>(sap.get()); | 780 src = reinterpret_cast<const UChar*>(sap.get()); |
792 converted = icu::UnicodeString(src, length); | |
793 } else { | |
794 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); | |
795 converted = icu::UnicodeString(src, length); | |
796 } | |
797 } | 781 } |
798 | 782 |
799 if (locale_id == -1) { | 783 // Greek (id == 1) uppercasing has to be done via transliteration. |
800 LocaleConvertCaseHelper(&converted, is_to_upper, icu::Locale::getRoot()); | 784 // TODO(jshin): Drop this special-casing once ICU's regular case conversion |
801 } else if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) { | 785 // API supports Greek uppercasing. See |
802 // TODO(jshin): Once http://bugs.icu-project.org/trac/ticket/10582 is | 786 // http://bugs.icu-project.org/trac/ticket/10582 . |
803 // fixed, remove this special-casing for uppercasing in Greek(el) locale. | 787 // ICU's C API for transliteration is nasty and we just use C++ API. |
804 // This is ~500 times slower than using the case conversion API. | 788 if (V8_UNLIKELY(locale_id == 1 && is_to_upper)) { |
805 ConvertCaseWithTransliterator(&converted, "el-Upper"); | 789 icu::UnicodeString converted; |
806 } else { | 790 { |
807 LocaleConvertCaseHelper(&converted, is_to_upper, | 791 DisallowHeapAllocation no_gc; |
808 icu::Locale(conversion_locales[locale_id])); | 792 String::FlatContent flat = s->GetFlatContent(); |
| 793 if (src == nullptr) { |
| 794 DCHECK(flat.IsTwoByte()); |
| 795 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
| 796 } |
| 797 // Starts with the source string and will be replaced by the converted |
| 798 // result. |
| 799 converted.fastCopyFrom(icu::UnicodeString(false, src, src_length)); |
| 800 ConvertCaseWithTransliterator(&converted, "el-Upper"); |
| 801 } |
| 802 Handle<String> result; |
| 803 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 804 isolate, result, |
| 805 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 806 reinterpret_cast<const uint16_t*>(converted.getBuffer()), |
| 807 converted.length()))); |
| 808 return *result; |
809 } | 809 } |
810 | 810 |
811 Handle<String> result; | 811 typedef int32_t (*case_conversion_fn)( |
812 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 812 UChar * dest, int32_t destCapacity, const UChar* src, int32_t srcLength, |
813 isolate, result, | 813 const char* locale, UErrorCode* pErrorCode); |
814 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( | 814 case_conversion_fn fn = is_to_upper ? u_strToUpper : u_strToLower; |
815 reinterpret_cast<const uint16_t*>(converted.getBuffer()), | 815 const char* locale = locale_id == -1 ? "" : conversion_locales[locale_id]; |
816 converted.length()))); | 816 |
817 return *result; | 817 int32_t dest_length = src_length; |
| 818 UErrorCode error; |
| 819 Handle<SeqTwoByteString> result; |
| 820 do { |
| 821 result = |
| 822 isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked(); |
| 823 base::SmartArrayPointer<uc16> sap; |
| 824 DisallowHeapAllocation no_gc; |
| 825 String::FlatContent flat = s->GetFlatContent(); |
| 826 // For OneByteString, |src| is already obtained with |sap| outside the loop. |
| 827 if (flat.IsTwoByte()) |
| 828 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
| 829 error = U_ZERO_ERROR; |
| 830 dest_length = fn(reinterpret_cast<UChar*>(result->GetChars()), dest_length, |
| 831 src, src_length, locale, &error); |
| 832 } while (error == U_BUFFER_OVERFLOW_ERROR); |
| 833 |
| 834 // In most cases, the output will fill the destination buffer completely |
| 835 // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING). |
| 836 // Only in rare cases, it'll be shorter than the destination buffer and |
| 837 // |result| has to be truncated. |
| 838 DCHECK(U_SUCCESS(error)); |
| 839 if (U_SUCCESS(error)) { |
| 840 if (V8_UNLIKELY(error != U_STRING_NOT_TERMINATED_WARNING)) { |
| 841 result = Handle<SeqTwoByteString>::cast( |
| 842 SeqString::Truncate(result, dest_length)); |
| 843 } |
| 844 return *result; |
| 845 } |
| 846 return *s; |
818 } | 847 } |
819 | 848 |
820 inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; } | 849 inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; } |
821 | 850 |
822 inline uint16_t ToASCIILower(uint16_t ch) { | 851 inline uint16_t ToASCIILower(uint16_t ch) { |
823 return ch | ((ch >= 'A' && ch <= 'Z') << 5); | 852 return ch | ((ch >= 'A' && ch <= 'Z') << 5); |
824 } | 853 } |
825 | 854 |
826 inline uint16_t ToASCIIUpper(uint16_t ch) { | 855 inline uint16_t ToASCIIUpper(uint16_t ch) { |
827 return ch & ~((ch >= 'a' && ch <= 'z') << 5); | 856 return ch & ~((ch >= 'a' && ch <= 'z') << 5); |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
970 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1); | 999 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1); |
971 CONVERT_NUMBER_CHECKED(int, lang_id, Int32, args[2]); | 1000 CONVERT_NUMBER_CHECKED(int, lang_id, Int32, args[2]); |
972 | 1001 |
973 return LocaleConvertCase(s, isolate, is_upper, lang_id); | 1002 return LocaleConvertCase(s, isolate, is_upper, lang_id); |
974 } | 1003 } |
975 | 1004 |
976 } // namespace internal | 1005 } // namespace internal |
977 } // namespace v8 | 1006 } // namespace v8 |
978 | 1007 |
979 #endif // V8_I18N_SUPPORT | 1008 #endif // V8_I18N_SUPPORT |
OLD | NEW |